source: trunk/minix/kernel/system.c@ 9

Last change on this file since 9 was 9, checked in by Mattia Monga, 13 years ago

Minix 3.1.2a

File size: 20.9 KB
Line 
1/* This task handles the interface between the kernel and user-level servers.
2 * System services can be accessed by doing a system call. System calls are
3 * transformed into request messages, which are handled by this task. By
4 * convention, a sys_call() is transformed into a SYS_CALL request message that
5 * is handled in a function named do_call().
6 *
7 * A private call vector is used to map all system calls to the functions that
8 * handle them. The actual handler functions are contained in separate files
9 * to keep this file clean. The call vector is used in the system task's main
10 * loop to handle all incoming requests.
11 *
12 * In addition to the main sys_task() entry point, which starts the main loop,
13 * there are several other minor entry points:
14 * get_priv: assign privilege structure to user or system process
15 * send_sig: send a signal directly to a system process
16 * cause_sig: take action to cause a signal to occur via PM
17 * umap_local: map virtual address in LOCAL_SEG to physical
18 * umap_remote: map virtual address in REMOTE_SEG to physical
19 * umap_bios: map virtual address in BIOS_SEG to physical
20 * virtual_copy: copy bytes from one virtual address to another
21 * get_randomness: accumulate randomness in a buffer
22 * clear_endpoint: remove a process' ability to send and receive messages
23 *
24 * Changes:
25 * Aug 04, 2005 check if system call is allowed (Jorrit N. Herder)
26 * Jul 20, 2005 send signal to services with message (Jorrit N. Herder)
27 * Jan 15, 2005 new, generalized virtual copy function (Jorrit N. Herder)
28 * Oct 10, 2004 dispatch system calls from call vector (Jorrit N. Herder)
29 * Sep 30, 2004 source code documentation updated (Jorrit N. Herder)
30 */
31
32#include "debug.h"
33#include "kernel.h"
34#include "system.h"
35#include <stdlib.h>
36#include <signal.h>
37#include <unistd.h>
38#include <sys/sigcontext.h>
39#include <minix/endpoint.h>
40#if (CHIP == INTEL)
41#include <ibm/memory.h>
42#include "protect.h"
43#endif
44
45/* Declaration of the call vector that defines the mapping of system calls
46 * to handler functions. The vector is initialized in sys_init() with map(),
47 * which makes sure the system call numbers are ok. No space is allocated,
48 * because the dummy is declared extern. If an illegal call is given, the
49 * array size will be negative and this won't compile.
50 */
51PUBLIC int (*call_vec[NR_SYS_CALLS])(message *m_ptr);
52
53#define map(call_nr, handler) \
54 {extern int dummy[NR_SYS_CALLS>(unsigned)(call_nr-KERNEL_CALL) ? 1:-1];} \
55 call_vec[(call_nr-KERNEL_CALL)] = (handler)
56
57FORWARD _PROTOTYPE( void initialize, (void));
58
59/*===========================================================================*
60 * sys_task *
61 *===========================================================================*/
62PUBLIC void sys_task()
63{
64/* Main entry point of sys_task. Get the message and dispatch on type. */
65 static message m;
66 register int result;
67 register struct proc *caller_ptr;
68 unsigned int call_nr;
69 int s;
70
71 /* Initialize the system task. */
72 initialize();
73
74 while (TRUE) {
75 /* Get work. Block and wait until a request message arrives. */
76 receive(ANY, &m);
77 call_nr = (unsigned) m.m_type - KERNEL_CALL;
78 who_e = m.m_source;
79 okendpt(who_e, &who_p);
80 caller_ptr = proc_addr(who_p);
81
82 /* See if the caller made a valid request and try to handle it. */
83 if (! (priv(caller_ptr)->s_call_mask & (1<<call_nr))) {
84#if DEBUG_ENABLE_IPC_WARNINGS
85 kprintf("SYSTEM: request %d from %d denied.\n", call_nr,m.m_source);
86#endif
87 result = ECALLDENIED; /* illegal message type */
88 } else if (call_nr >= NR_SYS_CALLS) { /* check call number */
89#if DEBUG_ENABLE_IPC_WARNINGS
90 kprintf("SYSTEM: illegal request %d from %d.\n", call_nr,m.m_source);
91#endif
92 result = EBADREQUEST; /* illegal message type */
93 }
94 else {
95 result = (*call_vec[call_nr])(&m); /* handle the system call */
96 }
97
98 /* Send a reply, unless inhibited by a handler function. Use the kernel
99 * function lock_send() to prevent a system call trap. The destination
100 * is known to be blocked waiting for a message.
101 */
102 if (result != EDONTREPLY) {
103 m.m_type = result; /* report status of call */
104 if (OK != (s=lock_send(m.m_source, &m))) {
105 kprintf("SYSTEM, reply to %d failed: %d\n", m.m_source, s);
106 }
107 }
108 }
109}
110
111/*===========================================================================*
112 * initialize *
113 *===========================================================================*/
114PRIVATE void initialize(void)
115{
116 register struct priv *sp;
117 int i;
118
119 /* Initialize IRQ handler hooks. Mark all hooks available. */
120 for (i=0; i<NR_IRQ_HOOKS; i++) {
121 irq_hooks[i].proc_nr_e = NONE;
122 }
123
124 /* Initialize all alarm timers for all processes. */
125 for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
126 tmr_inittimer(&(sp->s_alarm_timer));
127 }
128
129 /* Initialize the call vector to a safe default handler. Some system calls
130 * may be disabled or nonexistant. Then explicitely map known calls to their
131 * handler functions. This is done with a macro that gives a compile error
132 * if an illegal call number is used. The ordering is not important here.
133 */
134 for (i=0; i<NR_SYS_CALLS; i++) {
135 call_vec[i] = do_unused;
136 }
137
138 /* Process management. */
139 map(SYS_FORK, do_fork); /* a process forked a new process */
140 map(SYS_EXEC, do_exec); /* update process after execute */
141 map(SYS_EXIT, do_exit); /* clean up after process exit */
142 map(SYS_NICE, do_nice); /* set scheduling priority */
143 map(SYS_PRIVCTL, do_privctl); /* system privileges control */
144 map(SYS_TRACE, do_trace); /* request a trace operation */
145
146 /* Signal handling. */
147 map(SYS_KILL, do_kill); /* cause a process to be signaled */
148 map(SYS_GETKSIG, do_getksig); /* PM checks for pending signals */
149 map(SYS_ENDKSIG, do_endksig); /* PM finished processing signal */
150 map(SYS_SIGSEND, do_sigsend); /* start POSIX-style signal */
151 map(SYS_SIGRETURN, do_sigreturn); /* return from POSIX-style signal */
152
153 /* Device I/O. */
154 map(SYS_IRQCTL, do_irqctl); /* interrupt control operations */
155 map(SYS_DEVIO, do_devio); /* inb, inw, inl, outb, outw, outl */
156 map(SYS_SDEVIO, do_sdevio); /* phys_insb, _insw, _outsb, _outsw */
157 map(SYS_VDEVIO, do_vdevio); /* vector with devio requests */
158 map(SYS_INT86, do_int86); /* real-mode BIOS calls */
159
160 /* Memory management. */
161 map(SYS_NEWMAP, do_newmap); /* set up a process memory map */
162 map(SYS_SEGCTL, do_segctl); /* add segment and get selector */
163 map(SYS_MEMSET, do_memset); /* write char to memory area */
164 map(SYS_VM_SETBUF, do_vm_setbuf); /* PM passes buffer for page tables */
165 map(SYS_VM_MAP, do_vm_map); /* Map/unmap physical (device) memory */
166
167 /* Copying. */
168 map(SYS_UMAP, do_umap); /* map virtual to physical address */
169 map(SYS_VIRCOPY, do_vircopy); /* use pure virtual addressing */
170 map(SYS_PHYSCOPY, do_physcopy); /* use physical addressing */
171 map(SYS_VIRVCOPY, do_virvcopy); /* vector with copy requests */
172 map(SYS_PHYSVCOPY, do_physvcopy); /* vector with copy requests */
173
174 /* Clock functionality. */
175 map(SYS_TIMES, do_times); /* get uptime and process times */
176 map(SYS_SETALARM, do_setalarm); /* schedule a synchronous alarm */
177
178 /* System control. */
179 map(SYS_ABORT, do_abort); /* abort MINIX */
180 map(SYS_GETINFO, do_getinfo); /* request system information */
181 map(SYS_IOPENABLE, do_iopenable); /* Enable I/O */
182}
183
184/*===========================================================================*
185 * get_priv *
186 *===========================================================================*/
187PUBLIC int get_priv(rc, proc_type)
188register struct proc *rc; /* new (child) process pointer */
189int proc_type; /* system or user process flag */
190{
191/* Get a privilege structure. All user processes share the same privilege
192 * structure. System processes get their own privilege structure.
193 */
194 register struct priv *sp; /* privilege structure */
195
196 if (proc_type == SYS_PROC) { /* find a new slot */
197 for (sp = BEG_PRIV_ADDR; sp < END_PRIV_ADDR; ++sp)
198 if (sp->s_proc_nr == NONE && sp->s_id != USER_PRIV_ID) break;
199 if (sp->s_proc_nr != NONE) return(ENOSPC);
200 rc->p_priv = sp; /* assign new slot */
201 rc->p_priv->s_proc_nr = proc_nr(rc); /* set association */
202 rc->p_priv->s_flags = SYS_PROC; /* mark as privileged */
203 } else {
204 rc->p_priv = &priv[USER_PRIV_ID]; /* use shared slot */
205 rc->p_priv->s_proc_nr = INIT_PROC_NR; /* set association */
206 rc->p_priv->s_flags = 0; /* no initial flags */
207 }
208 return(OK);
209}
210
211/*===========================================================================*
212 * get_randomness *
213 *===========================================================================*/
214PUBLIC void get_randomness(source)
215int source;
216{
217/* On machines with the RDTSC (cycle counter read instruction - pentium
218 * and up), use that for high-resolution raw entropy gathering. Otherwise,
219 * use the realtime clock (tick resolution).
220 *
221 * Unfortunately this test is run-time - we don't want to bother with
222 * compiling different kernels for different machines.
223 *
224 * On machines without RDTSC, we use read_clock().
225 */
226 int r_next;
227 unsigned long tsc_high, tsc_low;
228
229 source %= RANDOM_SOURCES;
230 r_next= krandom.bin[source].r_next;
231 if (machine.processor > 486) {
232 read_tsc(&tsc_high, &tsc_low);
233 krandom.bin[source].r_buf[r_next] = tsc_low;
234 } else {
235 krandom.bin[source].r_buf[r_next] = read_clock();
236 }
237 if (krandom.bin[source].r_size < RANDOM_ELEMENTS) {
238 krandom.bin[source].r_size ++;
239 }
240 krandom.bin[source].r_next = (r_next + 1 ) % RANDOM_ELEMENTS;
241}
242
243/*===========================================================================*
244 * send_sig *
245 *===========================================================================*/
246PUBLIC void send_sig(int proc_nr, int sig_nr)
247{
248/* Notify a system process about a signal. This is straightforward. Simply
249 * set the signal that is to be delivered in the pending signals map and
250 * send a notification with source SYSTEM.
251 *
252 * Process number is verified to avoid writing in random places, but we
253 * don't kprintf() or panic() because that causes send_sig() invocations.
254 */
255 register struct proc *rp;
256 static int n;
257
258 if(!isokprocn(proc_nr) || isemptyn(proc_nr))
259 return;
260
261 rp = proc_addr(proc_nr);
262 sigaddset(&priv(rp)->s_sig_pending, sig_nr);
263 lock_notify(SYSTEM, rp->p_endpoint);
264}
265
266/*===========================================================================*
267 * cause_sig *
268 *===========================================================================*/
269PUBLIC void cause_sig(proc_nr, sig_nr)
270int proc_nr; /* process to be signalled */
271int sig_nr; /* signal to be sent, 1 to _NSIG */
272{
273/* A system process wants to send a signal to a process. Examples are:
274 * - HARDWARE wanting to cause a SIGSEGV after a CPU exception
275 * - TTY wanting to cause SIGINT upon getting a DEL
276 * - FS wanting to cause SIGPIPE for a broken pipe
277 * Signals are handled by sending a message to PM. This function handles the
278 * signals and makes sure the PM gets them by sending a notification. The
279 * process being signaled is blocked while PM has not finished all signals
280 * for it.
281 * Race conditions between calls to this function and the system calls that
282 * process pending kernel signals cannot exist. Signal related functions are
283 * only called when a user process causes a CPU exception and from the kernel
284 * process level, which runs to completion.
285 */
286 register struct proc *rp;
287
288 /* Check if the signal is already pending. Process it otherwise. */
289 rp = proc_addr(proc_nr);
290 if (! sigismember(&rp->p_pending, sig_nr)) {
291 sigaddset(&rp->p_pending, sig_nr);
292 if (! (rp->p_rts_flags & SIGNALED)) { /* other pending */
293 if (rp->p_rts_flags == 0) lock_dequeue(rp); /* make not ready */
294 rp->p_rts_flags |= SIGNALED | SIG_PENDING; /* update flags */
295 send_sig(PM_PROC_NR, SIGKSIG);
296 }
297 }
298}
299
300/*===========================================================================*
301 * umap_bios *
302 *===========================================================================*/
303PUBLIC phys_bytes umap_bios(rp, vir_addr, bytes)
304register struct proc *rp; /* pointer to proc table entry for process */
305vir_bytes vir_addr; /* virtual address in BIOS segment */
306vir_bytes bytes; /* # of bytes to be copied */
307{
308/* Calculate the physical memory address at the BIOS. Note: currently, BIOS
309 * address zero (the first BIOS interrupt vector) is not considered, as an
310 * error here, but since the physical address will be zero as well, the
311 * calling function will think an error occurred. This is not a problem,
312 * since no one uses the first BIOS interrupt vector.
313 */
314
315 /* Check all acceptable ranges. */
316 if (vir_addr >= BIOS_MEM_BEGIN && vir_addr + bytes <= BIOS_MEM_END)
317 return (phys_bytes) vir_addr;
318 else if (vir_addr >= BASE_MEM_TOP && vir_addr + bytes <= UPPER_MEM_END)
319 return (phys_bytes) vir_addr;
320
321#if DEAD_CODE /* brutal fix, if the above is too restrictive */
322 if (vir_addr >= BIOS_MEM_BEGIN && vir_addr + bytes <= UPPER_MEM_END)
323 return (phys_bytes) vir_addr;
324#endif
325
326 kprintf("Warning, error in umap_bios, virtual address 0x%x\n", vir_addr);
327 return 0;
328}
329
330/*===========================================================================*
331 * umap_local *
332 *===========================================================================*/
333PUBLIC phys_bytes umap_local(rp, seg, vir_addr, bytes)
334register struct proc *rp; /* pointer to proc table entry for process */
335int seg; /* T, D, or S segment */
336vir_bytes vir_addr; /* virtual address in bytes within the seg */
337vir_bytes bytes; /* # of bytes to be copied */
338{
339/* Calculate the physical memory address for a given virtual address. */
340 vir_clicks vc; /* the virtual address in clicks */
341 phys_bytes pa; /* intermediate variables as phys_bytes */
342#if (CHIP == INTEL)
343 phys_bytes seg_base;
344#endif
345
346 /* If 'seg' is D it could really be S and vice versa. T really means T.
347 * If the virtual address falls in the gap, it causes a problem. On the
348 * 8088 it is probably a legal stack reference, since "stackfaults" are
349 * not detected by the hardware. On 8088s, the gap is called S and
350 * accepted, but on other machines it is called D and rejected.
351 * The Atari ST behaves like the 8088 in this respect.
352 */
353
354 if (bytes <= 0) return( (phys_bytes) 0);
355 if (vir_addr + bytes <= vir_addr) return 0; /* overflow */
356 vc = (vir_addr + bytes - 1) >> CLICK_SHIFT; /* last click of data */
357
358#if (CHIP == INTEL) || (CHIP == M68000)
359 if (seg != T)
360 seg = (vc < rp->p_memmap[D].mem_vir + rp->p_memmap[D].mem_len ? D : S);
361#else
362 if (seg != T)
363 seg = (vc < rp->p_memmap[S].mem_vir ? D : S);
364#endif
365
366 if ((vir_addr>>CLICK_SHIFT) >= rp->p_memmap[seg].mem_vir +
367 rp->p_memmap[seg].mem_len) return( (phys_bytes) 0 );
368
369 if (vc >= rp->p_memmap[seg].mem_vir +
370 rp->p_memmap[seg].mem_len) return( (phys_bytes) 0 );
371
372#if (CHIP == INTEL)
373 seg_base = (phys_bytes) rp->p_memmap[seg].mem_phys;
374 seg_base = seg_base << CLICK_SHIFT; /* segment origin in bytes */
375#endif
376 pa = (phys_bytes) vir_addr;
377#if (CHIP != M68000)
378 pa -= rp->p_memmap[seg].mem_vir << CLICK_SHIFT;
379 return(seg_base + pa);
380#endif
381#if (CHIP == M68000)
382 pa -= (phys_bytes)rp->p_memmap[seg].mem_vir << CLICK_SHIFT;
383 pa += (phys_bytes)rp->p_memmap[seg].mem_phys << CLICK_SHIFT;
384 return(pa);
385#endif
386}
387
388/*===========================================================================*
389 * umap_remote *
390 *===========================================================================*/
391PUBLIC phys_bytes umap_remote(rp, seg, vir_addr, bytes)
392register struct proc *rp; /* pointer to proc table entry for process */
393int seg; /* index of remote segment */
394vir_bytes vir_addr; /* virtual address in bytes within the seg */
395vir_bytes bytes; /* # of bytes to be copied */
396{
397/* Calculate the physical memory address for a given virtual address. */
398 struct far_mem *fm;
399
400 if (bytes <= 0) return( (phys_bytes) 0);
401 if (seg < 0 || seg >= NR_REMOTE_SEGS) return( (phys_bytes) 0);
402
403 fm = &rp->p_priv->s_farmem[seg];
404 if (! fm->in_use) return( (phys_bytes) 0);
405 if (vir_addr + bytes > fm->mem_len) return( (phys_bytes) 0);
406
407 return(fm->mem_phys + (phys_bytes) vir_addr);
408}
409
410/*===========================================================================*
411 * virtual_copy *
412 *===========================================================================*/
413PUBLIC int virtual_copy(src_addr, dst_addr, bytes)
414struct vir_addr *src_addr; /* source virtual address */
415struct vir_addr *dst_addr; /* destination virtual address */
416vir_bytes bytes; /* # of bytes to copy */
417{
418/* Copy bytes from virtual address src_addr to virtual address dst_addr.
419 * Virtual addresses can be in ABS, LOCAL_SEG, REMOTE_SEG, or BIOS_SEG.
420 */
421 struct vir_addr *vir_addr[2]; /* virtual source and destination address */
422 phys_bytes phys_addr[2]; /* absolute source and destination */
423 int seg_index;
424 int i;
425
426 /* Check copy count. */
427 if (bytes <= 0) return(EDOM);
428
429 /* Do some more checks and map virtual addresses to physical addresses. */
430 vir_addr[_SRC_] = src_addr;
431 vir_addr[_DST_] = dst_addr;
432 for (i=_SRC_; i<=_DST_; i++) {
433 int proc_nr, type;
434 struct proc *p;
435
436 type = vir_addr[i]->segment & SEGMENT_TYPE;
437 if(type != PHYS_SEG && isokendpt(vir_addr[i]->proc_nr_e, &proc_nr))
438 p = proc_addr(proc_nr);
439 else
440 p = NULL;
441
442 /* Get physical address. */
443 switch(type) {
444 case LOCAL_SEG:
445 if(!p) return EDEADSRCDST;
446 seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
447 phys_addr[i] = umap_local(p, seg_index, vir_addr[i]->offset, bytes);
448 break;
449 case REMOTE_SEG:
450 if(!p) return EDEADSRCDST;
451 seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
452 phys_addr[i] = umap_remote(p, seg_index, vir_addr[i]->offset, bytes);
453 break;
454 case BIOS_SEG:
455 if(!p) return EDEADSRCDST;
456 phys_addr[i] = umap_bios(p, vir_addr[i]->offset, bytes );
457 break;
458 case PHYS_SEG:
459 phys_addr[i] = vir_addr[i]->offset;
460 break;
461 default:
462 return(EINVAL);
463 }
464
465 /* Check if mapping succeeded. */
466 if (phys_addr[i] <= 0 && vir_addr[i]->segment != PHYS_SEG)
467 return(EFAULT);
468 }
469
470 /* Now copy bytes between physical addresseses. */
471 phys_copy(phys_addr[_SRC_], phys_addr[_DST_], (phys_bytes) bytes);
472 return(OK);
473}
474
475
476/*===========================================================================*
477 * clear_endpoint *
478 *===========================================================================*/
479PUBLIC void clear_endpoint(rc)
480register struct proc *rc; /* slot of process to clean up */
481{
482 register struct proc *rp; /* iterate over process table */
483 register struct proc **xpp; /* iterate over caller queue */
484 int i;
485 int sys_id;
486
487 if(isemptyp(rc)) panic("clear_proc: empty process", proc_nr(rc));
488
489 /* Make sure that the exiting process is no longer scheduled. */
490 if (rc->p_rts_flags == 0) lock_dequeue(rc);
491 rc->p_rts_flags |= NO_ENDPOINT;
492
493 /* If the process happens to be queued trying to send a
494 * message, then it must be removed from the message queues.
495 */
496 if (rc->p_rts_flags & SENDING) {
497 int target_proc;
498
499 okendpt(rc->p_sendto_e, &target_proc);
500 xpp = &proc_addr(target_proc)->p_caller_q; /* destination's queue */
501 while (*xpp != NIL_PROC) { /* check entire queue */
502 if (*xpp == rc) { /* process is on the queue */
503 *xpp = (*xpp)->p_q_link; /* replace by next process */
504#if DEBUG_ENABLE_IPC_WARNINGS
505 kprintf("Proc %d removed from queue at %d\n",
506 proc_nr(rc), rc->p_sendto_e);
507#endif
508 break; /* can only be queued once */
509 }
510 xpp = &(*xpp)->p_q_link; /* proceed to next queued */
511 }
512 rc->p_rts_flags &= ~SENDING;
513 }
514 rc->p_rts_flags &= ~RECEIVING;
515
516 /* Likewise, if another process was sending or receive a message to or from
517 * the exiting process, it must be alerted that process no longer is alive.
518 * Check all processes.
519 */
520 for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; rp++) {
521 if(isemptyp(rp))
522 continue;
523
524 /* Unset pending notification bits. */
525 unset_sys_bit(priv(rp)->s_notify_pending, priv(rc)->s_id);
526
527 /* Check if process is receiving from exiting process. */
528 if ((rp->p_rts_flags & RECEIVING) && rp->p_getfrom_e == rc->p_endpoint) {
529 rp->p_reg.retreg = ESRCDIED; /* report source died */
530 rp->p_rts_flags &= ~RECEIVING; /* no longer receiving */
531#if DEBUG_ENABLE_IPC_WARNINGS
532 kprintf("Proc %d receive dead src %d\n", proc_nr(rp), proc_nr(rc));
533#endif
534 if (rp->p_rts_flags == 0) lock_enqueue(rp);/* let process run again */
535 }
536 if ((rp->p_rts_flags & SENDING) && rp->p_sendto_e == rc->p_endpoint) {
537 rp->p_reg.retreg = EDSTDIED; /* report destination died */
538 rp->p_rts_flags &= ~SENDING; /* no longer sending */
539#if DEBUG_ENABLE_IPC_WARNINGS
540 kprintf("Proc %d send dead dst %d\n", proc_nr(rp), proc_nr(rc));
541#endif
542 if (rp->p_rts_flags == 0) lock_enqueue(rp);/* let process run again */
543 }
544 }
545}
546
547
Note: See TracBrowser for help on using the repository browser.