Annotation of sys/kern/kern_fork.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: kern_fork.c,v 1.92 2007/07/25 23:11:52 art Exp $ */
2: /* $NetBSD: kern_fork.c,v 1.29 1996/02/09 18:59:34 christos Exp $ */
3:
4: /*
5: * Copyright (c) 1982, 1986, 1989, 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: * (c) UNIX System Laboratories, Inc.
8: * All or some portions of this file are derived from material licensed
9: * to the University of California by American Telephone and Telegraph
10: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11: * the permission of UNIX System Laboratories, Inc.
12: *
13: * Redistribution and use in source and binary forms, with or without
14: * modification, are permitted provided that the following conditions
15: * are met:
16: * 1. Redistributions of source code must retain the above copyright
17: * notice, this list of conditions and the following disclaimer.
18: * 2. Redistributions in binary form must reproduce the above copyright
19: * notice, this list of conditions and the following disclaimer in the
20: * documentation and/or other materials provided with the distribution.
21: * 3. Neither the name of the University nor the names of its contributors
22: * may be used to endorse or promote products derived from this software
23: * without specific prior written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35: * SUCH DAMAGE.
36: *
37: * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94
38: */
39:
40: #include <sys/param.h>
41: #include <sys/systm.h>
42: #include <sys/filedesc.h>
43: #include <sys/kernel.h>
44: #include <sys/malloc.h>
45: #include <sys/mount.h>
46: #include <sys/proc.h>
47: #include <sys/exec.h>
48: #include <sys/resourcevar.h>
49: #include <sys/signalvar.h>
50: #include <sys/vnode.h>
51: #include <sys/file.h>
52: #include <sys/acct.h>
53: #include <sys/ktrace.h>
54: #include <sys/sched.h>
55: #include <dev/rndvar.h>
56: #include <sys/pool.h>
57: #include <sys/mman.h>
58: #include <sys/ptrace.h>
59:
60: #include <sys/syscallargs.h>
61:
62: #include "systrace.h"
63: #include <dev/systrace.h>
64:
65: #include <uvm/uvm_extern.h>
66: #include <uvm/uvm_map.h>
67:
68: int nprocs = 1; /* process 0 */
69: int randompid; /* when set to 1, pid's go random */
70: pid_t lastpid;
71: struct forkstat forkstat;
72:
73: void fork_return(void *);
74: int pidtaken(pid_t);
75:
76: void process_new(struct proc *, struct proc *);
77:
78: void
79: fork_return(void *arg)
80: {
81: struct proc *p = (struct proc *)arg;
82:
83: if (p->p_flag & P_TRACED)
84: psignal(p, SIGTRAP);
85:
86: child_return(p);
87: }
88:
89: /*ARGSUSED*/
90: int
91: sys_fork(struct proc *p, void *v, register_t *retval)
92: {
93: int flags;
94:
95: flags = FORK_FORK;
96: if (p->p_ptmask & PTRACE_FORK)
97: flags |= FORK_PTRACE;
98: return (fork1(p, SIGCHLD, flags, NULL, 0,
99: fork_return, NULL, retval, NULL));
100: }
101:
102: /*ARGSUSED*/
103: int
104: sys_vfork(struct proc *p, void *v, register_t *retval)
105: {
106: return (fork1(p, SIGCHLD, FORK_VFORK|FORK_PPWAIT, NULL, 0, NULL,
107: NULL, retval, NULL));
108: }
109:
110: int
111: sys_rfork(struct proc *p, void *v, register_t *retval)
112: {
113: struct sys_rfork_args /* {
114: syscallarg(int) flags;
115: } */ *uap = v;
116:
117: int rforkflags;
118: int flags;
119:
120: flags = FORK_RFORK;
121: rforkflags = SCARG(uap, flags);
122:
123: if ((rforkflags & RFPROC) == 0)
124: return (EINVAL);
125:
126: switch(rforkflags & (RFFDG|RFCFDG)) {
127: case (RFFDG|RFCFDG):
128: return EINVAL;
129: case RFCFDG:
130: flags |= FORK_CLEANFILES;
131: break;
132: case RFFDG:
133: break;
134: default:
135: flags |= FORK_SHAREFILES;
136: break;
137: }
138:
139: if (rforkflags & RFNOWAIT)
140: flags |= FORK_NOZOMBIE;
141:
142: if (rforkflags & RFMEM)
143: flags |= FORK_SHAREVM;
144: #ifdef RTHREADS
145: if (rforkflags & RFTHREAD)
146: flags |= FORK_THREAD;
147: #endif
148:
149: return (fork1(p, SIGCHLD, flags, NULL, 0, NULL, NULL, retval, NULL));
150: }
151:
152: /*
153: * Allocate and initialize a new process.
154: */
155: void
156: process_new(struct proc *newproc, struct proc *parent)
157: {
158: struct process *pr;
159:
160: pr = pool_get(&process_pool, PR_WAITOK);
161: pr->ps_mainproc = newproc;
162: TAILQ_INIT(&pr->ps_threads);
163: TAILQ_INSERT_TAIL(&pr->ps_threads, newproc, p_thr_link);
164: newproc->p_p = pr;
165: }
166:
/* Rate-limit the "proc: table is full" warning to once per 10 seconds. */
struct timeval fork_tfmrate = { 10, 0 };
169:
/*
 * fork1() -- common machinery for fork(2), vfork(2), rfork(2) and
 * kernel thread creation.
 *
 * p1		parent process
 * exitsig	signal sent to the parent when the child exits
 * flags	FORK_* flags selecting sharing and semantics
 * stack	alternate user stack for the child, or NULL
 * stacksize	size of that stack
 * func/arg	kernel entry point for the child; NULL func means
 *		child_return, NULL arg means the new proc itself
 * retval	on success retval[0] = child pid, retval[1] = 0
 *		(marks the parent's side of the return)
 * rnewprocp	if non-NULL, receives a pointer to the new proc
 *
 * Returns 0 on success, or EAGAIN/ENOMEM before any irreversible
 * state has been created.
 */
int
fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, register_t *retval,
    struct proc **rnewprocp)
{
	struct proc *p2;
	uid_t uid;
	struct vmspace *vm;
	int count;
	vaddr_t uaddr;
	int s;
	extern void endtsleep(void *);
	extern void realitexpire(void *);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  We reserve
	 * the last 5 processes to root. The variable nprocs is the current
	 * number of processes, maxproc is the limit.
	 */
	uid = p1->p_cred->p_ruid;
	if ((nprocs >= maxproc - 5 && uid != 0) || nprocs >= maxproc) {
		static struct timeval lasttfm;

		if (ratecheck(&lasttfm, &fork_tfmrate))
			tablefull("proc");
		return (EAGAIN);
	}
	nprocs++;

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit.
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		(void)chgproccnt(uid, -1);
		/* roll back the global count bumped above */
		nprocs--;
		return (EAGAIN);
	}

	/* Allocate kernel virtual space for the child's u-area/stack. */
	uaddr = uvm_km_alloc1(kernel_map, USPACE, USPACE_ALIGN, 1);
	if (uaddr == 0) {
		chgproccnt(uid, -1);
		nprocs--;
		return (ENOMEM);
	}

	/*
	 * From now on, we're committed to the fork and cannot fail.
	 */

	/* Allocate new proc. */
	p2 = pool_get(&proc_pool, PR_WAITOK);

	p2->p_stat = SIDL;			/* protect against others */
	p2->p_exitsig = exitsig;
	p2->p_forw = p2->p_back = NULL;

#ifdef RTHREADS
	/*
	 * A thread joins the parent's existing process; anything else
	 * gets a brand-new struct process of its own.
	 */
	if (flags & FORK_THREAD) {
		atomic_setbits_int(&p2->p_flag, P_THREAD);
		p2->p_p = p1->p_p;
		TAILQ_INSERT_TAIL(&p2->p_p->ps_threads, p2, p_thr_link);
	} else {
		process_new(p2, p1);
	}
#else
	process_new(p2, p1);
#endif

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	bzero(&p2->p_startzero,
	    (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));

	/*
	 * Initialize the timeouts.
	 */
	timeout_set(&p2->p_sleep_to, endtsleep, p2);
	timeout_set(&p2->p_realit_to, realitexpire, p2);

	/* Start the child on the parent's CPU (scheduler affinity hint). */
	p2->p_cpu = p1->p_cpu;

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	p2->p_flag = 0;
	p2->p_emul = p1->p_emul;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);
	/* Inherit set-id state so exec/ptrace restrictions carry over. */
	atomic_setbits_int(&p2->p_flag, p1->p_flag & (P_SUGID | P_SUGIDEXEC));
	if (flags & FORK_PTRACE)
		atomic_setbits_int(&p2->p_flag, p1->p_flag & P_TRACED);
#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		/* nothing: threads share the process's credentials */
	} else
#endif
	{
		p2->p_p->ps_cred = pool_get(&pcred_pool, PR_WAITOK);
		bcopy(p1->p_p->ps_cred, p2->p_p->ps_cred, sizeof(*p2->p_p->ps_cred));
		p2->p_p->ps_cred->p_refcnt = 1;
		crhold(p1->p_ucred);
	}

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		VREF(p2->p_textvp);

	/* Descriptor table: fresh, shared, or copied, per rfork flags. */
	if (flags & FORK_CLEANFILES)
		p2->p_fd = fdinit(p1);
	else if (flags & FORK_SHAREFILES)
		p2->p_fd = fdshare(p1);
	else
		p2->p_fd = fdcopy(p1);

	/*
	 * If ps_limit is still copy-on-write, bump refcnt,
	 * otherwise get a copy that won't be modified.
	 * (If PL_SHAREMOD is clear, the structure is shared
	 * copy-on-write.)
	 */
#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		/* nothing: threads share the process's limits */
	} else
#endif
	{
		if (p1->p_p->ps_limit->p_lflags & PL_SHAREMOD)
			p2->p_p->ps_limit = limcopy(p1->p_p->ps_limit);
		else {
			p2->p_p->ps_limit = p1->p_p->ps_limit;
			p2->p_p->ps_limit->p_refcnt++;
		}
	}

	/* Inherit the controlling terminal only if the parent has one. */
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		atomic_setbits_int(&p2->p_flag, P_CONTROLT);
	if (flags & FORK_PPWAIT)
		atomic_setbits_int(&p2->p_flag, P_PPWAIT);
	p2->p_pptr = p1;
	if (flags & FORK_NOZOMBIE)
		atomic_setbits_int(&p2->p_flag, P_NOZOMBIE);
	LIST_INIT(&p2->p_children);

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			VREF(p2->p_tracep);
	}
#endif

	/*
	 * set priority of child to be that of parent
	 * XXX should move p_estcpu into the region of struct proc which gets
	 * copied.
	 */
	scheduler_fork_hook(p1, p2);

	/*
	 * Create signal actions for the child process.
	 */
	if (flags & FORK_SIGHAND)
		sigactsshare(p1, p2);
	else
		p2->p_sigacts = sigactsinit(p1);

	/*
	 * If emulation has process fork hook, call it now.
	 */
	if (p2->p_emul->e_proc_fork)
		(*p2->p_emul->e_proc_fork)(p2, p1);

	/* Hook up the u-area allocated earlier. */
	p2->p_addr = (struct user *)uaddr;

	/*
	 * Finish creating the child process.  It will return through a
	 * different path later.
	 */
	uvm_fork(p1, p2, ((flags & FORK_SHAREVM) ? TRUE : FALSE), stack,
	    stacksize, func ? func : child_return, arg ? arg : p2);

	/* p_stats exists only after uvm_fork() has set it up. */
	timeout_set(&p2->p_stats->p_virt_to, virttimer_trampoline, p2);
	timeout_set(&p2->p_stats->p_prof_to, proftimer_trampoline, p2);

	vm = p2->p_vmspace;

	/* Account the fork by flavor in the forkstat counters. */
	if (flags & FORK_FORK) {
		forkstat.cntfork++;
		forkstat.sizfork += vm->vm_dsize + vm->vm_ssize;
	} else if (flags & FORK_VFORK) {
		forkstat.cntvfork++;
		forkstat.sizvfork += vm->vm_dsize + vm->vm_ssize;
	} else if (flags & FORK_RFORK) {
		forkstat.cntrfork++;
		forkstat.sizrfork += vm->vm_dsize + vm->vm_ssize;
	} else {
		forkstat.cntkthread++;
		forkstat.sizkthread += vm->vm_dsize + vm->vm_ssize;
	}

	/* Find an unused pid satisfying 1 <= lastpid <= PID_MAX */
	do {
		lastpid = 1 + (randompid ? arc4random() : lastpid) % PID_MAX;
	} while (pidtaken(lastpid));
	p2->p_pid = lastpid;

	/* Make the child globally visible: allproc, pid hash, sibling
	 * list, and the parent's process group. */
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	if (p2->p_flag & P_TRACED) {
		p2->p_oppid = p1->p_pid;
		/* a traced child is parented to the debugger, not p1 */
		if (p2->p_pptr != p1->p_pptr)
			proc_reparent(p2, p1->p_pptr);

		/*
		 * Set ptrace status.
		 */
		if (flags & FORK_FORK) {
			p2->p_ptstat = malloc(sizeof(*p2->p_ptstat),
			    M_SUBPROC, M_WAITOK);
			/* NOTE(review): assumes p1->p_ptstat is non-NULL
			 * whenever P_TRACED && FORK_PTRACE — confirm that
			 * ptrace attach always allocates it. */
			p1->p_ptstat->pe_report_event = PTRACE_FORK;
			p2->p_ptstat->pe_report_event = PTRACE_FORK;
			p1->p_ptstat->pe_other_pid = p2->p_pid;
			p2->p_ptstat->pe_other_pid = p1->p_pid;
		}
	}

#if NSYSTRACE > 0
	if (ISSET(p1->p_flag, P_SYSTRACE))
		systrace_fork(p1, p2);
#endif

	/*
	 * Make child runnable, set start time, and add to run queue.
	 */
	SCHED_LOCK(s);
	getmicrotime(&p2->p_stats->p_start);
	p2->p_acflag = AFORK;
	p2->p_stat = SRUN;
	setrunqueue(p2);
	SCHED_UNLOCK(s);

	/*
	 * Notify any interested parties about the new process.
	 */
	KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);

	/*
	 * Update stats now that we know the fork was successful.
	 */
	uvmexp.forks++;
	if (flags & FORK_PPWAIT)
		uvmexp.forks_ppwait++;
	if (flags & FORK_SHAREVM)
		uvmexp.forks_sharevm++;

	/*
	 * Pass a pointer to the new process to the caller.
	 */
	if (rnewprocp != NULL)
		*rnewprocp = p2;

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
	 * proc (in case of exit).
	 */
	if (flags & FORK_PPWAIT)
		while (p2->p_flag & P_PPWAIT)
			tsleep(p1, PWAIT, "ppwait", 0);

	/*
	 * If we're tracing the child, alert the parent too.
	 */
	if ((flags & FORK_PTRACE) && (p1->p_flag & P_TRACED))
		psignal(p1, SIGTRAP);

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	if (retval != NULL) {
		retval[0] = p2->p_pid;
		retval[1] = 0;
	}
	return (0);
}
473:
474: /*
475: * Checks for current use of a pid, either as a pid or pgid.
476: */
477: int
478: pidtaken(pid_t pid)
479: {
480: struct proc *p;
481:
482: if (pfind(pid) != NULL)
483: return (1);
484: if (pgfind(pid) != NULL)
485: return (1);
486: LIST_FOREACH(p, &zombproc, p_list)
487: if (p->p_pid == pid || p->p_pgid == pid)
488: return (1);
489: return (0);
490: }
491:
#if defined(MULTIPROCESSOR)
/*
 * XXX This is a slight hack to get newly-formed processes to
 * XXX acquire the kernel lock as soon as they run.
 */
void
proc_trampoline_mp(void)
{
	struct proc *p = curproc;

	SCHED_ASSERT_UNLOCKED();
	KERNEL_PROC_LOCK(p);
}
#endif
CVSweb