Annotation of sys/kern/kern_fork.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: kern_fork.c,v 1.92 2007/07/25 23:11:52 art Exp $ */
2: /* $NetBSD: kern_fork.c,v 1.29 1996/02/09 18:59:34 christos Exp $ */
3:
4: /*
5: * Copyright (c) 1982, 1986, 1989, 1991, 1993
6: * The Regents of the University of California. All rights reserved.
7: * (c) UNIX System Laboratories, Inc.
8: * All or some portions of this file are derived from material licensed
9: * to the University of California by American Telephone and Telegraph
10: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11: * the permission of UNIX System Laboratories, Inc.
12: *
13: * Redistribution and use in source and binary forms, with or without
14: * modification, are permitted provided that the following conditions
15: * are met:
16: * 1. Redistributions of source code must retain the above copyright
17: * notice, this list of conditions and the following disclaimer.
18: * 2. Redistributions in binary form must reproduce the above copyright
19: * notice, this list of conditions and the following disclaimer in the
20: * documentation and/or other materials provided with the distribution.
21: * 3. Neither the name of the University nor the names of its contributors
22: * may be used to endorse or promote products derived from this software
23: * without specific prior written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35: * SUCH DAMAGE.
36: *
37: * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94
38: */
39:
40: #include <sys/param.h>
41: #include <sys/systm.h>
42: #include <sys/filedesc.h>
43: #include <sys/kernel.h>
44: #include <sys/malloc.h>
45: #include <sys/mount.h>
46: #include <sys/proc.h>
47: #include <sys/exec.h>
48: #include <sys/resourcevar.h>
49: #include <sys/signalvar.h>
50: #include <sys/vnode.h>
51: #include <sys/file.h>
52: #include <sys/acct.h>
53: #include <sys/ktrace.h>
54: #include <sys/sched.h>
55: #include <dev/rndvar.h>
56: #include <sys/pool.h>
57: #include <sys/mman.h>
58: #include <sys/ptrace.h>
59:
60: #include <sys/syscallargs.h>
61:
62: #include "systrace.h"
63: #include <dev/systrace.h>
64:
65: #include <uvm/uvm_extern.h>
66: #include <uvm/uvm_map.h>
67:
68: int nprocs = 1; /* process 0 */
69: int randompid; /* when set to 1, pid's go random */
70: pid_t lastpid;
71: struct forkstat forkstat;
72:
73: void fork_return(void *);
74: int pidtaken(pid_t);
75:
76: void process_new(struct proc *, struct proc *);
77:
78: void
79: fork_return(void *arg)
80: {
81: struct proc *p = (struct proc *)arg;
82:
83: if (p->p_flag & P_TRACED)
84: psignal(p, SIGTRAP);
85:
86: child_return(p);
87: }
88:
89: /*ARGSUSED*/
90: int
91: sys_fork(struct proc *p, void *v, register_t *retval)
92: {
93: int flags;
94:
95: flags = FORK_FORK;
96: if (p->p_ptmask & PTRACE_FORK)
97: flags |= FORK_PTRACE;
98: return (fork1(p, SIGCHLD, flags, NULL, 0,
99: fork_return, NULL, retval, NULL));
100: }
101:
102: /*ARGSUSED*/
103: int
104: sys_vfork(struct proc *p, void *v, register_t *retval)
105: {
106: return (fork1(p, SIGCHLD, FORK_VFORK|FORK_PPWAIT, NULL, 0, NULL,
107: NULL, retval, NULL));
108: }
109:
110: int
111: sys_rfork(struct proc *p, void *v, register_t *retval)
112: {
113: struct sys_rfork_args /* {
114: syscallarg(int) flags;
115: } */ *uap = v;
116:
117: int rforkflags;
118: int flags;
119:
120: flags = FORK_RFORK;
121: rforkflags = SCARG(uap, flags);
122:
123: if ((rforkflags & RFPROC) == 0)
124: return (EINVAL);
125:
126: switch(rforkflags & (RFFDG|RFCFDG)) {
127: case (RFFDG|RFCFDG):
128: return EINVAL;
129: case RFCFDG:
130: flags |= FORK_CLEANFILES;
131: break;
132: case RFFDG:
133: break;
134: default:
135: flags |= FORK_SHAREFILES;
136: break;
137: }
138:
139: if (rforkflags & RFNOWAIT)
140: flags |= FORK_NOZOMBIE;
141:
142: if (rforkflags & RFMEM)
143: flags |= FORK_SHAREVM;
144: #ifdef RTHREADS
145: if (rforkflags & RFTHREAD)
146: flags |= FORK_THREAD;
147: #endif
148:
149: return (fork1(p, SIGCHLD, flags, NULL, 0, NULL, NULL, retval, NULL));
150: }
151:
152: /*
153: * Allocate and initialize a new process.
154: */
155: void
156: process_new(struct proc *newproc, struct proc *parent)
157: {
158: struct process *pr;
159:
160: pr = pool_get(&process_pool, PR_WAITOK);
161: pr->ps_mainproc = newproc;
162: TAILQ_INIT(&pr->ps_threads);
163: TAILQ_INSERT_TAIL(&pr->ps_threads, newproc, p_thr_link);
164: newproc->p_p = pr;
165: }
166:
/* Rate-limit the "proc: table is full" warning to once per 10 seconds. */
struct timeval fork_tfmrate = { 10, 0 };
169:
/*
 * fork1() -- common machinery for fork(2), vfork(2), rfork(2) and
 * kernel thread creation.
 *
 * p1		parent process
 * exitsig	signal sent to the parent when the child exits
 * flags	FORK_* flags selecting sharing and semantics
 * stack	alternate user stack for the child, or NULL
 * stacksize	size of that stack
 * func/arg	kernel entry point for the child; NULL func means
 *		child_return, NULL arg means the new proc itself
 * retval	on success retval[0] = child pid, retval[1] = 0
 *		(marks the parent's side of the return)
 * rnewprocp	if non-NULL, receives a pointer to the new proc
 *
 * Returns 0 on success, or EAGAIN/ENOMEM before any irreversible
 * state has been created.
 */
int
fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, register_t *retval,
    struct proc **rnewprocp)
{
	struct proc *p2;
	uid_t uid;
	struct vmspace *vm;
	int count;
	vaddr_t uaddr;
	int s;
	extern void endtsleep(void *);
	extern void realitexpire(void *);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  We reserve
	 * the last 5 processes to root. The variable nprocs is the current
	 * number of processes, maxproc is the limit.
	 */
	uid = p1->p_cred->p_ruid;
	if ((nprocs >= maxproc - 5 && uid != 0) || nprocs >= maxproc) {
		static struct timeval lasttfm;

		if (ratecheck(&lasttfm, &fork_tfmrate))
			tablefull("proc");
		return (EAGAIN);
	}
	nprocs++;

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit.
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		(void)chgproccnt(uid, -1);
		/* roll back the global count bumped above */
		nprocs--;
		return (EAGAIN);
	}

	/* Allocate kernel virtual space for the child's u-area/stack. */
	uaddr = uvm_km_alloc1(kernel_map, USPACE, USPACE_ALIGN, 1);
	if (uaddr == 0) {
		chgproccnt(uid, -1);
		nprocs--;
		return (ENOMEM);
	}

	/*
	 * From now on, we're committed to the fork and cannot fail.
	 */

	/* Allocate new proc. */
	p2 = pool_get(&proc_pool, PR_WAITOK);

	p2->p_stat = SIDL;			/* protect against others */
	p2->p_exitsig = exitsig;
	p2->p_forw = p2->p_back = NULL;

#ifdef RTHREADS
	/*
	 * A thread joins the parent's existing process; anything else
	 * gets a brand-new struct process of its own.
	 */
	if (flags & FORK_THREAD) {
		atomic_setbits_int(&p2->p_flag, P_THREAD);
		p2->p_p = p1->p_p;
		TAILQ_INSERT_TAIL(&p2->p_p->ps_threads, p2, p_thr_link);
	} else {
		process_new(p2, p1);
	}
#else
	process_new(p2, p1);
#endif

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	bzero(&p2->p_startzero,
	    (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));

	/*
	 * Initialize the timeouts.
	 */
	timeout_set(&p2->p_sleep_to, endtsleep, p2);
	timeout_set(&p2->p_realit_to, realitexpire, p2);

	/* Start the child on the parent's CPU (scheduler affinity hint). */
	p2->p_cpu = p1->p_cpu;

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	p2->p_flag = 0;
	p2->p_emul = p1->p_emul;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);
	/* Inherit set-id state so exec/ptrace restrictions carry over. */
	atomic_setbits_int(&p2->p_flag, p1->p_flag & (P_SUGID | P_SUGIDEXEC));
	if (flags & FORK_PTRACE)
		atomic_setbits_int(&p2->p_flag, p1->p_flag & P_TRACED);
#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		/* nothing: threads share the process's credentials */
	} else
#endif
	{
		p2->p_p->ps_cred = pool_get(&pcred_pool, PR_WAITOK);
		bcopy(p1->p_p->ps_cred, p2->p_p->ps_cred, sizeof(*p2->p_p->ps_cred));
		p2->p_p->ps_cred->p_refcnt = 1;
		crhold(p1->p_ucred);
	}

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		VREF(p2->p_textvp);

	/* Descriptor table: fresh, shared, or copied, per rfork flags. */
	if (flags & FORK_CLEANFILES)
		p2->p_fd = fdinit(p1);
	else if (flags & FORK_SHAREFILES)
		p2->p_fd = fdshare(p1);
	else
		p2->p_fd = fdcopy(p1);

	/*
	 * If ps_limit is still copy-on-write, bump refcnt,
	 * otherwise get a copy that won't be modified.
	 * (If PL_SHAREMOD is clear, the structure is shared
	 * copy-on-write.)
	 */
#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		/* nothing: threads share the process's limits */
	} else
#endif
	{
		if (p1->p_p->ps_limit->p_lflags & PL_SHAREMOD)
			p2->p_p->ps_limit = limcopy(p1->p_p->ps_limit);
		else {
			p2->p_p->ps_limit = p1->p_p->ps_limit;
			p2->p_p->ps_limit->p_refcnt++;
		}
	}

	/* Inherit the controlling terminal only if the parent has one. */
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		atomic_setbits_int(&p2->p_flag, P_CONTROLT);
	if (flags & FORK_PPWAIT)
		atomic_setbits_int(&p2->p_flag, P_PPWAIT);
	p2->p_pptr = p1;
	if (flags & FORK_NOZOMBIE)
		atomic_setbits_int(&p2->p_flag, P_NOZOMBIE);
	LIST_INIT(&p2->p_children);

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			VREF(p2->p_tracep);
	}
#endif

	/*
	 * set priority of child to be that of parent
	 * XXX should move p_estcpu into the region of struct proc which gets
	 * copied.
	 */
	scheduler_fork_hook(p1, p2);

	/*
	 * Create signal actions for the child process.
	 */
	if (flags & FORK_SIGHAND)
		sigactsshare(p1, p2);
	else
		p2->p_sigacts = sigactsinit(p1);

	/*
	 * If emulation has process fork hook, call it now.
	 */
	if (p2->p_emul->e_proc_fork)
		(*p2->p_emul->e_proc_fork)(p2, p1);

	/* Hook up the u-area allocated earlier. */
	p2->p_addr = (struct user *)uaddr;

	/*
	 * Finish creating the child process.  It will return through a
	 * different path later.
	 */
	uvm_fork(p1, p2, ((flags & FORK_SHAREVM) ? TRUE : FALSE), stack,
	    stacksize, func ? func : child_return, arg ? arg : p2);

	/* p_stats exists only after uvm_fork() has set it up. */
	timeout_set(&p2->p_stats->p_virt_to, virttimer_trampoline, p2);
	timeout_set(&p2->p_stats->p_prof_to, proftimer_trampoline, p2);

	vm = p2->p_vmspace;

	/* Account the fork by flavor in the forkstat counters. */
	if (flags & FORK_FORK) {
		forkstat.cntfork++;
		forkstat.sizfork += vm->vm_dsize + vm->vm_ssize;
	} else if (flags & FORK_VFORK) {
		forkstat.cntvfork++;
		forkstat.sizvfork += vm->vm_dsize + vm->vm_ssize;
	} else if (flags & FORK_RFORK) {
		forkstat.cntrfork++;
		forkstat.sizrfork += vm->vm_dsize + vm->vm_ssize;
	} else {
		forkstat.cntkthread++;
		forkstat.sizkthread += vm->vm_dsize + vm->vm_ssize;
	}

	/* Find an unused pid satisfying 1 <= lastpid <= PID_MAX */
	do {
		lastpid = 1 + (randompid ? arc4random() : lastpid) % PID_MAX;
	} while (pidtaken(lastpid));
	p2->p_pid = lastpid;

	/* Make the child globally visible: allproc, pid hash, sibling
	 * list, and the parent's process group. */
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	if (p2->p_flag & P_TRACED) {
		p2->p_oppid = p1->p_pid;
		/* a traced child is parented to the debugger, not p1 */
		if (p2->p_pptr != p1->p_pptr)
			proc_reparent(p2, p1->p_pptr);

		/*
		 * Set ptrace status.
		 */
		if (flags & FORK_FORK) {
			p2->p_ptstat = malloc(sizeof(*p2->p_ptstat),
			    M_SUBPROC, M_WAITOK);
			/* NOTE(review): assumes p1->p_ptstat is non-NULL
			 * whenever P_TRACED && FORK_PTRACE — confirm that
			 * ptrace attach always allocates it. */
			p1->p_ptstat->pe_report_event = PTRACE_FORK;
			p2->p_ptstat->pe_report_event = PTRACE_FORK;
			p1->p_ptstat->pe_other_pid = p2->p_pid;
			p2->p_ptstat->pe_other_pid = p1->p_pid;
		}
	}

#if NSYSTRACE > 0
	if (ISSET(p1->p_flag, P_SYSTRACE))
		systrace_fork(p1, p2);
#endif

	/*
	 * Make child runnable, set start time, and add to run queue.
	 */
	SCHED_LOCK(s);
	getmicrotime(&p2->p_stats->p_start);
	p2->p_acflag = AFORK;
	p2->p_stat = SRUN;
	setrunqueue(p2);
	SCHED_UNLOCK(s);

	/*
	 * Notify any interested parties about the new process.
	 */
	KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);

	/*
	 * Update stats now that we know the fork was successful.
	 */
	uvmexp.forks++;
	if (flags & FORK_PPWAIT)
		uvmexp.forks_ppwait++;
	if (flags & FORK_SHAREVM)
		uvmexp.forks_sharevm++;

	/*
	 * Pass a pointer to the new process to the caller.
	 */
	if (rnewprocp != NULL)
		*rnewprocp = p2;

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
	 * proc (in case of exit).
	 */
	if (flags & FORK_PPWAIT)
		while (p2->p_flag & P_PPWAIT)
			tsleep(p1, PWAIT, "ppwait", 0);

	/*
	 * If we're tracing the child, alert the parent too.
	 */
	if ((flags & FORK_PTRACE) && (p1->p_flag & P_TRACED))
		psignal(p1, SIGTRAP);

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	if (retval != NULL) {
		retval[0] = p2->p_pid;
		retval[1] = 0;
	}
	return (0);
}
473:
474: /*
475: * Checks for current use of a pid, either as a pid or pgid.
476: */
477: int
478: pidtaken(pid_t pid)
479: {
480: struct proc *p;
481:
482: if (pfind(pid) != NULL)
483: return (1);
484: if (pgfind(pid) != NULL)
485: return (1);
486: LIST_FOREACH(p, &zombproc, p_list)
487: if (p->p_pid == pid || p->p_pgid == pid)
488: return (1);
489: return (0);
490: }
491:
#if defined(MULTIPROCESSOR)
/*
 * XXX This is a slight hack to get newly-formed processes to
 * XXX acquire the kernel lock as soon as they run.
 */
void
proc_trampoline_mp(void)
{
	struct proc *p = curproc;

	SCHED_ASSERT_UNLOCKED();
	KERNEL_PROC_LOCK(p);
}
#endif
CVSweb