Annotation of sys/kern/sys_generic.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: sys_generic.c,v 1.57 2007/07/25 23:11:52 art Exp $ */
2: /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */
3:
4: /*
5: * Copyright (c) 1996 Theo de Raadt
6: * Copyright (c) 1982, 1986, 1989, 1993
7: * The Regents of the University of California. All rights reserved.
8: * (c) UNIX System Laboratories, Inc.
9: * All or some portions of this file are derived from material licensed
10: * to the University of California by American Telephone and Telegraph
11: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
12: * the permission of UNIX System Laboratories, Inc.
13: *
14: * Redistribution and use in source and binary forms, with or without
15: * modification, are permitted provided that the following conditions
16: * are met:
17: * 1. Redistributions of source code must retain the above copyright
18: * notice, this list of conditions and the following disclaimer.
19: * 2. Redistributions in binary form must reproduce the above copyright
20: * notice, this list of conditions and the following disclaimer in the
21: * documentation and/or other materials provided with the distribution.
22: * 3. Neither the name of the University nor the names of its contributors
23: * may be used to endorse or promote products derived from this software
24: * without specific prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: *
38: * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94
39: */
40:
41: #include <sys/param.h>
42: #include <sys/systm.h>
43: #include <sys/filedesc.h>
44: #include <sys/ioctl.h>
45: #include <sys/file.h>
46: #include <sys/proc.h>
47: #include <sys/resourcevar.h>
48: #include <sys/socketvar.h>
49: #include <sys/signalvar.h>
50: #include <sys/uio.h>
51: #include <sys/kernel.h>
52: #include <sys/stat.h>
53: #include <sys/malloc.h>
54: #include <sys/poll.h>
55: #ifdef KTRACE
56: #include <sys/ktrace.h>
57: #endif
58: #include <sys/sched.h>
59:
60: #include <sys/mount.h>
61: #include <sys/syscallargs.h>
62:
63: #include <uvm/uvm_extern.h>
64:
/* Scans the select bit sets for sys_select(); defined later in this file. */
int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *);
/* Poll routine that reports the normal read/write events as ready. */
int seltrue(dev_t, int, struct proc *);
/* Scans a pollfd array for sys_poll(); defined later in this file. */
void pollscan(struct proc *, struct pollfd *, u_int, register_t *);
68:
69: /*
70: * Read system call.
71: */
72: /* ARGSUSED */
73: int
74: sys_read(struct proc *p, void *v, register_t *retval)
75: {
76: struct sys_read_args /* {
77: syscallarg(int) fd;
78: syscallarg(void *) buf;
79: syscallarg(size_t) nbyte;
80: } */ *uap = v;
81: int fd = SCARG(uap, fd);
82: struct file *fp;
83: struct filedesc *fdp = p->p_fd;
84:
85: if ((fp = fd_getfile(fdp, fd)) == NULL)
86: return (EBADF);
87: if ((fp->f_flag & FREAD) == 0)
88: return (EBADF);
89:
90: FREF(fp);
91:
92: /* dofileread() will FRELE the descriptor for us */
93: return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
94: &fp->f_offset, retval));
95: }
96:
97: int
98: dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
99: off_t *offset, register_t *retval)
100: {
101: struct uio auio;
102: struct iovec aiov;
103: long cnt, error = 0;
104: #ifdef KTRACE
105: struct iovec ktriov;
106: #endif
107:
108: aiov.iov_base = buf;
109: aiov.iov_len = nbyte;
110: auio.uio_iov = &aiov;
111: auio.uio_iovcnt = 1;
112: auio.uio_resid = nbyte;
113: auio.uio_rw = UIO_READ;
114: auio.uio_segflg = UIO_USERSPACE;
115: auio.uio_procp = p;
116:
117: /*
118: * Reads return ssize_t because -1 is returned on error. Therefore
119: * we must restrict the length to SSIZE_MAX to avoid garbage return
120: * values.
121: */
122: if (auio.uio_resid > SSIZE_MAX) {
123: error = EINVAL;
124: goto out;
125: }
126:
127: #ifdef KTRACE
128: /*
129: * if tracing, save a copy of iovec
130: */
131: if (KTRPOINT(p, KTR_GENIO))
132: ktriov = aiov;
133: #endif
134: cnt = auio.uio_resid;
135: error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
136: if (error)
137: if (auio.uio_resid != cnt && (error == ERESTART ||
138: error == EINTR || error == EWOULDBLOCK))
139: error = 0;
140: cnt -= auio.uio_resid;
141:
142: fp->f_rxfer++;
143: fp->f_rbytes += cnt;
144: #ifdef KTRACE
145: if (KTRPOINT(p, KTR_GENIO) && error == 0)
146: ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
147: #endif
148: *retval = cnt;
149: out:
150: FRELE(fp);
151: return (error);
152: }
153:
154: /*
155: * Scatter read system call.
156: */
157: int
158: sys_readv(struct proc *p, void *v, register_t *retval)
159: {
160: struct sys_readv_args /* {
161: syscallarg(int) fd;
162: syscallarg(const struct iovec *) iovp;
163: syscallarg(int) iovcnt;
164: } */ *uap = v;
165: int fd = SCARG(uap, fd);
166: struct file *fp;
167: struct filedesc *fdp = p->p_fd;
168:
169: if ((fp = fd_getfile(fdp, fd)) == NULL)
170: return (EBADF);
171: if ((fp->f_flag & FREAD) == 0)
172: return (EBADF);
173:
174: FREF(fp);
175:
176: /* dofilereadv() will FRELE the descriptor for us */
177: return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
178: &fp->f_offset, retval));
179: }
180:
181: int
182: dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
183: int iovcnt, off_t *offset, register_t *retval)
184: {
185: struct uio auio;
186: struct iovec *iov;
187: struct iovec *needfree;
188: struct iovec aiov[UIO_SMALLIOV];
189: long i, cnt, error = 0;
190: u_int iovlen;
191: #ifdef KTRACE
192: struct iovec *ktriov = NULL;
193: #endif
194:
195: /* note: can't use iovlen until iovcnt is validated */
196: iovlen = iovcnt * sizeof(struct iovec);
197: if ((u_int)iovcnt > UIO_SMALLIOV) {
198: if ((u_int)iovcnt > IOV_MAX) {
199: error = EINVAL;
200: goto out;
201: }
202: iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
203: } else if ((u_int)iovcnt > 0) {
204: iov = aiov;
205: needfree = NULL;
206: } else {
207: error = EINVAL;
208: goto out;
209: }
210:
211: auio.uio_iov = iov;
212: auio.uio_iovcnt = iovcnt;
213: auio.uio_rw = UIO_READ;
214: auio.uio_segflg = UIO_USERSPACE;
215: auio.uio_procp = p;
216: error = copyin(iovp, iov, iovlen);
217: if (error)
218: goto done;
219: auio.uio_resid = 0;
220: for (i = 0; i < iovcnt; i++) {
221: auio.uio_resid += iov->iov_len;
222: /*
223: * Reads return ssize_t because -1 is returned on error.
224: * Therefore we must restrict the length to SSIZE_MAX to
225: * avoid garbage return values.
226: */
227: if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
228: error = EINVAL;
229: goto done;
230: }
231: iov++;
232: }
233: #ifdef KTRACE
234: /*
235: * if tracing, save a copy of iovec
236: */
237: if (KTRPOINT(p, KTR_GENIO)) {
238: ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
239: bcopy(auio.uio_iov, ktriov, iovlen);
240: }
241: #endif
242: cnt = auio.uio_resid;
243: error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
244: if (error)
245: if (auio.uio_resid != cnt && (error == ERESTART ||
246: error == EINTR || error == EWOULDBLOCK))
247: error = 0;
248: cnt -= auio.uio_resid;
249:
250: fp->f_rxfer++;
251: fp->f_rbytes += cnt;
252: #ifdef KTRACE
253: if (ktriov != NULL) {
254: if (error == 0)
255: ktrgenio(p, fd, UIO_READ, ktriov, cnt,
256: error);
257: free(ktriov, M_TEMP);
258: }
259: #endif
260: *retval = cnt;
261: done:
262: if (needfree)
263: free(needfree, M_IOV);
264: out:
265: FRELE(fp);
266: return (error);
267: }
268:
269: /*
270: * Write system call
271: */
272: int
273: sys_write(struct proc *p, void *v, register_t *retval)
274: {
275: struct sys_write_args /* {
276: syscallarg(int) fd;
277: syscallarg(const void *) buf;
278: syscallarg(size_t) nbyte;
279: } */ *uap = v;
280: int fd = SCARG(uap, fd);
281: struct file *fp;
282: struct filedesc *fdp = p->p_fd;
283:
284: if ((fp = fd_getfile(fdp, fd)) == NULL)
285: return (EBADF);
286: if ((fp->f_flag & FWRITE) == 0)
287: return (EBADF);
288:
289: FREF(fp);
290:
291: /* dofilewrite() will FRELE the descriptor for us */
292: return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
293: &fp->f_offset, retval));
294: }
295:
296: int
297: dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
298: size_t nbyte, off_t *offset, register_t *retval)
299: {
300: struct uio auio;
301: struct iovec aiov;
302: long cnt, error = 0;
303: #ifdef KTRACE
304: struct iovec ktriov;
305: #endif
306:
307: aiov.iov_base = (void *)buf; /* XXX kills const */
308: aiov.iov_len = nbyte;
309: auio.uio_iov = &aiov;
310: auio.uio_iovcnt = 1;
311: auio.uio_resid = nbyte;
312: auio.uio_rw = UIO_WRITE;
313: auio.uio_segflg = UIO_USERSPACE;
314: auio.uio_procp = p;
315:
316: /*
317: * Writes return ssize_t because -1 is returned on error. Therefore
318: * we must restrict the length to SSIZE_MAX to avoid garbage return
319: * values.
320: */
321: if (auio.uio_resid > SSIZE_MAX) {
322: error = EINVAL;
323: goto out;
324: }
325:
326: #ifdef KTRACE
327: /*
328: * if tracing, save a copy of iovec
329: */
330: if (KTRPOINT(p, KTR_GENIO))
331: ktriov = aiov;
332: #endif
333: cnt = auio.uio_resid;
334: error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
335: if (error) {
336: if (auio.uio_resid != cnt && (error == ERESTART ||
337: error == EINTR || error == EWOULDBLOCK))
338: error = 0;
339: if (error == EPIPE)
340: psignal(p, SIGPIPE);
341: }
342: cnt -= auio.uio_resid;
343:
344: fp->f_wxfer++;
345: fp->f_wbytes += cnt;
346: #ifdef KTRACE
347: if (KTRPOINT(p, KTR_GENIO) && error == 0)
348: ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
349: #endif
350: *retval = cnt;
351: out:
352: FRELE(fp);
353: return (error);
354: }
355:
356: /*
357: * Gather write system call
358: */
359: int
360: sys_writev(struct proc *p, void *v, register_t *retval)
361: {
362: struct sys_writev_args /* {
363: syscallarg(int) fd;
364: syscallarg(const struct iovec *) iovp;
365: syscallarg(int) iovcnt;
366: } */ *uap = v;
367: int fd = SCARG(uap, fd);
368: struct file *fp;
369: struct filedesc *fdp = p->p_fd;
370:
371: if ((fp = fd_getfile(fdp, fd)) == NULL)
372: return (EBADF);
373: if ((fp->f_flag & FWRITE) == 0)
374: return (EBADF);
375:
376: FREF(fp);
377:
378: /* dofilewritev() will FRELE the descriptor for us */
379: return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
380: &fp->f_offset, retval));
381: }
382:
383: int
384: dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
385: int iovcnt, off_t *offset, register_t *retval)
386: {
387: struct uio auio;
388: struct iovec *iov;
389: struct iovec *needfree;
390: struct iovec aiov[UIO_SMALLIOV];
391: long i, cnt, error = 0;
392: u_int iovlen;
393: #ifdef KTRACE
394: struct iovec *ktriov = NULL;
395: #endif
396:
397: /* note: can't use iovlen until iovcnt is validated */
398: iovlen = iovcnt * sizeof(struct iovec);
399: if ((u_int)iovcnt > UIO_SMALLIOV) {
400: if ((u_int)iovcnt > IOV_MAX) {
401: error = EINVAL;
402: goto out;
403: }
404: iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
405: } else if ((u_int)iovcnt > 0) {
406: iov = aiov;
407: needfree = NULL;
408: } else {
409: error = EINVAL;
410: goto out;
411: }
412:
413: auio.uio_iov = iov;
414: auio.uio_iovcnt = iovcnt;
415: auio.uio_rw = UIO_WRITE;
416: auio.uio_segflg = UIO_USERSPACE;
417: auio.uio_procp = p;
418: error = copyin(iovp, iov, iovlen);
419: if (error)
420: goto done;
421: auio.uio_resid = 0;
422: for (i = 0; i < iovcnt; i++) {
423: auio.uio_resid += iov->iov_len;
424: /*
425: * Writes return ssize_t because -1 is returned on error.
426: * Therefore we must restrict the length to SSIZE_MAX to
427: * avoid garbage return values.
428: */
429: if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
430: error = EINVAL;
431: goto done;
432: }
433: iov++;
434: }
435: #ifdef KTRACE
436: /*
437: * if tracing, save a copy of iovec
438: */
439: if (KTRPOINT(p, KTR_GENIO)) {
440: ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
441: bcopy(auio.uio_iov, ktriov, iovlen);
442: }
443: #endif
444: cnt = auio.uio_resid;
445: error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
446: if (error) {
447: if (auio.uio_resid != cnt && (error == ERESTART ||
448: error == EINTR || error == EWOULDBLOCK))
449: error = 0;
450: if (error == EPIPE)
451: psignal(p, SIGPIPE);
452: }
453: cnt -= auio.uio_resid;
454:
455: fp->f_wxfer++;
456: fp->f_wbytes += cnt;
457: #ifdef KTRACE
458: if (ktriov != NULL) {
459: if (error == 0)
460: ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
461: free(ktriov, M_TEMP);
462: }
463: #endif
464: *retval = cnt;
465: done:
466: if (needfree)
467: free(needfree, M_IOV);
468: out:
469: FRELE(fp);
470: return (error);
471: }
472:
473: /*
474: * Ioctl system call
475: */
476: /* ARGSUSED */
477: int
478: sys_ioctl(struct proc *p, void *v, register_t *retval)
479: {
480: struct sys_ioctl_args /* {
481: syscallarg(int) fd;
482: syscallarg(u_long) com;
483: syscallarg(void *) data;
484: } */ *uap = v;
485: struct file *fp;
486: struct filedesc *fdp;
487: u_long com;
488: int error;
489: u_int size;
490: caddr_t data, memp;
491: int tmp;
492: #define STK_PARAMS 128
493: char stkbuf[STK_PARAMS];
494:
495: fdp = p->p_fd;
496: if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
497: return (EBADF);
498:
499: if ((fp->f_flag & (FREAD | FWRITE)) == 0)
500: return (EBADF);
501:
502: switch (com = SCARG(uap, com)) {
503: case FIONCLEX:
504: fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
505: return (0);
506: case FIOCLEX:
507: fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
508: return (0);
509: }
510:
511: /*
512: * Interpret high order word to find amount of data to be
513: * copied to/from the user's address space.
514: */
515: size = IOCPARM_LEN(com);
516: if (size > IOCPARM_MAX)
517: return (ENOTTY);
518: FREF(fp);
519: memp = NULL;
520: if (size > sizeof (stkbuf)) {
521: memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
522: data = memp;
523: } else
524: data = stkbuf;
525: if (com&IOC_IN) {
526: if (size) {
527: error = copyin(SCARG(uap, data), data, (u_int)size);
528: if (error) {
529: goto out;
530: }
531: } else
532: *(caddr_t *)data = SCARG(uap, data);
533: } else if ((com&IOC_OUT) && size)
534: /*
535: * Zero the buffer so the user always
536: * gets back something deterministic.
537: */
538: bzero(data, size);
539: else if (com&IOC_VOID)
540: *(caddr_t *)data = SCARG(uap, data);
541:
542: switch (com) {
543:
544: case FIONBIO:
545: if ((tmp = *(int *)data) != 0)
546: fp->f_flag |= FNONBLOCK;
547: else
548: fp->f_flag &= ~FNONBLOCK;
549: error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
550: break;
551:
552: case FIOASYNC:
553: if ((tmp = *(int *)data) != 0)
554: fp->f_flag |= FASYNC;
555: else
556: fp->f_flag &= ~FASYNC;
557: error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
558: break;
559:
560: case FIOSETOWN:
561: tmp = *(int *)data;
562: if (fp->f_type == DTYPE_SOCKET) {
563: struct socket *so = (struct socket *)fp->f_data;
564:
565: so->so_pgid = tmp;
566: so->so_siguid = p->p_cred->p_ruid;
567: so->so_sigeuid = p->p_ucred->cr_uid;
568: error = 0;
569: break;
570: }
571: if (tmp <= 0) {
572: tmp = -tmp;
573: } else {
574: struct proc *p1 = pfind(tmp);
575: if (p1 == 0) {
576: error = ESRCH;
577: break;
578: }
579: tmp = p1->p_pgrp->pg_id;
580: }
581: error = (*fp->f_ops->fo_ioctl)
582: (fp, TIOCSPGRP, (caddr_t)&tmp, p);
583: break;
584:
585: case FIOGETOWN:
586: if (fp->f_type == DTYPE_SOCKET) {
587: error = 0;
588: *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
589: break;
590: }
591: error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
592: *(int *)data = -*(int *)data;
593: break;
594:
595: default:
596: error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
597: break;
598: }
599: /*
600: * Copy any data to user, size was
601: * already set and checked above.
602: */
603: if (error == 0 && (com&IOC_OUT) && size)
604: error = copyout(data, SCARG(uap, data), (u_int)size);
605: out:
606: FRELE(fp);
607: if (memp)
608: free(memp, M_IOCTLOPS);
609: return (error);
610: }
611:
/*
 * selwait:  its address is the sleep channel that sys_select() and
 *           sys_poll() tsleep() on and that selwakeup() wakes.
 * nselcoll: incremented by selwakeup() when a collision was recorded;
 *           sys_select() and sys_poll() sample it before scanning and
 *           rescan if it changed.
 */
int selwait, nselcoll;
613:
614: /*
615: * Select system call.
616: */
617: int
618: sys_select(struct proc *p, void *v, register_t *retval)
619: {
620: struct sys_select_args /* {
621: syscallarg(int) nd;
622: syscallarg(fd_set *) in;
623: syscallarg(fd_set *) ou;
624: syscallarg(fd_set *) ex;
625: syscallarg(struct timeval *) tv;
626: } */ *uap = v;
627: fd_mask bits[6];
628: fd_set *pibits[3], *pobits[3];
629: struct timeval atv, rtv, ttv;
630: int s, ncoll, error = 0, timo;
631: u_int nd, ni;
632:
633: nd = SCARG(uap, nd);
634: if (nd > p->p_fd->fd_nfiles) {
635: /* forgiving; slightly wrong */
636: nd = p->p_fd->fd_nfiles;
637: }
638: ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
639: if (nd > sizeof(bits[0])) {
640: caddr_t mbits;
641:
642: mbits = malloc(ni * 6, M_TEMP, M_WAITOK);
643: bzero(mbits, ni * 6);
644: pibits[0] = (fd_set *)&mbits[ni * 0];
645: pibits[1] = (fd_set *)&mbits[ni * 1];
646: pibits[2] = (fd_set *)&mbits[ni * 2];
647: pobits[0] = (fd_set *)&mbits[ni * 3];
648: pobits[1] = (fd_set *)&mbits[ni * 4];
649: pobits[2] = (fd_set *)&mbits[ni * 5];
650: } else {
651: bzero(bits, sizeof(bits));
652: pibits[0] = (fd_set *)&bits[0];
653: pibits[1] = (fd_set *)&bits[1];
654: pibits[2] = (fd_set *)&bits[2];
655: pobits[0] = (fd_set *)&bits[3];
656: pobits[1] = (fd_set *)&bits[4];
657: pobits[2] = (fd_set *)&bits[5];
658: }
659:
660: #define getbits(name, x) \
661: if (SCARG(uap, name) && (error = copyin(SCARG(uap, name), \
662: pibits[x], ni))) \
663: goto done;
664: getbits(in, 0);
665: getbits(ou, 1);
666: getbits(ex, 2);
667: #undef getbits
668:
669: if (SCARG(uap, tv)) {
670: error = copyin(SCARG(uap, tv), &atv, sizeof (atv));
671: if (error)
672: goto done;
673: if (itimerfix(&atv)) {
674: error = EINVAL;
675: goto done;
676: }
677: getmicrouptime(&rtv);
678: timeradd(&atv, &rtv, &atv);
679: } else {
680: atv.tv_sec = 0;
681: atv.tv_usec = 0;
682: }
683: timo = 0;
684:
685: retry:
686: ncoll = nselcoll;
687: atomic_setbits_int(&p->p_flag, P_SELECT);
688: error = selscan(p, pibits[0], pobits[0], nd, ni, retval);
689: if (error || *retval)
690: goto done;
691: if (SCARG(uap, tv)) {
692: getmicrouptime(&rtv);
693: if (timercmp(&rtv, &atv, >=))
694: goto done;
695: ttv = atv;
696: timersub(&ttv, &rtv, &ttv);
697: timo = ttv.tv_sec > 24 * 60 * 60 ?
698: 24 * 60 * 60 * hz : tvtohz(&ttv);
699: }
700: s = splhigh();
701: if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
702: splx(s);
703: goto retry;
704: }
705: atomic_clearbits_int(&p->p_flag, P_SELECT);
706: error = tsleep(&selwait, PSOCK | PCATCH, "select", timo);
707: splx(s);
708: if (error == 0)
709: goto retry;
710: done:
711: atomic_clearbits_int(&p->p_flag, P_SELECT);
712: /* select is not restarted after signals... */
713: if (error == ERESTART)
714: error = EINTR;
715: if (error == EWOULDBLOCK)
716: error = 0;
717: #define putbits(name, x) \
718: if (SCARG(uap, name) && (error2 = copyout(pobits[x], \
719: SCARG(uap, name), ni))) \
720: error = error2;
721: if (error == 0) {
722: int error2;
723:
724: putbits(in, 0);
725: putbits(ou, 1);
726: putbits(ex, 2);
727: #undef putbits
728: }
729:
730: if (pibits[0] != (fd_set *)&bits[0])
731: free(pibits[0], M_TEMP);
732: return (error);
733: }
734:
735: int
736: selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni,
737: register_t *retval)
738: {
739: caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
740: struct filedesc *fdp = p->p_fd;
741: int msk, i, j, fd;
742: fd_mask bits;
743: struct file *fp;
744: int n = 0;
745: static const int flag[3] = { POLLIN, POLLOUT, POLLPRI };
746:
747: for (msk = 0; msk < 3; msk++) {
748: fd_set *pibits = (fd_set *)&cibits[msk*ni];
749: fd_set *pobits = (fd_set *)&cobits[msk*ni];
750:
751: for (i = 0; i < nfd; i += NFDBITS) {
752: bits = pibits->fds_bits[i/NFDBITS];
753: while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
754: bits &= ~(1 << j);
755: if ((fp = fd_getfile(fdp, fd)) == NULL)
756: return (EBADF);
757: FREF(fp);
758: if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
759: FD_SET(fd, pobits);
760: n++;
761: }
762: FRELE(fp);
763: }
764: }
765: }
766: *retval = n;
767: return (0);
768: }
769:
/*
 * Poll routine that treats the normal read and write events as always
 * ready: returns the subset of `events' made up of POLLIN, POLLOUT,
 * POLLRDNORM and POLLWRNORM.  dev and p are unused.
 */
/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct proc *p)
{
	const int always_ready = POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM;

	return (events & always_ready);
}
777:
778: /*
779: * Record a select request.
780: */
781: void
782: selrecord(struct proc *selector, struct selinfo *sip)
783: {
784: struct proc *p;
785: pid_t mypid;
786:
787: mypid = selector->p_pid;
788: if (sip->si_selpid == mypid)
789: return;
790: if (sip->si_selpid && (p = pfind(sip->si_selpid)) &&
791: p->p_wchan == (caddr_t)&selwait)
792: sip->si_flags |= SI_COLL;
793: else
794: sip->si_selpid = mypid;
795: }
796:
797: /*
798: * Do a wakeup when a selectable event occurs.
799: */
800: void
801: selwakeup(struct selinfo *sip)
802: {
803: struct proc *p;
804: int s;
805:
806: if (sip->si_selpid == 0)
807: return;
808: if (sip->si_flags & SI_COLL) {
809: nselcoll++;
810: sip->si_flags &= ~SI_COLL;
811: wakeup(&selwait);
812: }
813: p = pfind(sip->si_selpid);
814: sip->si_selpid = 0;
815: if (p != NULL) {
816: SCHED_LOCK(s);
817: if (p->p_wchan == (caddr_t)&selwait) {
818: if (p->p_stat == SSLEEP)
819: setrunnable(p);
820: else
821: unsleep(p);
822: } else if (p->p_flag & P_SELECT)
823: atomic_clearbits_int(&p->p_flag, P_SELECT);
824: SCHED_UNLOCK(s);
825: }
826: }
827:
828: void
829: pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval)
830: {
831: struct filedesc *fdp = p->p_fd;
832: struct file *fp;
833: u_int i;
834: int n = 0;
835:
836: for (i = 0; i < nfd; i++, pl++) {
837: /* Check the file descriptor. */
838: if (pl->fd < 0) {
839: pl->revents = 0;
840: continue;
841: }
842: if ((fp = fd_getfile(fdp, pl->fd)) == NULL) {
843: pl->revents = POLLNVAL;
844: n++;
845: continue;
846: }
847: FREF(fp);
848: pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p);
849: FRELE(fp);
850: if (pl->revents != 0)
851: n++;
852: }
853: *retval = n;
854: }
855:
856: /*
857: * We are using the same mechanism as select only we encode/decode args
858: * differently.
859: */
860: int
861: sys_poll(struct proc *p, void *v, register_t *retval)
862: {
863: struct sys_poll_args /* {
864: syscallarg(struct pollfd *) fds;
865: syscallarg(u_int) nfds;
866: syscallarg(int) timeout;
867: } */ *uap = v;
868: size_t sz;
869: struct pollfd pfds[4], *pl = pfds;
870: int msec = SCARG(uap, timeout);
871: struct timeval atv, rtv, ttv;
872: int timo, ncoll, i, s, error;
873: extern int nselcoll, selwait;
874: u_int nfds = SCARG(uap, nfds);
875:
876: /* Standards say no more than MAX_OPEN; this is possibly better. */
877: if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles))
878: return (EINVAL);
879:
880: sz = sizeof(struct pollfd) * nfds;
881:
882: /* optimize for the default case, of a small nfds value */
883: if (sz > sizeof(pfds))
884: pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);
885:
886: if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
887: goto bad;
888:
889: for (i = 0; i < nfds; i++)
890: pl[i].revents = 0;
891:
892: if (msec != INFTIM) {
893: atv.tv_sec = msec / 1000;
894: atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;
895:
896: if (itimerfix(&atv)) {
897: error = EINVAL;
898: goto done;
899: }
900: getmicrouptime(&rtv);
901: timeradd(&atv, &rtv, &atv);
902: } else {
903: atv.tv_sec = 0;
904: atv.tv_usec = 0;
905: }
906: timo = 0;
907:
908: retry:
909: ncoll = nselcoll;
910: atomic_setbits_int(&p->p_flag, P_SELECT);
911: pollscan(p, pl, nfds, retval);
912: if (*retval)
913: goto done;
914: if (msec != INFTIM) {
915: getmicrouptime(&rtv);
916: if (timercmp(&rtv, &atv, >=))
917: goto done;
918: ttv = atv;
919: timersub(&ttv, &rtv, &ttv);
920: timo = ttv.tv_sec > 24 * 60 * 60 ?
921: 24 * 60 * 60 * hz : tvtohz(&ttv);
922: }
923: s = splhigh();
924: if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
925: splx(s);
926: goto retry;
927: }
928: atomic_clearbits_int(&p->p_flag, P_SELECT);
929: error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo);
930: splx(s);
931: if (error == 0)
932: goto retry;
933:
934: done:
935: atomic_clearbits_int(&p->p_flag, P_SELECT);
936: /*
937: * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is
938: * ignored (since the whole point is to see what would block).
939: */
940: switch (error) {
941: case ERESTART:
942: error = copyout(pl, SCARG(uap, fds), sz);
943: if (error == 0)
944: error = EINTR;
945: break;
946: case EWOULDBLOCK:
947: case 0:
948: error = copyout(pl, SCARG(uap, fds), sz);
949: break;
950: }
951: bad:
952: if (pl != pfds)
953: free(pl, M_TEMP);
954: return (error);
955: }
CVSweb