Annotation of sys/netinet/tcp_usrreq.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: tcp_usrreq.c,v 1.91 2007/06/25 12:17:43 markus Exp $ */
2: /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */
3:
4: /*
5: * Copyright (c) 1982, 1986, 1988, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. Neither the name of the University nor the names of its contributors
17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: *
32: * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
33: *
34: * NRL grants permission for redistribution and use in source and binary
35: * forms, with or without modification, of the software and documentation
36: * created at NRL provided that the following conditions are met:
37: *
38: * 1. Redistributions of source code must retain the above copyright
39: * notice, this list of conditions and the following disclaimer.
40: * 2. Redistributions in binary form must reproduce the above copyright
41: * notice, this list of conditions and the following disclaimer in the
42: * documentation and/or other materials provided with the distribution.
43: * 3. All advertising materials mentioning features or use of this software
44: * must display the following acknowledgements:
45: * This product includes software developed by the University of
46: * California, Berkeley and its contributors.
47: * This product includes software developed at the Information
48: * Technology Division, US Naval Research Laboratory.
49: * 4. Neither the name of the NRL nor the names of its contributors
50: * may be used to endorse or promote products derived from this software
51: * without specific prior written permission.
52: *
53: * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54: * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56: * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
57: * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60: * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64: *
65: * The views and conclusions contained in the software and documentation
66: * are those of the authors and should not be interpreted as representing
67: * official policies, either expressed or implied, of the US Naval
68: * Research Laboratory (NRL).
69: */
70:
71: #include <sys/param.h>
72: #include <sys/systm.h>
73: #include <sys/mbuf.h>
74: #include <sys/socket.h>
75: #include <sys/socketvar.h>
76: #include <sys/protosw.h>
77: #include <sys/stat.h>
78: #include <sys/sysctl.h>
79: #include <sys/domain.h>
80: #include <sys/kernel.h>
81:
82: #include <dev/rndvar.h>
83:
84: #include <net/if.h>
85: #include <net/route.h>
86:
87: #include <netinet/in.h>
88: #include <netinet/in_systm.h>
89: #include <netinet/in_var.h>
90: #include <netinet/ip.h>
91: #include <netinet/in_pcb.h>
92: #include <netinet/ip_var.h>
93: #include <netinet/tcp.h>
94: #include <netinet/tcp_fsm.h>
95: #include <netinet/tcp_seq.h>
96: #include <netinet/tcp_timer.h>
97: #include <netinet/tcp_var.h>
98: #include <netinet/tcpip.h>
99: #include <netinet/tcp_debug.h>
100:
101: /*
102: * TCP protocol interface to socket abstraction.
103: */
104: extern char *tcpstates[];
105: extern int tcptv_keep_init;
106:
107: extern int tcp_rst_ppslim;
108:
109: /* from in_pcb.c */
110: extern struct baddynamicports baddynamicports;
111:
112: #ifndef TCP_SENDSPACE
113: #define TCP_SENDSPACE 1024*16
114: #endif
115: u_int tcp_sendspace = TCP_SENDSPACE;
116: #ifndef TCP_RECVSPACE
117: #define TCP_RECVSPACE 1024*16
118: #endif
119: u_int tcp_recvspace = TCP_RECVSPACE;
120:
121: int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS;
122:
123: struct inpcbtable tcbtable;
124:
125: int tcp_ident(void *, size_t *, void *, size_t, int);
126:
#ifdef INET6
/*
 * IPv6 entry point for TCP user requests.
 * Hands the request straight to tcp_usrreq(); the process pointer p
 * is accepted but not forwarded.
 */
int
tcp6_usrreq(so, req, m, nam, control, p)
	struct socket *so;
	int req;
	struct mbuf *m, *nam, *control;
	struct proc *p;
{
	return (tcp_usrreq(so, req, m, nam, control));
}
#endif
139:
140: /*
141: * Process a TCP user request for TCP tb. If this is a send request
142: * then m is the mbuf chain of send data. If this is a timer expiration
143: * (called from the software clock routine), then timertype tells which timer.
144: */
145: /*ARGSUSED*/
146: int
147: tcp_usrreq(so, req, m, nam, control)
148: struct socket *so;
149: int req;
150: struct mbuf *m, *nam, *control;
151: {
152: struct sockaddr_in *sin;
153: struct inpcb *inp;
154: struct tcpcb *tp = NULL;
155: int s;
156: int error = 0;
157: short ostate;
158:
159: if (req == PRU_CONTROL) {
160: #ifdef INET6
161: if (sotopf(so) == PF_INET6)
162: return in6_control(so, (u_long)m, (caddr_t)nam,
163: (struct ifnet *)control, 0);
164: else
165: #endif /* INET6 */
166: return (in_control(so, (u_long)m, (caddr_t)nam,
167: (struct ifnet *)control));
168: }
169: if (control && control->m_len) {
170: m_freem(control);
171: if (m)
172: m_freem(m);
173: return (EINVAL);
174: }
175:
176: s = splsoftnet();
177: inp = sotoinpcb(so);
178: /*
179: * When a TCP is attached to a socket, then there will be
180: * a (struct inpcb) pointed at by the socket, and this
181: * structure will point at a subsidiary (struct tcpcb).
182: */
183: if (inp == 0 && req != PRU_ATTACH) {
184: splx(s);
185: /*
186: * The following corrects an mbuf leak under rare
187: * circumstances
188: */
189: if (m && (req == PRU_SEND || req == PRU_SENDOOB))
190: m_freem(m);
191: return (EINVAL); /* XXX */
192: }
193: if (inp) {
194: tp = intotcpcb(inp);
195: /* WHAT IF TP IS 0? */
196: #ifdef KPROF
197: tcp_acounts[tp->t_state][req]++;
198: #endif
199: ostate = tp->t_state;
200: } else
201: ostate = 0;
202: switch (req) {
203:
204: /*
205: * TCP attaches to socket via PRU_ATTACH, reserving space,
206: * and an internet control block.
207: */
208: case PRU_ATTACH:
209: if (inp) {
210: error = EISCONN;
211: break;
212: }
213: error = tcp_attach(so);
214: if (error)
215: break;
216: if ((so->so_options & SO_LINGER) && so->so_linger == 0)
217: so->so_linger = TCP_LINGERTIME;
218: tp = sototcpcb(so);
219: break;
220:
221: /*
222: * PRU_DETACH detaches the TCP protocol from the socket.
223: * If the protocol state is non-embryonic, then can't
224: * do this directly: have to initiate a PRU_DISCONNECT,
225: * which may finish later; embryonic TCB's can just
226: * be discarded here.
227: */
228: case PRU_DETACH:
229: tp = tcp_disconnect(tp);
230: break;
231:
232: /*
233: * Give the socket an address.
234: */
235: case PRU_BIND:
236: #ifdef INET6
237: if (inp->inp_flags & INP_IPV6)
238: error = in6_pcbbind(inp, nam);
239: else
240: #endif
241: error = in_pcbbind(inp, nam);
242: if (error)
243: break;
244: break;
245:
246: /*
247: * Prepare to accept connections.
248: */
249: case PRU_LISTEN:
250: if (inp->inp_lport == 0) {
251: #ifdef INET6
252: if (inp->inp_flags & INP_IPV6)
253: error = in6_pcbbind(inp, NULL);
254: else
255: #endif
256: error = in_pcbbind(inp, NULL);
257: }
258: /* If the in_pcbbind() above is called, the tp->pf
259: should still be whatever it was before. */
260: if (error == 0)
261: tp->t_state = TCPS_LISTEN;
262: break;
263:
264: /*
265: * Initiate connection to peer.
266: * Create a template for use in transmissions on this connection.
267: * Enter SYN_SENT state, and mark socket as connecting.
268: * Start keep-alive timer, and seed output sequence space.
269: * Send initial segment on connection.
270: */
271: case PRU_CONNECT:
272: sin = mtod(nam, struct sockaddr_in *);
273:
274: #ifdef INET6
275: if (sin->sin_family == AF_INET6) {
276: struct in6_addr *in6_addr = &mtod(nam,
277: struct sockaddr_in6 *)->sin6_addr;
278:
279: if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) ||
280: IN6_IS_ADDR_MULTICAST(in6_addr) ||
281: (IN6_IS_ADDR_V4MAPPED(in6_addr) &&
282: ((in6_addr->s6_addr32[3] == INADDR_ANY) ||
283: IN_MULTICAST(in6_addr->s6_addr32[3]) ||
284: in_broadcast(sin->sin_addr, NULL)))) {
285: error = EINVAL;
286: break;
287: }
288:
289: if (inp->inp_lport == 0) {
290: error = in6_pcbbind(inp, NULL);
291: if (error)
292: break;
293: }
294: error = in6_pcbconnect(inp, nam);
295: } else if (sin->sin_family == AF_INET)
296: #endif /* INET6 */
297: {
298: if ((sin->sin_addr.s_addr == INADDR_ANY) ||
299: IN_MULTICAST(sin->sin_addr.s_addr) ||
300: in_broadcast(sin->sin_addr, NULL)) {
301: error = EINVAL;
302: break;
303: }
304:
305: if (inp->inp_lport == 0) {
306: error = in_pcbbind(inp, NULL);
307: if (error)
308: break;
309: }
310: error = in_pcbconnect(inp, nam);
311: }
312:
313: if (error)
314: break;
315:
316: tp->t_template = tcp_template(tp);
317: if (tp->t_template == 0) {
318: in_pcbdisconnect(inp);
319: error = ENOBUFS;
320: break;
321: }
322:
323: so->so_state |= SS_CONNECTOUT;
324:
325: /* Compute window scaling to request. */
326: tcp_rscale(tp, so->so_rcv.sb_hiwat);
327:
328: soisconnecting(so);
329: tcpstat.tcps_connattempt++;
330: tp->t_state = TCPS_SYN_SENT;
331: TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
332: #ifdef TCP_COMPAT_42
333: tp->iss = tcp_iss;
334: tcp_iss += TCP_ISSINCR/2;
335: #else /* TCP_COMPAT_42 */
336: tcp_set_iss_tsm(tp);
337: #endif /* !TCP_COMPAT_42 */
338: tcp_sendseqinit(tp);
339: #if defined(TCP_SACK)
340: tp->snd_last = tp->snd_una;
341: #endif
342: #if defined(TCP_SACK) && defined(TCP_FACK)
343: tp->snd_fack = tp->snd_una;
344: tp->retran_data = 0;
345: tp->snd_awnd = 0;
346: #endif
347: error = tcp_output(tp);
348: break;
349:
350: /*
351: * Create a TCP connection between two sockets.
352: */
353: case PRU_CONNECT2:
354: error = EOPNOTSUPP;
355: break;
356:
357: /*
358: * Initiate disconnect from peer.
359: * If connection never passed embryonic stage, just drop;
360: * else if don't need to let data drain, then can just drop anyways,
361: * else have to begin TCP shutdown process: mark socket disconnecting,
362: * drain unread data, state switch to reflect user close, and
363: * send segment (e.g. FIN) to peer. Socket will be really disconnected
364: * when peer sends FIN and acks ours.
365: *
366: * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
367: */
368: case PRU_DISCONNECT:
369: tp = tcp_disconnect(tp);
370: break;
371:
372: /*
373: * Accept a connection. Essentially all the work is
374: * done at higher levels; just return the address
375: * of the peer, storing through addr.
376: */
377: case PRU_ACCEPT:
378: #ifdef INET6
379: if (inp->inp_flags & INP_IPV6)
380: in6_setpeeraddr(inp, nam);
381: else
382: #endif
383: in_setpeeraddr(inp, nam);
384: break;
385:
386: /*
387: * Mark the connection as being incapable of further output.
388: */
389: case PRU_SHUTDOWN:
390: if (so->so_state & SS_CANTSENDMORE)
391: break;
392: socantsendmore(so);
393: tp = tcp_usrclosed(tp);
394: if (tp)
395: error = tcp_output(tp);
396: break;
397:
398: /*
399: * After a receive, possibly send window update to peer.
400: */
401: case PRU_RCVD:
402: /*
403: * soreceive() calls this function when a user receives
404: * ancillary data on a listening socket. We don't call
405: * tcp_output in such a case, since there is no header
406: * template for a listening socket and hence the kernel
407: * will panic.
408: */
409: if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
410: (void) tcp_output(tp);
411: break;
412:
413: /*
414: * Do a send by putting data in output queue and updating urgent
415: * marker if URG set. Possibly send more data.
416: */
417: case PRU_SEND:
418: sbappendstream(&so->so_snd, m);
419: error = tcp_output(tp);
420: break;
421:
422: /*
423: * Abort the TCP.
424: */
425: case PRU_ABORT:
426: tp = tcp_drop(tp, ECONNABORTED);
427: break;
428:
429: case PRU_SENSE:
430: ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
431: splx(s);
432: return (0);
433:
434: case PRU_RCVOOB:
435: if ((so->so_oobmark == 0 &&
436: (so->so_state & SS_RCVATMARK) == 0) ||
437: so->so_options & SO_OOBINLINE ||
438: tp->t_oobflags & TCPOOB_HADDATA) {
439: error = EINVAL;
440: break;
441: }
442: if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
443: error = EWOULDBLOCK;
444: break;
445: }
446: m->m_len = 1;
447: *mtod(m, caddr_t) = tp->t_iobc;
448: if (((long)nam & MSG_PEEK) == 0)
449: tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
450: break;
451:
452: case PRU_SENDOOB:
453: if (sbspace(&so->so_snd) < -512) {
454: m_freem(m);
455: error = ENOBUFS;
456: break;
457: }
458: /*
459: * According to RFC961 (Assigned Protocols),
460: * the urgent pointer points to the last octet
461: * of urgent data. We continue, however,
462: * to consider it to indicate the first octet
463: * of data past the urgent section.
464: * Otherwise, snd_up should be one lower.
465: */
466: sbappendstream(&so->so_snd, m);
467: tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
468: tp->t_force = 1;
469: error = tcp_output(tp);
470: tp->t_force = 0;
471: break;
472:
473: case PRU_SOCKADDR:
474: #ifdef INET6
475: if (inp->inp_flags & INP_IPV6)
476: in6_setsockaddr(inp, nam);
477: else
478: #endif
479: in_setsockaddr(inp, nam);
480: break;
481:
482: case PRU_PEERADDR:
483: #ifdef INET6
484: if (inp->inp_flags & INP_IPV6)
485: in6_setpeeraddr(inp, nam);
486: else
487: #endif
488: in_setpeeraddr(inp, nam);
489: break;
490:
491: default:
492: panic("tcp_usrreq");
493: }
494: if (tp && (so->so_options & SO_DEBUG))
495: tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0);
496: splx(s);
497: return (error);
498: }
499:
500: int
501: tcp_ctloutput(op, so, level, optname, mp)
502: int op;
503: struct socket *so;
504: int level, optname;
505: struct mbuf **mp;
506: {
507: int error = 0, s;
508: struct inpcb *inp;
509: struct tcpcb *tp;
510: struct mbuf *m;
511: int i;
512:
513: s = splsoftnet();
514: inp = sotoinpcb(so);
515: if (inp == NULL) {
516: splx(s);
517: if (op == PRCO_SETOPT && *mp)
518: (void) m_free(*mp);
519: return (ECONNRESET);
520: }
521: #ifdef INET6
522: tp = intotcpcb(inp);
523: #endif /* INET6 */
524: if (level != IPPROTO_TCP) {
525: switch (so->so_proto->pr_domain->dom_family) {
526: #ifdef INET6
527: case PF_INET6:
528: error = ip6_ctloutput(op, so, level, optname, mp);
529: break;
530: #endif /* INET6 */
531: case PF_INET:
532: error = ip_ctloutput(op, so, level, optname, mp);
533: break;
534: default:
535: error = EAFNOSUPPORT; /*?*/
536: break;
537: }
538: splx(s);
539: return (error);
540: }
541: #ifndef INET6
542: tp = intotcpcb(inp);
543: #endif /* !INET6 */
544:
545: switch (op) {
546:
547: case PRCO_SETOPT:
548: m = *mp;
549: switch (optname) {
550:
551: case TCP_NODELAY:
552: if (m == NULL || m->m_len < sizeof (int))
553: error = EINVAL;
554: else if (*mtod(m, int *))
555: tp->t_flags |= TF_NODELAY;
556: else
557: tp->t_flags &= ~TF_NODELAY;
558: break;
559:
560: case TCP_MAXSEG:
561: if (m == NULL || m->m_len < sizeof (int)) {
562: error = EINVAL;
563: break;
564: }
565:
566: i = *mtod(m, int *);
567: if (i > 0 && i <= tp->t_maxseg)
568: tp->t_maxseg = i;
569: else
570: error = EINVAL;
571: break;
572:
573: #ifdef TCP_SACK
574: case TCP_SACK_ENABLE:
575: if (m == NULL || m->m_len < sizeof (int)) {
576: error = EINVAL;
577: break;
578: }
579:
580: if (TCPS_HAVEESTABLISHED(tp->t_state)) {
581: error = EPERM;
582: break;
583: }
584:
585: if (tp->t_flags & TF_SIGNATURE) {
586: error = EPERM;
587: break;
588: }
589:
590: if (*mtod(m, int *))
591: tp->sack_enable = 1;
592: else
593: tp->sack_enable = 0;
594: break;
595: #endif
596: #ifdef TCP_SIGNATURE
597: case TCP_MD5SIG:
598: if (m == NULL || m->m_len < sizeof (int)) {
599: error = EINVAL;
600: break;
601: }
602:
603: if (TCPS_HAVEESTABLISHED(tp->t_state)) {
604: error = EPERM;
605: break;
606: }
607:
608: if (*mtod(m, int *)) {
609: tp->t_flags |= TF_SIGNATURE;
610: #ifdef TCP_SACK
611: tp->sack_enable = 0;
612: #endif /* TCP_SACK */
613: } else
614: tp->t_flags &= ~TF_SIGNATURE;
615: break;
616: #endif /* TCP_SIGNATURE */
617: default:
618: error = ENOPROTOOPT;
619: break;
620: }
621: if (m)
622: (void) m_free(m);
623: break;
624:
625: case PRCO_GETOPT:
626: *mp = m = m_get(M_WAIT, MT_SOOPTS);
627: m->m_len = sizeof(int);
628:
629: switch (optname) {
630: case TCP_NODELAY:
631: *mtod(m, int *) = tp->t_flags & TF_NODELAY;
632: break;
633: case TCP_MAXSEG:
634: *mtod(m, int *) = tp->t_maxseg;
635: break;
636: #ifdef TCP_SACK
637: case TCP_SACK_ENABLE:
638: *mtod(m, int *) = tp->sack_enable;
639: break;
640: #endif
641: #ifdef TCP_SIGNATURE
642: case TCP_MD5SIG:
643: *mtod(m, int *) = tp->t_flags & TF_SIGNATURE;
644: break;
645: #endif
646: default:
647: error = ENOPROTOOPT;
648: break;
649: }
650: break;
651: }
652: splx(s);
653: return (error);
654: }
655:
656: /*
657: * Attach TCP protocol to socket, allocating
658: * internet protocol control block, tcp control block,
659: * bufer space, and entering LISTEN state if to accept connections.
660: */
661: int
662: tcp_attach(so)
663: struct socket *so;
664: {
665: struct tcpcb *tp;
666: struct inpcb *inp;
667: int error;
668:
669: if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
670: error = soreserve(so, tcp_sendspace, tcp_recvspace);
671: if (error)
672: return (error);
673: }
674: error = in_pcballoc(so, &tcbtable);
675: if (error)
676: return (error);
677: inp = sotoinpcb(so);
678: tp = tcp_newtcpcb(inp);
679: if (tp == NULL) {
680: int nofd = so->so_state & SS_NOFDREF; /* XXX */
681:
682: so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
683: in_pcbdetach(inp);
684: so->so_state |= nofd;
685: return (ENOBUFS);
686: }
687: tp->t_state = TCPS_CLOSED;
688: #ifdef INET6
689: /* we disallow IPv4 mapped address completely. */
690: if (inp->inp_flags & INP_IPV6)
691: tp->pf = PF_INET6;
692: else
693: tp->pf = PF_INET;
694: #else
695: tp->pf = PF_INET;
696: #endif
697: return (0);
698: }
699:
700: /*
701: * Initiate (or continue) disconnect.
702: * If embryonic state, just send reset (once).
703: * If in ``let data drain'' option and linger null, just drop.
704: * Otherwise (hard), mark socket disconnecting and drop
705: * current input data; switch states based on user close, and
706: * send segment to peer (with FIN).
707: */
708: struct tcpcb *
709: tcp_disconnect(tp)
710: struct tcpcb *tp;
711: {
712: struct socket *so = tp->t_inpcb->inp_socket;
713:
714: if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
715: tp = tcp_close(tp);
716: else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
717: tp = tcp_drop(tp, 0);
718: else {
719: soisdisconnecting(so);
720: sbflush(&so->so_rcv);
721: tp = tcp_usrclosed(tp);
722: if (tp)
723: (void) tcp_output(tp);
724: }
725: return (tp);
726: }
727:
728: /*
729: * User issued close, and wish to trail through shutdown states:
730: * if never received SYN, just forget it. If got a SYN from peer,
731: * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
732: * If already got a FIN from peer, then almost done; go to LAST_ACK
733: * state. In all other cases, have already sent FIN to peer (e.g.
734: * after PRU_SHUTDOWN), and just have to play tedious game waiting
735: * for peer to send FIN or not respond to keep-alives, etc.
736: * We can let the user exit from the close as soon as the FIN is acked.
737: */
738: struct tcpcb *
739: tcp_usrclosed(tp)
740: struct tcpcb *tp;
741: {
742:
743: switch (tp->t_state) {
744:
745: case TCPS_CLOSED:
746: case TCPS_LISTEN:
747: case TCPS_SYN_SENT:
748: tp->t_state = TCPS_CLOSED;
749: tp = tcp_close(tp);
750: break;
751:
752: case TCPS_SYN_RECEIVED:
753: case TCPS_ESTABLISHED:
754: tp->t_state = TCPS_FIN_WAIT_1;
755: break;
756:
757: case TCPS_CLOSE_WAIT:
758: tp->t_state = TCPS_LAST_ACK;
759: break;
760: }
761: if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
762: soisdisconnected(tp->t_inpcb->inp_socket);
763: /*
764: * If we are in FIN_WAIT_2, we arrived here because the
765: * application did a shutdown of the send side. Like the
766: * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
767: * a full close, we start a timer to make sure sockets are
768: * not left in FIN_WAIT_2 forever.
769: */
770: if (tp->t_state == TCPS_FIN_WAIT_2)
771: TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
772: }
773: return (tp);
774: }
775:
776: /*
777: * Look up a socket for ident or tcpdrop, ...
778: */
779: int
780: tcp_ident(oldp, oldlenp, newp, newlen, dodrop)
781: void *oldp;
782: size_t *oldlenp;
783: void *newp;
784: size_t newlen;
785: int dodrop;
786: {
787: int error = 0, s;
788: struct tcp_ident_mapping tir;
789: struct inpcb *inp;
790: struct tcpcb *tp = NULL;
791: struct sockaddr_in *fin, *lin;
792: #ifdef INET6
793: struct sockaddr_in6 *fin6, *lin6;
794: struct in6_addr f6, l6;
795: #endif
796: if (dodrop) {
797: if (oldp != NULL || *oldlenp != 0)
798: return (EINVAL);
799: if (newp == NULL)
800: return (EPERM);
801: if (newlen < sizeof(tir))
802: return (ENOMEM);
803: if ((error = copyin(newp, &tir, sizeof (tir))) != 0 )
804: return (error);
805: } else {
806: if (oldp == NULL)
807: return (EINVAL);
808: if (*oldlenp < sizeof(tir))
809: return (ENOMEM);
810: if (newp != NULL || newlen != 0)
811: return (EINVAL);
812: if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 )
813: return (error);
814: }
815: switch (tir.faddr.ss_family) {
816: #ifdef INET6
817: case AF_INET6:
818: fin6 = (struct sockaddr_in6 *)&tir.faddr;
819: error = in6_embedscope(&f6, fin6, NULL, NULL);
820: if (error)
821: return EINVAL; /*?*/
822: lin6 = (struct sockaddr_in6 *)&tir.laddr;
823: error = in6_embedscope(&l6, lin6, NULL, NULL);
824: if (error)
825: return EINVAL; /*?*/
826: break;
827: #endif
828: case AF_INET:
829: fin = (struct sockaddr_in *)&tir.faddr;
830: lin = (struct sockaddr_in *)&tir.laddr;
831: break;
832: default:
833: return (EINVAL);
834: }
835:
836: s = splsoftnet();
837: switch (tir.faddr.ss_family) {
838: #ifdef INET6
839: case AF_INET6:
840: inp = in6_pcbhashlookup(&tcbtable, &f6,
841: fin6->sin6_port, &l6, lin6->sin6_port);
842: break;
843: #endif
844: case AF_INET:
845: inp = in_pcbhashlookup(&tcbtable, fin->sin_addr,
846: fin->sin_port, lin->sin_addr, lin->sin_port);
847: break;
848: }
849:
850: if (dodrop) {
851: if (inp && (tp = intotcpcb(inp)) &&
852: ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0))
853: tp = tcp_drop(tp, ECONNABORTED);
854: else
855: error = ESRCH;
856: splx(s);
857: return (error);
858: }
859:
860: if (inp == NULL) {
861: ++tcpstat.tcps_pcbhashmiss;
862: switch (tir.faddr.ss_family) {
863: #ifdef INET6
864: case AF_INET6:
865: inp = in6_pcblookup_listen(&tcbtable,
866: &l6, lin6->sin6_port, 0);
867: break;
868: #endif
869: case AF_INET:
870: inp = in_pcblookup_listen(&tcbtable,
871: lin->sin_addr, lin->sin_port, 0);
872: break;
873: }
874: }
875:
876: if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) {
877: tir.ruid = inp->inp_socket->so_ruid;
878: tir.euid = inp->inp_socket->so_euid;
879: } else {
880: tir.ruid = -1;
881: tir.euid = -1;
882: }
883: splx(s);
884:
885: *oldlenp = sizeof (tir);
886: error = copyout((void *)&tir, oldp, sizeof (tir));
887: return (error);
888: }
889:
890: /*
891: * Sysctl for tcp variables.
892: */
893: int
894: tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
895: int *name;
896: u_int namelen;
897: void *oldp;
898: size_t *oldlenp;
899: void *newp;
900: size_t newlen;
901: {
902: int error, nval;
903:
904: /* All sysctl names at this level are terminal. */
905: if (namelen != 1)
906: return (ENOTDIR);
907:
908: switch (name[0]) {
909: #ifdef TCP_SACK
910: case TCPCTL_SACK:
911: return (sysctl_int(oldp, oldlenp, newp, newlen,
912: &tcp_do_sack));
913: #endif
914: case TCPCTL_SLOWHZ:
915: return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ));
916:
917: case TCPCTL_BADDYNAMIC:
918: return (sysctl_struct(oldp, oldlenp, newp, newlen,
919: baddynamicports.tcp, sizeof(baddynamicports.tcp)));
920:
921: case TCPCTL_IDENT:
922: return (tcp_ident(oldp, oldlenp, newp, newlen, 0));
923:
924: case TCPCTL_DROP:
925: return (tcp_ident(oldp, oldlenp, newp, newlen, 1));
926:
927: #ifdef TCP_ECN
928: case TCPCTL_ECN:
929: return (sysctl_int(oldp, oldlenp, newp, newlen,
930: &tcp_do_ecn));
931: #endif
932: case TCPCTL_REASS_LIMIT:
933: nval = tcp_reass_limit;
934: error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
935: if (error)
936: return (error);
937: if (nval != tcp_reass_limit) {
938: error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0);
939: if (error)
940: return (error);
941: tcp_reass_limit = nval;
942: }
943: return (0);
944: #ifdef TCP_SACK
945: case TCPCTL_SACKHOLE_LIMIT:
946: nval = tcp_sackhole_limit;
947: error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
948: if (error)
949: return (error);
950: if (nval != tcp_sackhole_limit) {
951: error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0);
952: if (error)
953: return (error);
954: tcp_sackhole_limit = nval;
955: }
956: return (0);
957: #endif
958: default:
959: if (name[0] < TCPCTL_MAXID)
960: return (sysctl_int_arr(tcpctl_vars, name, namelen,
961: oldp, oldlenp, newp, newlen));
962: return (ENOPROTOOPT);
963: }
964: /* NOTREACHED */
965: }
CVSweb