Annotation of sys/netinet/tcp_subr.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: tcp_subr.c,v 1.98 2007/06/25 12:17:43 markus Exp $ */
2: /* $NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $ */
3:
4: /*
5: * Copyright (c) 1982, 1986, 1988, 1990, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. Neither the name of the University nor the names of its contributors
17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: *
32: * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
33: *
34: * NRL grants permission for redistribution and use in source and binary
35: * forms, with or without modification, of the software and documentation
36: * created at NRL provided that the following conditions are met:
37: *
38: * 1. Redistributions of source code must retain the above copyright
39: * notice, this list of conditions and the following disclaimer.
40: * 2. Redistributions in binary form must reproduce the above copyright
41: * notice, this list of conditions and the following disclaimer in the
42: * documentation and/or other materials provided with the distribution.
43: * 3. All advertising materials mentioning features or use of this software
44: * must display the following acknowledgements:
45: * This product includes software developed by the University of
46: * California, Berkeley and its contributors.
47: * This product includes software developed at the Information
48: * Technology Division, US Naval Research Laboratory.
49: * 4. Neither the name of the NRL nor the names of its contributors
50: * may be used to endorse or promote products derived from this software
51: * without specific prior written permission.
52: *
53: * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54: * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56: * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
57: * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60: * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64: *
65: * The views and conclusions contained in the software and documentation
66: * are those of the authors and should not be interpreted as representing
67: * official policies, either expressed or implied, of the US Naval
68: * Research Laboratory (NRL).
69: */
70:
71: #include <sys/param.h>
72: #include <sys/systm.h>
73: #include <sys/proc.h>
74: #include <sys/mbuf.h>
75: #include <sys/socket.h>
76: #include <sys/socketvar.h>
77: #include <sys/protosw.h>
78: #include <sys/kernel.h>
79:
80: #include <net/route.h>
81: #include <net/if.h>
82:
83: #include <netinet/in.h>
84: #include <netinet/in_systm.h>
85: #include <netinet/ip.h>
86: #include <netinet/in_pcb.h>
87: #include <netinet/ip_var.h>
88: #include <netinet/ip_icmp.h>
89: #include <netinet/tcp.h>
90: #include <netinet/tcp_fsm.h>
91: #include <netinet/tcp_seq.h>
92: #include <netinet/tcp_timer.h>
93: #include <netinet/tcp_var.h>
94: #include <netinet/tcpip.h>
95: #include <dev/rndvar.h>
96:
97: #ifdef INET6
98: #include <netinet6/in6_var.h>
99: #include <netinet6/ip6protosw.h>
100: #endif /* INET6 */
101:
102: #include <crypto/md5.h>
103:
104: /* patchable/settable parameters for tcp */
105: int tcp_mssdflt = TCP_MSS;
106: int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
107:
108: /* values controllable via sysctl */
109: int tcp_do_rfc1323 = 1;
110: #ifdef TCP_SACK
111: int tcp_do_sack = 1; /* RFC 2018 selective ACKs */
112: #endif
113: int tcp_ack_on_push = 0; /* set to enable immediate ACK-on-PUSH */
114: #ifdef TCP_ECN
115: int tcp_do_ecn = 0; /* RFC3168 ECN enabled/disabled? */
116: #endif
117: int tcp_do_rfc3390 = 1; /* RFC3390 Increasing TCP's Initial Window */
118:
119: u_int32_t tcp_now = 1;
120:
121: #ifndef TCBHASHSIZE
122: #define TCBHASHSIZE 128
123: #endif
124: int tcbhashsize = TCBHASHSIZE;
125:
126: /* syn hash parameters */
127: #define TCP_SYN_HASH_SIZE 293
128: #define TCP_SYN_BUCKET_SIZE 35
129: int tcp_syn_cache_size = TCP_SYN_HASH_SIZE;
130: int tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE;
131: int tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE;
132: struct syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE];
133:
134: int tcp_reass_limit = NMBCLUSTERS / 2; /* hardlimit for tcpqe_pool */
135: #ifdef TCP_SACK
136: int tcp_sackhole_limit = 32*1024; /* hardlimit for sackhl_pool */
137: #endif
138:
139: #ifdef INET6
140: extern int ip6_defhlim;
141: #endif /* INET6 */
142:
143: struct pool tcpcb_pool;
144: struct pool tcpqe_pool;
145: #ifdef TCP_SACK
146: struct pool sackhl_pool;
147: #endif
148:
149: struct tcpstat tcpstat; /* tcp statistics */
150: tcp_seq tcp_iss;
151:
152: /*
153: * Tcp initialization
154: */
155: void
156: tcp_init()
157: {
158: tcp_iss = 1; /* wrong */
159: pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcbpl",
160: NULL);
161: pool_init(&tcpqe_pool, sizeof(struct tcpqent), 0, 0, 0, "tcpqepl",
162: NULL);
163: pool_sethardlimit(&tcpqe_pool, tcp_reass_limit, NULL, 0);
164: #ifdef TCP_SACK
165: pool_init(&sackhl_pool, sizeof(struct sackhole), 0, 0, 0, "sackhlpl",
166: NULL);
167: pool_sethardlimit(&sackhl_pool, tcp_sackhole_limit, NULL, 0);
168: #endif /* TCP_SACK */
169: in_pcbinit(&tcbtable, tcbhashsize);
170:
171: #ifdef INET6
172: /*
173: * Since sizeof(struct ip6_hdr) > sizeof(struct ip), we
174: * do max length checks/computations only on the former.
175: */
176: if (max_protohdr < (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)))
177: max_protohdr = (sizeof(struct ip6_hdr) + sizeof(struct tcphdr));
178: if ((max_linkhdr + sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) >
179: MHLEN)
180: panic("tcp_init");
181:
182: icmp6_mtudisc_callback_register(tcp6_mtudisc_callback);
183: #endif /* INET6 */
184:
185: /* Initialize the compressed state engine. */
186: syn_cache_init();
187:
188: /* Initialize timer state. */
189: tcp_timer_init();
190: }
191:
192: /*
193: * Create template to be used to send tcp packets on a connection.
194: * Call after host entry created, allocates an mbuf and fills
195: * in a skeletal tcp/ip header, minimizing the amount of work
196: * necessary when the connection is used.
197: *
198: * To support IPv6 in addition to IPv4 and considering that the sizes of
199: * the IPv4 and IPv6 headers are not the same, we now use a separate pointer
200: * for the TCP header. Also, we made the former tcpiphdr header pointer
201: * into just an IP overlay pointer, with casting as appropriate for v6. rja
202: */
203: struct mbuf *
204: tcp_template(tp)
205: struct tcpcb *tp;
206: {
207: struct inpcb *inp = tp->t_inpcb;
208: struct mbuf *m;
209: struct tcphdr *th;
210:
211: if ((m = tp->t_template) == 0) {
212: m = m_get(M_DONTWAIT, MT_HEADER);
213: if (m == NULL)
214: return (0);
215:
216: switch (tp->pf) {
217: case 0: /*default to PF_INET*/
218: #ifdef INET
219: case AF_INET:
220: m->m_len = sizeof(struct ip);
221: break;
222: #endif /* INET */
223: #ifdef INET6
224: case AF_INET6:
225: m->m_len = sizeof(struct ip6_hdr);
226: break;
227: #endif /* INET6 */
228: }
229: m->m_len += sizeof (struct tcphdr);
230:
231: /*
232: * The link header, network header, TCP header, and TCP options
233: * all must fit in this mbuf. For now, assume the worst case of
234: * TCP options size. Eventually, compute this from tp flags.
235: */
236: if (m->m_len + MAX_TCPOPTLEN + max_linkhdr >= MHLEN) {
237: MCLGET(m, M_DONTWAIT);
238: if ((m->m_flags & M_EXT) == 0) {
239: m_free(m);
240: return (0);
241: }
242: }
243: }
244:
245: switch(tp->pf) {
246: #ifdef INET
247: case AF_INET:
248: {
249: struct ipovly *ipovly;
250:
251: ipovly = mtod(m, struct ipovly *);
252:
253: bzero(ipovly->ih_x1, sizeof ipovly->ih_x1);
254: ipovly->ih_pr = IPPROTO_TCP;
255: ipovly->ih_len = htons(sizeof (struct tcphdr));
256: ipovly->ih_src = inp->inp_laddr;
257: ipovly->ih_dst = inp->inp_faddr;
258:
259: th = (struct tcphdr *)(mtod(m, caddr_t) +
260: sizeof(struct ip));
261: th->th_sum = in_cksum_phdr(ipovly->ih_src.s_addr,
262: ipovly->ih_dst.s_addr,
263: htons(sizeof (struct tcphdr) + IPPROTO_TCP));
264: }
265: break;
266: #endif /* INET */
267: #ifdef INET6
268: case AF_INET6:
269: {
270: struct ip6_hdr *ip6;
271:
272: ip6 = mtod(m, struct ip6_hdr *);
273:
274: ip6->ip6_src = inp->inp_laddr6;
275: ip6->ip6_dst = inp->inp_faddr6;
276: ip6->ip6_flow = htonl(0x60000000) |
277: (inp->inp_flowinfo & IPV6_FLOWLABEL_MASK);
278:
279: ip6->ip6_nxt = IPPROTO_TCP;
280: ip6->ip6_plen = htons(sizeof(struct tcphdr)); /*XXX*/
281: ip6->ip6_hlim = in6_selecthlim(inp, NULL); /*XXX*/
282:
283: th = (struct tcphdr *)(mtod(m, caddr_t) +
284: sizeof(struct ip6_hdr));
285: th->th_sum = 0;
286: }
287: break;
288: #endif /* INET6 */
289: }
290:
291: th->th_sport = inp->inp_lport;
292: th->th_dport = inp->inp_fport;
293: th->th_seq = 0;
294: th->th_ack = 0;
295: th->th_x2 = 0;
296: th->th_off = 5;
297: th->th_flags = 0;
298: th->th_win = 0;
299: th->th_urp = 0;
300: return (m);
301: }
302:
303: /*
304: * Send a single message to the TCP at address specified by
305: * the given TCP/IP header. If m == 0, then we make a copy
306: * of the tcpiphdr at ti and send directly to the addressed host.
307: * This is used to force keep alive messages out using the TCP
308: * template for a connection tp->t_template. If flags are given
309: * then we send a message back to the TCP which originated the
310: * segment ti, and discard the mbuf containing it and any other
311: * attached mbufs.
312: *
313: * In any case the ack and sequence number of the transmitted
314: * segment are as specified by the parameters.
315: */
316: #ifdef INET6
317: /* This function looks hairy, because it was so IPv4-dependent. */
318: #endif /* INET6 */
319: void
320: tcp_respond(tp, template, m, ack, seq, flags)
321: struct tcpcb *tp;
322: caddr_t template;
323: struct mbuf *m;
324: tcp_seq ack, seq;
325: int flags;
326: {
327: int tlen;
328: int win = 0;
329: struct route *ro = 0;
330: struct tcphdr *th;
331: struct tcpiphdr *ti = (struct tcpiphdr *)template;
332: int af; /* af on wire */
333:
334: if (tp) {
335: win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
336: /*
337: * If this is called with an unconnected
338: * socket/tp/pcb (tp->pf is 0), we lose.
339: */
340: af = tp->pf;
341:
342: /*
343: * The route/route6 distinction is meaningless
344: * unless you're allocating space or passing parameters.
345: */
346: ro = &tp->t_inpcb->inp_route;
347: } else
348: af = (((struct ip *)ti)->ip_v == 6) ? AF_INET6 : AF_INET;
349: if (m == 0) {
350: m = m_gethdr(M_DONTWAIT, MT_HEADER);
351: if (m == NULL)
352: return;
353: #ifdef TCP_COMPAT_42
354: tlen = 1;
355: #else
356: tlen = 0;
357: #endif
358: m->m_data += max_linkhdr;
359: switch (af) {
360: #ifdef INET6
361: case AF_INET6:
362: bcopy(ti, mtod(m, caddr_t), sizeof(struct tcphdr) +
363: sizeof(struct ip6_hdr));
364: break;
365: #endif /* INET6 */
366: case AF_INET:
367: bcopy(ti, mtod(m, caddr_t), sizeof(struct tcphdr) +
368: sizeof(struct ip));
369: break;
370: }
371:
372: ti = mtod(m, struct tcpiphdr *);
373: flags = TH_ACK;
374: } else {
375: m_freem(m->m_next);
376: m->m_next = 0;
377: m->m_data = (caddr_t)ti;
378: tlen = 0;
379: #define xchg(a,b,type) do { type t; t=a; a=b; b=t; } while (0)
380: switch (af) {
381: #ifdef INET6
382: case AF_INET6:
383: m->m_len = sizeof(struct tcphdr) + sizeof(struct ip6_hdr);
384: xchg(((struct ip6_hdr *)ti)->ip6_dst,
385: ((struct ip6_hdr *)ti)->ip6_src, struct in6_addr);
386: th = (void *)((caddr_t)ti + sizeof(struct ip6_hdr));
387: break;
388: #endif /* INET6 */
389: case AF_INET:
390: m->m_len = sizeof (struct tcpiphdr);
391: xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_int32_t);
392: th = (void *)((caddr_t)ti + sizeof(struct ip));
393: break;
394: }
395: xchg(th->th_dport, th->th_sport, u_int16_t);
396: #undef xchg
397: }
398: switch (af) {
399: #ifdef INET6
400: case AF_INET6:
401: tlen += sizeof(struct tcphdr) + sizeof(struct ip6_hdr);
402: th = (struct tcphdr *)((caddr_t)ti + sizeof(struct ip6_hdr));
403: break;
404: #endif /* INET6 */
405: case AF_INET:
406: ti->ti_len = htons((u_int16_t)(sizeof (struct tcphdr) + tlen));
407: tlen += sizeof (struct tcpiphdr);
408: th = (struct tcphdr *)((caddr_t)ti + sizeof(struct ip));
409: break;
410: }
411:
412: m->m_len = tlen;
413: m->m_pkthdr.len = tlen;
414: m->m_pkthdr.rcvif = (struct ifnet *) 0;
415: th->th_seq = htonl(seq);
416: th->th_ack = htonl(ack);
417: th->th_x2 = 0;
418: th->th_off = sizeof (struct tcphdr) >> 2;
419: th->th_flags = flags;
420: if (tp)
421: win >>= tp->rcv_scale;
422: if (win > TCP_MAXWIN)
423: win = TCP_MAXWIN;
424: th->th_win = htons((u_int16_t)win);
425: th->th_urp = 0;
426:
427: switch (af) {
428: #ifdef INET6
429: case AF_INET6:
430: ((struct ip6_hdr *)ti)->ip6_flow = htonl(0x60000000);
431: ((struct ip6_hdr *)ti)->ip6_nxt = IPPROTO_TCP;
432: ((struct ip6_hdr *)ti)->ip6_hlim =
433: in6_selecthlim(tp ? tp->t_inpcb : NULL, NULL); /*XXX*/
434: ((struct ip6_hdr *)ti)->ip6_plen = tlen - sizeof(struct ip6_hdr);
435: th->th_sum = 0;
436: th->th_sum = in6_cksum(m, IPPROTO_TCP,
437: sizeof(struct ip6_hdr), ((struct ip6_hdr *)ti)->ip6_plen);
438: HTONS(((struct ip6_hdr *)ti)->ip6_plen);
439: ip6_output(m, tp ? tp->t_inpcb->inp_outputopts6 : NULL,
440: (struct route_in6 *)ro, 0, NULL, NULL,
441: tp ? tp->t_inpcb : NULL);
442: break;
443: #endif /* INET6 */
444: case AF_INET:
445: bzero(ti->ti_x1, sizeof ti->ti_x1);
446: ti->ti_len = htons((u_short)tlen - sizeof(struct ip));
447:
448: /*
449: * There's no point deferring to hardware checksum processing
450: * here, as we only send a minimal TCP packet whose checksum
451: * we need to compute in any case.
452: */
453: th->th_sum = 0;
454: th->th_sum = in_cksum(m, tlen);
455: ((struct ip *)ti)->ip_len = htons(tlen);
456: ((struct ip *)ti)->ip_ttl = ip_defttl;
457: ip_output(m, (void *)NULL, ro, ip_mtudisc ? IP_MTUDISC : 0,
458: (void *)NULL, tp ? tp->t_inpcb : (void *)NULL);
459: }
460: }
461:
462: /*
463: * Create a new TCP control block, making an
464: * empty reassembly queue and hooking it to the argument
465: * protocol control block.
466: */
467: struct tcpcb *
468: tcp_newtcpcb(struct inpcb *inp)
469: {
470: struct tcpcb *tp;
471: int i;
472:
473: tp = pool_get(&tcpcb_pool, PR_NOWAIT);
474: if (tp == NULL)
475: return ((struct tcpcb *)0);
476: bzero((char *) tp, sizeof(struct tcpcb));
477: TAILQ_INIT(&tp->t_segq);
478: tp->t_maxseg = tcp_mssdflt;
479: tp->t_maxopd = 0;
480:
481: TCP_INIT_DELACK(tp);
482: for (i = 0; i < TCPT_NTIMERS; i++)
483: TCP_TIMER_INIT(tp, i);
484: timeout_set(&tp->t_reap_to, tcp_reaper, tp);
485:
486: #ifdef TCP_SACK
487: tp->sack_enable = tcp_do_sack;
488: #endif
489: tp->t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
490: tp->t_inpcb = inp;
491: /*
492: * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
493: * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives
494: * reasonable initial retransmit time.
495: */
496: tp->t_srtt = TCPTV_SRTTBASE;
497: tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ <<
498: (TCP_RTTVAR_SHIFT + TCP_RTT_BASE_SHIFT - 1);
499: tp->t_rttmin = TCPTV_MIN;
500: TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
501: TCPTV_MIN, TCPTV_REXMTMAX);
502: tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
503: tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
504:
505: tp->t_pmtud_mtu_sent = 0;
506: tp->t_pmtud_mss_acked = 0;
507:
508: #ifdef INET6
509: /* we disallow IPv4 mapped address completely. */
510: if ((inp->inp_flags & INP_IPV6) == 0)
511: tp->pf = PF_INET;
512: else
513: tp->pf = PF_INET6;
514: #else
515: tp->pf = PF_INET;
516: #endif
517:
518: #ifdef INET6
519: if (inp->inp_flags & INP_IPV6)
520: inp->inp_ipv6.ip6_hlim = ip6_defhlim;
521: else
522: #endif /* INET6 */
523: inp->inp_ip.ip_ttl = ip_defttl;
524:
525: inp->inp_ppcb = (caddr_t)tp;
526: return (tp);
527: }
528:
529: /*
530: * Drop a TCP connection, reporting
531: * the specified error. If connection is synchronized,
532: * then send a RST to peer.
533: */
534: struct tcpcb *
535: tcp_drop(tp, errno)
536: struct tcpcb *tp;
537: int errno;
538: {
539: struct socket *so = tp->t_inpcb->inp_socket;
540:
541: if (TCPS_HAVERCVDSYN(tp->t_state)) {
542: tp->t_state = TCPS_CLOSED;
543: (void) tcp_output(tp);
544: tcpstat.tcps_drops++;
545: } else
546: tcpstat.tcps_conndrops++;
547: if (errno == ETIMEDOUT && tp->t_softerror)
548: errno = tp->t_softerror;
549: so->so_error = errno;
550: return (tcp_close(tp));
551: }
552:
553: /*
554: * Close a TCP control block:
555: * discard all space held by the tcp
556: * discard internet protocol block
557: * wake up any sleepers
558: */
559: struct tcpcb *
560: tcp_close(struct tcpcb *tp)
561: {
562: struct inpcb *inp = tp->t_inpcb;
563: struct socket *so = inp->inp_socket;
564: #ifdef TCP_SACK
565: struct sackhole *p, *q;
566: #endif
567:
568: /* free the reassembly queue, if any */
569: tcp_reass_lock(tp);
570: tcp_freeq(tp);
571: tcp_reass_unlock(tp);
572:
573: tcp_canceltimers(tp);
574: TCP_CLEAR_DELACK(tp);
575: syn_cache_cleanup(tp);
576:
577: #ifdef TCP_SACK
578: /* Free SACK holes. */
579: q = p = tp->snd_holes;
580: while (p != 0) {
581: q = p->next;
582: pool_put(&sackhl_pool, p);
583: p = q;
584: }
585: #endif
586: if (tp->t_template)
587: (void) m_free(tp->t_template);
588:
589: tp->t_flags |= TF_DEAD;
590: timeout_add(&tp->t_reap_to, 0);
591:
592: inp->inp_ppcb = 0;
593: soisdisconnected(so);
594: in_pcbdetach(inp);
595: return ((struct tcpcb *)0);
596: }
597:
598: void
599: tcp_reaper(void *arg)
600: {
601: struct tcpcb *tp = arg;
602: int s;
603:
604: s = splsoftnet();
605: pool_put(&tcpcb_pool, tp);
606: splx(s);
607: tcpstat.tcps_closed++;
608: }
609:
610: int
611: tcp_freeq(struct tcpcb *tp)
612: {
613: struct tcpqent *qe;
614: int rv = 0;
615:
616: while ((qe = TAILQ_FIRST(&tp->t_segq)) != NULL) {
617: TAILQ_REMOVE(&tp->t_segq, qe, tcpqe_q);
618: m_freem(qe->tcpqe_m);
619: pool_put(&tcpqe_pool, qe);
620: rv = 1;
621: }
622: return (rv);
623: }
624:
625: void
626: tcp_drain()
627: {
628: struct inpcb *inp;
629:
630: /* called at splnet() */
631: CIRCLEQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue) {
632: struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
633:
634: if (tp != NULL) {
635: if (tcp_reass_lock_try(tp) == 0)
636: continue;
637: if (tcp_freeq(tp))
638: tcpstat.tcps_conndrained++;
639: tcp_reass_unlock(tp);
640: }
641: }
642: }
643:
644: /*
645: * Compute proper scaling value for receiver window from buffer space
646: */
647:
648: void
649: tcp_rscale(struct tcpcb *tp, u_long hiwat)
650: {
651: tp->request_r_scale = 0;
652: while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
653: TCP_MAXWIN << tp->request_r_scale < hiwat)
654: tp->request_r_scale++;
655: }
656:
657: /*
658: * Notify a tcp user of an asynchronous error;
659: * store error as soft error, but wake up user
660: * (for now, won't do anything until can select for soft error).
661: */
662: void
663: tcp_notify(inp, error)
664: struct inpcb *inp;
665: int error;
666: {
667: struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
668: struct socket *so = inp->inp_socket;
669:
670: /*
671: * Ignore some errors if we are hooked up.
672: * If connection hasn't completed, has retransmitted several times,
673: * and receives a second error, give up now. This is better
674: * than waiting a long time to establish a connection that
675: * can never complete.
676: */
677: if (tp->t_state == TCPS_ESTABLISHED &&
678: (error == EHOSTUNREACH || error == ENETUNREACH ||
679: error == EHOSTDOWN)) {
680: return;
681: } else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
682: tp->t_rxtshift > 3 && tp->t_softerror)
683: so->so_error = error;
684: else
685: tp->t_softerror = error;
686: wakeup((caddr_t) &so->so_timeo);
687: sorwakeup(so);
688: sowwakeup(so);
689: }
690:
#ifdef INET6
/*
 * Control input for TCP over IPv6.
 *
 * cmd is a PRC_* code, sa the IPv6 address it concerns and d, when it
 * comes from icmp6, a struct ip6ctlparam describing the offending
 * packet.  With packet information available the matching connection
 * (or SYN cache entry) is looked up and notified; without it every
 * pcb matching sa is notified through in6_pcbnotify().
 */
void
tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
{
	struct tcphdr th;
	struct tcpcb *tp;
	void (*notify)(struct inpcb *, int) = tcp_notify;
	struct ip6_hdr *ip6 = NULL;
	const struct sockaddr_in6 *sa6_src = &sa6_any;
	struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa;
	struct inpcb *inp;
	struct mbuf *m = NULL;
	tcp_seq seq;
	int off = 0;
	int error;
	/* The leading part of the TCP header that is examined below. */
	struct {
		u_int16_t th_sport;
		u_int16_t th_dport;
		u_int32_t th_seq;
	} *thp;

	if (sa->sa_family != AF_INET6 ||
	    sa->sa_len != sizeof(struct sockaddr_in6) ||
	    IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
	    IN6_IS_ADDR_V4MAPPED(&sa6->sin6_addr))
		return;

	if ((unsigned)cmd >= PRC_NCMDS)
		return;
	error = inet6ctlerrmap[cmd];

	if (cmd == PRC_QUENCH) {
		/*
		 * Don't honor ICMP Source Quench messages meant for
		 * TCP connections.
		 */
		/* XXX there's no PRC_QUENCH in IPv6 */
		return;
	}
	if (PRC_IS_REDIRECT(cmd)) {
		notify = in_rtchange;
		d = NULL;
	} else if (cmd == PRC_MSGSIZE) {
		/* special code is present, see below */
	} else if (cmd == PRC_HOSTDEAD) {
		d = NULL;
	} else if (error == 0) {
		return;
	}

	/* if the parameter is from icmp6, decode it. */
	if (d != NULL) {
		struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d;

		m = ip6cp->ip6c_m;
		ip6 = ip6cp->ip6c_ip6;
		off = ip6cp->ip6c_off;
		sa6_src = ip6cp->ip6c_src;
	}

	if (ip6 == NULL) {
		/* No packet to inspect: notify every pcb matching sa. */
		(void) in6_pcbnotify(&tcbtable, sa, 0,
		    (struct sockaddr *)sa6_src, 0, cmd, NULL, notify);
		return;
	}

	/*
	 * XXX: We assume that when ip6 is non NULL,
	 * M and OFF are valid.
	 */

	/* check if we can safely examine src and dst ports */
	if (m->m_pkthdr.len < off + sizeof(*thp))
		return;

	bzero(&th, sizeof(th));
#ifdef DIAGNOSTIC
	if (sizeof(*thp) > sizeof(th))
		panic("assumption failed in tcp6_ctlinput");
#endif
	m_copydata(m, off, sizeof(*thp), (caddr_t)&th);

	/*
	 * Check to see if we have a valid TCP connection
	 * corresponding to the address in the ICMPv6 message
	 * payload.
	 */
	inp = in6_pcbhashlookup(&tcbtable, &sa6->sin6_addr,
	    th.th_dport, (struct in6_addr *)&sa6_src->sin6_addr,
	    th.th_sport);

	if (cmd == PRC_MSGSIZE) {
		/*
		 * Depending on the value of "valid" and routing table
		 * size (mtudisc_{hi,lo}wat), we will:
		 * - recalculate the new MTU and create the
		 *   corresponding routing entry, or
		 * - ignore the MTU change notification.
		 */
		icmp6_mtudisc_update((struct ip6ctlparam *)d, inp != NULL);
		return;
	}

	if (inp != NULL) {
		/* Only act if the quoted sequence number is in flight. */
		seq = ntohl(th.th_seq);
		if (inp->inp_socket &&
		    (tp = intotcpcb(inp)) &&
		    SEQ_GEQ(seq, tp->snd_una) &&
		    SEQ_LT(seq, tp->snd_max))
			notify(inp, error);
		return;
	}

	if (syn_cache_count &&
	    (error == EHOSTUNREACH || error == ENETUNREACH ||
	    error == EHOSTDOWN))
		syn_cache_unreach((struct sockaddr *)sa6_src, sa, &th);
}
#endif
805:
806: void *
807: tcp_ctlinput(cmd, sa, v)
808: int cmd;
809: struct sockaddr *sa;
810: void *v;
811: {
812: struct ip *ip = v;
813: struct tcphdr *th;
814: struct tcpcb *tp;
815: struct inpcb *inp;
816: struct in_addr faddr;
817: tcp_seq seq;
818: u_int mtu;
819: extern int inetctlerrmap[];
820: void (*notify)(struct inpcb *, int) = tcp_notify;
821: int errno;
822:
823: if (sa->sa_family != AF_INET)
824: return NULL;
825: faddr = satosin(sa)->sin_addr;
826: if (faddr.s_addr == INADDR_ANY)
827: return NULL;
828:
829: if ((unsigned)cmd >= PRC_NCMDS)
830: return NULL;
831: errno = inetctlerrmap[cmd];
832: if (cmd == PRC_QUENCH)
833: /*
834: * Don't honor ICMP Source Quench messages meant for
835: * TCP connections.
836: */
837: return NULL;
838: else if (PRC_IS_REDIRECT(cmd))
839: notify = in_rtchange, ip = 0;
840: else if (cmd == PRC_MSGSIZE && ip_mtudisc && ip) {
841: /*
842: * Verify that the packet in the icmp payload refers
843: * to an existing TCP connection.
844: */
845: th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
846: seq = ntohl(th->th_seq);
847: inp = in_pcbhashlookup(&tcbtable,
848: ip->ip_dst, th->th_dport, ip->ip_src, th->th_sport);
849: if (inp && (tp = intotcpcb(inp)) &&
850: SEQ_GEQ(seq, tp->snd_una) &&
851: SEQ_LT(seq, tp->snd_max)) {
852: struct icmp *icp;
853: icp = (struct icmp *)((caddr_t)ip -
854: offsetof(struct icmp, icmp_ip));
855:
856: /*
857: * If the ICMP message advertises a Next-Hop MTU
858: * equal or larger than the maximum packet size we have
859: * ever sent, drop the message.
860: */
861: mtu = (u_int)ntohs(icp->icmp_nextmtu);
862: if (mtu >= tp->t_pmtud_mtu_sent)
863: return NULL;
864: if (mtu >= tcp_hdrsz(tp) + tp->t_pmtud_mss_acked) {
865: /*
866: * Calculate new MTU, and create corresponding
867: * route (traditional PMTUD).
868: */
869: tp->t_flags &= ~TF_PMTUD_PEND;
870: icmp_mtudisc(icp);
871: } else {
872: /*
873: * Record the information got in the ICMP
874: * message; act on it later.
875: * If we had already recorded an ICMP message,
876: * replace the old one only if the new message
877: * refers to an older TCP segment
878: */
879: if (tp->t_flags & TF_PMTUD_PEND) {
880: if (SEQ_LT(tp->t_pmtud_th_seq, seq))
881: return NULL;
882: } else
883: tp->t_flags |= TF_PMTUD_PEND;
884: tp->t_pmtud_th_seq = seq;
885: tp->t_pmtud_nextmtu = icp->icmp_nextmtu;
886: tp->t_pmtud_ip_len = icp->icmp_ip.ip_len;
887: tp->t_pmtud_ip_hl = icp->icmp_ip.ip_hl;
888: return NULL;
889: }
890: } else {
891: /* ignore if we don't have a matching connection */
892: return NULL;
893: }
894: notify = tcp_mtudisc, ip = 0;
895: } else if (cmd == PRC_MTUINC)
896: notify = tcp_mtudisc_increase, ip = 0;
897: else if (cmd == PRC_HOSTDEAD)
898: ip = 0;
899: else if (errno == 0)
900: return NULL;
901:
902: if (ip) {
903: th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
904: inp = in_pcbhashlookup(&tcbtable,
905: ip->ip_dst, th->th_dport, ip->ip_src, th->th_sport);
906: if (inp) {
907: seq = ntohl(th->th_seq);
908: if (inp->inp_socket &&
909: (tp = intotcpcb(inp)) &&
910: SEQ_GEQ(seq, tp->snd_una) &&
911: SEQ_LT(seq, tp->snd_max))
912: notify(inp, errno);
913: } else if (syn_cache_count &&
914: (inetctlerrmap[cmd] == EHOSTUNREACH ||
915: inetctlerrmap[cmd] == ENETUNREACH ||
916: inetctlerrmap[cmd] == EHOSTDOWN)) {
917: struct sockaddr_in sin;
918:
919: bzero(&sin, sizeof(sin));
920: sin.sin_len = sizeof(sin);
921: sin.sin_family = AF_INET;
922: sin.sin_port = th->th_sport;
923: sin.sin_addr = ip->ip_src;
924: syn_cache_unreach((struct sockaddr *)&sin,
925: sa, th);
926: }
927: } else
928: in_pcbnotifyall(&tcbtable, sa, errno, notify);
929:
930: return NULL;
931: }
932:
933:
#ifdef INET6
/*
 * Path MTU Discovery handlers.
 */

/*
 * Callback registered with icmp6 in tcp_init(): run tcp_mtudisc() on
 * every TCP pcb whose foreign address is faddr.
 */
void
tcp6_mtudisc_callback(struct in6_addr *faddr)
{
	struct sockaddr_in6 dst;

	bzero(&dst, sizeof(dst));
	dst.sin6_len = sizeof(struct sockaddr_in6);
	dst.sin6_family = AF_INET6;
	dst.sin6_addr = *faddr;

	(void) in6_pcbnotify(&tcbtable, (struct sockaddr *)&dst, 0,
	    (struct sockaddr *)&sa6_any, 0, PRC_MSGSIZE, NULL, tcp_mtudisc);
}
#endif /* INET6 */
952:
953: /*
954: * On receipt of path MTU corrections, flush old route and replace it
955: * with the new one. Retransmit all unacknowledged packets, to ensure
956: * that all packets will be received.
957: */
958: void
959: tcp_mtudisc(inp, errno)
960: struct inpcb *inp;
961: int errno;
962: {
963: struct tcpcb *tp = intotcpcb(inp);
964: struct rtentry *rt = in_pcbrtentry(inp);
965: int change = 0;
966:
967: if (tp != 0) {
968: int orig_maxseg = tp->t_maxseg;
969: if (rt != 0) {
970: /*
971: * If this was not a host route, remove and realloc.
972: */
973: if ((rt->rt_flags & RTF_HOST) == 0) {
974: in_rtchange(inp, errno);
975: if ((rt = in_pcbrtentry(inp)) == 0)
976: return;
977: }
978: if (orig_maxseg != tp->t_maxseg ||
979: (rt->rt_rmx.rmx_locks & RTV_MTU))
980: change = 1;
981: }
982: tcp_mss(tp, -1);
983:
984: /*
985: * Resend unacknowledged packets
986: */
987: tp->snd_nxt = tp->snd_una;
988: if (change || errno > 0)
989: tcp_output(tp);
990: }
991: }
992:
993: void
994: tcp_mtudisc_increase(inp, errno)
995: struct inpcb *inp;
996: int errno;
997: {
998: struct tcpcb *tp = intotcpcb(inp);
999: struct rtentry *rt = in_pcbrtentry(inp);
1000:
1001: if (tp != 0 && rt != 0) {
1002: /*
1003: * If this was a host route, remove and realloc.
1004: */
1005: if (rt->rt_flags & RTF_HOST)
1006: in_rtchange(inp, errno);
1007:
1008: /* also takes care of congestion window */
1009: tcp_mss(tp, -1);
1010: }
1011: }
1012:
1013: #define TCP_ISS_CONN_INC 4096
1014: int tcp_secret_init;
1015: u_char tcp_secret[16];
1016: MD5_CTX tcp_secret_ctx;
1017:
1018: void
1019: tcp_set_iss_tsm(struct tcpcb *tp)
1020: {
1021: MD5_CTX ctx;
1022: u_int32_t digest[4];
1023:
1024: if (tcp_secret_init == 0) {
1025: arc4random_bytes(tcp_secret, sizeof(tcp_secret));
1026: MD5Init(&tcp_secret_ctx);
1027: MD5Update(&tcp_secret_ctx, tcp_secret, sizeof(tcp_secret));
1028: tcp_secret_init = 1;
1029: }
1030: ctx = tcp_secret_ctx;
1031: MD5Update(&ctx, (char *)&tp->t_inpcb->inp_lport, sizeof(u_short));
1032: MD5Update(&ctx, (char *)&tp->t_inpcb->inp_fport, sizeof(u_short));
1033: if (tp->pf == AF_INET6) {
1034: MD5Update(&ctx, (char *)&tp->t_inpcb->inp_laddr6,
1035: sizeof(struct in6_addr));
1036: MD5Update(&ctx, (char *)&tp->t_inpcb->inp_faddr6,
1037: sizeof(struct in6_addr));
1038: } else {
1039: MD5Update(&ctx, (char *)&tp->t_inpcb->inp_laddr,
1040: sizeof(struct in_addr));
1041: MD5Update(&ctx, (char *)&tp->t_inpcb->inp_faddr,
1042: sizeof(struct in_addr));
1043: }
1044: MD5Final((u_char *)digest, &ctx);
1045: tcp_iss += TCP_ISS_CONN_INC;
1046: tp->iss = digest[0] + tcp_iss;
1047: tp->ts_modulate = digest[1];
1048: }
1049:
1050: #ifdef TCP_SIGNATURE
/*
 * Attach hook for the TCP signature transform; there is nothing to
 * set up, so it always reports success (0).
 */
int
tcp_signature_tdb_attach(void)
{
	return 0;
}
1056:
1057: int
1058: tcp_signature_tdb_init(tdbp, xsp, ii)
1059: struct tdb *tdbp;
1060: struct xformsw *xsp;
1061: struct ipsecinit *ii;
1062: {
1063: if ((ii->ii_authkeylen < 1) || (ii->ii_authkeylen > 80))
1064: return (EINVAL);
1065:
1066: tdbp->tdb_amxkey = malloc(ii->ii_authkeylen, M_XDATA, M_DONTWAIT);
1067: if (tdbp->tdb_amxkey == NULL)
1068: return (ENOMEM);
1069: bcopy(ii->ii_authkey, tdbp->tdb_amxkey, ii->ii_authkeylen);
1070: tdbp->tdb_amxkeylen = ii->ii_authkeylen;
1071:
1072: return (0);
1073: }
1074:
1075: int
1076: tcp_signature_tdb_zeroize(tdbp)
1077: struct tdb *tdbp;
1078: {
1079: if (tdbp->tdb_amxkey) {
1080: bzero(tdbp->tdb_amxkey, tdbp->tdb_amxkeylen);
1081: free(tdbp->tdb_amxkey, M_XDATA);
1082: tdbp->tdb_amxkey = NULL;
1083: }
1084:
1085: return (0);
1086: }
1087:
/*
 * Input hook for the TCP MD5 signature transform.
 * None of the arguments are examined; always returns 0.
 */
int
tcp_signature_tdb_input(struct mbuf *m, struct tdb *tdbp, int skip,
    int protoff)
{
	return (0);
}
1096:
/*
 * Output hook for the TCP MD5 signature transform.
 * None of the arguments are examined; always fails with EINVAL.
 */
int
tcp_signature_tdb_output(struct mbuf *m, struct tdb *tdbp, struct mbuf **mp,
    int skip, int protoff)
{
	return (EINVAL);
}
1106:
1107: int
1108: tcp_signature_apply(fstate, data, len)
1109: caddr_t fstate;
1110: caddr_t data;
1111: unsigned int len;
1112: {
1113: MD5Update((MD5_CTX *)fstate, (char *)data, len);
1114: return 0;
1115: }
1116:
1117: int
1118: tcp_signature(struct tdb *tdb, int af, struct mbuf *m, struct tcphdr *th,
1119: int iphlen, int doswap, char *sig)
1120: {
1121: MD5_CTX ctx;
1122: int len;
1123: struct tcphdr th0;
1124:
1125: MD5Init(&ctx);
1126:
1127: switch(af) {
1128: case 0:
1129: #ifdef INET
1130: case AF_INET: {
1131: struct ippseudo ippseudo;
1132: struct ip *ip;
1133:
1134: ip = mtod(m, struct ip *);
1135:
1136: ippseudo.ippseudo_src = ip->ip_src;
1137: ippseudo.ippseudo_dst = ip->ip_dst;
1138: ippseudo.ippseudo_pad = 0;
1139: ippseudo.ippseudo_p = IPPROTO_TCP;
1140: ippseudo.ippseudo_len = htons(m->m_pkthdr.len - iphlen);
1141:
1142: MD5Update(&ctx, (char *)&ippseudo,
1143: sizeof(struct ippseudo));
1144: break;
1145: }
1146: #endif
1147: #ifdef INET6
1148: case AF_INET6: {
1149: struct ip6_hdr_pseudo ip6pseudo;
1150: struct ip6_hdr *ip6;
1151:
1152: ip6 = mtod(m, struct ip6_hdr *);
1153: bzero(&ip6pseudo, sizeof(ip6pseudo));
1154: ip6pseudo.ip6ph_src = ip6->ip6_src;
1155: ip6pseudo.ip6ph_dst = ip6->ip6_dst;
1156: in6_clearscope(&ip6pseudo.ip6ph_src);
1157: in6_clearscope(&ip6pseudo.ip6ph_dst);
1158: ip6pseudo.ip6ph_nxt = IPPROTO_TCP;
1159: ip6pseudo.ip6ph_len = htonl(m->m_pkthdr.len - iphlen);
1160:
1161: MD5Update(&ctx, (char *)&ip6pseudo,
1162: sizeof(ip6pseudo));
1163: break;
1164: }
1165: #endif
1166: }
1167:
1168: th0 = *th;
1169: th0.th_sum = 0;
1170:
1171: if (doswap) {
1172: HTONL(th0.th_seq);
1173: HTONL(th0.th_ack);
1174: HTONS(th0.th_win);
1175: HTONS(th0.th_urp);
1176: }
1177: MD5Update(&ctx, (char *)&th0, sizeof(th0));
1178:
1179: len = m->m_pkthdr.len - iphlen - th->th_off * sizeof(uint32_t);
1180:
1181: if (len > 0 &&
1182: m_apply(m, iphlen + th->th_off * sizeof(uint32_t), len,
1183: tcp_signature_apply, (caddr_t)&ctx))
1184: return (-1);
1185:
1186: MD5Update(&ctx, tdb->tdb_amxkey, tdb->tdb_amxkeylen);
1187: MD5Final(sig, &ctx);
1188:
1189: return (0);
1190: }
1191: #endif /* TCP_SIGNATURE */
1192:
/* Number of mixing rounds performed by tcp_rndiss_encrypt(). */
#define TCP_RNDISS_ROUNDS 16
/* Added to time_second to form the reseed deadline (tcp_rndiss_reseed). */
#define TCP_RNDISS_OUT 7200
/* Counter value at which tcp_rndiss_next() forces a re-initialisation. */
#define TCP_RNDISS_MAX 30000

/* Random substitution table used by tcp_rndiss_encrypt(). */
u_int8_t tcp_rndiss_sbox[128];
/* Alternates between 0 and 0x8000 on every re-initialisation. */
u_int16_t tcp_rndiss_msb;
/* Values handed out since the last re-initialisation. */
u_int16_t tcp_rndiss_cnt;
/* Value of time_second after which the state is re-initialised. */
long tcp_rndiss_reseed;

/*
 * Scramble val through TCP_RNDISS_ROUNDS rounds keyed by
 * tcp_rndiss_sbox.
 *
 * Each round XORs a table entry (selected by val and a running sum)
 * into val and then rearranges the bits.  After the final rearrangement
 * the result is at most 0x7fff, leaving the top bit free for the caller.
 */
u_int16_t
tcp_rndiss_encrypt(u_int16_t val)
{
	u_int16_t sum = 0;
	int i;

	for (i = 0; i < TCP_RNDISS_ROUNDS; i++) {
		sum += 0x79b9;
		val ^= ((u_int16_t)tcp_rndiss_sbox[(val ^ sum) & 0x7f]) << 7;
		val = ((val & 0xff) << 7) | (val >> 8);
	}

	return val;
}
1216:
1217: void
1218: tcp_rndiss_init()
1219: {
1220: get_random_bytes(tcp_rndiss_sbox, sizeof(tcp_rndiss_sbox));
1221:
1222: tcp_rndiss_reseed = time_second + TCP_RNDISS_OUT;
1223: tcp_rndiss_msb = tcp_rndiss_msb == 0x8000 ? 0 : 0x8000;
1224: tcp_rndiss_cnt = 0;
1225: }
1226:
1227: tcp_seq
1228: tcp_rndiss_next()
1229: {
1230: if (tcp_rndiss_cnt >= TCP_RNDISS_MAX ||
1231: time_second > tcp_rndiss_reseed)
1232: tcp_rndiss_init();
1233:
1234: /* (arc4random() & 0x7fff) ensures a 32768 byte gap between ISS */
1235: return ((tcp_rndiss_encrypt(tcp_rndiss_cnt++) | tcp_rndiss_msb) <<16) |
1236: (arc4random() & 0x7fff);
1237: }
1238:
CVSweb