[BACK]Return to tcp_timer.c CVS log [TXT][DIR] Up to [local] / sys / netinet

Annotation of sys/netinet/tcp_timer.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: tcp_timer.c,v 1.39 2007/06/15 18:23:07 markus Exp $   */
                      2: /*     $NetBSD: tcp_timer.c,v 1.14 1996/02/13 23:44:09 christos Exp $  */
                      3:
                      4: /*
                      5:  * Copyright (c) 1982, 1986, 1988, 1990, 1993
                      6:  *     The Regents of the University of California.  All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  * 3. Neither the name of the University nor the names of its contributors
                     17:  *    may be used to endorse or promote products derived from this software
                     18:  *    without specific prior written permission.
                     19:  *
                     20:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     21:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     22:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     23:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     24:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     25:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     26:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     27:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     28:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     29:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     30:  * SUCH DAMAGE.
                     31:  *
                     32:  *     @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93
                     33:  */
                     34:
                     35: #include <sys/param.h>
                     36: #include <sys/systm.h>
                     37: #include <sys/mbuf.h>
                     38: #include <sys/socket.h>
                     39: #include <sys/socketvar.h>
                     40: #include <sys/protosw.h>
                     41: #include <sys/kernel.h>
                     42:
                     43: #include <net/route.h>
                     44:
                     45: #include <netinet/in.h>
                     46: #include <netinet/in_systm.h>
                     47: #include <netinet/ip.h>
                     48: #include <netinet/in_pcb.h>
                     49: #include <netinet/ip_var.h>
                     50: #include <netinet/tcp.h>
                     51: #include <netinet/tcp_fsm.h>
                     52: #include <netinet/tcp_timer.h>
                     53: #include <netinet/tcp_var.h>
                     54: #include <netinet/ip_icmp.h>
                     55: #include <netinet/tcp_seq.h>
                     56:
                         /*
                          * Keepalive and persist tunables.  tcp_keepidle, tcp_maxidle and
                          * tcp_maxpersistidle are compared against differences of tcp_now in
                          * this file, so they are counted in tcp_slowtimo() ticks.  A zero
                          * tcp_keepidle, tcp_keepintvl or tcp_maxpersistidle is replaced with
                          * its compiled-in default by tcp_timer_init().
                          */
                     57: int    tcp_keepidle;           /* keepalive timer period while not probing */
                     58: int    tcp_keepintvl;          /* keepalive timer period while probing */
                     59: int    tcp_maxpersistidle;     /* max idle time in persist */
                     60: int    tcp_maxidle;            /* TCPTV_KEEPCNT * tcp_keepintvl, set by tcp_slowtimo() */
                     61:
                     62: /*
                     63:  * Time to delay the ACK.  This is initialized in tcp_timer_init()
                     64:  * (called from tcp_init()), unless it's already patched to nonzero.
                     65:  */
                     66: int    tcp_delack_ticks;
                     67:
                         /*
                          * Expiry handlers, one per TCP timer.  Each one receives the
                          * struct tcpcb as its opaque void * argument.
                          */
                     68: void   tcp_timer_rexmt(void *);
                     69: void   tcp_timer_persist(void *);
                     70: void   tcp_timer_keep(void *);
                     71: void   tcp_timer_2msl(void *);
                     72:
                         /*
                          * Handler table with TCPT_NTIMERS slots.
                          * NOTE(review): presumably indexed by the TCPT_* timer constants
                          * (TCPT_REXMT, TCPT_KEEP, TCPT_2MSL, ...), in which case the entry
                          * order must match their numeric values -- confirm in tcp_timer.h.
                          */
                     73: const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
                     74:        tcp_timer_rexmt,
                     75:        tcp_timer_persist,
                     76:        tcp_timer_keep,
                     77:        tcp_timer_2msl,
                     78: };
                     79:
                     80: /*
                     81:  * Install the compiled-in defaults for the keepalive, persist and
                          * delayed-ACK tunables; invoked from tcp_init().  A tunable that is
                          * already nonzero (e.g. patched beforehand) is left untouched.
                     82:  */
                     83: void
                     84: tcp_timer_init(void)
                     85: {
                     87:        if (!tcp_keepidle)
                     88:                tcp_keepidle = TCPTV_KEEP_IDLE;
                     90:        if (!tcp_keepintvl)
                     91:                tcp_keepintvl = TCPTV_KEEPINTVL;
                                /* The persist idle cap shares the keepalive idle default. */
                     93:        if (!tcp_maxpersistidle)
                     94:                tcp_maxpersistidle = TCPTV_KEEP_IDLE;
                     96:        if (!tcp_delack_ticks)
                     97:                tcp_delack_ticks = TCP_DELACK_TICKS;
                     98: }
                     99:
                    100: /*
                    101:  * Delayed-ACK callout; arg is the struct tcpcb whose ACK came due.
                          *
                          * At splsoftnet, flags the connection TF_ACKNOW and calls
                          * tcp_output().  A tcpcb marked TF_DEAD is left alone.  If
                          * tcp_output() wasn't able to transmit the ACK for whatever reason,
                          * it will restart the delayed ACK callout; its return value is
                          * ignored here.
                    102:  */
                    103: void
                    104: tcp_delack(void *arg)
                    105: {
                    106:        struct tcpcb *tp = arg;
                    107:        int spl;
                    108:
                    115:        spl = splsoftnet();
                    116:        if ((tp->t_flags & TF_DEAD) == 0) {
                    120:                tp->t_flags |= TF_ACKNOW;
                    121:                (void) tcp_output(tp);
                                }
                    122:        splx(spl);
                    123: }
                    124:
                    125: /*
                    126:  * TCP slow timeout routine (historically called every 500 ms).
                    127:  * It does not walk the tcb's or touch per-connection timers; it only
                    128:  * maintains three globals, at splsoftnet:
                          *  - tcp_maxidle is recomputed as TCPTV_KEEPCNT * tcp_keepintvl, so a
                          *    changed tcp_keepintvl is picked up on the next call;
                          *  - tcp_iss is advanced by a 1/PR_SLOWHZ share of the ISS increment
                          *    (with TCP_COMPAT_42 it restarts from 0 once the value, viewed as
                          *    an int, goes negative);
                          *  - tcp_now, the tick counter used for timestamps, is incremented.
                    129:  */
                    130: void
                    131: tcp_slowtimo(void)
                    132: {
                    133:        int s;
                    134:
                    135:        s = splsoftnet();
                    136:        tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
                    137: #ifdef TCP_COMPAT_42
                    138:        tcp_iss += TCP_ISSINCR/PR_SLOWHZ;               /* increment iss */
                    139:        if ((int)tcp_iss < 0)
                    140:                tcp_iss = 0;                            /* XXX */
                    141: #else
                    142:        tcp_iss += TCP_ISSINCR2/PR_SLOWHZ;              /* increment iss */
                    143: #endif /* TCP_COMPAT_42 */
                    144:        tcp_now++;                                      /* for timestamps */
                    145:        splx(s);
                    146: }
                    147:
                    148: /*
                    149:  * Cancel all timers for TCP tp.
                          * Every timer index from 0 to TCPT_NTIMERS - 1 is passed to
                          * TCP_TIMER_DISARM() in turn.
                    150:  */
                    151: void
                    152: tcp_canceltimers(struct tcpcb *tp)
                    154: {
                    155:        int i;
                    156:
                    157:        for (i = 0; i < TCPT_NTIMERS; i++)
                    158:                TCP_TIMER_DISARM(tp, i);
                    159: }
                    160:
                         /*
                          * Retransmit backoff multipliers, indexed by tp->t_rxtshift in
                          * tcp_timer_rexmt(): the factor doubles from 1 up to 64 and is then
                          * held at 64.  The 13 initializers imply TCP_MAXRXTSHIFT == 12
                          * (TODO confirm in tcp_timer.h).
                          */
                    161: int    tcp_backoff[TCP_MAXRXTSHIFT + 1] =
                    162:     { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
                    163:
                         /*
                          * 1+2+4+8+16+32 + 7*64 = 511.  This is a hand-maintained constant
                          * and must be updated whenever tcp_backoff[] changes.
                          * tcp_timer_persist() multiplies it by the base RTO to bound how
                          * long a fully backed-off persisting connection may stay idle.
                          */
                    164: int tcp_totbackoff = 511;      /* sum of tcp_backoff[] */
                    165:
                    166: /*
                    167:  * TCP timer processing.
                    168:  */
                    169:
                    170: #ifdef TCP_SACK
                    171: void   tcp_timer_freesack(struct tcpcb *);
                    172:
                         /*
                          * Discard the whole SACK hole list of tp; used by the REXMT and 2MSL
                          * timers.  Each struct sackhole is handed back to sackhl_pool and
                          * tp->snd_holes is cleared.  With TCP_FACK the FACK bookkeeping is
                          * reset too: snd_fack is pulled back to snd_una, and retran_data and
                          * snd_awnd are zeroed.
                          */
                    173: void
                    174: tcp_timer_freesack(struct tcpcb *tp)
                    175: {
                    176:        struct sackhole *hole, *next;
                    177:
                    180:        for (hole = tp->snd_holes; hole != NULL; hole = next) {
                                        /* Fetch the successor before the node is released. */
                    183:                next = hole->next;
                    184:                pool_put(&sackhl_pool, hole);
                    185:        }
                    186:        tp->snd_holes = NULL;
                    187: #ifdef TCP_FACK
                    188:        tp->snd_fack = tp->snd_una;
                    189:        tp->retran_data = 0;
                    190:        tp->snd_awnd = 0;
                    191: #endif /* TCP_FACK */
                    192: }
                    193: #endif /* TCP_SACK */
                    194:
                         /*
                          * Retransmission timer callout; arg is the struct tcpcb whose
                          * TCPT_REXMT timer expired.  The whole routine runs at splsoftnet
                          * and does nothing for a tcpcb flagged TF_DEAD.
                          *
                          * Two outcomes are possible:
                          *  - A path-MTU event is pending (TF_PMTUD_PEND) for a sequence
                          *    number within one t_maxseg of snd_una: it is replayed through
                          *    icmp_mtudisc(), every connection to the same peer is notified
                          *    with tcp_mtudisc(), and the routine returns without backing off.
                          *  - Otherwise: SACK holes are freed, t_rxtshift is incremented (the
                          *    connection is dropped once it exceeds TCP_MAXRXTSHIFT), the
                          *    timer is re-armed with the backed-off value, the route MTU may
                          *    be locked to stop path MTU discovery, snd_nxt is rewound to
                          *    snd_una, cwnd collapses to one segment and tcp_output() is
                          *    called to retransmit.
                          */
                    195: void
                    196: tcp_timer_rexmt(void *arg)
                    197: {
                    198:        struct tcpcb *tp = arg;
                    199:        uint32_t rto;
                    200:        int s;
                    201:
                    202:        s = splsoftnet();
                    203:        if (tp->t_flags & TF_DEAD) {
                    204:                splx(s);
                    205:                return;
                    206:        }
                    207:
                                /*
                                 * Deferred path-MTU event: handled only when the recorded
                                 * sequence t_pmtud_th_seq falls in [snd_una, snd_una + t_maxseg),
                                 * as tested with SEQ_GEQ/SEQ_LT.
                                 */
                    208:        if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
                    209:            SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
                    210:            SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_maxseg))) {
                    211:                extern struct sockaddr_in icmpsrc;
                    212:                struct icmp icmp;
                    213:
                    214:                tp->t_flags &= ~TF_PMTUD_PEND;
                    215:
                                        /*
                                         * Only the three fields below are filled in; the rest of
                                         * the on-stack struct icmp stays uninitialized.
                                         * NOTE(review): assumes icmp_mtudisc() reads nothing else
                                         * from it -- confirm in ip_icmp.c.
                                         */
                    216:                /* XXX create fake icmp message with relevant entries */
                    217:                icmp.icmp_nextmtu = tp->t_pmtud_nextmtu;
                    218:                icmp.icmp_ip.ip_len = tp->t_pmtud_ip_len;
                    219:                icmp.icmp_ip.ip_hl = tp->t_pmtud_ip_hl;
                                        /* icmpsrc is a global defined elsewhere; its address is overwritten here. */
                    220:                icmpsrc.sin_addr = tp->t_inpcb->inp_faddr;
                    221:                icmp_mtudisc(&icmp);
                    222:
                    223:                /*
                    224:                 * Notify all connections to the same peer about
                    225:                 * new mss and trigger retransmit.
                    226:                 */
                    227:                in_pcbnotifyall(&tcbtable, sintosa(&icmpsrc), EMSGSIZE,
                    228:                    tcp_mtudisc);
                    229:                splx(s);
                    230:                return;
                    231:        }
                    232:
                    233: #ifdef TCP_SACK
                    234:        tcp_timer_freesack(tp);
                    235: #endif
                                /*
                                 * One more retry.  Past TCP_MAXRXTSHIFT the connection is dropped,
                                 * reporting t_softerror when one is recorded and ETIMEDOUT
                                 * otherwise.  The shift is clamped first, which keeps it a valid
                                 * tcp_backoff[] index.
                                 */
                    236:        if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
                    237:                tp->t_rxtshift = TCP_MAXRXTSHIFT;
                    238:                tcpstat.tcps_timeoutdrop++;
                    239:                (void)tcp_drop(tp, tp->t_softerror ?
                    240:                    tp->t_softerror : ETIMEDOUT);
                    241:                goto out;
                    242:        }
                    243:        tcpstat.tcps_rexmttimeo++;
                                /*
                                 * New timeout: the base value from TCP_REXMTVAL(), floored at
                                 * t_rttmin, times the backoff factor for this retry.
                                 * TCPT_RANGESET stores the result in t_rxtcur; it presumably
                                 * clamps it to [t_rttmin, TCPTV_REXMTMAX] (macro defined
                                 * elsewhere -- confirm).
                                 */
                    244:        rto = TCP_REXMTVAL(tp);
                    245:        if (rto < tp->t_rttmin)
                    246:                rto = tp->t_rttmin;
                    247:        TCPT_RANGESET(tp->t_rxtcur,
                    248:            rto * tcp_backoff[tp->t_rxtshift],
                    249:            tp->t_rttmin, TCPTV_REXMTMAX);
                    250:        TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
                    251:
                    252:        /*
                    253:         * If we are losing and we are trying path MTU discovery,
                    254:         * try turning it off.  This will avoid black holes in
                    255:         * the network which suppress or fail to send "packet
                    256:         * too big" ICMP messages.  We should ideally do
                    257:         * lots more sophisticated searching to find the right
                    258:         * value here...
                    259:         */
                                /* The threshold is an integer division: 2 if TCP_MAXRXTSHIFT is 12 (TODO confirm). */
                    260:        if (ip_mtudisc && tp->t_inpcb &&
                    261:            TCPS_HAVEESTABLISHED(tp->t_state) &&
                    262:            tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) {
                    263:                struct inpcb *inp = tp->t_inpcb;
                    264:                struct rtentry *rt = NULL;
                    265:                struct sockaddr_in sin;
                    266:
                    267:                /* No data to send means path mtu is not a problem */
                    268:                if (!inp->inp_socket->so_snd.sb_cc)
                    269:                        goto leave;
                    270:
                    271:                rt = in_pcbrtentry(inp);
                    272:                /* Check if path MTU discovery is disabled already */
                    273:                if (rt && (rt->rt_flags & RTF_HOST) &&
                    274:                    (rt->rt_rmx.rmx_locks & RTV_MTU))
                    275:                        goto leave;
                    276:
                                        /*
                                         * Forget the PCB's route pointer: from here on rt only ever
                                         * holds a route returned by icmp_mtudisc_clone(), and that
                                         * is the one handed to rtfree() below.
                                         */
                    277:                rt = NULL;
                    278:                switch(tp->pf) {
                    279: #ifdef INET6
                    280:                case PF_INET6:
                    281:                        /*
                    282:                         * We can not turn off path MTU for IPv6.
                    283:                         * Do nothing for now, maybe lower to
                    284:                         * minimum MTU.
                    285:                         */
                    286:                        break;
                    287: #endif
                    288:                case PF_INET:
                    289:                        bzero(&sin, sizeof(struct sockaddr_in));
                    290:                        sin.sin_family = AF_INET;
                    291:                        sin.sin_len = sizeof(struct sockaddr_in);
                    292:                        sin.sin_addr = inp->inp_faddr;
                    293:                        rt = icmp_mtudisc_clone(sintosa(&sin));
                    294:                        break;
                    295:                }
                    296:                if (rt != NULL) {
                    297:                        /* Disable path MTU discovery */
                    298:                        if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
                    299:                                rt->rt_rmx.rmx_locks |= RTV_MTU;
                    300:                                in_rtchange(inp, 0);
                    301:                        }
                    302:
                    303:                        rtfree(rt);
                    304:                }
                    305:        leave:
                    306:                ;
                    307:        }
                    308:
                    309:        /*
                    310:         * If losing, let the lower level know and try for
                    311:         * a better route.  Also, if we backed off this far,
                    312:         * our srtt estimate is probably bogus.  Clobber it
                    313:         * so we'll take the next rtt measurement as our srtt;
                    314:         * move the current srtt into rttvar to keep the current
                    315:         * retransmit times until then.
                    316:         */
                    317:        if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
                    318:                in_losing(tp->t_inpcb);
                    319:                tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
                    320:                tp->t_srtt = 0;
                    321:        }
                                /* Resend starting from the oldest unacknowledged sequence number. */
                    322:        tp->snd_nxt = tp->snd_una;
                    323: #if defined(TCP_SACK)
                    324:        /*
                    325:         * Note:  We overload snd_last to function also as the
                    326:         * snd_last variable described in RFC 2582
                    327:         */
                    328:        tp->snd_last = tp->snd_max;
                    329: #endif /* TCP_SACK */
                    330:        /*
                    331:         * If timing a segment in this window, stop the timer.
                    332:         */
                    333:        tp->t_rtttime = 0;
                    334: #ifdef TCP_ECN
                    335:        /*
                    336:         * if ECN is enabled, there might be a broken firewall which
                    337:         * blocks ecn packets.  fall back to non-ecn.
                    338:         */
                    339:        if ((tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED)
                    340:            && tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
                    341:                tp->t_flags |= TF_DISABLE_ECN;
                    342: #endif
                    343:        /*
                    344:         * Close the congestion window down to one segment
                    345:         * (we'll open it by one segment for each ack we get).
                    346:         * Since we probably have a window's worth of unacked
                    347:         * data accumulated, this "slow start" keeps us from
                    348:         * dumping all that data as back-to-back packets (which
                    349:         * might overwhelm an intermediate gateway).
                    350:         *
                    351:         * There are two phases to the opening: Initially we
                    352:         * open by one mss on each ack.  This makes the window
                    353:         * size increase exponentially with time.  If the
                    354:         * window is larger than the path can handle, this
                    355:         * exponential growth results in dropped packet(s)
                    356:         * almost immediately.  To get more time between
                    357:         * drops but still "push" the network to take advantage
                    358:         * of improving conditions, we switch from exponential
                    359:         * to linear window opening at some threshold size.
                    360:         * For a threshold, we use half the current window
                    361:         * size, truncated to a multiple of the mss.
                    362:         *
                    363:         * (the minimum cwnd that will give us exponential
                    364:         * growth is 2 mss.  We don't allow the threshold
                    365:         * to go below this.)
                    366:         */
                    367:        {
                                        /* win is counted in segments: half of min(snd_wnd, snd_cwnd), in t_maxseg units. */
                    368:                u_long win = ulmin(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
                    369:                if (win < 2)
                    370:                        win = 2;
                    371:                tp->snd_cwnd = tp->t_maxseg;
                    372:                tp->snd_ssthresh = win * tp->t_maxseg;
                    373:                tp->t_dupacks = 0;
                    374: #ifdef TCP_ECN
                    375:                tp->snd_last = tp->snd_max;
                    376:                tp->t_flags |= TF_SEND_CWR;
                    377: #endif
                                        /* Counted unconditionally; the trailing comment suggests this was once TCP_ECN-only. */
                    378: #if 1 /* TCP_ECN */
                    379:                tcpstat.tcps_cwr_timeout++;
                    380: #endif
                    381:        }
                    382:        (void) tcp_output(tp);
                    383:
                                /* Common exit for the drop path and the retransmit path: restore the saved spl. */
                    384:  out:
                    385:        splx(s);
                    386: }
                    387:
                         /*
                          * Persist timer callout; arg is the struct tcpcb being probed.
                          *
                          * Runs at splsoftnet.  Nothing happens when the tcpcb is flagged
                          * TF_DEAD or when the retransmit timer (TCPT_REXMT) is armed.
                          * Otherwise the timeout is counted and one of two things follows:
                          *  - after a full backoff (t_rxtshift == TCP_MAXRXTSHIFT), if
                          *    tcp_now - t_rcvtime has reached tcp_maxpersistidle or the base
                          *    RTO times tcp_totbackoff, the connection is dropped with
                          *    ETIMEDOUT;
                          *  - otherwise tcp_setpersist() is called and tcp_output() runs
                          *    with t_force set for the duration of the call.
                          */
                    388: void
                    389: tcp_timer_persist(void *arg)
                    390: {
                    391:        struct tcpcb *tp = arg;
                    392:        uint32_t base_rto;
                    393:        int spl;
                    394:
                    395:        spl = splsoftnet();
                    396:        if ((tp->t_flags & TF_DEAD) || TCP_TIMER_ISARMED(tp, TCPT_REXMT))
                    397:                goto done;
                    400:
                    401:        tcpstat.tcps_persisttimeo++;
                    402:
                                /* Base retransmit value, never below the connection's t_rttmin. */
                    409:        base_rto = TCP_REXMTVAL(tp);
                    410:        if (base_rto < tp->t_rttmin)
                    411:                base_rto = tp->t_rttmin;
                         
                                /*
                                 * Hack: a dead or unreachable peer never times us out while the
                                 * window is closed.  So, once fully backed off, give up when the
                                 * idle time (no responses to probes) reaches the maximum backoff
                                 * that retransmission would have used.
                                 */
                    412:        if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
                    413:            ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle ||
                    414:            (tcp_now - tp->t_rcvtime) >= base_rto * tcp_totbackoff)) {
                    415:                tcpstat.tcps_persistdrop++;
                    416:                (void) tcp_drop(tp, ETIMEDOUT);
                    418:        } else {
                    419:                tcp_setpersist(tp);
                    420:                tp->t_force = 1;
                    421:                (void) tcp_output(tp);
                    422:                tp->t_force = 0;
                                }
                    423:  done:
                    424:        splx(spl);
                    425: }
                    426:
                    427: void
                    428: tcp_timer_keep(void *arg)
                    429: {
                    430:        struct tcpcb *tp = arg;
                    431:        int s;
                    432:
                    433:        s = splsoftnet();
                    434:        if (tp->t_flags & TF_DEAD) {
                    435:                splx(s);
                    436:                return;
                    437:        }
                    438:
                    439:        tcpstat.tcps_keeptimeo++;
                    440:        if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
                    441:                goto dropit;
                    442:        if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE &&
                    443:            tp->t_state <= TCPS_CLOSING) {
                    444:                if ((tcp_maxidle > 0) &&
                    445:                    ((tcp_now - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle))
                    446:                        goto dropit;
                    447:                /*
                    448:                 * Send a packet designed to force a response
                    449:                 * if the peer is up and reachable:
                    450:                 * either an ACK if the connection is still alive,
                    451:                 * or an RST if the peer has closed the connection
                    452:                 * due to timeout or reboot.
                    453:                 * Using sequence number tp->snd_una-1
                    454:                 * causes the transmitted zero-length segment
                    455:                 * to lie outside the receive window;
                    456:                 * by the protocol spec, this requires the
                    457:                 * correspondent TCP to respond.
                    458:                 */
                    459:                tcpstat.tcps_keepprobe++;
                    460: #ifdef TCP_COMPAT_42
                    461:                /*
                    462:                 * The keepalive packet must have nonzero length
                    463:                 * to get a 4.2 host to respond.
                    464:                 */
                    465:                tcp_respond(tp, mtod(tp->t_template, caddr_t),
                    466:                    (struct mbuf *)NULL, tp->rcv_nxt - 1, tp->snd_una - 1, 0);
                    467: #else
                    468:                tcp_respond(tp, mtod(tp->t_template, caddr_t),
                    469:                    (struct mbuf *)NULL, tp->rcv_nxt, tp->snd_una - 1, 0);
                    470: #endif
                    471:                TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
                    472:        } else
                    473:                TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
                    474:
                    475:        splx(s);
                    476:        return;
                    477:
                    478:  dropit:
                    479:        tcpstat.tcps_keepdrops++;
                    480:        tp = tcp_drop(tp, ETIMEDOUT);
                    481:
                    482:        splx(s);
                    483: }
                    484:
                         /*
                          * 2MSL timer callout; arg is the struct tcpcb.
                          *
                          * Runs at splsoftnet and ignores a tcpcb flagged TF_DEAD.  SACK holes
                          * are freed first (TCP_SACK only).  The connection is then closed
                          * with tcp_close() if it is in TCPS_TIME_WAIT, or if tcp_maxidle is
                          * nonzero and tcp_now - t_rcvtime exceeds it; in every other case
                          * the TCPT_2MSL timer is re-armed with tcp_keepintvl.
                          */
                    485: void
                    486: tcp_timer_2msl(void *arg)
                    487: {
                    488:        struct tcpcb *tp = arg;
                    489:        int spl;
                    490:
                    491:        spl = splsoftnet();
                    492:        if ((tp->t_flags & TF_DEAD) == 0) {
                    497: #ifdef TCP_SACK
                    498:                tcp_timer_freesack(tp);
                    499: #endif
                    500:
                    501:                if (tp->t_state == TCPS_TIME_WAIT ||
                    502:                    (tcp_maxidle != 0 &&
                                            (tcp_now - tp->t_rcvtime) > tcp_maxidle)) {
                    505:                        (void) tcp_close(tp);
                    504:                } else {
                    503:                        TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl);
                                        }
                                }
                    507:        splx(spl);
                    508: }

CVSweb