[BACK]Return to tcp_timer.c CVS log [TXT][DIR] Up to [local] / sys / netinet

Annotation of sys/netinet/tcp_timer.c, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: tcp_timer.c,v 1.39 2007/06/15 18:23:07 markus Exp $   */
        !             2: /*     $NetBSD: tcp_timer.c,v 1.14 1996/02/13 23:44:09 christos Exp $  */
        !             3:
        !             4: /*
        !             5:  * Copyright (c) 1982, 1986, 1988, 1990, 1993
        !             6:  *     The Regents of the University of California.  All rights reserved.
        !             7:  *
        !             8:  * Redistribution and use in source and binary forms, with or without
        !             9:  * modification, are permitted provided that the following conditions
        !            10:  * are met:
        !            11:  * 1. Redistributions of source code must retain the above copyright
        !            12:  *    notice, this list of conditions and the following disclaimer.
        !            13:  * 2. Redistributions in binary form must reproduce the above copyright
        !            14:  *    notice, this list of conditions and the following disclaimer in the
        !            15:  *    documentation and/or other materials provided with the distribution.
        !            16:  * 3. Neither the name of the University nor the names of its contributors
        !            17:  *    may be used to endorse or promote products derived from this software
        !            18:  *    without specific prior written permission.
        !            19:  *
        !            20:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            21:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            22:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            23:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            24:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            25:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            26:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            27:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            28:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            29:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            30:  * SUCH DAMAGE.
        !            31:  *
        !            32:  *     @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93
        !            33:  */
        !            34:
        !            35: #include <sys/param.h>
        !            36: #include <sys/systm.h>
        !            37: #include <sys/mbuf.h>
        !            38: #include <sys/socket.h>
        !            39: #include <sys/socketvar.h>
        !            40: #include <sys/protosw.h>
        !            41: #include <sys/kernel.h>
        !            42:
        !            43: #include <net/route.h>
        !            44:
        !            45: #include <netinet/in.h>
        !            46: #include <netinet/in_systm.h>
        !            47: #include <netinet/ip.h>
        !            48: #include <netinet/in_pcb.h>
        !            49: #include <netinet/ip_var.h>
        !            50: #include <netinet/tcp.h>
        !            51: #include <netinet/tcp_fsm.h>
        !            52: #include <netinet/tcp_timer.h>
        !            53: #include <netinet/tcp_var.h>
        !            54: #include <netinet/ip_icmp.h>
        !            55: #include <netinet/tcp_seq.h>
        !            56:
/*
 * Keepalive and persist tunables.  tcp_keepidle, tcp_keepintvl and
 * tcp_maxpersistidle are given compiled-in defaults by tcp_timer_init()
 * when they are still zero at that point.
 * NOTE(review): units are presumably slow-timeout ticks -- confirm.
 */
int	tcp_keepidle;		/* keep timer re-arm value when no probe is sent */
int	tcp_keepintvl;		/* keep timer re-arm value after a probe is sent */
int	tcp_maxpersistidle;	/* max idle time in persist */
int	tcp_maxidle;		/* set in tcp_slowtimo(): TCPTV_KEEPCNT * tcp_keepintvl */

/*
 * Time to delay the ACK.  This is set to TCP_DELACK_TICKS by
 * tcp_timer_init() (called from tcp_init()), unless it's patched
 * to a nonzero value beforehand.
 */
int	tcp_delack_ticks;

/* Timer expiry handlers; each takes the tcpcb as its void * argument. */
void	tcp_timer_rexmt(void *);
void	tcp_timer_persist(void *);
void	tcp_timer_keep(void *);
void	tcp_timer_2msl(void *);

/*
 * Table of expiry handlers, one slot per timer (TCPT_NTIMERS slots).
 * NOTE(review): the entry order presumably has to match the numeric
 * values of TCPT_REXMT, TCPT_PERSIST, TCPT_KEEP and TCPT_2MSL --
 * confirm against tcp_timer.h.
 */
const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
	tcp_timer_rexmt,
	tcp_timer_persist,
	tcp_timer_keep,
	tcp_timer_2msl,
};
        !            79:
        !            80: /*
        !            81:  * Timer state initialization, called from tcp_init().
        !            82:  */
        !            83: void
        !            84: tcp_timer_init(void)
        !            85: {
        !            86:
        !            87:        if (tcp_keepidle == 0)
        !            88:                tcp_keepidle = TCPTV_KEEP_IDLE;
        !            89:
        !            90:        if (tcp_keepintvl == 0)
        !            91:                tcp_keepintvl = TCPTV_KEEPINTVL;
        !            92:
        !            93:        if (tcp_maxpersistidle == 0)
        !            94:                tcp_maxpersistidle = TCPTV_KEEP_IDLE;
        !            95:
        !            96:        if (tcp_delack_ticks == 0)
        !            97:                tcp_delack_ticks = TCP_DELACK_TICKS;
        !            98: }
        !            99:
        !           100: /*
        !           101:  * Callout to process delayed ACKs for a TCPCB.
        !           102:  */
        !           103: void
        !           104: tcp_delack(void *arg)
        !           105: {
        !           106:        struct tcpcb *tp = arg;
        !           107:        int s;
        !           108:
        !           109:        /*
        !           110:         * If tcp_output() wasn't able to transmit the ACK
        !           111:         * for whatever reason, it will restart the delayed
        !           112:         * ACK callout.
        !           113:         */
        !           114:
        !           115:        s = splsoftnet();
        !           116:        if (tp->t_flags & TF_DEAD) {
        !           117:                splx(s);
        !           118:                return;
        !           119:        }
        !           120:        tp->t_flags |= TF_ACKNOW;
        !           121:        (void) tcp_output(tp);
        !           122:        splx(s);
        !           123: }
        !           124:
        !           125: /*
        !           126:  * Tcp protocol timeout routine called every 500 ms.
        !           127:  * Updates the timers in all active tcb's and
        !           128:  * causes finite state machine actions if timers expire.
        !           129:  */
        !           130: void
        !           131: tcp_slowtimo()
        !           132: {
        !           133:        int s;
        !           134:
        !           135:        s = splsoftnet();
        !           136:        tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
        !           137: #ifdef TCP_COMPAT_42
        !           138:        tcp_iss += TCP_ISSINCR/PR_SLOWHZ;               /* increment iss */
        !           139:        if ((int)tcp_iss < 0)
        !           140:                tcp_iss = 0;                            /* XXX */
        !           141: #else
        !           142:        tcp_iss += TCP_ISSINCR2/PR_SLOWHZ;              /* increment iss */
        !           143: #endif /* TCP_COMPAT_42 */
        !           144:        tcp_now++;                                      /* for timestamps */
        !           145:        splx(s);
        !           146: }
        !           147:
        !           148: /*
        !           149:  * Cancel all timers for TCP tp.
        !           150:  */
        !           151: void
        !           152: tcp_canceltimers(tp)
        !           153:        struct tcpcb *tp;
        !           154: {
        !           155:        int i;
        !           156:
        !           157:        for (i = 0; i < TCPT_NTIMERS; i++)
        !           158:                TCP_TIMER_DISARM(tp, i);
        !           159: }
        !           160:
/*
 * Retransmit backoff multipliers, indexed by tp->t_rxtshift; the
 * retransmit timer value is scaled by tcp_backoff[t_rxtshift] in
 * tcp_timer_rexmt().  The multiplier doubles up to 64 and then stays
 * at 64.
 * NOTE(review): the 13 initializers assume TCP_MAXRXTSHIFT is 12 --
 * confirm against tcp_timer.h.
 */
int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };

/*
 * Sum of tcp_backoff[] (63 + 7 * 64 = 511); must be kept in step with
 * the table above.  Used by tcp_timer_persist() as an idle-time limit.
 */
int tcp_totbackoff = 511;	/* sum of tcp_backoff[] */
        !           165:
        !           166: /*
        !           167:  * TCP timer processing.
        !           168:  */
        !           169:
        !           170: #ifdef TCP_SACK
        !           171: void   tcp_timer_freesack(struct tcpcb *);
        !           172:
        !           173: void
        !           174: tcp_timer_freesack(struct tcpcb *tp)
        !           175: {
        !           176:        struct sackhole *p, *q;
        !           177:        /*
        !           178:         * Free SACK holes for 2MSL and REXMT timers.
        !           179:         */
        !           180:        q = tp->snd_holes;
        !           181:        while (q != NULL) {
        !           182:                p = q;
        !           183:                q = q->next;
        !           184:                pool_put(&sackhl_pool, p);
        !           185:        }
        !           186:        tp->snd_holes = 0;
        !           187: #ifdef TCP_FACK
        !           188:        tp->snd_fack = tp->snd_una;
        !           189:        tp->retran_data = 0;
        !           190:        tp->snd_awnd = 0;
        !           191: #endif /* TCP_FACK */
        !           192: }
        !           193: #endif /* TCP_SACK */
        !           194:
/*
 * Retransmit timer callout for the tcpcb passed as arg.
 *
 * Runs at splsoftnet and returns at once if the control block is
 * flagged TF_DEAD.  Two paths follow:
 *
 *  - If a path-MTU update is pending (TF_PMTUD_PEND) for a sequence
 *    number in [snd_una, snd_una + t_maxseg), a synthesized ICMP
 *    message is handed to icmp_mtudisc(), every PCB for the same peer
 *    is notified through in_pcbnotifyall(), and the function returns
 *    without backing off.
 *
 *  - Otherwise t_rxtshift is incremented.  Past TCP_MAXRXTSHIFT the
 *    connection is dropped with t_softerror (or ETIMEDOUT if that is
 *    zero).  Below the limit, t_rxtcur is recomputed with the backoff
 *    table, TCPT_REXMT is re-armed, snd_nxt is pulled back to snd_una,
 *    the congestion window is collapsed to one segment, and
 *    tcp_output() is called.
 */
void
tcp_timer_rexmt(void *arg)
{
	struct tcpcb *tp = arg;
	uint32_t rto;
	int s;

	s = splsoftnet();
	if (tp->t_flags & TF_DEAD) {
		splx(s);
		return;
	}

	/*
	 * Deferred path MTU update: only acted upon when the recorded
	 * sequence number lies within the first unacknowledged segment.
	 */
	if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
	    SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
	    SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_maxseg))) {
		extern struct sockaddr_in icmpsrc;
		struct icmp icmp;

		tp->t_flags &= ~TF_PMTUD_PEND;

		/*
		 * XXX create fake icmp message with relevant entries
		 * (only the three fields below and the global icmpsrc
		 * address are filled in; the rest of icmp is left
		 * uninitialized).
		 */
		icmp.icmp_nextmtu = tp->t_pmtud_nextmtu;
		icmp.icmp_ip.ip_len = tp->t_pmtud_ip_len;
		icmp.icmp_ip.ip_hl = tp->t_pmtud_ip_hl;
		icmpsrc.sin_addr = tp->t_inpcb->inp_faddr;
		icmp_mtudisc(&icmp);

		/*
		 * Notify all connections to the same peer about
		 * new mss and trigger retransmit.
		 */
		in_pcbnotifyall(&tcbtable, sintosa(&icmpsrc), EMSGSIZE,
		    tcp_mtudisc);
		splx(s);
		return;
	}

#ifdef TCP_SACK
	tcp_timer_freesack(tp);
#endif
	/* Too many consecutive timeouts: give up on the connection. */
	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		tcpstat.tcps_timeoutdrop++;
		(void)tcp_drop(tp, tp->t_softerror ?
		    tp->t_softerror : ETIMEDOUT);
		/*
		 * NOTE(review): tp is not touched again after tcp_drop();
		 * presumably it may no longer be valid -- confirm.
		 */
		goto out;
	}
	tcpstat.tcps_rexmttimeo++;
	/*
	 * Back off: base value (never below t_rttmin) times the table
	 * multiplier, clamped to [t_rttmin, TCPTV_REXMTMAX].
	 */
	rto = TCP_REXMTVAL(tp);
	if (rto < tp->t_rttmin)
		rto = tp->t_rttmin;
	TCPT_RANGESET(tp->t_rxtcur,
	    rto * tcp_backoff[tp->t_rxtshift],
	    tp->t_rttmin, TCPTV_REXMTMAX);
	TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);

	/*
	 * If we are losing and we are trying path MTU discovery,
	 * try turning it off.  This will avoid black holes in
	 * the network which suppress or fail to send "packet
	 * too big" ICMP messages.  We should ideally do
	 * lots more sophisticated searching to find the right
	 * value here...
	 */
	if (ip_mtudisc && tp->t_inpcb &&
	    TCPS_HAVEESTABLISHED(tp->t_state) &&
	    tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) {
		struct inpcb *inp = tp->t_inpcb;
		struct rtentry *rt = NULL;
		struct sockaddr_in sin;

		/* No data to send means path mtu is not a problem */
		if (!inp->inp_socket->so_snd.sb_cc)
			goto leave;

		rt = in_pcbrtentry(inp);
		/* Check if path MTU discovery is disabled already */
		if (rt && (rt->rt_flags & RTF_HOST) &&
		    (rt->rt_rmx.rmx_locks & RTV_MTU))
			goto leave;

		/*
		 * Forget the PCB's route; from here on rt is only
		 * non-NULL if icmp_mtudisc_clone() returned a route,
		 * which is released with rtfree() below.
		 */
		rt = NULL;
		switch(tp->pf) {
#ifdef INET6
		case PF_INET6:
			/*
			 * We can not turn off path MTU for IPv6.
			 * Do nothing for now, maybe lower to
			 * minimum MTU.
			 */
			break;
#endif
		case PF_INET:
			bzero(&sin, sizeof(struct sockaddr_in));
			sin.sin_family = AF_INET;
			sin.sin_len = sizeof(struct sockaddr_in);
			sin.sin_addr = inp->inp_faddr;
			rt = icmp_mtudisc_clone(sintosa(&sin));
			break;
		}
		if (rt != NULL) {
			/* Disable path MTU discovery */
			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
				rt->rt_rmx.rmx_locks |= RTV_MTU;
				in_rtchange(inp, 0);
			}

			rtfree(rt);
		}
	leave:
		;
	}

	/*
	 * If losing, let the lower level know and try for
	 * a better route.  Also, if we backed off this far,
	 * our srtt estimate is probably bogus.  Clobber it
	 * so we'll take the next rtt measurement as our srtt;
	 * move the current srtt into rttvar to keep the current
	 * retransmit times until then.
	 */
	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
		/*
		 * NOTE(review): t_inpcb is passed without the NULL check
		 * used in the blocks above -- confirm it cannot be NULL here.
		 */
		in_losing(tp->t_inpcb);
		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
		tp->t_srtt = 0;
	}
	/* Restart sending from the oldest unacknowledged sequence number. */
	tp->snd_nxt = tp->snd_una;
#if defined(TCP_SACK)
	/*
	 * Note:  We overload snd_last to function also as the
	 * snd_last variable described in RFC 2582
	 */
	tp->snd_last = tp->snd_max;
#endif /* TCP_SACK */
	/*
	 * If timing a segment in this window, stop the timer.
	 */
	tp->t_rtttime = 0;
#ifdef TCP_ECN
	/*
	 * if ECN is enabled, there might be a broken firewall which
	 * blocks ecn packets.  fall back to non-ecn.
	 */
	if ((tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED)
	    && tcp_do_ecn && !(tp->t_flags & TF_DISABLE_ECN))
		tp->t_flags |= TF_DISABLE_ECN;
#endif
	/*
	 * Close the congestion window down to one segment
	 * (we'll open it by one segment for each ack we get).
	 * Since we probably have a window's worth of unacked
	 * data accumulated, this "slow start" keeps us from
	 * dumping all that data as back-to-back packets (which
	 * might overwhelm an intermediate gateway).
	 *
	 * There are two phases to the opening: Initially we
	 * open by one mss on each ack.  This makes the window
	 * size increase exponentially with time.  If the
	 * window is larger than the path can handle, this
	 * exponential growth results in dropped packet(s)
	 * almost immediately.  To get more time between
	 * drops but still "push" the network to take advantage
	 * of improving conditions, we switch from exponential
	 * to linear window opening at some threshold size.
	 * For a threshold, we use half the current window
	 * size, truncated to a multiple of the mss.
	 *
	 * (the minimum cwnd that will give us exponential
	 * growth is 2 mss.  We don't allow the threshold
	 * to go below this.)
	 */
	{
		/* win is counted in segments, not bytes. */
		u_long win = ulmin(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
		if (win < 2)
			win = 2;
		tp->snd_cwnd = tp->t_maxseg;
		tp->snd_ssthresh = win * tp->t_maxseg;
		tp->t_dupacks = 0;
#ifdef TCP_ECN
		tp->snd_last = tp->snd_max;
		tp->t_flags |= TF_SEND_CWR;
#endif
#if 1 /* TCP_ECN */
		/* Counted unconditionally, even when TCP_ECN is not defined. */
		tcpstat.tcps_cwr_timeout++;
#endif
	}
	(void) tcp_output(tp);

 out:
	splx(s);
}
        !           387:
        !           388: void
        !           389: tcp_timer_persist(void *arg)
        !           390: {
        !           391:        struct tcpcb *tp = arg;
        !           392:        uint32_t rto;
        !           393:        int s;
        !           394:
        !           395:        s = splsoftnet();
        !           396:        if ((tp->t_flags & TF_DEAD) ||
        !           397:             TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
        !           398:                splx(s);
        !           399:                return;
        !           400:        }
        !           401:        tcpstat.tcps_persisttimeo++;
        !           402:        /*
        !           403:         * Hack: if the peer is dead/unreachable, we do not
        !           404:         * time out if the window is closed.  After a full
        !           405:         * backoff, drop the connection if the idle time
        !           406:         * (no responses to probes) reaches the maximum
        !           407:         * backoff that we would use if retransmitting.
        !           408:         */
        !           409:        rto = TCP_REXMTVAL(tp);
        !           410:        if (rto < tp->t_rttmin)
        !           411:                rto = tp->t_rttmin;
        !           412:        if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
        !           413:            ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle ||
        !           414:            (tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) {
        !           415:                tcpstat.tcps_persistdrop++;
        !           416:                tp = tcp_drop(tp, ETIMEDOUT);
        !           417:                goto out;
        !           418:        }
        !           419:        tcp_setpersist(tp);
        !           420:        tp->t_force = 1;
        !           421:        (void) tcp_output(tp);
        !           422:        tp->t_force = 0;
        !           423:  out:
        !           424:        splx(s);
        !           425: }
        !           426:
        !           427: void
        !           428: tcp_timer_keep(void *arg)
        !           429: {
        !           430:        struct tcpcb *tp = arg;
        !           431:        int s;
        !           432:
        !           433:        s = splsoftnet();
        !           434:        if (tp->t_flags & TF_DEAD) {
        !           435:                splx(s);
        !           436:                return;
        !           437:        }
        !           438:
        !           439:        tcpstat.tcps_keeptimeo++;
        !           440:        if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
        !           441:                goto dropit;
        !           442:        if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE &&
        !           443:            tp->t_state <= TCPS_CLOSING) {
        !           444:                if ((tcp_maxidle > 0) &&
        !           445:                    ((tcp_now - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle))
        !           446:                        goto dropit;
        !           447:                /*
        !           448:                 * Send a packet designed to force a response
        !           449:                 * if the peer is up and reachable:
        !           450:                 * either an ACK if the connection is still alive,
        !           451:                 * or an RST if the peer has closed the connection
        !           452:                 * due to timeout or reboot.
        !           453:                 * Using sequence number tp->snd_una-1
        !           454:                 * causes the transmitted zero-length segment
        !           455:                 * to lie outside the receive window;
        !           456:                 * by the protocol spec, this requires the
        !           457:                 * correspondent TCP to respond.
        !           458:                 */
        !           459:                tcpstat.tcps_keepprobe++;
        !           460: #ifdef TCP_COMPAT_42
        !           461:                /*
        !           462:                 * The keepalive packet must have nonzero length
        !           463:                 * to get a 4.2 host to respond.
        !           464:                 */
        !           465:                tcp_respond(tp, mtod(tp->t_template, caddr_t),
        !           466:                    (struct mbuf *)NULL, tp->rcv_nxt - 1, tp->snd_una - 1, 0);
        !           467: #else
        !           468:                tcp_respond(tp, mtod(tp->t_template, caddr_t),
        !           469:                    (struct mbuf *)NULL, tp->rcv_nxt, tp->snd_una - 1, 0);
        !           470: #endif
        !           471:                TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
        !           472:        } else
        !           473:                TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
        !           474:
        !           475:        splx(s);
        !           476:        return;
        !           477:
        !           478:  dropit:
        !           479:        tcpstat.tcps_keepdrops++;
        !           480:        tp = tcp_drop(tp, ETIMEDOUT);
        !           481:
        !           482:        splx(s);
        !           483: }
        !           484:
        !           485: void
        !           486: tcp_timer_2msl(void *arg)
        !           487: {
        !           488:        struct tcpcb *tp = arg;
        !           489:        int s;
        !           490:
        !           491:        s = splsoftnet();
        !           492:        if (tp->t_flags & TF_DEAD) {
        !           493:                splx(s);
        !           494:                return;
        !           495:        }
        !           496:
        !           497: #ifdef TCP_SACK
        !           498:        tcp_timer_freesack(tp);
        !           499: #endif
        !           500:
        !           501:        if (tp->t_state != TCPS_TIME_WAIT &&
        !           502:            ((tcp_maxidle == 0) || ((tcp_now - tp->t_rcvtime) <= tcp_maxidle)))
        !           503:                TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl);
        !           504:        else
        !           505:                tp = tcp_close(tp);
        !           506:
        !           507:        splx(s);
        !           508: }

CVSweb