patch-pre2.0.10 linux/net/ipv4/tcp_input.c
Next file: linux/net/ipv4/tcp_output.c
Previous file: linux/net/ipv4/tcp.c
Back to the patch index
Back to the overall index
- Lines: 364
- Date:
Sat Jun 1 10:50:38 1996
- Orig file:
pre2.0.9/linux/net/ipv4/tcp_input.c
- Orig date:
Tue May 21 19:52:40 1996
diff -u --recursive --new-file pre2.0.9/linux/net/ipv4/tcp_input.c linux/net/ipv4/tcp_input.c
@@ -25,6 +25,9 @@
* Eric Schenk : Yet another double ACK bug.
* Eric Schenk : Delayed ACK bug fixes.
* Eric Schenk : Floyd style fast retrans war avoidance.
+ * Eric Schenk : Skip fast retransmit on small windows.
+ * Eric Schenk : Fixes to retransmission code to
+ * : avoid extra retransmission.
*/
#include <linux/config.h>
@@ -404,6 +407,7 @@
skb_queue_head_init(&newsk->receive_queue);
newsk->send_head = NULL;
newsk->send_tail = NULL;
+ newsk->send_next = NULL;
skb_queue_head_init(&newsk->back_log);
newsk->rtt = 0;
newsk->rto = TCP_TIMEOUT_INIT;
@@ -562,6 +566,7 @@
skb2 = sk->send_head;
sk->send_head = NULL;
sk->send_tail = NULL;
+ sk->send_next = NULL;
/*
* This is an artifact of a flawed concept. We want one
@@ -595,6 +600,7 @@
{
sk->send_head = skb;
sk->send_tail = skb;
+ sk->send_next = skb;
}
else
{
@@ -685,6 +691,7 @@
{
sk->send_head = NULL;
sk->send_tail = NULL;
+ sk->send_next = NULL;
sk->packets_out= 0;
}
@@ -745,8 +752,8 @@
* The packet acked data after high_seq;
* I've tried to order these in occurrence of most likely to fail
* to least likely to fail.
- * [These are the rules BSD stacks use to determine if an ACK is a
- * duplicate.]
+ * [These are an extension of the rules BSD stacks use to
+ * determine if an ACK is a duplicate.]
*/
if (sk->rcv_ack_seq == ack
@@ -755,22 +762,23 @@
&& before(ack, sk->sent_seq)
&& after(ack, sk->high_seq))
{
+ /* Prevent counting of duplicate ACKs if the congestion
+ * window is smaller than 3. Note that since we reduce
+ * the congestion window when we do a fast retransmit,
+ * we must be careful to keep counting if we were already
+ * counting. The idea behind this is to avoid doing
+ * fast retransmits if the congestion window is so small
+ * that we cannot get 3 ACKs due to the loss of a packet
+ * unless we are getting ACKs for retransmitted packets.
+ */
+ if (sk->cong_window >= 3 || sk->rcv_ack_cnt > MAX_DUP_ACKS+1)
+ sk->rcv_ack_cnt++;
/* See draft-stevens-tcpca-spec-01 for explanation
* of what we are doing here.
*/
- sk->rcv_ack_cnt++;
if (sk->rcv_ack_cnt == MAX_DUP_ACKS+1) {
sk->ssthresh = max(sk->cong_window >> 1, 2);
sk->cong_window = sk->ssthresh+MAX_DUP_ACKS+1;
- /* FIXME:
- * reduce the count. We don't want to be
- * seen to be in "retransmit" mode if we
- * are doing a fast retransmit.
- * This is also a signal to tcp_do_retransmit
- * not to set sk->high_seq.
- * This is a horrible ugly hack.
- */
- sk->retransmits--;
tcp_do_retransmit(sk,0);
} else if (sk->rcv_ack_cnt > MAX_DUP_ACKS+1) {
sk->cong_window++;
@@ -878,6 +886,13 @@
sk->send_tail = NULL;
sk->retransmits = 0;
}
+
+ /*
+ * advance the send_next pointer if needed.
+ */
+ if (sk->send_next == skb)
+ sk->send_next = sk->send_head;
+
/*
* Note that we only reset backoff and rto in the
* rtt recomputation code. And that doesn't happen
@@ -916,86 +931,97 @@
}
/*
- * XXX someone ought to look at this too.. at the moment, if skb_peek()
- * returns non-NULL, we complete ignore the timer stuff in the else
- * clause. We ought to organize the code so that else clause can
- * (should) be executed regardless, possibly moving the PROBE timer
- * reset over. The skb_peek() thing should only move stuff to the
- * write queue, NOT also manage the timer functions.
- */
-
- /*
* Maybe we can take some stuff off of the write queue,
* and put it onto the xmit queue.
+ * FIXME: (?) There is a bizarre case being tested here, to check if
+ * the data at the head of the queue ends before the start of
+ * the sequence we already ACKed. This does not appear to be
+ * a case that can actually occur. Why are we testing it?
*/
- if (skb_peek(&sk->write_queue) != NULL)
- {
- if (!before(sk->window_seq, sk->write_queue.next->end_seq) &&
- (sk->retransmits == 0 ||
- sk->ip_xmit_timeout != TIME_WRITE ||
- !after(sk->write_queue.next->end_seq, sk->rcv_ack_seq))
- && sk->packets_out < sk->cong_window)
- {
- /*
- * Add more data to the send queue.
- */
- flag |= 1;
- tcp_write_xmit(sk);
- }
- else if (before(sk->window_seq, sk->write_queue.next->end_seq) &&
- sk->send_head == NULL &&
- sk->ack_backlog == 0 &&
- sk->state != TCP_TIME_WAIT)
- {
- /*
- * Data to queue but no room.
- */
- tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
- }
- }
- else
+
+ if (!skb_queue_empty(&sk->write_queue) &&
+ !before(sk->window_seq, sk->write_queue.next->end_seq) &&
+ (sk->retransmits == 0 ||
+ sk->ip_xmit_timeout != TIME_WRITE ||
+ !after(sk->write_queue.next->end_seq, sk->rcv_ack_seq)) &&
+ sk->packets_out < sk->cong_window)
{
/*
- * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
- * from TCP_CLOSE we don't do anything
- *
- * from anything else, if there is write data (or fin) pending,
- * we use a TIME_WRITE timeout, else if keepalive we reset to
- * a KEEPALIVE timeout, else we delete the timer.
- *
- * We do not set flag for nominal write data, otherwise we may
- * force a state where we start to write itsy bitsy tidbits
- * of data.
+ * Add more data to the send queue.
*/
+ flag |= 1;
+ tcp_write_xmit(sk);
+ }
- switch(sk->state) {
- case TCP_TIME_WAIT:
- /*
- * keep us in TIME_WAIT until we stop getting packets,
- * reset the timeout.
- */
- tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
- break;
- case TCP_CLOSE:
- /*
- * don't touch the timer.
- */
- break;
- default:
- /*
- * Must check send_head and write_queue
- * to determine which timeout to use.
+ /*
+ * Reset timers to reflect the new state.
+ *
+ * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
+ * from TCP_CLOSE we don't do anything
+ *
+ * from anything else, if there is queued data (or fin) pending,
+ * we use a TIME_WRITE timeout, if there is data to write but
+ * no room in the window we use TIME_PROBE0, else if keepalive
+ * we reset to a KEEPALIVE timeout, else we delete the timer.
+ *
+ * We do not set flag for nominal write data, otherwise we may
+ * force a state where we start to write itsy bitsy tidbits
+ * of data.
+ */
+
+ switch(sk->state) {
+ case TCP_TIME_WAIT:
+ /*
+ * keep us in TIME_WAIT until we stop getting packets,
+ * reset the timeout.
+ */
+ tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+ break;
+ case TCP_CLOSE:
+ /*
+ * don't touch the timer.
+ */
+ break;
+ default:
+ /*
+ * Must check send_head and write_queue
+ * to determine which timeout to use.
+ */
+ if (sk->send_head) {
+ tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+ } else if (!skb_queue_empty(&sk->write_queue)
+ && sk->ack_backlog == 0)
+ {
+ /*
+ * if the write queue is not empty when we get here
+ * then we failed to move any data to the retransmit
+ * queue above. (If we had, send_head would be non-NULL.)
+ * Furthermore, since the send_head is NULL here
+ * we must not be in retransmit mode at this point.
+ * This implies we have no packets in flight,
+ * hence sk->packets_out < sk->cong_window.
+ * Examining the conditions for the test to move
+ * data to the retransmission queue we find that
+ * we must therefore have a zero window.
+ * Hence, if the ack_backlog is 0 we should initiate
+ * a zero probe.
+ * We don't do a zero probe if we have a delayed
+ * ACK in hand since the other side may have a
+ * window opening, but they are waiting to hear
+ * from us before they tell us about it.
+ * (They are applying Nagle's rule).
+ * So, we don't set up the zero window probe
+ * just yet. We do have to clear the timer
+ * though in this case...
*/
- if (sk->send_head || !skb_queue_empty(&sk->write_queue)) {
- tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
- } else if (sk->keepopen) {
- tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
- } else {
- del_timer(&sk->retransmit_timer);
- sk->ip_xmit_timeout = 0;
- }
- break;
+ tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
+ } else if (sk->keepopen) {
+ tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
+ } else {
+ del_timer(&sk->retransmit_timer);
+ sk->ip_xmit_timeout = 0;
}
+ break;
}
/*
@@ -1053,6 +1079,12 @@
flag |= 1;
sk->shutdown |= SEND_SHUTDOWN;
tcp_set_state(sk, TCP_FIN_WAIT2);
+ /* If the socket is dead, then there is no
+ * user process hanging around using it.
+ * We want to set up a FIN_WAIT2 timeout a la BSD.
+ */
+ if (sk->dead)
+ tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT);
}
}
@@ -1094,45 +1126,18 @@
}
/*
- * I make no guarantees about the first clause in the following
- * test, i.e. "(!flag) || (flag&4)". I'm not entirely sure under
- * what conditions "!flag" would be true. However I think the rest
- * of the conditions would prevent that from causing any
- * unnecessary retransmission.
- * Clearly if the first packet has expired it should be
- * retransmitted. The other alternative, "flag&2 && retransmits", is
- * harder to explain: You have to look carefully at how and when the
- * timer is set and with what timeout. The most recent transmission always
- * sets the timer. So in general if the most recent thing has timed
- * out, everything before it has as well. So we want to go ahead and
- * retransmit some more. If we didn't explicitly test for this
- * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
- * would not be true. If you look at the pattern of timing, you can
- * show that rto is increased fast enough that the next packet would
- * almost never be retransmitted immediately. Then you'd end up
- * waiting for a timeout to send each packet on the retransmission
- * queue. With my implementation of the Karn sampling algorithm,
- * the timeout would double each time. The net result is that it would
- * take a hideous amount of time to recover from a single dropped packet.
- * It's possible that there should also be a test for TIME_WRITE, but
- * I think as long as "send_head != NULL" and "retransmit" is on, we've
- * got to be in real retransmission mode.
- * Note that tcp_do_retransmit is called with all==1. Setting cong_window
- * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
- * As long as no further losses occur, this seems reasonable.
+ * The following code has been greatly simplified from the
+ * old hacked up stuff. The wonders of properly setting the
+ * retransmission timeouts.
+ *
+ * If we are retransmitting, and we acked a packet on the retransmit
+ * queue, and there is still something in the retransmit queue,
+ * then we can output some retransmission packets.
*/
-
- if (((!flag) || (flag&4)) && sk->send_head != NULL &&
- (((flag&2) && sk->retransmits) ||
- (sk->send_head->when + sk->rto < jiffies)))
- {
- if(sk->send_head->when + sk->rto < jiffies)
- tcp_retransmit(sk,0);
- else
- {
- tcp_do_retransmit(sk, 1);
- tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
- }
+
+ if (sk->send_head != NULL && (flag&2) && sk->retransmits)
+ {
+ tcp_do_retransmit(sk, 1);
}
return 1;
@@ -1230,8 +1235,12 @@
* for handling this timeout.
*/
- if(sk->ip_xmit_timeout != TIME_WRITE)
- tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+ if (sk->ip_xmit_timeout != TIME_WRITE) {
+ if (sk->send_head)
+ tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+ else
+ printk(KERN_ERR "send_head NULL in FIN_WAIT1\n");
+ }
tcp_set_state(sk,TCP_CLOSING);
break;
case TCP_FIN_WAIT2:
@@ -1965,7 +1974,7 @@
* Note most of these are inline now. I'll inline the lot when
* I have time to test it hard and look at what gcc outputs
*/
-
+
if (!tcp_sequence(sk, skb->seq, skb->end_seq-th->syn))
{
bad_tcp_sequence(sk, th, skb->end_seq-th->syn, dev);
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov
with Sam's (original) version of this