patch-2.0.30 linux/net/ipv4/tcp.c
Next file: linux/net/ipv4/tcp_input.c
Previous file: linux/net/ipv4/sysctl_net_ipv4.c
Back to the patch index
Back to the overall index
- Lines: 389
- Date:
Tue Apr 8 08:47:47 1997
- Orig file:
v2.0.29/linux/net/ipv4/tcp.c
- Orig date:
Tue Nov 26 23:44:11 1996
diff -u --recursive --new-file v2.0.29/linux/net/ipv4/tcp.c linux/net/ipv4/tcp.c
@@ -203,6 +203,8 @@
* Stefan Magdalinski : adjusted tcp_readable() to fix FIONREAD
* Willy Konynenberg : Transparent proxying support.
* Theodore Ts'o : Do secure TCP sequence numbers.
+ * David S. Miller : New socket lookup architecture for ISS.
+ * This code is dedicated to John Dyson.
*
* To Fix:
* Fast path the code. Two things here - fix the window calculation
@@ -438,6 +440,203 @@
unsigned long seq_offset;
struct tcp_mib tcp_statistics;
+/* This is for sockets with full identity only. Sockets here will always
+ * be without wildcards and will have the following invariant:
+ * TCP_ESTABLISHED <= sk->state < TCP_CLOSE
+ */
+struct sock *tcp_established_hash[TCP_HTABLE_SIZE];
+
+/* All sockets in TCP_LISTEN state will be in here. This is the only table
+ * where wildcard'd TCP sockets can exist. Hash function here is just local
+ * port number. XXX Fix or we'll lose with thousands of IP aliases...
+ */
+struct sock *tcp_listening_hash[TCP_LHTABLE_SIZE];
+
+/* Ok, let's try this, I give up, we do need a local binding
+ * TCP hash as well as the others for fast bind/connect.
+ */
+struct sock *tcp_bound_hash[TCP_BHTABLE_SIZE];
+
+extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport);
+
+static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum)
+{
+ struct sock *sk2;
+ int retval = 0, sk_reuse = sk->reuse;
+
+ SOCKHASH_LOCK();
+ sk2 = tcp_bound_hash[tcp_bhashfn(snum)];
+ for(; sk2 != NULL; sk2 = sk2->bind_next) {
+ if((sk2->num == snum) && (sk2 != sk)) {
+ unsigned char state = sk2->state;
+ int sk2_reuse = sk2->reuse;
+
+ if(!sk2->rcv_saddr || !sk->rcv_saddr) {
+ if((!sk2_reuse) ||
+ (!sk_reuse) ||
+ (state == TCP_LISTEN)) {
+ retval = 1;
+ break;
+ }
+ } else if(sk2->rcv_saddr == sk->rcv_saddr) {
+ if((!sk_reuse) ||
+ (!sk2_reuse) ||
+ (state == TCP_LISTEN)) {
+ retval = 1;
+ break;
+ }
+ }
+ }
+ }
+ SOCKHASH_UNLOCK();
+
+ return retval;
+}
+
+static __inline__ int tcp_lport_inuse(int num)
+{
+ struct sock *sk = tcp_bound_hash[tcp_bhashfn(num)];
+
+ for(; sk != NULL; sk = sk->bind_next) {
+ if(sk->num == num)
+ return 1;
+ }
+ return 0;
+}
+
+/* Find a "good" local port, this is family independent.
+ * There are several strategies working in unison here to
+ * get the best possible performance. The current socket
+ * load is kept track of, if it is zero there is a strong
+ * likelihood that there is a zero length chain we will
+ * find with a small amount of searching, else the load is
+ * what we shoot for when the chains all have at least
+ * one entry. The base helps us walk the chains in an
+ * order such that a good chain is found as quickly as possible. -DaveM
+ */
+unsigned short tcp_good_socknum(void)
+{
+ static int start = PROT_SOCK;
+ static int binding_contour = 0;
+ int best = 0;
+ int size = 32767; /* a big num. */
+ int retval = 0, i, end, bc;
+
+ SOCKHASH_LOCK();
+ i = tcp_bhashfn(start);
+ end = i + TCP_BHTABLE_SIZE;
+ bc = binding_contour;
+ do {
+ struct sock *sk = tcp_bound_hash[tcp_bhashfn(i)];
+ if(!sk) {
+ retval = (start + i);
+ start = (retval + 1);
+
+ /* Check for decreasing load. */
+ if(bc != 0)
+ binding_contour = 0;
+ goto done;
+ } else {
+ int j = 0;
+ do { sk = sk->bind_next; } while(++j < size && sk);
+ if(j < size) {
+ best = (start + i);
+ size = j;
+ if(bc && size <= bc) {
+ start = best + 1;
+ goto verify;
+ }
+ }
+ }
+ } while(++i != end);
+
+ /* Socket load is increasing, adjust our load average. */
+ binding_contour = size;
+verify:
+ if(size < binding_contour)
+ binding_contour = size;
+
+ if(best > 32767)
+ best -= (32768 - PROT_SOCK);
+
+ while(tcp_lport_inuse(best))
+ best += TCP_BHTABLE_SIZE;
+ retval = best;
+done:
+ if(start > 32767)
+ start -= (32768 - PROT_SOCK);
+
+ SOCKHASH_UNLOCK();
+
+ return retval;
+}
+
+void tcp_v4_hash(struct sock *sk)
+{
+ unsigned char state;
+
+ SOCKHASH_LOCK();
+ state = sk->state;
+ if(state != TCP_CLOSE || !sk->dead) {
+ struct sock **skp;
+
+ if(state == TCP_LISTEN)
+ skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
+ else
+ skp = &tcp_established_hash[tcp_sk_hashfn(sk)];
+
+ if((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
+ *skp = sk;
+ sk->pprev = skp;
+ tcp_sk_bindify(sk);
+ }
+ SOCKHASH_UNLOCK();
+}
+
+void tcp_v4_unhash(struct sock *sk)
+{
+ SOCKHASH_LOCK();
+ if(sk->pprev) {
+ if(sk->next)
+ sk->next->pprev = sk->pprev;
+ *sk->pprev = sk->next;
+ sk->pprev = NULL;
+ tcp_sk_unbindify(sk);
+ }
+ SOCKHASH_UNLOCK();
+}
+
+void tcp_v4_rehash(struct sock *sk)
+{
+ unsigned char state;
+
+ SOCKHASH_LOCK();
+ state = sk->state;
+ if(sk->pprev) {
+ if(sk->next)
+ sk->next->pprev = sk->pprev;
+ *sk->pprev = sk->next;
+ sk->pprev = NULL;
+ tcp_sk_unbindify(sk);
+ }
+ if(state != TCP_CLOSE || !sk->dead) {
+ struct sock **skp;
+
+ if(state == TCP_LISTEN)
+ skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
+ else
+ skp = &tcp_established_hash[tcp_sk_hashfn(sk)];
+
+ if((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
+ *skp = sk;
+ sk->pprev = skp;
+ tcp_sk_bindify(sk);
+ }
+ SOCKHASH_UNLOCK();
+}
+
static void tcp_close(struct sock *sk, unsigned long timeout);
/*
@@ -518,8 +717,7 @@
if (len < 8) /* NOT sizeof(struct tcphdr) */
return;
- sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr, 0, 0);
-
+ sk = tcp_v4_lookup(daddr, th->dest, saddr, th->source);
if (sk == NULL)
return;
@@ -1640,6 +1838,7 @@
case TCP_CLOSE:
case TCP_LISTEN:
break;
+ case TCP_LAST_ACK: /* Could have shutdown() then close()!*/
case TCP_CLOSE_WAIT: /* They have FIN'd us. We send our FIN and
wait only for the ACK */
ns=TCP_LAST_ACK;
@@ -1753,7 +1952,6 @@
lock_sock(sk);
- tcp_cache_zap();
if(sk->state == TCP_LISTEN)
{
/* Special case */
@@ -1761,6 +1959,7 @@
tcp_close_pending(sk);
release_sock(sk);
sk->dead = 1;
+ tcp_v4_unhash(sk);
return;
}
@@ -1813,12 +2012,6 @@
sti();
}
- /*
- * This will destroy it. The timers will take care of actually
- * free'ing up the memory.
- */
- tcp_cache_zap(); /* Kill the cache again. */
-
/* Now that the socket is dead, if we are in the FIN_WAIT2 state
* we may need to set up a timer.
*/
@@ -1833,6 +2026,9 @@
release_sock(sk);
sk->dead = 1;
+
+ if(sk->state == TCP_CLOSE)
+ tcp_v4_unhash(sk);
}
@@ -1916,27 +2112,24 @@
*/
static int tcp_unique_address(u32 saddr, u16 snum, u32 daddr, u16 dnum)
{
- int retval = 1;
+ int retval = 1, hashent = tcp_hashfn(saddr, snum, daddr, dnum);
struct sock * sk;
- /* Make sure we are allowed to connect here. */
- cli();
- for (sk = tcp_prot.sock_array[snum & (SOCK_ARRAY_SIZE -1)];
- sk != NULL; sk = sk->next)
- {
- /* hash collision? */
- if (sk->num != snum)
- continue;
- if (sk->saddr != saddr)
- continue;
- if (sk->daddr != daddr)
- continue;
- if (sk->dummy_th.dest != dnum)
- continue;
- retval = 0;
- break;
+ /* Make sure we are allowed to connect here.
+ * But freeze the hash while we snoop around.
+ */
+ SOCKHASH_LOCK();
+ sk = tcp_established_hash[hashent];
+ for (; sk != NULL; sk = sk->next) {
+ if(sk->daddr == daddr && /* remote address */
+ sk->dummy_th.dest == dnum && /* remote port */
+ sk->num == snum && /* local port */
+ sk->saddr == saddr) { /* local address */
+ retval = 0;
+ break;
+ }
}
- sti();
+ SOCKHASH_UNLOCK();
return retval;
}
@@ -2077,12 +2270,13 @@
tcp_send_check(t1, sk->saddr, sk->daddr,
sizeof(struct tcphdr) + 4, buff);
- /*
- * This must go first otherwise a really quick response will get reset.
+ tcp_set_state(sk,TCP_SYN_SENT);
+
+ /* Socket identity change complete, no longer
+ * in TCP_CLOSE, so rehash.
*/
+ tcp_v4_rehash(sk);
- tcp_cache_zap();
- tcp_set_state(sk,TCP_SYN_SENT);
if(rt&&rt->rt_flags&RTF_IRTT)
sk->rto = rt->rt_irtt;
else
@@ -2174,27 +2368,34 @@
struct proto tcp_prot = {
- tcp_close,
- ip_build_header,
- tcp_connect,
- tcp_accept,
- ip_queue_xmit,
- tcp_retransmit,
- tcp_write_wakeup,
- tcp_read_wakeup,
- tcp_rcv,
- tcp_select,
- tcp_ioctl,
- NULL,
- tcp_shutdown,
- tcp_setsockopt,
- tcp_getsockopt,
- tcp_sendmsg,
- tcp_recvmsg,
- NULL, /* No special bind() */
- 128,
- 0,
- "TCP",
- 0, 0,
- {NULL,}
+ (struct sock *)&tcp_prot, /* sklist_next */
+ (struct sock *)&tcp_prot, /* sklist_prev */
+ tcp_close, /* close */
+ ip_build_header, /* build_header */
+ tcp_connect, /* connect */
+ tcp_accept, /* accept */
+ ip_queue_xmit, /* queue_xmit */
+ tcp_retransmit, /* retransmit */
+ tcp_write_wakeup, /* write_wakeup */
+ tcp_read_wakeup, /* read_wakeup */
+ tcp_rcv, /* rcv */
+ tcp_select, /* select */
+ tcp_ioctl, /* ioctl */
+ NULL, /* init */
+ tcp_shutdown, /* shutdown */
+ tcp_setsockopt, /* setsockopt */
+ tcp_getsockopt, /* getsockopt */
+ tcp_sendmsg, /* sendmsg */
+ tcp_recvmsg, /* recvmsg */
+ NULL, /* bind */
+ tcp_v4_hash, /* hash */
+ tcp_v4_unhash, /* unhash */
+ tcp_v4_rehash, /* rehash */
+ tcp_good_socknum, /* good_socknum */
+ tcp_v4_verify_bind, /* verify_bind */
+ 128, /* max_header */
+ 0, /* retransmits */
+ "TCP", /* name */
+ 0, /* inuse */
+ 0 /* highestinuse */
};
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov