From: Herbert Poetzl (herbert_at_13thfloor.at)
Date: Sun 29 Dec 2002 - 21:55:57 GMT
On Sun, Dec 29, 2002 at 04:42:09PM -0500, tedsuo wrote:
>    i notice that Jacques got the same problems before releasing ctx16
> 
>    maybe the bind(any) for multiple IP is a bug...
> 
> 
>    On Fri, 29 Nov 2002 10:26:01 -0500, Klavs Klavsen wrote
>    > With tonnes of fixes :-)
>    >
>    > Jacques, how is your release schedule? have you got a ctx15 release
>    > planned in the near future?
> 
>    Yes I have a ctx-15 which does bind(any) for multiple IP in a vserver,
>    but it
>    is still crashing on me.
Please do not get me wrong: I do not exclude the possibility
that the changes introduced by CTX-16 do crash your system.
Except for cosmetic changes, the appended diff shows all
differences between the ctx14/ctx16 patch sets, so the
crash, if it is code based, must be there ...
best,
Herbert
------ CUT HERE ------
diff -NurbP --minimal linux-2.4.20-ctx14/fs/proc/array.c linux-2.4.20-ctx16/fs/proc/array.c
--- linux-2.4.20-ctx14/fs/proc/array.c	Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/fs/proc/array.c	Sat Dec 28 04:16:19 2002
@@ -332,6 +332,8 @@
                 *buffer++ = '\n';
                 buffer += sprintf (buffer,"ipv4root_bcast: %08x\n"
                         ,task->ip_info->v4_bcast);
+		buffer += sprintf (buffer,"ipv4root_refcnt: %d\n"
+			,task->ip_info->refcount);
         }else{
                 buffer += sprintf (buffer,"ipv4root: 0\n");
                 buffer += sprintf (buffer,"ipv4root_bcast: 0\n");
diff -NurbP --minimal linux-2.4.20-ctx14/include/net/route.h linux-2.4.20-ctx16/include/net/route.h
--- linux-2.4.20-ctx14/include/net/route.h	Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/include/net/route.h	Sat Dec 28 04:16:19 2002
@@ -164,14 +164,20 @@
 static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif)
 {
         int err;
-	if (current->ip_info != NULL){
-		__u32 ipv4root = current->ip_info->ipv4[0];
+	struct iproot_info *ip_info = current->ip_info;
+	if (ip_info != NULL){
+		__u32 ipv4root = ip_info->ipv4[0];
                 if (ipv4root != 0){
                         if (src == 0){
                                 src = dst == 0x0100007f
                                         ? 0x0100007f: ipv4root;
-			}else if (ipv4root != src){
-				return -EPERM;
+			}else{
+				int n=ip_info->nbipv4;
+				int i;
+				for (i=0; i<n; i++){
+					if (ip_info->ipv4[i] == src) break;
+				}
+				if (i==n) return -EPERM;
                         }
                         if (dst == 0x0100007f && current->s_context != 0){
                                 dst = ipv4root;
diff -NurbP --minimal linux-2.4.20-ctx14/include/net/sock.h linux-2.4.20-ctx16/include/net/sock.h
--- linux-2.4.20-ctx14/include/net/sock.h	Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/include/net/sock.h	Sat Dec 28 04:16:19 2002
@@ -508,6 +509,8 @@
         unsigned char		reuse;		/* SO_REUSEADDR setting			*/
         unsigned char		shutdown;
         atomic_t		refcnt;		/* Reference count			*/
+	struct iproot_info	*ip_info;
+	/* End of common section with tcp_tw_bucket */
 
         socket_lock_t		lock;		/* Synchronizer...			*/
         int			rcvbuf;		/* Size of receive buffer in bytes	*/
@@ -525,7 +528,7 @@
         __u32			saddr;		/* Sending source			*/
         unsigned int		allocation;	/* Allocation mode			*/
         int			sndbuf;		/* Size of send buffer in bytes		*/
-	__u32			bcast_addr;	/* Local bcast addr, for ipv4root */
+	__u32			rcv_saddr2;	/* Second bound ipv4 addr, for ipv4root */
         struct sock		*prev;
 
         /* Not all are volatile, but some are, so we might as well say they all are.
diff -NurbP --minimal linux-2.4.20-ctx14/include/net/tcp.h linux-2.4.20-ctx16/include/net/tcp.h
--- linux-2.4.20-ctx14/include/net/tcp.h	Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/include/net/tcp.h	Sat Dec 28 04:16:19 2002
@@ -172,6 +172,7 @@
         unsigned char		reuse,
                                 rcv_wscale; /* It is also TW bucket specific */
         atomic_t		refcnt;
+	struct ipv4_info	*ip_info;
 
         /* And these are ours. */
         int			hashent;
diff -NurbP --minimal linux-2.4.20-ctx14/kernel/exit.c linux-2.4.20-ctx16/kernel/exit.c
--- linux-2.4.20-ctx14/kernel/exit.c	Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/kernel/exit.c	Sat Dec 28 04:16:19 2002
@@ -67,7 +67,7 @@
                 if (current->counter >= MAX_COUNTER)
                         current->counter = MAX_COUNTER;
                  sys_release_s_info(p);
- 		sys_release_ip_info(p);
+		sys_release_ip_info(p->ip_info);
                 p->pid = 0;
                 free_task_struct(p);
         } else {
diff -NurbP --minimal linux-2.4.20-ctx14/kernel/fork.c linux-2.4.20-ctx16/kernel/fork.c
--- linux-2.4.20-ctx14/kernel/fork.c	Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/kernel/fork.c	Sat Dec 28 04:16:19 2002
@@ -618,7 +618,7 @@
                 goto bad_fork_free;
 
         sys_assign_s_info (p);
-	sys_assign_ip_info (p);
+	sys_assign_ip_info (p->ip_info);
 
         atomic_inc(&p->user->__count);
         atomic_inc(&p->user->processes);
diff -NurbP --minimal linux-2.4.20-ctx14/kernel/sys.c linux-2.4.20-ctx16/kernel/sys.c
--- linux-2.4.20-ctx14/kernel/sys.c	Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/kernel/sys.c	Sat Dec 28 04:16:19 2002
@@ -1088,31 +1088,33 @@
                 current->s_info = s_info;
         }
 }
+
 /*
-	Decrease the reference count on the ip_info member of a task
+	Decrease the reference count on the ip_info struct
         Free the struct if the reference count reach 0.
 */
-void sys_release_ip_info (struct task_struct *p)
+void sys_release_ip_info (struct iproot_info *ip_info)
 {
+	if (ip_info != NULL){
         down_write (&uts_sem);
-	if (p->ip_info != NULL){
-		p->ip_info->refcount--;
-		if (p->ip_info->refcount == 0){
+		ip_info->refcount--;
+		if (ip_info->refcount == 0){
                         // printk ("vfree s_info %d\n",p->pid);
-			vfree (p->ip_info);
-			p->ip_info = NULL;
-		}
+			vfree (ip_info);
         }
         up_write (&uts_sem);
+	}
 }
 /*
         Increase the reference count on the ip_info member of a task
 */
-void sys_assign_ip_info (struct task_struct *p)
+void sys_assign_ip_info (struct iproot_info *ip_info)
 {
+	if (ip_info != NULL){
         down_write (&uts_sem);
-	if (p->ip_info != NULL) p->ip_info->refcount++;
+		ip_info->refcount++;
         up_write (&uts_sem);
+	}
 }
 
 /*
@@ -1125,7 +1127,7 @@
         // printk ("new s_info %d\n",current->pid);
         memset (ip_info,0,sizeof(*ip_info));
         ip_info->refcount = 1;
-	sys_release_ip_info (current);	
+	sys_release_ip_info (current->ip_info);	
         current->ip_info = ip_info;
 }
 
diff -NurbP --minimal linux-2.4.20-ctx14/net/ipv4/af_inet.c linux-2.4.20-ctx16/net/ipv4/af_inet.c
--- linux-2.4.20-ctx14/net/ipv4/af_inet.c	Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/net/ipv4/af_inet.c	Sat Dec 28 04:16:19 2002
@@ -177,6 +177,8 @@
 
         if (sk->protinfo.af_inet.opt)
                 kfree(sk->protinfo.af_inet.opt);
+	sys_release_ip_info (sk->ip_info);
+	sk->ip_info = NULL;
         dst_release(sk->dst_cache);
 #ifdef INET_REFCNT_DEBUG
         atomic_dec(&inet_sock_nr);
@@ -394,6 +396,7 @@
         sk->protinfo.af_inet.mc_list	= NULL;
 
         sk->s_context = current->s_context;
+	sk->ip_info = NULL;
 
 #ifdef INET_REFCNT_DEBUG
         atomic_inc(&inet_sock_nr);
@@ -479,9 +482,11 @@
         unsigned short snum;
         int chk_addr_ret;
         int err;
-	__u32 s_addr;
-	__u32 bcast_addr = 0xffffffffl;
-	__u32 ipv4root;
+	__u32 s_addr;	/* Address used for validation */
+	__u32 s_addr1;
+	__u32 s_addr2 = 0xffffffffl;	/* Optional address of the socket */
+					/* bcast in ipv4root world */
+	struct iproot_info *ip_info;
 
         /* If the socket has its own bind function then use it. (RAW) */
         if(sk->prot->bind)
@@ -490,30 +495,40 @@
         if (addr_len < sizeof(struct sockaddr_in))
                 return -EINVAL;
 
-	s_addr = addr->sin_addr.s_addr;
-	ipv4root = current->ip_info != NULL ? current->ip_info->ipv4[0] : 0;
-	if (ipv4root != 0){
+	s_addr = s_addr1 = addr->sin_addr.s_addr;
+	ip_info = current->ip_info;
+	if (ip_info != NULL){
+		__u32 v4_bcast = ip_info->v4_bcast;
+		__u32 ipv4root = ip_info->ipv4[0];
+		int nbipv4 = ip_info->nbipv4;
                 // printk ("ipv4root0 %08lx %08x\n",ipv4root,s_addr);
-		__u32 v4_bcast =  current->ip_info->v4_bcast;
                 if (s_addr == 0){
                         s_addr = ipv4root;
-			bcast_addr = v4_bcast;
+			if (nbipv4 > 1){
+				s_addr1 = 0;
+			}else{
+				s_addr1 = ipv4root;
+				ip_info = NULL;
+			}
+			s_addr2 = v4_bcast;
                 }else if (s_addr == 0x0100007f){
-			s_addr = ipv4root;
+			s_addr = s_addr1 = ipv4root;
+			ip_info = NULL;
                 }else if (s_addr != v4_bcast
                         && s_addr != ipv4root){
                         int i;
-			int nbipv4 = current->ip_info->nbipv4;
                         for (i=0; i<nbipv4; i++){
-				if (s_addr == current->ip_info->ipv4[i]){
+				if (s_addr == ip_info->ipv4[i]){
                                         break;
                                 }
                         }
                         if (i == nbipv4) return -EADDRNOTAVAIL;
+			ip_info = NULL;
                 }
+		//printk ("bind: ip_info != NULL, s_addr %x, s_addr1 %x, s_addr2 %x\n"
+		//	,s_addr,s_addr1,s_addr2);
         }
         chk_addr_ret = inet_addr_type(s_addr);
-	// printk ("ipv4root %08lx %08x %d\n",ipv4root,s_addr,chk_addr_ret);
 
         /* Not specified by any standard per-se, however it breaks too
          * many applications when removed.  It is unfortunate since
@@ -549,14 +564,18 @@
             (sk->num != 0))
                 goto out;
 
-	sk->rcv_saddr = sk->saddr = s_addr;
-	sk->bcast_addr = bcast_addr;
+	sk->rcv_saddr = sk->saddr = s_addr1;
+	sk->rcv_saddr2 = s_addr2;
+	sk->ip_info = ip_info;
+	if (ip_info != NULL) sys_assign_ip_info (ip_info);
         if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
                 sk->saddr = 0;  /* Use device */
 
         /* Make sure we are allowed to bind here. */
         if (sk->prot->get_port(sk, snum) != 0) {
                 sk->saddr = sk->rcv_saddr = 0;
+		sk->ip_info = NULL;
+		sys_release_ip_info (ip_info);
                 err = -EADDRINUSE;
                 goto out;
         }
diff -NurbP --minimal linux-2.4.20-ctx14/net/ipv4/raw.c linux-2.4.20-ctx16/net/ipv4/raw.c
--- linux-2.4.20-ctx14/net/ipv4/raw.c	Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/net/ipv4/raw.c	Sat Dec 28 04:16:19 2002
@@ -96,16 +96,48 @@
         write_unlock_bh(&raw_v4_lock);
 }
 
+
+/*	Return 1 if loc_addr is accepted by a socket's bound addresses, else 0:
+	accepted when loc_addr is non-zero and equals rcv_saddr1 or rcv_saddr2,
+	or when rcv_saddr1 is 0 and ip_info is either NULL or lists loc_addr. */
+static inline int raw_addr_in_list (
+	u32 rcv_saddr1,	/* first bound address; 0 selects the wildcard branch below */
+	u32 rcv_saddr2,	/* second address, also accepted on an exact match */
+	u32 loc_addr,	/* address being tested */
+	struct iproot_info *ip_info)	/* optional address list; NULL means no restriction */
+{
+	int ret = 0;	/* stays 0 unless one of the cases below matches */
+	if (loc_addr != 0
+		&& (rcv_saddr1 == loc_addr || rcv_saddr2 == loc_addr)){
+		ret = 1;	/* exact match on one of the two bound addresses */
+	}else if (rcv_saddr1 == 0){
+		/* Accept any address, or only the ones in the list when a list is given */
+		if (ip_info == NULL){
+			ret = 1;
+		}else{
+			int n = ip_info->nbipv4;	/* number of ipv4[] entries to scan */
+			int i;
+			for (i=0; i<n; i++){
+				if (ip_info->ipv4[i] == loc_addr){
+					ret = 1;
+					break;
+				}
+			}
+		}
+	}
+	return ret;
+}
+
 struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
                              unsigned long raddr, unsigned long laddr,
                              int dif)
 {
         struct sock *s = sk;
-
         for (s = sk; s; s = s->next) {
                 if (s->num == num 				&&
                     !(s->daddr && s->daddr != raddr) 		&&
-		    !(s->rcv_saddr && s->rcv_saddr != laddr)	&&
+		    raw_addr_in_list(s->rcv_saddr,s->rcv_saddr2,laddr,s->ip_info) &&
+//		    !(s->rcv_saddr && s->rcv_saddr != laddr)	&&
                     !(s->bound_dev_if && s->bound_dev_if != dif))
                         break; /* gotcha */
         }
diff -NurbP --minimal linux-2.4.20-ctx14/net/ipv4/tcp_ipv4.c linux-2.4.20-ctx16/net/ipv4/tcp_ipv4.c
--- linux-2.4.20-ctx14/net/ipv4/tcp_ipv4.c	Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/net/ipv4/tcp_ipv4.c	Sat Dec 28 04:16:19 2002
@@ -174,6 +174,56 @@
         sk->prev = (struct sock *) tb;
 }
 
+/*	Return 1 if addr matches the socket's addresses, 0 otherwise.
+	With sk->ip_info set, addr must be one of its nbipv4 entries (rcv_saddr
+	is not consulted); without it, the socket matches when rcv_saddr is 0
+	(INADDR_ANY) or equals addr. */
+static inline int tcp_in_list (struct sock *sk, u32 addr)
+{
+	int ret = 0;	/* stays 0 unless a match is found */
+	struct iproot_info *ip_info = sk->ip_info;
+	if (ip_info != NULL){
+		int n = ip_info->nbipv4;	/* number of ipv4[] entries to scan */
+		int i;
+		for (i=0; i<n; i++){
+			if (ip_info->ipv4[i] == addr){
+				ret = 1;
+				break;
+			}
+		}
+	}else if (!sk->rcv_saddr || sk->rcv_saddr == addr){
+		ret = 1;	/* no list: wildcard socket or exact address match */
+	}
+	return ret;
+}
+	
+/*	Check if the addresses in sk1 conflict with those in sk2.
+	Returns 1 on conflict, 0 otherwise. Each address sk1 is bound to is
+	tested against sk2 with tcp_in_list(). Not static: udp.c declares and calls it. */
+int tcp_ipv4_addr_conflict (struct sock *sk1, struct sock *sk2)
+{
+	int ret = 0;	/* no conflict until one is found */
+	if (sk1->rcv_saddr){
+		/* Bind to one address only */
+		ret = tcp_in_list (sk2,sk1->rcv_saddr);
+	}else if (sk1->ip_info != NULL){
+		/* A restricted bind(any): test every address of sk1's list */
+		struct iproot_info *ip_info = sk1->ip_info;
+		int n = ip_info->nbipv4;
+		int i;
+		for (i=0; i<n; i++){
+			if (tcp_in_list (sk2,ip_info->ipv4[i])){
+				ret = 1;
+				break;
+			}
+		}
+	}else{
+		/* An unrestricted bind(any) does not allow any other bind on the same port */
+		ret = 1;
+	}
+	return ret;
+}
+
 static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
 {
         struct sock *sk2 = tb->owners;
@@ -186,9 +236,7 @@
                         if (!sk_reuse	||
                             !sk2->reuse	||
                             sk2->state == TCP_LISTEN) {
-				if (!sk2->rcv_saddr	||
-				    !sk->rcv_saddr	||
-				    (sk2->rcv_saddr == sk->rcv_saddr))
+				if (tcp_ipv4_addr_conflict(sk,sk2))
                                         break;
                         }
                 }
@@ -407,6 +455,37 @@
                 wake_up(&tcp_lhash_wait);
 }
 
+/*	Return 1 if daddr is accepted for a socket bound to rcv_saddr, else 0:
+	accepted when rcv_saddr equals daddr, or when rcv_saddr is 0 and
+	ip_info is either NULL or lists daddr. */
+static inline int tcp_addr_in_list (
+	u32 rcv_saddr,	/* address the socket is bound to; 0 selects the wildcard branch */
+	u32 daddr,	/* address being tested */
+	struct iproot_info *ip_info)	/* optional address list; NULL means no restriction */
+{
+	int ret = 0;	/* stays 0 unless one of the cases below matches */
+	if (rcv_saddr == daddr){
+		ret = 1;	/* exact match (also taken when both values are 0) */
+	}else if (rcv_saddr == 0){
+		/* Accept any address, or only the ones in the list when a list is given */
+		if (ip_info == NULL){
+			ret = 1;
+		}else{
+			int n = ip_info->nbipv4;	/* number of ipv4[] entries to scan */
+			int i;
+			for (i=0; i<n; i++){
+				if (ip_info->ipv4[i] == daddr){
+					ret = 1;
+					break;
+				}
+			}
+		}
+	}
+	return ret;
+}
+
+
+
 /* Don't inline this cruft.  Here are some nice properties to
  * exploit here.  The BSD API does not allow a listening TCP
  * to specify the remote port nor the remote address for the
@@ -424,10 +503,10 @@
                         __u32 rcv_saddr = sk->rcv_saddr;
 
                         score = 1;
-			if(rcv_saddr) {
-				if (rcv_saddr != daddr)
-					continue;
+			if (tcp_addr_in_list(rcv_saddr,daddr,sk->ip_info)){
                                 score++;
+			}else{
+				continue;
                         }
                         if (sk->bound_dev_if) {
                                 if (sk->bound_dev_if != dif)
@@ -455,7 +534,7 @@
         if (sk) {
                 if (sk->num == hnum &&
                     sk->next == NULL &&
-		    (!sk->rcv_saddr || sk->rcv_saddr == daddr) &&
+		    tcp_addr_in_list(sk->rcv_saddr,daddr,sk->ip_info) &&
                     !sk->bound_dev_if)
                         goto sherry_cache;
                 sk = __tcp_v4_lookup_listener(sk, daddr, hnum, dif);
diff -NurbP --minimal linux-2.4.20-ctx14/net/ipv4/tcp_minisocks.c linux-2.4.20-ctx16/net/ipv4/tcp_minisocks.c
--- linux-2.4.20-ctx14/net/ipv4/tcp_minisocks.c	Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/net/ipv4/tcp_minisocks.c	Sat Dec 28 04:16:19 2002
@@ -381,6 +381,7 @@
                 tw->pprev_death = NULL;
 
                 tw->s_context	= sk->s_context;
+		tw->ip_info	= NULL;
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
                 if(tw->family == PF_INET6) {
@@ -651,6 +652,7 @@
 #endif
 
                 memcpy(newsk, sk, sizeof(*newsk));
+		sys_assign_ip_info (newsk->ip_info);
                 newsk->state = TCP_SYN_RECV;
 
                 /* SANITY */
diff -NurbP --minimal linux-2.4.20-ctx14/net/ipv4/udp.c linux-2.4.20-ctx16/net/ipv4/udp.c
--- linux-2.4.20-ctx14/net/ipv4/udp.c	Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/net/ipv4/udp.c	Sat Dec 28 04:16:19 2002
@@ -106,6 +106,9 @@
 /* Shared by v4/v6 udp. */
 int udp_port_rover;
 
+int tcp_ipv4_addr_conflict (struct sock *sk1, struct sock *sk2);
+
+
 static int udp_v4_get_port(struct sock *sk, unsigned short snum)
 {
         write_lock_bh(&udp_hash_lock);
@@ -160,9 +163,7 @@
                         if (sk2->num == snum &&
                             sk2 != sk &&
                             sk2->bound_dev_if == sk->bound_dev_if &&
-			    (!sk2->rcv_saddr ||
-			     !sk->rcv_saddr ||
-			     sk2->rcv_saddr == sk->rcv_saddr) &&
+			    tcp_ipv4_addr_conflict (sk2,sk) &&
                             (!sk2->reuse || !sk->reuse))
                                 goto fail;
                 }
@@ -205,6 +206,20 @@
         write_unlock_bh(&udp_hash_lock);
 }
 
+static int udp_in_list (struct iproot_info *ip_info, u32 addr)	/* returns 1 if addr is listed in ip_info, else 0 */
+{
+	int ret = 0;	/* stays 0 unless a match is found */
+	int n = ip_info->nbipv4;	/* ip_info is dereferenced unchecked: callers must pass non-NULL */
+	int i;
+	for (i=0; i<n; i++){	/* scan the first n entries of ipv4[] */
+		if (ip_info->ipv4[i] == addr){
+			ret = 1;
+			break;
+		}
+	}
+	return ret;
+}
+
 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
  * harder than this. -DaveM
  */
@@ -221,6 +236,12 @@
                                 if(sk->rcv_saddr != daddr)
                                         continue;
                                 score++;
+			}else if (sk->ip_info != NULL){
+				if (udp_in_list (sk->ip_info,daddr)){
+					score++;
+				}else{
+					continue;
+				}
                         }
                         if(sk->daddr) {
                                 if(sk->daddr != saddr)
@@ -272,7 +294,7 @@
                 if ((s->num != hnum)					||
                     (s->daddr && s->daddr!=rmt_addr)			||
                     (s->dport != rmt_port && s->dport != 0)			||
-		    (s->rcv_saddr  && s->rcv_saddr != loc_addr && s->bcast_addr != loc_addr)		||
+		    (s->rcv_saddr  && s->rcv_saddr != loc_addr && s->rcv_saddr2 != loc_addr)	||
                     (s->bound_dev_if && s->bound_dev_if != dif))
                         continue;
                 break;
@@ -517,6 +539,24 @@
                 rt = (struct rtable*)sk_dst_check(sk, 0);
 
         if (rt == NULL) {
+		struct iproot_info *ip_info = current->ip_info;
+		if (ip_info != NULL) {
+			__u32 ipv4root = ip_info->ipv4[0];
+			if (ipv4root != 0){
+				if (daddr == 0x0100007f && current->s_context != 0){
+					daddr = ipv4root;
+				}
+				if (ufh.saddr == 0){
+					ufh.saddr = ipv4root;
+				}
+				#if 0
+				else if (!udp_in_list(ip_info,ufh.saddr)){
+					err = EADDRNOTAVAIL;
+					goto out; 
+				}
+				#endif
+			}
+		}
                 err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
                 if (err)
                         goto out;