>From 18e9367049d629c16538f3cbd66ba0e50951685e Mon Sep 17 00:00:00 2001 From: Grzegorz Nosek Date: Mon, 25 Jun 2012 15:14:36 +0200 Subject: [PATCH] Fix container reaper exit If the container reaper exited, it was left in its role anyway, which led to horrible crashes: [58974.254066] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 [58974.260279] IP: [] do_notify_parent+0x7a/0x1c1 [58974.260279] PGD 0 [58974.260279] Oops: 0000 [#1] SMP [58974.260279] CPU 3 [58974.260279] Modules linked in: ipt_account ipt_MASQUERADE xt_owner xt_hashlimit xt_multiport ipt_REJECT xt_comment xt_state iptable_filter iptable_mangle iptable_nat nf_nat nf_conntrack_ipv4 nf_ conntrack nf_defrag_ipv4 ip_tables tg3 libphy psmouse i5k_amb hwmon e1000e [58974.260279] [58974.260279] Pid: 8582, comm: cron Not tainted 3.2.15-00013-g9c7edc7 #12 Supermicro X7DBR-3/X7DBR-3 [58974.260279] RIP: 0010:[] [] do_notify_parent+0x7a/0x1c1 [58974.260279] RSP: 0018:ffff8802e7173be8 EFLAGS: 00010046 [58974.260279] RAX: 0000000000000000 RBX: ffff8804157e2670 RCX: 0000000000000f5a [58974.260279] RDX: ffff8804157e2990 RSI: 0000000000000000 RDI: ffff8804157e2670 [58974.260279] RBP: ffff8802e7173c98 R08: 00000001005986a8 R09: 0000358d72e81768 [58974.260279] R10: 0000000002625a00 R11: ffff880327d36be8 R12: 0000000000000011 [58974.260279] R13: ffff8804150acce0 R14: ffff8804157e2670 R15: ffff8804157e2910 [58974.260279] FS: 0000000000000000(0000) GS:ffff88042fd80000(0000) knlGS:0000000000000000 [58974.260279] CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b [58974.260279] CR2: 0000000000000020 CR3: 0000000001a05000 CR4: 00000000000006e0 [58974.260279] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [58974.260279] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [58974.260279] Process cron (pid: 8582, threadinfo ffff8802e7172000, task ffff880327d36ba0) [58974.260279] Stack: [58974.260279] 0000000000000011 ffffffff8102c169 ffff880200000000 ffff880327d36ba0 
[58974.260279] ffff880327d36ba0 0000000000000296 ffff8802e7173c58 ffffffff8102c7cf [58974.260279] 0000000000000008 0000000000000296 ffff880327d36ba0 ffff880327d36ba0 [58974.260279] Call Trace: [58974.260279] [] ? enqueue_task+0x5d/0x64 [58974.260279] [] ? sched_move_task+0x132/0x168 [58974.260279] [] do_exit+0x5a0/0x8c2 [58974.260279] [] ? finish_wait+0x66/0x6e [58974.260279] [] ? wake_up_bit+0x2a/0x2a [58974.260279] [] do_group_exit+0x76/0x9e [58974.260279] [] get_signal_to_deliver+0x552/0x573 [58974.260279] [] do_signal+0x3e/0x618 [58974.260279] [] ? do_mmap_pgoff+0x287/0x2ea [58974.260279] [] do_notify_resume+0x2c/0x64 [58974.260279] [] int_signal+0x12/0x17 [58974.260279] Code: 00 74 04 0f 0b eb fe 48 8b 83 88 02 00 00 44 89 a5 50 ff ff ff 31 f6 c7 85 54 ff ff ff 00 00 00 00 48 89 df 48 8b 80 10 05 00 00 <48> 8b 50 20 e8 8a 7c 00 00 89 85 60 ff ff ff 48 8b 83 08 04 00 [58974.260279] RIP [] do_notify_parent+0x7a/0x1c1 [58974.260279] RSP [58974.260279] CR2: 0000000000000020 [58974.260279] ---[ end trace c1148b603ffdb861 ]--- [58974.260279] Fixing recursive fault but reboot is needed! The dead reaper ended up as the newly-deceased process's ->parent, which promptly exploded on access to tsk->parent->nsproxy (nsproxy gets reset to NULL upon task death). The funny thing is that the critical chunk used to be there but was removed at some point during VServer development. Also, ensure that the reaper may only belong either to the host context, or to the context it's going to be the reaper for (i.e. no reaping xid FOO from xid BAR), as we have no way to handle exit cleanly in that case. 
--- kernel/exit.c | 3 +++ kernel/vserver/context.c | 15 +++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index bb69237..07adc1a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1009,6 +1009,9 @@ NORET_TYPE void do_exit(long code) */ ptrace_put_breakpoints(tsk); + /* needs to stay before exit_notify() */ + exit_vx_info_early(tsk, code); + exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA task_lock(tsk); diff --git a/kernel/vserver/context.c b/kernel/vserver/context.c index f36970d..7a5bebd 100644 --- a/kernel/vserver/context.c +++ b/kernel/vserver/context.c @@ -669,20 +669,31 @@ out: int vx_set_reaper(struct vx_info *vxi, struct task_struct *p) { struct task_struct *old_reaper; + struct vx_info *reaper_vxi; if (!vxi) return -EINVAL; + get_task_struct(p); + reaper_vxi = p->vx_info; + if (reaper_vxi && reaper_vxi != vxi) { + printk(KERN_ERR "Cannot set %s[#%d,%d] as reaper of xid %d\n", + p->comm, p->xid, p->pid, vxi->vx_id); + put_task_struct(p); + return -EINVAL; + } + vxdprintk(VXD_CBIT(xid, 6), "vx_set_reaper(%p[#%d],%p[#%d,%d])", vxi, vxi->vx_id, p, p->xid, p->pid); old_reaper = vxi->vx_reaper; - if (old_reaper == p) + if (old_reaper == p) { + put_task_struct(p); return 0; + } /* set new child reaper */ - get_task_struct(p); vxi->vx_reaper = p; put_task_struct(old_reaper); return 0; -- 1.7.2.3