git.hungrycats.org Git - linux/commitdiff
af_unix: fix a fatal race with bit fields
author Eric Dumazet <eric.dumazet@gmail.com>
Wed, 1 May 2013 05:24:03 +0000 (05:24 +0000)
committer Willy Tarreau <w@1wt.eu>
Fri, 29 Jan 2016 21:12:50 +0000 (22:12 +0100)
commit 60bc851ae59bfe99be6ee89d6bc50008c85ec75d upstream.

Using bit fields is dangerous on ppc64/sparc64, as the compiler [1]
uses 64bit instructions to manipulate them.
If the 64bit word includes any atomic_t or spinlock_t, we can lose
critical concurrent changes.

This is happening in af_unix, where unix_sk(sk)->gc_candidate/
gc_maybe_cycle/lock share the same 64bit word.

This leads to a fatal deadlock, as one or several CPUs spin forever
on a spinlock that will never be available again.

A safer way would be to use a long to store flags.
This way we are sure the compiler/arch won't do bad things.

As we own unix_gc_lock spinlock when clearing or setting bits,
we can use the non atomic __set_bit()/__clear_bit().

recursion_level can share the same 64bit location with the spinlock,
as it is set only with this spinlock held.

[1] bug fixed in gcc-4.8.0 :
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52080

Reported-by: Ambrose Feinstein <ambrose@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
(cherry picked from commit 2ee9cbe7e7bfe2d36374288b818aa31b2c4981db)
[wt: adjusted context]
Signed-off-by: Willy Tarreau <w@1wt.eu>
include/net/af_unix.h
net/unix/garbage.c

index c364711174df2b7910e550dd5753453402cab52b..faf1d6dd0ec611daffdc95044314394cf9b4c850 100644 (file)
@@ -55,9 +55,10 @@ struct unix_sock {
        struct list_head        link;
         atomic_long_t           inflight;
         spinlock_t             lock;
-       unsigned int            gc_candidate : 1;
-       unsigned int            gc_maybe_cycle : 1;
        unsigned char           recursion_level;
+       unsigned long           gc_flags;
+#define UNIX_GC_CANDIDATE      0
+#define UNIX_GC_MAYBE_CYCLE    1
         wait_queue_head_t       peer_wait;
        wait_queue_t            peer_wake;
 };
index cb72e91f14892180987dc59b66e4c248359c70b0..de93193cef8d15f224792b88571e87313e12e1ab 100644 (file)
@@ -195,7 +195,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
                                         * have been added to the queues after
                                         * starting the garbage collection
                                         */
-                                       if (u->gc_candidate) {
+                                       if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
                                                hit = true;
                                                func(u);
                                        }
@@ -264,7 +264,7 @@ static void inc_inflight_move_tail(struct unix_sock *u)
         * of the list, so that it's checked even if it was already
         * passed over
         */
-       if (u->gc_maybe_cycle)
+       if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
                list_move_tail(&u->link, &gc_candidates);
 }
 
@@ -325,8 +325,8 @@ void unix_gc(void)
                BUG_ON(total_refs < inflight_refs);
                if (total_refs == inflight_refs) {
                        list_move_tail(&u->link, &gc_candidates);
-                       u->gc_candidate = 1;
-                       u->gc_maybe_cycle = 1;
+                       __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
+                       __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
                }
        }
 
@@ -354,7 +354,7 @@ void unix_gc(void)
 
                if (atomic_long_read(&u->inflight) > 0) {
                        list_move_tail(&u->link, &not_cycle_list);
-                       u->gc_maybe_cycle = 0;
+                       __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
                        scan_children(&u->sk, inc_inflight_move_tail, NULL);
                }
        }
@@ -366,7 +366,7 @@ void unix_gc(void)
         */
        while (!list_empty(&not_cycle_list)) {
                u = list_entry(not_cycle_list.next, struct unix_sock, link);
-               u->gc_candidate = 0;
+               __clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
                list_move_tail(&u->link, &gc_inflight_list);
        }