Revert "defer call to mem_cgroup_sk_alloc()"
author Roman Gushchin <guro@fb.com>
Fri, 2 Feb 2018 15:26:57 +0000 (15:26 +0000)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 12 Feb 2018 06:07:20 +0000 (07:07 +0100)
[ Upstream commit edbe69ef2c90fc86998a74b08319a01c508bd497 ]

This patch effectively reverts commit 9f1c2674b328 ("net: memcontrol:
defer call to mem_cgroup_sk_alloc()").

Moving mem_cgroup_sk_alloc() into inet_csk_accept() completely breaks
memcg socket memory accounting: packets received before the memcg
pointer is initialized are never charged, which causes a refcounting
underflow on socket release.
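
To illustrate, here is a rough user-space simulation of that imbalance
(the structs and helpers are simplified stand-ins for sk->sk_memcg and
the skmem charge/uncharge paths, not the real kernel code):

  /* Simplified stand-ins, not the kernel's real structures or helpers. */
  #include <stdio.h>

  struct memcg { long pages; };                   /* memcg page counter */
  struct sock { struct memcg *sk_memcg; long fwd_alloc; };

  static void charge_skmem(struct sock *sk, long n)
  {
          if (!sk->sk_memcg)      /* memcg pointer not set yet: charge skipped */
                  return;
          sk->sk_memcg->pages += n;
  }

  static void uncharge_skmem(struct sock *sk, long n)
  {
          if (!sk->sk_memcg)
                  return;
          sk->sk_memcg->pages -= n;
  }

  int main(void)
  {
          struct memcg cg = { 0 };
          struct sock sk = { NULL, 0 };

          charge_skmem(&sk, 4);                   /* packets arrive before accept() */
          sk.fwd_alloc += 4;
          sk.sk_memcg = &cg;                      /* deferred mem_cgroup_sk_alloc() */
          uncharge_skmem(&sk, sk.fwd_alloc);      /* socket release */
          printf("memcg pages: %ld\n", cg.pages); /* prints -4: underflow */
          return 0;
  }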

The analogous use-after-free problem for the cgroup pointer was
already fixed by commit c0576e397508 ("net: call cgroup_sk_alloc()
earlier in sk_clone_lock()").

So, let's revert it and call mem_cgroup_sk_alloc() just before
cgroup_sk_alloc(). This is safe, as we hold a reference to the socket
we're cloning, and it holds a reference to the memcg.
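
A minimal sketch of that refcounting argument, again with user-space
stand-ins for css_get()/css_put():

  #include <assert.h>

  struct css { int refcnt; };

  static void css_get(struct css *css) { css->refcnt++; }
  static void css_put(struct css *css) { css->refcnt--; }

  int main(void)
  {
          struct css memcg_css = { 1 };   /* pinned by the parent's sk_memcg */

          /* sk_clone_lock(): the clone inherits sk_memcg from the parent
           * and mem_cgroup_sk_alloc() immediately takes its own reference. */
          css_get(&memcg_css);
          assert(memcg_css.refcnt == 2);

          css_put(&memcg_css);            /* parent socket is released */
          assert(memcg_css.refcnt == 1);  /* memcg can't vanish under the clone */
          return 0;
  }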

Also, let's drop the BUG_ON(mem_cgroup_is_root()) check from
mem_cgroup_sk_alloc(). I see no reason why bumping the root memcg
counter should be grounds for a panic, and there is no realistic way
to hit it.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
mm/memcontrol.c
net/core/sock.c
net/ipv4/inet_connection_sock.c

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ac2ffd5e02b914fb9564649c9475babc51119de6..0a78ce57872d2afbb82a9573d6ed6ccb2b855caf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5828,6 +5828,20 @@ void mem_cgroup_sk_alloc(struct sock *sk)
        if (!mem_cgroup_sockets_enabled)
                return;
 
+       /*
+        * Socket cloning can throw us here with sk_memcg already
+        * filled. It won't however, necessarily happen from
+        * process context. So the test for root memcg given
+        * the current task's memcg won't help us in this case.
+        *
+        * Respecting the original socket's memcg is a better
+        * decision in this case.
+        */
+       if (sk->sk_memcg) {
+               css_get(&sk->sk_memcg->css);
+               return;
+       }
+
        rcu_read_lock();
        memcg = mem_cgroup_from_task(current);
        if (memcg == root_mem_cgroup)
diff --git a/net/core/sock.c b/net/core/sock.c
index c0b5b2f17412ec3ac3d4b1cf400fb2fbcabf086f..7571dabfc4cf0c4541d2f012d37c3619218ddcf8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1675,16 +1675,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                newsk->sk_dst_pending_confirm = 0;
                newsk->sk_wmem_queued   = 0;
                newsk->sk_forward_alloc = 0;
-
-               /* sk->sk_memcg will be populated at accept() time */
-               newsk->sk_memcg = NULL;
-
                atomic_set(&newsk->sk_drops, 0);
                newsk->sk_send_head     = NULL;
                newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
                atomic_set(&newsk->sk_zckey, 0);
 
                sock_reset_flag(newsk, SOCK_DONE);
+               mem_cgroup_sk_alloc(newsk);
                cgroup_sk_alloc(&newsk->sk_cgrp_data);
 
                rcu_read_lock();
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4ca46dc08e631c360cba851f166c6d2bcc0c983c..3668c41826556ecc32123a14b67bf92e1b419a12 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -475,7 +475,6 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
                }
                spin_unlock_bh(&queue->fastopenq.lock);
        }
-       mem_cgroup_sk_alloc(newsk);
 out:
        release_sock(sk);
        if (req)