git.hungrycats.org Git - linux/commitdiff
ocfs2/dlm: don't handle migrate lockres if already in shutdown
author: Jun Piao <piaojun@huawei.com>
Thu, 5 Apr 2018 23:18:48 +0000 (16:18 -0700)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 30 May 2018 05:49:08 +0000 (07:49 +0200)
[ Upstream commit bb34f24c7d2c98d0c81838a7700e6068325b17a0 ]

We should not handle migrate lockres if we are already in
'DLM_CTXT_IN_SHUTDOWN', as that will cause the lockres to remain after
leaving the dlm domain.  Eventually other nodes will get stuck in an
infinite loop when requesting a lock from us.

The problem is caused by concurrent umount between nodes.  Before
receiving N1's DLM_BEGIN_EXIT_DOMAIN_MSG, N2 has picked up N1 as the
migrate target.  So N2 will continue sending lockres to N1 even though
N1 has left the domain.

        N1                             N2 (owner)
                                       touch file

    access the file,
    and get pr lock

                                       begin leave domain and
                                       pick up N1 as new owner

    begin leave domain and
    migrate all lockres done

                                       begin migrate lockres to N1

    end leave domain, but
    the lockres left
    unexpectedly, because
    migrate task has passed

[piaojun@huawei.com: v3]
Link: http://lkml.kernel.org/r/5A9CBD19.5020107@huawei.com
Link: http://lkml.kernel.org/r/5A99F028.2090902@huawei.com
Signed-off-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
Reviewed-by: Changwei Ge <ge.changwei@h3c.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlm/dlmdomain.h
fs/ocfs2/dlm/dlmrecovery.c

index 2ee7fe747cea4408182aee7c45e8baa37ceaeae1..c55a9c47ac17455c680f81fe8765ee15c590fe78 100644 (file)
@@ -674,20 +674,6 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm)
        spin_unlock(&dlm->spinlock);
 }
 
-int dlm_shutting_down(struct dlm_ctxt *dlm)
-{
-       int ret = 0;
-
-       spin_lock(&dlm_domain_lock);
-
-       if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
-               ret = 1;
-
-       spin_unlock(&dlm_domain_lock);
-
-       return ret;
-}
-
 void dlm_unregister_domain(struct dlm_ctxt *dlm)
 {
        int leave = 0;
index fd6122a38dbdf04f11266af5fa094b2e00c4173c..8a9281411c18ff688cead233989606805523986f 100644 (file)
 extern spinlock_t dlm_domain_lock;
 extern struct list_head dlm_domains;
 
-int dlm_shutting_down(struct dlm_ctxt *dlm);
+static inline int dlm_joined(struct dlm_ctxt *dlm)
+{
+       int ret = 0;
+
+       spin_lock(&dlm_domain_lock);
+       if (dlm->dlm_state == DLM_CTXT_JOINED)
+               ret = 1;
+       spin_unlock(&dlm_domain_lock);
+
+       return ret;
+}
+
+static inline int dlm_shutting_down(struct dlm_ctxt *dlm)
+{
+       int ret = 0;
+
+       spin_lock(&dlm_domain_lock);
+       if (dlm->dlm_state == DLM_CTXT_IN_SHUTDOWN)
+               ret = 1;
+       spin_unlock(&dlm_domain_lock);
+
+       return ret;
+}
+
 void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm,
                                        int node_num);
 
index 4a338803e7e9280ce6cab1c0dd95ffacbff5ae3b..88149b4387c29b19ddc0e091de854f65f0bd1f30 100644 (file)
@@ -1377,6 +1377,15 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
        if (!dlm_grab(dlm))
                return -EINVAL;
 
+       if (!dlm_joined(dlm)) {
+               mlog(ML_ERROR, "Domain %s not joined! "
+                         "lockres %.*s, master %u\n",
+                         dlm->name, mres->lockname_len,
+                         mres->lockname, mres->master);
+               dlm_put(dlm);
+               return -EINVAL;
+       }
+
        BUG_ON(!(mres->flags & (DLM_MRES_RECOVERY|DLM_MRES_MIGRATION)));
 
        real_master = mres->master;