git.hungrycats.org Git - linux/commitdiff
NFSv4: Share open_owner structs between several different
author Trond Myklebust <trond.myklebust@fys.uio.no>
Sat, 7 Feb 2004 16:01:32 +0000 (17:01 +0100)
committer Trond Myklebust <trond.myklebust@fys.uio.no>
Sat, 7 Feb 2004 16:01:32 +0000 (17:01 +0100)
processes. Reduces the load on the server.

fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/nfs4xdr.c
include/linux/nfs4.h
include/linux/nfs_fs.h
include/linux/nfs_xdr.h

index a6ac022add4473c18a1bd41d4d9e0d5b11f7efa8..10f67569de736dd6b0068310279c23c50f9437f9 100644 (file)
@@ -616,8 +616,13 @@ retry:
                memcpy(&state->stateid, &oc_res.stateid, sizeof(state->stateid));
        } else
                memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
+       spin_lock(&inode->i_lock);
+       if (flags & FMODE_READ)
+               state->nreaders++;
+       if (flags & FMODE_WRITE)
+               state->nwriters++;
        state->state |= flags & (FMODE_READ|FMODE_WRITE);
-       state->pid = current->pid;
+       spin_unlock(&inode->i_lock);
 
        up(&sp->so_sema);
        nfs4_put_state_owner(sp);
@@ -634,6 +639,21 @@ out_up:
                iput(inode);
                inode = NULL;
        }
+       /* NOTE: BAD_SEQID means the server and client disagree about the
+        * book-keeping w.r.t. state-changing operations
+        * (OPEN/CLOSE/LOCK/LOCKU...)
+        * It is actually a sign of a bug on the client or on the server.
+        *
+        * If we receive a BAD_SEQID error in the particular case of
+        * doing an OPEN, we assume that nfs4_increment_seqid() will
+        * have unhashed the old state_owner for us, and that we can
+        * therefore safely retry using a new one. We should still warn
+        * the user though...
+        */
+       if (status == -NFS4ERR_BAD_SEQID) {
+               printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n");
+               goto retry;
+       }
        status = nfs4_handle_error(server, status);
        if (!status)
                goto retry;
@@ -722,6 +742,36 @@ nfs4_do_close(struct inode *inode, struct nfs4_state *state)
         * the state_owner. we keep this around to process errors
         */
        nfs4_increment_seqid(status, sp);
+       if (!status)
+               memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
+
+       return status;
+}
+
+int
+nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) 
+{
+       struct nfs4_state_owner *sp = state->owner;
+       int status = 0;
+       struct nfs_closeargs arg = {
+               .fh             = NFS_FH(inode),
+               .seqid          = sp->so_seqid,
+               .share_access   = mode,
+       };
+       struct nfs_closeres res = {
+               .status         = 0,
+       };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+       };
+
+       memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
+       status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
+       nfs4_increment_seqid(status, sp);
+       if (!status)
+               memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
 
        return status;
 }
@@ -771,7 +821,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
                return 1;
        }
        d_drop(dentry);
-       nfs4_put_open_state(state);
+       nfs4_close_state(state, openflags);
        iput(inode);
        return 0;
 }
@@ -872,15 +922,14 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
        fattr->valid = 0;
        
        if (size_change) {
-               state = nfs4_find_state_bypid(inode, current->pid);
-
+               struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+               state = nfs4_find_state(inode, cred, FMODE_WRITE);
                if (!state) {
-                       struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
                        state = nfs4_do_open(dentry->d_parent->d_inode, 
                                &dentry->d_name, FMODE_WRITE, NULL, cred);
-                       put_rpccred(cred);
                        need_iput = 1;
                }
+               put_rpccred(cred);
                if (IS_ERR(state))
                        return PTR_ERR(state);
 
@@ -895,7 +944,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 out:
        if (state) {
                inode = state->inode;
-               nfs4_put_open_state(state);
+               nfs4_close_state(state, FMODE_WRITE);
                if (need_iput)
                        iput(inode);
        }
@@ -1161,7 +1210,7 @@ nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
                        status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
                                             NFS_FH(inode), sattr, state);
                        if (status != 0) {
-                               nfs4_put_open_state(state);
+                               nfs4_close_state(state, flags);
                                iput(inode);
                                inode = ERR_PTR(status);
                        }
@@ -1742,6 +1791,7 @@ nfs4_proc_file_open(struct inode *inode, struct file *filp)
 {
        struct dentry *dentry = filp->f_dentry;
        struct nfs4_state *state;
+       struct rpc_cred *cred;
 
        dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n",
                               (int)dentry->d_parent->d_name.len,
@@ -1750,12 +1800,14 @@ nfs4_proc_file_open(struct inode *inode, struct file *filp)
 
 
        /* Find our open stateid */
-       state = nfs4_find_state_bypid(inode, current->pid);
+       cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+       state = nfs4_find_state(inode, cred, filp->f_mode);
+       put_rpccred(cred);
        if (state == NULL) {
                printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__);
                return -EIO; /* ERACE actually */
        }
-       nfs4_put_open_state(state);
+       nfs4_close_state(state, filp->f_mode);
        if (filp->f_mode & FMODE_WRITE) {
                lock_kernel();
                nfs_set_mmcred(inode, state->owner->so_cred);
@@ -1774,7 +1826,7 @@ nfs4_proc_file_release(struct inode *inode, struct file *filp)
        struct nfs4_state *state = (struct nfs4_state *)filp->private_data;
 
        if (state)
-               nfs4_put_open_state(state);
+               nfs4_close_state(state, filp->f_mode);
        return 0;
 }
 
@@ -1816,6 +1868,9 @@ nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
                        rpc_delay(task, NFS4_POLL_RETRY_TIME);
                        task->tk_status = 0;
                        return -EAGAIN;
+               case -NFS4ERR_OLD_STATEID:
+                       task->tk_status = 0;
+                       return -EAGAIN;
        }
        return 0;
 }
@@ -1892,6 +1947,9 @@ nfs4_handle_error(struct nfs_server *server, int errorcode)
                case -NFS4ERR_DELAY:
                        ret = nfs4_delay(server->client);
                        break;
+               case -NFS4ERR_OLD_STATEID:
+                       ret = 0;
+                       break;
                default:
                        if (errorcode <= -1000) {
                                printk(KERN_WARNING "%s could not handle NFSv4 error %d\n",
index 333daf5f48e7d2a88ace2055aa71b64d2f51a487..7a078a42eec3c5408e79bfc89c655f77a7bd4fad 100644 (file)
@@ -188,6 +188,23 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
        return sp;
 }
 
+static struct nfs4_state_owner *
+nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+       struct nfs4_state_owner *sp, *res = NULL;
+
+       list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+               if (sp->so_cred != cred)
+                       continue;
+               atomic_inc(&sp->so_count);
+               /* Move to the head of the list */
+               list_move(&sp->so_list, &clp->cl_state_owners);
+               res = sp;
+               break;
+       }
+       return res;
+}
+
 /*
  * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
  * create a new state_owner.
@@ -208,6 +225,15 @@ nfs4_alloc_state_owner(void)
        return sp;
 }
 
+static void
+nfs4_unhash_state_owner(struct nfs4_state_owner *sp)
+{
+       struct nfs4_client *clp = sp->so_client;
+       spin_lock(&clp->cl_lock);
+       list_del_init(&sp->so_list);
+       spin_unlock(&clp->cl_lock);
+}
+
 struct nfs4_state_owner *
 nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
@@ -217,7 +243,9 @@ nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
        get_rpccred(cred);
        new = nfs4_alloc_state_owner();
        spin_lock(&clp->cl_lock);
-       sp = nfs4_client_grab_unused(clp, cred);
+       sp = nfs4_find_state_owner(clp, cred);
+       if (sp == NULL)
+               sp = nfs4_client_grab_unused(clp, cred);
        if (sp == NULL && new != NULL) {
                list_add(&new->so_list, &clp->cl_state_owners);
                new->so_client = clp;
@@ -248,6 +276,8 @@ nfs4_put_state_owner(struct nfs4_state_owner *sp)
                return;
        if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
                goto out_free;
+       if (list_empty(&sp->so_list))
+               goto out_free;
        list_move(&sp->so_list, &clp->cl_unused);
        clp->cl_nunused++;
        spin_unlock(&clp->cl_lock);
@@ -269,24 +299,38 @@ nfs4_alloc_open_state(void)
        state = kmalloc(sizeof(*state), GFP_KERNEL);
        if (!state)
                return NULL;
-       state->pid = current->pid;
        state->state = 0;
+       state->nreaders = 0;
+       state->nwriters = 0;
        memset(state->stateid.data, 0, sizeof(state->stateid.data));
        atomic_set(&state->count, 1);
        return state;
 }
 
 static struct nfs4_state *
-__nfs4_find_state_bypid(struct inode *inode, pid_t pid)
+__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs4_state *state;
 
+       mode &= (FMODE_READ|FMODE_WRITE);
        list_for_each_entry(state, &nfsi->open_states, inode_states) {
-               if (state->pid == pid) {
-                       atomic_inc(&state->count);
-                       return state;
-               }
+               if (state->owner->so_cred != cred)
+                       continue;
+               if ((mode & FMODE_READ) != 0 && state->nreaders == 0)
+                       continue;
+               if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0)
+                       continue;
+               if ((state->state & mode) != mode)
+                       continue;
+               /* Add the state to the head of the inode's list */
+               list_move(&state->inode_states, &nfsi->open_states);
+               atomic_inc(&state->count);
+               if (mode & FMODE_READ)
+                       state->nreaders++;
+               if (mode & FMODE_WRITE)
+                       state->nwriters++;
+               return state;
        }
        return NULL;
 }
@@ -298,7 +342,12 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
        struct nfs4_state *state;
 
        list_for_each_entry(state, &nfsi->open_states, inode_states) {
+               /* Is this in the process of being freed? */
+               if (state->nreaders == 0 && state->nwriters == 0)
+                       continue;
                if (state->owner == owner) {
+                       /* Add the state to the head of the inode's list */
+                       list_move(&state->inode_states, &nfsi->open_states);
                        atomic_inc(&state->count);
                        return state;
                }
@@ -307,16 +356,12 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
 }
 
 struct nfs4_state *
-nfs4_find_state_bypid(struct inode *inode, pid_t pid)
+nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
 {
-       struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs4_state *state;
 
        spin_lock(&inode->i_lock);
-       state = __nfs4_find_state_bypid(inode, pid);
-       /* Add the state to the tail of the inode's list */
-       if (state)
-               list_move_tail(&state->inode_states, &nfsi->open_states);
+       state = __nfs4_find_state(inode, cred, mode);
        spin_unlock(&inode->i_lock);
        return state;
 }
@@ -387,6 +432,50 @@ nfs4_put_open_state(struct nfs4_state *state)
        nfs4_put_state_owner(owner);
 }
 
+void
+nfs4_close_state(struct nfs4_state *state, mode_t mode)
+{
+       struct inode *inode = state->inode;
+       struct nfs4_state_owner *owner = state->owner;
+       int newstate;
+       int status = 0;
+
+       down(&owner->so_sema);
+       /* Protect against nfs4_find_state() */
+       spin_lock(&inode->i_lock);
+       if (mode & FMODE_READ)
+               state->nreaders--;
+       if (mode & FMODE_WRITE)
+               state->nwriters--;
+       if (state->nwriters == 0 && state->nreaders == 0)
+               list_del_init(&state->inode_states);
+       spin_unlock(&inode->i_lock);
+       do {
+               newstate = 0;
+               if (state->state == 0)
+                       break;
+               if (state->nreaders)
+                       newstate |= FMODE_READ;
+               if (state->nwriters)
+                       newstate |= FMODE_WRITE;
+               if (state->state == newstate)
+                       break;
+               if (newstate != 0)
+                       status = nfs4_do_downgrade(inode, state, newstate);
+               else
+                       status = nfs4_do_close(inode, state);
+               if (!status) {
+                       state->state = newstate;
+                       break;
+               }
+               up(&owner->so_sema);
+               status = nfs4_handle_error(NFS_SERVER(inode), status);
+               down(&owner->so_sema);
+       } while (!status);
+       up(&owner->so_sema);
+       nfs4_put_open_state(state);
+}
+
 /*
 * Called with sp->so_sema held.
 *
@@ -399,6 +488,9 @@ nfs4_increment_seqid(int status, struct nfs4_state_owner *sp)
 {
        if (status == NFS_OK || seqid_mutating_err(-status))
                sp->so_seqid++;
+       /* If the server returns BAD_SEQID, unhash state_owner here */
+       if (status == -NFS4ERR_BAD_SEQID)
+               nfs4_unhash_state_owner(sp);
 }
 
 static int reclaimer(void *);
index 16296618a2313183e1112842398268cb9450ed33..7a2d241e50e8466e72379cb90c0d997f3ddc8d74 100644 (file)
@@ -176,6 +176,14 @@ static int nfs_stat_to_errno(int);
                                        op_decode_hdr_maxsz + \
                                        4 + 5 + 2 + 3 + \
                                        decode_getattr_maxsz
+#define NFS4_enc_open_downgrade_sz \
+                               compound_encode_hdr_maxsz + \
+                                encode_putfh_maxsz + \
+                                op_encode_hdr_maxsz + 7
+#define NFS4_dec_open_downgrade_sz \
+                               compound_decode_hdr_maxsz + \
+                                decode_putfh_maxsz + \
+                                op_decode_hdr_maxsz + 4
 #define NFS4_enc_close_sz       compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
                                 op_encode_hdr_maxsz + 5
@@ -711,6 +719,22 @@ encode_open_reclaim(struct xdr_stream *xdr, struct nfs_open_reclaimargs *arg)
        return 0;
 }
 
+static int
+encode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeargs *arg)
+{
+       uint32_t *p;
+
+       RESERVE_SPACE(16+sizeof(arg->stateid.data));
+       WRITE32(OP_OPEN_DOWNGRADE);
+       WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+       WRITE32(arg->seqid);
+       WRITE32(arg->share_access);
+       /* No deny modes */
+       WRITE32(0);
+
+       return 0;
+}
+
 static int
 encode_putfh(struct xdr_stream *xdr, struct nfs_fh *fh)
 {
@@ -1129,6 +1153,27 @@ out:
        return status;
 }
 
+/*
+ * Encode an OPEN_DOWNGRADE request
+ */
+static int
+nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
+{
+       struct xdr_stream xdr;
+       struct compound_hdr hdr = {
+               .nops   = 2,
+       };
+       int status;
+
+       xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+       encode_compound_hdr(&xdr, &hdr);
+       status = encode_putfh(&xdr, args->fh);
+       if (status)
+               goto out;
+       status = encode_open_downgrade(&xdr, args);
+out:
+       return status;
+}
 
 /*
  * Encode a READ request
@@ -2001,6 +2046,19 @@ decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
         return 0;
 }
 
+static int
+decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
+{
+       uint32_t *p;
+       int status;
+
+       status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
+       if (status)
+               return status;
+       READ_BUF(sizeof(res->stateid.data));
+       COPYMEM(res->stateid.data, sizeof(res->stateid.data));
+       return 0;
+}
 
 static int
 decode_putfh(struct xdr_stream *xdr)
@@ -2377,6 +2435,29 @@ decode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqs
 
        DECODE_TAIL;
 }
+
+/*
+ * Decode OPEN_DOWNGRADE response
+ */
+static int
+nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr;
+        int status;
+
+        xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+        status = decode_compound_hdr(&xdr, &hdr);
+        if (status)
+                goto out;
+        status = decode_putfh(&xdr);
+        if (status)
+                goto out;
+        status = decode_open_downgrade(&xdr, res);
+out:
+        return status;
+}
+
 /*
  * END OF "GENERIC" DECODE ROUTINES.
  */
@@ -2827,6 +2908,7 @@ struct rpc_procinfo       nfs4_procedures[] = {
   PROC(OPEN,           enc_open,       dec_open),
   PROC(OPEN_CONFIRM,   enc_open_confirm,       dec_open_confirm),
   PROC(OPEN_RECLAIM,   enc_open_reclaim,       dec_open_reclaim),
+  PROC(OPEN_DOWNGRADE, enc_open_downgrade,     dec_open_downgrade),
   PROC(CLOSE,          enc_close,      dec_close),
   PROC(SETATTR,                enc_setattr,    dec_setattr),
   PROC(FSINFO,         enc_fsinfo,     dec_fsinfo),
index a6f2d563b60515aa556e06aa2fd770c0de5a32e9..4a61a468271805f139dc2eb589df9e6835a6bac7 100644 (file)
@@ -290,6 +290,7 @@ enum {
        NFSPROC4_CLNT_OPEN,
        NFSPROC4_CLNT_OPEN_CONFIRM,
        NFSPROC4_CLNT_OPEN_RECLAIM,
+       NFSPROC4_CLNT_OPEN_DOWNGRADE,
        NFSPROC4_CLNT_CLOSE,
        NFSPROC4_CLNT_SETATTR,
        NFSPROC4_CLNT_FSINFO,
index 0605e9c63026ca2c03da0c033c7a1cbd8246d904..cfbb7ff1aa8945b172aa8420f75e7678fd2751f7 100644 (file)
@@ -549,10 +549,11 @@ struct nfs4_state {
 
        struct nfs4_state_owner *owner; /* Pointer to the open owner */
        struct inode *inode;            /* Pointer to the inode */
-       pid_t pid;                      /* Thread that called OPEN */
 
        nfs4_stateid stateid;
 
+       unsigned int nreaders;
+       unsigned int nwriters;
        int state;                      /* State on the server (R,W, or RW) */
        atomic_t count;
 };
@@ -568,6 +569,7 @@ extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *);
 extern int nfs4_proc_async_renew(struct nfs4_client *);
 extern int nfs4_proc_renew(struct nfs4_client *);
 extern int nfs4_do_close(struct inode *, struct nfs4_state *);
+int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode);
 extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
 extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
@@ -586,7 +588,8 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
-extern struct nfs4_state *nfs4_find_state_bypid(struct inode *, pid_t);
+extern void nfs4_close_state(struct nfs4_state *, mode_t);
+extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
 extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
 extern int nfs4_handle_error(struct nfs_server *, int);
 extern void nfs4_schedule_state_recovery(struct nfs4_client *);
index 4393ae7c305d7a575a51743cbef0cb37d9c064f5..21827ad1a71e6d6a017f70d3e9653ceeeb336a02 100644 (file)
@@ -153,6 +153,7 @@ struct nfs_closeargs {
        struct nfs_fh *         fh;
        nfs4_stateid            stateid;
        __u32                   seqid;
+       __u32                   share_access;
 };
 
 struct nfs_closeres {