return p;
}
-/*
- * Check buffer bounds after decoding arguments
- */
-static inline int
-xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
-{
- struct svc_buf *buf = &rqstp->rq_argbuf;
-
- return p - buf->base <= buf->buflen;
-}
-
-static inline int
-xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
-{
- struct svc_buf *buf = &rqstp->rq_resbuf;
-
- buf->len = p - buf->base;
- return (buf->len <= buf->buflen);
-}
/*
* First, the server side XDR functions
}
-/*
- * Check buffer bounds after decoding arguments
- */
-static int
-xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
-{
- struct svc_buf *buf = &rqstp->rq_argbuf;
-
- return p - buf->base <= buf->buflen;
-}
-
-static int
-xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
-{
- struct svc_buf *buf = &rqstp->rq_resbuf;
-
- buf->len = p - buf->base;
- return (buf->len <= buf->buflen);
-}
-
/*
* First, the server side XDR functions
*/
/*
* Reserve room in the send buffer
*/
-static void
-svcbuf_reserve(struct svc_buf *buf, u32 **ptr, int *len, int nr)
+static inline void
+svcbuf_reserve(struct xdr_buf *buf, u32 **ptr, int *len, int nr)
{
- *ptr = buf->buf + nr;
- *len = buf->buflen - buf->len - nr;
+ *ptr = (u32*)(buf->head[0].iov_base+buf->head[0].iov_len) + nr;
+ *len = ((PAGE_SIZE-buf->head[0].iov_len)>>2) - nr;
}
/*
dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
/* Reserve room for status, post_op_attr, and path length */
- svcbuf_reserve(&rqstp->rq_resbuf, &path, &dummy,
+ svcbuf_reserve(&rqstp->rq_res, &path, &dummy,
1 + NFS3_POST_OP_ATTR_WORDS + 1);
/* Read the symlink. */
nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
struct nfsd3_readres *resp)
{
- u32 * buffer;
- int nfserr, avail;
+ int nfserr;
dprintk("nfsd: READ(3) %s %lu bytes at %lu\n",
SVCFH_fmt(&argp->fh),
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
* + 1 (xdr opaque byte count) = 26
*/
- svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &avail,
- 1 + NFS3_POST_OP_ATTR_WORDS + 3);
+
resp->count = argp->count;
- if ((avail << 2) < resp->count)
- resp->count = avail << 2;
+ if (NFSSVC_MAXBLKSIZE < resp->count)
+ resp->count = NFSSVC_MAXBLKSIZE;
- svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + argp->count +4);
+ svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
fh_copy(&resp->fh, &argp->fh);
nfserr = nfsd_read(rqstp, &resp->fh,
argp->offset,
- (char *) buffer,
+ argp->vec, argp->vlen,
&resp->count);
if (nfserr == 0) {
struct inode *inode = resp->fh.fh_dentry->d_inode;
resp->committed = argp->stable;
nfserr = nfsd_write(rqstp, &resp->fh,
argp->offset,
- argp->data,
+ argp->vec, argp->vlen,
argp->len,
&resp->committed);
resp->count = argp->count;
argp->count, (u32) argp->cookie);
/* Reserve buffer space for status, attributes and verifier */
- svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count,
+ svcbuf_reserve(&rqstp->rq_res, &buffer, &count,
1 + NFS3_POST_OP_ATTR_WORDS + 2);
/* Make sure we've room for the NULL ptr & eof flag, and shrink to
argp->count, (u32) argp->cookie);
/* Reserve buffer space for status, attributes and verifier */
- svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count,
+ svcbuf_reserve(&rqstp->rq_res, &buffer, &count,
1 + NFS3_POST_OP_ATTR_WORDS + 2);
/* Make sure we've room for the NULL ptr & eof flag, and shrink to
#include <linux/spinlock.h>
#include <linux/dcache.h>
#include <linux/namei.h>
+#include <linux/mm.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svc.h>
return encode_post_op_attr(rqstp, p, fhp);
}
-/*
- * Check buffer bounds after decoding arguments
- */
-static inline int
-xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
-{
- struct svc_buf *buf = &rqstp->rq_argbuf;
-
- return p - buf->base <= buf->buflen;
-}
-
-static inline int
-xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
-{
- struct svc_buf *buf = &rqstp->rq_resbuf;
-
- buf->len = p - buf->base;
- dprintk("nfsd: ressize_check p %p base %p len %d\n",
- p, buf->base, buf->buflen);
- return (buf->len <= buf->buflen);
-}
/*
* XDR decode functions
nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd3_readargs *args)
{
+ int len;
+ int v,pn;
+
if (!(p = decode_fh(p, &args->fh))
|| !(p = xdr_decode_hyper(p, &args->offset)))
return 0;
- args->count = ntohl(*p++);
+ len = args->count = ntohl(*p++);
+
+ if (len > NFSSVC_MAXBLKSIZE)
+ len = NFSSVC_MAXBLKSIZE;
+
+ /* set up the iovec */
+ v=0;
+ while (len > 0) {
+ pn = rqstp->rq_resused;
+ take_page(rqstp);
+ args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+ args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
+ v++;
+ len -= PAGE_SIZE;
+ }
+ args->vlen = v;
return xdr_argsize_check(rqstp, p);
}
nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd3_writeargs *args)
{
+ int len, v;
+
if (!(p = decode_fh(p, &args->fh))
|| !(p = xdr_decode_hyper(p, &args->offset)))
return 0;
args->count = ntohl(*p++);
args->stable = ntohl(*p++);
- args->len = ntohl(*p++);
- args->data = (char *) p;
- p += XDR_QUADLEN(args->len);
+ len = args->len = ntohl(*p++);
+
+ args->vec[0].iov_base = (void*)p;
+ args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
+ (((void*)p) - rqstp->rq_arg.head[0].iov_base);
+
+ if (len > NFSSVC_MAXBLKSIZE)
+ len = NFSSVC_MAXBLKSIZE;
+ v= 0;
+ while (len > args->vec[v].iov_len) {
+ len -= args->vec[v].iov_len;
+ v++;
+ args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
+ args->vec[v].iov_len = PAGE_SIZE;
+ }
+ args->vec[v].iov_len = len;
+ args->vlen = v+1;
- return xdr_argsize_check(rqstp, p);
+ return args->count == args->len && args->vec[0].iov_len > 0;
}
int
*p++ = htonl(resp->count);
*p++ = htonl(resp->eof);
*p++ = htonl(resp->count); /* xdr opaque count */
- p += XDR_QUADLEN(resp->count);
- }
- return xdr_ressize_check(rqstp, p);
+ xdr_ressize_check(rqstp, p);
+ /* now update rqstp->rq_res to reflect data as well */
+ rqstp->rq_res.page_base = 0;
+ rqstp->rq_res.page_len = resp->count;
+ if (resp->count & 3) {
+ /* need to pad with tail */
+ rqstp->rq_res.tail[0].iov_base = p;
+ *p = 0;
+ rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
+ }
+ rqstp->rq_res.len =
+ rqstp->rq_res.head[0].iov_len+
+ rqstp->rq_res.page_len+
+ rqstp->rq_res.tail[0].iov_len;
+ return 1;
+ } else
+ return xdr_ressize_check(rqstp, p);
}
/* WRITE */
static struct svc_cacherep * nfscache;
static int cache_disabled = 1;
-static int nfsd_cache_append(struct svc_rqst *rqstp, struct svc_buf *data);
+static int nfsd_cache_append(struct svc_rqst *rqstp, struct iovec *vec);
/*
* locking for the reply cache:
for (rp = lru_head; rp; rp = rp->c_lru_next) {
if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF)
- kfree(rp->c_replbuf.buf);
+ kfree(rp->c_replvec.iov_base);
}
cache_disabled = 1;
/* release any buffer */
if (rp->c_type == RC_REPLBUFF) {
- kfree(rp->c_replbuf.buf);
- rp->c_replbuf.buf = NULL;
+ kfree(rp->c_replvec.iov_base);
+ rp->c_replvec.iov_base = NULL;
}
rp->c_type = RC_NOCACHE;
out:
case RC_NOCACHE:
break;
case RC_REPLSTAT:
- svc_putu32(&rqstp->rq_resbuf, rp->c_replstat);
+ svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat);
rtn = RC_REPLY;
break;
case RC_REPLBUFF:
- if (!nfsd_cache_append(rqstp, &rp->c_replbuf))
+ if (!nfsd_cache_append(rqstp, &rp->c_replvec))
goto out; /* should not happen */
rtn = RC_REPLY;
break;
nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp)
{
struct svc_cacherep *rp;
- struct svc_buf *resp = &rqstp->rq_resbuf, *cachp;
+ struct iovec *resv = &rqstp->rq_res.head[0], *cachv;
int len;
if (!(rp = rqstp->rq_cacherep) || cache_disabled)
return;
- len = resp->len - (statp - resp->base);
+ len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
+ len >>= 2;
/* Don't cache excessive amounts of data and XDR failures */
if (!statp || len > (256 >> 2)) {
rp->c_replstat = *statp;
break;
case RC_REPLBUFF:
- cachp = &rp->c_replbuf;
- cachp->buf = (u32 *) kmalloc(len << 2, GFP_KERNEL);
- if (!cachp->buf) {
+ cachv = &rp->c_replvec;
+ cachv->iov_base = kmalloc(len << 2, GFP_KERNEL);
+ if (!cachv->iov_base) {
spin_lock(&cache_lock);
rp->c_state = RC_UNUSED;
spin_unlock(&cache_lock);
return;
}
- cachp->len = len;
- memcpy(cachp->buf, statp, len << 2);
+ cachv->iov_len = len << 2;
+ memcpy(cachv->iov_base, statp, len << 2);
break;
}
spin_lock(&cache_lock);
/*
* Copy cached reply to current reply buffer. Should always fit.
+ * FIXME as reply is in a page, we should just attach the page, and
+ * keep a refcount....
*/
static int
-nfsd_cache_append(struct svc_rqst *rqstp, struct svc_buf *data)
+nfsd_cache_append(struct svc_rqst *rqstp, struct iovec *data)
{
- struct svc_buf *resp = &rqstp->rq_resbuf;
+ struct iovec *vec = &rqstp->rq_res.head[0];
- if (resp->len + data->len > resp->buflen) {
+ if (vec->iov_len + data->iov_len > PAGE_SIZE) {
printk(KERN_WARNING "nfsd: cached reply too large (%d).\n",
- data->len);
+ data->iov_len);
return 0;
}
- memcpy(resp->buf, data->buf, data->len << 2);
- resp->buf += data->len;
- resp->len += data->len;
+ memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
+ vec->iov_len += data->iov_len;
return 1;
}
#define NFSDDBG_FACILITY NFSDDBG_PROC
-static void
-svcbuf_reserve(struct svc_buf *buf, u32 **ptr, int *len, int nr)
+static inline void
+svcbuf_reserve(struct xdr_buf *buf, u32 **ptr, int *len, int nr)
{
- *ptr = buf->buf + nr;
- *len = buf->buflen - buf->len - nr;
+ *ptr = (u32*)(buf->head[0].iov_base+buf->head[0].iov_len) + nr;
+ *len = ((PAGE_SIZE-buf->head[0].iov_len)>>2) - nr;
}
static int
dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
/* Reserve room for status and path length */
- svcbuf_reserve(&rqstp->rq_resbuf, &path, &dummy, 2);
+ svcbuf_reserve(&rqstp->rq_res, &path, &dummy, 2);
/* Read the symlink. */
resp->len = NFS_MAXPATHLEN;
nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
struct nfsd_readres *resp)
{
- u32 * buffer;
- int nfserr, avail;
+ int nfserr;
dprintk("nfsd: READ %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
/* Obtain buffer pointer for payload. 19 is 1 word for
* status, 17 words for fattr, and 1 word for the byte count.
*/
- svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &avail, 19);
- if ((avail << 2) < argp->count) {
+ if (NFSSVC_MAXBLKSIZE < argp->count) {
printk(KERN_NOTICE
"oversized read request from %08x:%d (%d bytes)\n",
ntohl(rqstp->rq_addr.sin_addr.s_addr),
ntohs(rqstp->rq_addr.sin_port),
argp->count);
- argp->count = avail << 2;
+ argp->count = NFSSVC_MAXBLKSIZE;
}
svc_reserve(rqstp, (19<<2) + argp->count + 4);
resp->count = argp->count;
nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset,
- (char *) buffer,
+ argp->vec, argp->vlen,
&resp->count);
return nfserr;
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset,
- argp->data,
+ argp->vec, argp->vlen,
argp->len,
&stable);
return nfserr;
argp->count, argp->cookie);
/* Reserve buffer space for status */
- svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, 1);
+ svcbuf_reserve(&rqstp->rq_res, &buffer, &count, 1);
/* Shrink to the client read size */
if (count > (argp->count >> 2))
/* Decode arguments */
xdr = proc->pc_decode;
- if (xdr && !xdr(rqstp, rqstp->rq_argbuf.buf, rqstp->rq_argp)) {
+ if (xdr && !xdr(rqstp, (u32*)rqstp->rq_arg.head[0].iov_base,
+ rqstp->rq_argp)) {
dprintk("nfsd: failed to decode arguments!\n");
nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
*statp = rpc_garbage_args;
}
if (rqstp->rq_proc != 0)
- svc_putu32(&rqstp->rq_resbuf, nfserr);
+ svc_putu32(&rqstp->rq_res.head[0], nfserr);
/* Encode result.
* For NFSv2, additional info is never returned in case of an error.
*/
if (!(nfserr && rqstp->rq_vers == 2)) {
xdr = proc->pc_encode;
- if (xdr && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) {
+ if (xdr && !xdr(rqstp, (u32*)(rqstp->rq_res.head[0].iov_base+rqstp->rq_res.head[0].iov_len),
+ rqstp->rq_resp)) {
/* Failed to encode result. Release cache entry */
dprintk("nfsd: failed to encode result!\n");
nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/xdr.h>
+#include <linux/mm.h>
#define NFSDDBG_FACILITY NFSDDBG_XDR
return p;
}
-/*
- * Check buffer bounds after decoding arguments
- */
-static inline int
-xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
-{
- struct svc_buf *buf = &rqstp->rq_argbuf;
-
- return p - buf->base <= buf->buflen;
-}
-
-static inline int
-xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
-{
- struct svc_buf *buf = &rqstp->rq_resbuf;
-
- buf->len = p - buf->base;
- dprintk("nfsd: ressize_check p %p base %p len %d\n",
- p, buf->base, buf->buflen);
- return (buf->len <= buf->buflen);
-}
/*
* XDR decode functions
nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd_readargs *args)
{
+ int len;
+ int v,pn;
if (!(p = decode_fh(p, &args->fh)))
return 0;
args->offset = ntohl(*p++);
- args->count = ntohl(*p++);
- args->totalsize = ntohl(*p++);
+ len = args->count = ntohl(*p++);
+ p++; /* totalcount - unused */
+
+ if (len > NFSSVC_MAXBLKSIZE)
+ len = NFSSVC_MAXBLKSIZE;
+ /* set up somewhere to store response.
+ * We take pages, put them on reslist and include in iovec
+ */
+ v=0;
+ while (len > 0) {
+ pn=rqstp->rq_resused;
+ take_page(rqstp);
+ args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+ args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
+ v++;
+ len -= PAGE_SIZE;
+ }
+ args->vlen = v;
return xdr_argsize_check(rqstp, p);
}
nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd_writeargs *args)
{
+ int len;
+ int v;
if (!(p = decode_fh(p, &args->fh)))
return 0;
p++; /* beginoffset */
args->offset = ntohl(*p++); /* offset */
p++; /* totalcount */
- args->len = ntohl(*p++);
- args->data = (char *) p;
- p += XDR_QUADLEN(args->len);
-
- return xdr_argsize_check(rqstp, p);
+ len = args->len = ntohl(*p++);
+ args->vec[0].iov_base = (void*)p;
+ args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
+ (((void*)p) - rqstp->rq_arg.head[0].iov_base);
+ if (len > NFSSVC_MAXBLKSIZE)
+ len = NFSSVC_MAXBLKSIZE;
+ v = 0;
+ while (len > args->vec[v].iov_len) {
+ len -= args->vec[v].iov_len;
+ v++;
+ args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
+ args->vec[v].iov_len = PAGE_SIZE;
+ }
+ args->vec[v].iov_len = len;
+ args->vlen = v+1;
+ return args->vec[0].iov_len > 0;
}
int
{
p = encode_fattr(rqstp, p, &resp->fh);
*p++ = htonl(resp->count);
- p += XDR_QUADLEN(resp->count);
-
- return xdr_ressize_check(rqstp, p);
+ xdr_ressize_check(rqstp, p);
+
+ /* now update rqstp->rq_res to reflect data as well */
+ rqstp->rq_res.page_base = 0;
+ rqstp->rq_res.page_len = resp->count;
+ if (resp->count & 3) {
+ /* need to pad with tail */
+ rqstp->rq_res.tail[0].iov_base = p;
+ *p = 0;
+ rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
+ }
+ rqstp->rq_res.len =
+ rqstp->rq_res.head[0].iov_len+
+ rqstp->rq_res.page_len+
+ rqstp->rq_res.tail[0].iov_len;
+ return 1;
}
int
*/
int
nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
- char *buf, unsigned long *count)
+ struct iovec *vec, int vlen, unsigned long *count)
{
struct raparms *ra;
mm_segment_t oldfs;
oldfs = get_fs();
set_fs(KERNEL_DS);
- err = vfs_read(&file, buf, *count, &offset);
+ err = vfs_readv(&file, vec, vlen, *count, &offset);
set_fs(oldfs);
/* Write back readahead params */
*/
int
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
- char *buf, unsigned long cnt, int *stablep)
+ struct iovec *vec, int vlen,
+ unsigned long cnt, int *stablep)
{
struct svc_export *exp;
struct file file;
/* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS);
- err = vfs_write(&file, buf, cnt, &offset);
+ err = vfs_writev(&file, vec, vlen, cnt, &offset);
if (err >= 0)
nfsdstats.io_write += cnt;
set_fs(oldfs);
return ret;
}
+ssize_t vfs_readv(struct file *file, struct iovec *vec, int vlen, size_t count, loff_t *pos)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ ssize_t ret;
+
+ if (!(file->f_mode & FMODE_READ))
+ return -EBADF;
+ if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
+ return -EINVAL;
+
+ ret = locks_verify_area(FLOCK_VERIFY_READ, inode, file, *pos, count);
+ if (!ret) {
+ ret = security_ops->file_permission (file, MAY_READ);
+ if (!ret) {
+ if (file->f_op->readv)
+ ret = file->f_op->readv(file, vec, vlen, pos);
+ else {
+ /* do it by hand */
+ struct iovec *vector = vec;
+ ret = 0;
+ while (vlen > 0) {
+ void * base = vector->iov_base;
+ size_t len = vector->iov_len;
+ ssize_t nr;
+ vector++;
+ vlen--;
+ if (file->f_op->read)
+ nr = file->f_op->read(file, base, len, pos);
+ else
+ nr = do_sync_read(file, base, len, pos);
+ if (nr < 0) {
+ if (!ret) ret = nr;
+ break;
+ }
+ ret += nr;
+ if (nr != len)
+ break;
+ }
+ }
+ if (ret > 0)
+ dnotify_parent(file->f_dentry, DN_ACCESS);
+ }
+ }
+
+ return ret;
+}
+
ssize_t do_sync_write(struct file *filp, const char *buf, size_t len, loff_t *ppos)
{
struct kiocb kiocb;
return ret;
}
+ssize_t vfs_writev(struct file *file, const struct iovec *vec, int vlen, size_t count, loff_t *pos)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ ssize_t ret;
+
+ if (!(file->f_mode & FMODE_WRITE))
+ return -EBADF;
+ if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
+ return -EINVAL;
+
+ ret = locks_verify_area(FLOCK_VERIFY_WRITE, inode, file, *pos, count);
+ if (!ret) {
+ ret = security_ops->file_permission (file, MAY_WRITE);
+ if (!ret) {
+ if (file->f_op->writev)
+ ret = file->f_op->writev(file, vec, vlen, pos);
+ else {
+ /* do it by hand */
+ const struct iovec *vector = vec;
+ ret = 0;
+ while (vlen > 0) {
+ void * base = vector->iov_base;
+ size_t len = vector->iov_len;
+ ssize_t nr;
+ vector++;
+ vlen--;
+ if (file->f_op->write)
+ nr = file->f_op->write(file, base, len, pos);
+ else
+ nr = do_sync_write(file, base, len, pos);
+ if (nr < 0) {
+ if (!ret) ret = nr;
+ break;
+ }
+ ret += nr;
+ if (nr != len)
+ break;
+ }
+ }
+ if (ret > 0)
+ dnotify_parent(file->f_dentry, DN_MODIFY);
+ }
+ }
+
+ return ret;
+}
+
asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count)
{
struct file *file;
extern ssize_t vfs_read(struct file *, char *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char *, size_t, loff_t *);
+extern ssize_t vfs_readv(struct file *, struct iovec *, int, size_t, loff_t *);
+extern ssize_t vfs_writev(struct file *, const struct iovec *, int, size_t, loff_t *);
/*
* NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
u32 c_vers;
unsigned long c_timestamp;
union {
- struct svc_buf u_buffer;
+ struct iovec u_vec;
u32 u_status;
} c_u;
};
-#define c_replbuf c_u.u_buffer
+#define c_replvec c_u.u_vec
#define c_replstat c_u.u_status
/* cache entry states */
int, struct file *);
void nfsd_close(struct file *);
int nfsd_read(struct svc_rqst *, struct svc_fh *,
- loff_t, char *, unsigned long *);
+ loff_t, struct iovec *,int, unsigned long *);
int nfsd_write(struct svc_rqst *, struct svc_fh *,
- loff_t, char *, unsigned long, int *);
+ loff_t, struct iovec *,int, unsigned long, int *);
int nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
int nfsd_symlink(struct svc_rqst *, struct svc_fh *,
struct svc_fh fh;
__u32 offset;
__u32 count;
- __u32 totalsize;
+ struct iovec vec[RPCSVC_MAXPAGES];
+ int vlen;
};
struct nfsd_writeargs {
svc_fh fh;
- __u32 beginoffset;
__u32 offset;
- __u32 totalcount;
- __u8 * data;
int len;
+ struct iovec vec[RPCSVC_MAXPAGES];
+ int vlen;
};
struct nfsd_createargs {
struct svc_fh fh;
__u64 offset;
__u32 count;
+ struct iovec vec[RPCSVC_MAXPAGES];
+ int vlen;
};
struct nfsd3_writeargs {
__u64 offset;
__u32 count;
int stable;
- __u8 * data;
int len;
+ struct iovec vec[RPCSVC_MAXPAGES];
+ int vlen;
};
struct nfsd3_createargs {
* This is use to determine the max number of pages nfsd is
* willing to return in a single READ operation.
*/
-#define RPCSVC_MAXPAYLOAD 16384u
+#define RPCSVC_MAXPAYLOAD (64*1024u)
/*
- * Buffer to store RPC requests or replies in.
- * Each server thread has one of these beasts.
+ * RPC Requests and replies are stored in one or more pages.
+ * We maintain an array of pages for each server thread.
+ * Requests are copied into these pages as they arrive. Remaining
+ * pages are available to write the reply into.
*
- * Area points to the allocated memory chunk currently owned by the
- * buffer. Base points to the buffer containing the request, which is
- * different from area when directly reading from an sk_buff. buf is
- * the current read/write position while processing an RPC request.
+ * Currently pages are all re-used by the same server. Later we
+ * will use ->sendpage to transmit pages with reduced copying. In
+ * that case we will need to give away the page and allocate new ones.
+ * In preparation for this, we explicitly move pages off the recv
+ * list onto the transmit list, and back.
*
- * The array of iovecs can hold additional data that the server process
- * may not want to copy into the RPC reply buffer, but pass to the
- * network sendmsg routines directly. The prime candidate for this
- * will of course be NFS READ operations, but one might also want to
- * do something about READLINK and READDIR. It might be worthwhile
- * to implement some generic readdir cache in the VFS layer...
+ * We use xdr_buf for holding responses as it fits well with NFS
+ * read responses (that have a header, and some data pages, and possibly
+ * a tail) and means we can share some client side routines.
*
- * On the receiving end of the RPC server, the iovec may be used to hold
- * the list of IP fragments once we get to process fragmented UDP
- * datagrams directly.
+ * The xdr_buf.head iovec always points to the first page in the rq_*pages
+ * list. The xdr_buf.pages pointer points to the second page on that
+ * list. xdr_buf.tail points to the end of the first page.
+ * This assumes that the non-page part of an rpc reply will fit
+ * in a page - NFSd ensures this. lockd also has no trouble.
*/
-#define RPCSVC_MAXIOV ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE + 1)
-struct svc_buf {
- u32 * area; /* allocated memory */
- u32 * base; /* base of RPC datagram */
- int buflen; /* total length of buffer */
- u32 * buf; /* read/write pointer */
- int len; /* current end of buffer */
-
- /* iovec for zero-copy NFS READs */
- struct iovec iov[RPCSVC_MAXIOV];
- int nriov;
-};
-#define svc_getu32(argp, val) { (val) = *(argp)->buf++; (argp)->len--; }
-#define svc_putu32(resp, val) { *(resp)->buf++ = (val); (resp)->len++; }
+#define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE + 1)
+
+static inline u32 svc_getu32(struct iovec *iov)
+{
+ u32 val, *vp;
+ vp = iov->iov_base;
+ val = *vp++;
+ iov->iov_base = (void*)vp;
+ iov->iov_len -= sizeof(u32);
+ return val;
+}
+static inline void svc_putu32(struct iovec *iov, u32 val)
+{
+ u32 *vp = iov->iov_base + iov->iov_len;
+ *vp = val;
+ iov->iov_len += sizeof(u32);
+}
+
/*
* The context of a single thread, including the request currently being
* processed.
struct svc_cred rq_cred; /* auth info */
struct sk_buff * rq_skbuff; /* fast recv inet buffer */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
- struct svc_buf rq_defbuf; /* default buffer */
- struct svc_buf rq_argbuf; /* argument buffer */
- struct svc_buf rq_resbuf; /* result buffer */
+
+ struct xdr_buf rq_arg;
+ struct xdr_buf rq_res;
+ struct page * rq_argpages[RPCSVC_MAXPAGES];
+ struct page * rq_respages[RPCSVC_MAXPAGES];
+ short rq_argused; /* pages used for argument */
+ short rq_arghi; /* pages available in argument page list */
+ short rq_resused; /* pages used for result */
+
u32 rq_xid; /* transmission id */
u32 rq_prog; /* program number */
u32 rq_vers; /* program version */
wait_queue_head_t rq_wait; /* synchronization */
};
+/*
+ * Check buffer bounds after decoding arguments
+ */
+static inline int
+xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
+{
+ char *cp = (char *)p;
+ struct iovec *vec = &rqstp->rq_arg.head[0];
+ return cp - (char*)vec->iov_base <= vec->iov_len;
+}
+
+static inline int
+xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
+{
+ struct iovec *vec = &rqstp->rq_res.head[0];
+ char *cp = (char*)p;
+
+ vec->iov_len = cp - (char*)vec->iov_base;
+ rqstp->rq_res.len = vec->iov_len;
+
+ return vec->iov_len <= PAGE_SIZE;
+}
+
+static int inline take_page(struct svc_rqst *rqstp)
+{
+ if (rqstp->rq_arghi <= rqstp->rq_argused)
+ return -ENOMEM;
+ rqstp->rq_respages[rqstp->rq_resused++] =
+ rqstp->rq_argpages[--rqstp->rq_arghi];
+ return 0;
+}
+
struct svc_deferred_req {
struct svc_serv *serv;
u32 prot; /* protocol (UDP or TCP) */
EXPORT_SYMBOL(is_subdir);
EXPORT_SYMBOL(get_unused_fd);
EXPORT_SYMBOL(vfs_read);
+EXPORT_SYMBOL(vfs_readv);
EXPORT_SYMBOL(vfs_write);
+EXPORT_SYMBOL(vfs_writev);
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
#include <linux/net.h>
#include <linux/in.h>
#include <linux/unistd.h>
+#include <linux/mm.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
-
memset(serv, 0, sizeof(*serv));
serv->sv_program = prog;
serv->sv_nrthreads = 1;
}
/*
- * Allocate an RPC server buffer
- * Later versions may do nifty things by allocating multiple pages
- * of memory directly and putting them into the bufp->iov.
+ * Allocate an RPC server's buffer space.
+ * We allocate pages and place them in rq_argpages.
*/
-int
-svc_init_buffer(struct svc_buf *bufp, unsigned int size)
+static int
+svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
{
- if (!(bufp->area = (u32 *) kmalloc(size, GFP_KERNEL)))
- return 0;
- bufp->base = bufp->area;
- bufp->buf = bufp->area;
- bufp->len = 0;
- bufp->buflen = size >> 2;
-
- bufp->iov[0].iov_base = bufp->area;
- bufp->iov[0].iov_len = size;
- bufp->nriov = 1;
-
- return 1;
+ int pages = 2 + (size+ PAGE_SIZE -1) / PAGE_SIZE;
+ int arghi;
+
+ rqstp->rq_argused = 0;
+ rqstp->rq_resused = 0;
+ arghi = 0;
+ if (pages > RPCSVC_MAXPAGES)
+ BUG();
+ while (pages) {
+ struct page *p = alloc_page(GFP_KERNEL);
+ if (!p)
+ break;
+ rqstp->rq_argpages[arghi++] = p;
+ pages--;
+ }
+ rqstp->rq_arghi = arghi;
+ return ! pages;
}
/*
* Release an RPC server buffer
*/
-void
-svc_release_buffer(struct svc_buf *bufp)
+static void
+svc_release_buffer(struct svc_rqst *rqstp)
{
- kfree(bufp->area);
- bufp->area = 0;
+ while (rqstp->rq_arghi)
+ put_page(rqstp->rq_argpages[--rqstp->rq_arghi]);
+ while (rqstp->rq_resused)
+ put_page(rqstp->rq_respages[--rqstp->rq_resused]);
+ rqstp->rq_argused = 0;
}
/*
if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
- || !svc_init_buffer(&rqstp->rq_defbuf, serv->sv_bufsz))
+ || !svc_init_buffer(rqstp, serv->sv_bufsz))
goto out_thread;
serv->sv_nrthreads++;
{
struct svc_serv *serv = rqstp->rq_server;
- svc_release_buffer(&rqstp->rq_defbuf);
+ svc_release_buffer(rqstp);
if (rqstp->rq_resp)
kfree(rqstp->rq_resp);
if (rqstp->rq_argp)
struct svc_program *progp;
struct svc_version *versp = NULL; /* compiler food */
struct svc_procedure *procp = NULL;
- struct svc_buf * argp = &rqstp->rq_argbuf;
- struct svc_buf * resp = &rqstp->rq_resbuf;
+ struct iovec * argv = &rqstp->rq_arg.head[0];
+ struct iovec * resv = &rqstp->rq_res.head[0];
kxdrproc_t xdr;
- u32 *bufp, *statp;
+ u32 *statp;
u32 dir, prog, vers, proc,
auth_stat, rpc_stat;
rpc_stat = rpc_success;
- bufp = argp->buf;
- if (argp->len < 5)
+ if (argv->iov_len < 6*4)
goto err_short_len;
- dir = ntohl(*bufp++);
- vers = ntohl(*bufp++);
+ /* setup response xdr_buf.
+ * Initially it has just one page
+ */
+ take_page(rqstp); /* must succeed */
+ resv->iov_base = page_address(rqstp->rq_respages[0]);
+ resv->iov_len = 0;
+ rqstp->rq_res.pages = rqstp->rq_respages+1;
+ rqstp->rq_res.len = 0;
+ rqstp->rq_res.page_base = 0;
+ rqstp->rq_res.page_len = 0;
+ /* tcp needs a space for the record length... */
+ if (rqstp->rq_prot == IPPROTO_TCP)
+ svc_putu32(resv, 0);
+
+ rqstp->rq_xid = svc_getu32(argv);
+ svc_putu32(resv, rqstp->rq_xid);
+
+ dir = ntohl(svc_getu32(argv));
+ vers = ntohl(svc_getu32(argv));
/* First words of reply: */
- svc_putu32(resp, xdr_one); /* REPLY */
- svc_putu32(resp, xdr_zero); /* ACCEPT */
+ svc_putu32(resv, xdr_one); /* REPLY */
if (dir != 0) /* direction != CALL */
goto err_bad_dir;
if (vers != 2) /* RPC version number */
goto err_bad_rpc;
- rqstp->rq_prog = prog = ntohl(*bufp++); /* program number */
- rqstp->rq_vers = vers = ntohl(*bufp++); /* version number */
- rqstp->rq_proc = proc = ntohl(*bufp++); /* procedure number */
+ svc_putu32(resv, xdr_zero); /* ACCEPT */
- argp->buf += 5;
- argp->len -= 5;
+ rqstp->rq_prog = prog = ntohl(svc_getu32(argv)); /* program number */
+ rqstp->rq_vers = vers = ntohl(svc_getu32(argv)); /* version number */
+ rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */
/*
* Decode auth data, and add verifier to reply buffer.
serv->sv_stats->rpccnt++;
/* Build the reply header. */
- statp = resp->buf;
- svc_putu32(resp, rpc_success); /* RPC_SUCCESS */
+ statp = resv->iov_base +resv->iov_len;
+ svc_putu32(resv, rpc_success); /* RPC_SUCCESS */
/* Bump per-procedure stats counter */
procp->pc_count++;
if (!versp->vs_dispatch) {
/* Decode arguments */
xdr = procp->pc_decode;
- if (xdr && !xdr(rqstp, rqstp->rq_argbuf.buf, rqstp->rq_argp))
+ if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
goto err_garbage;
*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
/* Encode reply */
if (*statp == rpc_success && (xdr = procp->pc_encode)
- && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) {
+ && !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
dprintk("svc: failed to encode reply\n");
/* serv->sv_stats->rpcsystemerr++; */
*statp = rpc_system_err;
/* Check RPC status result */
if (*statp != rpc_success)
- resp->len = statp + 1 - resp->base;
+ resv->iov_len = ((void*)statp) - resv->iov_base + 4;
/* Release reply info */
if (procp->pc_release)
err_short_len:
#ifdef RPC_PARANOIA
- printk("svc: short len %d, dropping request\n", argp->len);
+ printk("svc: short len %d, dropping request\n", argv->iov_len);
#endif
goto dropit; /* drop request */
err_bad_rpc:
serv->sv_stats->rpcbadfmt++;
- resp->buf[-1] = xdr_one; /* REJECT */
- svc_putu32(resp, xdr_zero); /* RPC_MISMATCH */
- svc_putu32(resp, xdr_two); /* Only RPCv2 supported */
- svc_putu32(resp, xdr_two);
+ svc_putu32(resv, xdr_one); /* REJECT */
+ svc_putu32(resv, xdr_zero); /* RPC_MISMATCH */
+ svc_putu32(resv, xdr_two); /* Only RPCv2 supported */
+ svc_putu32(resv, xdr_two);
goto sendit;
err_bad_auth:
dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
serv->sv_stats->rpcbadauth++;
- resp->buf[-1] = xdr_one; /* REJECT */
- svc_putu32(resp, xdr_one); /* AUTH_ERROR */
- svc_putu32(resp, auth_stat); /* status */
+ resv->iov_len -= 4;
+ svc_putu32(resv, xdr_one); /* REJECT */
+ svc_putu32(resv, xdr_one); /* AUTH_ERROR */
+ svc_putu32(resv, auth_stat); /* status */
goto sendit;
err_bad_prog:
/* else it is just a Solaris client seeing if ACLs are supported */
#endif
serv->sv_stats->rpcbadfmt++;
- svc_putu32(resp, rpc_prog_unavail);
+ svc_putu32(resv, rpc_prog_unavail);
goto sendit;
err_bad_vers:
printk("svc: unknown version (%d)\n", vers);
#endif
serv->sv_stats->rpcbadfmt++;
- svc_putu32(resp, rpc_prog_mismatch);
- svc_putu32(resp, htonl(progp->pg_lovers));
- svc_putu32(resp, htonl(progp->pg_hivers));
+ svc_putu32(resv, rpc_prog_mismatch);
+ svc_putu32(resv, htonl(progp->pg_lovers));
+ svc_putu32(resv, htonl(progp->pg_hivers));
goto sendit;
err_bad_proc:
printk("svc: unknown procedure (%d)\n", proc);
#endif
serv->sv_stats->rpcbadfmt++;
- svc_putu32(resp, rpc_proc_unavail);
+ svc_putu32(resv, rpc_proc_unavail);
goto sendit;
err_garbage:
printk("svc: failed to decode args\n");
#endif
serv->sv_stats->rpcbadfmt++;
- svc_putu32(resp, rpc_garbage_args);
+ svc_putu32(resv, rpc_garbage_args);
goto sendit;
}
*statp = rpc_success;
*authp = rpc_auth_ok;
- svc_getu32(&rqstp->rq_argbuf, flavor);
- flavor = ntohl(flavor);
+ flavor = ntohl(svc_getu32(&rqstp->rq_arg.head[0]));
dprintk("svc: svc_authenticate (%d)\n", flavor);
if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor])) {
static int
svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp, int proc)
{
- struct svc_buf *argp = &rqstp->rq_argbuf;
- struct svc_buf *resp = &rqstp->rq_resbuf;
+ struct iovec *argv = &rqstp->rq_arg.head[0];
+ struct iovec *resv = &rqstp->rq_res.head[0];
int rv=0;
struct ip_map key, *ipm;
- if ((argp->len -= 3) < 0) {
+ if (argv->iov_len < 3*4)
return SVC_GARBAGE;
- }
- if (*(argp->buf)++ != 0) { /* we already skipped the flavor */
+
+ if (svc_getu32(argv) != 0) {
dprintk("svc: bad null cred\n");
*authp = rpc_autherr_badcred;
return SVC_DENIED;
}
- if (*(argp->buf)++ != RPC_AUTH_NULL || *(argp->buf)++ != 0) {
+ if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
dprintk("svc: bad null verf\n");
*authp = rpc_autherr_badverf;
return SVC_DENIED;
rqstp->rq_cred.cr_groups[0] = NOGROUP;
/* Put NULL verifier */
- svc_putu32(resp, RPC_AUTH_NULL);
- svc_putu32(resp, 0);
+ svc_putu32(resv, RPC_AUTH_NULL);
+ svc_putu32(resv, 0);
key.m_class = rqstp->rq_server->sv_program->pg_class;
key.m_addr = rqstp->rq_addr.sin_addr;
int
svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp, int proc)
{
- struct svc_buf *argp = &rqstp->rq_argbuf;
- struct svc_buf *resp = &rqstp->rq_resbuf;
+ struct iovec *argv = &rqstp->rq_arg.head[0];
+ struct iovec *resv = &rqstp->rq_res.head[0];
struct svc_cred *cred = &rqstp->rq_cred;
- u32 *bufp = argp->buf, slen, i;
- int len = argp->len;
+ u32 slen, i;
+ int len = argv->iov_len;
int rv=0;
struct ip_map key, *ipm;
- if ((len -= 3) < 0)
+ if ((len -= 3*4) < 0)
return SVC_GARBAGE;
- bufp++; /* length */
- bufp++; /* time stamp */
- slen = XDR_QUADLEN(ntohl(*bufp++)); /* machname length */
- if (slen > 64 || (len -= slen + 3) < 0)
+ svc_getu32(argv); /* length */
+ svc_getu32(argv); /* time stamp */
+ slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); /* machname length */
+ if (slen > 64 || (len -= (slen + 3)*4) < 0)
goto badcred;
- bufp += slen; /* skip machname */
-
- cred->cr_uid = ntohl(*bufp++); /* uid */
- cred->cr_gid = ntohl(*bufp++); /* gid */
+ argv->iov_base = (void*)((u32*)argv->iov_base + slen); /* skip machname */
+ argv->iov_len -= slen*4;
- slen = ntohl(*bufp++); /* gids length */
- if (slen > 16 || (len -= slen + 2) < 0)
+ cred->cr_uid = ntohl(svc_getu32(argv)); /* uid */
+ cred->cr_gid = ntohl(svc_getu32(argv)); /* gid */
+ slen = ntohl(svc_getu32(argv)); /* gids length */
+ if (slen > 16 || (len -= (slen + 2)*4) < 0)
goto badcred;
- for (i = 0; i < NGROUPS && i < slen; i++)
- cred->cr_groups[i] = ntohl(*bufp++);
+ for (i = 0; i < slen; i++)
+ if (i < NGROUPS)
+ cred->cr_groups[i] = ntohl(svc_getu32(argv));
+ else
+ svc_getu32(argv);
if (i < NGROUPS)
cred->cr_groups[i] = NOGROUP;
- bufp += (slen - i);
- if (*bufp++ != RPC_AUTH_NULL || *bufp++ != 0) {
+ if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
return SVC_DENIED;
}
- argp->buf = bufp;
- argp->len = len;
-
/* Put NULL verifier */
- svc_putu32(resp, RPC_AUTH_NULL);
- svc_putu32(resp, 0);
+ svc_putu32(resv, RPC_AUTH_NULL);
+ svc_putu32(resv, 0);
key.m_class = rqstp->rq_server->sv_program->pg_class;
key.m_addr = rqstp->rq_addr.sin_addr;
+
ipm = ip_map_lookup(&key, 0);
rqstp->rq_client = NULL;
-
if (ipm)
switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
case -EAGAIN:
*/
void svc_reserve(struct svc_rqst *rqstp, int space)
{
- space += rqstp->rq_resbuf.len<<2;
+ space += rqstp->rq_res.head[0].iov_len;
if (space < rqstp->rq_reserved) {
struct svc_sock *svsk = rqstp->rq_sock;
* But first, check that enough space was reserved
* for the reply, otherwise we have a bug!
*/
- if ((rqstp->rq_resbuf.len<<2) > rqstp->rq_reserved)
+ if ((rqstp->rq_res.len) > rqstp->rq_reserved)
printk(KERN_ERR "RPC request reserved %d but used %d\n",
rqstp->rq_reserved,
- rqstp->rq_resbuf.len<<2);
+ rqstp->rq_res.len);
- rqstp->rq_resbuf.buf = rqstp->rq_resbuf.base;
- rqstp->rq_resbuf.len = 0;
+ rqstp->rq_res.head[0].iov_len = 0;
svc_reserve(rqstp, 0);
rqstp->rq_sock = NULL;
len = sock_sendmsg(sock, &msg, buflen);
set_fs(oldfs);
- dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d\n",
- rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len);
+ dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d (addr %x)\n",
+ rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len,
+ rqstp->rq_addr.sin_addr.s_addr);
return len;
}
/*
* Receive a datagram from a UDP socket.
*/
+extern int
+csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb);
+
static int
svc_udp_recvfrom(struct svc_rqst *rqstp)
{
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server;
struct sk_buff *skb;
- u32 *data;
int err, len;
if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
}
set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
- /* Sorry. */
- if (skb_is_nonlinear(skb)) {
- if (skb_linearize(skb, GFP_KERNEL) != 0) {
- kfree_skb(skb);
- svc_sock_received(svsk);
- return 0;
- }
- }
+ len = skb->len - sizeof(struct udphdr);
- if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
- if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
- skb_free_datagram(svsk->sk_sk, skb);
- svc_sock_received(svsk);
- return 0;
- }
+ if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
+ /* checksum error */
+ skb_free_datagram(svsk->sk_sk, skb);
+ svc_sock_received(svsk);
+ return 0;
}
- len = skb->len - sizeof(struct udphdr);
- data = (u32 *) (skb->data + sizeof(struct udphdr));
-
- rqstp->rq_skbuff = skb;
- rqstp->rq_argbuf.base = data;
- rqstp->rq_argbuf.buf = data;
- rqstp->rq_argbuf.len = (len >> 2);
- rqstp->rq_argbuf.buflen = (len >> 2);
- /* rqstp->rq_resbuf = rqstp->rq_defbuf; */
+ rqstp->rq_arg.len = len;
+ rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
+ rqstp->rq_argused += (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE;
rqstp->rq_prot = IPPROTO_UDP;
/* Get sender address */
rqstp->rq_addr.sin_port = skb->h.uh->source;
rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr;
+ skb_free_datagram(svsk->sk_sk, skb);
+
if (serv->sv_stats)
serv->sv_stats->netudpcnt++;
static int
svc_udp_sendto(struct svc_rqst *rqstp)
{
- struct svc_buf *bufp = &rqstp->rq_resbuf;
int error;
+ struct iovec vec[RPCSVC_MAXPAGES];
+ int v;
+ int base, len;
/* Set up the first element of the reply iovec.
* Any other iovecs that may be in use have been taken
* care of by the server implementation itself.
*/
- /* bufp->base = bufp->area; */
- bufp->iov[0].iov_base = bufp->base;
- bufp->iov[0].iov_len = bufp->len << 2;
-
- error = svc_sendto(rqstp, bufp->iov, bufp->nriov);
+ vec[0] = rqstp->rq_res.head[0];
+ v=1;
+ base=rqstp->rq_res.page_base;
+ len = rqstp->rq_res.page_len;
+ while (len) {
+ vec[v].iov_base = page_address(rqstp->rq_res.pages[v-1]) + base;
+ vec[v].iov_len = PAGE_SIZE-base;
+ if (len <= vec[v].iov_len)
+ vec[v].iov_len = len;
+ len -= vec[v].iov_len;
+ base = 0;
+ v++;
+ }
+ if (rqstp->rq_res.tail[0].iov_len) {
+ vec[v] = rqstp->rq_res.tail[0];
+ v++;
+ }
+ error = svc_sendto(rqstp, vec, v);
if (error == -ECONNREFUSED)
/* ICMP error on earlier request. */
- error = svc_sendto(rqstp, bufp->iov, bufp->nriov);
+ error = svc_sendto(rqstp, vec, v);
return error;
}
{
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server;
- struct svc_buf *bufp = &rqstp->rq_argbuf;
int len;
+ struct iovec vec[RPCSVC_MAXPAGES];
+ int pnum, vlen;
dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
svsk, test_bit(SK_DATA, &svsk->sk_flags),
}
svsk->sk_reclen &= 0x7fffffff;
dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
- if (svsk->sk_reclen > (bufp->buflen<<2)) {
+ if (svsk->sk_reclen > serv->sv_bufsz) {
printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n",
(unsigned long) svsk->sk_reclen);
goto err_delete;
svc_sock_received(svsk);
return -EAGAIN; /* record not complete */
}
+ len = svsk->sk_reclen;
set_bit(SK_DATA, &svsk->sk_flags);
- /* Frob argbuf */
- bufp->iov[0].iov_base += 4;
- bufp->iov[0].iov_len -= 4;
+ vec[0] = rqstp->rq_arg.head[0];
+ vlen = PAGE_SIZE;
+ pnum = 1;
+ while (vlen < len) {
+ vec[pnum].iov_base = page_address(rqstp->rq_argpages[rqstp->rq_argused++]);
+ vec[pnum].iov_len = PAGE_SIZE;
+ pnum++;
+ vlen += PAGE_SIZE;
+ }
/* Now receive data */
- len = svc_recvfrom(rqstp, bufp->iov, bufp->nriov, svsk->sk_reclen);
+ len = svc_recvfrom(rqstp, vec, pnum, len);
if (len < 0)
goto error;
dprintk("svc: TCP complete record (%d bytes)\n", len);
-
- /* Position reply write pointer immediately after args,
- * allowing for record length */
- rqstp->rq_resbuf.base = rqstp->rq_argbuf.base + 1 + (len>>2);
- rqstp->rq_resbuf.buf = rqstp->rq_resbuf.base + 1;
- rqstp->rq_resbuf.len = 1;
- rqstp->rq_resbuf.buflen= rqstp->rq_argbuf.buflen - (len>>2) - 1;
+ rqstp->rq_arg.len = len;
+ rqstp->rq_arg.page_base = 0;
+ if (len <= rqstp->rq_arg.head[0].iov_len) {
+ rqstp->rq_arg.head[0].iov_len = len;
+ rqstp->rq_arg.page_len = 0;
+ } else {
+ rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
+ }
rqstp->rq_skbuff = 0;
- rqstp->rq_argbuf.buf += 1;
- rqstp->rq_argbuf.len = (len >> 2);
- rqstp->rq_argbuf.buflen = (len >> 2) +1;
rqstp->rq_prot = IPPROTO_TCP;
/* Reset TCP read info */
static int
svc_tcp_sendto(struct svc_rqst *rqstp)
{
- struct svc_buf *bufp = &rqstp->rq_resbuf;
+ struct xdr_buf *xbufp = &rqstp->rq_res;
+ struct iovec vec[RPCSVC_MAXPAGES];
+ int v;
+ int base, len;
int sent;
+ u32 reclen;
/* Set up the first element of the reply iovec.
* Any other iovecs that may be in use have been taken
* care of by the server implementation itself.
*/
- bufp->iov[0].iov_base = bufp->base;
- bufp->iov[0].iov_len = bufp->len << 2;
- bufp->base[0] = htonl(0x80000000|((bufp->len << 2) - 4));
+ reclen = htonl(0x80000000|((xbufp->len ) - 4));
+ memcpy(xbufp->head[0].iov_base, &reclen, 4);
+
+ vec[0] = rqstp->rq_res.head[0];
+ v=1;
+ base= xbufp->page_base;
+ len = xbufp->page_len;
+ while (len) {
+ vec[v].iov_base = page_address(xbufp->pages[v-1]) + base;
+ vec[v].iov_len = PAGE_SIZE-base;
+ if (len <= vec[v].iov_len)
+ vec[v].iov_len = len;
+ len -= vec[v].iov_len;
+ base = 0;
+ v++;
+ }
+ if (xbufp->tail[0].iov_len) {
+ vec[v] = xbufp->tail[0];
+ v++;
+ }
- sent = svc_sendto(rqstp, bufp->iov, bufp->nriov);
- if (sent != bufp->len<<2) {
+ sent = svc_sendto(rqstp, vec, v);
+ if (sent != xbufp->len) {
printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
rqstp->rq_sock->sk_server->sv_name,
(sent<0)?"got error":"sent only",
- sent, bufp->len << 2);
+ sent, xbufp->len);
svc_delete_socket(rqstp->rq_sock);
sent = -EAGAIN;
}
{
struct svc_sock *svsk =NULL;
int len;
+ int pages;
+ struct xdr_buf *arg;
DECLARE_WAITQUEUE(wait, current);
dprintk("svc: server %p waiting for data (to = %ld)\n",
rqstp);
/* Initialize the buffers */
- rqstp->rq_argbuf = rqstp->rq_defbuf;
- rqstp->rq_resbuf = rqstp->rq_defbuf;
+ /* first reclaim pages that were moved to response list */
+ while (rqstp->rq_resused)
+ rqstp->rq_argpages[rqstp->rq_arghi++] =
+ rqstp->rq_respages[--rqstp->rq_resused];
+ /* now allocate needed pages. If we get a failure, sleep briefly */
+ pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE;
+ while (rqstp->rq_arghi < pages) {
+ struct page *p = alloc_page(GFP_KERNEL);
+ if (!p) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(HZ/2);
+ current->state = TASK_RUNNING;
+ continue;
+ }
+ rqstp->rq_argpages[rqstp->rq_arghi++] = p;
+ }
+ /* Make arg->head point to first page and arg->pages point to rest */
+ arg = &rqstp->rq_arg;
+ arg->head[0].iov_base = page_address(rqstp->rq_argpages[0]);
+ arg->head[0].iov_len = PAGE_SIZE;
+ rqstp->rq_argused = 1;
+ arg->pages = rqstp->rq_argpages + 1;
+ arg->page_base = 0;
+ /* save at least one page for response */
+ arg->page_len = (pages-2)*PAGE_SIZE;
+ arg->len = (pages-1)*PAGE_SIZE;
+ arg->tail[0].iov_len = 0;
+
if (signalled())
return -EINTR;
rqstp->rq_userset = 0;
rqstp->rq_chandle.defer = svc_defer;
- svc_getu32(&rqstp->rq_argbuf, rqstp->rq_xid);
- svc_putu32(&rqstp->rq_resbuf, rqstp->rq_xid);
-
- /* Assume that the reply consists of a single buffer. */
- rqstp->rq_resbuf.nriov = 1;
-
if (serv->sv_stats)
serv->sv_stats->netcnt++;
return len;
svc_defer(struct cache_req *req)
{
struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
- int size = sizeof(struct svc_deferred_req) + (rqstp->rq_argbuf.buflen << 2);
+ int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.head[0].iov_len);
struct svc_deferred_req *dr;
+ if (rqstp->rq_arg.page_len)
+ return NULL; /* if more than a page, give up FIXME */
if (rqstp->rq_deferred) {
dr = rqstp->rq_deferred;
rqstp->rq_deferred = NULL;
} else {
/* FIXME maybe discard if size too large */
- dr = kmalloc(size<<2, GFP_KERNEL);
+ dr = kmalloc(size, GFP_KERNEL);
if (dr == NULL)
return NULL;
dr->serv = rqstp->rq_server;
dr->prot = rqstp->rq_prot;
dr->addr = rqstp->rq_addr;
- dr->argslen = rqstp->rq_argbuf.buflen;
- memcpy(dr->args, rqstp->rq_argbuf.base, dr->argslen<<2);
+ dr->argslen = rqstp->rq_arg.head[0].iov_len >> 2;
+ memcpy(dr->args, rqstp->rq_arg.head[0].iov_base, dr->argslen<<2);
}
spin_lock(&rqstp->rq_server->sv_lock);
rqstp->rq_sock->sk_inuse++;
{
struct svc_deferred_req *dr = rqstp->rq_deferred;
- rqstp->rq_argbuf.base = dr->args;
- rqstp->rq_argbuf.buf = dr->args;
- rqstp->rq_argbuf.len = dr->argslen;
- rqstp->rq_argbuf.buflen = dr->argslen;
+ rqstp->rq_arg.head[0].iov_base = dr->args;
+ rqstp->rq_arg.head[0].iov_len = dr->argslen<<2;
+ rqstp->rq_arg.page_len = 0;
+ rqstp->rq_arg.len = dr->argslen<<2;
rqstp->rq_prot = dr->prot;
rqstp->rq_addr = dr->addr;
return dr->argslen<<2;
* We have set things up such that we perform the checksum of the UDP
* packet in parallel with the copies into the RPC client iovec. -DaveM
*/
-static int
+int
csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{
skb_reader_t desc;