--- /dev/null
+All of the above
+---
+ fs/lockd/host.c | 39 +
+ fs/lockd/mon.c | 2
+ fs/lockd/svc.c | 6
+ fs/nfs/Makefile | 4
+ fs/nfs/client.c | 28 -
+ fs/nfs/delegation.c | 186 +++--
+ fs/nfs/delegation.h | 26 -
+ fs/nfs/dir.c | 16
+ fs/nfs/direct.c | 34 +
+ fs/nfs/inode.c | 73 +-
+ fs/nfs/internal.h | 4
+ fs/nfs/mount_clnt.c | 169 +++--
+ fs/nfs/nfs2xdr.c | 6
+ fs/nfs/nfs3proc.c | 4
+ fs/nfs/nfs3xdr.c | 8
+ fs/nfs/nfs4_fs.h | 40 +
+ fs/nfs/nfs4proc.c | 760 +++++++++++++---------
+ fs/nfs/nfs4state.c | 310 ++++++---
+ fs/nfs/nfs4xdr.c | 126 ++--
+ fs/nfs/nfsroot.c | 5
+ fs/nfs/pagelist.c | 60 +-
+ fs/nfs/read.c | 40 +
+ fs/nfs/super.c | 1189 +++++++++++++++++++++++++++++-----
+ fs/nfs/write.c | 149 ++--
+ fs/nfsd/nfs4callback.c | 18 -
+ fs/nfsd/nfs4state.c | 1
+ include/linux/lockd/lockd.h | 1
+ include/linux/nfs4.h | 1
+ include/linux/nfs4_mount.h | 3
+ include/linux/nfs_fs.h | 28 -
+ include/linux/nfs_fs_sb.h | 8
+ include/linux/nfs_mount.h | 3
+ include/linux/nfs_page.h | 25 -
+ include/linux/nfs_xdr.h | 5
+ include/linux/sunrpc/auth.h | 48 +
+ include/linux/sunrpc/auth_gss.h | 6
+ include/linux/sunrpc/clnt.h | 33 -
+ include/linux/sunrpc/gss_api.h | 2
+ include/linux/sunrpc/rpc_pipe_fs.h | 2
+ include/linux/sunrpc/sched.h | 6
+ include/linux/sunrpc/svcsock.h | 1
+ include/linux/sunrpc/xprt.h | 16
+ kernel/auditsc.c | 1
+ net/sunrpc/auth.c | 357 +++++++---
+ net/sunrpc/auth_gss/auth_gss.c | 339 ++++++----
+ net/sunrpc/auth_gss/gss_krb5_mech.c | 2
+ net/sunrpc/auth_gss/gss_spkm3_mech.c | 2
+ net/sunrpc/auth_null.c | 10
+ net/sunrpc/auth_unix.c | 54 +-
+ net/sunrpc/clnt.c | 367 +++++++---
+ net/sunrpc/rpc_pipe.c | 87 ++
+ net/sunrpc/rpcb_clnt.c | 65 +-
+ net/sunrpc/sched.c | 209 ++----
+ net/sunrpc/sunrpc_syms.c | 8
+ net/sunrpc/svcsock.c | 20 +
+ net/sunrpc/xprt.c | 19 -
+ net/sunrpc/xprtsock.c | 81 +-
+ 57 files changed, 3305 insertions(+), 1807 deletions(-)
+
+diff --git a/fs/lockd/host.c b/fs/lockd/host.c
+index 96070bf..572601e 100644
+--- a/fs/lockd/host.c
++++ b/fs/lockd/host.c
+@@ -44,9 +44,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin,
+ */
+ static struct nlm_host *
+ nlm_lookup_host(int server, const struct sockaddr_in *sin,
+- int proto, int version,
+- const char *hostname,
+- int hostname_len)
++ int proto, int version, const char *hostname,
++ int hostname_len, const struct sockaddr_in *ssin)
+ {
+ struct hlist_head *chain;
+ struct hlist_node *pos;
+@@ -54,7 +53,9 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
+ struct nsm_handle *nsm = NULL;
+ int hash;
+
+- dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
++ dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
++ ", p=%d, v=%d, my role=%s, name=%.*s)\n",
++ NIPQUAD(ssin->sin_addr.s_addr),
+ NIPQUAD(sin->sin_addr.s_addr), proto, version,
+ server? "server" : "client",
+ hostname_len,
+@@ -91,6 +92,8 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
+ continue;
+ if (host->h_server != server)
+ continue;
++ if (!nlm_cmp_addr(&host->h_saddr, ssin))
++ continue;
+
+ /* Move to head of hash chain. */
+ hlist_del(&host->h_hash);
+@@ -118,6 +121,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
+ host->h_name = nsm->sm_name;
+ host->h_addr = *sin;
+ host->h_addr.sin_port = 0; /* ouch! */
++ host->h_saddr = *ssin;
+ host->h_version = version;
+ host->h_proto = proto;
+ host->h_rpcclnt = NULL;
+@@ -161,15 +165,9 @@ nlm_destroy_host(struct nlm_host *host)
+ */
+ nsm_unmonitor(host);
+
+- if ((clnt = host->h_rpcclnt) != NULL) {
+- if (atomic_read(&clnt->cl_users)) {
+- printk(KERN_WARNING
+- "lockd: active RPC handle\n");
+- clnt->cl_dead = 1;
+- } else {
+- rpc_destroy_client(host->h_rpcclnt);
+- }
+- }
++ clnt = host->h_rpcclnt;
++ if (clnt != NULL)
++ rpc_shutdown_client(clnt);
+ kfree(host);
+ }
+
+@@ -180,8 +178,10 @@ struct nlm_host *
+ nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
+ const char *hostname, int hostname_len)
+ {
++ struct sockaddr_in ssin = {0};
++
+ return nlm_lookup_host(0, sin, proto, version,
+- hostname, hostname_len);
++ hostname, hostname_len, &ssin);
+ }
+
+ /*
+@@ -191,9 +191,12 @@ struct nlm_host *
+ nlmsvc_lookup_host(struct svc_rqst *rqstp,
+ const char *hostname, int hostname_len)
+ {
++ struct sockaddr_in ssin = {0};
++
++ ssin.sin_addr = rqstp->rq_daddr.addr;
+ return nlm_lookup_host(1, svc_addr_in(rqstp),
+ rqstp->rq_prot, rqstp->rq_vers,
+- hostname, hostname_len);
++ hostname, hostname_len, &ssin);
+ }
+
+ /*
+@@ -204,8 +207,9 @@ nlm_bind_host(struct nlm_host *host)
+ {
+ struct rpc_clnt *clnt;
+
+- dprintk("lockd: nlm_bind_host(%08x)\n",
+- (unsigned)ntohl(host->h_addr.sin_addr.s_addr));
++ dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n",
++ NIPQUAD(host->h_saddr.sin_addr),
++ NIPQUAD(host->h_addr.sin_addr));
+
+ /* Lock host handle */
+ mutex_lock(&host->h_mutex);
+@@ -232,6 +236,7 @@ nlm_bind_host(struct nlm_host *host)
+ .protocol = host->h_proto,
+ .address = (struct sockaddr *)&host->h_addr,
+ .addrsize = sizeof(host->h_addr),
++ .saddress = (struct sockaddr *)&host->h_saddr,
+ .timeout = &timeparms,
+ .servername = host->h_name,
+ .program = &nlm_program,
+diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
+index 2102e2d..3353ed8 100644
+--- a/fs/lockd/mon.c
++++ b/fs/lockd/mon.c
+@@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
+ status);
+ else
+ status = 0;
++ rpc_shutdown_client(clnt);
+ out:
+ return status;
+ }
+@@ -138,7 +139,6 @@ nsm_create(void)
+ .program = &nsm_program,
+ .version = SM_VERSION,
+ .authflavor = RPC_AUTH_NULL,
+- .flags = (RPC_CLNT_CREATE_ONESHOT),
+ };
+
+ return rpc_create(&args);
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 126b1bf..2680932 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp)
+ /* Process request with signals blocked, but allow SIGKILL. */
+ allow_signal(SIGKILL);
+
+- /* kick rpciod */
+- rpciod_up();
+-
+ dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
+
+ if (!nlm_timeout)
+@@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp)
+ /* Exit the RPC thread */
+ svc_exit_thread(rqstp);
+
+- /* release rpciod */
+- rpciod_down();
+-
+ /* Release module */
+ unlock_kernel();
+ module_put_and_exit(0);
+diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
+index f4580b4..b55cb23 100644
+--- a/fs/nfs/Makefile
++++ b/fs/nfs/Makefile
+@@ -6,8 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
+
+ nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
+ pagelist.o proc.o read.o symlink.o unlink.o \
+- write.o namespace.o
+-nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
++ write.o namespace.o mount_clnt.o
++nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
+ nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
+ nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
+ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
+diff --git a/fs/nfs/client.c b/fs/nfs/client.c
+index 881fa49..ccb4550 100644
+--- a/fs/nfs/client.c
++++ b/fs/nfs/client.c
+@@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
+ int nfsversion)
+ {
+ struct nfs_client *clp;
+- int error;
+
+ if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
+ goto error_0;
+
+- error = rpciod_up();
+- if (error < 0) {
+- dprintk("%s: couldn't start rpciod! Error = %d\n",
+- __FUNCTION__, error);
+- goto error_1;
+- }
+- __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+-
+ if (nfsversion == 4) {
+ if (nfs_callback_up() < 0)
+ goto error_2;
+@@ -139,8 +130,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
+ #ifdef CONFIG_NFS_V4
+ init_rwsem(&clp->cl_sem);
+ INIT_LIST_HEAD(&clp->cl_delegations);
+- INIT_LIST_HEAD(&clp->cl_state_owners);
+- INIT_LIST_HEAD(&clp->cl_unused);
+ spin_lock_init(&clp->cl_lock);
+ INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
+ rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
+@@ -154,9 +143,6 @@ error_3:
+ if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+ nfs_callback_down();
+ error_2:
+- rpciod_down();
+- __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+-error_1:
+ kfree(clp);
+ error_0:
+ return NULL;
+@@ -167,16 +153,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
+ #ifdef CONFIG_NFS_V4
+ if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
+ nfs4_kill_renewd(clp);
+- while (!list_empty(&clp->cl_unused)) {
+- struct nfs4_state_owner *sp;
+-
+- sp = list_entry(clp->cl_unused.next,
+- struct nfs4_state_owner,
+- so_list);
+- list_del(&sp->so_list);
+- kfree(sp);
+- }
+- BUG_ON(!list_empty(&clp->cl_state_owners));
++ BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
+ if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
+ nfs_idmap_delete(clp);
+ #endif
+@@ -198,9 +175,6 @@ static void nfs_free_client(struct nfs_client *clp)
+ if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+ nfs_callback_down();
+
+- if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
+- rpciod_down();
+-
+ kfree(clp->cl_hostname);
+ kfree(clp);
+
+diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
+index 7f37d1b..20ac403 100644
+--- a/fs/nfs/delegation.c
++++ b/fs/nfs/delegation.c
+@@ -27,6 +27,13 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
+ kfree(delegation);
+ }
+
++static void nfs_free_delegation_callback(struct rcu_head *head)
++{
++ struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
++
++ nfs_free_delegation(delegation);
++}
++
+ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+ struct inode *inode = state->inode;
+@@ -57,7 +64,7 @@ out_err:
+ return status;
+ }
+
+-static void nfs_delegation_claim_opens(struct inode *inode)
++static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
+ {
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_open_context *ctx;
+@@ -72,9 +79,11 @@ again:
+ continue;
+ if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+ continue;
++ if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
++ continue;
+ get_nfs_open_context(ctx);
+ spin_unlock(&inode->i_lock);
+- err = nfs4_open_delegation_recall(ctx->dentry, state);
++ err = nfs4_open_delegation_recall(ctx, state, stateid);
+ if (err >= 0)
+ err = nfs_delegation_claim_locks(ctx, state);
+ put_nfs_open_context(ctx);
+@@ -115,10 +124,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
+ struct nfs_delegation *delegation;
+ int status = 0;
+
+- /* Ensure we first revalidate the attributes and page cache! */
+- if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
+- __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+-
+ delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
+ if (delegation == NULL)
+ return -ENOMEM;
+@@ -131,10 +136,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
+ delegation->inode = inode;
+
+ spin_lock(&clp->cl_lock);
+- if (nfsi->delegation == NULL) {
+- list_add(&delegation->super_list, &clp->cl_delegations);
+- nfsi->delegation = delegation;
++ if (rcu_dereference(nfsi->delegation) == NULL) {
++ list_add_rcu(&delegation->super_list, &clp->cl_delegations);
+ nfsi->delegation_state = delegation->type;
++ rcu_assign_pointer(nfsi->delegation, delegation);
+ delegation = NULL;
+ } else {
+ if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
+@@ -145,6 +150,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
+ status = -EIO;
+ }
+ }
++
++ /* Ensure we revalidate the attributes and page cache! */
++ spin_lock(&inode->i_lock);
++ nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
++ spin_unlock(&inode->i_lock);
++
+ spin_unlock(&clp->cl_lock);
+ kfree(delegation);
+ return status;
+@@ -155,7 +166,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
+ int res = 0;
+
+ res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
+- nfs_free_delegation(delegation);
++ call_rcu(&delegation->rcu, nfs_free_delegation_callback);
+ return res;
+ }
+
+@@ -170,33 +181,55 @@ static void nfs_msync_inode(struct inode *inode)
+ /*
+ * Basic procedure for returning a delegation to the server
+ */
+-int __nfs_inode_return_delegation(struct inode *inode)
++static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
+ {
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_inode *nfsi = NFS_I(inode);
+- struct nfs_delegation *delegation;
+- int res = 0;
+
+ nfs_msync_inode(inode);
+ down_read(&clp->cl_sem);
+ /* Guard against new delegated open calls */
+ down_write(&nfsi->rwsem);
+- spin_lock(&clp->cl_lock);
+- delegation = nfsi->delegation;
+- if (delegation != NULL) {
+- list_del_init(&delegation->super_list);
+- nfsi->delegation = NULL;
+- nfsi->delegation_state = 0;
+- }
+- spin_unlock(&clp->cl_lock);
+- nfs_delegation_claim_opens(inode);
++ nfs_delegation_claim_opens(inode, &delegation->stateid);
+ up_write(&nfsi->rwsem);
+ up_read(&clp->cl_sem);
+ nfs_msync_inode(inode);
+
+- if (delegation != NULL)
+- res = nfs_do_return_delegation(inode, delegation);
+- return res;
++ return nfs_do_return_delegation(inode, delegation);
++}
++
++static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
++{
++ struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
++
++ if (delegation == NULL)
++ goto nomatch;
++ if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
++ sizeof(delegation->stateid.data)) != 0)
++ goto nomatch;
++ list_del_rcu(&delegation->super_list);
++ nfsi->delegation_state = 0;
++ rcu_assign_pointer(nfsi->delegation, NULL);
++ return delegation;
++nomatch:
++ return NULL;
++}
++
++int nfs_inode_return_delegation(struct inode *inode)
++{
++ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
++ struct nfs_inode *nfsi = NFS_I(inode);
++ struct nfs_delegation *delegation;
++ int err = 0;
++
++ if (rcu_dereference(nfsi->delegation) != NULL) {
++ spin_lock(&clp->cl_lock);
++ delegation = nfs_detach_delegation_locked(nfsi, NULL);
++ spin_unlock(&clp->cl_lock);
++ if (delegation != NULL)
++ err = __nfs_inode_return_delegation(inode, delegation);
++ }
++ return err;
+ }
+
+ /*
+@@ -211,19 +244,23 @@ void nfs_return_all_delegations(struct super_block *sb)
+ if (clp == NULL)
+ return;
+ restart:
+- spin_lock(&clp->cl_lock);
+- list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
++ rcu_read_lock();
++ list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+ if (delegation->inode->i_sb != sb)
+ continue;
+ inode = igrab(delegation->inode);
+ if (inode == NULL)
+ continue;
++ spin_lock(&clp->cl_lock);
++ delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
+ spin_unlock(&clp->cl_lock);
+- nfs_inode_return_delegation(inode);
++ rcu_read_unlock();
++ if (delegation != NULL)
++ __nfs_inode_return_delegation(inode, delegation);
+ iput(inode);
+ goto restart;
+ }
+- spin_unlock(&clp->cl_lock);
++ rcu_read_unlock();
+ }
+
+ static int nfs_do_expire_all_delegations(void *ptr)
+@@ -234,22 +271,26 @@ static int nfs_do_expire_all_delegations(void *ptr)
+
+ allow_signal(SIGKILL);
+ restart:
+- spin_lock(&clp->cl_lock);
+ if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
+ goto out;
+ if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
+ goto out;
+- list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
++ rcu_read_lock();
++ list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+ inode = igrab(delegation->inode);
+ if (inode == NULL)
+ continue;
++ spin_lock(&clp->cl_lock);
++ delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
+ spin_unlock(&clp->cl_lock);
+- nfs_inode_return_delegation(inode);
++ rcu_read_unlock();
++ if (delegation)
++ __nfs_inode_return_delegation(inode, delegation);
+ iput(inode);
+ goto restart;
+ }
++ rcu_read_unlock();
+ out:
+- spin_unlock(&clp->cl_lock);
+ nfs_put_client(clp);
+ module_put_and_exit(0);
+ }
+@@ -280,17 +321,21 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
+ if (clp == NULL)
+ return;
+ restart:
+- spin_lock(&clp->cl_lock);
+- list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
++ rcu_read_lock();
++ list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+ inode = igrab(delegation->inode);
+ if (inode == NULL)
+ continue;
++ spin_lock(&clp->cl_lock);
++ delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
+ spin_unlock(&clp->cl_lock);
+- nfs_inode_return_delegation(inode);
++ rcu_read_unlock();
++ if (delegation != NULL)
++ __nfs_inode_return_delegation(inode, delegation);
+ iput(inode);
+ goto restart;
+ }
+- spin_unlock(&clp->cl_lock);
++ rcu_read_unlock();
+ }
+
+ struct recall_threadargs {
+@@ -316,21 +361,14 @@ static int recall_thread(void *data)
+ down_read(&clp->cl_sem);
+ down_write(&nfsi->rwsem);
+ spin_lock(&clp->cl_lock);
+- delegation = nfsi->delegation;
+- if (delegation != NULL && memcmp(delegation->stateid.data,
+- args->stateid->data,
+- sizeof(delegation->stateid.data)) == 0) {
+- list_del_init(&delegation->super_list);
+- nfsi->delegation = NULL;
+- nfsi->delegation_state = 0;
++ delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
++ if (delegation != NULL)
+ args->result = 0;
+- } else {
+- delegation = NULL;
++ else
+ args->result = -ENOENT;
+- }
+ spin_unlock(&clp->cl_lock);
+ complete(&args->started);
+- nfs_delegation_claim_opens(inode);
++ nfs_delegation_claim_opens(inode, args->stateid);
+ up_write(&nfsi->rwsem);
+ up_read(&clp->cl_sem);
+ nfs_msync_inode(inode);
+@@ -371,14 +409,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
+ {
+ struct nfs_delegation *delegation;
+ struct inode *res = NULL;
+- spin_lock(&clp->cl_lock);
+- list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
++ rcu_read_lock();
++ list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+ if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
+ res = igrab(delegation->inode);
+ break;
+ }
+ }
+- spin_unlock(&clp->cl_lock);
++ rcu_read_unlock();
+ return res;
+ }
+
+@@ -388,10 +426,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
+ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
+ {
+ struct nfs_delegation *delegation;
+- spin_lock(&clp->cl_lock);
+- list_for_each_entry(delegation, &clp->cl_delegations, super_list)
++ rcu_read_lock();
++ list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
+ delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
+- spin_unlock(&clp->cl_lock);
++ rcu_read_unlock();
+ }
+
+ /*
+@@ -399,39 +437,35 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
+ */
+ void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
+ {
+- struct nfs_delegation *delegation, *n;
+- LIST_HEAD(head);
+- spin_lock(&clp->cl_lock);
+- list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) {
++ struct nfs_delegation *delegation;
++restart:
++ rcu_read_lock();
++ list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
+ if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
+ continue;
+- list_move(&delegation->super_list, &head);
+- NFS_I(delegation->inode)->delegation = NULL;
+- NFS_I(delegation->inode)->delegation_state = 0;
+- }
+- spin_unlock(&clp->cl_lock);
+- while(!list_empty(&head)) {
+- delegation = list_entry(head.next, struct nfs_delegation, super_list);
+- list_del(&delegation->super_list);
+- nfs_free_delegation(delegation);
++ spin_lock(&clp->cl_lock);
++ delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL);
++ spin_unlock(&clp->cl_lock);
++ rcu_read_unlock();
++ if (delegation != NULL)
++ call_rcu(&delegation->rcu, nfs_free_delegation_callback);
++ goto restart;
+ }
++ rcu_read_unlock();
+ }
+
+ int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+ {
+- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_delegation *delegation;
+- int res = 0;
++ int ret = 0;
+
+- if (nfsi->delegation_state == 0)
+- return 0;
+- spin_lock(&clp->cl_lock);
+- delegation = nfsi->delegation;
++ rcu_read_lock();
++ delegation = rcu_dereference(nfsi->delegation);
+ if (delegation != NULL) {
+ memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
+- res = 1;
++ ret = 1;
+ }
+- spin_unlock(&clp->cl_lock);
+- return res;
++ rcu_read_unlock();
++ return ret;
+ }
+diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
+index 2cfd4b2..5874ce7 100644
+--- a/fs/nfs/delegation.h
++++ b/fs/nfs/delegation.h
+@@ -22,11 +22,12 @@ struct nfs_delegation {
+ long flags;
+ loff_t maxsize;
+ __u64 change_attr;
++ struct rcu_head rcu;
+ };
+
+ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+-int __nfs_inode_return_delegation(struct inode *inode);
++int nfs_inode_return_delegation(struct inode *inode);
+ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+
+ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
+@@ -39,27 +40,24 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
+
+ /* NFSv4 delegation-related procedures */
+ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
+-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
++int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
+ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
+ int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
+
+ static inline int nfs_have_delegation(struct inode *inode, int flags)
+ {
++ struct nfs_delegation *delegation;
++ int ret = 0;
++
+ flags &= FMODE_READ|FMODE_WRITE;
+- smp_rmb();
+- if ((NFS_I(inode)->delegation_state & flags) == flags)
+- return 1;
+- return 0;
++ rcu_read_lock();
++ delegation = rcu_dereference(NFS_I(inode)->delegation);
++ if (delegation != NULL && (delegation->type & flags) == flags)
++ ret = 1;
++ rcu_read_unlock();
++ return ret;
+ }
+
+-static inline int nfs_inode_return_delegation(struct inode *inode)
+-{
+- int err = 0;
+-
+- if (NFS_I(inode)->delegation != NULL)
+- err = __nfs_inode_return_delegation(inode);
+- return err;
+-}
+ #else
+ static inline int nfs_have_delegation(struct inode *inode, int flags)
+ {
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index c27258b..322141f 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -897,14 +897,13 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
+ return (nd->intent.open.flags & O_EXCL) != 0;
+ }
+
+-static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
+- struct nfs_fh *fh, struct nfs_fattr *fattr)
++static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
+ {
+ struct nfs_server *server = NFS_SERVER(dir);
+
+ if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
+- /* Revalidate fsid on root dir */
+- return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
++ /* Revalidate fsid using the parent directory */
++ return __nfs_revalidate_inode(server, dir);
+ return 0;
+ }
+
+@@ -946,7 +945,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
+ res = ERR_PTR(error);
+ goto out_unlock;
+ }
+- error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
++ error = nfs_reval_fsid(dir, &fattr);
+ if (error < 0) {
+ res = ERR_PTR(error);
+ goto out_unlock;
+@@ -1244,7 +1243,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
+ attr.ia_mode = mode;
+ attr.ia_valid = ATTR_MODE;
+
+- if (nd && (nd->flags & LOOKUP_CREATE))
++ if ((nd->flags & LOOKUP_CREATE) != 0)
+ open_flags = nd->intent.open.flags;
+
+ lock_kernel();
+@@ -1535,7 +1534,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
+
+ lock_kernel();
+
+- page = alloc_page(GFP_KERNEL);
++ page = alloc_page(GFP_HIGHUSER);
+ if (!page) {
+ unlock_kernel();
+ return -ENOMEM;
+@@ -1744,8 +1743,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+ struct nfs_inode *nfsi;
+ struct nfs_access_entry *cache;
+
+- spin_lock(&nfs_access_lru_lock);
+ restart:
++ spin_lock(&nfs_access_lru_lock);
+ list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+ struct inode *inode;
+
+@@ -1770,6 +1769,7 @@ remove_lru_entry:
+ clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
+ }
+ spin_unlock(&inode->i_lock);
++ spin_unlock(&nfs_access_lru_lock);
+ iput(inode);
+ goto restart;
+ }
+diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
+index 00eee87..a5c82b6 100644
+--- a/fs/nfs/direct.c
++++ b/fs/nfs/direct.c
+@@ -266,7 +266,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
+ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
+ {
+ struct nfs_open_context *ctx = dreq->ctx;
+- struct inode *inode = ctx->dentry->d_inode;
++ struct inode *inode = ctx->path.dentry->d_inode;
+ size_t rsize = NFS_SERVER(inode)->rsize;
+ unsigned int pgbase;
+ int result;
+@@ -295,9 +295,14 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
+ break;
+ }
+ if ((unsigned)result < data->npages) {
+- nfs_direct_release_pages(data->pagevec, result);
+- nfs_readdata_release(data);
+- break;
++ bytes = result * PAGE_SIZE;
++ if (bytes <= pgbase) {
++ nfs_direct_release_pages(data->pagevec, result);
++ nfs_readdata_release(data);
++ break;
++ }
++ bytes -= pgbase;
++ data->npages = result;
+ }
+
+ get_dreq(dreq);
+@@ -601,7 +606,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
+ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
+ {
+ struct nfs_open_context *ctx = dreq->ctx;
+- struct inode *inode = ctx->dentry->d_inode;
++ struct inode *inode = ctx->path.dentry->d_inode;
+ size_t wsize = NFS_SERVER(inode)->wsize;
+ unsigned int pgbase;
+ int result;
+@@ -630,9 +635,14 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
+ break;
+ }
+ if ((unsigned)result < data->npages) {
+- nfs_direct_release_pages(data->pagevec, result);
+- nfs_writedata_release(data);
+- break;
++ bytes = result * PAGE_SIZE;
++ if (bytes <= pgbase) {
++ nfs_direct_release_pages(data->pagevec, result);
++ nfs_writedata_release(data);
++ break;
++ }
++ bytes -= pgbase;
++ data->npages = result;
+ }
+
+ get_dreq(dreq);
+@@ -763,10 +773,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
+ (unsigned long) count, (long long) pos);
+
+ if (nr_segs != 1)
+- return -EINVAL;
+-
+- if (count < 0)
+ goto out;
++
+ retval = -EFAULT;
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ goto out;
+@@ -814,7 +822,7 @@ out:
+ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+ {
+- ssize_t retval;
++ ssize_t retval = -EINVAL;
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ /* XXX: temporary */
+@@ -827,7 +835,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ (unsigned long) count, (long long) pos);
+
+ if (nr_segs != 1)
+- return -EINVAL;
++ goto out;
+
+ retval = generic_write_checks(file, &pos, &count, 0);
+ if (retval)
+diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
+index bd9f5a8..3d9fccf 100644
+--- a/fs/nfs/inode.c
++++ b/fs/nfs/inode.c
+@@ -461,14 +461,14 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
+
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ if (ctx != NULL) {
+- atomic_set(&ctx->count, 1);
+- ctx->dentry = dget(dentry);
+- ctx->vfsmnt = mntget(mnt);
++ ctx->path.dentry = dget(dentry);
++ ctx->path.mnt = mntget(mnt);
+ ctx->cred = get_rpccred(cred);
+ ctx->state = NULL;
+ ctx->lockowner = current->files;
+ ctx->error = 0;
+ ctx->dir_cookie = 0;
++ kref_init(&ctx->kref);
+ }
+ return ctx;
+ }
+@@ -476,27 +476,33 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
+ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
+ {
+ if (ctx != NULL)
+- atomic_inc(&ctx->count);
++ kref_get(&ctx->kref);
+ return ctx;
+ }
+
+-void put_nfs_open_context(struct nfs_open_context *ctx)
++static void nfs_free_open_context(struct kref *kref)
+ {
+- if (atomic_dec_and_test(&ctx->count)) {
+- if (!list_empty(&ctx->list)) {
+- struct inode *inode = ctx->dentry->d_inode;
+- spin_lock(&inode->i_lock);
+- list_del(&ctx->list);
+- spin_unlock(&inode->i_lock);
+- }
+- if (ctx->state != NULL)
+- nfs4_close_state(ctx->state, ctx->mode);
+- if (ctx->cred != NULL)
+- put_rpccred(ctx->cred);
+- dput(ctx->dentry);
+- mntput(ctx->vfsmnt);
+- kfree(ctx);
++ struct nfs_open_context *ctx = container_of(kref,
++ struct nfs_open_context, kref);
++
++ if (!list_empty(&ctx->list)) {
++ struct inode *inode = ctx->path.dentry->d_inode;
++ spin_lock(&inode->i_lock);
++ list_del(&ctx->list);
++ spin_unlock(&inode->i_lock);
+ }
++ if (ctx->state != NULL)
++ nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
++ if (ctx->cred != NULL)
++ put_rpccred(ctx->cred);
++ dput(ctx->path.dentry);
++ mntput(ctx->path.mnt);
++ kfree(ctx);
++}
++
++void put_nfs_open_context(struct nfs_open_context *ctx)
++{
++ kref_put(&ctx->kref, nfs_free_open_context);
+ }
+
+ /*
+@@ -961,8 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
+ goto out_changed;
+
+ server = NFS_SERVER(inode);
+- /* Update the fsid if and only if this is the root directory */
+- if (inode == inode->i_sb->s_root->d_inode
++ /* Update the fsid? */
++ if (S_ISDIR(inode->i_mode)
+ && !nfs_fsid_equal(&server->fsid, &fattr->fsid))
+ server->fsid = fattr->fsid;
+
+@@ -1066,8 +1072,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
+ invalid &= ~NFS_INO_INVALID_DATA;
+ if (data_stable)
+ invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
+- if (!nfs_have_delegation(inode, FMODE_READ))
++ if (!nfs_have_delegation(inode, FMODE_READ) ||
++ (nfsi->cache_validity & NFS_INO_REVAL_FORCED))
+ nfsi->cache_validity |= invalid;
++ nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
+
+ return 0;
+ out_changed:
+@@ -1103,27 +1111,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
+ */
+ void nfs4_clear_inode(struct inode *inode)
+ {
+- struct nfs_inode *nfsi = NFS_I(inode);
+-
+ /* If we are holding a delegation, return it! */
+ nfs_inode_return_delegation(inode);
+ /* First call standard NFS clear_inode() code */
+ nfs_clear_inode(inode);
+- /* Now clear out any remaining state */
+- while (!list_empty(&nfsi->open_states)) {
+- struct nfs4_state *state;
+-
+- state = list_entry(nfsi->open_states.next,
+- struct nfs4_state,
+- inode_states);
+- dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
+- __FUNCTION__,
+- inode->i_sb->s_id,
+- (long long)NFS_FILEID(inode),
+- state);
+- BUG_ON(atomic_read(&state->count) != 1);
+- nfs4_close_state(state, state->state);
+- }
+ }
+ #endif
+
+@@ -1165,15 +1156,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
+ struct nfs_inode *nfsi = (struct nfs_inode *) foo;
+
+ inode_init_once(&nfsi->vfs_inode);
+- spin_lock_init(&nfsi->req_lock);
+- INIT_LIST_HEAD(&nfsi->dirty);
+- INIT_LIST_HEAD(&nfsi->commit);
+ INIT_LIST_HEAD(&nfsi->open_files);
+ INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
+ INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
+ INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
+ atomic_set(&nfsi->data_updates, 0);
+- nfsi->ndirty = 0;
+ nfsi->ncommit = 0;
+ nfsi->npages = 0;
+ nfs4_init_once(nfsi);
+diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
+index ad2b40d..76cf55d 100644
+--- a/fs/nfs/internal.h
++++ b/fs/nfs/internal.h
+@@ -183,9 +183,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
+ /*
+ * Calculate the number of 512byte blocks used.
+ */
+-static inline unsigned long nfs_calc_block_size(u64 tsize)
++static inline blkcnt_t nfs_calc_block_size(u64 tsize)
+ {
+- loff_t used = (tsize + 511) >> 9;
++ blkcnt_t used = (tsize + 511) >> 9;
+ return (used > ULONG_MAX) ? ULONG_MAX : used;
+ }
+
+diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
+index ca5a266..8afd9f7 100644
+--- a/fs/nfs/mount_clnt.c
++++ b/fs/nfs/mount_clnt.c
+@@ -1,7 +1,5 @@
+ /*
+- * linux/fs/nfs/mount_clnt.c
+- *
+- * MOUNT client to support NFSroot.
++ * In-kernel MOUNT protocol client
+ *
+ * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
+ */
+@@ -18,33 +16,31 @@
+ #include <linux/nfs_fs.h>
+
+ #ifdef RPC_DEBUG
+-# define NFSDBG_FACILITY NFSDBG_ROOT
++# define NFSDBG_FACILITY NFSDBG_MOUNT
+ #endif
+
+-/*
+-#define MOUNT_PROGRAM 100005
+-#define MOUNT_VERSION 1
+-#define MOUNT_MNT 1
+-#define MOUNT_UMNT 3
+- */
+-
+-static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *,
+- int, int);
+ static struct rpc_program mnt_program;
+
+ struct mnt_fhstatus {
+- unsigned int status;
+- struct nfs_fh * fh;
++ u32 status;
++ struct nfs_fh *fh;
+ };
+
+-/*
+- * Obtain an NFS file handle for the given host and path
++/**
++ * nfs_mount - Obtain an NFS file handle for the given host and path
++ * @addr: pointer to server's address
++ * @len: size of server's address
++ * @hostname: name of server host, or NULL
++ * @path: pointer to string containing export path to mount
++ * @version: mount version to use for this request
++ * @protocol: transport protocol to use for this request
++ * @fh: pointer to location to place returned file handle
++ *
++ * Uses default timeout parameters specified by underlying transport.
+ */
+-int
+-nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
+- int version, int protocol)
++int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path,
++ int version, int protocol, struct nfs_fh *fh)
+ {
+- struct rpc_clnt *mnt_clnt;
+ struct mnt_fhstatus result = {
+ .fh = fh
+ };
+@@ -52,16 +48,25 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
+ .rpc_argp = path,
+ .rpc_resp = &result,
+ };
+- char hostname[32];
++ struct rpc_create_args args = {
++ .protocol = protocol,
++ .address = addr,
++ .addrsize = len,
++ .servername = hostname,
++ .program = &mnt_program,
++ .version = version,
++ .authflavor = RPC_AUTH_UNIX,
++ .flags = RPC_CLNT_CREATE_INTR,
++ };
++ struct rpc_clnt *mnt_clnt;
+ int status;
+
+- dprintk("NFS: nfs_mount(%08x:%s)\n",
+- (unsigned)ntohl(addr->sin_addr.s_addr), path);
++ dprintk("NFS: sending MNT request for %s:%s\n",
++ (hostname ? hostname : "server"), path);
+
+- sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr));
+- mnt_clnt = mnt_create(hostname, addr, version, protocol);
++ mnt_clnt = rpc_create(&args);
+ if (IS_ERR(mnt_clnt))
+- return PTR_ERR(mnt_clnt);
++ goto out_clnt_err;
+
+ if (version == NFS_MNT3_VERSION)
+ msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
+@@ -69,33 +74,39 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
+ msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+
+ status = rpc_call_sync(mnt_clnt, &msg, 0);
+- return status < 0? status : (result.status? -EACCES : 0);
+-}
++ rpc_shutdown_client(mnt_clnt);
+
+-static struct rpc_clnt *
+-mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
+- int protocol)
+-{
+- struct rpc_create_args args = {
+- .protocol = protocol,
+- .address = (struct sockaddr *)srvaddr,
+- .addrsize = sizeof(*srvaddr),
+- .servername = hostname,
+- .program = &mnt_program,
+- .version = version,
+- .authflavor = RPC_AUTH_UNIX,
+- .flags = (RPC_CLNT_CREATE_ONESHOT |
+- RPC_CLNT_CREATE_INTR),
+- };
++ if (status < 0)
++ goto out_call_err;
++ if (result.status != 0)
++ goto out_mnt_err;
++
++ dprintk("NFS: MNT request succeeded\n");
++ status = 0;
++
++out:
++ return status;
++
++out_clnt_err:
++ status = PTR_ERR(mnt_clnt);
++ dprintk("NFS: failed to create RPC client, status=%d\n", status);
++ goto out;
++
++out_call_err:
++ dprintk("NFS: failed to start MNT request, status=%d\n", status);
++ goto out;
+
+- return rpc_create(&args);
++out_mnt_err:
++ dprintk("NFS: MNT server returned result %d\n", result.status);
++ status = -EACCES;
++ goto out;
+ }
+
+ /*
+ * XDR encode/decode functions for MOUNT
+ */
+-static int
+-xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
++static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p,
++ const char *path)
+ {
+ p = xdr_encode_string(p, path);
+
+@@ -103,8 +114,8 @@ xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
+ return 0;
+ }
+
+-static int
+-xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
++static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p,
++ struct mnt_fhstatus *res)
+ {
+ struct nfs_fh *fh = res->fh;
+
+@@ -115,8 +126,8 @@ xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
+ return 0;
+ }
+
+-static int
+-xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
++static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
++ struct mnt_fhstatus *res)
+ {
+ struct nfs_fh *fh = res->fh;
+
+@@ -135,53 +146,53 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
+ #define MNT_fhstatus_sz (1 + 8)
+ #define MNT_fhstatus3_sz (1 + 16)
+
+-static struct rpc_procinfo mnt_procedures[] = {
+-[MNTPROC_MNT] = {
+- .p_proc = MNTPROC_MNT,
+- .p_encode = (kxdrproc_t) xdr_encode_dirpath,
+- .p_decode = (kxdrproc_t) xdr_decode_fhstatus,
+- .p_arglen = MNT_dirpath_sz,
+- .p_replen = MNT_fhstatus_sz,
+- .p_statidx = MNTPROC_MNT,
+- .p_name = "MOUNT",
++static struct rpc_procinfo mnt_procedures[] = {
++ [MNTPROC_MNT] = {
++ .p_proc = MNTPROC_MNT,
++ .p_encode = (kxdrproc_t) xdr_encode_dirpath,
++ .p_decode = (kxdrproc_t) xdr_decode_fhstatus,
++ .p_arglen = MNT_dirpath_sz,
++ .p_replen = MNT_fhstatus_sz,
++ .p_statidx = MNTPROC_MNT,
++ .p_name = "MOUNT",
+ },
+ };
+
+ static struct rpc_procinfo mnt3_procedures[] = {
+-[MOUNTPROC3_MNT] = {
+- .p_proc = MOUNTPROC3_MNT,
+- .p_encode = (kxdrproc_t) xdr_encode_dirpath,
+- .p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
+- .p_arglen = MNT_dirpath_sz,
+- .p_replen = MNT_fhstatus3_sz,
+- .p_statidx = MOUNTPROC3_MNT,
+- .p_name = "MOUNT",
++ [MOUNTPROC3_MNT] = {
++ .p_proc = MOUNTPROC3_MNT,
++ .p_encode = (kxdrproc_t) xdr_encode_dirpath,
++ .p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
++ .p_arglen = MNT_dirpath_sz,
++ .p_replen = MNT_fhstatus3_sz,
++ .p_statidx = MOUNTPROC3_MNT,
++ .p_name = "MOUNT",
+ },
+ };
+
+
+-static struct rpc_version mnt_version1 = {
+- .number = 1,
+- .nrprocs = 2,
+- .procs = mnt_procedures
++static struct rpc_version mnt_version1 = {
++ .number = 1,
++ .nrprocs = 2,
++ .procs = mnt_procedures,
+ };
+
+-static struct rpc_version mnt_version3 = {
+- .number = 3,
+- .nrprocs = 2,
+- .procs = mnt3_procedures
++static struct rpc_version mnt_version3 = {
++ .number = 3,
++ .nrprocs = 2,
++ .procs = mnt3_procedures,
+ };
+
+-static struct rpc_version * mnt_version[] = {
++static struct rpc_version *mnt_version[] = {
+ NULL,
+ &mnt_version1,
+ NULL,
+ &mnt_version3,
+ };
+
+-static struct rpc_stat mnt_stats;
++static struct rpc_stat mnt_stats;
+
+-static struct rpc_program mnt_program = {
++static struct rpc_program mnt_program = {
+ .name = "mount",
+ .number = NFS_MNT_PROGRAM,
+ .nrvers = ARRAY_SIZE(mnt_version),
+diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
+index cd3ca7b..7fcc78f 100644
+--- a/fs/nfs/nfs2xdr.c
++++ b/fs/nfs/nfs2xdr.c
+@@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
+ static int
+ nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+ {
+- struct rpc_auth *auth = req->rq_task->tk_auth;
++ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+ unsigned int replen;
+ u32 offset = (u32)args->offset;
+ u32 count = args->count;
+@@ -380,7 +380,7 @@ static int
+ nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
+ {
+ struct rpc_task *task = req->rq_task;
+- struct rpc_auth *auth = task->tk_auth;
++ struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth;
+ unsigned int replen;
+ u32 count = args->count;
+
+@@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
+ static int
+ nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
+ {
+- struct rpc_auth *auth = req->rq_task->tk_auth;
++ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+ unsigned int replen;
+
+ p = xdr_encode_fhandle(p, args->fh);
+diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
+index 45268d6..814d886 100644
+--- a/fs/nfs/nfs3proc.c
++++ b/fs/nfs/nfs3proc.c
+@@ -335,9 +335,7 @@ again:
+ * not sure this buys us anything (and I'd have
+ * to revamp the NFSv3 XDR code) */
+ status = nfs3_proc_setattr(dentry, &fattr, sattr);
+- if (status == 0)
+- nfs_setattr_update_inode(dentry->d_inode, sattr);
+- nfs_refresh_inode(dentry->d_inode, &fattr);
++ nfs_post_op_update_inode(dentry->d_inode, &fattr);
+ dprintk("NFS reply setattr (post-create): %d\n", status);
+ }
+ if (status != 0)
+diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
+index b51df8e..b4647a2 100644
+--- a/fs/nfs/nfs3xdr.c
++++ b/fs/nfs/nfs3xdr.c
+@@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
+ static int
+ nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+ {
+- struct rpc_auth *auth = req->rq_task->tk_auth;
++ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+ unsigned int replen;
+ u32 count = args->count;
+
+@@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
+ static int
+ nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
+ {
+- struct rpc_auth *auth = req->rq_task->tk_auth;
++ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+ unsigned int replen;
+ u32 count = args->count;
+
+@@ -643,7 +643,7 @@ static int
+ nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
+ struct nfs3_getaclargs *args)
+ {
+- struct rpc_auth *auth = req->rq_task->tk_auth;
++ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+ unsigned int replen;
+
+ p = xdr_encode_fhandle(p, args->fh);
+@@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
+ static int
+ nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
+ {
+- struct rpc_auth *auth = req->rq_task->tk_auth;
++ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+ unsigned int replen;
+
+ p = xdr_encode_fhandle(p, args->fh);
+diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
+index cf3a17e..6c028e7 100644
+--- a/fs/nfs/nfs4_fs.h
++++ b/fs/nfs/nfs4_fs.h
+@@ -70,19 +70,26 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
+ seqid->flags |= NFS_SEQID_CONFIRMED;
+ }
+
++struct nfs_unique_id {
++ struct rb_node rb_node;
++ __u64 id;
++};
++
+ /*
+ * NFS4 state_owners and lock_owners are simply labels for ordered
+ * sequences of RPC calls. Their sole purpose is to provide once-only
+ * semantics by allowing the server to identify replayed requests.
+ */
+ struct nfs4_state_owner {
+- spinlock_t so_lock;
+- struct list_head so_list; /* per-clientid list of state_owners */
++ struct nfs_unique_id so_owner_id;
+ struct nfs_client *so_client;
+- u32 so_id; /* 32-bit identifier, unique */
+- atomic_t so_count;
++ struct nfs_server *so_server;
++ struct rb_node so_client_node;
+
+ struct rpc_cred *so_cred; /* Associated cred */
++
++ spinlock_t so_lock;
++ atomic_t so_count;
+ struct list_head so_states;
+ struct list_head so_delegations;
+ struct nfs_seqid_counter so_seqid;
+@@ -108,7 +115,7 @@ struct nfs4_lock_state {
+ #define NFS_LOCK_INITIALIZED 1
+ int ls_flags;
+ struct nfs_seqid_counter ls_seqid;
+- u32 ls_id;
++ struct nfs_unique_id ls_id;
+ nfs4_stateid ls_stateid;
+ atomic_t ls_count;
+ };
+@@ -116,7 +123,10 @@ struct nfs4_lock_state {
+ /* bits for nfs4_state->flags */
+ enum {
+ LK_STATE_IN_USE,
+- NFS_DELEGATED_STATE,
++ NFS_DELEGATED_STATE, /* Current stateid is delegation */
++ NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */
++ NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */
++ NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */
+ };
+
+ struct nfs4_state {
+@@ -130,11 +140,14 @@ struct nfs4_state {
+ unsigned long flags; /* Do we hold any locks? */
+ spinlock_t state_lock; /* Protects the lock_states list */
+
+- nfs4_stateid stateid;
++ seqlock_t seqlock; /* Protects the stateid/open_stateid */
++ nfs4_stateid stateid; /* Current stateid: may be delegation */
++ nfs4_stateid open_stateid; /* OPEN stateid */
+
+- unsigned int n_rdonly;
+- unsigned int n_wronly;
+- unsigned int n_rdwr;
++ /* The following 3 fields are protected by owner->so_lock */
++ unsigned int n_rdonly; /* Number of read-only references */
++ unsigned int n_wronly; /* Number of write-only references */
++ unsigned int n_rdwr; /* Number of read/write references */
+ int state; /* State on the server (R,W, or RW) */
+ atomic_t count;
+ };
+@@ -165,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc
+ extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
+-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
++extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
+ extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+ extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+@@ -189,14 +202,13 @@ extern void nfs4_renew_state(struct work_struct *);
+
+ /* nfs4state.c */
+ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
+-extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
+
+ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
+ extern void nfs4_put_state_owner(struct nfs4_state_owner *);
+ extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
+ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
+ extern void nfs4_put_open_state(struct nfs4_state *);
+-extern void nfs4_close_state(struct nfs4_state *, mode_t);
++extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
+ extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
+ extern void nfs4_schedule_state_recovery(struct nfs_client *);
+ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
+@@ -222,7 +234,7 @@ extern struct svc_version nfs4_callback_version1;
+
+ #else
+
+-#define nfs4_close_state(a, b) do { } while (0)
++#define nfs4_close_state(a, b, c) do { } while (0)
+
+ #endif /* CONFIG_NFS_V4 */
+ #endif /* __LINUX_FS_NFS_NFS4_FS.H */
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index 648e0ac..fee2da8 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -65,6 +65,7 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *)
+ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
+ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
+ static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
++static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
+
+ /* Prevent leaks of NFSv4 errors into userland */
+ int nfs4_map_errors(int err)
+@@ -214,27 +215,39 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
+ }
+
+ struct nfs4_opendata {
+- atomic_t count;
++ struct kref kref;
+ struct nfs_openargs o_arg;
+ struct nfs_openres o_res;
+ struct nfs_open_confirmargs c_arg;
+ struct nfs_open_confirmres c_res;
+ struct nfs_fattr f_attr;
+ struct nfs_fattr dir_attr;
+- struct dentry *dentry;
++ struct path path;
+ struct dentry *dir;
+ struct nfs4_state_owner *owner;
++ struct nfs4_state *state;
+ struct iattr attrs;
+ unsigned long timestamp;
++ unsigned int rpc_done : 1;
+ int rpc_status;
+ int cancelled;
+ };
+
+-static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
++
++static void nfs4_init_opendata_res(struct nfs4_opendata *p)
++{
++ p->o_res.f_attr = &p->f_attr;
++ p->o_res.dir_attr = &p->dir_attr;
++ p->o_res.server = p->o_arg.server;
++ nfs_fattr_init(&p->f_attr);
++ nfs_fattr_init(&p->dir_attr);
++}
++
++static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
+ struct nfs4_state_owner *sp, int flags,
+ const struct iattr *attrs)
+ {
+- struct dentry *parent = dget_parent(dentry);
++ struct dentry *parent = dget_parent(path->dentry);
+ struct inode *dir = parent->d_inode;
+ struct nfs_server *server = NFS_SERVER(dir);
+ struct nfs4_opendata *p;
+@@ -245,24 +258,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
+ p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+ if (p->o_arg.seqid == NULL)
+ goto err_free;
+- atomic_set(&p->count, 1);
+- p->dentry = dget(dentry);
++ p->path.mnt = mntget(path->mnt);
++ p->path.dentry = dget(path->dentry);
+ p->dir = parent;
+ p->owner = sp;
+ atomic_inc(&sp->so_count);
+ p->o_arg.fh = NFS_FH(dir);
+ p->o_arg.open_flags = flags,
+ p->o_arg.clientid = server->nfs_client->cl_clientid;
+- p->o_arg.id = sp->so_id;
+- p->o_arg.name = &dentry->d_name;
++ p->o_arg.id = sp->so_owner_id.id;
++ p->o_arg.name = &p->path.dentry->d_name;
+ p->o_arg.server = server;
+ p->o_arg.bitmask = server->attr_bitmask;
+ p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+- p->o_res.f_attr = &p->f_attr;
+- p->o_res.dir_attr = &p->dir_attr;
+- p->o_res.server = server;
+- nfs_fattr_init(&p->f_attr);
+- nfs_fattr_init(&p->dir_attr);
+ if (flags & O_EXCL) {
+ u32 *s = (u32 *) p->o_arg.u.verifier.data;
+ s[0] = jiffies;
+@@ -274,6 +282,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
+ p->c_arg.fh = &p->o_res.fh;
+ p->c_arg.stateid = &p->o_res.stateid;
+ p->c_arg.seqid = p->o_arg.seqid;
++ nfs4_init_opendata_res(p);
++ kref_init(&p->kref);
+ return p;
+ err_free:
+ kfree(p);
+@@ -282,27 +292,25 @@ err:
+ return NULL;
+ }
+
+-static void nfs4_opendata_free(struct nfs4_opendata *p)
++static void nfs4_opendata_free(struct kref *kref)
+ {
+- if (p != NULL && atomic_dec_and_test(&p->count)) {
+- nfs_free_seqid(p->o_arg.seqid);
+- nfs4_put_state_owner(p->owner);
+- dput(p->dir);
+- dput(p->dentry);
+- kfree(p);
+- }
++ struct nfs4_opendata *p = container_of(kref,
++ struct nfs4_opendata, kref);
++
++ nfs_free_seqid(p->o_arg.seqid);
++ if (p->state != NULL)
++ nfs4_put_open_state(p->state);
++ nfs4_put_state_owner(p->owner);
++ dput(p->dir);
++ dput(p->path.dentry);
++ mntput(p->path.mnt);
++ kfree(p);
+ }
+
+-/* Helper for asynchronous RPC calls */
+-static int nfs4_call_async(struct rpc_clnt *clnt,
+- const struct rpc_call_ops *tk_ops, void *calldata)
++static void nfs4_opendata_put(struct nfs4_opendata *p)
+ {
+- struct rpc_task *task;
+-
+- if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
+- return -ENOMEM;
+- rpc_execute(task);
+- return 0;
++ if (p != NULL)
++ kref_put(&p->kref, nfs4_opendata_free);
+ }
+
+ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
+@@ -316,7 +324,34 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
+ return ret;
+ }
+
+-static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
++static int can_open_cached(struct nfs4_state *state, int mode)
++{
++ int ret = 0;
++ switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
++ case FMODE_READ:
++ ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
++ ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
++ break;
++ case FMODE_WRITE:
++ ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0;
++ ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
++ break;
++ case FMODE_READ|FMODE_WRITE:
++ ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
++ }
++ return ret;
++}
++
++static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags)
++{
++ if ((delegation->type & open_flags) != open_flags)
++ return 0;
++ if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM)
++ return 0;
++ return 1;
++}
++
++static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
+ {
+ switch (open_flags) {
+ case FMODE_WRITE:
+@@ -328,41 +363,176 @@ static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_
+ case FMODE_READ|FMODE_WRITE:
+ state->n_rdwr++;
+ }
++ nfs4_state_set_mode_locked(state, state->state | open_flags);
+ }
+
+-static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
++static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+ {
+- struct inode *inode = state->inode;
++ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
++ memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
++ memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
++ switch (open_flags) {
++ case FMODE_READ:
++ set_bit(NFS_O_RDONLY_STATE, &state->flags);
++ break;
++ case FMODE_WRITE:
++ set_bit(NFS_O_WRONLY_STATE, &state->flags);
++ break;
++ case FMODE_READ|FMODE_WRITE:
++ set_bit(NFS_O_RDWR_STATE, &state->flags);
++ }
++}
++
++static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
++{
++ write_seqlock(&state->seqlock);
++ nfs_set_open_stateid_locked(state, stateid, open_flags);
++ write_sequnlock(&state->seqlock);
++}
+
++static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags)
++{
+ open_flags &= (FMODE_READ|FMODE_WRITE);
+- /* Protect against nfs4_find_state_byowner() */
++ /*
++ * Protect the call to nfs4_state_set_mode_locked and
++ * serialise the stateid update
++ */
++ write_seqlock(&state->seqlock);
++ if (deleg_stateid != NULL) {
++ memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
++ set_bit(NFS_DELEGATED_STATE, &state->flags);
++ }
++ if (open_stateid != NULL)
++ nfs_set_open_stateid_locked(state, open_stateid, open_flags);
++ write_sequnlock(&state->seqlock);
+ spin_lock(&state->owner->so_lock);
+- spin_lock(&inode->i_lock);
+- memcpy(&state->stateid, stateid, sizeof(state->stateid));
+ update_open_stateflags(state, open_flags);
+- nfs4_state_set_mode_locked(state, state->state | open_flags);
+- spin_unlock(&inode->i_lock);
+ spin_unlock(&state->owner->so_lock);
+ }
+
++static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags)
++{
++ struct nfs_delegation *delegation;
++
++ rcu_read_lock();
++ delegation = rcu_dereference(NFS_I(inode)->delegation);
++ if (delegation == NULL || (delegation->type & open_flags) == open_flags) {
++ rcu_read_unlock();
++ return;
++ }
++ rcu_read_unlock();
++ nfs_inode_return_delegation(inode);
++}
++
++static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
++{
++ struct nfs4_state *state = opendata->state;
++ struct nfs_inode *nfsi = NFS_I(state->inode);
++ struct nfs_delegation *delegation;
++ int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL);
++ nfs4_stateid stateid;
++ int ret = -EAGAIN;
++
++ rcu_read_lock();
++ delegation = rcu_dereference(nfsi->delegation);
++ for (;;) {
++ if (can_open_cached(state, open_mode)) {
++ spin_lock(&state->owner->so_lock);
++ if (can_open_cached(state, open_mode)) {
++ update_open_stateflags(state, open_mode);
++ spin_unlock(&state->owner->so_lock);
++ rcu_read_unlock();
++ goto out_return_state;
++ }
++ spin_unlock(&state->owner->so_lock);
++ }
++ if (delegation == NULL)
++ break;
++ if (!can_open_delegated(delegation, open_mode))
++ break;
++ /* Save the delegation */
++ memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
++ rcu_read_unlock();
++ lock_kernel();
++ ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode);
++ unlock_kernel();
++ if (ret != 0)
++ goto out;
++ ret = -EAGAIN;
++ rcu_read_lock();
++ delegation = rcu_dereference(nfsi->delegation);
++ /* If no delegation, try a cached open */
++ if (delegation == NULL)
++ continue;
++ /* Is the delegation still valid? */
++ if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0)
++ continue;
++ rcu_read_unlock();
++ update_open_stateid(state, NULL, &stateid, open_mode);
++ goto out_return_state;
++ }
++ rcu_read_unlock();
++out:
++ return ERR_PTR(ret);
++out_return_state:
++ atomic_inc(&state->count);
++ return state;
++}
++
+ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
+ {
+ struct inode *inode;
+ struct nfs4_state *state = NULL;
++ struct nfs_delegation *delegation;
++ nfs4_stateid *deleg_stateid = NULL;
++ int ret;
+
+- if (!(data->f_attr.valid & NFS_ATTR_FATTR))
++ if (!data->rpc_done) {
++ state = nfs4_try_open_cached(data);
+ goto out;
++ }
++
++ ret = -EAGAIN;
++ if (!(data->f_attr.valid & NFS_ATTR_FATTR))
++ goto err;
+ inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
++ ret = PTR_ERR(inode);
+ if (IS_ERR(inode))
+- goto out;
++ goto err;
++ ret = -ENOMEM;
+ state = nfs4_get_open_state(inode, data->owner);
+ if (state == NULL)
+- goto put_inode;
+- update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags);
+-put_inode:
++ goto err_put_inode;
++ if (data->o_res.delegation_type != 0) {
++ int delegation_flags = 0;
++
++ rcu_read_lock();
++ delegation = rcu_dereference(NFS_I(inode)->delegation);
++ if (delegation)
++ delegation_flags = delegation->flags;
++ rcu_read_unlock();
++ if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
++ nfs_inode_set_delegation(state->inode,
++ data->owner->so_cred,
++ &data->o_res);
++ else
++ nfs_inode_reclaim_delegation(state->inode,
++ data->owner->so_cred,
++ &data->o_res);
++ }
++ rcu_read_lock();
++ delegation = rcu_dereference(NFS_I(inode)->delegation);
++ if (delegation != NULL)
++ deleg_stateid = &delegation->stateid;
++ update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags);
++ rcu_read_unlock();
+ iput(inode);
+ out:
+ return state;
++err_put_inode:
++ iput(inode);
++err:
++ return ERR_PTR(ret);
+ }
+
+ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
+@@ -382,79 +552,66 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *
+ return ERR_PTR(-ENOENT);
+ }
+
+-static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid)
++static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res)
+ {
++ struct nfs4_state *newstate;
+ int ret;
+
+ opendata->o_arg.open_flags = openflags;
++ memset(&opendata->o_res, 0, sizeof(opendata->o_res));
++ memset(&opendata->c_res, 0, sizeof(opendata->c_res));
++ nfs4_init_opendata_res(opendata);
+ ret = _nfs4_proc_open(opendata);
+ if (ret != 0)
+ return ret;
+- memcpy(stateid->data, opendata->o_res.stateid.data,
+- sizeof(stateid->data));
++ newstate = nfs4_opendata_to_nfs4_state(opendata);
++ if (IS_ERR(newstate))
++ return PTR_ERR(newstate);
++ nfs4_close_state(&opendata->path, newstate, openflags);
++ *res = newstate;
+ return 0;
+ }
+
+ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
+ {
+- nfs4_stateid stateid;
+ struct nfs4_state *newstate;
+- int mode = 0;
+- int delegation = 0;
+ int ret;
+
+ /* memory barrier prior to reading state->n_* */
++ clear_bit(NFS_DELEGATED_STATE, &state->flags);
+ smp_rmb();
+ if (state->n_rdwr != 0) {
+- ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid);
++ ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
+ if (ret != 0)
+ return ret;
+- mode |= FMODE_READ|FMODE_WRITE;
+- if (opendata->o_res.delegation_type != 0)
+- delegation = opendata->o_res.delegation_type;
+- smp_rmb();
++ if (newstate != state)
++ return -ESTALE;
+ }
+ if (state->n_wronly != 0) {
+- ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid);
++ ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
+ if (ret != 0)
+ return ret;
+- mode |= FMODE_WRITE;
+- if (opendata->o_res.delegation_type != 0)
+- delegation = opendata->o_res.delegation_type;
+- smp_rmb();
++ if (newstate != state)
++ return -ESTALE;
+ }
+ if (state->n_rdonly != 0) {
+- ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid);
++ ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
+ if (ret != 0)
+ return ret;
+- mode |= FMODE_READ;
++ if (newstate != state)
++ return -ESTALE;
+ }
+- clear_bit(NFS_DELEGATED_STATE, &state->flags);
+- if (mode == 0)
+- return 0;
+- if (opendata->o_res.delegation_type == 0)
+- opendata->o_res.delegation_type = delegation;
+- opendata->o_arg.open_flags |= mode;
+- newstate = nfs4_opendata_to_nfs4_state(opendata);
+- if (newstate != NULL) {
+- if (opendata->o_res.delegation_type != 0) {
+- struct nfs_inode *nfsi = NFS_I(newstate->inode);
+- int delegation_flags = 0;
+- if (nfsi->delegation)
+- delegation_flags = nfsi->delegation->flags;
+- if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
+- nfs_inode_set_delegation(newstate->inode,
+- opendata->owner->so_cred,
+- &opendata->o_res);
+- else
+- nfs_inode_reclaim_delegation(newstate->inode,
+- opendata->owner->so_cred,
+- &opendata->o_res);
+- }
+- nfs4_close_state(newstate, opendata->o_arg.open_flags);
++ /*
++ * We may have performed cached opens for all three recoveries.
++ * Check if we need to update the current stateid.
++ */
++ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
++ memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
++ write_seqlock(&state->seqlock);
++ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
++ memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
++ write_sequnlock(&state->seqlock);
+ }
+- if (newstate != state)
+- return -ESTALE;
+ return 0;
+ }
+
+@@ -462,41 +619,37 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
+ * OPEN_RECLAIM:
+ * reclaim state on the server after a reboot.
+ */
+-static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
++static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+- struct nfs_delegation *delegation = NFS_I(state->inode)->delegation;
++ struct nfs_delegation *delegation;
+ struct nfs4_opendata *opendata;
+ int delegation_type = 0;
+ int status;
+
+- if (delegation != NULL) {
+- if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
+- memcpy(&state->stateid, &delegation->stateid,
+- sizeof(state->stateid));
+- set_bit(NFS_DELEGATED_STATE, &state->flags);
+- return 0;
+- }
+- delegation_type = delegation->type;
+- }
+- opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
++ opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
+ if (opendata == NULL)
+ return -ENOMEM;
+ opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
+ opendata->o_arg.fh = NFS_FH(state->inode);
+ nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh);
++ rcu_read_lock();
++ delegation = rcu_dereference(NFS_I(state->inode)->delegation);
++ if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0)
++ delegation_type = delegation->type; /* reclaim the delegation's type, not its flag bits */
++ rcu_read_unlock();
+ opendata->o_arg.u.delegation_type = delegation_type;
+ status = nfs4_open_recover(opendata, state);
+- nfs4_opendata_free(opendata);
++ nfs4_opendata_put(opendata);
+ return status;
+ }
+
+-static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
++static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+ struct nfs_server *server = NFS_SERVER(state->inode);
+ struct nfs4_exception exception = { };
+ int err;
+ do {
+- err = _nfs4_do_open_reclaim(sp, state, dentry);
++ err = _nfs4_do_open_reclaim(ctx, state);
+ if (err != -NFS4ERR_DELAY)
+ break;
+ nfs4_handle_exception(server, err, &exception);
+@@ -512,37 +665,35 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
+ ctx = nfs4_state_find_open_context(state);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+- ret = nfs4_do_open_reclaim(sp, state, ctx->dentry);
++ ret = nfs4_do_open_reclaim(ctx, state);
+ put_nfs_open_context(ctx);
+ return ret;
+ }
+
+-static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
++static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
+ {
+ struct nfs4_state_owner *sp = state->owner;
+ struct nfs4_opendata *opendata;
+ int ret;
+
+- if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+- return 0;
+- opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
++ opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL);
+ if (opendata == NULL)
+ return -ENOMEM;
+ opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
+- memcpy(opendata->o_arg.u.delegation.data, state->stateid.data,
++ memcpy(opendata->o_arg.u.delegation.data, stateid->data,
+ sizeof(opendata->o_arg.u.delegation.data));
+ ret = nfs4_open_recover(opendata, state);
+- nfs4_opendata_free(opendata);
++ nfs4_opendata_put(opendata);
+ return ret;
+ }
+
+-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
++int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
+ {
+ struct nfs4_exception exception = { };
+- struct nfs_server *server = NFS_SERVER(dentry->d_inode);
++ struct nfs_server *server = NFS_SERVER(state->inode);
+ int err;
+ do {
+- err = _nfs4_open_delegation_recall(dentry, state);
++ err = _nfs4_open_delegation_recall(ctx, state, stateid);
+ switch (err) {
+ case 0:
+ return err;
+@@ -582,9 +733,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
+ memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
+ sizeof(data->o_res.stateid.data));
+ renew_lease(data->o_res.server, data->timestamp);
++ data->rpc_done = 1;
+ }
+- nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
+ nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
++ nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
+ }
+
+ static void nfs4_open_confirm_release(void *calldata)
+@@ -596,14 +748,14 @@ static void nfs4_open_confirm_release(void *calldata)
+ if (data->cancelled == 0)
+ goto out_free;
+ /* In case of error, no cleanup! */
+- if (data->rpc_status != 0)
++ if (!data->rpc_done)
+ goto out_free;
+ nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ state = nfs4_opendata_to_nfs4_state(data);
+- if (state != NULL)
+- nfs4_close_state(state, data->o_arg.open_flags);
++ if (!IS_ERR(state))
++ nfs4_close_state(&data->path, state, data->o_arg.open_flags);
+ out_free:
+- nfs4_opendata_free(data);
++ nfs4_opendata_put(data);
+ }
+
+ static const struct rpc_call_ops nfs4_open_confirm_ops = {
+@@ -621,12 +773,9 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
+ struct rpc_task *task;
+ int status;
+
+- atomic_inc(&data->count);
+- /*
+- * If rpc_run_task() ends up calling ->rpc_release(), we
+- * want to ensure that it takes the 'error' code path.
+- */
+- data->rpc_status = -ENOMEM;
++ kref_get(&data->kref);
++ data->rpc_done = 0;
++ data->rpc_status = 0;
+ task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+@@ -653,13 +802,35 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
+
+ if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
+ return;
++ /*
++ * Check if we still need to send an OPEN call, or if we can use
++ * a delegation instead.
++ */
++ if (data->state != NULL) {
++ struct nfs_delegation *delegation;
++
++ if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL)))
++ goto out_no_action;
++ rcu_read_lock();
++ delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
++ if (delegation != NULL &&
++ (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) {
++ rcu_read_unlock();
++ goto out_no_action;
++ }
++ rcu_read_unlock();
++ }
+ /* Update sequence id. */
+- data->o_arg.id = sp->so_id;
++ data->o_arg.id = sp->so_owner_id.id;
+ data->o_arg.clientid = sp->so_client->cl_clientid;
+ if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
+ msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
+ data->timestamp = jiffies;
+ rpc_call_setup(task, &msg, 0);
++ return;
++out_no_action:
++ task->tk_action = NULL;
++
+ }
+
+ static void nfs4_open_done(struct rpc_task *task, void *calldata)
+@@ -683,8 +854,11 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
+ data->rpc_status = -ENOTDIR;
+ }
+ renew_lease(data->o_res.server, data->timestamp);
++ if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
++ nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ }
+ nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
++ data->rpc_done = 1;
+ }
+
+ static void nfs4_open_release(void *calldata)
+@@ -696,17 +870,17 @@ static void nfs4_open_release(void *calldata)
+ if (data->cancelled == 0)
+ goto out_free;
+ /* In case of error, no cleanup! */
+- if (data->rpc_status != 0)
++ if (data->rpc_status != 0 || !data->rpc_done)
+ goto out_free;
+ /* In case we need an open_confirm, no cleanup! */
+ if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
+ goto out_free;
+ nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ state = nfs4_opendata_to_nfs4_state(data);
+- if (state != NULL)
+- nfs4_close_state(state, data->o_arg.open_flags);
++ if (!IS_ERR(state))
++ nfs4_close_state(&data->path, state, data->o_arg.open_flags);
+ out_free:
+- nfs4_opendata_free(data);
++ nfs4_opendata_put(data);
+ }
+
+ static const struct rpc_call_ops nfs4_open_ops = {
+@@ -727,12 +901,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
+ struct rpc_task *task;
+ int status;
+
+- atomic_inc(&data->count);
+- /*
+- * If rpc_run_task() ends up calling ->rpc_release(), we
+- * want to ensure that it takes the 'error' code path.
+- */
+- data->rpc_status = -ENOMEM;
++ kref_get(&data->kref);
++ data->rpc_done = 0;
++ data->rpc_status = 0;
++ data->cancelled = 0;
+ task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+@@ -743,7 +915,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
+ } else
+ status = data->rpc_status;
+ rpc_put_task(task);
+- if (status != 0)
++ if (status != 0 || !data->rpc_done)
+ return status;
+
+ if (o_arg->open_flags & O_CREAT) {
+@@ -756,7 +928,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
+ if (status != 0)
+ return status;
+ }
+- nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
+ return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
+ return 0;
+@@ -772,6 +943,8 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf
+ mask |= MAY_READ;
+ if (openflags & FMODE_WRITE)
+ mask |= MAY_WRITE;
++ if (openflags & FMODE_EXEC)
++ mask |= MAY_EXEC;
+ status = nfs_access_get_cached(inode, cred, &cache);
+ if (status == 0)
+ goto out;
+@@ -811,43 +984,32 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
+ * reclaim state on the server after a network partition.
+ * Assumes caller holds the appropriate lock
+ */
+-static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
++static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+- struct inode *inode = state->inode;
+- struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+ struct nfs4_opendata *opendata;
+- int openflags = state->state & (FMODE_READ|FMODE_WRITE);
+ int ret;
+
+- if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
+- ret = _nfs4_do_access(inode, sp->so_cred, openflags);
+- if (ret < 0)
+- return ret;
+- memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
+- set_bit(NFS_DELEGATED_STATE, &state->flags);
+- return 0;
+- }
+- opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
++ opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
+ if (opendata == NULL)
+ return -ENOMEM;
+ ret = nfs4_open_recover(opendata, state);
+ if (ret == -ESTALE) {
+ /* Invalidate the state owner so we don't ever use it again */
+- nfs4_drop_state_owner(sp);
+- d_drop(dentry);
++ nfs4_drop_state_owner(state->owner);
++ d_drop(ctx->path.dentry);
+ }
+- nfs4_opendata_free(opendata);
++ nfs4_opendata_put(opendata);
+ return ret;
+ }
+
+-static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
++static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+- struct nfs_server *server = NFS_SERVER(dentry->d_inode);
++ struct nfs_server *server = NFS_SERVER(state->inode);
+ struct nfs4_exception exception = { };
+ int err;
+
+ do {
+- err = _nfs4_open_expired(sp, state, dentry);
++ err = _nfs4_open_expired(ctx, state);
+ if (err == -NFS4ERR_DELAY)
+ nfs4_handle_exception(server, err, &exception);
+ } while (exception.retry);
+@@ -862,107 +1024,38 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
+ ctx = nfs4_state_find_open_context(state);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+- ret = nfs4_do_open_expired(sp, state, ctx->dentry);
++ ret = nfs4_do_open_expired(ctx, state);
+ put_nfs_open_context(ctx);
+ return ret;
+ }
+
+ /*
+- * Returns a referenced nfs4_state if there is an open delegation on the file
++ * On an EXCLUSIVE create, the server should send back a bitmask with FATTR4_*
++ * fields corresponding to attributes that were used to store the verifier.
++ * Make sure we clobber those fields in the later setattr call.
+ */
+-static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res)
+-{
+- struct nfs_delegation *delegation;
+- struct nfs_server *server = NFS_SERVER(inode);
+- struct nfs_client *clp = server->nfs_client;
+- struct nfs_inode *nfsi = NFS_I(inode);
+- struct nfs4_state_owner *sp = NULL;
+- struct nfs4_state *state = NULL;
+- int open_flags = flags & (FMODE_READ|FMODE_WRITE);
+- int err;
+-
+- err = -ENOMEM;
+- if (!(sp = nfs4_get_state_owner(server, cred))) {
+- dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
+- return err;
+- }
+- err = nfs4_recover_expired_lease(server);
+- if (err != 0)
+- goto out_put_state_owner;
+- /* Protect against reboot recovery - NOTE ORDER! */
+- down_read(&clp->cl_sem);
+- /* Protect against delegation recall */
+- down_read(&nfsi->rwsem);
+- delegation = NFS_I(inode)->delegation;
+- err = -ENOENT;
+- if (delegation == NULL || (delegation->type & open_flags) != open_flags)
+- goto out_err;
+- err = -ENOMEM;
+- state = nfs4_get_open_state(inode, sp);
+- if (state == NULL)
+- goto out_err;
+-
+- err = -ENOENT;
+- if ((state->state & open_flags) == open_flags) {
+- spin_lock(&inode->i_lock);
+- update_open_stateflags(state, open_flags);
+- spin_unlock(&inode->i_lock);
+- goto out_ok;
+- } else if (state->state != 0)
+- goto out_put_open_state;
+-
+- lock_kernel();
+- err = _nfs4_do_access(inode, cred, open_flags);
+- unlock_kernel();
+- if (err != 0)
+- goto out_put_open_state;
+- set_bit(NFS_DELEGATED_STATE, &state->flags);
+- update_open_stateid(state, &delegation->stateid, open_flags);
+-out_ok:
+- nfs4_put_state_owner(sp);
+- up_read(&nfsi->rwsem);
+- up_read(&clp->cl_sem);
+- *res = state;
+- return 0;
+-out_put_open_state:
+- nfs4_put_open_state(state);
+-out_err:
+- up_read(&nfsi->rwsem);
+- up_read(&clp->cl_sem);
+- if (err != -EACCES)
+- nfs_inode_return_delegation(inode);
+-out_put_state_owner:
+- nfs4_put_state_owner(sp);
+- return err;
+-}
+-
+-static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
++static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr)
+ {
+- struct nfs4_exception exception = { };
+- struct nfs4_state *res = ERR_PTR(-EIO);
+- int err;
++ if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
++ !(sattr->ia_valid & ATTR_ATIME_SET))
++ sattr->ia_valid |= ATTR_ATIME;
+
+- do {
+- err = _nfs4_open_delegated(inode, flags, cred, &res);
+- if (err == 0)
+- break;
+- res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
+- err, &exception));
+- } while (exception.retry);
+- return res;
++ if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
++ !(sattr->ia_valid & ATTR_MTIME_SET))
++ sattr->ia_valid |= ATTR_MTIME;
+ }
+
+ /*
+ * Returns a referenced nfs4_state
+ */
+-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
++static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+ {
+ struct nfs4_state_owner *sp;
+ struct nfs4_state *state = NULL;
+ struct nfs_server *server = NFS_SERVER(dir);
+ struct nfs_client *clp = server->nfs_client;
+ struct nfs4_opendata *opendata;
+- int status;
++ int status;
+
+ /* Protect against reboot recovery conflicts */
+ status = -ENOMEM;
+@@ -973,29 +1066,35 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
+ status = nfs4_recover_expired_lease(server);
+ if (status != 0)
+ goto err_put_state_owner;
++ if (path->dentry->d_inode != NULL)
++ nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
+ down_read(&clp->cl_sem);
+ status = -ENOMEM;
+- opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
++ opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
+ if (opendata == NULL)
+ goto err_release_rwsem;
+
++ if (path->dentry->d_inode != NULL)
++ opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
++
+ status = _nfs4_proc_open(opendata);
+ if (status != 0)
+- goto err_opendata_free;
++ goto err_opendata_put;
++
++ if (opendata->o_arg.open_flags & O_EXCL)
++ nfs4_exclusive_attrset(opendata, sattr);
+
+- status = -ENOMEM;
+ state = nfs4_opendata_to_nfs4_state(opendata);
+- if (state == NULL)
+- goto err_opendata_free;
+- if (opendata->o_res.delegation_type != 0)
+- nfs_inode_set_delegation(state->inode, cred, &opendata->o_res);
+- nfs4_opendata_free(opendata);
++ status = PTR_ERR(state);
++ if (IS_ERR(state))
++ goto err_opendata_put;
++ nfs4_opendata_put(opendata);
+ nfs4_put_state_owner(sp);
+ up_read(&clp->cl_sem);
+ *res = state;
+ return 0;
+-err_opendata_free:
+- nfs4_opendata_free(opendata);
++err_opendata_put:
++ nfs4_opendata_put(opendata);
+ err_release_rwsem:
+ up_read(&clp->cl_sem);
+ err_put_state_owner:
+@@ -1006,14 +1105,14 @@ out_err:
+ }
+
+
+-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
++static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
+ {
+ struct nfs4_exception exception = { };
+ struct nfs4_state *res;
+ int status;
+
+ do {
+- status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
++ status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
+ if (status == 0)
+ break;
+ /* NOTE: BAD_SEQID means the server and client disagree about the
+@@ -1028,7 +1127,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
+ * the user though...
+ */
+ if (status == -NFS4ERR_BAD_SEQID) {
+- printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n");
++ printk(KERN_WARNING "NFS: v4 server %s "
++ " returned a bad sequence-id error!\n",
++ NFS_SERVER(dir)->nfs_client->cl_hostname);
+ exception.retry = 1;
+ continue;
+ }
+@@ -1042,6 +1143,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
+ exception.retry = 1;
+ continue;
+ }
++ if (status == -EAGAIN) {
++ /* We must have found a delegation */
++ exception.retry = 1;
++ continue;
++ }
+ res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
+ status, &exception));
+ } while (exception.retry);
+@@ -1101,6 +1207,7 @@ static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+ }
+
+ struct nfs4_closedata {
++ struct path path;
+ struct inode *inode;
+ struct nfs4_state *state;
+ struct nfs_closeargs arg;
+@@ -1117,6 +1224,8 @@ static void nfs4_free_closedata(void *data)
+ nfs4_put_open_state(calldata->state);
+ nfs_free_seqid(calldata->arg.seqid);
+ nfs4_put_state_owner(sp);
++ dput(calldata->path.dentry);
++ mntput(calldata->path.mnt);
+ kfree(calldata);
+ }
+
+@@ -1134,8 +1243,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
+ nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
+ switch (task->tk_status) {
+ case 0:
+- memcpy(&state->stateid, &calldata->res.stateid,
+- sizeof(state->stateid));
++ nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags);
+ renew_lease(server, calldata->timestamp);
+ break;
+ case -NFS4ERR_STALE_STATEID:
+@@ -1160,26 +1268,30 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
+ .rpc_resp = &calldata->res,
+ .rpc_cred = state->owner->so_cred,
+ };
+- int mode = 0, old_mode;
++ int clear_rd, clear_wr, clear_rdwr;
++ int mode;
+
+ if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
+ return;
+- /* Recalculate the new open mode in case someone reopened the file
+- * while we were waiting in line to be scheduled.
+- */
++
++ mode = FMODE_READ|FMODE_WRITE;
++ clear_rd = clear_wr = clear_rdwr = 0;
+ spin_lock(&state->owner->so_lock);
+- spin_lock(&calldata->inode->i_lock);
+- mode = old_mode = state->state;
++ /* Calculate the change in open mode */
+ if (state->n_rdwr == 0) {
+- if (state->n_rdonly == 0)
++ if (state->n_rdonly == 0) {
+ mode &= ~FMODE_READ;
+- if (state->n_wronly == 0)
++ clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags);
++ clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
++ }
++ if (state->n_wronly == 0) {
+ mode &= ~FMODE_WRITE;
++ clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags);
++ clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
++ }
+ }
+- nfs4_state_set_mode_locked(state, mode);
+- spin_unlock(&calldata->inode->i_lock);
+ spin_unlock(&state->owner->so_lock);
+- if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) {
++ if (!clear_rd && !clear_wr && !clear_rdwr) {
+ /* Note: exit _without_ calling nfs4_close_done */
+ task->tk_action = NULL;
+ return;
+@@ -1209,19 +1321,21 @@ static const struct rpc_call_ops nfs4_close_ops = {
+ *
+ * NOTE: Caller must be holding the sp->so_owner semaphore!
+ */
+-int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
++int nfs4_do_close(struct path *path, struct nfs4_state *state)
+ {
+- struct nfs_server *server = NFS_SERVER(inode);
++ struct nfs_server *server = NFS_SERVER(state->inode);
+ struct nfs4_closedata *calldata;
++ struct nfs4_state_owner *sp = state->owner;
++ struct rpc_task *task;
+ int status = -ENOMEM;
+
+ calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
+ if (calldata == NULL)
+ goto out;
+- calldata->inode = inode;
++ calldata->inode = state->inode;
+ calldata->state = state;
+- calldata->arg.fh = NFS_FH(inode);
+- calldata->arg.stateid = &state->stateid;
++ calldata->arg.fh = NFS_FH(state->inode);
++ calldata->arg.stateid = &state->open_stateid;
+ /* Serialization for the sequence id */
+ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
+ if (calldata->arg.seqid == NULL)
+@@ -1229,36 +1343,55 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
+ calldata->arg.bitmask = server->attr_bitmask;
+ calldata->res.fattr = &calldata->fattr;
+ calldata->res.server = server;
++ calldata->path.mnt = mntget(path->mnt);
++ calldata->path.dentry = dget(path->dentry);
+
+- status = nfs4_call_async(server->client, &nfs4_close_ops, calldata);
+- if (status == 0)
+- goto out;
+-
+- nfs_free_seqid(calldata->arg.seqid);
++ task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
++ if (IS_ERR(task))
++ return PTR_ERR(task);
++ rpc_put_task(task);
++ return 0;
+ out_free_calldata:
+ kfree(calldata);
+ out:
++ nfs4_put_open_state(state);
++ nfs4_put_state_owner(sp);
+ return status;
+ }
+
+-static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
++static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
+ {
+ struct file *filp;
++ int ret;
+
+- filp = lookup_instantiate_filp(nd, dentry, NULL);
++ /* If the open_intent is for execute, we have an extra check to make */
++ if (nd->intent.open.flags & FMODE_EXEC) {
++ ret = _nfs4_do_access(state->inode,
++ state->owner->so_cred,
++ nd->intent.open.flags);
++ if (ret < 0)
++ goto out_close;
++ }
++ filp = lookup_instantiate_filp(nd, path->dentry, NULL);
+ if (!IS_ERR(filp)) {
+ struct nfs_open_context *ctx;
+ ctx = (struct nfs_open_context *)filp->private_data;
+ ctx->state = state;
+ return 0;
+ }
+- nfs4_close_state(state, nd->intent.open.flags);
+- return PTR_ERR(filp);
++ ret = PTR_ERR(filp);
++out_close:
++ nfs4_close_state(path, state, nd->intent.open.flags);
++ return ret;
+ }
+
+ struct dentry *
+ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+ {
++ struct path path = {
++ .mnt = nd->mnt,
++ .dentry = dentry,
++ };
+ struct iattr attr;
+ struct rpc_cred *cred;
+ struct nfs4_state *state;
+@@ -1277,7 +1410,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+ cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
+ if (IS_ERR(cred))
+ return (struct dentry *)cred;
+- state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
++ state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
+ put_rpccred(cred);
+ if (IS_ERR(state)) {
+ if (PTR_ERR(state) == -ENOENT)
+@@ -1287,22 +1420,24 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+ res = d_add_unique(dentry, igrab(state->inode));
+ if (res != NULL)
+ dentry = res;
+- nfs4_intent_set_file(nd, dentry, state);
++ nfs4_intent_set_file(nd, &path, state);
+ return res;
+ }
+
+ int
+ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
+ {
++ struct path path = {
++ .mnt = nd->mnt,
++ .dentry = dentry,
++ };
+ struct rpc_cred *cred;
+ struct nfs4_state *state;
+
+ cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
+ if (IS_ERR(cred))
+ return PTR_ERR(cred);
+- state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
+- if (IS_ERR(state))
+- state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
++ state = nfs4_do_open(dir, &path, openflags, NULL, cred);
+ put_rpccred(cred);
+ if (IS_ERR(state)) {
+ switch (PTR_ERR(state)) {
+@@ -1318,10 +1453,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
+ }
+ }
+ if (state->inode == dentry->d_inode) {
+- nfs4_intent_set_file(nd, dentry, state);
++ nfs4_intent_set_file(nd, &path, state);
+ return 1;
+ }
+- nfs4_close_state(state, openflags);
++ nfs4_close_state(&path, state, openflags);
+ out_drop:
+ d_drop(dentry);
+ return 0;
+@@ -1559,8 +1694,6 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+ dprintk("NFS call lookupfh %s\n", name->name);
+ status = rpc_call_sync(server->client, &msg, 0);
+ dprintk("NFS reply lookupfh: %d\n", status);
+- if (status == -NFS4ERR_MOVED)
+- status = -EREMOTE;
+ return status;
+ }
+
+@@ -1571,10 +1704,13 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+ struct nfs4_exception exception = { };
+ int err;
+ do {
+- err = nfs4_handle_exception(server,
+- _nfs4_proc_lookupfh(server, dirfh, name,
+- fhandle, fattr),
+- &exception);
++ err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr);
++ /* FIXME: !!!! */
++ if (err == -NFS4ERR_MOVED) {
++ err = -EREMOTE;
++ break;
++ }
++ err = nfs4_handle_exception(server, err, &exception);
+ } while (exception.retry);
+ return err;
+ }
+@@ -1582,28 +1718,10 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+ static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+ {
+- int status;
+- struct nfs_server *server = NFS_SERVER(dir);
+- struct nfs4_lookup_arg args = {
+- .bitmask = server->attr_bitmask,
+- .dir_fh = NFS_FH(dir),
+- .name = name,
+- };
+- struct nfs4_lookup_res res = {
+- .server = server,
+- .fattr = fattr,
+- .fh = fhandle,
+- };
+- struct rpc_message msg = {
+- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
+- .rpc_argp = &args,
+- .rpc_resp = &res,
+- };
+-
+- nfs_fattr_init(fattr);
++ int status;
+
+ dprintk("NFS call lookup %s\n", name->name);
+- status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
++ status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
+ if (status == -NFS4ERR_MOVED)
+ status = nfs4_get_referral(dir, name, fattr, fhandle);
+ dprintk("NFS reply lookup: %d\n", status);
+@@ -1752,6 +1870,10 @@ static int
+ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ int flags, struct nameidata *nd)
+ {
++ struct path path = {
++ .mnt = nd->mnt,
++ .dentry = dentry,
++ };
+ struct nfs4_state *state;
+ struct rpc_cred *cred;
+ int status = 0;
+@@ -1761,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ status = PTR_ERR(cred);
+ goto out;
+ }
+- state = nfs4_do_open(dir, dentry, flags, sattr, cred);
++ state = nfs4_do_open(dir, &path, flags, sattr, cred);
+ put_rpccred(cred);
+ if (IS_ERR(state)) {
+ status = PTR_ERR(state);
+@@ -1773,11 +1895,12 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
+ if (status == 0)
+ nfs_setattr_update_inode(state->inode, sattr);
++ nfs_post_op_update_inode(state->inode, &fattr);
+ }
+- if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
+- status = nfs4_intent_set_file(nd, dentry, state);
++ if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
++ status = nfs4_intent_set_file(nd, &path, state);
+ else
+- nfs4_close_state(state, flags);
++ nfs4_close_state(&path, state, flags);
+ out:
+ return status;
+ }
+@@ -3008,7 +3131,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
+ if (status != 0)
+ goto out;
+ lsp = request->fl_u.nfs4_fl.owner;
+- arg.lock_owner.id = lsp->ls_id;
++ arg.lock_owner.id = lsp->ls_id.id;
+ status = rpc_call_sync(server->client, &msg, 0);
+ switch (status) {
+ case 0:
+@@ -3152,6 +3275,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
+ {
+ struct nfs4_unlockdata *data;
+
++ /* Ensure this is an unlock - when canceling a lock, the
++ * canceled lock is passed in, and it won't be an unlock.
++ */
++ fl->fl_type = F_UNLCK;
++
+ data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
+ if (data == NULL) {
+ nfs_free_seqid(seqid);
+@@ -3222,7 +3350,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
+ goto out_free;
+ p->arg.lock_stateid = &lsp->ls_stateid;
+ p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
+- p->arg.lock_owner.id = lsp->ls_id;
++ p->arg.lock_owner.id = lsp->ls_id.id;
+ p->lsp = lsp;
+ atomic_inc(&lsp->ls_count);
+ p->ctx = get_nfs_open_context(ctx);
+@@ -3285,7 +3413,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
+ memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
+ sizeof(data->lsp->ls_stateid.data));
+ data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+- renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
++ renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
+ }
+ nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
+ out:
+diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
+index 8ed79d5..28551ae 100644
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -38,12 +38,14 @@
+ * subsequent patch.
+ */
+
++#include <linux/kernel.h>
+ #include <linux/slab.h>
+ #include <linux/smp_lock.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_idmap.h>
+ #include <linux/kthread.h>
+ #include <linux/module.h>
++#include <linux/random.h>
+ #include <linux/workqueue.h>
+ #include <linux/bitops.h>
+
+@@ -69,33 +71,14 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
+ return status;
+ }
+
+-u32
+-nfs4_alloc_lockowner_id(struct nfs_client *clp)
+-{
+- return clp->cl_lockowner_id ++;
+-}
+-
+-static struct nfs4_state_owner *
+-nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
+-{
+- struct nfs4_state_owner *sp = NULL;
+-
+- if (!list_empty(&clp->cl_unused)) {
+- sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
+- atomic_inc(&sp->so_count);
+- sp->so_cred = cred;
+- list_move(&sp->so_list, &clp->cl_state_owners);
+- clp->cl_nunused--;
+- }
+- return sp;
+-}
+-
+ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
+ {
+ struct nfs4_state_owner *sp;
++ struct rb_node *pos;
+ struct rpc_cred *cred = NULL;
+
+- list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
++ for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
++ sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+ if (list_empty(&sp->so_states))
+ continue;
+ cred = get_rpccred(sp->so_cred);
+@@ -107,32 +90,146 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
+ static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+ {
+ struct nfs4_state_owner *sp;
++ struct rb_node *pos;
+
+- if (!list_empty(&clp->cl_state_owners)) {
+- sp = list_entry(clp->cl_state_owners.next,
+- struct nfs4_state_owner, so_list);
++ pos = rb_first(&clp->cl_state_owners);
++ if (pos != NULL) {
++ sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+ return get_rpccred(sp->so_cred);
+ }
+ return NULL;
+ }
+
++static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
++ __u64 minval, int maxbits)
++{ /* Link @new into @root under an id that no node in the tree already uses */
++ struct rb_node **p, *parent;
++ struct nfs_unique_id *pos;
++ __u64 mask = ~0ULL;
++
++ if (maxbits < 64) /* shifting a 64-bit value by 64 is undefined: keep the all-ones mask */
++ mask = (1ULL << maxbits) - 1ULL;
++
++ /* Start from a random id within the mask so the distribution is more or less flat */
++ get_random_bytes(&new->id, sizeof(new->id));
++ new->id &= mask;
++ if (new->id < minval)
++ new->id += minval;
++retry:
++ p = &root->rb_node;
++ parent = NULL;
++
++ while (*p != NULL) {
++ parent = *p;
++ pos = rb_entry(parent, struct nfs_unique_id, rb_node);
++
++ if (new->id < pos->id)
++ p = &(*p)->rb_left;
++ else if (new->id > pos->id)
++ p = &(*p)->rb_right;
++ else
++ goto id_exists; /* candidate id is already in the tree */
++ }
++ rb_link_node(&new->rb_node, parent, p);
++ rb_insert_color(&new->rb_node, root);
++ return;
++id_exists:
++ for (;;) {
++ new->id++;
++ if (new->id < minval || (new->id & mask) != new->id) { /* wrapped or left the masked range */
++ new->id = minval;
++ break;
++ }
++ parent = rb_next(parent);
++ if (parent == NULL)
++ break; /* no node with a larger id */
++ pos = rb_entry(parent, struct nfs_unique_id, rb_node);
++ if (new->id < pos->id)
++ break; /* gap before the next node: candidate is unused */
++ }
++ goto retry; /* walk again from the root to find the link point for the new candidate */
++}
++
++static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
++{
++ rb_erase(&id->rb_node, root);
++}
++
+ static struct nfs4_state_owner *
+-nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
++nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+ {
++ struct nfs_client *clp = server->nfs_client;
++ struct rb_node **p = &clp->cl_state_owners.rb_node,
++ *parent = NULL;
+ struct nfs4_state_owner *sp, *res = NULL;
+
+- list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+- if (sp->so_cred != cred)
++ while (*p != NULL) {
++ parent = *p;
++ sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
++
++ if (server < sp->so_server) {
++ p = &parent->rb_left;
+ continue;
+- atomic_inc(&sp->so_count);
+- /* Move to the head of the list */
+- list_move(&sp->so_list, &clp->cl_state_owners);
+- res = sp;
+- break;
++ }
++ if (server > sp->so_server) {
++ p = &parent->rb_right;
++ continue;
++ }
++ if (cred < sp->so_cred)
++ p = &parent->rb_left;
++ else if (cred > sp->so_cred)
++ p = &parent->rb_right;
++ else {
++ atomic_inc(&sp->so_count);
++ res = sp;
++ break;
++ }
+ }
+ return res;
+ }
+
++static struct nfs4_state_owner *
++nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
++{
++ struct rb_node **p = &clp->cl_state_owners.rb_node,
++ *parent = NULL;
++ struct nfs4_state_owner *sp;
++
++ while (*p != NULL) {
++ parent = *p;
++ sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
++
++ if (new->so_server < sp->so_server) {
++ p = &parent->rb_left;
++ continue;
++ }
++ if (new->so_server > sp->so_server) {
++ p = &parent->rb_right;
++ continue;
++ }
++ if (new->so_cred < sp->so_cred)
++ p = &parent->rb_left;
++ else if (new->so_cred > sp->so_cred)
++ p = &parent->rb_right;
++ else {
++ atomic_inc(&sp->so_count);
++ return sp;
++ }
++ }
++ nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
++ rb_link_node(&new->so_client_node, parent, p);
++ rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
++ return new;
++}
++
++static void
++nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
++{
++ if (!RB_EMPTY_NODE(&sp->so_client_node))
++ rb_erase(&sp->so_client_node, &clp->cl_state_owners);
++ nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
++}
++
+ /*
+ * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
+ * create a new state_owner.
+@@ -160,10 +257,14 @@ nfs4_alloc_state_owner(void)
+ void
+ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
+ {
+- struct nfs_client *clp = sp->so_client;
+- spin_lock(&clp->cl_lock);
+- list_del_init(&sp->so_list);
+- spin_unlock(&clp->cl_lock);
++ if (!RB_EMPTY_NODE(&sp->so_client_node)) {
++ struct nfs_client *clp = sp->so_client;
++
++ spin_lock(&clp->cl_lock);
++ rb_erase(&sp->so_client_node, &clp->cl_state_owners);
++ RB_CLEAR_NODE(&sp->so_client_node);
++ spin_unlock(&clp->cl_lock);
++ }
+ }
+
+ /*
+@@ -175,26 +276,25 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
+ struct nfs_client *clp = server->nfs_client;
+ struct nfs4_state_owner *sp, *new;
+
+- get_rpccred(cred);
+- new = nfs4_alloc_state_owner();
+ spin_lock(&clp->cl_lock);
+- sp = nfs4_find_state_owner(clp, cred);
+- if (sp == NULL)
+- sp = nfs4_client_grab_unused(clp, cred);
+- if (sp == NULL && new != NULL) {
+- list_add(&new->so_list, &clp->cl_state_owners);
+- new->so_client = clp;
+- new->so_id = nfs4_alloc_lockowner_id(clp);
+- new->so_cred = cred;
+- sp = new;
+- new = NULL;
+- }
++ sp = nfs4_find_state_owner(server, cred);
+ spin_unlock(&clp->cl_lock);
+- kfree(new);
+ if (sp != NULL)
+ return sp;
+- put_rpccred(cred);
+- return NULL;
++ new = nfs4_alloc_state_owner();
++ if (new == NULL)
++ return NULL;
++ new->so_client = clp;
++ new->so_server = server;
++ new->so_cred = cred;
++ spin_lock(&clp->cl_lock);
++ sp = nfs4_insert_state_owner(clp, new);
++ spin_unlock(&clp->cl_lock);
++ if (sp == new)
++ get_rpccred(cred);
++ else
++ kfree(new);
++ return sp;
+ }
+
+ /*
+@@ -208,18 +308,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
+
+ if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
+ return;
+- if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
+- goto out_free;
+- if (list_empty(&sp->so_list))
+- goto out_free;
+- list_move(&sp->so_list, &clp->cl_unused);
+- clp->cl_nunused++;
+- spin_unlock(&clp->cl_lock);
+- put_rpccred(cred);
+- cred = NULL;
+- return;
+-out_free:
+- list_del(&sp->so_list);
++ nfs4_remove_state_owner(clp, sp);
+ spin_unlock(&clp->cl_lock);
+ put_rpccred(cred);
+ kfree(sp);
+@@ -236,6 +325,7 @@ nfs4_alloc_open_state(void)
+ atomic_set(&state->count, 1);
+ INIT_LIST_HEAD(&state->lock_states);
+ spin_lock_init(&state->state_lock);
++ seqlock_init(&state->seqlock);
+ return state;
+ }
+
+@@ -263,13 +353,10 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
+ struct nfs4_state *state;
+
+ list_for_each_entry(state, &nfsi->open_states, inode_states) {
+- /* Is this in the process of being freed? */
+- if (state->state == 0)
++ if (state->owner != owner)
+ continue;
+- if (state->owner == owner) {
+- atomic_inc(&state->count);
++ if (atomic_inc_not_zero(&state->count))
+ return state;
+- }
+ }
+ return NULL;
+ }
+@@ -341,16 +428,15 @@ void nfs4_put_open_state(struct nfs4_state *state)
+ /*
+ * Close the current file.
+ */
+-void nfs4_close_state(struct nfs4_state *state, mode_t mode)
++void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
+ {
+- struct inode *inode = state->inode;
+ struct nfs4_state_owner *owner = state->owner;
+- int oldstate, newstate = 0;
++ int call_close = 0;
++ int newstate;
+
+ atomic_inc(&owner->so_count);
+ /* Protect against nfs4_find_state() */
+ spin_lock(&owner->so_lock);
+- spin_lock(&inode->i_lock);
+ switch (mode & (FMODE_READ | FMODE_WRITE)) {
+ case FMODE_READ:
+ state->n_rdonly--;
+@@ -361,24 +447,29 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
+ case FMODE_READ|FMODE_WRITE:
+ state->n_rdwr--;
+ }
+- oldstate = newstate = state->state;
++ newstate = FMODE_READ|FMODE_WRITE;
+ if (state->n_rdwr == 0) {
+- if (state->n_rdonly == 0)
++ if (state->n_rdonly == 0) {
+ newstate &= ~FMODE_READ;
+- if (state->n_wronly == 0)
++ call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
++ call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
++ }
++ if (state->n_wronly == 0) {
+ newstate &= ~FMODE_WRITE;
++ call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
++ call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
++ }
++ if (newstate == 0)
++ clear_bit(NFS_DELEGATED_STATE, &state->flags);
+ }
+- if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+- nfs4_state_set_mode_locked(state, newstate);
+- oldstate = newstate;
+- }
+- spin_unlock(&inode->i_lock);
++ nfs4_state_set_mode_locked(state, newstate);
+ spin_unlock(&owner->so_lock);
+
+- if (oldstate != newstate && nfs4_do_close(inode, state) == 0)
+- return;
+- nfs4_put_open_state(state);
+- nfs4_put_state_owner(owner);
++ if (!call_close) {
++ nfs4_put_open_state(state);
++ nfs4_put_state_owner(owner);
++ } else
++ nfs4_do_close(path, state);
+ }
+
+ /*
+@@ -415,12 +506,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
+ atomic_set(&lsp->ls_count, 1);
+ lsp->ls_owner = fl_owner;
+ spin_lock(&clp->cl_lock);
+- lsp->ls_id = nfs4_alloc_lockowner_id(clp);
++ nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
+ spin_unlock(&clp->cl_lock);
+ INIT_LIST_HEAD(&lsp->ls_locks);
+ return lsp;
+ }
+
++static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
++{
++ struct nfs_client *clp = lsp->ls_state->owner->so_client;
++
++ spin_lock(&clp->cl_lock);
++ nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
++ spin_unlock(&clp->cl_lock);
++ kfree(lsp);
++}
++
+ /*
+ * Return a compatible lock_state. If no initialized lock_state structure
+ * exists, return an uninitialized one.
+@@ -450,7 +551,8 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
+ return NULL;
+ }
+ spin_unlock(&state->state_lock);
+- kfree(new);
++ if (new != NULL)
++ nfs4_free_lock_state(new);
+ return lsp;
+ }
+
+@@ -471,7 +573,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
+ if (list_empty(&state->lock_states))
+ clear_bit(LK_STATE_IN_USE, &state->flags);
+ spin_unlock(&state->state_lock);
+- kfree(lsp);
++ nfs4_free_lock_state(lsp);
+ }
+
+ static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
+@@ -513,8 +615,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
+ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
+ {
+ struct nfs4_lock_state *lsp;
++ int seq;
+
+- memcpy(dst, &state->stateid, sizeof(*dst));
++ do {
++ seq = read_seqbegin(&state->seqlock);
++ memcpy(dst, &state->stateid, sizeof(*dst));
++ } while(read_seqretry(&state->seqlock, seq));
+ if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
+ return;
+
+@@ -557,12 +663,18 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
+ * failed with a seqid incrementing error -
+ * see comments nfs_fs.h:seqid_mutating_error()
+ */
+-static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
++static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
+ {
+ switch (status) {
+ case 0:
+ break;
+ case -NFS4ERR_BAD_SEQID:
++ if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
++ return; /* only an unconfirmed sequence gets the warning below */
++ printk(KERN_WARNING "NFS: v4 server returned a bad"
++ " sequence-id error on an"
++ " unconfirmed sequence %p!\n",
++ seqid->sequence);
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_BAD_STATEID:
+@@ -586,7 +698,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
+ struct nfs4_state_owner, so_seqid);
+ nfs4_drop_state_owner(sp);
+ }
+- return nfs_increment_seqid(status, seqid);
++ nfs_increment_seqid(status, seqid);
+ }
+
+ /*
+@@ -596,7 +708,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
+ */
+ void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
+ {
+- return nfs_increment_seqid(status, seqid);
++ nfs_increment_seqid(status, seqid);
+ }
+
+ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
+@@ -748,15 +860,21 @@ out_err:
+ static void nfs4_state_mark_reclaim(struct nfs_client *clp)
+ {
+ struct nfs4_state_owner *sp;
++ struct rb_node *pos;
+ struct nfs4_state *state;
+ struct nfs4_lock_state *lock;
+
+ /* Reset all sequence ids to zero */
+- list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
++ for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
++ sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+ sp->so_seqid.counter = 0;
+ sp->so_seqid.flags = 0;
+ spin_lock(&sp->so_lock);
+ list_for_each_entry(state, &sp->so_states, open_states) {
++ clear_bit(NFS_DELEGATED_STATE, &state->flags);
++ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
++ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
++ clear_bit(NFS_O_RDWR_STATE, &state->flags);
+ list_for_each_entry(lock, &state->lock_states, ls_locks) {
+ lock->ls_seqid.counter = 0;
+ lock->ls_seqid.flags = 0;
+@@ -771,6 +889,7 @@ static int reclaimer(void *ptr)
+ {
+ struct nfs_client *clp = ptr;
+ struct nfs4_state_owner *sp;
++ struct rb_node *pos;
+ struct nfs4_state_recovery_ops *ops;
+ struct rpc_cred *cred;
+ int status = 0;
+@@ -816,7 +935,8 @@ restart_loop:
+ /* Mark all delegations for reclaim */
+ nfs_delegation_mark_reclaim(clp);
+ /* Note: list is protected by exclusive lock on cl->cl_sem */
+- list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
++ for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
++ sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+ status = nfs4_reclaim_open_state(ops, sp);
+ if (status < 0) {
+ if (status == -NFS4ERR_NO_GRACE) {
+diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
+index 8003c91..c087384 100644
+--- a/fs/nfs/nfs4xdr.c
++++ b/fs/nfs/nfs4xdr.c
+@@ -68,9 +68,10 @@ static int nfs4_stat_to_errno(int);
+ #endif
+
+ /* lock,open owner id:
+- * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2)
++ * we currently use size 4 (an 8 byte tag followed by a u64 id) out of (NFS4_OPAQUE_LIMIT >> 2)
+ */
+-#define owner_id_maxsz (1 + 1)
++#define open_owner_id_maxsz (1 + 4)
++#define lock_owner_id_maxsz (1 + 4)
+ #define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
+ #define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
+ #define op_encode_hdr_maxsz (1)
+@@ -87,9 +88,11 @@ static int nfs4_stat_to_errno(int);
+ #define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
+ #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2))
+ #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
++#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
++#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+ /* This is based on getfattr, which uses the most attributes: */
+ #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
+- 3 + 3 + 3 + 2 * nfs4_name_maxsz))
++ 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
+ #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
+ nfs4_fattr_value_maxsz)
+ #define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
+@@ -116,8 +119,27 @@ static int nfs4_stat_to_errno(int);
+ 3 + (NFS4_VERIFIER_SIZE >> 2))
+ #define decode_setclientid_confirm_maxsz \
+ (op_decode_hdr_maxsz)
+-#define encode_lookup_maxsz (op_encode_hdr_maxsz + \
+- 1 + ((3 + NFS4_FHSIZE) >> 2))
++#define encode_lookup_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
++#define decode_lookup_maxsz (op_decode_hdr_maxsz)
++#define encode_share_access_maxsz \
++ (2)
++#define encode_createmode_maxsz (1 + nfs4_fattr_maxsz)
++#define encode_opentype_maxsz (1 + encode_createmode_maxsz)
++#define encode_claim_null_maxsz (1 + nfs4_name_maxsz)
++#define encode_open_maxsz (op_encode_hdr_maxsz + \
++ 2 + encode_share_access_maxsz + 2 + \
++ open_owner_id_maxsz + \
++ encode_opentype_maxsz + \
++ encode_claim_null_maxsz)
++#define decode_ace_maxsz (3 + nfs4_owner_maxsz)
++#define decode_delegation_maxsz (1 + XDR_QUADLEN(NFS4_STATEID_SIZE) + 1 + \
++ decode_ace_maxsz)
++#define decode_change_info_maxsz (5)
++#define decode_open_maxsz (op_decode_hdr_maxsz + \
++ XDR_QUADLEN(NFS4_STATEID_SIZE) + \
++ decode_change_info_maxsz + 1 + \
++ nfs4_fattr_bitmap_maxsz + \
++ decode_delegation_maxsz)
+ #define encode_remove_maxsz (op_encode_hdr_maxsz + \
+ nfs4_name_maxsz)
+ #define encode_rename_maxsz (op_encode_hdr_maxsz + \
+@@ -134,9 +156,15 @@ static int nfs4_stat_to_errno(int);
+ #define encode_create_maxsz (op_encode_hdr_maxsz + \
+ 2 + nfs4_name_maxsz + \
+ nfs4_fattr_maxsz)
+-#define decode_create_maxsz (op_decode_hdr_maxsz + 8)
++#define decode_create_maxsz (op_decode_hdr_maxsz + \
++ decode_change_info_maxsz + \
++ nfs4_fattr_bitmap_maxsz)
+ #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
+ #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
++#define encode_fs_locations_maxsz \
++ (encode_getattr_maxsz)
++#define decode_fs_locations_maxsz \
++ (0)
+ #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */
+ #define NFS4_dec_compound_sz (1024) /* XXX: large enough? */
+ #define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \
+@@ -174,16 +202,21 @@ static int nfs4_stat_to_errno(int);
+ op_decode_hdr_maxsz + 2 + \
+ decode_getattr_maxsz)
+ #define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
+- encode_putfh_maxsz + \
+- op_encode_hdr_maxsz + \
+- 13 + 3 + 2 + 64 + \
+- encode_getattr_maxsz + \
+- encode_getfh_maxsz)
++ encode_putfh_maxsz + \
++ encode_savefh_maxsz + \
++ encode_open_maxsz + \
++ encode_getfh_maxsz + \
++ encode_getattr_maxsz + \
++ encode_restorefh_maxsz + \
++ encode_getattr_maxsz)
+ #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
+- decode_putfh_maxsz + \
+- op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \
+- decode_getattr_maxsz + \
+- decode_getfh_maxsz)
++ decode_putfh_maxsz + \
++ decode_savefh_maxsz + \
++ decode_open_maxsz + \
++ decode_getfh_maxsz + \
++ decode_getattr_maxsz + \
++ decode_restorefh_maxsz + \
++ decode_getattr_maxsz)
+ #define NFS4_enc_open_confirm_sz \
+ (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+@@ -193,12 +226,12 @@ static int nfs4_stat_to_errno(int);
+ op_decode_hdr_maxsz + 4)
+ #define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+- op_encode_hdr_maxsz + \
+- 11)
++ encode_open_maxsz + \
++ encode_getattr_maxsz)
+ #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+- op_decode_hdr_maxsz + \
+- 4 + 5 + 2 + 3)
++ decode_open_maxsz + \
++ decode_getattr_maxsz)
+ #define NFS4_enc_open_downgrade_sz \
+ (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+@@ -256,19 +289,19 @@ static int nfs4_stat_to_errno(int);
+ op_encode_hdr_maxsz + \
+ 1 + 1 + 2 + 2 + \
+ 1 + 4 + 1 + 2 + \
+- owner_id_maxsz)
++ lock_owner_id_maxsz)
+ #define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_getattr_maxsz + \
+ op_decode_hdr_maxsz + \
+ 2 + 2 + 1 + 2 + \
+- owner_id_maxsz)
++ lock_owner_id_maxsz)
+ #define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getattr_maxsz + \
+ op_encode_hdr_maxsz + \
+ 1 + 2 + 2 + 2 + \
+- owner_id_maxsz)
++ lock_owner_id_maxsz)
+ #define NFS4_dec_lockt_sz (NFS4_dec_lock_sz)
+ #define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+@@ -298,7 +331,7 @@ static int nfs4_stat_to_errno(int);
+ encode_getfh_maxsz)
+ #define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+- op_decode_hdr_maxsz + \
++ decode_lookup_maxsz + \
+ decode_getattr_maxsz + \
+ decode_getfh_maxsz)
+ #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
+@@ -417,12 +450,13 @@ static int nfs4_stat_to_errno(int);
+ #define NFS4_enc_fs_locations_sz \
+ (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+- encode_getattr_maxsz)
++ encode_lookup_maxsz + \
++ encode_fs_locations_maxsz)
+ #define NFS4_dec_fs_locations_sz \
+ (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+- op_decode_hdr_maxsz + \
+- nfs4_fattr_bitmap_maxsz)
++ decode_lookup_maxsz + \
++ decode_fs_locations_maxsz)
+
+ static struct {
+ unsigned int mode;
+@@ -793,13 +827,14 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
+ WRITE64(nfs4_lock_length(args->fl));
+ WRITE32(args->new_lock_owner);
+ if (args->new_lock_owner){
+- RESERVE_SPACE(4+NFS4_STATEID_SIZE+20);
++ RESERVE_SPACE(4+NFS4_STATEID_SIZE+32);
+ WRITE32(args->open_seqid->sequence->counter);
+ WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE);
+ WRITE32(args->lock_seqid->sequence->counter);
+ WRITE64(args->lock_owner.clientid);
+- WRITE32(4);
+- WRITE32(args->lock_owner.id);
++ WRITE32(16);
++ WRITEMEM("lock id:", 8);
++ WRITE64(args->lock_owner.id);
+ }
+ else {
+ RESERVE_SPACE(NFS4_STATEID_SIZE+4);
+@@ -814,14 +849,15 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg
+ {
+ __be32 *p;
+
+- RESERVE_SPACE(40);
++ RESERVE_SPACE(52);
+ WRITE32(OP_LOCKT);
+ WRITE32(nfs4_lock_type(args->fl, 0));
+ WRITE64(args->fl->fl_start);
+ WRITE64(nfs4_lock_length(args->fl));
+ WRITE64(args->lock_owner.clientid);
+- WRITE32(4);
+- WRITE32(args->lock_owner.id);
++ WRITE32(16);
++ WRITEMEM("lock id:", 8);
++ WRITE64(args->lock_owner.id);
+
+ return 0;
+ }
+@@ -886,10 +922,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
+ WRITE32(OP_OPEN);
+ WRITE32(arg->seqid->sequence->counter);
+ encode_share_access(xdr, arg->open_flags);
+- RESERVE_SPACE(16);
++ RESERVE_SPACE(28);
+ WRITE64(arg->clientid);
+- WRITE32(4);
+- WRITE32(arg->id);
++ WRITE32(16);
++ WRITEMEM("open id:", 8);
++ WRITE64(arg->id);
+ }
+
+ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
+@@ -1071,7 +1108,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
+
+ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
+ {
+- struct rpc_auth *auth = req->rq_task->tk_auth;
++ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+ uint32_t attrs[2] = {
+ FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
+ FATTR4_WORD1_MOUNTED_ON_FILEID,
+@@ -1117,7 +1154,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
+
+ static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req)
+ {
+- struct rpc_auth *auth = req->rq_task->tk_auth;
++ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+ unsigned int replen;
+ __be32 *p;
+
+@@ -1735,7 +1772,7 @@ out:
+ */
+ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+ {
+- struct rpc_auth *auth = req->rq_task->tk_auth;
++ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+ struct xdr_stream xdr;
+ struct compound_hdr hdr = {
+ .nops = 2,
+@@ -1795,7 +1832,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
+ struct nfs_getaclargs *args)
+ {
+ struct xdr_stream xdr;
+- struct rpc_auth *auth = req->rq_task->tk_auth;
++ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+ struct compound_hdr hdr = {
+ .nops = 2,
+ };
+@@ -2030,7 +2067,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
+ struct compound_hdr hdr = {
+ .nops = 3,
+ };
+- struct rpc_auth *auth = req->rq_task->tk_auth;
++ struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
+ int replen;
+ int status;
+
+@@ -3269,7 +3306,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
+ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
+ {
+ __be32 *p;
+- uint32_t bmlen;
++ uint32_t savewords, bmlen, i;
+ int status;
+
+ status = decode_op_hdr(xdr, OP_OPEN);
+@@ -3287,7 +3324,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
+ goto xdr_error;
+
+ READ_BUF(bmlen << 2);
+- p += bmlen;
++ savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
++ for (i = 0; i < savewords; ++i)
++ READ32(res->attrset[i]);
++ for (; i < NFS4_BITMAP_SIZE; i++)
++ res->attrset[i] = 0;
++
+ return decode_delegation(xdr, res);
+ xdr_error:
+ dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
+diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
+index 49d1008..3490322 100644
+--- a/fs/nfs/nfsroot.c
++++ b/fs/nfs/nfsroot.c
+@@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto)
+ printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
+ program, version, NIPQUAD(servaddr));
+ set_sockaddr(&sin, servaddr, 0);
+- return rpcb_getport_external(&sin, program, version, proto);
++ return rpcb_getport_sync(&sin, program, version, proto);
+ }
+
+
+@@ -496,7 +496,8 @@ static int __init root_nfs_get_handle(void)
+ NFS_MNT3_VERSION : NFS_MNT_VERSION;
+
+ set_sockaddr(&sin, servaddr, htons(mount_port));
+- status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol);
++ status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL,
++ nfs_path, version, protocol, &fh);
+ if (status < 0)
+ printk(KERN_ERR "Root-NFS: Server returned error %d "
+ "while mounting %s\n", status, nfs_path);
+diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
+index c5bb51a..f56dae5 100644
+--- a/fs/nfs/pagelist.c
++++ b/fs/nfs/pagelist.c
+@@ -85,9 +85,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
+ req->wb_offset = offset;
+ req->wb_pgbase = offset;
+ req->wb_bytes = count;
+- atomic_set(&req->wb_count, 1);
+ req->wb_context = get_nfs_open_context(ctx);
+-
++ kref_init(&req->wb_kref);
+ return req;
+ }
+
+@@ -109,30 +108,31 @@ void nfs_unlock_request(struct nfs_page *req)
+ }
+
+ /**
+- * nfs_set_page_writeback_locked - Lock a request for writeback
++ * nfs_set_page_tag_locked - Tag a request as locked
+ * @req:
+ */
+-int nfs_set_page_writeback_locked(struct nfs_page *req)
++static int nfs_set_page_tag_locked(struct nfs_page *req)
+ {
+- struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
++ struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
+
+ if (!nfs_lock_request(req))
+ return 0;
+- radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
++ radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ return 1;
+ }
+
+ /**
+- * nfs_clear_page_writeback - Unlock request and wake up sleepers
++ * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
+ */
+-void nfs_clear_page_writeback(struct nfs_page *req)
++void nfs_clear_page_tag_locked(struct nfs_page *req)
+ {
+- struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
++ struct inode *inode = req->wb_context->path.dentry->d_inode;
++ struct nfs_inode *nfsi = NFS_I(inode);
+
+ if (req->wb_page != NULL) {
+- spin_lock(&nfsi->req_lock);
+- radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+- spin_unlock(&nfsi->req_lock);
++ spin_lock(&inode->i_lock);
++ radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
++ spin_unlock(&inode->i_lock);
+ }
+ nfs_unlock_request(req);
+ }
+@@ -160,11 +160,9 @@ void nfs_clear_request(struct nfs_page *req)
+ *
+ * Note: Should never be called with the spinlock held!
+ */
+-void
+-nfs_release_request(struct nfs_page *req)
++static void nfs_free_request(struct kref *kref)
+ {
+- if (!atomic_dec_and_test(&req->wb_count))
+- return;
++ struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+
+ /* Release struct file or cached credential */
+ nfs_clear_request(req);
+@@ -172,6 +170,11 @@ nfs_release_request(struct nfs_page *req)
+ nfs_page_free(req);
+ }
+
++void nfs_release_request(struct nfs_page *req)
++{
++ kref_put(&req->wb_kref, nfs_free_request);
++}
++
+ static int nfs_wait_bit_interruptible(void *word)
+ {
+ int ret = 0;
+@@ -193,7 +196,7 @@ static int nfs_wait_bit_interruptible(void *word)
+ int
+ nfs_wait_on_request(struct nfs_page *req)
+ {
+- struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode);
++ struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
+ sigset_t oldmask;
+ int ret = 0;
+
+@@ -379,20 +382,20 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
+ /**
+ * nfs_scan_list - Scan a list for matching requests
+ * @nfsi: NFS inode
+- * @head: One of the NFS inode request lists
+ * @dst: Destination list
+ * @idx_start: lower bound of page->index to scan
+ * @npages: idx_start + npages sets the upper bound to scan.
++ * @tag: tag to scan for
+ *
+ * Moves elements from one of the inode request lists.
+ * If the number of requests is set to 0, the entire address_space
+ * starting at index idx_start, is scanned.
+ * The requests are *not* checked to ensure that they form a contiguous set.
+- * You must be holding the inode's req_lock when calling this function
++ * You must be holding the inode's i_lock when calling this function
+ */
+-int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
++int nfs_scan_list(struct nfs_inode *nfsi,
+ struct list_head *dst, pgoff_t idx_start,
+- unsigned int npages)
++ unsigned int npages, int tag)
+ {
+ struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
+ struct nfs_page *req;
+@@ -407,9 +410,9 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
+ idx_end = idx_start + npages - 1;
+
+ for (;;) {
+- found = radix_tree_gang_lookup(&nfsi->nfs_page_tree,
++ found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
+ (void **)&pgvec[0], idx_start,
+- NFS_SCAN_MAXENTRIES);
++ NFS_SCAN_MAXENTRIES, tag);
+ if (found <= 0)
+ break;
+ for (i = 0; i < found; i++) {
+@@ -417,15 +420,18 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
+ if (req->wb_index > idx_end)
+ goto out;
+ idx_start = req->wb_index + 1;
+- if (req->wb_list_head != head)
+- continue;
+- if (nfs_set_page_writeback_locked(req)) {
++ if (nfs_set_page_tag_locked(req)) {
+ nfs_list_remove_request(req);
++ radix_tree_tag_clear(&nfsi->nfs_page_tree,
++ req->wb_index, tag);
+ nfs_list_add_request(req, dst);
+ res++;
++ if (res == INT_MAX)
++ goto out;
+ }
+ }
+-
++ /* for latency reduction */
++ cond_resched_lock(&nfsi->vfs_inode.i_lock);
+ }
+ out:
+ return res;
+diff --git a/fs/nfs/read.c b/fs/nfs/read.c
+index 7bd7cb9..6ae2e58 100644
+--- a/fs/nfs/read.c
++++ b/fs/nfs/read.c
+@@ -145,8 +145,8 @@ static void nfs_readpage_release(struct nfs_page *req)
+ unlock_page(req->wb_page);
+
+ dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
+- req->wb_context->dentry->d_inode->i_sb->s_id,
+- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
++ req->wb_context->path.dentry->d_inode->i_sb->s_id,
++ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+ nfs_clear_request(req);
+@@ -164,7 +164,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+ int flags;
+
+ data->req = req;
+- data->inode = inode = req->wb_context->dentry->d_inode;
++ data->inode = inode = req->wb_context->path.dentry->d_inode;
+ data->cred = req->wb_context->cred;
+
+ data->args.fh = NFS_FH(inode);
+@@ -483,17 +483,19 @@ int nfs_readpage(struct file *file, struct page *page)
+ */
+ error = nfs_wb_page(inode, page);
+ if (error)
+- goto out_error;
++ goto out_unlock;
++ if (PageUptodate(page))
++ goto out_unlock;
+
+ error = -ESTALE;
+ if (NFS_STALE(inode))
+- goto out_error;
++ goto out_unlock;
+
+ if (file == NULL) {
+ error = -EBADF;
+ ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
+ if (ctx == NULL)
+- goto out_error;
++ goto out_unlock;
+ } else
+ ctx = get_nfs_open_context((struct nfs_open_context *)
+ file->private_data);
+@@ -502,8 +504,7 @@ int nfs_readpage(struct file *file, struct page *page)
+
+ put_nfs_open_context(ctx);
+ return error;
+-
+-out_error:
++out_unlock:
+ unlock_page(page);
+ return error;
+ }
+@@ -520,21 +521,32 @@ readpage_async_filler(void *data, struct page *page)
+ struct inode *inode = page->mapping->host;
+ struct nfs_page *new;
+ unsigned int len;
++ int error;
++
++ error = nfs_wb_page(inode, page);
++ if (error)
++ goto out_unlock;
++ if (PageUptodate(page))
++ goto out_unlock;
+
+- nfs_wb_page(inode, page);
+ len = nfs_page_length(page);
+ if (len == 0)
+ return nfs_return_empty_page(page);
++
+ new = nfs_create_request(desc->ctx, inode, page, 0, len);
+- if (IS_ERR(new)) {
+- SetPageError(page);
+- unlock_page(page);
+- return PTR_ERR(new);
+- }
++ if (IS_ERR(new))
++ goto out_error;
++
+ if (len < PAGE_CACHE_SIZE)
+ zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
+ nfs_pageio_add_request(desc->pgio, new);
+ return 0;
++out_error:
++ error = PTR_ERR(new);
++ SetPageError(page);
++out_unlock:
++ unlock_page(page);
++ return error;
+ }
+
+ int nfs_readpages(struct file *filp, struct address_space *mapping,
+diff --git a/fs/nfs/super.c b/fs/nfs/super.c
+index ca20d3c..a2b1af8 100644
+--- a/fs/nfs/super.c
++++ b/fs/nfs/super.c
+@@ -45,6 +45,7 @@
+ #include <linux/inet.h>
+ #include <linux/nfs_xdr.h>
+ #include <linux/magic.h>
++#include <linux/parser.h>
+
+ #include <asm/system.h>
+ #include <asm/uaccess.h>
+@@ -57,6 +58,167 @@
+
+ #define NFSDBG_FACILITY NFSDBG_VFS
+
++
++struct nfs_parsed_mount_data {
++ int flags;
++ int rsize, wsize;
++ int timeo, retrans;
++ int acregmin, acregmax,
++ acdirmin, acdirmax;
++ int namlen;
++ unsigned int bsize;
++ unsigned int auth_flavor_len;
++ rpc_authflavor_t auth_flavors[1];
++ char *client_address;
++
++ struct {
++ struct sockaddr_in address;
++ unsigned int program;
++ unsigned int version;
++ unsigned short port;
++ int protocol;
++ } mount_server;
++
++ struct {
++ struct sockaddr_in address;
++ char *hostname;
++ char *export_path;
++ unsigned int program;
++ int protocol;
++ } nfs_server;
++};
++
++enum {
++ /* Mount options that take no arguments */
++ Opt_soft, Opt_hard,
++ Opt_intr, Opt_nointr,
++ Opt_posix, Opt_noposix,
++ Opt_cto, Opt_nocto,
++ Opt_ac, Opt_noac,
++ Opt_lock, Opt_nolock,
++ Opt_v2, Opt_v3,
++ Opt_udp, Opt_tcp,
++ Opt_acl, Opt_noacl,
++ Opt_rdirplus, Opt_nordirplus,
++ Opt_sharecache, Opt_nosharecache,
++
++ /* Mount options that take integer arguments */
++ Opt_port,
++ Opt_rsize, Opt_wsize, Opt_bsize,
++ Opt_timeo, Opt_retrans,
++ Opt_acregmin, Opt_acregmax,
++ Opt_acdirmin, Opt_acdirmax,
++ Opt_actimeo,
++ Opt_namelen,
++ Opt_mountport,
++ Opt_mountprog, Opt_mountvers,
++ Opt_nfsprog, Opt_nfsvers,
++
++ /* Mount options that take string arguments */
++ Opt_sec, Opt_proto, Opt_mountproto,
++ Opt_addr, Opt_mounthost, Opt_clientaddr,
++
++ /* Mount options that are ignored */
++ Opt_userspace, Opt_deprecated,
++
++ Opt_err
++};
++
++static match_table_t nfs_mount_option_tokens = {
++ { Opt_userspace, "bg" },
++ { Opt_userspace, "fg" },
++ { Opt_soft, "soft" },
++ { Opt_hard, "hard" },
++ { Opt_intr, "intr" },
++ { Opt_nointr, "nointr" },
++ { Opt_posix, "posix" },
++ { Opt_noposix, "noposix" },
++ { Opt_cto, "cto" },
++ { Opt_nocto, "nocto" },
++ { Opt_ac, "ac" },
++ { Opt_noac, "noac" },
++ { Opt_lock, "lock" },
++ { Opt_nolock, "nolock" },
++ { Opt_v2, "v2" },
++ { Opt_v3, "v3" },
++ { Opt_udp, "udp" },
++ { Opt_tcp, "tcp" },
++ { Opt_acl, "acl" },
++ { Opt_noacl, "noacl" },
++ { Opt_rdirplus, "rdirplus" },
++ { Opt_nordirplus, "nordirplus" },
++ { Opt_sharecache, "sharecache" },
++ { Opt_nosharecache, "nosharecache" },
++
++ { Opt_port, "port=%u" },
++ { Opt_rsize, "rsize=%u" },
++ { Opt_wsize, "wsize=%u" },
++ { Opt_bsize, "bsize=%u" },
++ { Opt_timeo, "timeo=%u" },
++ { Opt_retrans, "retrans=%u" },
++ { Opt_acregmin, "acregmin=%u" },
++ { Opt_acregmax, "acregmax=%u" },
++ { Opt_acdirmin, "acdirmin=%u" },
++ { Opt_acdirmax, "acdirmax=%u" },
++ { Opt_actimeo, "actimeo=%u" },
++ { Opt_userspace, "retry=%u" },
++ { Opt_namelen, "namlen=%u" },
++ { Opt_mountport, "mountport=%u" },
++ { Opt_mountprog, "mountprog=%u" },
++ { Opt_mountvers, "mountvers=%u" },
++ { Opt_nfsprog, "nfsprog=%u" },
++ { Opt_nfsvers, "nfsvers=%u" },
++ { Opt_nfsvers, "vers=%u" },
++
++ { Opt_sec, "sec=%s" },
++ { Opt_proto, "proto=%s" },
++ { Opt_mountproto, "mountproto=%s" },
++ { Opt_addr, "addr=%s" },
++ { Opt_clientaddr, "clientaddr=%s" },
++ { Opt_mounthost, "mounthost=%s" },
++
++ { Opt_err, NULL }
++};
++
++enum {
++ Opt_xprt_udp, Opt_xprt_tcp,
++
++ Opt_xprt_err
++};
++
++static match_table_t nfs_xprt_protocol_tokens = {
++ { Opt_xprt_udp, "udp" },
++ { Opt_xprt_tcp, "tcp" },
++
++ { Opt_xprt_err, NULL }
++};
++
++enum {
++ Opt_sec_none, Opt_sec_sys,
++ Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
++ Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp,
++ Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp,
++
++ Opt_sec_err
++};
++
++static match_table_t nfs_secflavor_tokens = {
++ { Opt_sec_none, "none" },
++ { Opt_sec_none, "null" },
++ { Opt_sec_sys, "sys" },
++
++ { Opt_sec_krb5, "krb5" },
++ { Opt_sec_krb5i, "krb5i" },
++ { Opt_sec_krb5p, "krb5p" },
++
++ { Opt_sec_lkey, "lkey" },
++ { Opt_sec_lkeyi, "lkeyi" },
++ { Opt_sec_lkeyp, "lkeyp" },
++
++ { Opt_sec_err, NULL }
++};
++
++
+ static void nfs_umount_begin(struct vfsmount *, int);
+ static int nfs_statfs(struct dentry *, struct kstatfs *);
+ static int nfs_show_options(struct seq_file *, struct vfsmount *);
+@@ -263,11 +425,11 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
+ { RPC_AUTH_GSS_SPKM, "spkm" },
+ { RPC_AUTH_GSS_SPKMI, "spkmi" },
+ { RPC_AUTH_GSS_SPKMP, "spkmp" },
+- { -1, "unknown" }
++ { UINT_MAX, "unknown" }
+ };
+ int i;
+
+- for (i=0; sec_flavours[i].flavour != -1; i++) {
++ for (i = 0; sec_flavours[i].flavour != UINT_MAX; i++) {
+ if (sec_flavours[i].flavour == flavour)
+ break;
+ }
+@@ -291,6 +453,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
+ { NFS_MOUNT_NONLM, ",nolock", "" },
+ { NFS_MOUNT_NOACL, ",noacl", "" },
+ { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
++ { NFS_MOUNT_UNSHARED, ",nosharecache", ""},
+ { 0, NULL, NULL }
+ };
+ const struct proc_nfs_info *nfs_infop;
+@@ -430,87 +593,641 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
+ */
+ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
+ {
++ struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb);
++ struct rpc_clnt *rpc;
++
+ shrink_submounts(vfsmnt, &nfs_automount_list);
++
++ if (!(flags & MNT_FORCE))
++ return;
++ /* -EIO all pending I/O */
++ rpc = server->client_acl;
++ if (!IS_ERR(rpc))
++ rpc_killall_tasks(rpc);
++ rpc = server->client;
++ if (!IS_ERR(rpc))
++ rpc_killall_tasks(rpc);
+ }
+
+ /*
+- * Validate the NFS2/NFS3 mount data
+- * - fills in the mount root filehandle
++ * Sanity-check a server address provided by the mount command
+ */
+-static int nfs_validate_mount_data(struct nfs_mount_data *data,
+- struct nfs_fh *mntfh)
++static int nfs_verify_server_address(struct sockaddr *addr)
+ {
+- if (data == NULL) {
+- dprintk("%s: missing data argument\n", __FUNCTION__);
+- return -EINVAL;
++ switch (addr->sa_family) {
++ case AF_INET: {
++ struct sockaddr_in *sa = (struct sockaddr_in *) addr;
++ if (sa->sin_addr.s_addr != INADDR_ANY)
++ return 1;
++ break;
++ }
+ }
+
+- if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
+- dprintk("%s: bad mount version\n", __FUNCTION__);
+- return -EINVAL;
++ return 0;
++}
++
++/*
++ * Error-check and convert a string of mount options from user space into
++ * a data structure
++ */
++static int nfs_parse_mount_options(char *raw,
++ struct nfs_parsed_mount_data *mnt)
++{
++ char *p, *string;
++
++ if (!raw) {
++ dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
++ return 1;
+ }
++ dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
+
+- switch (data->version) {
+- case 1:
+- data->namlen = 0;
+- case 2:
+- data->bsize = 0;
+- case 3:
+- if (data->flags & NFS_MOUNT_VER3) {
+- dprintk("%s: mount structure version %d does not support NFSv3\n",
+- __FUNCTION__,
+- data->version);
+- return -EINVAL;
++ while ((p = strsep(&raw, ",")) != NULL) {
++ substring_t args[MAX_OPT_ARGS];
++ int option, token;
++
++ if (!*p)
++ continue;
++
++ dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p);
++
++ token = match_token(p, nfs_mount_option_tokens, args);
++ switch (token) {
++ case Opt_soft:
++ mnt->flags |= NFS_MOUNT_SOFT;
++ break;
++ case Opt_hard:
++ mnt->flags &= ~NFS_MOUNT_SOFT;
++ break;
++ case Opt_intr:
++ mnt->flags |= NFS_MOUNT_INTR;
++ break;
++ case Opt_nointr:
++ mnt->flags &= ~NFS_MOUNT_INTR;
++ break;
++ case Opt_posix:
++ mnt->flags |= NFS_MOUNT_POSIX;
++ break;
++ case Opt_noposix:
++ mnt->flags &= ~NFS_MOUNT_POSIX;
++ break;
++ case Opt_cto:
++ mnt->flags &= ~NFS_MOUNT_NOCTO;
++ break;
++ case Opt_nocto:
++ mnt->flags |= NFS_MOUNT_NOCTO;
++ break;
++ case Opt_ac:
++ mnt->flags &= ~NFS_MOUNT_NOAC;
++ break;
++ case Opt_noac:
++ mnt->flags |= NFS_MOUNT_NOAC;
++ break;
++ case Opt_lock:
++ mnt->flags &= ~NFS_MOUNT_NONLM;
++ break;
++ case Opt_nolock:
++ mnt->flags |= NFS_MOUNT_NONLM;
++ break;
++ case Opt_v2:
++ mnt->flags &= ~NFS_MOUNT_VER3;
++ break;
++ case Opt_v3:
++ mnt->flags |= NFS_MOUNT_VER3;
++ break;
++ case Opt_udp:
++ mnt->flags &= ~NFS_MOUNT_TCP;
++ mnt->nfs_server.protocol = IPPROTO_UDP;
++ mnt->timeo = 7;
++ mnt->retrans = 5;
++ break;
++ case Opt_tcp:
++ mnt->flags |= NFS_MOUNT_TCP;
++ mnt->nfs_server.protocol = IPPROTO_TCP;
++ mnt->timeo = 600;
++ mnt->retrans = 2;
++ break;
++ case Opt_acl:
++ mnt->flags &= ~NFS_MOUNT_NOACL;
++ break;
++ case Opt_noacl:
++ mnt->flags |= NFS_MOUNT_NOACL;
++ break;
++ case Opt_rdirplus:
++ mnt->flags &= ~NFS_MOUNT_NORDIRPLUS;
++ break;
++ case Opt_nordirplus:
++ mnt->flags |= NFS_MOUNT_NORDIRPLUS;
++ break;
++ case Opt_sharecache:
++ mnt->flags &= ~NFS_MOUNT_UNSHARED;
++ break;
++ case Opt_nosharecache:
++ mnt->flags |= NFS_MOUNT_UNSHARED;
++ break;
++ /* sin_port is a 16-bit field: convert with htons(), not htonl() */
++ case Opt_port:
++ if (match_int(args, &option))
++ return 0;
++ if (option < 0 || option > 65535)
++ return 0;
++ mnt->nfs_server.address.sin_port = htons(option);
++ break;
++ case Opt_rsize:
++ if (match_int(args, &mnt->rsize))
++ return 0;
++ break;
++ case Opt_wsize:
++ if (match_int(args, &mnt->wsize))
++ return 0;
++ break;
++ case Opt_bsize:
++ if (match_int(args, &option))
++ return 0;
++ if (option < 0)
++ return 0;
++ mnt->bsize = option;
++ break;
++ case Opt_timeo:
++ if (match_int(args, &mnt->timeo))
++ return 0;
++ break;
++ case Opt_retrans:
++ if (match_int(args, &mnt->retrans))
++ return 0;
++ break;
++ case Opt_acregmin:
++ if (match_int(args, &mnt->acregmin))
++ return 0;
++ break;
++ case Opt_acregmax:
++ if (match_int(args, &mnt->acregmax))
++ return 0;
++ break;
++ case Opt_acdirmin:
++ if (match_int(args, &mnt->acdirmin))
++ return 0;
++ break;
++ case Opt_acdirmax:
++ if (match_int(args, &mnt->acdirmax))
++ return 0;
++ break;
++ case Opt_actimeo:
++ if (match_int(args, &option))
++ return 0;
++ if (option < 0)
++ return 0;
++ mnt->acregmin =
++ mnt->acregmax =
++ mnt->acdirmin =
++ mnt->acdirmax = option;
++ break;
++ case Opt_namelen:
++ if (match_int(args, &mnt->namlen))
++ return 0;
++ break;
++ case Opt_mountport:
++ if (match_int(args, &option))
++ return 0;
++ if (option < 0 || option > 65535)
++ return 0;
++ mnt->mount_server.port = option;
++ break;
++ case Opt_mountprog:
++ if (match_int(args, &option))
++ return 0;
++ if (option < 0)
++ return 0;
++ mnt->mount_server.program = option;
++ break;
++ case Opt_mountvers:
++ if (match_int(args, &option))
++ return 0;
++ if (option < 0)
++ return 0;
++ mnt->mount_server.version = option;
++ break;
++ case Opt_nfsprog:
++ if (match_int(args, &option))
++ return 0;
++ if (option < 0)
++ return 0;
++ mnt->nfs_server.program = option;
++ break;
++ case Opt_nfsvers:
++ if (match_int(args, &option))
++ return 0;
++ switch (option) {
++ case 2:
++ mnt->flags &= ~NFS_MOUNT_VER3;
++ break;
++ case 3:
++ mnt->flags |= NFS_MOUNT_VER3;
++ break;
++ default:
++ goto out_unrec_vers;
+ }
+- data->root.size = NFS2_FHSIZE;
+- memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+- case 4:
+- if (data->flags & NFS_MOUNT_SECFLAVOUR) {
+- dprintk("%s: mount structure version %d does not support strong security\n",
+- __FUNCTION__,
+- data->version);
+- return -EINVAL;
++ break;
++
++ case Opt_sec:
++ string = match_strdup(args);
++ if (string == NULL)
++ goto out_nomem;
++ token = match_token(string, nfs_secflavor_tokens, args);
++ kfree(string);
++
++ /*
++ * The flags setting is for v2/v3. The flavor_len
++ * setting is for v4. v2/v3 also need to know the
++ * difference between NULL and UNIX.
++ */
++ switch (token) {
++ case Opt_sec_none:
++ mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
++ mnt->auth_flavor_len = 0;
++ mnt->auth_flavors[0] = RPC_AUTH_NULL;
++ break;
++ case Opt_sec_sys:
++ mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
++ mnt->auth_flavor_len = 0;
++ mnt->auth_flavors[0] = RPC_AUTH_UNIX;
++ break;
++ case Opt_sec_krb5:
++ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
++ mnt->auth_flavor_len = 1;
++ mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
++ break;
++ case Opt_sec_krb5i:
++ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
++ mnt->auth_flavor_len = 1;
++ mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
++ break;
++ case Opt_sec_krb5p:
++ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
++ mnt->auth_flavor_len = 1;
++ mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
++ break;
++ case Opt_sec_lkey:
++ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
++ mnt->auth_flavor_len = 1;
++ mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
++ break;
++ case Opt_sec_lkeyi:
++ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
++ mnt->auth_flavor_len = 1;
++ mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
++ break;
++ case Opt_sec_lkeyp:
++ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
++ mnt->auth_flavor_len = 1;
++ mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
++ break;
++ case Opt_sec_spkm:
++ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
++ mnt->auth_flavor_len = 1;
++ mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
++ break;
++ case Opt_sec_spkmi:
++ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
++ mnt->auth_flavor_len = 1;
++ mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
++ break;
++ case Opt_sec_spkmp:
++ mnt->flags |= NFS_MOUNT_SECFLAVOUR;
++ mnt->auth_flavor_len = 1;
++ mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
++ break;
++ default:
++ goto out_unrec_sec;
+ }
+- case 5:
+- memset(data->context, 0, sizeof(data->context));
+- }
++ break;
++ case Opt_proto:
++ string = match_strdup(args);
++ if (string == NULL)
++ goto out_nomem;
++ token = match_token(string,
++ nfs_xprt_protocol_tokens, args);
++ kfree(string);
++ /* token is an Opt_xprt_* value from nfs_xprt_protocol_tokens */
++ switch (token) {
++ case Opt_xprt_udp:
++ mnt->flags &= ~NFS_MOUNT_TCP;
++ mnt->nfs_server.protocol = IPPROTO_UDP;
++ mnt->timeo = 7;
++ mnt->retrans = 5;
++ break;
++ case Opt_xprt_tcp:
++ mnt->flags |= NFS_MOUNT_TCP;
++ mnt->nfs_server.protocol = IPPROTO_TCP;
++ mnt->timeo = 600;
++ mnt->retrans = 2;
++ break;
++ default:
++ goto out_unrec_xprt;
++ }
++ break;
++ case Opt_mountproto:
++ string = match_strdup(args);
++ if (string == NULL)
++ goto out_nomem;
++ token = match_token(string,
++ nfs_xprt_protocol_tokens, args);
++ kfree(string);
++ /* token is an Opt_xprt_* value from nfs_xprt_protocol_tokens */
++ switch (token) {
++ case Opt_xprt_udp:
++ mnt->mount_server.protocol = IPPROTO_UDP;
++ break;
++ case Opt_xprt_tcp:
++ mnt->mount_server.protocol = IPPROTO_TCP;
++ break;
++ default:
++ goto out_unrec_xprt;
++ }
++ break;
++ case Opt_addr:
++ string = match_strdup(args);
++ if (string == NULL)
++ goto out_nomem;
++ mnt->nfs_server.address.sin_family = AF_INET;
++ mnt->nfs_server.address.sin_addr.s_addr =
++ in_aton(string);
++ kfree(string);
++ break;
++ case Opt_clientaddr:
++ string = match_strdup(args);
++ if (string == NULL)
++ goto out_nomem;
++ mnt->client_address = string;
++ break;
++ case Opt_mounthost:
++ string = match_strdup(args);
++ if (string == NULL)
++ goto out_nomem;
++ mnt->mount_server.address.sin_family = AF_INET;
++ mnt->mount_server.address.sin_addr.s_addr =
++ in_aton(string);
++ kfree(string);
++ break;
+
+- /* Set the pseudoflavor */
+- if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
+- data->pseudoflavor = RPC_AUTH_UNIX;
++ case Opt_userspace:
++ case Opt_deprecated:
++ break;
+
+-#ifndef CONFIG_NFS_V3
+- /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
+- if (data->flags & NFS_MOUNT_VER3) {
+- dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
+- return -EPROTONOSUPPORT;
++ default:
++ goto out_unknown;
++ }
+ }
+-#endif /* CONFIG_NFS_V3 */
+
+- /* We now require that the mount process passes the remote address */
+- if (data->addr.sin_addr.s_addr == INADDR_ANY) {
+- dprintk("%s: mount program didn't pass remote address!\n",
+- __FUNCTION__);
+- return -EINVAL;
++ return 1;
++
++out_nomem:
++ printk(KERN_INFO "NFS: not enough memory to parse option\n");
++ return 0;
++
++out_unrec_vers:
++ printk(KERN_INFO "NFS: unrecognized NFS version number\n");
++ return 0;
++
++out_unrec_xprt:
++ printk(KERN_INFO "NFS: unrecognized transport protocol\n");
++ return 0;
++
++out_unrec_sec:
++ printk(KERN_INFO "NFS: unrecognized security flavor\n");
++ return 0;
++
++out_unknown:
++ printk(KERN_INFO "NFS: unknown mount option: %s\n", p);
++ return 0;
++}
++
++/*
++ * Use the remote server's MOUNT service to request the NFS file handle
++ * corresponding to the provided path.
++ */
++static int nfs_try_mount(struct nfs_parsed_mount_data *args,
++ struct nfs_fh *root_fh)
++{
++ struct sockaddr_in sin;
++ int status;
++
++ if (args->mount_server.version == 0) {
++ if (args->flags & NFS_MOUNT_VER3)
++ args->mount_server.version = NFS_MNT3_VERSION;
++ else
++ args->mount_server.version = NFS_MNT_VERSION;
+ }
+
+- /* Prepare the root filehandle */
+- if (data->flags & NFS_MOUNT_VER3)
+- mntfh->size = data->root.size;
++ /*
++ * Construct the mount server's address.
++ */
++ if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY)
++ sin = args->mount_server.address;
+ else
+- mntfh->size = NFS2_FHSIZE;
++ sin = args->nfs_server.address;
++ if (args->mount_server.port == 0) {
++ status = rpcb_getport_sync(&sin,
++ args->mount_server.program,
++ args->mount_server.version,
++ args->mount_server.protocol);
++ if (status < 0)
++ goto out_err;
++ sin.sin_port = htons(status);
++ } else
++ sin.sin_port = htons(args->mount_server.port);
++
++ /*
++ * Now ask the mount server to map our export path
++ * to a file handle.
++ */
++ status = nfs_mount((struct sockaddr *) &sin,
++ sizeof(sin),
++ args->nfs_server.hostname,
++ args->nfs_server.export_path,
++ args->mount_server.version,
++ args->mount_server.protocol,
++ root_fh);
++ if (status < 0)
++ goto out_err;
++
++ return status;
+
+- if (mntfh->size > sizeof(mntfh->data)) {
+- dprintk("%s: invalid root filehandle\n", __FUNCTION__);
+- return -EINVAL;
++out_err:
++ dfprintk(MOUNT, "NFS: unable to contact server on host "
++ NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr));
++ return status;
++}
++
++/*
++ * Validate the NFS2/NFS3 mount data
++ * - fills in the mount root filehandle
++ *
++ * For option strings, user space handles the following behaviors:
++ *
++ * + DNS: mapping server host name to IP address ("addr=" option)
++ *
++ * + failure mode: how to behave if a mount request can't be handled
++ * immediately ("fg/bg" option)
++ *
++ * + retry: how often to retry a mount request ("retry=" option)
++ *
++ * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
++ * mountproto=tcp after mountproto=udp, and so on
++ *
++ * XXX: as far as I can tell, changing the NFS program number is not
++ * supported in the NFS client.
++ */
++static int nfs_validate_mount_data(struct nfs_mount_data **options,
++ struct nfs_fh *mntfh,
++ const char *dev_name)
++{
++ struct nfs_mount_data *data = *options;
++
++ if (data == NULL)
++ goto out_no_data;
++
++ switch (data->version) {
++ case 1:
++ data->namlen = 0;
++ case 2:
++ data->bsize = 0;
++ case 3:
++ if (data->flags & NFS_MOUNT_VER3)
++ goto out_no_v3;
++ data->root.size = NFS2_FHSIZE;
++ memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
++ case 4:
++ if (data->flags & NFS_MOUNT_SECFLAVOUR)
++ goto out_no_sec;
++ case 5:
++ memset(data->context, 0, sizeof(data->context));
++ case 6:
++ if (data->flags & NFS_MOUNT_VER3)
++ mntfh->size = data->root.size;
++ else
++ mntfh->size = NFS2_FHSIZE;
++
++ if (mntfh->size > sizeof(mntfh->data))
++ goto out_invalid_fh;
++
++ memcpy(mntfh->data, data->root.data, mntfh->size);
++ if (mntfh->size < sizeof(mntfh->data))
++ memset(mntfh->data + mntfh->size, 0,
++ sizeof(mntfh->data) - mntfh->size);
++ break;
++ default: {
++ unsigned int len;
++ char *c;
++ int status;
++ struct nfs_parsed_mount_data args = {
++ .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP),
++ .rsize = NFS_MAX_FILE_IO_SIZE,
++ .wsize = NFS_MAX_FILE_IO_SIZE,
++ .timeo = 600,
++ .retrans = 2,
++ .acregmin = 3,
++ .acregmax = 60,
++ .acdirmin = 30,
++ .acdirmax = 60,
++ .mount_server.protocol = IPPROTO_UDP,
++ .mount_server.program = NFS_MNT_PROGRAM,
++ .nfs_server.protocol = IPPROTO_TCP,
++ .nfs_server.program = NFS_PROGRAM,
++ };
++
++ if (nfs_parse_mount_options((char *) *options, &args) == 0)
++ return -EINVAL;
++
++ data = kzalloc(sizeof(*data), GFP_KERNEL);
++ if (data == NULL)
++ return -ENOMEM;
++
++ /*
++ * NB: after this point, caller will free "data"
++ * if we return an error
++ */
++ *options = data;
++ /* Copy the host part of "host:path"; kzalloc'ed data keeps it terminated */
++ c = strchr(dev_name, ':');
++ if (c == NULL)
++ return -EINVAL;
++ len = c - dev_name;
++ if (len >= sizeof(data->hostname))
++ return -EINVAL;
++ strncpy(data->hostname, dev_name, len);
++ args.nfs_server.hostname = data->hostname;
++
++ c++;
++ if (strlen(c) > NFS_MAXPATHLEN)
++ return -EINVAL;
++ args.nfs_server.export_path = c;
++
++ status = nfs_try_mount(&args, mntfh);
++ if (status)
++ return -EINVAL;
++
++ /*
++ * Translate to nfs_mount_data, which nfs_fill_super
++ * can deal with.
++ */
++ data->version = 6;
++ data->flags = args.flags;
++ data->rsize = args.rsize;
++ data->wsize = args.wsize;
++ data->timeo = args.timeo;
++ data->retrans = args.retrans;
++ data->acregmin = args.acregmin;
++ data->acregmax = args.acregmax;
++ data->acdirmin = args.acdirmin;
++ data->acdirmax = args.acdirmax;
++ data->addr = args.nfs_server.address;
++ data->namlen = args.namlen;
++ data->bsize = args.bsize;
++ data->pseudoflavor = args.auth_flavors[0];
++
++ break;
++ }
+ }
+
+- memcpy(mntfh->data, data->root.data, mntfh->size);
+- if (mntfh->size < sizeof(mntfh->data))
+- memset(mntfh->data + mntfh->size, 0,
+- sizeof(mntfh->data) - mntfh->size);
++ if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
++ data->pseudoflavor = RPC_AUTH_UNIX;
++
++#ifndef CONFIG_NFS_V3
++ if (data->flags & NFS_MOUNT_VER3)
++ goto out_v3_not_compiled;
++#endif /* !CONFIG_NFS_V3 */
++
++ if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
++ goto out_no_address;
+
+ return 0;
++
++out_no_data:
++ dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n");
++ return -EINVAL;
++
++out_no_v3:
++ dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n",
++ data->version);
++ return -EINVAL;
++
++out_no_sec:
++ dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n");
++ return -EINVAL;
++
++#ifndef CONFIG_NFS_V3
++out_v3_not_compiled:
++ dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n");
++ return -EPROTONOSUPPORT;
++#endif /* !CONFIG_NFS_V3 */
++
++out_no_address:
++ dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
++ return -EINVAL;
++
++out_invalid_fh:
++ dfprintk(MOUNT, "NFS: invalid root filehandle\n");
++ return -EINVAL;
+ }
+
+ /*
+@@ -600,13 +1317,51 @@ static int nfs_compare_super(struct super_block *sb, void *data)
+ {
+ struct nfs_server *server = data, *old = NFS_SB(sb);
+
+- if (old->nfs_client != server->nfs_client)
++ if (memcmp(&old->nfs_client->cl_addr,
++ &server->nfs_client->cl_addr,
++ sizeof(old->nfs_client->cl_addr)) != 0)
++ return 0;
++ /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
++ if (old->flags & NFS_MOUNT_UNSHARED)
+ return 0;
+ if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
+ return 0;
+ return 1;
+ }
+
++#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
++
++static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
++{
++ const struct nfs_server *a = s->s_fs_info;
++ const struct rpc_clnt *clnt_a = a->client;
++ const struct rpc_clnt *clnt_b = b->client;
++
++ if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
++ goto Ebusy;
++ if (a->nfs_client != b->nfs_client)
++ goto Ebusy;
++ if (a->flags != b->flags)
++ goto Ebusy;
++ if (a->wsize != b->wsize)
++ goto Ebusy;
++ if (a->rsize != b->rsize)
++ goto Ebusy;
++ if (a->acregmin != b->acregmin)
++ goto Ebusy;
++ if (a->acregmax != b->acregmax)
++ goto Ebusy;
++ if (a->acdirmin != b->acdirmin)
++ goto Ebusy;
++ if (a->acdirmax != b->acdirmax)
++ goto Ebusy;
++ if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
++ goto Ebusy;
++ return 0;
++Ebusy:
++ return -EBUSY;
++}
++
+ static int nfs_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+ {
+@@ -615,30 +1370,37 @@ static int nfs_get_sb(struct file_system_type *fs_type,
+ struct nfs_fh mntfh;
+ struct nfs_mount_data *data = raw_data;
+ struct dentry *mntroot;
++ int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+ int error;
+
+ /* Validate the mount data */
+- error = nfs_validate_mount_data(data, &mntfh);
++ error = nfs_validate_mount_data(&data, &mntfh, dev_name);
+ if (error < 0)
+- return error;
++ goto out;
+
+ /* Get a volume representation */
+ server = nfs_create_server(data, &mntfh);
+ if (IS_ERR(server)) {
+ error = PTR_ERR(server);
+- goto out_err_noserver;
++ goto out;
+ }
+
++ if (server->flags & NFS_MOUNT_UNSHARED)
++ compare_super = NULL;
++
+ /* Get a superblock - note that we may end up sharing one that already exists */
+- s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
++ s = sget(fs_type, compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_err_nosb;
+ }
+
+ if (s->s_fs_info != server) {
++ error = nfs_compare_mount_options(s, server, flags);
+ nfs_free_server(server);
+ server = NULL;
++ if (error < 0)
++ goto error_splat_super;
+ }
+
+ if (!s->s_root) {
+@@ -656,17 +1418,21 @@ static int nfs_get_sb(struct file_system_type *fs_type,
+ s->s_flags |= MS_ACTIVE;
+ mnt->mnt_sb = s;
+ mnt->mnt_root = mntroot;
+- return 0;
++ error = 0;
++
++out:
++ if (data != raw_data)
++ kfree(data);
++ return error;
+
+ out_err_nosb:
+ nfs_free_server(server);
+-out_err_noserver:
+- return error;
++ goto out;
+
+ error_splat_super:
+ up_write(&s->s_umount);
+ deactivate_super(s);
+- return error;
++ goto out;
+ }
+
+ /*
+@@ -691,6 +1457,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
+ struct super_block *s;
+ struct nfs_server *server;
+ struct dentry *mntroot;
++ int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+ int error;
+
+ dprintk("--> nfs_xdev_get_sb()\n");
+@@ -702,16 +1469,22 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
+ goto out_err_noserver;
+ }
+
++ if (server->flags & NFS_MOUNT_UNSHARED)
++ compare_super = NULL;
++
+ /* Get a superblock - note that we may end up sharing one that already exists */
+- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
++ s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_err_nosb;
+ }
+
+ if (s->s_fs_info != server) {
++ error = nfs_compare_mount_options(s, server, flags);
+ nfs_free_server(server);
+ server = NULL;
++ if (error < 0)
++ goto error_splat_super;
+ }
+
+ if (!s->s_root) {
+@@ -772,25 +1545,164 @@ static void nfs4_fill_super(struct super_block *sb)
+ nfs_initialise_sb(sb);
+ }
+
+-static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
++/*
++ * Validate NFSv4 mount options
++ */
++static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
++ const char *dev_name,
++ struct sockaddr_in *addr,
++ rpc_authflavor_t *authflavour,
++ char **hostname,
++ char **mntpath,
++ char **ip_addr)
+ {
+- void *p = NULL;
+-
+- if (!src->len)
+- return ERR_PTR(-EINVAL);
+- if (src->len < maxlen)
+- maxlen = src->len;
+- if (dst == NULL) {
+- p = dst = kmalloc(maxlen + 1, GFP_KERNEL);
+- if (p == NULL)
+- return ERR_PTR(-ENOMEM);
+- }
+- if (copy_from_user(dst, src->data, maxlen)) {
+- kfree(p);
+- return ERR_PTR(-EFAULT);
++ struct nfs4_mount_data *data = *options;
++ char *c;
++
++ if (data == NULL)
++ goto out_no_data;
++
++ switch (data->version) {
++ case 1:
++ if (data->host_addrlen != sizeof(*addr))
++ goto out_no_address;
++ if (copy_from_user(addr, data->host_addr, sizeof(*addr)))
++ return -EFAULT;
++ if (addr->sin_port == 0)
++ addr->sin_port = htons(NFS_PORT);
++ if (!nfs_verify_server_address((struct sockaddr *) addr))
++ goto out_no_address;
++
++ switch (data->auth_flavourlen) {
++ case 0:
++ *authflavour = RPC_AUTH_UNIX;
++ break;
++ case 1:
++ if (copy_from_user(authflavour, data->auth_flavours,
++ sizeof(*authflavour)))
++ return -EFAULT;
++ break;
++ default:
++ goto out_inval_auth;
++ }
++
++ c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
++ if (IS_ERR(c))
++ return PTR_ERR(c);
++ *hostname = c;
++
++ c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
++ if (IS_ERR(c))
++ return PTR_ERR(c);
++ *mntpath = c;
++ dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath);
++
++ c = strndup_user(data->client_addr.data, 16);
++ if (IS_ERR(c))
++ return PTR_ERR(c);
++ *ip_addr = c;
++
++ break;
++ default: {
++ unsigned int len;
++ struct nfs_parsed_mount_data args = {
++ .rsize = NFS_MAX_FILE_IO_SIZE,
++ .wsize = NFS_MAX_FILE_IO_SIZE,
++ .timeo = 600,
++ .retrans = 2,
++ .acregmin = 3,
++ .acregmax = 60,
++ .acdirmin = 30,
++ .acdirmax = 60,
++ .nfs_server.protocol = IPPROTO_TCP,
++ };
++
++ if (nfs_parse_mount_options((char *) *options, &args) == 0)
++ return -EINVAL;
++
++ if (!nfs_verify_server_address((struct sockaddr *)
++ &args.nfs_server.address))
++ return -EINVAL;
++ *addr = args.nfs_server.address;
++
++ switch (args.auth_flavor_len) {
++ case 0:
++ *authflavour = RPC_AUTH_UNIX;
++ break;
++ case 1:
++ *authflavour = (rpc_authflavor_t) args.auth_flavors[0];
++ break;
++ default:
++ goto out_inval_auth;
++ }
++
++ /*
++ * Translate to nfs4_mount_data, which nfs4_fill_super
++ * can deal with.
++ */
++ data = kzalloc(sizeof(*data), GFP_KERNEL);
++ if (data == NULL)
++ return -ENOMEM;
++ *options = data;
++
++ data->version = 1;
++ data->flags = args.flags & NFS4_MOUNT_FLAGMASK;
++ data->rsize = args.rsize;
++ data->wsize = args.wsize;
++ data->timeo = args.timeo;
++ data->retrans = args.retrans;
++ data->acregmin = args.acregmin;
++ data->acregmax = args.acregmax;
++ data->acdirmin = args.acdirmin;
++ data->acdirmax = args.acdirmax;
++ data->proto = args.nfs_server.protocol;
++
++ /*
++ * Split "dev_name" into "hostname:mntpath".
++ */
++ c = strchr(dev_name, ':');
++ if (c == NULL)
++ return -EINVAL;
++ /* len = host characters before ':'; allocate one extra for '\0' */
++ len = c - dev_name;
++ if (len > NFS4_MAXNAMLEN)
++ return -EINVAL;
++ *hostname = kzalloc(len + 1, GFP_KERNEL);
++ if (*hostname == NULL)
++ return -ENOMEM;
++ strncpy(*hostname, dev_name, len);
++
++ c++; /* step over the ':' */
++ len = strlen(c);
++ if (len > NFS4_MAXPATHLEN)
++ return -EINVAL;
++ *mntpath = kzalloc(len + 1, GFP_KERNEL);
++ if (*mntpath == NULL)
++ return -ENOMEM;
++ strncpy(*mntpath, c, len);
++
++ dprintk("MNTPATH: %s\n", *mntpath);
++
++ *ip_addr = args.client_address;
++
++ break;
++ }
+ }
+- dst[maxlen] = '\0';
+- return dst;
++
++ return 0;
++
++out_no_data:
++ dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n");
++ return -EINVAL;
++
++out_inval_auth:
++ dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n",
++ data->auth_flavourlen);
++ return -EINVAL;
++
++out_no_address:
++ dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
++ return -EINVAL;
+ }
+
+ /*
+@@ -806,81 +1718,29 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
+ rpc_authflavor_t authflavour;
+ struct nfs_fh mntfh;
+ struct dentry *mntroot;
+- char *mntpath = NULL, *hostname = NULL, ip_addr[16];
+- void *p;
++ char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
++ int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+ int error;
+
+- if (data == NULL) {
+- dprintk("%s: missing data argument\n", __FUNCTION__);
+- return -EINVAL;
+- }
+- if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) {
+- dprintk("%s: bad mount version\n", __FUNCTION__);
+- return -EINVAL;
+- }
+-
+- /* We now require that the mount process passes the remote address */
+- if (data->host_addrlen != sizeof(addr))
+- return -EINVAL;
+-
+- if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
+- return -EFAULT;
+-
+- if (addr.sin_family != AF_INET ||
+- addr.sin_addr.s_addr == INADDR_ANY
+- ) {
+- dprintk("%s: mount program didn't pass remote IP address!\n",
+- __FUNCTION__);
+- return -EINVAL;
+- }
+- /* RFC3530: The default port for NFS is 2049 */
+- if (addr.sin_port == 0)
+- addr.sin_port = htons(NFS_PORT);
+-
+- /* Grab the authentication type */
+- authflavour = RPC_AUTH_UNIX;
+- if (data->auth_flavourlen != 0) {
+- if (data->auth_flavourlen != 1) {
+- dprintk("%s: Invalid number of RPC auth flavours %d.\n",
+- __FUNCTION__, data->auth_flavourlen);
+- error = -EINVAL;
+- goto out_err_noserver;
+- }
+-
+- if (copy_from_user(&authflavour, data->auth_flavours,
+- sizeof(authflavour))) {
+- error = -EFAULT;
+- goto out_err_noserver;
+- }
+- }
+-
+- p = nfs_copy_user_string(NULL, &data->hostname, 256);
+- if (IS_ERR(p))
+- goto out_err;
+- hostname = p;
+-
+- p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
+- if (IS_ERR(p))
+- goto out_err;
+- mntpath = p;
+-
+- dprintk("MNTPATH: %s\n", mntpath);
+-
+- p = nfs_copy_user_string(ip_addr, &data->client_addr,
+- sizeof(ip_addr) - 1);
+- if (IS_ERR(p))
+- goto out_err;
++ /* Validate the mount data */
++ error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour,
++ &hostname, &mntpath, &ip_addr);
++ if (error < 0)
++ goto out;
+
+ /* Get a volume representation */
+ server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
+ authflavour, &mntfh);
+ if (IS_ERR(server)) {
+ error = PTR_ERR(server);
+- goto out_err_noserver;
++ goto out;
+ }
+
++ if (server->flags & NFS4_MOUNT_UNSHARED)
++ compare_super = NULL;
++
+ /* Get a superblock - note that we may end up sharing one that already exists */
+- s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
++ s = sget(fs_type, compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_free;
+@@ -906,25 +1766,22 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
+ s->s_flags |= MS_ACTIVE;
+ mnt->mnt_sb = s;
+ mnt->mnt_root = mntroot;
++ error = 0;
++
++out:
++ kfree(ip_addr);
+ kfree(mntpath);
+ kfree(hostname);
+- return 0;
+-
+-out_err:
+- error = PTR_ERR(p);
+- goto out_err_noserver;
++ return error;
+
+ out_free:
+ nfs_free_server(server);
+-out_err_noserver:
+- kfree(mntpath);
+- kfree(hostname);
+- return error;
++ goto out;
+
+ error_splat_super:
+ up_write(&s->s_umount);
+ deactivate_super(s);
+- goto out_err_noserver;
++ goto out;
+ }
+
+ static void nfs4_kill_super(struct super_block *sb)
+@@ -949,6 +1806,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
+ struct super_block *s;
+ struct nfs_server *server;
+ struct dentry *mntroot;
++ int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+ int error;
+
+ dprintk("--> nfs4_xdev_get_sb()\n");
+@@ -960,8 +1818,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
+ goto out_err_noserver;
+ }
+
++ if (server->flags & NFS4_MOUNT_UNSHARED)
++ compare_super = NULL;
++
+ /* Get a superblock - note that we may end up sharing one that already exists */
+- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
++ s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_err_nosb;
+@@ -1016,6 +1877,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
+ struct nfs_server *server;
+ struct dentry *mntroot;
+ struct nfs_fh mntfh;
++ int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+ int error;
+
+ dprintk("--> nfs4_referral_get_sb()\n");
+@@ -1027,8 +1889,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
+ goto out_err_noserver;
+ }
+
++ if (server->flags & NFS4_MOUNT_UNSHARED)
++ compare_super = NULL;
++
+ /* Get a superblock - note that we may end up sharing one that already exists */
+- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
++ s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_err_nosb;
+diff --git a/fs/nfs/write.c b/fs/nfs/write.c
+index af344a1..73ac992 100644
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -117,19 +117,19 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page)
+ if (PagePrivate(page)) {
+ req = (struct nfs_page *)page_private(page);
+ if (req != NULL)
+- atomic_inc(&req->wb_count);
++ kref_get(&req->wb_kref);
+ }
+ return req;
+ }
+
+ static struct nfs_page *nfs_page_find_request(struct page *page)
+ {
++ struct inode *inode = page->mapping->host;
+ struct nfs_page *req = NULL;
+- spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+
+- spin_lock(req_lock);
++ spin_lock(&inode->i_lock);
+ req = nfs_page_find_request_locked(page);
+- spin_unlock(req_lock);
++ spin_unlock(&inode->i_lock);
+ return req;
+ }
+
+@@ -191,8 +191,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
+ }
+ /* Update file length */
+ nfs_grow_file(page, offset, count);
+- /* Set the PG_uptodate flag? */
+- nfs_mark_uptodate(page, offset, count);
+ nfs_unlock_request(req);
+ return 0;
+ }
+@@ -253,16 +251,16 @@ static void nfs_end_page_writeback(struct page *page)
+ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
+ struct page *page)
+ {
++ struct inode *inode = page->mapping->host;
++ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_page *req;
+- struct nfs_inode *nfsi = NFS_I(page->mapping->host);
+- spinlock_t *req_lock = &nfsi->req_lock;
+ int ret;
+
+- spin_lock(req_lock);
++ spin_lock(&inode->i_lock);
+ for(;;) {
+ req = nfs_page_find_request_locked(page);
+ if (req == NULL) {
+- spin_unlock(req_lock);
++ spin_unlock(&inode->i_lock);
+ return 1;
+ }
+ if (nfs_lock_request_dontget(req))
+@@ -272,28 +270,28 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
+ * succeed provided that someone hasn't already marked the
+ * request as dirty (in which case we don't care).
+ */
+- spin_unlock(req_lock);
++ spin_unlock(&inode->i_lock);
+ ret = nfs_wait_on_request(req);
+ nfs_release_request(req);
+ if (ret != 0)
+ return ret;
+- spin_lock(req_lock);
++ spin_lock(&inode->i_lock);
+ }
+ if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+ /* This request is marked for commit */
+- spin_unlock(req_lock);
++ spin_unlock(&inode->i_lock);
+ nfs_unlock_request(req);
+ nfs_pageio_complete(pgio);
+ return 1;
+ }
+ if (nfs_set_page_writeback(page) != 0) {
+- spin_unlock(req_lock);
++ spin_unlock(&inode->i_lock);
+ BUG();
+ }
+ radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
+- NFS_PAGE_TAG_WRITEBACK);
++ NFS_PAGE_TAG_LOCKED);
+ ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
+- spin_unlock(req_lock);
++ spin_unlock(&inode->i_lock);
+ nfs_pageio_add_request(pgio, req);
+ return ret;
+ }
+@@ -400,7 +398,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
+ if (PageDirty(req->wb_page))
+ set_bit(PG_NEED_FLUSH, &req->wb_flags);
+ nfsi->npages++;
+- atomic_inc(&req->wb_count);
++ kref_get(&req->wb_kref);
+ return 0;
+ }
+
+@@ -409,12 +407,12 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
+ */
+ static void nfs_inode_remove_request(struct nfs_page *req)
+ {
+- struct inode *inode = req->wb_context->dentry->d_inode;
++ struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ BUG_ON (!NFS_WBACK_BUSY(req));
+
+- spin_lock(&nfsi->req_lock);
++ spin_lock(&inode->i_lock);
+ set_page_private(req->wb_page, 0);
+ ClearPagePrivate(req->wb_page);
+ radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
+@@ -422,11 +420,11 @@ static void nfs_inode_remove_request(struct nfs_page *req)
+ __set_page_dirty_nobuffers(req->wb_page);
+ nfsi->npages--;
+ if (!nfsi->npages) {
+- spin_unlock(&nfsi->req_lock);
++ spin_unlock(&inode->i_lock);
+ nfs_end_data_update(inode);
+ iput(inode);
+ } else
+- spin_unlock(&nfsi->req_lock);
++ spin_unlock(&inode->i_lock);
+ nfs_clear_request(req);
+ nfs_release_request(req);
+ }
+@@ -457,14 +455,16 @@ nfs_dirty_request(struct nfs_page *req)
+ static void
+ nfs_mark_request_commit(struct nfs_page *req)
+ {
+- struct inode *inode = req->wb_context->dentry->d_inode;
++ struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+- spin_lock(&nfsi->req_lock);
+- nfs_list_add_request(req, &nfsi->commit);
++ spin_lock(&inode->i_lock);
+ nfsi->ncommit++;
+ set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
+- spin_unlock(&nfsi->req_lock);
++ radix_tree_tag_set(&nfsi->nfs_page_tree,
++ req->wb_index,
++ NFS_PAGE_TAG_COMMIT);
++ spin_unlock(&inode->i_lock);
+ inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ }
+@@ -526,18 +526,18 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
+ idx_end = idx_start + npages - 1;
+
+ next = idx_start;
+- while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
++ while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
+ if (req->wb_index > idx_end)
+ break;
+
+ next = req->wb_index + 1;
+ BUG_ON(!NFS_WBACK_BUSY(req));
+
+- atomic_inc(&req->wb_count);
+- spin_unlock(&nfsi->req_lock);
++ kref_get(&req->wb_kref);
++ spin_unlock(&inode->i_lock);
+ error = nfs_wait_on_request(req);
+ nfs_release_request(req);
+- spin_lock(&nfsi->req_lock);
++ spin_lock(&inode->i_lock);
+ if (error < 0)
+ return error;
+ res++;
+@@ -577,10 +577,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
+ int res = 0;
+
+ if (nfsi->ncommit != 0) {
+- res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages);
++ res = nfs_scan_list(nfsi, dst, idx_start, npages,
++ NFS_PAGE_TAG_COMMIT);
+ nfsi->ncommit -= res;
+- if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
+- printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+ }
+ return res;
+ }
+@@ -603,7 +602,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
+ {
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+- struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_page *req, *new = NULL;
+ pgoff_t rqend, end;
+
+@@ -613,13 +611,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
+ /* Loop over all inode entries and see if we find
+ * A request for the page we wish to update
+ */
+- spin_lock(&nfsi->req_lock);
++ spin_lock(&inode->i_lock);
+ req = nfs_page_find_request_locked(page);
+ if (req) {
+ if (!nfs_lock_request_dontget(req)) {
+ int error;
+
+- spin_unlock(&nfsi->req_lock);
++ spin_unlock(&inode->i_lock);
+ error = nfs_wait_on_request(req);
+ nfs_release_request(req);
+ if (error < 0) {
+@@ -629,7 +627,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
+ }
+ continue;
+ }
+- spin_unlock(&nfsi->req_lock);
++ spin_unlock(&inode->i_lock);
+ if (new)
+ nfs_release_request(new);
+ break;
+@@ -640,14 +638,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
+ nfs_lock_request_dontget(new);
+ error = nfs_inode_add_request(inode, new);
+ if (error) {
+- spin_unlock(&nfsi->req_lock);
++ spin_unlock(&inode->i_lock);
+ nfs_unlock_request(new);
+ return ERR_PTR(error);
+ }
+- spin_unlock(&nfsi->req_lock);
++ spin_unlock(&inode->i_lock);
+ return new;
+ }
+- spin_unlock(&nfsi->req_lock);
++ spin_unlock(&inode->i_lock);
+
+ new = nfs_create_request(ctx, inode, page, offset, bytes);
+ if (IS_ERR(new))
+@@ -751,12 +749,17 @@ int nfs_updatepage(struct file *file, struct page *page,
+ static void nfs_writepage_release(struct nfs_page *req)
+ {
+
+- if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
++ if (PageError(req->wb_page)) {
++ nfs_end_page_writeback(req->wb_page);
++ nfs_inode_remove_request(req);
++ } else if (!nfs_reschedule_unstable_write(req)) {
++ /* Set the PG_uptodate flag */
++ nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes);
+ nfs_end_page_writeback(req->wb_page);
+ nfs_inode_remove_request(req);
+ } else
+ nfs_end_page_writeback(req->wb_page);
+- nfs_clear_page_writeback(req);
++ nfs_clear_page_tag_locked(req);
+ }
+
+ static inline int flush_task_priority(int how)
+@@ -786,7 +789,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
+ * NB: take care not to mess about with data->commit et al. */
+
+ data->req = req;
+- data->inode = inode = req->wb_context->dentry->d_inode;
++ data->inode = inode = req->wb_context->path.dentry->d_inode;
+ data->cred = req->wb_context->cred;
+
+ data->args.fh = NFS_FH(inode);
+@@ -885,7 +888,7 @@ out_bad:
+ }
+ nfs_redirty_request(req);
+ nfs_end_page_writeback(req->wb_page);
+- nfs_clear_page_writeback(req);
++ nfs_clear_page_tag_locked(req);
+ return -ENOMEM;
+ }
+
+@@ -928,7 +931,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
+ nfs_list_remove_request(req);
+ nfs_redirty_request(req);
+ nfs_end_page_writeback(req->wb_page);
+- nfs_clear_page_writeback(req);
++ nfs_clear_page_tag_locked(req);
+ }
+ return -ENOMEM;
+ }
+@@ -954,8 +957,8 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
+ struct page *page = req->wb_page;
+
+ dprintk("NFS: write (%s/%Ld %d@%Ld)",
+- req->wb_context->dentry->d_inode->i_sb->s_id,
+- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
++ req->wb_context->path.dentry->d_inode->i_sb->s_id,
++ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+
+@@ -970,9 +973,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
+ }
+
+ if (nfs_write_need_commit(data)) {
+- spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
++ struct inode *inode = page->mapping->host;
+
+- spin_lock(req_lock);
++ spin_lock(&inode->i_lock);
+ if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
+ /* Do nothing we need to resend the writes */
+ } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+@@ -983,7 +986,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
+ clear_bit(PG_NEED_COMMIT, &req->wb_flags);
+ dprintk(" server reboot detected\n");
+ }
+- spin_unlock(req_lock);
++ spin_unlock(&inode->i_lock);
+ } else
+ dprintk(" OK\n");
+
+@@ -1020,8 +1023,8 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
+ page = req->wb_page;
+
+ dprintk("NFS: write (%s/%Ld %d@%Ld)",
+- req->wb_context->dentry->d_inode->i_sb->s_id,
+- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
++ req->wb_context->path.dentry->d_inode->i_sb->s_id,
++ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+
+@@ -1039,12 +1042,14 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
+ dprintk(" marked for commit\n");
+ goto next;
+ }
++ /* Set the PG_uptodate flag? */
++ nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ dprintk(" OK\n");
+ remove_request:
+ nfs_end_page_writeback(page);
+ nfs_inode_remove_request(req);
+ next:
+- nfs_clear_page_writeback(req);
++ nfs_clear_page_tag_locked(req);
+ }
+ }
+
+@@ -1157,7 +1162,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
+
+ list_splice_init(head, &data->pages);
+ first = nfs_list_entry(data->pages.next);
+- inode = first->wb_context->dentry->d_inode;
++ inode = first->wb_context->path.dentry->d_inode;
+
+ data->inode = inode;
+ data->cred = first->wb_context->cred;
+@@ -1207,7 +1212,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+ nfs_list_remove_request(req);
+ nfs_mark_request_commit(req);
+ dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+- nfs_clear_page_writeback(req);
++ nfs_clear_page_tag_locked(req);
+ }
+ return -ENOMEM;
+ }
+@@ -1234,8 +1239,8 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
+ dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+
+ dprintk("NFS: commit (%s/%Ld %d@%Ld)",
+- req->wb_context->dentry->d_inode->i_sb->s_id,
+- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
++ req->wb_context->path.dentry->d_inode->i_sb->s_id,
++ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+ if (task->tk_status < 0) {
+@@ -1249,6 +1254,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
+ * returned by the server against all stored verfs. */
+ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
+ /* We have a match */
++ /* Set the PG_uptodate flag */
++ nfs_mark_uptodate(req->wb_page, req->wb_pgbase,
++ req->wb_bytes);
+ nfs_inode_remove_request(req);
+ dprintk(" OK\n");
+ goto next;
+@@ -1257,7 +1265,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
+ dprintk(" mismatch\n");
+ nfs_redirty_request(req);
+ next:
+- nfs_clear_page_writeback(req);
++ nfs_clear_page_tag_locked(req);
+ }
+ }
+
+@@ -1268,13 +1276,12 @@ static const struct rpc_call_ops nfs_commit_ops = {
+
+ int nfs_commit_inode(struct inode *inode, int how)
+ {
+- struct nfs_inode *nfsi = NFS_I(inode);
+ LIST_HEAD(head);
+ int res;
+
+- spin_lock(&nfsi->req_lock);
++ spin_lock(&inode->i_lock);
+ res = nfs_scan_commit(inode, &head, 0, 0);
+- spin_unlock(&nfsi->req_lock);
++ spin_unlock(&inode->i_lock);
+ if (res) {
+ int error = nfs_commit_list(inode, &head, how);
+ if (error < 0)
+@@ -1292,7 +1299,6 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i
+ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
+ {
+ struct inode *inode = mapping->host;
+- struct nfs_inode *nfsi = NFS_I(inode);
+ pgoff_t idx_start, idx_end;
+ unsigned int npages = 0;
+ LIST_HEAD(head);
+@@ -1314,7 +1320,7 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
+ }
+ }
+ how &= ~FLUSH_NOCOMMIT;
+- spin_lock(&nfsi->req_lock);
++ spin_lock(&inode->i_lock);
+ do {
+ ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+ if (ret != 0)
+@@ -1325,18 +1331,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
+ if (pages == 0)
+ break;
+ if (how & FLUSH_INVALIDATE) {
+- spin_unlock(&nfsi->req_lock);
++ spin_unlock(&inode->i_lock);
+ nfs_cancel_commit_list(&head);
+ ret = pages;
+- spin_lock(&nfsi->req_lock);
++ spin_lock(&inode->i_lock);
+ continue;
+ }
+ pages += nfs_scan_commit(inode, &head, 0, 0);
+- spin_unlock(&nfsi->req_lock);
++ spin_unlock(&inode->i_lock);
+ ret = nfs_commit_list(inode, &head, how);
+- spin_lock(&nfsi->req_lock);
++ spin_lock(&inode->i_lock);
++
+ } while (ret >= 0);
+- spin_unlock(&nfsi->req_lock);
++ spin_unlock(&inode->i_lock);
+ return ret;
+ }
+
+@@ -1430,7 +1437,6 @@ int nfs_set_page_dirty(struct page *page)
+ {
+ struct address_space *mapping = page->mapping;
+ struct inode *inode;
+- spinlock_t *req_lock;
+ struct nfs_page *req;
+ int ret;
+
+@@ -1439,18 +1445,17 @@ int nfs_set_page_dirty(struct page *page)
+ inode = mapping->host;
+ if (!inode)
+ goto out_raced;
+- req_lock = &NFS_I(inode)->req_lock;
+- spin_lock(req_lock);
++ spin_lock(&inode->i_lock);
+ req = nfs_page_find_request_locked(page);
+ if (req != NULL) {
+ /* Mark any existing write requests for flushing */
+ ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
+- spin_unlock(req_lock);
++ spin_unlock(&inode->i_lock);
+ nfs_release_request(req);
+ return ret;
+ }
+ ret = __set_page_dirty_nobuffers(page);
+- spin_unlock(req_lock);
++ spin_unlock(&inode->i_lock);
+ return ret;
+ out_raced:
+ return !TestSetPageDirty(page);
+diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
+index 864090e..5443c52 100644
+--- a/fs/nfsd/nfs4callback.c
++++ b/fs/nfsd/nfs4callback.c
+@@ -394,7 +394,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
+ .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
+ .rpc_argp = clp,
+ };
+- char clientname[16];
+ int status;
+
+ if (atomic_read(&cb->cb_set))
+@@ -417,11 +416,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
+ memset(program->stats, 0, sizeof(cb->cb_stat));
+ program->stats->program = program;
+
+- /* Just here to make some printk's more useful: */
+- snprintf(clientname, sizeof(clientname),
+- "%u.%u.%u.%u", NIPQUAD(addr.sin_addr));
+- args.servername = clientname;
+-
+ /* Create RPC client */
+ cb->cb_client = rpc_create(&args);
+ if (IS_ERR(cb->cb_client)) {
+@@ -429,29 +423,23 @@ nfsd4_probe_callback(struct nfs4_client *clp)
+ goto out_err;
+ }
+
+- /* Kick rpciod, put the call on the wire. */
+- if (rpciod_up() != 0)
+- goto out_clnt;
+-
+ /* the task holds a reference to the nfs4_client struct */
+ atomic_inc(&clp->cl_count);
+
+ msg.rpc_cred = nfsd4_lookupcred(clp,0);
+ if (IS_ERR(msg.rpc_cred))
+- goto out_rpciod;
++ goto out_release_clp;
+ status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
+ put_rpccred(msg.rpc_cred);
+
+ if (status != 0) {
+ dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
+- goto out_rpciod;
++ goto out_release_clp;
+ }
+ return;
+
+-out_rpciod:
++out_release_clp:
+ atomic_dec(&clp->cl_count);
+- rpciod_down();
+-out_clnt:
+ rpc_shutdown_client(cb->cb_client);
+ out_err:
+ cb->cb_client = NULL;
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 3cc8ce4..8c52913 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -378,7 +378,6 @@ shutdown_callback_client(struct nfs4_client *clp)
+ if (clnt) {
+ clp->cl_callback.cb_client = NULL;
+ rpc_shutdown_client(clnt);
+- rpciod_down();
+ }
+ }
+
+diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
+index 05707e2..e2d1ce3 100644
+--- a/include/linux/lockd/lockd.h
++++ b/include/linux/lockd/lockd.h
+@@ -39,6 +39,7 @@
+ struct nlm_host {
+ struct hlist_node h_hash; /* doubly linked list */
+ struct sockaddr_in h_addr; /* peer address */
++ struct sockaddr_in h_saddr; /* our address (optional) */
+ struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */
+ char * h_name; /* remote hostname */
+ u32 h_version; /* interface version */
+diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
+index 7e7f33a..8726491 100644
+--- a/include/linux/nfs4.h
++++ b/include/linux/nfs4.h
+@@ -15,6 +15,7 @@
+
+ #include <linux/types.h>
+
++#define NFS4_BITMAP_SIZE 2
+ #define NFS4_VERIFIER_SIZE 8
+ #define NFS4_STATEID_SIZE 16
+ #define NFS4_FHSIZE 128
+diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h
+index 26b4c83..a0dcf66 100644
+--- a/include/linux/nfs4_mount.h
++++ b/include/linux/nfs4_mount.h
+@@ -65,6 +65,7 @@ struct nfs4_mount_data {
+ #define NFS4_MOUNT_NOCTO 0x0010 /* 1 */
+ #define NFS4_MOUNT_NOAC 0x0020 /* 1 */
+ #define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */
+-#define NFS4_MOUNT_FLAGMASK 0xFFFF
++#define NFS4_MOUNT_UNSHARED 0x8000 /* 1 */
++#define NFS4_MOUNT_FLAGMASK 0x9033
+
+ #endif
+diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
+index 0543439..c098ae1 100644
+--- a/include/linux/nfs_fs.h
++++ b/include/linux/nfs_fs.h
+@@ -30,7 +30,9 @@
+ #ifdef __KERNEL__
+
+ #include <linux/in.h>
++#include <linux/kref.h>
+ #include <linux/mm.h>
++#include <linux/namei.h>
+ #include <linux/pagemap.h>
+ #include <linux/rbtree.h>
+ #include <linux/rwsem.h>
+@@ -69,9 +71,8 @@ struct nfs_access_entry {
+
+ struct nfs4_state;
+ struct nfs_open_context {
+- atomic_t count;
+- struct vfsmount *vfsmnt;
+- struct dentry *dentry;
++ struct kref kref;
++ struct path path;
+ struct rpc_cred *cred;
+ struct nfs4_state *state;
+ fl_owner_t lockowner;
+@@ -155,13 +156,9 @@ struct nfs_inode {
+ /*
+ * This is the list of dirty unwritten pages.
+ */
+- spinlock_t req_lock;
+- struct list_head dirty;
+- struct list_head commit;
+ struct radix_tree_root nfs_page_tree;
+
+- unsigned int ndirty,
+- ncommit,
++ unsigned long ncommit,
+ npages;
+
+ /* Open contexts for shared mmap writes */
+@@ -187,6 +184,7 @@ struct nfs_inode {
+ #define NFS_INO_INVALID_ACCESS 0x0008 /* cached access cred invalid */
+ #define NFS_INO_INVALID_ACL 0x0010 /* cached acls are invalid */
+ #define NFS_INO_REVAL_PAGECACHE 0x0020 /* must revalidate pagecache */
++#define NFS_INO_REVAL_FORCED 0x0040 /* force revalidation ignoring a delegation */
+
+ /*
+ * Bit offsets in flags field
+@@ -496,21 +494,18 @@ static inline void nfs3_forget_cached_acls(struct inode *inode)
+
+ /*
+ * linux/fs/mount_clnt.c
+- * (Used only by nfsroot module)
+ */
+-extern int nfsroot_mount(struct sockaddr_in *, char *, struct nfs_fh *,
+- int, int);
++extern int nfs_mount(struct sockaddr *, size_t, char *, char *,
++ int, int, struct nfs_fh *);
+
+ /*
+ * inline functions
+ */
+
+-static inline loff_t
+-nfs_size_to_loff_t(__u64 size)
++static inline loff_t nfs_size_to_loff_t(__u64 size)
+ {
+- loff_t maxsz = (((loff_t) ULONG_MAX) << PAGE_CACHE_SHIFT) + PAGE_CACHE_SIZE - 1;
+- if (size > maxsz)
+- return maxsz;
++ if (size > (__u64) OFFSET_MAX - 1)
++ return OFFSET_MAX - 1;
+ return (loff_t) size;
+ }
+
+@@ -557,6 +552,7 @@ extern void * nfs_root_data(void);
+ #define NFSDBG_ROOT 0x0080
+ #define NFSDBG_CALLBACK 0x0100
+ #define NFSDBG_CLIENT 0x0200
++#define NFSDBG_MOUNT 0x0400
+ #define NFSDBG_ALL 0xFFFF
+
+ #ifdef __KERNEL__
+diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
+index 52b4378..0cac49b 100644
+--- a/include/linux/nfs_fs_sb.h
++++ b/include/linux/nfs_fs_sb.h
+@@ -16,7 +16,6 @@ struct nfs_client {
+ #define NFS_CS_INITING 1 /* busy initialising */
+ int cl_nfsversion; /* NFS protocol version */
+ unsigned long cl_res_state; /* NFS resources state */
+-#define NFS_CS_RPCIOD 0 /* - rpciod started */
+ #define NFS_CS_CALLBACK 1 /* - callback started */
+ #define NFS_CS_IDMAP 2 /* - idmap started */
+ #define NFS_CS_RENEWD 3 /* - renewd started */
+@@ -35,7 +34,8 @@ struct nfs_client {
+ nfs4_verifier cl_confirm;
+ unsigned long cl_state;
+
+- u32 cl_lockowner_id;
++ struct rb_root cl_openowner_id;
++ struct rb_root cl_lockowner_id;
+
+ /*
+ * The following rwsem ensures exclusive access to the server
+@@ -44,9 +44,7 @@ struct nfs_client {
+ struct rw_semaphore cl_sem;
+
+ struct list_head cl_delegations;
+- struct list_head cl_state_owners;
+- struct list_head cl_unused;
+- int cl_nunused;
++ struct rb_root cl_state_owners;
+ spinlock_t cl_lock;
+
+ unsigned long cl_lease_time;
+diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
+index cc8b9c5..a3ade89 100644
+--- a/include/linux/nfs_mount.h
++++ b/include/linux/nfs_mount.h
+@@ -37,7 +37,7 @@ struct nfs_mount_data {
+ int acdirmin; /* 1 */
+ int acdirmax; /* 1 */
+ struct sockaddr_in addr; /* 1 */
+- char hostname[256]; /* 1 */
++ char hostname[NFS_MAXNAMLEN + 1]; /* 1 */
+ int namlen; /* 2 */
+ unsigned int bsize; /* 3 */
+ struct nfs3_fh root; /* 4 */
+@@ -62,6 +62,7 @@ struct nfs_mount_data {
+ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */
+ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */
+ #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */
++#define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
+ #define NFS_MOUNT_FLAGMASK 0xFFFF
+
+ #endif
+diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
+index bd193af..78e6079 100644
+--- a/include/linux/nfs_page.h
++++ b/include/linux/nfs_page.h
+@@ -16,12 +16,13 @@
+ #include <linux/sunrpc/auth.h>
+ #include <linux/nfs_xdr.h>
+
+-#include <asm/atomic.h>
++#include <linux/kref.h>
+
+ /*
+ * Valid flags for the radix tree
+ */
+-#define NFS_PAGE_TAG_WRITEBACK 0
++#define NFS_PAGE_TAG_LOCKED 0
++#define NFS_PAGE_TAG_COMMIT 1
+
+ /*
+ * Valid flags for a dirty buffer
+@@ -33,8 +34,7 @@
+
+ struct nfs_inode;
+ struct nfs_page {
+- struct list_head wb_list, /* Defines state of page: */
+- *wb_list_head; /* read/write/commit */
++ struct list_head wb_list; /* Defines state of page: */
+ struct page *wb_page; /* page to read in/write out */
+ struct nfs_open_context *wb_context; /* File state context info */
+ atomic_t wb_complete; /* i/os we're waiting for */
+@@ -42,7 +42,7 @@ struct nfs_page {
+ unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */
+ wb_pgbase, /* Start of page data */
+ wb_bytes; /* Length of request */
+- atomic_t wb_count; /* reference count */
++ struct kref wb_kref; /* reference count */
+ unsigned long wb_flags;
+ struct nfs_writeverf wb_verf; /* Commit cookie */
+ };
+@@ -71,8 +71,8 @@ extern void nfs_clear_request(struct nfs_page *req);
+ extern void nfs_release_request(struct nfs_page *req);
+
+
+-extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst,
+- pgoff_t idx_start, unsigned int npages);
++extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
++ pgoff_t idx_start, unsigned int npages, int tag);
+ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
+ struct inode *inode,
+ int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
+@@ -84,12 +84,11 @@ extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
+ extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
+ extern int nfs_wait_on_request(struct nfs_page *);
+ extern void nfs_unlock_request(struct nfs_page *req);
+-extern int nfs_set_page_writeback_locked(struct nfs_page *req);
+-extern void nfs_clear_page_writeback(struct nfs_page *req);
++extern void nfs_clear_page_tag_locked(struct nfs_page *req);
+
+
+ /*
+- * Lock the page of an asynchronous request without incrementing the wb_count
++ * Lock the page of an asynchronous request without getting a new reference
+ */
+ static inline int
+ nfs_lock_request_dontget(struct nfs_page *req)
+@@ -98,14 +97,14 @@ nfs_lock_request_dontget(struct nfs_page *req)
+ }
+
+ /*
+- * Lock the page of an asynchronous request
++ * Lock the page of an asynchronous request and take a reference
+ */
+ static inline int
+ nfs_lock_request(struct nfs_page *req)
+ {
+ if (test_and_set_bit(PG_BUSY, &req->wb_flags))
+ return 0;
+- atomic_inc(&req->wb_count);
++ kref_get(&req->wb_kref);
+ return 1;
+ }
+
+@@ -118,7 +117,6 @@ static inline void
+ nfs_list_add_request(struct nfs_page *req, struct list_head *head)
+ {
+ list_add_tail(&req->wb_list, head);
+- req->wb_list_head = head;
+ }
+
+
+@@ -132,7 +130,6 @@ nfs_list_remove_request(struct nfs_page *req)
+ if (list_empty(&req->wb_list))
+ return;
+ list_del_init(&req->wb_list);
+- req->wb_list_head = NULL;
+ }
+
+ static inline struct nfs_page *
+diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
+index 10c26ed..38d7768 100644
+--- a/include/linux/nfs_xdr.h
++++ b/include/linux/nfs_xdr.h
+@@ -119,7 +119,7 @@ struct nfs_openargs {
+ struct nfs_seqid * seqid;
+ int open_flags;
+ __u64 clientid;
+- __u32 id;
++ __u64 id;
+ union {
+ struct iattr * attrs; /* UNCHECKED, GUARDED */
+ nfs4_verifier verifier; /* EXCLUSIVE */
+@@ -144,6 +144,7 @@ struct nfs_openres {
+ nfs4_stateid delegation;
+ __u32 do_recall;
+ __u64 maxsize;
++ __u32 attrset[NFS4_BITMAP_SIZE];
+ };
+
+ /*
+@@ -180,7 +181,7 @@ struct nfs_closeres {
+ * */
+ struct nfs_lowner {
+ __u64 clientid;
+- u32 id;
++ __u64 id;
+ };
+
+ struct nfs_lock_args {
+diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
+index 534cdc7..7a69ca3 100644
+--- a/include/linux/sunrpc/auth.h
++++ b/include/linux/sunrpc/auth.h
+@@ -16,6 +16,7 @@
+ #include <linux/sunrpc/xdr.h>
+
+ #include <asm/atomic.h>
++#include <linux/rcupdate.h>
+
+ /* size of the nodename buffer */
+ #define UNX_MAXNODENAME 32
+@@ -30,22 +31,28 @@ struct auth_cred {
+ /*
+ * Client user credentials
+ */
++struct rpc_auth;
++struct rpc_credops;
+ struct rpc_cred {
+ struct hlist_node cr_hash; /* hash chain */
+- struct rpc_credops * cr_ops;
+- unsigned long cr_expire; /* when to gc */
+- atomic_t cr_count; /* ref count */
+- unsigned short cr_flags; /* various flags */
++ struct list_head cr_lru; /* lru garbage collection */
++ struct rcu_head cr_rcu;
++ struct rpc_auth * cr_auth;
++ const struct rpc_credops *cr_ops;
+ #ifdef RPC_DEBUG
+ unsigned long cr_magic; /* 0x0f4aa4f0 */
+ #endif
++ unsigned long cr_expire; /* when to gc */
++ unsigned long cr_flags; /* various flags */
++ atomic_t cr_count; /* ref count */
+
+ uid_t cr_uid;
+
+ /* per-flavor data */
+ };
+-#define RPCAUTH_CRED_NEW 0x0001
+-#define RPCAUTH_CRED_UPTODATE 0x0002
++#define RPCAUTH_CRED_NEW 0
++#define RPCAUTH_CRED_UPTODATE 1
++#define RPCAUTH_CRED_HASHED 2
+
+ #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0
+
+@@ -56,10 +63,10 @@ struct rpc_cred {
+ #define RPC_CREDCACHE_MASK (RPC_CREDCACHE_NR - 1)
+ struct rpc_cred_cache {
+ struct hlist_head hashtable[RPC_CREDCACHE_NR];
+- unsigned long nextgc; /* next garbage collection */
+- unsigned long expire; /* cache expiry interval */
++ spinlock_t lock;
+ };
+
++struct rpc_authops;
+ struct rpc_auth {
+ unsigned int au_cslack; /* call cred size estimate */
+ /* guess at number of u32's auth adds before
+@@ -69,7 +76,7 @@ struct rpc_auth {
+ unsigned int au_verfsize;
+
+ unsigned int au_flags; /* various flags */
+- struct rpc_authops * au_ops; /* operations */
++ const struct rpc_authops *au_ops; /* operations */
+ rpc_authflavor_t au_flavor; /* pseudoflavor (note may
+ * differ from the flavor in
+ * au_ops->au_flavor in gss
+@@ -115,17 +122,19 @@ struct rpc_credops {
+ void *, __be32 *, void *);
+ };
+
+-extern struct rpc_authops authunix_ops;
+-extern struct rpc_authops authnull_ops;
+-#ifdef CONFIG_SUNRPC_SECURE
+-extern struct rpc_authops authdes_ops;
+-#endif
++extern const struct rpc_authops authunix_ops;
++extern const struct rpc_authops authnull_ops;
++
++void __init rpc_init_authunix(void);
++void __init rpcauth_init_module(void);
++void __exit rpcauth_remove_module(void);
+
+-int rpcauth_register(struct rpc_authops *);
+-int rpcauth_unregister(struct rpc_authops *);
++int rpcauth_register(const struct rpc_authops *);
++int rpcauth_unregister(const struct rpc_authops *);
+ struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *);
+-void rpcauth_destroy(struct rpc_auth *);
++void rpcauth_release(struct rpc_auth *);
+ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
++void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
+ struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
+ struct rpc_cred * rpcauth_bindcred(struct rpc_task *);
+ void rpcauth_holdcred(struct rpc_task *);
+@@ -138,8 +147,9 @@ int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
+ int rpcauth_refreshcred(struct rpc_task *);
+ void rpcauth_invalcred(struct rpc_task *);
+ int rpcauth_uptodatecred(struct rpc_task *);
+-int rpcauth_init_credcache(struct rpc_auth *, unsigned long);
+-void rpcauth_free_credcache(struct rpc_auth *);
++int rpcauth_init_credcache(struct rpc_auth *);
++void rpcauth_destroy_credcache(struct rpc_auth *);
++void rpcauth_clear_credcache(struct rpc_cred_cache *);
+
+ static inline
+ struct rpc_cred * get_rpccred(struct rpc_cred *cred)
+diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
+index 2db2fbf..67658e1 100644
+--- a/include/linux/sunrpc/auth_gss.h
++++ b/include/linux/sunrpc/auth_gss.h
+@@ -75,6 +75,7 @@ struct gss_cl_ctx {
+ struct xdr_netobj gc_wire_ctx;
+ u32 gc_win;
+ unsigned long gc_expiry;
++ struct rcu_head gc_rcu;
+ };
+
+ struct gss_upcall_msg;
+@@ -85,11 +86,6 @@ struct gss_cred {
+ struct gss_upcall_msg *gc_upcall;
+ };
+
+-#define gc_uid gc_base.cr_uid
+-#define gc_count gc_base.cr_count
+-#define gc_flags gc_base.cr_flags
+-#define gc_expire gc_base.cr_expire
+-
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_SUNRPC_AUTH_GSS_H */
+
+diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
+index 6661142..c0d9d14 100644
+--- a/include/linux/sunrpc/clnt.h
++++ b/include/linux/sunrpc/clnt.h
+@@ -24,8 +24,10 @@ struct rpc_inode;
+ * The high-level client handle
+ */
+ struct rpc_clnt {
+- atomic_t cl_count; /* Number of clones */
+- atomic_t cl_users; /* number of references */
++ struct kref cl_kref; /* Number of references */
++ struct list_head cl_clients; /* Global list of clients */
++ struct list_head cl_tasks; /* List of tasks */
++ spinlock_t cl_lock; /* spinlock */
+ struct rpc_xprt * cl_xprt; /* transport */
+ struct rpc_procinfo * cl_procinfo; /* procedure info */
+ u32 cl_prog, /* RPC program number */
+@@ -41,9 +43,7 @@ struct rpc_clnt {
+ unsigned int cl_softrtry : 1,/* soft timeouts */
+ cl_intr : 1,/* interruptible */
+ cl_discrtry : 1,/* disconnect before retry */
+- cl_autobind : 1,/* use getport() */
+- cl_oneshot : 1,/* dispose after use */
+- cl_dead : 1;/* abandoned */
++ cl_autobind : 1;/* use getport() */
+
+ struct rpc_rtt * cl_rtt; /* RTO estimator data */
+
+@@ -98,6 +98,7 @@ struct rpc_create_args {
+ int protocol;
+ struct sockaddr *address;
+ size_t addrsize;
++ struct sockaddr *saddress;
+ struct rpc_timeout *timeout;
+ char *servername;
+ struct rpc_program *program;
+@@ -110,20 +111,20 @@ struct rpc_create_args {
+ #define RPC_CLNT_CREATE_HARDRTRY (1UL << 0)
+ #define RPC_CLNT_CREATE_INTR (1UL << 1)
+ #define RPC_CLNT_CREATE_AUTOBIND (1UL << 2)
+-#define RPC_CLNT_CREATE_ONESHOT (1UL << 3)
+-#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 4)
+-#define RPC_CLNT_CREATE_NOPING (1UL << 5)
+-#define RPC_CLNT_CREATE_DISCRTRY (1UL << 6)
++#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3)
++#define RPC_CLNT_CREATE_NOPING (1UL << 4)
++#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5)
+
+ struct rpc_clnt *rpc_create(struct rpc_create_args *args);
+ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
+ struct rpc_program *, int);
+ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
+-int rpc_shutdown_client(struct rpc_clnt *);
+-int rpc_destroy_client(struct rpc_clnt *);
++void rpc_shutdown_client(struct rpc_clnt *);
+ void rpc_release_client(struct rpc_clnt *);
++
+ int rpcb_register(u32, u32, int, unsigned short, int *);
+-void rpcb_getport(struct rpc_task *);
++int rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int);
++void rpcb_getport_async(struct rpc_task *);
+
+ void rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
+
+@@ -132,20 +133,16 @@ int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
+ void *calldata);
+ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg,
+ int flags);
++struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
++ int flags);
+ void rpc_restart_call(struct rpc_task *);
+ void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
+ void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
+ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
+ size_t rpc_max_payload(struct rpc_clnt *);
+ void rpc_force_rebind(struct rpc_clnt *);
+-int rpc_ping(struct rpc_clnt *clnt, int flags);
+ size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
+ char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
+
+-/*
+- * Helper function for NFSroot support
+- */
+-int rpcb_getport_external(struct sockaddr_in *, __u32, __u32, int);
+-
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_SUNRPC_CLNT_H */
+diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
+index 5eca9e4..bbac101 100644
+--- a/include/linux/sunrpc/gss_api.h
++++ b/include/linux/sunrpc/gss_api.h
+@@ -77,7 +77,7 @@ struct gss_api_mech {
+ struct module *gm_owner;
+ struct xdr_netobj gm_oid;
+ char *gm_name;
+- struct gss_api_ops *gm_ops;
++ const struct gss_api_ops *gm_ops;
+ /* pseudoflavors supported by this mechanism: */
+ int gm_pf_num;
+ struct pf_desc * gm_pfs;
+diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
+index ad29376..51b977a 100644
+--- a/include/linux/sunrpc/rpc_pipe_fs.h
++++ b/include/linux/sunrpc/rpc_pipe_fs.h
+@@ -23,9 +23,11 @@ struct rpc_inode {
+ void *private;
+ struct list_head pipe;
+ struct list_head in_upcall;
++ struct list_head in_downcall;
+ int pipelen;
+ int nreaders;
+ int nwriters;
++ int nkern_readwriters;
+ wait_queue_head_t waitq;
+ #define RPC_PIPE_WAIT_FOR_OPEN 1
+ int flags;
+diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
+index 2047fb2..8ea077d 100644
+--- a/include/linux/sunrpc/sched.h
++++ b/include/linux/sunrpc/sched.h
+@@ -98,7 +98,6 @@ struct rpc_task {
+ unsigned short tk_pid; /* debugging aid */
+ #endif
+ };
+-#define tk_auth tk_client->cl_auth
+ #define tk_xprt tk_client->cl_xprt
+
+ /* support walking a list of tasks on a wait queue */
+@@ -110,11 +109,6 @@ struct rpc_task {
+ if (!list_empty(head) && \
+ ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1))
+
+-/* .. and walking list of all tasks */
+-#define alltask_for_each(task, pos, head) \
+- list_for_each(pos, head) \
+- if ((task=list_entry(pos, struct rpc_task, tk_task)),1)
+-
+ typedef void (*rpc_action)(struct rpc_task *);
+
+ struct rpc_call_ops {
+diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
+index e21dd93..a53e0fa 100644
+--- a/include/linux/sunrpc/svcsock.h
++++ b/include/linux/sunrpc/svcsock.h
+@@ -59,6 +59,7 @@ struct svc_sock {
+ /* cache of various info for TCP sockets */
+ void *sk_info_authunix;
+
++ struct sockaddr_storage sk_local; /* local address */
+ struct sockaddr_storage sk_remote; /* remote peer's address */
+ int sk_remotelen; /* length of address */
+ };
+diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
+index 34f7590..d11cedd 100644
+--- a/include/linux/sunrpc/xprt.h
++++ b/include/linux/sunrpc/xprt.h
+@@ -17,6 +17,8 @@
+ #include <linux/sunrpc/xdr.h>
+ #include <linux/sunrpc/msg_prot.h>
+
++#ifdef __KERNEL__
++
+ extern unsigned int xprt_udp_slot_table_entries;
+ extern unsigned int xprt_tcp_slot_table_entries;
+
+@@ -194,7 +196,13 @@ struct rpc_xprt {
+ char * address_strings[RPC_DISPLAY_MAX];
+ };
+
+-#ifdef __KERNEL__
++struct rpc_xprtsock_create {
++ int proto; /* IPPROTO_UDP or IPPROTO_TCP */
++ struct sockaddr * srcaddr; /* optional local address */
++ struct sockaddr * dstaddr; /* remote peer address */
++ size_t addrlen;
++ struct rpc_timeout * timeout; /* optional timeout parameters */
++};
+
+ /*
+ * Transport operations used by ULPs
+@@ -204,7 +212,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long
+ /*
+ * Generic internal transport functions
+ */
+-struct rpc_xprt * xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms);
++struct rpc_xprt * xprt_create_transport(struct rpc_xprtsock_create *args);
+ void xprt_connect(struct rpc_task *task);
+ void xprt_reserve(struct rpc_task *task);
+ int xprt_reserve_xprt(struct rpc_task *task);
+@@ -242,8 +250,8 @@ void xprt_disconnect(struct rpc_xprt *xprt);
+ /*
+ * Socket transport setup operations
+ */
+-struct rpc_xprt * xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to);
+-struct rpc_xprt * xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to);
++struct rpc_xprt * xs_setup_udp(struct rpc_xprtsock_create *args);
++struct rpc_xprt * xs_setup_tcp(struct rpc_xprtsock_create *args);
+ int init_socket_xprt(void);
+ void cleanup_socket_xprt(void);
+
+diff --git a/kernel/auditsc.c b/kernel/auditsc.c
+index e36481e..ced6541 100644
+--- a/kernel/auditsc.c
++++ b/kernel/auditsc.c
+@@ -1500,6 +1500,7 @@ add_names:
+ context->names[idx].ino = (unsigned long)-1;
+ }
+ }
++EXPORT_SYMBOL(__audit_inode_child);
+
+ /**
+ * auditsc_get_stamp - get local copies of audit_context values
+diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
+index 9527f2b..74baf87 100644
+--- a/net/sunrpc/auth.c
++++ b/net/sunrpc/auth.c
+@@ -18,12 +18,16 @@
+ # define RPCDBG_FACILITY RPCDBG_AUTH
+ #endif
+
+-static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = {
++static DEFINE_SPINLOCK(rpc_authflavor_lock);
++static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
+ &authnull_ops, /* AUTH_NULL */
+ &authunix_ops, /* AUTH_UNIX */
+ NULL, /* others can be loadable modules */
+ };
+
++static LIST_HEAD(cred_unused);
++static unsigned long number_cred_unused;
++
+ static u32
+ pseudoflavor_to_flavor(u32 flavor) {
+ if (flavor >= RPC_AUTH_MAXFLAVOR)
+@@ -32,55 +36,67 @@ pseudoflavor_to_flavor(u32 flavor) {
+ }
+
+ int
+-rpcauth_register(struct rpc_authops *ops)
++rpcauth_register(const struct rpc_authops *ops)
+ {
+ rpc_authflavor_t flavor;
++ int ret = -EPERM;
+
+ if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
+ return -EINVAL;
+- if (auth_flavors[flavor] != NULL)
+- return -EPERM; /* what else? */
+- auth_flavors[flavor] = ops;
+- return 0;
++ spin_lock(&rpc_authflavor_lock);
++ if (auth_flavors[flavor] == NULL) {
++ auth_flavors[flavor] = ops;
++ ret = 0;
++ }
++ spin_unlock(&rpc_authflavor_lock);
++ return ret;
+ }
+
+ int
+-rpcauth_unregister(struct rpc_authops *ops)
++rpcauth_unregister(const struct rpc_authops *ops)
+ {
+ rpc_authflavor_t flavor;
++ int ret = -EPERM;
+
+ if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
+ return -EINVAL;
+- if (auth_flavors[flavor] != ops)
+- return -EPERM; /* what else? */
+- auth_flavors[flavor] = NULL;
+- return 0;
++ spin_lock(&rpc_authflavor_lock);
++ if (auth_flavors[flavor] == ops) {
++ auth_flavors[flavor] = NULL;
++ ret = 0;
++ }
++ spin_unlock(&rpc_authflavor_lock);
++ return ret;
+ }
+
+ struct rpc_auth *
+ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
+ {
+ struct rpc_auth *auth;
+- struct rpc_authops *ops;
++ const struct rpc_authops *ops;
+ u32 flavor = pseudoflavor_to_flavor(pseudoflavor);
+
+ auth = ERR_PTR(-EINVAL);
+ if (flavor >= RPC_AUTH_MAXFLAVOR)
+ goto out;
+
+- /* FIXME - auth_flavors[] really needs an rw lock,
+- * and module refcounting. */
+ #ifdef CONFIG_KMOD
+ if ((ops = auth_flavors[flavor]) == NULL)
+ request_module("rpc-auth-%u", flavor);
+ #endif
+- if ((ops = auth_flavors[flavor]) == NULL)
++ spin_lock(&rpc_authflavor_lock);
++ ops = auth_flavors[flavor];
++ if (ops == NULL || !try_module_get(ops->owner)) {
++ spin_unlock(&rpc_authflavor_lock);
+ goto out;
++ }
++ spin_unlock(&rpc_authflavor_lock);
+ auth = ops->create(clnt, pseudoflavor);
++ module_put(ops->owner);
+ if (IS_ERR(auth))
+ return auth;
+ if (clnt->cl_auth)
+- rpcauth_destroy(clnt->cl_auth);
++ rpcauth_release(clnt->cl_auth);
+ clnt->cl_auth = auth;
+
+ out:
+@@ -88,7 +104,7 @@ out:
+ }
+
+ void
+-rpcauth_destroy(struct rpc_auth *auth)
++rpcauth_release(struct rpc_auth *auth)
+ {
+ if (!atomic_dec_and_test(&auth->au_count))
+ return;
+@@ -97,11 +113,31 @@ rpcauth_destroy(struct rpc_auth *auth)
+
+ static DEFINE_SPINLOCK(rpc_credcache_lock);
+
++static void
++rpcauth_unhash_cred_locked(struct rpc_cred *cred)
++{
++ hlist_del_rcu(&cred->cr_hash);
++ smp_mb__before_clear_bit();
++ clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
++}
++
++static void
++rpcauth_unhash_cred(struct rpc_cred *cred)
++{
++ spinlock_t *cache_lock;
++
++ cache_lock = &cred->cr_auth->au_credcache->lock;
++ spin_lock(cache_lock);
++ if (atomic_read(&cred->cr_count) == 0)
++ rpcauth_unhash_cred_locked(cred);
++ spin_unlock(cache_lock);
++}
++
+ /*
+ * Initialize RPC credential cache
+ */
+ int
+-rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire)
++rpcauth_init_credcache(struct rpc_auth *auth)
+ {
+ struct rpc_cred_cache *new;
+ int i;
+@@ -111,8 +147,7 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire)
+ return -ENOMEM;
+ for (i = 0; i < RPC_CREDCACHE_NR; i++)
+ INIT_HLIST_HEAD(&new->hashtable[i]);
+- new->expire = expire;
+- new->nextgc = jiffies + (expire >> 1);
++ spin_lock_init(&new->lock);
+ auth->au_credcache = new;
+ return 0;
+ }
+@@ -121,13 +156,13 @@ rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire)
+ * Destroy a list of credentials
+ */
+ static inline
+-void rpcauth_destroy_credlist(struct hlist_head *head)
++void rpcauth_destroy_credlist(struct list_head *head)
+ {
+ struct rpc_cred *cred;
+
+- while (!hlist_empty(head)) {
+- cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
+- hlist_del_init(&cred->cr_hash);
++ while (!list_empty(head)) {
++ cred = list_entry(head->next, struct rpc_cred, cr_lru);
++ list_del_init(&cred->cr_lru);
+ put_rpccred(cred);
+ }
+ }
+@@ -137,58 +172,95 @@ void rpcauth_destroy_credlist(struct hlist_head *head)
+ * that are not referenced.
+ */
+ void
+-rpcauth_free_credcache(struct rpc_auth *auth)
++rpcauth_clear_credcache(struct rpc_cred_cache *cache)
+ {
+- struct rpc_cred_cache *cache = auth->au_credcache;
+- HLIST_HEAD(free);
+- struct hlist_node *pos, *next;
++ LIST_HEAD(free);
++ struct hlist_head *head;
+ struct rpc_cred *cred;
+ int i;
+
+ spin_lock(&rpc_credcache_lock);
++ spin_lock(&cache->lock);
+ for (i = 0; i < RPC_CREDCACHE_NR; i++) {
+- hlist_for_each_safe(pos, next, &cache->hashtable[i]) {
+- cred = hlist_entry(pos, struct rpc_cred, cr_hash);
+- __hlist_del(&cred->cr_hash);
+- hlist_add_head(&cred->cr_hash, &free);
++ head = &cache->hashtable[i];
++ while (!hlist_empty(head)) {
++ cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
++ get_rpccred(cred);
++ if (!list_empty(&cred->cr_lru)) {
++ list_del(&cred->cr_lru);
++ number_cred_unused--;
++ }
++ list_add_tail(&cred->cr_lru, &free);
++ rpcauth_unhash_cred_locked(cred);
+ }
+ }
++ spin_unlock(&cache->lock);
+ spin_unlock(&rpc_credcache_lock);
+ rpcauth_destroy_credlist(&free);
+ }
+
+-static void
+-rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free)
++/*
++ * Destroy the RPC credential cache
++ */
++void
++rpcauth_destroy_credcache(struct rpc_auth *auth)
+ {
+- if (atomic_read(&cred->cr_count) != 1)
+- return;
+- if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire))
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+- if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) {
+- __hlist_del(&cred->cr_hash);
+- hlist_add_head(&cred->cr_hash, free);
++ struct rpc_cred_cache *cache = auth->au_credcache;
++
++ if (cache) {
++ auth->au_credcache = NULL;
++ rpcauth_clear_credcache(cache);
++ kfree(cache);
+ }
+ }
+
+ /*
+ * Remove stale credentials. Avoid sleeping inside the loop.
+ */
+-static void
+-rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free)
++static int
++rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
+ {
+- struct rpc_cred_cache *cache = auth->au_credcache;
+- struct hlist_node *pos, *next;
+- struct rpc_cred *cred;
+- int i;
++ spinlock_t *cache_lock;
++ struct rpc_cred *cred;
+
+- dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth);
+- for (i = 0; i < RPC_CREDCACHE_NR; i++) {
+- hlist_for_each_safe(pos, next, &cache->hashtable[i]) {
+- cred = hlist_entry(pos, struct rpc_cred, cr_hash);
+- rpcauth_prune_expired(auth, cred, free);
++ while (!list_empty(&cred_unused)) {
++ cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru);
++ list_del_init(&cred->cr_lru);
++ number_cred_unused--;
++ if (atomic_read(&cred->cr_count) != 0)
++ continue;
++ cache_lock = &cred->cr_auth->au_credcache->lock;
++ spin_lock(cache_lock);
++ if (atomic_read(&cred->cr_count) == 0) {
++ get_rpccred(cred);
++ list_add_tail(&cred->cr_lru, free);
++ rpcauth_unhash_cred_locked(cred);
++ nr_to_scan--;
+ }
++ spin_unlock(cache_lock);
++ if (nr_to_scan == 0)
++ break;
+ }
+- cache->nextgc = jiffies + cache->expire;
++ return nr_to_scan;
++}
++
++/*
++ * Run memory cache shrinker.
++ */
++static int
++rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
++{
++ LIST_HEAD(free);
++ int res;
++
++ if (list_empty(&cred_unused))
++ return 0;
++ spin_lock(&rpc_credcache_lock);
++ nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan);
++ res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
++ spin_unlock(&rpc_credcache_lock);
++ rpcauth_destroy_credlist(&free);
++ return res;
+ }
+
+ /*
+@@ -198,53 +270,56 @@ struct rpc_cred *
+ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
+ int flags)
+ {
++ LIST_HEAD(free);
+ struct rpc_cred_cache *cache = auth->au_credcache;
+- HLIST_HEAD(free);
+- struct hlist_node *pos, *next;
+- struct rpc_cred *new = NULL,
+- *cred = NULL;
++ struct hlist_node *pos;
++ struct rpc_cred *cred = NULL,
++ *entry, *new;
+ int nr = 0;
+
+ if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS))
+ nr = acred->uid & RPC_CREDCACHE_MASK;
+-retry:
+- spin_lock(&rpc_credcache_lock);
+- if (time_before(cache->nextgc, jiffies))
+- rpcauth_gc_credcache(auth, &free);
+- hlist_for_each_safe(pos, next, &cache->hashtable[nr]) {
+- struct rpc_cred *entry;
+- entry = hlist_entry(pos, struct rpc_cred, cr_hash);
+- if (entry->cr_ops->crmatch(acred, entry, flags)) {
+- hlist_del(&entry->cr_hash);
+- cred = entry;
+- break;
++
++ rcu_read_lock();
++ hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) {
++ if (!entry->cr_ops->crmatch(acred, entry, flags))
++ continue;
++ spin_lock(&cache->lock);
++ if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
++ spin_unlock(&cache->lock);
++ continue;
+ }
+- rpcauth_prune_expired(auth, entry, &free);
+- }
+- if (new) {
+- if (cred)
+- hlist_add_head(&new->cr_hash, &free);
+- else
+- cred = new;
++ cred = get_rpccred(entry);
++ spin_unlock(&cache->lock);
++ break;
+ }
+- if (cred) {
+- hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]);
+- get_rpccred(cred);
+- }
+- spin_unlock(&rpc_credcache_lock);
++ rcu_read_unlock();
+
+- rpcauth_destroy_credlist(&free);
++ if (cred != NULL)
++ goto found;
+
+- if (!cred) {
+- new = auth->au_ops->crcreate(auth, acred, flags);
+- if (!IS_ERR(new)) {
+-#ifdef RPC_DEBUG
+- new->cr_magic = RPCAUTH_CRED_MAGIC;
+-#endif
+- goto retry;
+- } else
+- cred = new;
+- } else if ((cred->cr_flags & RPCAUTH_CRED_NEW)
++ new = auth->au_ops->crcreate(auth, acred, flags);
++ if (IS_ERR(new)) {
++ cred = new;
++ goto out;
++ }
++
++ spin_lock(&cache->lock);
++ hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) {
++ if (!entry->cr_ops->crmatch(acred, entry, flags))
++ continue;
++ cred = get_rpccred(entry);
++ break;
++ }
++ if (cred == NULL) {
++ cred = new;
++ set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
++ hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]);
++ } else
++ list_add_tail(&new->cr_lru, &free);
++ spin_unlock(&cache->lock);
++found:
++ if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)
+ && cred->cr_ops->cr_init != NULL
+ && !(flags & RPCAUTH_LOOKUP_NEW)) {
+ int res = cred->cr_ops->cr_init(auth, cred);
+@@ -253,8 +328,9 @@ retry:
+ cred = ERR_PTR(res);
+ }
+ }
+-
+- return (struct rpc_cred *) cred;
++ rpcauth_destroy_credlist(&free);
++out:
++ return cred;
+ }
+
+ struct rpc_cred *
+@@ -275,10 +351,27 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
+ return ret;
+ }
+
++void
++rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
++ struct rpc_auth *auth, const struct rpc_credops *ops)
++{
++ INIT_HLIST_NODE(&cred->cr_hash);
++ INIT_LIST_HEAD(&cred->cr_lru);
++ atomic_set(&cred->cr_count, 1);
++ cred->cr_auth = auth;
++ cred->cr_ops = ops;
++ cred->cr_expire = jiffies;
++#ifdef RPC_DEBUG
++ cred->cr_magic = RPCAUTH_CRED_MAGIC;
++#endif
++ cred->cr_uid = acred->uid;
++}
++EXPORT_SYMBOL(rpcauth_init_cred);
++
+ struct rpc_cred *
+ rpcauth_bindcred(struct rpc_task *task)
+ {
+- struct rpc_auth *auth = task->tk_auth;
++ struct rpc_auth *auth = task->tk_client->cl_auth;
+ struct auth_cred acred = {
+ .uid = current->fsuid,
+ .gid = current->fsgid,
+@@ -288,7 +381,7 @@ rpcauth_bindcred(struct rpc_task *task)
+ int flags = 0;
+
+ dprintk("RPC: %5u looking up %s cred\n",
+- task->tk_pid, task->tk_auth->au_ops->au_name);
++ task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
+ get_group_info(acred.group_info);
+ if (task->tk_flags & RPC_TASK_ROOTCREDS)
+ flags |= RPCAUTH_LOOKUP_ROOTCREDS;
+@@ -304,19 +397,42 @@ rpcauth_bindcred(struct rpc_task *task)
+ void
+ rpcauth_holdcred(struct rpc_task *task)
+ {
+- dprintk("RPC: %5u holding %s cred %p\n",
+- task->tk_pid, task->tk_auth->au_ops->au_name,
+- task->tk_msg.rpc_cred);
+- if (task->tk_msg.rpc_cred)
+- get_rpccred(task->tk_msg.rpc_cred);
++ struct rpc_cred *cred = task->tk_msg.rpc_cred;
++ if (cred != NULL) {
++ get_rpccred(cred);
++ dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
++ cred->cr_auth->au_ops->au_name, cred);
++ }
+ }
+
+ void
+ put_rpccred(struct rpc_cred *cred)
+ {
+- cred->cr_expire = jiffies;
++ /* Fast path for unhashed credentials */
++ if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
++ goto need_lock;
++
+ if (!atomic_dec_and_test(&cred->cr_count))
+ return;
++ goto out_destroy;
++need_lock:
++ if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
++ return;
++ if (!list_empty(&cred->cr_lru)) {
++ number_cred_unused--;
++ list_del_init(&cred->cr_lru);
++ }
++ if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
++ rpcauth_unhash_cred(cred);
++ else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
++ cred->cr_expire = jiffies;
++ list_add_tail(&cred->cr_lru, &cred_unused);
++ number_cred_unused++;
++ spin_unlock(&rpc_credcache_lock);
++ return;
++ }
++ spin_unlock(&rpc_credcache_lock);
++out_destroy:
+ cred->cr_ops->crdestroy(cred);
+ }
+
+@@ -326,7 +442,7 @@ rpcauth_unbindcred(struct rpc_task *task)
+ struct rpc_cred *cred = task->tk_msg.rpc_cred;
+
+ dprintk("RPC: %5u releasing %s cred %p\n",
+- task->tk_pid, task->tk_auth->au_ops->au_name, cred);
++ task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
+
+ put_rpccred(cred);
+ task->tk_msg.rpc_cred = NULL;
+@@ -338,7 +454,7 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p)
+ struct rpc_cred *cred = task->tk_msg.rpc_cred;
+
+ dprintk("RPC: %5u marshaling %s cred %p\n",
+- task->tk_pid, task->tk_auth->au_ops->au_name, cred);
++ task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
+
+ return cred->cr_ops->crmarshal(task, p);
+ }
+@@ -349,7 +465,7 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p)
+ struct rpc_cred *cred = task->tk_msg.rpc_cred;
+
+ dprintk("RPC: %5u validating %s cred %p\n",
+- task->tk_pid, task->tk_auth->au_ops->au_name, cred);
++ task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
+
+ return cred->cr_ops->crvalidate(task, p);
+ }
+@@ -390,7 +506,7 @@ rpcauth_refreshcred(struct rpc_task *task)
+ int err;
+
+ dprintk("RPC: %5u refreshing %s cred %p\n",
+- task->tk_pid, task->tk_auth->au_ops->au_name, cred);
++ task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
+
+ err = cred->cr_ops->crrefresh(task);
+ if (err < 0)
+@@ -401,17 +517,34 @@ rpcauth_refreshcred(struct rpc_task *task)
+ void
+ rpcauth_invalcred(struct rpc_task *task)
+ {
++ struct rpc_cred *cred = task->tk_msg.rpc_cred;
++
+ dprintk("RPC: %5u invalidating %s cred %p\n",
+- task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred);
+- spin_lock(&rpc_credcache_lock);
+- if (task->tk_msg.rpc_cred)
+- task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+- spin_unlock(&rpc_credcache_lock);
++ task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
++ if (cred)
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ }
+
+ int
+ rpcauth_uptodatecred(struct rpc_task *task)
+ {
+- return !(task->tk_msg.rpc_cred) ||
+- (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
++ struct rpc_cred *cred = task->tk_msg.rpc_cred;
++
++ return cred == NULL ||
++ test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0;
++}
++
++
++static struct shrinker *rpc_cred_shrinker;
++
++void __init rpcauth_init_module(void)
++{
++ rpc_init_authunix();
++ rpc_cred_shrinker = set_shrinker(DEFAULT_SEEKS, rpcauth_cache_shrinker);
++}
++
++void __exit rpcauth_remove_module(void)
++{
++ if (rpc_cred_shrinker != NULL)
++ remove_shrinker(rpc_cred_shrinker);
+ }
+diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
+index 4e4ccc5..17d460f 100644
+--- a/net/sunrpc/auth_gss/auth_gss.c
++++ b/net/sunrpc/auth_gss/auth_gss.c
+@@ -54,9 +54,10 @@
+ #include <linux/sunrpc/gss_api.h>
+ #include <asm/uaccess.h>
+
+-static struct rpc_authops authgss_ops;
++static const struct rpc_authops authgss_ops;
+
+-static struct rpc_credops gss_credops;
++static const struct rpc_credops gss_credops;
++static const struct rpc_credops gss_nullops;
+
+ #ifdef RPC_DEBUG
+ # define RPCDBG_FACILITY RPCDBG_AUTH
+@@ -64,7 +65,6 @@ static struct rpc_credops gss_credops;
+
+ #define NFS_NGROUPS 16
+
+-#define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? */
+ #define GSS_CRED_SLACK 1024 /* XXX: unused */
+ /* length of a krb5 verifier (48), plus data added before arguments when
+ * using integrity (two 4-byte integers): */
+@@ -79,19 +79,16 @@ static struct rpc_credops gss_credops;
+ /* dump the buffer in `emacs-hexl' style */
+ #define isprint(c) ((c > 0x1f) && (c < 0x7f))
+
+-static DEFINE_RWLOCK(gss_ctx_lock);
+-
+ struct gss_auth {
++ struct kref kref;
+ struct rpc_auth rpc_auth;
+ struct gss_api_mech *mech;
+ enum rpc_gss_svc service;
+- struct list_head upcalls;
+ struct rpc_clnt *client;
+ struct dentry *dentry;
+- spinlock_t lock;
+ };
+
+-static void gss_destroy_ctx(struct gss_cl_ctx *);
++static void gss_free_ctx(struct gss_cl_ctx *);
+ static struct rpc_pipe_ops gss_upcall_ops;
+
+ static inline struct gss_cl_ctx *
+@@ -105,20 +102,24 @@ static inline void
+ gss_put_ctx(struct gss_cl_ctx *ctx)
+ {
+ if (atomic_dec_and_test(&ctx->count))
+- gss_destroy_ctx(ctx);
++ gss_free_ctx(ctx);
+ }
+
++/* gss_cred_set_ctx:
++ * called by gss_upcall_callback and gss_create_upcall in order
++ * to set the gss context. The actual exchange of an old context
++ * and a new one is protected by the inode->i_lock.
++ */
+ static void
+ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
+ {
+ struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
+ struct gss_cl_ctx *old;
+- write_lock(&gss_ctx_lock);
++
+ old = gss_cred->gc_ctx;
+- gss_cred->gc_ctx = ctx;
+- cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
+- cred->cr_flags &= ~RPCAUTH_CRED_NEW;
+- write_unlock(&gss_ctx_lock);
++ rcu_assign_pointer(gss_cred->gc_ctx, ctx);
++ set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
++ clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
+ if (old)
+ gss_put_ctx(old);
+ }
+@@ -129,10 +130,10 @@ gss_cred_is_uptodate_ctx(struct rpc_cred *cred)
+ struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
+ int res = 0;
+
+- read_lock(&gss_ctx_lock);
+- if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx)
++ rcu_read_lock();
++ if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx)
+ res = 1;
+- read_unlock(&gss_ctx_lock);
++ rcu_read_unlock();
+ return res;
+ }
+
+@@ -171,10 +172,10 @@ gss_cred_get_ctx(struct rpc_cred *cred)
+ struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
+ struct gss_cl_ctx *ctx = NULL;
+
+- read_lock(&gss_ctx_lock);
++ rcu_read_lock();
+ if (gss_cred->gc_ctx)
+ ctx = gss_get_ctx(gss_cred->gc_ctx);
+- read_unlock(&gss_ctx_lock);
++ rcu_read_unlock();
+ return ctx;
+ }
+
+@@ -269,10 +270,10 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
+ }
+
+ static struct gss_upcall_msg *
+-__gss_find_upcall(struct gss_auth *gss_auth, uid_t uid)
++__gss_find_upcall(struct rpc_inode *rpci, uid_t uid)
+ {
+ struct gss_upcall_msg *pos;
+- list_for_each_entry(pos, &gss_auth->upcalls, list) {
++ list_for_each_entry(pos, &rpci->in_downcall, list) {
+ if (pos->uid != uid)
+ continue;
+ atomic_inc(&pos->count);
+@@ -290,24 +291,24 @@ __gss_find_upcall(struct gss_auth *gss_auth, uid_t uid)
+ static inline struct gss_upcall_msg *
+ gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg)
+ {
++ struct inode *inode = gss_auth->dentry->d_inode;
++ struct rpc_inode *rpci = RPC_I(inode);
+ struct gss_upcall_msg *old;
+
+- spin_lock(&gss_auth->lock);
+- old = __gss_find_upcall(gss_auth, gss_msg->uid);
++ spin_lock(&inode->i_lock);
++ old = __gss_find_upcall(rpci, gss_msg->uid);
+ if (old == NULL) {
+ atomic_inc(&gss_msg->count);
+- list_add(&gss_msg->list, &gss_auth->upcalls);
++ list_add(&gss_msg->list, &rpci->in_downcall);
+ } else
+ gss_msg = old;
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ return gss_msg;
+ }
+
+ static void
+ __gss_unhash_msg(struct gss_upcall_msg *gss_msg)
+ {
+- if (list_empty(&gss_msg->list))
+- return;
+ list_del_init(&gss_msg->list);
+ rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+ wake_up_all(&gss_msg->waitqueue);
+@@ -318,10 +319,14 @@ static void
+ gss_unhash_msg(struct gss_upcall_msg *gss_msg)
+ {
+ struct gss_auth *gss_auth = gss_msg->auth;
++ struct inode *inode = gss_auth->dentry->d_inode;
+
+- spin_lock(&gss_auth->lock);
+- __gss_unhash_msg(gss_msg);
+- spin_unlock(&gss_auth->lock);
++ if (list_empty(&gss_msg->list))
++ return;
++ spin_lock(&inode->i_lock);
++ if (!list_empty(&gss_msg->list))
++ __gss_unhash_msg(gss_msg);
++ spin_unlock(&inode->i_lock);
+ }
+
+ static void
+@@ -330,16 +335,16 @@ gss_upcall_callback(struct rpc_task *task)
+ struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
+ struct gss_cred, gc_base);
+ struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall;
++ struct inode *inode = gss_msg->auth->dentry->d_inode;
+
+- BUG_ON(gss_msg == NULL);
++ spin_lock(&inode->i_lock);
+ if (gss_msg->ctx)
+ gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx));
+ else
+ task->tk_status = gss_msg->msg.errno;
+- spin_lock(&gss_msg->auth->lock);
+ gss_cred->gc_upcall = NULL;
+ rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+- spin_unlock(&gss_msg->auth->lock);
++ spin_unlock(&inode->i_lock);
+ gss_release_msg(gss_msg);
+ }
+
+@@ -386,11 +391,12 @@ static inline int
+ gss_refresh_upcall(struct rpc_task *task)
+ {
+ struct rpc_cred *cred = task->tk_msg.rpc_cred;
+- struct gss_auth *gss_auth = container_of(task->tk_client->cl_auth,
++ struct gss_auth *gss_auth = container_of(cred->cr_auth,
+ struct gss_auth, rpc_auth);
+ struct gss_cred *gss_cred = container_of(cred,
+ struct gss_cred, gc_base);
+ struct gss_upcall_msg *gss_msg;
++ struct inode *inode = gss_auth->dentry->d_inode;
+ int err = 0;
+
+ dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid,
+@@ -400,7 +406,7 @@ gss_refresh_upcall(struct rpc_task *task)
+ err = PTR_ERR(gss_msg);
+ goto out;
+ }
+- spin_lock(&gss_auth->lock);
++ spin_lock(&inode->i_lock);
+ if (gss_cred->gc_upcall != NULL)
+ rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL);
+ else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
+@@ -411,7 +417,7 @@ gss_refresh_upcall(struct rpc_task *task)
+ rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL);
+ } else
+ err = gss_msg->msg.errno;
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ gss_release_msg(gss_msg);
+ out:
+ dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n",
+@@ -422,6 +428,7 @@ out:
+ static inline int
+ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+ {
++ struct inode *inode = gss_auth->dentry->d_inode;
+ struct rpc_cred *cred = &gss_cred->gc_base;
+ struct gss_upcall_msg *gss_msg;
+ DEFINE_WAIT(wait);
+@@ -435,12 +442,11 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+ }
+ for (;;) {
+ prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE);
+- spin_lock(&gss_auth->lock);
++ spin_lock(&inode->i_lock);
+ if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) {
+- spin_unlock(&gss_auth->lock);
+ break;
+ }
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ if (signalled()) {
+ err = -ERESTARTSYS;
+ goto out_intr;
+@@ -451,6 +457,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+ gss_cred_set_ctx(cred, gss_get_ctx(gss_msg->ctx));
+ else
+ err = gss_msg->msg.errno;
++ spin_unlock(&inode->i_lock);
+ out_intr:
+ finish_wait(&gss_msg->waitqueue, &wait);
+ gss_release_msg(gss_msg);
+@@ -489,12 +496,11 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+ const void *p, *end;
+ void *buf;
+ struct rpc_clnt *clnt;
+- struct gss_auth *gss_auth;
+- struct rpc_cred *cred;
+ struct gss_upcall_msg *gss_msg;
++ struct inode *inode = filp->f_path.dentry->d_inode;
+ struct gss_cl_ctx *ctx;
+ uid_t uid;
+- int err = -EFBIG;
++ ssize_t err = -EFBIG;
+
+ if (mlen > MSG_BUF_MAXSIZE)
+ goto out;
+@@ -503,7 +509,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+ if (!buf)
+ goto out;
+
+- clnt = RPC_I(filp->f_path.dentry->d_inode)->private;
++ clnt = RPC_I(inode)->private;
+ err = -EFAULT;
+ if (copy_from_user(buf, src, mlen))
+ goto err;
+@@ -519,43 +525,38 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+ ctx = gss_alloc_context();
+ if (ctx == NULL)
+ goto err;
+- err = 0;
+- gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth);
+- p = gss_fill_context(p, end, ctx, gss_auth->mech);
++
++ err = -ENOENT;
++ /* Find a matching upcall */
++ spin_lock(&inode->i_lock);
++ gss_msg = __gss_find_upcall(RPC_I(inode), uid);
++ if (gss_msg == NULL) {
++ spin_unlock(&inode->i_lock);
++ goto err_put_ctx;
++ }
++ list_del_init(&gss_msg->list);
++ spin_unlock(&inode->i_lock);
++
++ p = gss_fill_context(p, end, ctx, gss_msg->auth->mech);
+ if (IS_ERR(p)) {
+ err = PTR_ERR(p);
+- if (err != -EACCES)
+- goto err_put_ctx;
+- }
+- spin_lock(&gss_auth->lock);
+- gss_msg = __gss_find_upcall(gss_auth, uid);
+- if (gss_msg) {
+- if (err == 0 && gss_msg->ctx == NULL)
+- gss_msg->ctx = gss_get_ctx(ctx);
+- gss_msg->msg.errno = err;
+- __gss_unhash_msg(gss_msg);
+- spin_unlock(&gss_auth->lock);
+- gss_release_msg(gss_msg);
+- } else {
+- struct auth_cred acred = { .uid = uid };
+- spin_unlock(&gss_auth->lock);
+- cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, RPCAUTH_LOOKUP_NEW);
+- if (IS_ERR(cred)) {
+- err = PTR_ERR(cred);
+- goto err_put_ctx;
+- }
+- gss_cred_set_ctx(cred, gss_get_ctx(ctx));
++ gss_msg->msg.errno = (err == -EACCES) ? -EACCES : -EAGAIN;
++ goto err_release_msg;
+ }
+- gss_put_ctx(ctx);
+- kfree(buf);
+- dprintk("RPC: gss_pipe_downcall returning length %Zu\n", mlen);
+- return mlen;
++ gss_msg->ctx = gss_get_ctx(ctx);
++ err = mlen;
++
++err_release_msg:
++ spin_lock(&inode->i_lock);
++ __gss_unhash_msg(gss_msg);
++ spin_unlock(&inode->i_lock);
++ gss_release_msg(gss_msg);
+ err_put_ctx:
+ gss_put_ctx(ctx);
+ err:
+ kfree(buf);
+ out:
+- dprintk("RPC: gss_pipe_downcall returning %d\n", err);
++ dprintk("RPC: gss_pipe_downcall returning %Zd\n", err);
+ return err;
+ }
+
+@@ -563,27 +564,21 @@ static void
+ gss_pipe_release(struct inode *inode)
+ {
+ struct rpc_inode *rpci = RPC_I(inode);
+- struct rpc_clnt *clnt;
+- struct rpc_auth *auth;
+- struct gss_auth *gss_auth;
++ struct gss_upcall_msg *gss_msg;
+
+- clnt = rpci->private;
+- auth = clnt->cl_auth;
+- gss_auth = container_of(auth, struct gss_auth, rpc_auth);
+- spin_lock(&gss_auth->lock);
+- while (!list_empty(&gss_auth->upcalls)) {
+- struct gss_upcall_msg *gss_msg;
++ spin_lock(&inode->i_lock);
++ while (!list_empty(&rpci->in_downcall)) {
+
+- gss_msg = list_entry(gss_auth->upcalls.next,
++ gss_msg = list_entry(rpci->in_downcall.next,
+ struct gss_upcall_msg, list);
+ gss_msg->msg.errno = -EPIPE;
+ atomic_inc(&gss_msg->count);
+ __gss_unhash_msg(gss_msg);
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ gss_release_msg(gss_msg);
+- spin_lock(&gss_auth->lock);
++ spin_lock(&inode->i_lock);
+ }
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ }
+
+ static void
+@@ -637,18 +632,13 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+ gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
+ if (gss_auth->service == 0)
+ goto err_put_mech;
+- INIT_LIST_HEAD(&gss_auth->upcalls);
+- spin_lock_init(&gss_auth->lock);
+ auth = &gss_auth->rpc_auth;
+ auth->au_cslack = GSS_CRED_SLACK >> 2;
+ auth->au_rslack = GSS_VERF_SLACK >> 2;
+ auth->au_ops = &authgss_ops;
+ auth->au_flavor = flavor;
+ atomic_set(&auth->au_count, 1);
+-
+- err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE);
+- if (err)
+- goto err_put_mech;
++ kref_init(&gss_auth->kref);
+
+ gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name,
+ clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+@@ -657,7 +647,13 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+ goto err_put_mech;
+ }
+
++ err = rpcauth_init_credcache(auth);
++ if (err)
++ goto err_unlink_pipe;
++
+ return auth;
++err_unlink_pipe:
++ rpc_unlink(gss_auth->dentry);
+ err_put_mech:
+ gss_mech_put(gss_auth->mech);
+ err_free:
+@@ -668,6 +664,25 @@ out_dec:
+ }
+
+ static void
++gss_free(struct gss_auth *gss_auth)
++{
++ rpc_unlink(gss_auth->dentry);
++ gss_auth->dentry = NULL;
++ gss_mech_put(gss_auth->mech);
++
++ kfree(gss_auth);
++ module_put(THIS_MODULE);
++}
++
++static void
++gss_free_callback(struct kref *kref)
++{
++ struct gss_auth *gss_auth = container_of(kref, struct gss_auth, kref);
++
++ gss_free(gss_auth);
++}
++
++static void
+ gss_destroy(struct rpc_auth *auth)
+ {
+ struct gss_auth *gss_auth;
+@@ -675,23 +690,51 @@ gss_destroy(struct rpc_auth *auth)
+ dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
+ auth, auth->au_flavor);
+
++ rpcauth_destroy_credcache(auth);
++
+ gss_auth = container_of(auth, struct gss_auth, rpc_auth);
+- rpc_unlink(gss_auth->dentry);
+- gss_auth->dentry = NULL;
+- gss_mech_put(gss_auth->mech);
++ kref_put(&gss_auth->kref, gss_free_callback);
++}
+
+- rpcauth_free_credcache(auth);
+- kfree(gss_auth);
+- module_put(THIS_MODULE);
++/*
++ * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
++ * to the server with the GSS control procedure field set to
++ * RPC_GSS_PROC_DESTROY. This should normally cause the server to release
++ * all RPCSEC_GSS state associated with that context.
++ */
++static int
++gss_destroying_context(struct rpc_cred *cred)
++{
++ struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
++ struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
++ struct rpc_task *task;
++
++ if (gss_cred->gc_ctx == NULL ||
++ gss_cred->gc_ctx->gc_proc == RPC_GSS_PROC_DESTROY)
++ return 0;
++
++ gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY;
++ cred->cr_ops = &gss_nullops;
++
++ /* Take a reference to ensure the cred will be destroyed either
++ * by the RPC call or by the put_rpccred() below */
++ get_rpccred(cred);
++
++ task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC);
++ if (!IS_ERR(task))
++ rpc_put_task(task);
++
++ put_rpccred(cred);
++ return 1;
+ }
+
+-/* gss_destroy_cred (and gss_destroy_ctx) are used to clean up after failure
++/* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure
+ * to create a new cred or context, so they check that things have been
+ * allocated before freeing them. */
+ static void
+-gss_destroy_ctx(struct gss_cl_ctx *ctx)
++gss_do_free_ctx(struct gss_cl_ctx *ctx)
+ {
+- dprintk("RPC: gss_destroy_ctx\n");
++ dprintk("RPC: gss_free_ctx\n");
+
+ if (ctx->gc_gss_ctx)
+ gss_delete_sec_context(&ctx->gc_gss_ctx);
+@@ -701,15 +744,46 @@ gss_destroy_ctx(struct gss_cl_ctx *ctx)
+ }
+
+ static void
+-gss_destroy_cred(struct rpc_cred *rc)
++gss_free_ctx_callback(struct rcu_head *head)
+ {
+- struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base);
++ struct gss_cl_ctx *ctx = container_of(head, struct gss_cl_ctx, gc_rcu);
++ gss_do_free_ctx(ctx);
++}
+
+- dprintk("RPC: gss_destroy_cred \n");
++static void
++gss_free_ctx(struct gss_cl_ctx *ctx)
++{
++ call_rcu(&ctx->gc_rcu, gss_free_ctx_callback);
++}
+
+- if (cred->gc_ctx)
+- gss_put_ctx(cred->gc_ctx);
+- kfree(cred);
++static void
++gss_free_cred(struct gss_cred *gss_cred)
++{
++ dprintk("RPC: gss_free_cred %p\n", gss_cred);
++ kfree(gss_cred);
++}
++
++static void
++gss_free_cred_callback(struct rcu_head *head)
++{
++ struct gss_cred *gss_cred = container_of(head, struct gss_cred, gc_base.cr_rcu);
++ gss_free_cred(gss_cred);
++}
++
++static void
++gss_destroy_cred(struct rpc_cred *cred)
++{
++ struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
++ struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
++ struct gss_cl_ctx *ctx = gss_cred->gc_ctx;
++
++ if (gss_destroying_context(cred))
++ return;
++ rcu_assign_pointer(gss_cred->gc_ctx, NULL);
++ call_rcu(&cred->cr_rcu, gss_free_cred_callback);
++ if (ctx)
++ gss_put_ctx(ctx);
++ kref_put(&gss_auth->kref, gss_free_callback);
+ }
+
+ /*
+@@ -734,16 +808,14 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+ if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL)))
+ goto out_err;
+
+- atomic_set(&cred->gc_count, 1);
+- cred->gc_uid = acred->uid;
++ rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops);
+ /*
+ * Note: in order to force a call to call_refresh(), we deliberately
+ * fail to flag the credential as RPCAUTH_CRED_UPTODATE.
+ */
+- cred->gc_flags = 0;
+- cred->gc_base.cr_ops = &gss_credops;
+- cred->gc_base.cr_flags = RPCAUTH_CRED_NEW;
++ cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW;
+ cred->gc_service = gss_auth->service;
++ kref_get(&gss_auth->kref);
+ return &cred->gc_base;
+
+ out_err:
+@@ -774,7 +846,7 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
+ * we don't really care if the credential has expired or not,
+ * since the caller should be prepared to reinitialise it.
+ */
+- if ((flags & RPCAUTH_LOOKUP_NEW) && (rc->cr_flags & RPCAUTH_CRED_NEW))
++ if ((flags & RPCAUTH_LOOKUP_NEW) && test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
+ goto out;
+ /* Don't match with creds that have expired. */
+ if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry))
+@@ -830,7 +902,7 @@ gss_marshal(struct rpc_task *task, __be32 *p)
+ mic.data = (u8 *)(p + 1);
+ maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ } else if (maj_stat != 0) {
+ printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
+ goto out_put_ctx;
+@@ -855,6 +927,13 @@ gss_refresh(struct rpc_task *task)
+ return 0;
+ }
+
++/* Dummy refresh routine: used only when destroying the context */
++static int
++gss_refresh_null(struct rpc_task *task)
++{
++ return -EACCES;
++}
++
+ static __be32 *
+ gss_validate(struct rpc_task *task, __be32 *p)
+ {
+@@ -883,12 +962,15 @@ gss_validate(struct rpc_task *task, __be32 *p)
+
+ maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+- if (maj_stat)
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
++ if (maj_stat) {
++ dprintk("RPC: %5u gss_validate: gss_verify_mic returned"
++ "error 0x%08x\n", task->tk_pid, maj_stat);
+ goto out_bad;
++ }
+ /* We leave it to unwrap to calculate au_rslack. For now we just
+ * calculate the length of the verifier: */
+- task->tk_auth->au_verfsize = XDR_QUADLEN(len) + 2;
++ cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
+ gss_put_ctx(ctx);
+ dprintk("RPC: %5u gss_validate: gss_verify_mic succeeded.\n",
+ task->tk_pid);
+@@ -937,7 +1019,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+ maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
+ status = -EIO; /* XXX? */
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ else if (maj_stat)
+ return status;
+ q = xdr_encode_opaque(p, NULL, mic.len);
+@@ -1036,7 +1118,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+ /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
+ * done anyway, so it's safe to put the request on the wire: */
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ else if (maj_stat)
+ return status;
+
+@@ -1123,7 +1205,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+
+ maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ if (maj_stat != GSS_S_COMPLETE)
+ return status;
+ return 0;
+@@ -1148,7 +1230,7 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+
+ maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ if (maj_stat != GSS_S_COMPLETE)
+ return status;
+ if (ntohl(*(*p)++) != rqstp->rq_seqno)
+@@ -1188,7 +1270,7 @@ gss_unwrap_resp(struct rpc_task *task,
+ break;
+ }
+ /* take into account extra slack for integrity and privacy cases: */
+- task->tk_auth->au_rslack = task->tk_auth->au_verfsize + (p - savedp)
++ cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
+ + (savedlen - head->iov_len);
+ out_decode:
+ status = decode(rqstp, p, obj);
+@@ -1199,7 +1281,7 @@ out:
+ return status;
+ }
+
+-static struct rpc_authops authgss_ops = {
++static const struct rpc_authops authgss_ops = {
+ .owner = THIS_MODULE,
+ .au_flavor = RPC_AUTH_GSS,
+ #ifdef RPC_DEBUG
+@@ -1211,7 +1293,7 @@ static struct rpc_authops authgss_ops = {
+ .crcreate = gss_create_cred
+ };
+
+-static struct rpc_credops gss_credops = {
++static const struct rpc_credops gss_credops = {
+ .cr_name = "AUTH_GSS",
+ .crdestroy = gss_destroy_cred,
+ .cr_init = gss_cred_init,
+@@ -1223,6 +1305,17 @@ static struct rpc_credops gss_credops = {
+ .crunwrap_resp = gss_unwrap_resp,
+ };
+
++static const struct rpc_credops gss_nullops = {
++ .cr_name = "AUTH_GSS",
++ .crdestroy = gss_destroy_cred,
++ .crmatch = gss_match,
++ .crmarshal = gss_marshal,
++ .crrefresh = gss_refresh_null,
++ .crvalidate = gss_validate,
++ .crwrap_req = gss_wrap_req,
++ .crunwrap_resp = gss_unwrap_resp,
++};
++
+ static struct rpc_pipe_ops gss_upcall_ops = {
+ .upcall = gss_pipe_upcall,
+ .downcall = gss_pipe_downcall,
+diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
+index 7b19432..71b9dae 100644
+--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
++++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
+@@ -201,7 +201,7 @@ gss_delete_sec_context_kerberos(void *internal_ctx) {
+ kfree(kctx);
+ }
+
+-static struct gss_api_ops gss_kerberos_ops = {
++static const struct gss_api_ops gss_kerberos_ops = {
+ .gss_import_sec_context = gss_import_sec_context_kerberos,
+ .gss_get_mic = gss_get_mic_kerberos,
+ .gss_verify_mic = gss_verify_mic_kerberos,
+diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
+index 7e15aa6..577d590 100644
+--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
++++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
+@@ -202,7 +202,7 @@ gss_get_mic_spkm3(struct gss_ctx *ctx,
+ return err;
+ }
+
+-static struct gss_api_ops gss_spkm3_ops = {
++static const struct gss_api_ops gss_spkm3_ops = {
+ .gss_import_sec_context = gss_import_sec_context_spkm3,
+ .gss_get_mic = gss_get_mic_spkm3,
+ .gss_verify_mic = gss_verify_mic_spkm3,
+diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
+index 3df9fcc..537d0e8 100644
+--- a/net/sunrpc/auth_null.c
++++ b/net/sunrpc/auth_null.c
+@@ -76,7 +76,7 @@ nul_marshal(struct rpc_task *task, __be32 *p)
+ static int
+ nul_refresh(struct rpc_task *task)
+ {
+- task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
++ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
+ return 0;
+ }
+
+@@ -101,7 +101,7 @@ nul_validate(struct rpc_task *task, __be32 *p)
+ return p;
+ }
+
+-struct rpc_authops authnull_ops = {
++const struct rpc_authops authnull_ops = {
+ .owner = THIS_MODULE,
+ .au_flavor = RPC_AUTH_NULL,
+ #ifdef RPC_DEBUG
+@@ -122,7 +122,7 @@ struct rpc_auth null_auth = {
+ };
+
+ static
+-struct rpc_credops null_credops = {
++const struct rpc_credops null_credops = {
+ .cr_name = "AUTH_NULL",
+ .crdestroy = nul_destroy_cred,
+ .crmatch = nul_match,
+@@ -133,9 +133,11 @@ struct rpc_credops null_credops = {
+
+ static
+ struct rpc_cred null_cred = {
++ .cr_lru = LIST_HEAD_INIT(null_cred.cr_lru),
++ .cr_auth = &null_auth,
+ .cr_ops = &null_credops,
+ .cr_count = ATOMIC_INIT(1),
+- .cr_flags = RPCAUTH_CRED_UPTODATE,
++ .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
+ #ifdef RPC_DEBUG
+ .cr_magic = RPCAUTH_CRED_MAGIC,
+ #endif
+diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
+index 4e7733a..5ed91e5 100644
+--- a/net/sunrpc/auth_unix.c
++++ b/net/sunrpc/auth_unix.c
+@@ -20,11 +20,6 @@ struct unx_cred {
+ gid_t uc_gids[NFS_NGROUPS];
+ };
+ #define uc_uid uc_base.cr_uid
+-#define uc_count uc_base.cr_count
+-#define uc_flags uc_base.cr_flags
+-#define uc_expire uc_base.cr_expire
+-
+-#define UNX_CRED_EXPIRE (60 * HZ)
+
+ #define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2))
+
+@@ -34,15 +29,14 @@ struct unx_cred {
+
+ static struct rpc_auth unix_auth;
+ static struct rpc_cred_cache unix_cred_cache;
+-static struct rpc_credops unix_credops;
++static const struct rpc_credops unix_credops;
+
+ static struct rpc_auth *
+ unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+ {
+ dprintk("RPC: creating UNIX authenticator for client %p\n",
+ clnt);
+- if (atomic_inc_return(&unix_auth.au_count) == 0)
+- unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1);
++ atomic_inc(&unix_auth.au_count);
+ return &unix_auth;
+ }
+
+@@ -50,7 +44,7 @@ static void
+ unx_destroy(struct rpc_auth *auth)
+ {
+ dprintk("RPC: destroying UNIX authenticator %p\n", auth);
+- rpcauth_free_credcache(auth);
++ rpcauth_clear_credcache(auth->au_credcache);
+ }
+
+ /*
+@@ -74,8 +68,8 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+ if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
+ return ERR_PTR(-ENOMEM);
+
+- atomic_set(&cred->uc_count, 1);
+- cred->uc_flags = RPCAUTH_CRED_UPTODATE;
++ rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
++ cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
+ if (flags & RPCAUTH_LOOKUP_ROOTCREDS) {
+ cred->uc_uid = 0;
+ cred->uc_gid = 0;
+@@ -85,22 +79,34 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+ if (groups > NFS_NGROUPS)
+ groups = NFS_NGROUPS;
+
+- cred->uc_uid = acred->uid;
+ cred->uc_gid = acred->gid;
+ for (i = 0; i < groups; i++)
+ cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
+ if (i < NFS_NGROUPS)
+ cred->uc_gids[i] = NOGROUP;
+ }
+- cred->uc_base.cr_ops = &unix_credops;
+
+- return (struct rpc_cred *) cred;
++ return &cred->uc_base;
++}
++
++static void
++unx_free_cred(struct unx_cred *unx_cred)
++{
++ dprintk("RPC: unx_free_cred %p\n", unx_cred);
++ kfree(unx_cred);
++}
++
++static void
++unx_free_cred_callback(struct rcu_head *head)
++{
++ struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu);
++ unx_free_cred(unx_cred);
+ }
+
+ static void
+ unx_destroy_cred(struct rpc_cred *cred)
+ {
+- kfree(cred);
++ call_rcu(&cred->cr_rcu, unx_free_cred_callback);
+ }
+
+ /*
+@@ -111,7 +117,7 @@ unx_destroy_cred(struct rpc_cred *cred)
+ static int
+ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
+ {
+- struct unx_cred *cred = (struct unx_cred *) rcred;
++ struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base);
+ int i;
+
+ if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) {
+@@ -142,7 +148,7 @@ static __be32 *
+ unx_marshal(struct rpc_task *task, __be32 *p)
+ {
+ struct rpc_clnt *clnt = task->tk_client;
+- struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred;
++ struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base);
+ __be32 *base, *hold;
+ int i;
+
+@@ -175,7 +181,7 @@ unx_marshal(struct rpc_task *task, __be32 *p)
+ static int
+ unx_refresh(struct rpc_task *task)
+ {
+- task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
++ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
+ return 0;
+ }
+
+@@ -198,13 +204,18 @@ unx_validate(struct rpc_task *task, __be32 *p)
+ printk("RPC: giant verf size: %u\n", size);
+ return NULL;
+ }
+- task->tk_auth->au_rslack = (size >> 2) + 2;
++ task->tk_msg.rpc_cred->cr_auth->au_rslack = (size >> 2) + 2;
+ p += (size >> 2);
+
+ return p;
+ }
+
+-struct rpc_authops authunix_ops = {
++void __init rpc_init_authunix(void)
++{
++ spin_lock_init(&unix_cred_cache.lock);
++}
++
++const struct rpc_authops authunix_ops = {
+ .owner = THIS_MODULE,
+ .au_flavor = RPC_AUTH_UNIX,
+ #ifdef RPC_DEBUG
+@@ -218,7 +229,6 @@ struct rpc_authops authunix_ops = {
+
+ static
+ struct rpc_cred_cache unix_cred_cache = {
+- .expire = UNX_CRED_EXPIRE,
+ };
+
+ static
+@@ -232,7 +242,7 @@ struct rpc_auth unix_auth = {
+ };
+
+ static
+-struct rpc_credops unix_credops = {
++const struct rpc_credops unix_credops = {
+ .cr_name = "AUTH_UNIX",
+ .crdestroy = unx_destroy_cred,
+ .crmatch = unx_match,
+diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
+index d8fbee4..5d3fe7b 100644
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -44,6 +44,12 @@
+ dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \
+ __FUNCTION__, t->tk_status)
+
++/*
++ * All RPC clients are linked into this list
++ */
++static LIST_HEAD(all_clients);
++static DEFINE_SPINLOCK(rpc_client_lock);
++
+ static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);
+
+
+@@ -66,6 +72,21 @@ static void call_connect_status(struct rpc_task *task);
+ static __be32 * call_header(struct rpc_task *task);
+ static __be32 * call_verify(struct rpc_task *task);
+
++static int rpc_ping(struct rpc_clnt *clnt, int flags);
++
++static void rpc_register_client(struct rpc_clnt *clnt)
++{
++ spin_lock(&rpc_client_lock);
++ list_add(&clnt->cl_clients, &all_clients);
++ spin_unlock(&rpc_client_lock);
++}
++
++static void rpc_unregister_client(struct rpc_clnt *clnt)
++{
++ spin_lock(&rpc_client_lock);
++ list_del(&clnt->cl_clients);
++ spin_unlock(&rpc_client_lock);
++}
+
+ static int
+ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
+@@ -111,6 +132,9 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
+ dprintk("RPC: creating %s client for %s (xprt %p)\n",
+ program->name, servname, xprt);
+
++ err = rpciod_up();
++ if (err)
++ goto out_no_rpciod;
+ err = -EINVAL;
+ if (!xprt)
+ goto out_no_xprt;
+@@ -121,8 +145,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
+ clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
+ if (!clnt)
+ goto out_err;
+- atomic_set(&clnt->cl_users, 0);
+- atomic_set(&clnt->cl_count, 1);
+ clnt->cl_parent = clnt;
+
+ clnt->cl_server = clnt->cl_inline_name;
+@@ -148,6 +170,8 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
+ if (clnt->cl_metrics == NULL)
+ goto out_no_stats;
+ clnt->cl_program = program;
++ INIT_LIST_HEAD(&clnt->cl_tasks);
++ spin_lock_init(&clnt->cl_lock);
+
+ if (!xprt_bound(clnt->cl_xprt))
+ clnt->cl_autobind = 1;
+@@ -155,6 +179,8 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
+ clnt->cl_rtt = &clnt->cl_rtt_default;
+ rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval);
+
++ kref_init(&clnt->cl_kref);
++
+ err = rpc_setup_pipedir(clnt, program->pipe_dir_name);
+ if (err < 0)
+ goto out_no_path;
+@@ -172,6 +198,7 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
+ if (clnt->cl_nodelen > UNX_MAXNODENAME)
+ clnt->cl_nodelen = UNX_MAXNODENAME;
+ memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
++ rpc_register_client(clnt);
+ return clnt;
+
+ out_no_auth:
+@@ -188,6 +215,8 @@ out_no_stats:
+ out_err:
+ xprt_put(xprt);
+ out_no_xprt:
++ rpciod_down();
++out_no_rpciod:
+ return ERR_PTR(err);
+ }
+
+@@ -205,13 +234,32 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
+ {
+ struct rpc_xprt *xprt;
+ struct rpc_clnt *clnt;
++ struct rpc_xprtsock_create xprtargs = {
++ .proto = args->protocol,
++ .srcaddr = args->saddress,
++ .dstaddr = args->address,
++ .addrlen = args->addrsize,
++ .timeout = args->timeout
++ };
++ char servername[20];
+
+- xprt = xprt_create_transport(args->protocol, args->address,
+- args->addrsize, args->timeout);
++ xprt = xprt_create_transport(&xprtargs);
+ if (IS_ERR(xprt))
+ return (struct rpc_clnt *)xprt;
+
+ /*
++ * If the caller chooses not to specify a hostname, whip
++ * up a string representation of the passed-in address.
++ */
++ if (args->servername == NULL) {
++ struct sockaddr_in *addr =
++ (struct sockaddr_in *) &args->address;
++ snprintf(servername, sizeof(servername), NIPQUAD_FMT,
++ NIPQUAD(addr->sin_addr.s_addr));
++ args->servername = servername;
++ }
++
++ /*
+ * By default, kernel RPC client connects from a reserved port.
+ * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters,
+ * but it is always enabled for rpciod, which handles the connect
+@@ -245,8 +293,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
+ clnt->cl_intr = 1;
+ if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
+ clnt->cl_autobind = 1;
+- if (args->flags & RPC_CLNT_CREATE_ONESHOT)
+- clnt->cl_oneshot = 1;
+ if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
+ clnt->cl_discrtry = 1;
+
+@@ -268,24 +314,25 @@ rpc_clone_client(struct rpc_clnt *clnt)
+ new = kmemdup(clnt, sizeof(*new), GFP_KERNEL);
+ if (!new)
+ goto out_no_clnt;
+- atomic_set(&new->cl_count, 1);
+- atomic_set(&new->cl_users, 0);
++ new->cl_parent = clnt;
++ /* Turn off autobind on clones */
++ new->cl_autobind = 0;
++ INIT_LIST_HEAD(&new->cl_tasks);
++ spin_lock_init(&new->cl_lock);
++ rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
+ new->cl_metrics = rpc_alloc_iostats(clnt);
+ if (new->cl_metrics == NULL)
+ goto out_no_stats;
++ kref_init(&new->cl_kref);
+ err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
+ if (err != 0)
+ goto out_no_path;
+- new->cl_parent = clnt;
+- atomic_inc(&clnt->cl_count);
+- new->cl_xprt = xprt_get(clnt->cl_xprt);
+- /* Turn off autobind on clones */
+- new->cl_autobind = 0;
+- new->cl_oneshot = 0;
+- new->cl_dead = 0;
+- rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
+ if (new->cl_auth)
+ atomic_inc(&new->cl_auth->au_count);
++ xprt_get(clnt->cl_xprt);
++ kref_get(&clnt->cl_kref);
++ rpc_register_client(new);
++ rpciod_up();
+ return new;
+ out_no_path:
+ rpc_free_iostats(new->cl_metrics);
+@@ -298,86 +345,86 @@ out_no_clnt:
+
+ /*
+ * Properly shut down an RPC client, terminating all outstanding
+- * requests. Note that we must be certain that cl_oneshot and
+- * cl_dead are cleared, or else the client would be destroyed
+- * when the last task releases it.
++ * requests.
+ */
+-int
+-rpc_shutdown_client(struct rpc_clnt *clnt)
++void rpc_shutdown_client(struct rpc_clnt *clnt)
+ {
+- dprintk("RPC: shutting down %s client for %s, tasks=%d\n",
+- clnt->cl_protname, clnt->cl_server,
+- atomic_read(&clnt->cl_users));
+-
+- while (atomic_read(&clnt->cl_users) > 0) {
+- /* Don't let rpc_release_client destroy us */
+- clnt->cl_oneshot = 0;
+- clnt->cl_dead = 0;
++ dprintk("RPC: shutting down %s client for %s\n",
++ clnt->cl_protname, clnt->cl_server);
++
++ while (!list_empty(&clnt->cl_tasks)) {
+ rpc_killall_tasks(clnt);
+ wait_event_timeout(destroy_wait,
+- !atomic_read(&clnt->cl_users), 1*HZ);
+- }
+-
+- if (atomic_read(&clnt->cl_users) < 0) {
+- printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n",
+- clnt, atomic_read(&clnt->cl_users));
+-#ifdef RPC_DEBUG
+- rpc_show_tasks();
+-#endif
+- BUG();
++ list_empty(&clnt->cl_tasks), 1*HZ);
+ }
+
+- return rpc_destroy_client(clnt);
++ rpc_release_client(clnt);
+ }
+
+ /*
+- * Delete an RPC client
++ * Free an RPC client
+ */
+-int
+-rpc_destroy_client(struct rpc_clnt *clnt)
++static void
++rpc_free_client(struct kref *kref)
+ {
+- if (!atomic_dec_and_test(&clnt->cl_count))
+- return 1;
+- BUG_ON(atomic_read(&clnt->cl_users) != 0);
++ struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref);
+
+ dprintk("RPC: destroying %s client for %s\n",
+ clnt->cl_protname, clnt->cl_server);
+- if (clnt->cl_auth) {
+- rpcauth_destroy(clnt->cl_auth);
+- clnt->cl_auth = NULL;
+- }
+ if (!IS_ERR(clnt->cl_dentry)) {
+ rpc_rmdir(clnt->cl_dentry);
+ rpc_put_mount();
+ }
+ if (clnt->cl_parent != clnt) {
+- rpc_destroy_client(clnt->cl_parent);
++ rpc_release_client(clnt->cl_parent);
+ goto out_free;
+ }
+ if (clnt->cl_server != clnt->cl_inline_name)
+ kfree(clnt->cl_server);
+ out_free:
++ rpc_unregister_client(clnt);
+ rpc_free_iostats(clnt->cl_metrics);
+ clnt->cl_metrics = NULL;
+ xprt_put(clnt->cl_xprt);
++ rpciod_down();
+ kfree(clnt);
+- return 0;
+ }
+
+ /*
+- * Release an RPC client
++ * Release the RPC client's auth handle, then free the client
++ */
++static void
++rpc_free_auth(struct kref *kref)
++{
++ struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref);
++
++ if (clnt->cl_auth == NULL) {
++ rpc_free_client(kref);
++ return;
++ }
++
++ /*
++ * Note: RPCSEC_GSS may need to send NULL RPC calls in order to
++ * release remaining GSS contexts. This mechanism ensures
++ * that it can do so safely.
++ */
++ kref_init(kref);
++ rpcauth_release(clnt->cl_auth);
++ clnt->cl_auth = NULL;
++ kref_put(kref, rpc_free_client);
++}
++
++/*
++ * Release reference to the RPC client
+ */
+ void
+ rpc_release_client(struct rpc_clnt *clnt)
+ {
+- dprintk("RPC: rpc_release_client(%p, %d)\n",
+- clnt, atomic_read(&clnt->cl_users));
++ dprintk("RPC: rpc_release_client(%p)\n", clnt);
+
+- if (!atomic_dec_and_test(&clnt->cl_users))
+- return;
+- wake_up(&destroy_wait);
+- if (clnt->cl_oneshot || clnt->cl_dead)
+- rpc_destroy_client(clnt);
++ if (list_empty(&clnt->cl_tasks))
++ wake_up(&destroy_wait);
++ kref_put(&clnt->cl_kref, rpc_free_auth);
+ }
+
+ /**
+@@ -468,82 +515,96 @@ void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
+ rpc_restore_sigmask(oldset);
+ }
+
+-/*
+- * New rpc_call implementation
++static
++struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt,
++ struct rpc_message *msg,
++ int flags,
++ const struct rpc_call_ops *ops,
++ void *data)
++{
++ struct rpc_task *task, *ret;
++ sigset_t oldset;
++
++ task = rpc_new_task(clnt, flags, ops, data);
++ if (task == NULL) {
++ rpc_release_calldata(ops, data);
++ return ERR_PTR(-ENOMEM);
++ }
++
++ /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
++ rpc_task_sigmask(task, &oldset);
++ if (msg != NULL) {
++ rpc_call_setup(task, msg, 0);
++ if (task->tk_status != 0) {
++ ret = ERR_PTR(task->tk_status);
++ rpc_put_task(task);
++ goto out;
++ }
++ }
++ atomic_inc(&task->tk_count);
++ rpc_execute(task);
++ ret = task;
++out:
++ rpc_restore_sigmask(&oldset);
++ return ret;
++}
++
++/**
++ * rpc_call_sync - Perform a synchronous RPC call
++ * @clnt: pointer to RPC client
++ * @msg: RPC call parameters
++ * @flags: RPC call flags
+ */
+ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
+ {
+ struct rpc_task *task;
+- sigset_t oldset;
+- int status;
+-
+- /* If this client is slain all further I/O fails */
+- if (clnt->cl_dead)
+- return -EIO;
++ int status;
+
+ BUG_ON(flags & RPC_TASK_ASYNC);
+
+- task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL);
+- if (task == NULL)
+- return -ENOMEM;
+-
+- /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */
+- rpc_task_sigmask(task, &oldset);
+-
+- /* Set up the call info struct and execute the task */
+- rpc_call_setup(task, msg, 0);
+- if (task->tk_status == 0) {
+- atomic_inc(&task->tk_count);
+- rpc_execute(task);
+- }
++ task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL);
++ if (IS_ERR(task))
++ return PTR_ERR(task);
+ status = task->tk_status;
+ rpc_put_task(task);
+- rpc_restore_sigmask(&oldset);
+ return status;
+ }
+
+-/*
+- * New rpc_call implementation
++/**
++ * rpc_call_async - Perform an asynchronous RPC call
++ * @clnt: pointer to RPC client
++ * @msg: RPC call parameters
++ * @flags: RPC call flags
++ * @tk_ops: RPC call ops
++ * @data: user call data
+ */
+ int
+ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
+ const struct rpc_call_ops *tk_ops, void *data)
+ {
+ struct rpc_task *task;
+- sigset_t oldset;
+- int status;
+-
+- /* If this client is slain all further I/O fails */
+- status = -EIO;
+- if (clnt->cl_dead)
+- goto out_release;
+-
+- flags |= RPC_TASK_ASYNC;
+-
+- /* Create/initialize a new RPC task */
+- status = -ENOMEM;
+- if (!(task = rpc_new_task(clnt, flags, tk_ops, data)))
+- goto out_release;
+-
+- /* Mask signals on GSS_AUTH upcalls */
+- rpc_task_sigmask(task, &oldset);
+
+- rpc_call_setup(task, msg, 0);
+-
+- /* Set up the call info struct and execute the task */
+- status = task->tk_status;
+- if (status == 0)
+- rpc_execute(task);
+- else
+- rpc_put_task(task);
+-
+- rpc_restore_sigmask(&oldset);
+- return status;
+-out_release:
+- rpc_release_calldata(tk_ops, data);
+- return status;
++ task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data);
++ if (IS_ERR(task))
++ return PTR_ERR(task);
++ rpc_put_task(task);
++ return 0;
+ }
+
++/**
++ * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
++ * @clnt: pointer to RPC client
++ * @flags: RPC flags
++ * @tk_ops: RPC call ops
++ * @data: user call data
++ */
++struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
++ const struct rpc_call_ops *tk_ops,
++ void *data)
++{
++ return rpc_do_run_task(clnt, NULL, flags, tk_ops, data);
++}
++EXPORT_SYMBOL(rpc_run_task);
+
+ void
+ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
+@@ -745,7 +806,7 @@ call_reserveresult(struct rpc_task *task)
+ static void
+ call_allocate(struct rpc_task *task)
+ {
+- unsigned int slack = task->tk_auth->au_cslack;
++ unsigned int slack = task->tk_msg.rpc_cred->cr_auth->au_cslack;
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
+@@ -1273,9 +1334,9 @@ call_verify(struct rpc_task *task)
+ * - if it isn't pointer subtraction in the NFS client may give
+ * undefined results
+ */
+- printk(KERN_WARNING
+- "call_verify: XDR representation not a multiple of"
+- " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len);
++ dprintk("RPC: %5u %s: XDR representation not a multiple of"
++ " 4 bytes: 0x%x\n", task->tk_pid, __FUNCTION__,
++ task->tk_rqstp->rq_rcv_buf.len);
+ goto out_eio;
+ }
+ if ((len -= 3) < 0)
+@@ -1283,7 +1344,8 @@ call_verify(struct rpc_task *task)
+ p += 1; /* skip XID */
+
+ if ((n = ntohl(*p++)) != RPC_REPLY) {
+- printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
++ dprintk("RPC: %5u %s: not an RPC reply: %x\n",
++ task->tk_pid, __FUNCTION__, n);
+ goto out_garbage;
+ }
+ if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
+@@ -1334,7 +1396,8 @@ call_verify(struct rpc_task *task)
+ "authentication.\n", task->tk_client->cl_server);
+ break;
+ default:
+- printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
++ dprintk("RPC: %5u %s: unknown auth error: %x\n",
++ task->tk_pid, __FUNCTION__, n);
+ error = -EIO;
+ }
+ dprintk("RPC: %5u %s: call rejected %d\n",
+@@ -1342,7 +1405,8 @@ call_verify(struct rpc_task *task)
+ goto out_err;
+ }
+ if (!(p = rpcauth_checkverf(task, p))) {
+- printk(KERN_WARNING "call_verify: auth check failed\n");
++ dprintk("RPC: %5u %s: auth check failed\n",
++ task->tk_pid, __FUNCTION__);
+ goto out_garbage; /* bad verifier, retry */
+ }
+ len = p - (__be32 *)iov->iov_base - 1;
+@@ -1381,7 +1445,8 @@ call_verify(struct rpc_task *task)
+ task->tk_pid, __FUNCTION__);
+ break; /* retry */
+ default:
+- printk(KERN_WARNING "call_verify: server accept status: %x\n", n);
++ dprintk("RPC: %5u %s: server accept status: %x\n",
++ task->tk_pid, __FUNCTION__, n);
+ /* Also retry */
+ }
+
+@@ -1395,14 +1460,16 @@ out_garbage:
+ out_retry:
+ return ERR_PTR(-EAGAIN);
+ }
+- printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__);
+ out_eio:
+ error = -EIO;
+ out_err:
+ rpc_exit(task, error);
++ dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
++ __FUNCTION__, error);
+ return ERR_PTR(error);
+ out_overflow:
+- printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
++ dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid,
++ __FUNCTION__);
+ goto out_garbage;
+ }
+
+@@ -1421,7 +1488,7 @@ static struct rpc_procinfo rpcproc_null = {
+ .p_decode = rpcproc_decode_null,
+ };
+
+-int rpc_ping(struct rpc_clnt *clnt, int flags)
++static int rpc_ping(struct rpc_clnt *clnt, int flags)
+ {
+ struct rpc_message msg = {
+ .rpc_proc = &rpcproc_null,
+@@ -1432,3 +1499,51 @@ int rpc_ping(struct rpc_clnt *clnt, int flags)
+ put_rpccred(msg.rpc_cred);
+ return err;
+ }
++
++struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
++{
++ struct rpc_message msg = {
++ .rpc_proc = &rpcproc_null,
++ .rpc_cred = cred,
++ };
++ return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL);
++}
++EXPORT_SYMBOL(rpc_call_null);
++
++#ifdef RPC_DEBUG
++void rpc_show_tasks(void)
++{
++ struct rpc_clnt *clnt;
++ struct rpc_task *t;
++
++ spin_lock(&rpc_client_lock);
++ if (list_empty(&all_clients))
++ goto out;
++ printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
++ "-rpcwait -action- ---ops--\n");
++ list_for_each_entry(clnt, &all_clients, cl_clients) {
++ if (list_empty(&clnt->cl_tasks))
++ continue;
++ spin_lock(&clnt->cl_lock);
++ list_for_each_entry(t, &clnt->cl_tasks, tk_task) {
++ const char *rpc_waitq = "none";
++
++ if (RPC_IS_QUEUED(t))
++ rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
++
++ printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
++ t->tk_pid,
++ (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
++ t->tk_flags, t->tk_status,
++ t->tk_client,
++ (t->tk_client ? t->tk_client->cl_prog : 0),
++ t->tk_rqstp, t->tk_timeout,
++ rpc_waitq,
++ t->tk_action, t->tk_ops);
++ }
++ spin_unlock(&clnt->cl_lock);
++ }
++out:
++ spin_unlock(&rpc_client_lock);
++}
++#endif
+diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
+index 5887457..22e25b5 100644
+--- a/net/sunrpc/rpc_pipe.c
++++ b/net/sunrpc/rpc_pipe.c
+@@ -14,7 +14,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/mount.h>
+ #include <linux/namei.h>
+-#include <linux/dnotify.h>
++#include <linux/fsnotify.h>
+ #include <linux/kernel.h>
+
+ #include <asm/ioctls.h>
+@@ -344,7 +344,7 @@ rpc_info_open(struct inode *inode, struct file *file)
+ mutex_lock(&inode->i_mutex);
+ clnt = RPC_I(inode)->private;
+ if (clnt) {
+- atomic_inc(&clnt->cl_users);
++ kref_get(&clnt->cl_kref);
+ m->private = clnt;
+ } else {
+ single_release(inode, file);
+@@ -448,6 +448,15 @@ void rpc_put_mount(void)
+ simple_release_fs(&rpc_mount, &rpc_mount_count);
+ }
+
++static int rpc_delete_dentry(struct dentry *dentry)
++{
++ return 1;
++}
++
++static struct dentry_operations rpc_dentry_operations = {
++ .d_delete = rpc_delete_dentry,
++};
++
+ static int
+ rpc_lookup_parent(char *path, struct nameidata *nd)
+ {
+@@ -506,7 +515,7 @@ rpc_get_inode(struct super_block *sb, int mode)
+ * FIXME: This probably has races.
+ */
+ static void
+-rpc_depopulate(struct dentry *parent)
++rpc_depopulate(struct dentry *parent, int start, int eof)
+ {
+ struct inode *dir = parent->d_inode;
+ struct list_head *pos, *next;
+@@ -518,6 +527,10 @@ repeat:
+ spin_lock(&dcache_lock);
+ list_for_each_safe(pos, next, &parent->d_subdirs) {
+ dentry = list_entry(pos, struct dentry, d_u.d_child);
++ if (!dentry->d_inode ||
++ dentry->d_inode->i_ino < start ||
++ dentry->d_inode->i_ino >= eof)
++ continue;
+ spin_lock(&dentry->d_lock);
+ if (!d_unhashed(dentry)) {
+ dget_locked(dentry);
+@@ -533,11 +546,11 @@ repeat:
+ if (n) {
+ do {
+ dentry = dvec[--n];
+- if (dentry->d_inode) {
+- rpc_close_pipes(dentry->d_inode);
++ if (S_ISREG(dentry->d_inode->i_mode))
+ simple_unlink(dir, dentry);
+- }
+- inode_dir_notify(dir, DN_DELETE);
++ else if (S_ISDIR(dentry->d_inode->i_mode))
++ simple_rmdir(dir, dentry);
++ d_delete(dentry);
+ dput(dentry);
+ } while (n);
+ goto repeat;
+@@ -560,6 +573,7 @@ rpc_populate(struct dentry *parent,
+ dentry = d_alloc_name(parent, files[i].name);
+ if (!dentry)
+ goto out_bad;
++ dentry->d_op = &rpc_dentry_operations;
+ mode = files[i].mode;
+ inode = rpc_get_inode(dir->i_sb, mode);
+ if (!inode) {
+@@ -574,6 +588,7 @@ rpc_populate(struct dentry *parent,
+ if (S_ISDIR(mode))
+ inc_nlink(dir);
+ d_add(dentry, inode);
++ fsnotify_create(dir, dentry);
+ }
+ mutex_unlock(&dir->i_mutex);
+ return 0;
+@@ -595,7 +610,7 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry)
+ inode->i_ino = iunique(dir->i_sb, 100);
+ d_instantiate(dentry, inode);
+ inc_nlink(dir);
+- inode_dir_notify(dir, DN_CREATE);
++ fsnotify_mkdir(dir, dentry);
+ return 0;
+ out_err:
+ printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
+@@ -607,21 +622,14 @@ static int
+ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
+ {
+ int error;
+-
+- shrink_dcache_parent(dentry);
+- if (d_unhashed(dentry))
+- return 0;
+- if ((error = simple_rmdir(dir, dentry)) != 0)
+- return error;
+- if (!error) {
+- inode_dir_notify(dir, DN_DELETE);
+- d_drop(dentry);
+- }
+- return 0;
++ error = simple_rmdir(dir, dentry);
++ if (!error)
++ d_delete(dentry);
++ return error;
+ }
+
+ static struct dentry *
+-rpc_lookup_create(struct dentry *parent, const char *name, int len)
++rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive)
+ {
+ struct inode *dir = parent->d_inode;
+ struct dentry *dentry;
+@@ -630,7 +638,9 @@ rpc_lookup_create(struct dentry *parent, const char *name, int len)
+ dentry = lookup_one_len(name, parent, len);
+ if (IS_ERR(dentry))
+ goto out_err;
+- if (dentry->d_inode) {
++ if (!dentry->d_inode)
++ dentry->d_op = &rpc_dentry_operations;
++ else if (exclusive) {
+ dput(dentry);
+ dentry = ERR_PTR(-EEXIST);
+ goto out_err;
+@@ -649,7 +659,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
+
+ if ((error = rpc_lookup_parent(path, nd)) != 0)
+ return ERR_PTR(error);
+- dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len);
++ dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len, 1);
+ if (IS_ERR(dentry))
+ rpc_release_path(nd);
+ return dentry;
+@@ -681,7 +691,7 @@ out:
+ rpc_release_path(&nd);
+ return dentry;
+ err_depopulate:
+- rpc_depopulate(dentry);
++ rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF);
+ __rpc_rmdir(dir, dentry);
+ err_dput:
+ dput(dentry);
+@@ -701,7 +711,7 @@ rpc_rmdir(struct dentry *dentry)
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+- rpc_depopulate(dentry);
++ rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF);
+ error = __rpc_rmdir(dir, dentry);
+ dput(dentry);
+ mutex_unlock(&dir->i_mutex);
+@@ -716,10 +726,21 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi
+ struct inode *dir, *inode;
+ struct rpc_inode *rpci;
+
+- dentry = rpc_lookup_create(parent, name, strlen(name));
++ dentry = rpc_lookup_create(parent, name, strlen(name), 0);
+ if (IS_ERR(dentry))
+ return dentry;
+ dir = parent->d_inode;
++ if (dentry->d_inode) {
++ rpci = RPC_I(dentry->d_inode);
++ if (rpci->private != private ||
++ rpci->ops != ops ||
++ rpci->flags != flags) {
++ dput (dentry);
++ dentry = ERR_PTR(-EBUSY);
++ }
++ rpci->nkern_readwriters++;
++ goto out;
++ }
+ inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR);
+ if (!inode)
+ goto err_dput;
+@@ -730,7 +751,8 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi
+ rpci->private = private;
+ rpci->flags = flags;
+ rpci->ops = ops;
+- inode_dir_notify(dir, DN_CREATE);
++ rpci->nkern_readwriters = 1;
++ fsnotify_create(dir, dentry);
+ dget(dentry);
+ out:
+ mutex_unlock(&dir->i_mutex);
+@@ -754,13 +776,11 @@ rpc_unlink(struct dentry *dentry)
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+- if (!d_unhashed(dentry)) {
+- d_drop(dentry);
+- if (dentry->d_inode) {
+- rpc_close_pipes(dentry->d_inode);
+- error = simple_unlink(dir, dentry);
+- }
+- inode_dir_notify(dir, DN_DELETE);
++ if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) {
++ rpc_close_pipes(dentry->d_inode);
++ error = simple_unlink(dir, dentry);
++ if (!error)
++ d_delete(dentry);
+ }
+ dput(dentry);
+ mutex_unlock(&dir->i_mutex);
+@@ -833,6 +853,7 @@ init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
+ rpci->nreaders = 0;
+ rpci->nwriters = 0;
+ INIT_LIST_HEAD(&rpci->in_upcall);
++ INIT_LIST_HEAD(&rpci->in_downcall);
+ INIT_LIST_HEAD(&rpci->pipe);
+ rpci->pipelen = 0;
+ init_waitqueue_head(&rpci->waitq);
+diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
+index 6c7aa8a..d1740db 100644
+--- a/net/sunrpc/rpcb_clnt.c
++++ b/net/sunrpc/rpcb_clnt.c
+@@ -12,6 +12,8 @@
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
++#include <linux/module.h>
++
+ #include <linux/types.h>
+ #include <linux/socket.h>
+ #include <linux/kernel.h>
+@@ -184,8 +186,8 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
+ .program = &rpcb_program,
+ .version = version,
+ .authflavor = RPC_AUTH_UNIX,
+- .flags = (RPC_CLNT_CREATE_ONESHOT |
+- RPC_CLNT_CREATE_NOPING),
++ .flags = (RPC_CLNT_CREATE_NOPING |
++ RPC_CLNT_CREATE_INTR),
+ };
+
+ ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+@@ -238,6 +240,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
+
+ error = rpc_call_sync(rpcb_clnt, &msg, 0);
+
++ rpc_shutdown_client(rpcb_clnt);
+ if (error < 0)
+ printk(KERN_WARNING "RPC: failed to contact local rpcbind "
+ "server (errno %d).\n", -error);
+@@ -246,21 +249,20 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
+ return error;
+ }
+
+-#ifdef CONFIG_ROOT_NFS
+ /**
+- * rpcb_getport_external - obtain the port for an RPC service on a given host
++ * rpcb_getport_sync - obtain the port for an RPC service on a given host
+ * @sin: address of remote peer
+ * @prog: RPC program number to bind
+ * @vers: RPC version number to bind
+ * @prot: transport protocol to use to make this request
+ *
+ * Called from outside the RPC client in a synchronous task context.
++ * Uses default timeout parameters specified by underlying transport.
+ *
+- * For now, this supports only version 2 queries, but is used only by
+- * mount_clnt for NFS_ROOT.
++ * XXX: Needs to support IPv6, and rpcbind versions 3 and 4
+ */
+-int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog,
+- __u32 vers, int prot)
++int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog,
++ __u32 vers, int prot)
+ {
+ struct rpcbind_args map = {
+ .r_prog = prog,
+@@ -277,15 +279,16 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog,
+ char hostname[40];
+ int status;
+
+- dprintk("RPC: rpcb_getport_external(%u.%u.%u.%u, %u, %u, %d)\n",
+- NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
++ dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n",
++ __FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
+
+- sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
++ sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
+ rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0);
+ if (IS_ERR(rpcb_clnt))
+ return PTR_ERR(rpcb_clnt);
+
+ status = rpc_call_sync(rpcb_clnt, &msg, 0);
++ rpc_shutdown_client(rpcb_clnt);
+
+ if (status >= 0) {
+ if (map.r_port != 0)
+@@ -294,16 +297,16 @@ int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog,
+ }
+ return status;
+ }
+-#endif
++EXPORT_SYMBOL_GPL(rpcb_getport_sync);
+
+ /**
+- * rpcb_getport - obtain the port for a given RPC service on a given host
++ * rpcb_getport_async - obtain the port for a given RPC service on a given host
+ * @task: task that is waiting for portmapper request
+ *
+ * This one can be called for an ongoing RPC request, and can be used in
+ * an async (rpciod) context.
+ */
+-void rpcb_getport(struct rpc_task *task)
++void rpcb_getport_async(struct rpc_task *task)
+ {
+ struct rpc_clnt *clnt = task->tk_client;
+ int bind_version;
+@@ -314,17 +317,17 @@ void rpcb_getport(struct rpc_task *task)
+ struct sockaddr addr;
+ int status;
+
+- dprintk("RPC: %5u rpcb_getport(%s, %u, %u, %d)\n",
+- task->tk_pid, clnt->cl_server,
+- clnt->cl_prog, clnt->cl_vers, xprt->prot);
++ dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
++ task->tk_pid, __FUNCTION__,
++ clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot);
+
+ /* Autobind on cloned rpc clients is discouraged */
+ BUG_ON(clnt->cl_parent != clnt);
+
+ if (xprt_test_and_set_binding(xprt)) {
+ status = -EACCES; /* tell caller to check again */
+- dprintk("RPC: %5u rpcb_getport waiting for another binder\n",
+- task->tk_pid);
++ dprintk("RPC: %5u %s: waiting for another binder\n",
++ task->tk_pid, __FUNCTION__);
+ goto bailout_nowake;
+ }
+
+@@ -335,27 +338,28 @@ void rpcb_getport(struct rpc_task *task)
+ /* Someone else may have bound if we slept */
+ if (xprt_bound(xprt)) {
+ status = 0;
+- dprintk("RPC: %5u rpcb_getport already bound\n", task->tk_pid);
++ dprintk("RPC: %5u %s: already bound\n",
++ task->tk_pid, __FUNCTION__);
+ goto bailout_nofree;
+ }
+
+ if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) {
+ xprt->bind_index = 0;
+ status = -EACCES; /* tell caller to try again later */
+- dprintk("RPC: %5u rpcb_getport no more getport versions "
+- "available\n", task->tk_pid);
++ dprintk("RPC: %5u %s: no more getport versions available\n",
++ task->tk_pid, __FUNCTION__);
+ goto bailout_nofree;
+ }
+ bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
+
+- dprintk("RPC: %5u rpcb_getport trying rpcbind version %u\n",
+- task->tk_pid, bind_version);
++ dprintk("RPC: %5u %s: trying rpcbind version %u\n",
++ task->tk_pid, __FUNCTION__, bind_version);
+
+ map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
+ if (!map) {
+ status = -ENOMEM;
+- dprintk("RPC: %5u rpcb_getport no memory available\n",
+- task->tk_pid);
++ dprintk("RPC: %5u %s: no memory available\n",
++ task->tk_pid, __FUNCTION__);
+ goto bailout_nofree;
+ }
+ map->r_prog = clnt->cl_prog;
+@@ -373,16 +377,17 @@ void rpcb_getport(struct rpc_task *task)
+ rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0);
+ if (IS_ERR(rpcb_clnt)) {
+ status = PTR_ERR(rpcb_clnt);
+- dprintk("RPC: %5u rpcb_getport rpcb_create failed, error %ld\n",
+- task->tk_pid, PTR_ERR(rpcb_clnt));
++ dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
++ task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt));
+ goto bailout;
+ }
+
+ child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
++ rpc_release_client(rpcb_clnt);
+ if (IS_ERR(child)) {
+ status = -EIO;
+- dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n",
+- task->tk_pid);
++ dprintk("RPC: %5u %s: rpc_run_task failed\n",
++ task->tk_pid, __FUNCTION__);
+ goto bailout_nofree;
+ }
+ rpc_put_task(child);
+diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
+index 944d753..2ac43c4 100644
+--- a/net/sunrpc/sched.c
++++ b/net/sunrpc/sched.c
+@@ -25,7 +25,6 @@
+ #ifdef RPC_DEBUG
+ #define RPCDBG_FACILITY RPCDBG_SCHED
+ #define RPC_TASK_MAGIC_ID 0xf00baa
+-static int rpc_task_id;
+ #endif
+
+ /*
+@@ -40,7 +39,6 @@ static mempool_t *rpc_task_mempool __read_mostly;
+ static mempool_t *rpc_buffer_mempool __read_mostly;
+
+ static void __rpc_default_timer(struct rpc_task *task);
+-static void rpciod_killall(void);
+ static void rpc_async_schedule(struct work_struct *);
+ static void rpc_release_task(struct rpc_task *task);
+
+@@ -50,23 +48,13 @@ static void rpc_release_task(struct rpc_task *task);
+ static RPC_WAITQ(delay_queue, "delayq");
+
+ /*
+- * All RPC tasks are linked into this list
+- */
+-static LIST_HEAD(all_tasks);
+-
+-/*
+ * rpciod-related stuff
+ */
+ static DEFINE_MUTEX(rpciod_mutex);
+-static unsigned int rpciod_users;
++static atomic_t rpciod_users = ATOMIC_INIT(0);
+ struct workqueue_struct *rpciod_workqueue;
+
+ /*
+- * Spinlock for other critical sections of code.
+- */
+-static DEFINE_SPINLOCK(rpc_sched_lock);
+-
+-/*
+ * Disable the timer for a given RPC task. Should be called with
+ * queue->lock and bh_disabled in order to avoid races within
+ * rpc_run_timer().
+@@ -267,18 +255,33 @@ static int rpc_wait_bit_interruptible(void *word)
+ return 0;
+ }
+
++#ifdef RPC_DEBUG
++static void rpc_task_set_debuginfo(struct rpc_task *task)
++{
++ static atomic_t rpc_pid;
++
++ task->tk_magic = RPC_TASK_MAGIC_ID;
++ task->tk_pid = atomic_inc_return(&rpc_pid);
++}
++#else
++static inline void rpc_task_set_debuginfo(struct rpc_task *task)
++{
++}
++#endif
++
+ static void rpc_set_active(struct rpc_task *task)
+ {
++ struct rpc_clnt *clnt;
+ if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0)
+ return;
+- spin_lock(&rpc_sched_lock);
+-#ifdef RPC_DEBUG
+- task->tk_magic = RPC_TASK_MAGIC_ID;
+- task->tk_pid = rpc_task_id++;
+-#endif
++ rpc_task_set_debuginfo(task);
+ /* Add to global list of all tasks */
+- list_add_tail(&task->tk_task, &all_tasks);
+- spin_unlock(&rpc_sched_lock);
++ clnt = task->tk_client;
++ if (clnt != NULL) {
++ spin_lock(&clnt->cl_lock);
++ list_add_tail(&task->tk_task, &clnt->cl_tasks);
++ spin_unlock(&clnt->cl_lock);
++ }
+ }
+
+ /*
+@@ -818,6 +821,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
+ if (tk_ops->rpc_call_prepare != NULL)
+ task->tk_action = rpc_prepare_task;
+ task->tk_calldata = calldata;
++ INIT_LIST_HEAD(&task->tk_task);
+
+ /* Initialize retry counters */
+ task->tk_garb_retry = 2;
+@@ -830,7 +834,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
+ task->tk_workqueue = rpciod_workqueue;
+
+ if (clnt) {
+- atomic_inc(&clnt->cl_users);
++ kref_get(&clnt->cl_kref);
+ if (clnt->cl_softrtry)
+ task->tk_flags |= RPC_TASK_SOFT;
+ if (!clnt->cl_intr)
+@@ -860,9 +864,7 @@ static void rpc_free_task(struct rcu_head *rcu)
+ }
+
+ /*
+- * Create a new task for the specified client. We have to
+- * clean up after an allocation failure, as the client may
+- * have specified "oneshot".
++ * Create a new task for the specified client.
+ */
+ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
+ {
+@@ -870,7 +872,7 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc
+
+ task = rpc_alloc_task();
+ if (!task)
+- goto cleanup;
++ goto out;
+
+ rpc_init_task(task, clnt, flags, tk_ops, calldata);
+
+@@ -878,16 +880,6 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc
+ task->tk_flags |= RPC_TASK_DYNAMIC;
+ out:
+ return task;
+-
+-cleanup:
+- /* Check whether to release the client */
+- if (clnt) {
+- printk("rpc_new_task: failed, users=%d, oneshot=%d\n",
+- atomic_read(&clnt->cl_users), clnt->cl_oneshot);
+- atomic_inc(&clnt->cl_users); /* pretend we were used ... */
+- rpc_release_client(clnt);
+- }
+- goto out;
+ }
+
+
+@@ -920,11 +912,13 @@ static void rpc_release_task(struct rpc_task *task)
+ #endif
+ dprintk("RPC: %5u release task\n", task->tk_pid);
+
+- /* Remove from global task list */
+- spin_lock(&rpc_sched_lock);
+- list_del(&task->tk_task);
+- spin_unlock(&rpc_sched_lock);
+-
++ if (!list_empty(&task->tk_task)) {
++ struct rpc_clnt *clnt = task->tk_client;
++ /* Remove from client task list */
++ spin_lock(&clnt->cl_lock);
++ list_del(&task->tk_task);
++ spin_unlock(&clnt->cl_lock);
++ }
+ BUG_ON (RPC_IS_QUEUED(task));
+
+ /* Synchronously delete any running timer */
+@@ -939,29 +933,6 @@ static void rpc_release_task(struct rpc_task *task)
+ rpc_put_task(task);
+ }
+
+-/**
+- * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
+- * @clnt: pointer to RPC client
+- * @flags: RPC flags
+- * @ops: RPC call ops
+- * @data: user call data
+- */
+-struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
+- const struct rpc_call_ops *ops,
+- void *data)
+-{
+- struct rpc_task *task;
+- task = rpc_new_task(clnt, flags, ops, data);
+- if (task == NULL) {
+- rpc_release_calldata(ops, data);
+- return ERR_PTR(-ENOMEM);
+- }
+- atomic_inc(&task->tk_count);
+- rpc_execute(task);
+- return task;
+-}
+-EXPORT_SYMBOL(rpc_run_task);
+-
+ /*
+ * Kill all tasks for the given client.
+ * XXX: kill their descendants as well?
+@@ -969,44 +940,25 @@ EXPORT_SYMBOL(rpc_run_task);
+ void rpc_killall_tasks(struct rpc_clnt *clnt)
+ {
+ struct rpc_task *rovr;
+- struct list_head *le;
+
+- dprintk("RPC: killing all tasks for client %p\n", clnt);
+
++ if (list_empty(&clnt->cl_tasks))
++ return;
++ dprintk("RPC: killing all tasks for client %p\n", clnt);
+ /*
+ * Spin lock all_tasks to prevent changes...
+ */
+- spin_lock(&rpc_sched_lock);
+- alltask_for_each(rovr, le, &all_tasks) {
++ spin_lock(&clnt->cl_lock);
++ list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
+ if (! RPC_IS_ACTIVATED(rovr))
+ continue;
+- if (!clnt || rovr->tk_client == clnt) {
++ if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
+ rovr->tk_flags |= RPC_TASK_KILLED;
+ rpc_exit(rovr, -EIO);
+ rpc_wake_up_task(rovr);
+ }
+ }
+- spin_unlock(&rpc_sched_lock);
+-}
+-
+-static void rpciod_killall(void)
+-{
+- unsigned long flags;
+-
+- while (!list_empty(&all_tasks)) {
+- clear_thread_flag(TIF_SIGPENDING);
+- rpc_killall_tasks(NULL);
+- flush_workqueue(rpciod_workqueue);
+- if (!list_empty(&all_tasks)) {
+- dprintk("RPC: rpciod_killall: waiting for tasks "
+- "to exit\n");
+- yield();
+- }
+- }
+-
+- spin_lock_irqsave(&current->sighand->siglock, flags);
+- recalc_sigpending();
+- spin_unlock_irqrestore(&current->sighand->siglock, flags);
++ spin_unlock(&clnt->cl_lock);
+ }
+
+ /*
+@@ -1018,28 +970,27 @@ rpciod_up(void)
+ struct workqueue_struct *wq;
+ int error = 0;
+
++ if (atomic_inc_not_zero(&rpciod_users))
++ return 0;
++
+ mutex_lock(&rpciod_mutex);
+- dprintk("RPC: rpciod_up: users %u\n", rpciod_users);
+- rpciod_users++;
+- if (rpciod_workqueue)
+- goto out;
+- /*
+- * If there's no pid, we should be the first user.
+- */
+- if (rpciod_users > 1)
+- printk(KERN_WARNING "rpciod_up: no workqueue, %u users??\n", rpciod_users);
++
++ /* Guard against races with rpciod_down() */
++ if (rpciod_workqueue != NULL)
++ goto out_ok;
+ /*
+ * Create the rpciod thread and wait for it to start.
+ */
++ dprintk("RPC: creating workqueue rpciod\n");
+ error = -ENOMEM;
+ wq = create_workqueue("rpciod");
+- if (wq == NULL) {
+- printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error);
+- rpciod_users--;
++ if (wq == NULL)
+ goto out;
+- }
++
+ rpciod_workqueue = wq;
+ error = 0;
++out_ok:
++ atomic_inc(&rpciod_users);
+ out:
+ mutex_unlock(&rpciod_mutex);
+ return error;
+@@ -1048,59 +999,19 @@ out:
+ void
+ rpciod_down(void)
+ {
++ if (!atomic_dec_and_test(&rpciod_users))
++ return;
++
+ mutex_lock(&rpciod_mutex);
+- dprintk("RPC: rpciod_down sema %u\n", rpciod_users);
+- if (rpciod_users) {
+- if (--rpciod_users)
+- goto out;
+- } else
+- printk(KERN_WARNING "rpciod_down: no users??\n");
++ dprintk("RPC: destroying workqueue rpciod\n");
+
+- if (!rpciod_workqueue) {
+- dprintk("RPC: rpciod_down: Nothing to do!\n");
+- goto out;
++ if (atomic_read(&rpciod_users) == 0 && rpciod_workqueue != NULL) {
++ destroy_workqueue(rpciod_workqueue);
++ rpciod_workqueue = NULL;
+ }
+- rpciod_killall();
+-
+- destroy_workqueue(rpciod_workqueue);
+- rpciod_workqueue = NULL;
+- out:
+ mutex_unlock(&rpciod_mutex);
+ }
+
+-#ifdef RPC_DEBUG
+-void rpc_show_tasks(void)
+-{
+- struct list_head *le;
+- struct rpc_task *t;
+-
+- spin_lock(&rpc_sched_lock);
+- if (list_empty(&all_tasks)) {
+- spin_unlock(&rpc_sched_lock);
+- return;
+- }
+- printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
+- "-rpcwait -action- ---ops--\n");
+- alltask_for_each(t, le, &all_tasks) {
+- const char *rpc_waitq = "none";
+-
+- if (RPC_IS_QUEUED(t))
+- rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
+-
+- printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
+- t->tk_pid,
+- (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
+- t->tk_flags, t->tk_status,
+- t->tk_client,
+- (t->tk_client ? t->tk_client->cl_prog : 0),
+- t->tk_rqstp, t->tk_timeout,
+- rpc_waitq,
+- t->tk_action, t->tk_ops);
+- }
+- spin_unlock(&rpc_sched_lock);
+-}
+-#endif
+-
+ void
+ rpc_destroy_mempool(void)
+ {
+diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
+index 73075de..384c4ad 100644
+--- a/net/sunrpc/sunrpc_syms.c
++++ b/net/sunrpc/sunrpc_syms.c
+@@ -28,15 +28,11 @@ EXPORT_SYMBOL(rpc_init_task);
+ EXPORT_SYMBOL(rpc_sleep_on);
+ EXPORT_SYMBOL(rpc_wake_up_next);
+ EXPORT_SYMBOL(rpc_wake_up_task);
+-EXPORT_SYMBOL(rpciod_down);
+-EXPORT_SYMBOL(rpciod_up);
+-EXPORT_SYMBOL(rpc_new_task);
+ EXPORT_SYMBOL(rpc_wake_up_status);
+
+ /* RPC client functions */
+ EXPORT_SYMBOL(rpc_clone_client);
+ EXPORT_SYMBOL(rpc_bind_new_program);
+-EXPORT_SYMBOL(rpc_destroy_client);
+ EXPORT_SYMBOL(rpc_shutdown_client);
+ EXPORT_SYMBOL(rpc_killall_tasks);
+ EXPORT_SYMBOL(rpc_call_sync);
+@@ -61,7 +57,7 @@ EXPORT_SYMBOL(rpcauth_unregister);
+ EXPORT_SYMBOL(rpcauth_create);
+ EXPORT_SYMBOL(rpcauth_lookupcred);
+ EXPORT_SYMBOL(rpcauth_lookup_credcache);
+-EXPORT_SYMBOL(rpcauth_free_credcache);
++EXPORT_SYMBOL(rpcauth_destroy_credcache);
+ EXPORT_SYMBOL(rpcauth_init_credcache);
+ EXPORT_SYMBOL(put_rpccred);
+
+@@ -156,6 +152,7 @@ init_sunrpc(void)
+ cache_register(&ip_map_cache);
+ cache_register(&unix_gid_cache);
+ init_socket_xprt();
++ rpcauth_init_module();
+ out:
+ return err;
+ }
+@@ -163,6 +160,7 @@ out:
+ static void __exit
+ cleanup_sunrpc(void)
+ {
++ rpcauth_remove_module();
+ cleanup_socket_xprt();
+ unregister_rpc_pipefs();
+ rpc_destroy_mempool();
+diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
+index 5baf48d..64b9b8c 100644
+--- a/net/sunrpc/svcsock.c
++++ b/net/sunrpc/svcsock.c
+@@ -644,6 +644,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
+ struct msghdr msg = {
+ .msg_flags = MSG_DONTWAIT,
+ };
++ struct sockaddr *sin;
+ int len;
+
+ len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
+@@ -654,6 +655,19 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
+ memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen);
+ rqstp->rq_addrlen = svsk->sk_remotelen;
+
++ /* Destination address in request is needed for binding the
++ * source address in RPC callbacks later.
++ */
++ sin = (struct sockaddr *)&svsk->sk_local;
++ switch (sin->sa_family) {
++ case AF_INET:
++ rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
++ break;
++ case AF_INET6:
++ rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
++ break;
++ }
++
+ dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
+ svsk, iov[0].iov_base, iov[0].iov_len, len);
+
+@@ -1064,6 +1078,12 @@ svc_tcp_accept(struct svc_sock *svsk)
+ goto failed;
+ memcpy(&newsvsk->sk_remote, sin, slen);
+ newsvsk->sk_remotelen = slen;
++ err = kernel_getsockname(newsock, sin, &slen);
++ if (unlikely(err < 0)) {
++ dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
++ slen = offsetof(struct sockaddr, sa_data);
++ }
++ memcpy(&newsvsk->sk_local, sin, slen);
+
+ svc_sock_received(newsvsk);
+
+diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
+index 5b05b73..c8c2edc 100644
+--- a/net/sunrpc/xprt.c
++++ b/net/sunrpc/xprt.c
+@@ -127,7 +127,7 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
+ clear_bit(XPRT_LOCKED, &xprt->state);
+ smp_mb__after_clear_bit();
+ } else
+- schedule_work(&xprt->task_cleanup);
++ queue_work(rpciod_workqueue, &xprt->task_cleanup);
+ }
+
+ /*
+@@ -515,7 +515,7 @@ xprt_init_autodisconnect(unsigned long data)
+ if (xprt_connecting(xprt))
+ xprt_release_write(xprt, NULL);
+ else
+- schedule_work(&xprt->task_cleanup);
++ queue_work(rpciod_workqueue, &xprt->task_cleanup);
+ return;
+ out_abort:
+ spin_unlock(&xprt->transport_lock);
+@@ -886,27 +886,24 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
+
+ /**
+ * xprt_create_transport - create an RPC transport
+- * @proto: requested transport protocol
+- * @ap: remote peer address
+- * @size: length of address
+- * @to: timeout parameters
++ * @args: rpc transport creation arguments
+ *
+ */
+-struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to)
++struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args)
+ {
+ struct rpc_xprt *xprt;
+ struct rpc_rqst *req;
+
+- switch (proto) {
++ switch (args->proto) {
+ case IPPROTO_UDP:
+- xprt = xs_setup_udp(ap, size, to);
++ xprt = xs_setup_udp(args);
+ break;
+ case IPPROTO_TCP:
+- xprt = xs_setup_tcp(ap, size, to);
++ xprt = xs_setup_tcp(args);
+ break;
+ default:
+ printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
+- proto);
++ args->proto);
+ return ERR_PTR(-EIO);
+ }
+ if (IS_ERR(xprt)) {
+diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
+index cc33c58..4ae7eed 100644
+--- a/net/sunrpc/xprtsock.c
++++ b/net/sunrpc/xprtsock.c
+@@ -235,6 +235,7 @@ struct sock_xprt {
+ * Connection of transports
+ */
+ struct delayed_work connect_worker;
++ struct sockaddr_storage addr;
+ unsigned short port;
+
+ /*
+@@ -653,8 +654,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
+
+ dprintk("RPC: xs_destroy xprt %p\n", xprt);
+
+- cancel_delayed_work(&transport->connect_worker);
+- flush_scheduled_work();
++ cancel_rearming_delayed_work(&transport->connect_worker);
+
+ xprt_disconnect(xprt);
+ xs_close(xprt);
+@@ -1001,7 +1001,7 @@ static void xs_tcp_state_change(struct sock *sk)
+ /* Try to schedule an autoclose RPC calls */
+ set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
+- schedule_work(&xprt->task_cleanup);
++ queue_work(rpciod_workqueue, &xprt->task_cleanup);
+ default:
+ xprt_disconnect(xprt);
+ }
+@@ -1146,31 +1146,36 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
+ sap->sin_port = htons(port);
+ }
+
+-static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock)
++static int xs_bind(struct sock_xprt *transport, struct socket *sock)
+ {
+ struct sockaddr_in myaddr = {
+ .sin_family = AF_INET,
+ };
++ struct sockaddr_in *sa;
+ int err;
+ unsigned short port = transport->port;
+
++ if (!transport->xprt.resvport)
++ port = 0;
++ sa = (struct sockaddr_in *)&transport->addr;
++ myaddr.sin_addr = sa->sin_addr;
+ do {
+ myaddr.sin_port = htons(port);
+ err = kernel_bind(sock, (struct sockaddr *) &myaddr,
+ sizeof(myaddr));
++ if (!transport->xprt.resvport)
++ break;
+ if (err == 0) {
+ transport->port = port;
+- dprintk("RPC: xs_bindresvport bound to port %u\n",
+- port);
+- return 0;
++ break;
+ }
+ if (port <= xprt_min_resvport)
+ port = xprt_max_resvport;
+ else
+ port--;
+ } while (err == -EADDRINUSE && port != transport->port);
+-
+- dprintk("RPC: can't bind to reserved port (%d).\n", -err);
++ dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n",
++ NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err);
+ return err;
+ }
+
+@@ -1229,7 +1234,7 @@ static void xs_udp_connect_worker(struct work_struct *work)
+ }
+ xs_reclassify_socket(sock);
+
+- if (xprt->resvport && xs_bindresvport(transport, sock) < 0) {
++ if (xs_bind(transport, sock)) {
+ sock_release(sock);
+ goto out;
+ }
+@@ -1316,7 +1321,7 @@ static void xs_tcp_connect_worker(struct work_struct *work)
+ }
+ xs_reclassify_socket(sock);
+
+- if (xprt->resvport && xs_bindresvport(transport, sock) < 0) {
++ if (xs_bind(transport, sock)) {
+ sock_release(sock);
+ goto out;
+ }
+@@ -1410,18 +1415,16 @@ static void xs_connect(struct rpc_task *task)
+ dprintk("RPC: xs_connect delayed xprt %p for %lu "
+ "seconds\n",
+ xprt, xprt->reestablish_timeout / HZ);
+- schedule_delayed_work(&transport->connect_worker,
+- xprt->reestablish_timeout);
++ queue_delayed_work(rpciod_workqueue,
++ &transport->connect_worker,
++ xprt->reestablish_timeout);
+ xprt->reestablish_timeout <<= 1;
+ if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
+ xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
+ } else {
+ dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
+- schedule_delayed_work(&transport->connect_worker, 0);
+-
+- /* flush_scheduled_work can sleep... */
+- if (!RPC_IS_ASYNC(task))
+- flush_scheduled_work();
++ queue_delayed_work(rpciod_workqueue,
++ &transport->connect_worker, 0);
+ }
+ }
+
+@@ -1476,7 +1479,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
+ .set_buffer_size = xs_udp_set_buffer_size,
+ .reserve_xprt = xprt_reserve_xprt_cong,
+ .release_xprt = xprt_release_xprt_cong,
+- .rpcbind = rpcb_getport,
++ .rpcbind = rpcb_getport_async,
+ .set_port = xs_set_port,
+ .connect = xs_connect,
+ .buf_alloc = rpc_malloc,
+@@ -1493,7 +1496,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
+ static struct rpc_xprt_ops xs_tcp_ops = {
+ .reserve_xprt = xprt_reserve_xprt,
+ .release_xprt = xs_tcp_release_xprt,
+- .rpcbind = rpcb_getport,
++ .rpcbind = rpcb_getport_async,
+ .set_port = xs_set_port,
+ .connect = xs_connect,
+ .buf_alloc = rpc_malloc,
+@@ -1505,12 +1508,12 @@ static struct rpc_xprt_ops xs_tcp_ops = {
+ .print_stats = xs_tcp_print_stats,
+ };
+
+-static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, unsigned int slot_table_size)
++static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size)
+ {
+ struct rpc_xprt *xprt;
+ struct sock_xprt *new;
+
+- if (addrlen > sizeof(xprt->addr)) {
++ if (args->addrlen > sizeof(xprt->addr)) {
+ dprintk("RPC: xs_setup_xprt: address too large\n");
+ return ERR_PTR(-EBADF);
+ }
+@@ -1532,8 +1535,10 @@ static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, uns
+ return ERR_PTR(-ENOMEM);
+ }
+
+- memcpy(&xprt->addr, addr, addrlen);
+- xprt->addrlen = addrlen;
++ memcpy(&xprt->addr, args->dstaddr, args->addrlen);
++ xprt->addrlen = args->addrlen;
++ if (args->srcaddr)
++ memcpy(&new->addr, args->srcaddr, args->addrlen);
+ new->port = xs_get_random_port();
+
+ return xprt;
+@@ -1541,22 +1546,20 @@ static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, uns
+
+ /**
+ * xs_setup_udp - Set up transport to use a UDP socket
+- * @addr: address of remote server
+- * @addrlen: length of address in bytes
+- * @to: timeout parameters
++ * @args: rpc transport creation arguments
+ *
+ */
+-struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to)
++struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
+ {
+ struct rpc_xprt *xprt;
+ struct sock_xprt *transport;
+
+- xprt = xs_setup_xprt(addr, addrlen, xprt_udp_slot_table_entries);
++ xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
+ if (IS_ERR(xprt))
+ return xprt;
+ transport = container_of(xprt, struct sock_xprt, xprt);
+
+- if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0)
++ if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
+ xprt_set_bound(xprt);
+
+ xprt->prot = IPPROTO_UDP;
+@@ -1572,8 +1575,8 @@ struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_
+
+ xprt->ops = &xs_udp_ops;
+
+- if (to)
+- xprt->timeout = *to;
++ if (args->timeout)
++ xprt->timeout = *args->timeout;
+ else
+ xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
+
+@@ -1586,22 +1589,20 @@ struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_
+
+ /**
+ * xs_setup_tcp - Set up transport to use a TCP socket
+- * @addr: address of remote server
+- * @addrlen: length of address in bytes
+- * @to: timeout parameters
++ * @args: rpc transport creation arguments
+ *
+ */
+-struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to)
++struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
+ {
+ struct rpc_xprt *xprt;
+ struct sock_xprt *transport;
+
+- xprt = xs_setup_xprt(addr, addrlen, xprt_tcp_slot_table_entries);
++ xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+ if (IS_ERR(xprt))
+ return xprt;
+ transport = container_of(xprt, struct sock_xprt, xprt);
+
+- if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0)
++ if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
+ xprt_set_bound(xprt);
+
+ xprt->prot = IPPROTO_TCP;
+@@ -1616,8 +1617,8 @@ struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_
+
+ xprt->ops = &xs_tcp_ops;
+
+- if (to)
+- xprt->timeout = *to;
++ if (args->timeout)
++ xprt->timeout = *args->timeout;
+ else
+ xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
+
--- /dev/null
+Changes since 2.6.22-rc1-CITI_NFS4_ALL-1
+ - update to 2.6.22-rc5
+ - allow id-squashing options to vary per pseudoflavor
+ - enforce requirement that flags other than ro/rw and
+ id-squashing flags not vary per pseudoflavor
+ - meelap: vary maximum delegation limit by RAM size
+ - meelap: don't grant delegations on files that saw conflicts
+ - silence a compiler warning in ACL code
+ - bhalevy: fix enc_stateid_sz for nfsd callbacks
+ - fix mishandling of acl errors that could cause an oops
+ - make all export-finding functions return -errno on error
+ - show flavor info in /proc/net/rpc/nfsd.export/content
+ - miscellaneous cleanup
+
+---
+
+ b/fs/Kconfig | 1
+ b/fs/lockd/svc.c | 31 ++-
+ b/fs/locks.c | 23 +-
+ b/fs/nfs/file.c | 4
+ b/fs/nfsd/auth.c | 18 +-
+ b/fs/nfsd/export.c | 284 ++++++++++++++++++++++++++------
+ b/fs/nfsd/lockd.c | 1
+ b/fs/nfsd/nfs4acl.c | 12 +
+ b/fs/nfsd/nfs4callback.c | 2
+ b/fs/nfsd/nfs4idmap.c | 13 +
+ b/fs/nfsd/nfs4proc.c | 34 +++
+ b/fs/nfsd/nfs4state.c | 28 ++-
+ b/fs/nfsd/nfs4xdr.c | 99 +++++++++++
+ b/fs/nfsd/nfsctl.c | 1
+ b/fs/nfsd/nfsfh.c | 32 ++-
+ b/fs/nfsd/nfsproc.c | 3
+ b/fs/nfsd/nfssvc.c | 10 +
+ b/fs/nfsd/vfs.c | 110 ++++++------
+ b/fs/open.c | 16 +
+ b/include/linux/fs.h | 2
+ b/include/linux/lockd/bind.h | 9 +
+ b/include/linux/nfsd/export.h | 41 ++++
+ b/include/linux/nfsd/nfsd.h | 9 -
+ b/include/linux/nfsd/state.h | 3
+ b/include/linux/nfsd/xdr4.h | 7
+ b/include/linux/sunrpc/gss_api.h | 1
+ b/include/linux/sunrpc/svc.h | 2
+ b/include/linux/sunrpc/svcauth.h | 1
+ b/include/linux/sunrpc/svcauth_gss.h | 2
+ b/net/sunrpc/auth_gss/gss_krb5_mech.c | 1
+ b/net/sunrpc/auth_gss/gss_mech_switch.c | 15 +
+ b/net/sunrpc/auth_gss/gss_spkm3_mech.c | 1
+ b/net/sunrpc/auth_gss/svcauth_gss.c | 32 +++
+ b/net/sunrpc/svcauth_unix.c | 7
+ include/linux/nfsd/interface.h | 13 -
+ 36 files changed, 693 insertions(+), 177 deletions(-)
+
+diff --git a/fs/Kconfig b/fs/Kconfig
+index 0fa0c11..76cf825 100644
+--- a/fs/Kconfig
++++ b/fs/Kconfig
+@@ -1675,6 +1675,7 @@ config NFSD_V3_ACL
+ config NFSD_V4
+ bool "Provide NFSv4 server support (EXPERIMENTAL)"
+ depends on NFSD_V3 && EXPERIMENTAL
++ select RPCSEC_GSS_KRB5
+ help
+ If you would like to include the NFSv4 server as well as the NFSv2
+ and NFSv3 servers, say Y here. This feature is experimental, and
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 126b1bf..6378572 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -75,18 +75,35 @@ static const int nlm_port_min = 0, nlm_port_max = 65535;
+
+ static struct ctl_table_header * nlm_sysctl_table;
+
+-static unsigned long set_grace_period(void)
++static time_t get_lockd_grace_period(void)
+ {
+- unsigned long grace_period;
+-
+ /* Note: nlm_timeout should always be nonzero */
+ if (nlm_grace_period)
+- grace_period = ((nlm_grace_period + nlm_timeout - 1)
+- / nlm_timeout) * nlm_timeout * HZ;
++ return ((nlm_grace_period + nlm_timeout - 1)
++ / nlm_timeout) * nlm_timeout;
+ else
+- grace_period = nlm_timeout * 5 * HZ;
++ return nlm_timeout * 5;
++}
++
++time_t get_nfs_grace_period(void)
++{
++ time_t lockdgrace = get_lockd_grace_period();
++ time_t nfsdgrace = 0;
++
++ if (nlmsvc_ops)
++ nfsdgrace = nlmsvc_ops->get_grace_period();
++
++ return max(lockdgrace, nfsdgrace);
++}
++EXPORT_SYMBOL(get_nfs_grace_period);
++
++static unsigned long set_grace_period(void)
++{
++ time_t grace_period;
++
++ grace_period = get_nfs_grace_period();
+ nlmsvc_grace_period = 1;
+- return grace_period + jiffies;
++ return grace_period * HZ + jiffies;
+ }
+
+ static inline void clear_grace_period(void)
+diff --git a/fs/locks.c b/fs/locks.c
+index 431a8b8..bcc37b9 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -661,7 +661,7 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w
+ return result;
+ }
+
+-int
++void
+ posix_test_lock(struct file *filp, struct file_lock *fl)
+ {
+ struct file_lock *cfl;
+@@ -670,17 +670,15 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
+ for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
+ if (!IS_POSIX(cfl))
+ continue;
+- if (posix_locks_conflict(cfl, fl))
++ if (posix_locks_conflict(fl, cfl))
+ break;
+ }
+- if (cfl) {
++ if (cfl)
+ __locks_copy_lock(fl, cfl);
+- unlock_kernel();
+- return 1;
+- } else
++ else
+ fl->fl_type = F_UNLCK;
+ unlock_kernel();
+- return 0;
++ return;
+ }
+
+ EXPORT_SYMBOL(posix_test_lock);
+@@ -823,7 +821,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
+ lock_kernel();
+ if (request->fl_type != F_UNLCK) {
+ for_each_lock(inode, before) {
+- struct file_lock *fl = *before;
++ fl = *before;
+ if (!IS_POSIX(fl))
+ continue;
+ if (!posix_locks_conflict(request, fl))
+@@ -1169,9 +1167,9 @@ static void time_out_leases(struct inode *inode)
+ * @inode: the inode of the file to return
+ * @mode: the open mode (read or write)
+ *
+- * break_lease (inlined for speed) has checked there already
+- * is a lease on this file. Leases are broken on a call to open()
+- * or truncate(). This function can sleep unless you
++ * break_lease (inlined for speed) has checked there already is at least
++ * some kind of lock (maybe a lease) on this file. Leases are broken on
++ * a call to open() or truncate(). This function can sleep unless you
+ * specified %O_NONBLOCK to your open().
+ */
+ int __break_lease(struct inode *inode, unsigned int mode)
+@@ -1597,8 +1595,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
+ /**
+ * vfs_test_lock - test file byte range lock
+ * @filp: The file to test lock for
+- * @fl: The lock to test
+- * @conf: Place to return a copy of the conflicting lock, if found
++ * @fl: The lock to test; also used to hold result
+ *
+ * Returns -ERRNO on failure. Indicates presence of conflicting lock by
+ * setting conf->fl_type to something other than F_UNLCK.
+diff --git a/fs/nfs/file.c b/fs/nfs/file.c
+index 9eb8eb4..5b24e88 100644
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -397,7 +397,9 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
+
+ lock_kernel();
+ /* Try local locking first */
+- if (posix_test_lock(filp, fl)) {
++ posix_test_lock(filp, fl);
++ if (fl->fl_type != F_UNLCK) {
++ /* found a conflict */
+ goto out;
+ }
+
+diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
+index 6e92b0f..cf61dc8 100644
+--- a/fs/nfsd/auth.c
++++ b/fs/nfsd/auth.c
+@@ -12,17 +12,31 @@
+
+ #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE))
+
++static int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
++{
++ struct exp_flavor_info *f;
++ struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
++
++ for (f = exp->ex_flavors; f < end; f++) {
++ if (f->pseudoflavor == rqstp->rq_flavor)
++ return f->flags;
++ }
++ return exp->ex_flags;
++
++}
++
+ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
+ {
+ struct svc_cred cred = rqstp->rq_cred;
+ int i;
++ int flags = nfsexp_flags(rqstp, exp);
+ int ret;
+
+- if (exp->ex_flags & NFSEXP_ALLSQUASH) {
++ if (flags & NFSEXP_ALLSQUASH) {
+ cred.cr_uid = exp->ex_anon_uid;
+ cred.cr_gid = exp->ex_anon_gid;
+ cred.cr_group_info = groups_alloc(0);
+- } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) {
++ } else if (flags & NFSEXP_ROOTSQUASH) {
+ struct group_info *gi;
+ if (!cred.cr_uid)
+ cred.cr_uid = exp->ex_anon_uid;
+diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
+index 79bd03b..d9ed7f1 100644
+--- a/fs/nfsd/export.c
++++ b/fs/nfsd/export.c
+@@ -32,6 +32,8 @@
+ #include <linux/nfsd/nfsfh.h>
+ #include <linux/nfsd/syscall.h>
+ #include <linux/lockd/bind.h>
++#include <linux/sunrpc/msg_prot.h>
++#include <linux/sunrpc/gss_api.h>
+
+ #define NFSDDBG_FACILITY NFSDDBG_EXPORT
+
+@@ -451,8 +453,46 @@ out_free_all:
+ return err;
+ }
+
++static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)
++{
++ int listsize, err;
++ struct exp_flavor_info *f;
++
++ err = get_int(mesg, &listsize);
++ if (err)
++ return err;
++ if (listsize < 0 || listsize > MAX_SECINFO_LIST)
++ return -EINVAL;
++
++ for (f=exp->ex_flavors; f < exp->ex_flavors + listsize; f++) {
++ err = get_int(mesg, &f->pseudoflavor);
++ if (err)
++ return err;
++ /*
++ * Just a quick sanity check; we could also try to check
++ * whether this pseudoflavor is supported, but at worst
++ * an unsupported pseudoflavor on the export would just
++ * be a pseudoflavor that won't match the flavor of any
++ * authenticated request. The administrator will
++ * probably discover the problem when someone fails to
++ * authenticate.
++ */
++ if (f->pseudoflavor < 0)
++ return -EINVAL;
++ err = get_int(mesg, &f->flags);
++ if (err)
++ return err;
++ /* Only some flags are allowed to differ between flavors: */
++ if (~NFSEXP_SECINFO_FLAGS & (f->flags ^ exp->ex_flags))
++ return -EINVAL;
++ }
++ exp->ex_nflavors = listsize;
++ return 0;
++}
++
+ #else /* CONFIG_NFSD_V4 */
+ static inline int fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) { return 0; }
++static inline int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
+ #endif
+
+ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
+@@ -476,6 +516,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
+
+ exp.ex_uuid = NULL;
+
++ /* secinfo */
++ exp.ex_nflavors = 0;
++
+ if (mesg[mlen-1] != '\n')
+ return -EINVAL;
+ mesg[mlen-1] = 0;
+@@ -553,7 +596,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
+ if (exp.ex_uuid == NULL)
+ err = -ENOMEM;
+ }
+- } else
++ } else if (strcmp(buf, "secinfo") == 0)
++ err = secinfo_parse(&mesg, buf, &exp);
++ else
+ /* quietly ignore unknown words and anything
+ * following. Newer user-space can try to set
+ * new values, then see what the result was.
+@@ -593,6 +638,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
+
+ static void exp_flags(struct seq_file *m, int flag, int fsid,
+ uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs);
++static void show_secinfo(struct seq_file *m, struct svc_export *exp);
+
+ static int svc_export_show(struct seq_file *m,
+ struct cache_detail *cd,
+@@ -622,6 +668,7 @@ static int svc_export_show(struct seq_file *m,
+ seq_printf(m, "%02x", exp->ex_uuid[i]);
+ }
+ }
++ show_secinfo(m, exp);
+ }
+ seq_puts(m, ")\n");
+ return 0;
+@@ -654,6 +701,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
+ {
+ struct svc_export *new = container_of(cnew, struct svc_export, h);
+ struct svc_export *item = container_of(citem, struct svc_export, h);
++ int i;
+
+ new->ex_flags = item->ex_flags;
+ new->ex_anon_uid = item->ex_anon_uid;
+@@ -669,6 +717,10 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
+ item->ex_fslocs.locations_count = 0;
+ new->ex_fslocs.migrated = item->ex_fslocs.migrated;
+ item->ex_fslocs.migrated = 0;
++ new->ex_nflavors = item->ex_nflavors;
++ for (i = 0; i < MAX_SECINFO_LIST; i++){
++ new->ex_flavors[i] = item->ex_flavors[i];
++ }
+ }
+
+ static struct cache_head *svc_export_alloc(void)
+@@ -738,16 +790,18 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
+ int err;
+
+ if (!clp)
+- return NULL;
++ return ERR_PTR(-ENOENT);
+
+ key.ek_client = clp;
+ key.ek_fsidtype = fsid_type;
+ memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
+
+ ek = svc_expkey_lookup(&key);
+- if (ek != NULL)
+- if ((err = cache_check(&svc_expkey_cache, &ek->h, reqp)))
+- ek = ERR_PTR(err);
++ if (ek == NULL)
++ return ERR_PTR(-ENOMEM);
++ err = cache_check(&svc_expkey_cache, &ek->h, reqp);
++ if (err)
++ return ERR_PTR(err);
+ return ek;
+ }
+
+@@ -808,30 +862,21 @@ exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
+ struct cache_req *reqp)
+ {
+ struct svc_export *exp, key;
++ int err;
+
+ if (!clp)
+- return NULL;
++ return ERR_PTR(-ENOENT);
+
+ key.ex_client = clp;
+ key.ex_mnt = mnt;
+ key.ex_dentry = dentry;
+
+ exp = svc_export_lookup(&key);
+- if (exp != NULL) {
+- int err;
+-
+- err = cache_check(&svc_export_cache, &exp->h, reqp);
+- switch (err) {
+- case 0: break;
+- case -EAGAIN:
+- case -ETIMEDOUT:
+- exp = ERR_PTR(err);
+- break;
+- default:
+- exp = NULL;
+- }
+- }
+-
++ if (exp == NULL)
++ return ERR_PTR(-ENOMEM);
++ err = cache_check(&svc_export_cache, &exp->h, reqp);
++ if (err)
++ return ERR_PTR(err);
+ return exp;
+ }
+
+@@ -847,7 +892,7 @@ exp_parent(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
+ dget(dentry);
+ exp = exp_get_by_name(clp, mnt, dentry, reqp);
+
+- while (exp == NULL && !IS_ROOT(dentry)) {
++ while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
+ struct dentry *parent;
+
+ parent = dget_parent(dentry);
+@@ -900,7 +945,7 @@ static void exp_fsid_unhash(struct svc_export *exp)
+ return;
+
+ ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid);
+- if (ek && !IS_ERR(ek)) {
++ if (!IS_ERR(ek)) {
+ ek->h.expiry_time = get_seconds()-1;
+ cache_put(&ek->h, &svc_expkey_cache);
+ }
+@@ -938,7 +983,7 @@ static void exp_unhash(struct svc_export *exp)
+ struct inode *inode = exp->ex_dentry->d_inode;
+
+ ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino);
+- if (ek && !IS_ERR(ek)) {
++ if (!IS_ERR(ek)) {
+ ek->h.expiry_time = get_seconds()-1;
+ cache_put(&ek->h, &svc_expkey_cache);
+ }
+@@ -989,13 +1034,12 @@ exp_export(struct nfsctl_export *nxp)
+
+ /* must make sure there won't be an ex_fsid clash */
+ if ((nxp->ex_flags & NFSEXP_FSID) &&
+- (fsid_key = exp_get_fsid_key(clp, nxp->ex_dev)) &&
+- !IS_ERR(fsid_key) &&
++ (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) &&
+ fsid_key->ek_mnt &&
+ (fsid_key->ek_mnt != nd.mnt || fsid_key->ek_dentry != nd.dentry) )
+ goto finish;
+
+- if (exp) {
++ if (!IS_ERR(exp)) {
+ /* just a flags/id/fsid update */
+
+ exp_fsid_unhash(exp);
+@@ -1104,7 +1148,7 @@ exp_unexport(struct nfsctl_export *nxp)
+ err = -EINVAL;
+ exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL);
+ path_release(&nd);
+- if (!exp)
++ if (IS_ERR(exp))
+ goto out_domain;
+
+ exp_do_unexport(exp);
+@@ -1149,10 +1193,6 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize)
+ err = PTR_ERR(exp);
+ goto out;
+ }
+- if (!exp) {
+- dprintk("nfsd: exp_rootfh export not found.\n");
+- goto out;
+- }
+
+ /*
+ * fh must be initialized before calling fh_compose
+@@ -1176,17 +1216,130 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv,
+ {
+ struct svc_export *exp;
+ struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp);
+- if (!ek || IS_ERR(ek))
++ if (IS_ERR(ek))
+ return ERR_PTR(PTR_ERR(ek));
+
+ exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp);
+ cache_put(&ek->h, &svc_expkey_cache);
+
+- if (!exp || IS_ERR(exp))
++ if (IS_ERR(exp))
+ return ERR_PTR(PTR_ERR(exp));
+ return exp;
+ }
+
++__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
++{
++ struct exp_flavor_info *f;
++ struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
++
++ /* legacy gss-only clients are always OK: */
++ if (exp->ex_client == rqstp->rq_gssclient)
++ return 0;
++ /* ip-address based client; check sec= export option: */
++ for (f = exp->ex_flavors; f < end; f++) {
++ if (f->pseudoflavor == rqstp->rq_flavor)
++ return 0;
++ }
++ /* defaults in absence of sec= options: */
++ if (exp->ex_nflavors == 0) {
++ if (rqstp->rq_flavor == RPC_AUTH_NULL ||
++ rqstp->rq_flavor == RPC_AUTH_UNIX)
++ return 0;
++ }
++ return nfserr_wrongsec;
++}
++
++/*
++ * Uses rq_client and rq_gssclient to find an export; uses rq_client (an
++ * auth_unix client) if it's available and has secinfo information;
++ * otherwise, will try to use rq_gssclient.
++ *
++ * Called from functions that handle requests; functions that do work on
++ * behalf of mountd are passed a single client name to use, and should
++ * use exp_get_by_name() or exp_find().
++ */
++struct svc_export *
++rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt,
++ struct dentry *dentry)
++{
++ struct svc_export *gssexp, *exp = NULL;
++
++ if (rqstp->rq_client == NULL)
++ goto gss;
++
++ /* First try the auth_unix client: */
++ exp = exp_get_by_name(rqstp->rq_client, mnt, dentry,
++ &rqstp->rq_chandle);
++ if (PTR_ERR(exp) == -ENOENT)
++ goto gss;
++ if (IS_ERR(exp))
++ return exp;
++ /* If it has secinfo, assume there are no gss/... clients */
++ if (exp->ex_nflavors > 0)
++ return exp;
++gss:
++ /* Otherwise, try falling back on gss client */
++ if (rqstp->rq_gssclient == NULL)
++ return exp;
++ gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry,
++ &rqstp->rq_chandle);
++ if (PTR_ERR(gssexp) == -ENOENT)
++ return exp;
++ if (exp)
++ exp_put(exp);
++ return gssexp;
++}
++
++struct svc_export *
++rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv)
++{
++ struct svc_export *gssexp, *exp = NULL;
++
++ if (rqstp->rq_client == NULL)
++ goto gss;
++
++ /* First try the auth_unix client: */
++ exp = exp_find(rqstp->rq_client, fsid_type, fsidv, &rqstp->rq_chandle);
++ if (PTR_ERR(exp) == -ENOENT)
++ goto gss;
++ if (IS_ERR(exp))
++ return exp;
++ /* If it has secinfo, assume there are no gss/... clients */
++ if (exp->ex_nflavors > 0)
++ return exp;
++gss:
++ /* Otherwise, try falling back on gss client */
++ if (rqstp->rq_gssclient == NULL)
++ return exp;
++ gssexp = exp_find(rqstp->rq_gssclient, fsid_type, fsidv,
++ &rqstp->rq_chandle);
++ if (PTR_ERR(gssexp) == -ENOENT)
++ return exp;
++ if (exp)
++ exp_put(exp);
++ return gssexp;
++}
++
++struct svc_export *
++rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt,
++ struct dentry *dentry)
++{
++ struct svc_export *exp;
++
++ dget(dentry);
++ exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
++
++ while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
++ struct dentry *parent;
++
++ parent = dget_parent(dentry);
++ dput(dentry);
++ dentry = parent;
++ exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
++ }
++ dput(dentry);
++ return exp;
++}
+
+ /*
+ * Called when we need the filehandle for the root of the pseudofs,
+@@ -1194,8 +1347,7 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv,
+ * export point with fsid==0
+ */
+ __be32
+-exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
+- struct cache_req *creq)
++exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ {
+ struct svc_export *exp;
+ __be32 rv;
+@@ -1203,12 +1355,16 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
+
+ mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
+
+- exp = exp_find(clp, FSID_NUM, fsidv, creq);
++ exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
++ if (PTR_ERR(exp) == -ENOENT)
++ return nfserr_perm;
+ if (IS_ERR(exp))
+ return nfserrno(PTR_ERR(exp));
+- if (exp == NULL)
+- return nfserr_perm;
+ rv = fh_compose(fhp, exp, exp->ex_dentry, NULL);
++ if (rv)
++ goto out;
++ rv = check_nfsd_access(exp, rqstp);
++out:
+ exp_put(exp);
+ return rv;
+ }
+@@ -1296,28 +1452,62 @@ static struct flags {
+ { 0, {"", ""}}
+ };
+
+-static void exp_flags(struct seq_file *m, int flag, int fsid,
+- uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc)
++static void show_expflags(struct seq_file *m, int flags, int mask)
+ {
+- int first = 0;
+ struct flags *flg;
++ int state, first = 0;
+
+ for (flg = expflags; flg->flag; flg++) {
+- int state = (flg->flag & flag)?0:1;
++ if (flg->flag & ~mask)
++ continue;
++ state = (flg->flag & flags) ? 0 : 1;
+ if (*flg->name[state])
+ seq_printf(m, "%s%s", first++?",":"", flg->name[state]);
+ }
++}
++
++static void show_secinfo_flags(struct seq_file *m, int flags)
++{
++ seq_printf(m, ",");
++ show_expflags(m, flags, NFSEXP_SECINFO_FLAGS);
++}
++
++static void show_secinfo(struct seq_file *m, struct svc_export *exp)
++{
++ struct exp_flavor_info *f;
++ struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
++ int lastflags = 0, first = 0;
++
++ if (exp->ex_nflavors == 0)
++ return;
++ for (f = exp->ex_flavors; f < end; f++) {
++ if (first || f->flags != lastflags) {
++ if (!first)
++ show_secinfo_flags(m, lastflags);
++ seq_printf(m, ",sec=%d", f->pseudoflavor);
++ lastflags = f->flags;
++ } else {
++ seq_printf(m, ":%d", f->pseudoflavor);
++ }
++ }
++ show_secinfo_flags(m, lastflags);
++}
++
++static void exp_flags(struct seq_file *m, int flag, int fsid,
++ uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc)
++{
++ show_expflags(m, flag, NFSEXP_ALLFLAGS);
+ if (flag & NFSEXP_FSID)
+- seq_printf(m, "%sfsid=%d", first++?",":"", fsid);
++ seq_printf(m, ",fsid=%d", fsid);
+ if (anonu != (uid_t)-2 && anonu != (0x10000-2))
+- seq_printf(m, "%sanonuid=%d", first++?",":"", anonu);
++ seq_printf(m, ",sanonuid=%d", anonu);
+ if (anong != (gid_t)-2 && anong != (0x10000-2))
+- seq_printf(m, "%sanongid=%d", first++?",":"", anong);
++ seq_printf(m, ",anongid=%d", anong);
+ if (fsloc && fsloc->locations_count > 0) {
+ char *loctype = (fsloc->migrated) ? "refer" : "replicas";
+ int i;
+
+- seq_printf(m, "%s%s=", first++?",":"", loctype);
++ seq_printf(m, ",%s=", loctype);
+ seq_escape(m, fsloc->locations[0].path, ",;@ \t\n\\");
+ seq_putc(m, '@');
+ seq_escape(m, fsloc->locations[0].hosts, ",;@ \t\n\\");
+diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
+index 221acd1..9e4a568 100644
+--- a/fs/nfsd/lockd.c
++++ b/fs/nfsd/lockd.c
+@@ -65,6 +65,7 @@ nlm_fclose(struct file *filp)
+ static struct nlmsvc_binding nfsd_nlm_ops = {
+ .fopen = nlm_fopen, /* open file for locking */
+ .fclose = nlm_fclose, /* close file */
++ .get_grace_period = get_nfs4_grace_period,
+ };
+
+ void
+diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
+index cc3b7ba..b6ed383 100644
+--- a/fs/nfsd/nfs4acl.c
++++ b/fs/nfsd/nfs4acl.c
+@@ -183,8 +183,13 @@ static void
+ summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas)
+ {
+ struct posix_acl_entry *pa, *pe;
+- pas->users = 0;
+- pas->groups = 0;
++
++ /*
++ * Only pas.users and pas.groups need initialization; previous
++ * posix_acl_valid() calls ensure that the other fields will be
++ * initialized in the following loop. But, just to placate gcc:
++ */
++ memset(pas, 0, sizeof(*pas));
+ pas->mask = 07;
+
+ pe = acl->a_entries + acl->a_count;
+@@ -732,13 +737,16 @@ int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
+ *pacl = posix_state_to_acl(&effective_acl_state, flags);
+ if (IS_ERR(*pacl)) {
+ ret = PTR_ERR(*pacl);
++ *pacl = NULL;
+ goto out_dstate;
+ }
+ *dpacl = posix_state_to_acl(&default_acl_state,
+ flags | NFS4_ACL_TYPE_DEFAULT);
+ if (IS_ERR(*dpacl)) {
+ ret = PTR_ERR(*dpacl);
++ *dpacl = NULL;
+ posix_acl_release(*pacl);
++ *pacl = NULL;
+ goto out_dstate;
+ }
+ sort_pacl(*pacl);
+diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
+index 864090e..c9e0c30 100644
+--- a/fs/nfsd/nfs4callback.c
++++ b/fs/nfsd/nfs4callback.c
+@@ -75,7 +75,7 @@ enum nfs_cb_opnum4 {
+ #define op_enc_sz 1
+ #define op_dec_sz 2
+ #define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2))
+-#define enc_stateid_sz 16
++#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2)
+ #define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \
+ 1 + enc_stateid_sz + \
+ enc_nfs4_fh_sz)
+diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
+index 45aa21c..2cf9a9a 100644
+--- a/fs/nfsd/nfs4idmap.c
++++ b/fs/nfsd/nfs4idmap.c
+@@ -587,6 +587,15 @@ idmap_lookup(struct svc_rqst *rqstp,
+ return ret;
+ }
+
++static char *
++rqst_authname(struct svc_rqst *rqstp)
++{
++ struct auth_domain *clp;
++
++ clp = rqstp->rq_gssclient ? rqstp->rq_gssclient : rqstp->rq_client;
++ return clp->name;
++}
++
+ static int
+ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen,
+ uid_t *id)
+@@ -600,7 +609,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
+ return -EINVAL;
+ memcpy(key.name, name, namelen);
+ key.name[namelen] = '\0';
+- strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname));
++ strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
+ ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item);
+ if (ret == -ENOENT)
+ ret = -ESRCH; /* nfserr_badname */
+@@ -620,7 +629,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
+ };
+ int ret;
+
+- strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname));
++ strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
+ ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item);
+ if (ret == -ENOENT)
+ return sprintf(name, "%u", id);
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 8522729..9403095 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -47,6 +47,7 @@
+ #include <linux/nfsd/state.h>
+ #include <linux/nfsd/xdr4.h>
+ #include <linux/nfs4_acl.h>
++#include <linux/sunrpc/gss_api.h>
+
+ #define NFSDDBG_FACILITY NFSDDBG_PROC
+
+@@ -286,8 +287,7 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ __be32 status;
+
+ fh_put(&cstate->current_fh);
+- status = exp_pseudoroot(rqstp->rq_client, &cstate->current_fh,
+- &rqstp->rq_chandle);
++ status = exp_pseudoroot(rqstp, &cstate->current_fh);
+ return status;
+ }
+
+@@ -474,8 +474,8 @@ nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ __be32 ret;
+
+ fh_init(&tmp_fh, NFS4_FHSIZE);
+- if((ret = exp_pseudoroot(rqstp->rq_client, &tmp_fh,
+- &rqstp->rq_chandle)) != 0)
++ ret = exp_pseudoroot(rqstp, &tmp_fh);
++ if (ret)
+ return ret;
+ if (tmp_fh.fh_dentry == cstate->current_fh.fh_dentry) {
+ fh_put(&tmp_fh);
+@@ -611,6 +611,29 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ }
+
+ static __be32
++nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_secinfo *secinfo)
++{
++ struct svc_export *exp;
++ struct dentry *dentry;
++ __be32 err;
++
++ /* nfsd4_encode_secinfo() exp_put()s any non-NULL si_exp, even on error */
++ secinfo->si_exp = NULL;
++ err = nfsd_lookup_dentry(rqstp, &cstate->current_fh,
++ secinfo->si_name, secinfo->si_namelen,
++ &exp, &dentry);
++ if (err)
++ return err;
++ if (dentry->d_inode == NULL) {
++ exp_put(exp);
++ err = nfserr_noent;
++ } else
++ secinfo->si_exp = exp;
++ dput(dentry);
++ return err;
++}
++
++static __be32
+ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_setattr *setattr)
+ {
+@@ -1009,6 +1032,9 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
+ [OP_SAVEFH] = {
+ .op_func = (nfsd4op_func)nfsd4_savefh,
+ },
++ [OP_SECINFO] = {
++ .op_func = (nfsd4op_func)nfsd4_secinfo,
++ },
+ [OP_SETATTR] = {
+ .op_func = (nfsd4op_func)nfsd4_setattr,
+ },
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 3cc8ce4..47d48e8 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -51,6 +51,7 @@
+ #include <linux/namei.h>
+ #include <linux/mutex.h>
+ #include <linux/lockd/bind.h>
++#include <linux/module.h>
+
+ #define NFSDDBG_FACILITY NFSDDBG_PROC
+
+@@ -149,6 +150,7 @@ get_nfs4_file(struct nfs4_file *fi)
+ }
+
+ static int num_delegations;
++unsigned int max_delegations = 0;
+
+ /*
+ * Open owner state (share locks)
+@@ -192,7 +194,9 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
+ struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
+
+ dprintk("NFSD alloc_init_deleg\n");
+- if (num_delegations > STATEID_HASH_SIZE * 4)
++ if (fp->fi_had_conflict)
++ return NULL;
++ if (num_delegations > max_delegations)
+ return NULL;
+ dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
+ if (dp == NULL)
+@@ -1000,6 +1004,7 @@ alloc_init_file(struct inode *ino)
+ list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+ fp->fi_inode = igrab(ino);
+ fp->fi_id = current_fileid++;
++ fp->fi_had_conflict = false;
+ return fp;
+ }
+ return NULL;
+@@ -1326,6 +1331,7 @@ do_recall(void *__dp)
+ {
+ struct nfs4_delegation *dp = __dp;
+
++ dp->dl_file->fi_had_conflict = true;
+ nfsd4_cb_recall(dp);
+ return 0;
+ }
+@@ -3191,6 +3197,23 @@ nfsd4_load_reboot_recovery_data(void)
+ printk("NFSD: Failure reading reboot recovery data\n");
+ }
+
++time_t
++get_nfs4_grace_period(void)
++{
++ return max(user_lease_time, lease_time);
++}
++
++static void
++set_max_delegations(void)
++{
++ struct sysinfo sys;
++
++ si_meminfo(&sys);
++ sys.totalram *= sys.mem_unit;
++ sys.totalram >>= (18 - PAGE_SHIFT);
++ max_delegations = (unsigned int) sys.totalram;
++}
++
+ /* initialization to perform when the nfsd service is started: */
+
+ static void
+@@ -3199,12 +3222,13 @@ __nfs4_state_start(void)
+ time_t grace_time;
+
+ boot_time = get_seconds();
+- grace_time = max(user_lease_time, lease_time);
++ grace_time = get_nfs_grace_period();
+ lease_time = user_lease_time;
+ in_grace = 1;
+ printk("NFSD: starting %ld-second grace period\n", grace_time);
+ laundry_wq = create_singlethread_workqueue("nfsd4");
+ queue_delayed_work(laundry_wq, &laundromat_work, grace_time*HZ);
++ set_max_delegations();
+ }
+
+ int
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 15809df..be8c614 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -56,6 +56,8 @@
+ #include <linux/nfsd_idmap.h>
+ #include <linux/nfs4.h>
+ #include <linux/nfs4_acl.h>
++#include <linux/sunrpc/gss_api.h>
++#include <linux/sunrpc/svcauth_gss.h>
+
+ #define NFSDDBG_FACILITY NFSDDBG_XDR
+
+@@ -819,6 +821,22 @@ nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
+ }
+
+ static __be32
++nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, struct nfsd4_secinfo *secinfo)
++{
++ DECODE_HEAD;
++
++ READ_BUF(4);
++ READ32(secinfo->si_namelen);
++ READ_BUF(secinfo->si_namelen);
++ SAVEMEM(secinfo->si_name, secinfo->si_namelen);
++ status = check_filename(secinfo->si_name, secinfo->si_namelen,
++ nfserr_noent);
++ if (status)
++ return status;
++ DECODE_TAIL;
++}
++
++static __be32
+ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr)
+ {
+ DECODE_HEAD;
+@@ -1131,6 +1149,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+ case OP_SAVEFH:
+ op->status = nfs_ok;
+ break;
++ case OP_SECINFO:
++ op->status = nfsd4_decode_secinfo(argp, &op->u.secinfo);
++ break;
+ case OP_SETATTR:
+ op->status = nfsd4_decode_setattr(argp, &op->u.setattr);
+ break;
+@@ -1296,7 +1317,7 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *
+ char *path, *rootpath;
+
+ fh_init(&tmp_fh, NFS4_FHSIZE);
+- *stat = exp_pseudoroot(rqstp->rq_client, &tmp_fh, &rqstp->rq_chandle);
++ *stat = exp_pseudoroot(rqstp, &tmp_fh);
+ if (*stat)
+ return NULL;
+ rootpath = tmp_fh.fh_export->ex_path;
+@@ -1847,11 +1868,19 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
+ if (d_mountpoint(dentry)) {
+ int err;
+
++ /*
++ * Why the heck aren't we just using nfsd_lookup??
++ * Different "."/".." handling? Something else?
++ * At least, add a comment here to explain....
++ */
+ err = nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp);
+ if (err) {
+ nfserr = nfserrno(err);
+ goto out_put;
+ }
++ nfserr = check_nfsd_access(exp, cd->rd_rqstp);
++ if (nfserr)
++ goto out_put;
+
+ }
+ nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
+@@ -2419,6 +2448,71 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
+ }
+ }
+
++static void
++nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_secinfo *secinfo)
++{
++ int i = 0;
++ struct svc_export *exp = secinfo->si_exp;
++ u32 nflavs;
++ struct exp_flavor_info *flavs;
++ struct exp_flavor_info def_flavs[2];
++ ENCODE_HEAD;
++
++ if (nfserr)
++ goto out;
++ if (exp->ex_nflavors) {
++ flavs = exp->ex_flavors;
++ nflavs = exp->ex_nflavors;
++ } else { /* Handling of some defaults in absence of real secinfo: */
++ flavs = def_flavs;
++ if (exp->ex_client->flavour->flavour == RPC_AUTH_UNIX) {
++ nflavs = 2;
++ flavs[0].pseudoflavor = RPC_AUTH_UNIX;
++ flavs[1].pseudoflavor = RPC_AUTH_NULL;
++ } else if (exp->ex_client->flavour->flavour == RPC_AUTH_GSS) {
++ nflavs = 1;
++ flavs[0].pseudoflavor
++ = svcauth_gss_flavor(exp->ex_client);
++ } else {
++ nflavs = 1;
++ flavs[0].pseudoflavor
++ = exp->ex_client->flavour->flavour;
++ }
++ }
++
++ RESERVE_SPACE(4);
++ WRITE32(nflavs);
++ ADJUST_ARGS();
++ for (i = 0; i < nflavs; i++) {
++ u32 flav = flavs[i].pseudoflavor;
++ struct gss_api_mech *gm = gss_mech_get_by_pseudoflavor(flav);
++
++ if (gm) {
++ RESERVE_SPACE(4);
++ WRITE32(RPC_AUTH_GSS);
++ ADJUST_ARGS();
++ RESERVE_SPACE(4 + gm->gm_oid.len);
++ WRITE32(gm->gm_oid.len);
++ WRITEMEM(gm->gm_oid.data, gm->gm_oid.len);
++ ADJUST_ARGS();
++ RESERVE_SPACE(4);
++ WRITE32(0); /* qop */
++ ADJUST_ARGS();
++ RESERVE_SPACE(4);
++ WRITE32(gss_pseudoflavor_to_service(gm, flav));
++ ADJUST_ARGS();
++ gss_mech_put(gm);
++ } else {
++ RESERVE_SPACE(4);
++ WRITE32(flav);
++ ADJUST_ARGS();
++ }
++ }
++out:
++ if (exp)
++ exp_put(exp);
++}
++
+ /*
+ * The SETATTR encode routine is special -- it always encodes a bitmap,
+ * regardless of the error status.
+@@ -2559,6 +2653,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+ break;
+ case OP_SAVEFH:
+ break;
++ case OP_SECINFO:
++ nfsd4_encode_secinfo(resp, op->status, &op->u.secinfo);
++ break;
+ case OP_SETATTR:
+ nfsd4_encode_setattr(resp, op->status, &op->u.setattr);
+ break;
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 71c686d..4f10257 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -35,7 +35,6 @@
+ #include <linux/nfsd/cache.h>
+ #include <linux/nfsd/xdr.h>
+ #include <linux/nfsd/syscall.h>
+-#include <linux/nfsd/interface.h>
+
+ #include <asm/uaccess.h>
+
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index 6ca2d24..5198573 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -19,6 +19,7 @@
+
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/svcauth_gss.h>
+ #include <linux/nfsd/nfsd.h>
+
+ #define NFSDDBG_FACILITY NFSDDBG_FH
+@@ -123,8 +124,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
+ int data_left = fh->fh_size/4;
+
+ error = nfserr_stale;
+- if (rqstp->rq_client == NULL)
+- goto out;
+ if (rqstp->rq_vers > 2)
+ error = nfserr_badhandle;
+ if (rqstp->rq_vers == 4 && fh->fh_size == 0)
+@@ -148,7 +147,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
+ fh->fh_fsid[1] = fh->fh_fsid[2];
+ }
+ if ((data_left -= len)<0) goto out;
+- exp = exp_find(rqstp->rq_client, fh->fh_fsid_type, datap, &rqstp->rq_chandle);
++ exp = rqst_exp_find(rqstp, fh->fh_fsid_type, datap);
+ datap += len;
+ } else {
+ dev_t xdev;
+@@ -159,19 +158,17 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
+ xdev = old_decode_dev(fh->ofh_xdev);
+ xino = u32_to_ino_t(fh->ofh_xino);
+ mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL);
+- exp = exp_find(rqstp->rq_client, FSID_DEV, tfh,
+- &rqstp->rq_chandle);
++ exp = rqst_exp_find(rqstp, FSID_DEV, tfh);
+ }
+
+- if (IS_ERR(exp) && (PTR_ERR(exp) == -EAGAIN
+- || PTR_ERR(exp) == -ETIMEDOUT)) {
+- error = nfserrno(PTR_ERR(exp));
++ error = nfserr_stale;
++ if (PTR_ERR(exp) == -ENOENT)
+ goto out;
+- }
+
+- error = nfserr_stale;
+- if (!exp || IS_ERR(exp))
++ if (IS_ERR(exp)) {
++ error = nfserrno(PTR_ERR(exp));
+ goto out;
++ }
+
+ /* Check if the request originated from a secure port. */
+ error = nfserr_perm;
+@@ -257,8 +254,19 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
+ if (error)
+ goto out;
+
++ if (!(access & MAY_LOCK)) {
++ /*
++ * pseudoflavor restrictions are not enforced on NLM,
++ * which clients virtually always use auth_sys for,
++ * even while using RPCSEC_GSS for NFS.
++ */
++ error = check_nfsd_access(exp, rqstp);
++ if (error)
++ goto out;
++ }
++
+ /* Finally, check access permissions. */
+- error = nfsd_permission(exp, dentry, access);
++ error = nfsd_permission(rqstp, exp, dentry, access);
+
+ if (error) {
+ dprintk("fh_verify: %s/%s permission failure, "
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index b2c7147..977a71f 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -278,7 +278,8 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
+ * echo thing > device-special-file-or-pipe
+ * by doing a CREATE with type==0
+ */
+- nfserr = nfsd_permission(newfhp->fh_export,
++ nfserr = nfsd_permission(rqstp,
++ newfhp->fh_export,
+ newfhp->fh_dentry,
+ MAY_WRITE|MAY_LOCAL_ACCESS);
+ if (nfserr && nfserr != nfserr_rofs)
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index ff55950..da33b25 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -492,6 +492,15 @@ out:
+ module_put_and_exit(0);
+ }
+
++static __be32 map_new_errors(u32 vers, __be32 nfserr)
++{
++ if (nfserr == nfserr_jukebox && vers == 2)
++ return nfserr_dropit;
++ if (nfserr == nfserr_wrongsec && vers < 4)
++ return nfserr_acces;
++ return nfserr;
++}
++
+ int
+ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ {
+@@ -534,6 +543,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+
+ /* Now call the procedure handler, and encode NFS status. */
+ nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
++ nfserr = map_new_errors(rqstp->rq_vers, nfserr);
+ if (nfserr == nfserr_jukebox && rqstp->rq_vers == 2)
+ nfserr = nfserr_dropit;
+ if (nfserr == nfserr_dropit) {
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 7e6aa24..8a3f520 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -113,7 +113,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+
+ while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
+
+- exp2 = exp_get_by_name(exp->ex_client, mnt, mounts, &rqstp->rq_chandle);
++ exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
+ if (IS_ERR(exp2)) {
+ err = PTR_ERR(exp2);
+ dput(mounts);
+@@ -135,21 +135,10 @@ out:
+ return err;
+ }
+
+-/*
+- * Look up one component of a pathname.
+- * N.B. After this call _both_ fhp and resfh need an fh_put
+- *
+- * If the lookup would cross a mountpoint, and the mounted filesystem
+- * is exported to the client with NFSEXP_NOHIDE, then the lookup is
+- * accepted as it stands and the mounted directory is
+- * returned. Otherwise the covered directory is returned.
+- * NOTE: this mountpoint crossing is not supported properly by all
+- * clients and is explicitly disallowed for NFSv3
+- * NeilBrown <neilb@cse.unsw.edu.au>
+- */
+ __be32
+-nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
+- int len, struct svc_fh *resfh)
++nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ const char *name, int len,
++ struct svc_export **exp_ret, struct dentry **dentry_ret)
+ {
+ struct svc_export *exp;
+ struct dentry *dparent;
+@@ -168,8 +157,6 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
+ exp = fhp->fh_export;
+ exp_get(exp);
+
+- err = nfserr_acces;
+-
+ /* Lookup the name, but don't follow links */
+ if (isdotent(name, len)) {
+ if (len==1)
+@@ -190,17 +177,15 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
+ dput(dentry);
+ dentry = dp;
+
+- exp2 = exp_parent(exp->ex_client, mnt, dentry,
+- &rqstp->rq_chandle);
+- if (IS_ERR(exp2)) {
++ exp2 = rqst_exp_parent(rqstp, mnt, dentry);
++ if (PTR_ERR(exp2) == -ENOENT) {
++ dput(dentry);
++ dentry = dget(dparent);
++ } else if (IS_ERR(exp2)) {
+ host_err = PTR_ERR(exp2);
+ dput(dentry);
+ mntput(mnt);
+ goto out_nfserr;
+- }
+- if (!exp2) {
+- dput(dentry);
+- dentry = dget(dparent);
+ } else {
+ exp_put(exp);
+ exp = exp2;
+@@ -223,6 +208,41 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
+ }
+ }
+ }
++ *dentry_ret = dentry;
++ *exp_ret = exp;
++ return 0;
++
++out_nfserr:
++ exp_put(exp);
++ return nfserrno(host_err);
++}
++
++/*
++ * Look up one component of a pathname.
++ * N.B. After this call _both_ fhp and resfh need an fh_put
++ *
++ * If the lookup would cross a mountpoint, and the mounted filesystem
++ * is exported to the client with NFSEXP_NOHIDE, then the lookup is
++ * accepted as it stands and the mounted directory is
++ * returned. Otherwise the covered directory is returned.
++ * NOTE: this mountpoint crossing is not supported properly by all
++ * clients and is explicitly disallowed for NFSv3
++ * NeilBrown <neilb@cse.unsw.edu.au>
++ */
++__be32
++nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
++ int len, struct svc_fh *resfh)
++{
++ struct svc_export *exp;
++ struct dentry *dentry;
++ __be32 err;
++
++ err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
++ if (err)
++ return err;
++ err = check_nfsd_access(exp, rqstp);
++ if (err)
++ goto out;
+ /*
+ * Note: we compose the file handle now, but as the
+ * dentry may be negative, it may need to be updated.
+@@ -230,16 +250,13 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
+ err = fh_compose(resfh, exp, dentry, fhp);
+ if (!err && !dentry->d_inode)
+ err = nfserr_noent;
+- dput(dentry);
+ out:
++ dput(dentry);
+ exp_put(exp);
+ return err;
+-
+-out_nfserr:
+- err = nfserrno(host_err);
+- goto out;
+ }
+
++
+ /*
+ * Set various file attributes.
+ * N.B. After this call fhp needs an fh_put
+@@ -311,7 +328,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
+ /* The size case is special. It changes the file as well as the attributes. */
+ if (iap->ia_valid & ATTR_SIZE) {
+ if (iap->ia_size < inode->i_size) {
+- err = nfsd_permission(fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE);
++ err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE);
+ if (err)
+ goto out;
+ }
+@@ -435,7 +452,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ /* Get inode */
+ error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR);
+ if (error)
+- goto out;
++ return error;
+
+ dentry = fhp->fh_dentry;
+ inode = dentry->d_inode;
+@@ -444,33 +461,25 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
+ if (host_error == -EINVAL) {
+- error = nfserr_attrnotsupp;
+- goto out;
++ return nfserr_attrnotsupp;
+ } else if (host_error < 0)
+ goto out_nfserr;
+
+ host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
+ if (host_error < 0)
+- goto out_nfserr;
++ goto out_release;
+
+- if (S_ISDIR(inode->i_mode)) {
++ if (S_ISDIR(inode->i_mode))
+ host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
+- if (host_error < 0)
+- goto out_nfserr;
+- }
+-
+- error = nfs_ok;
+
+-out:
++out_release:
+ posix_acl_release(pacl);
+ posix_acl_release(dpacl);
+- return (error);
+ out_nfserr:
+ if (host_error == -EOPNOTSUPP)
+- error = nfserr_attrnotsupp;
++ return nfserr_attrnotsupp;
+ else
+- error = nfserrno(host_error);
+- goto out;
++ return nfserrno(host_error);
+ }
+
+ static struct posix_acl *
+@@ -607,7 +616,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
+
+ sresult |= map->access;
+
+- err2 = nfsd_permission(export, dentry, map->how);
++ err2 = nfsd_permission(rqstp, export, dentry, map->how);
+ switch (err2) {
+ case nfs_ok:
+ result |= map->access;
+@@ -1018,7 +1027,7 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+ __be32 err;
+
+ if (file) {
+- err = nfsd_permission(fhp->fh_export, fhp->fh_dentry,
++ err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+ MAY_READ|MAY_OWNER_OVERRIDE);
+ if (err)
+ goto out;
+@@ -1047,7 +1056,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+ __be32 err = 0;
+
+ if (file) {
+- err = nfsd_permission(fhp->fh_export, fhp->fh_dentry,
++ err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+ MAY_WRITE|MAY_OWNER_OVERRIDE);
+ if (err)
+ goto out;
+@@ -1776,7 +1785,8 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
+ * Check for a user's access permissions to this inode.
+ */
+ __be32
+-nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
++nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
++ struct dentry *dentry, int acc)
+ {
+ struct inode *inode = dentry->d_inode;
+ int err;
+@@ -1807,7 +1817,7 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
+ */
+ if (!(acc & MAY_LOCAL_ACCESS))
+ if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
+- if (EX_RDONLY(exp) || IS_RDONLY(inode))
++ if (EX_RDONLY(exp, rqstp) || IS_RDONLY(inode))
+ return nfserr_rofs;
+ if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
+ return nfserr_perm;
+diff --git a/fs/open.c b/fs/open.c
+index 0d515d1..c32aba0 100644
+--- a/fs/open.c
++++ b/fs/open.c
+@@ -255,24 +255,26 @@ static long do_sys_truncate(const char __user * path, loff_t length)
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto dput_and_out;
+
+- /*
+- * Make sure that there are no leases.
+- */
+- error = break_lease(inode, FMODE_WRITE);
++ error = get_write_access(inode);
+ if (error)
+ goto dput_and_out;
+
+- error = get_write_access(inode);
++ /*
++ * Make sure that there are no leases. get_write_access() protects
++ * against the truncate racing with a lease-granting setlease().
++ */
++ error = break_lease(inode, FMODE_WRITE);
+ if (error)
+- goto dput_and_out;
++ goto put_write_and_out;
+
+ error = locks_verify_truncate(inode, NULL, length);
+ if (!error) {
+ DQUOT_INIT(inode);
+ error = do_truncate(nd.dentry, length, 0, NULL);
+ }
+- put_write_access(inode);
+
++put_write_and_out:
++ put_write_access(inode);
+ dput_and_out:
+ path_release(&nd);
+ out:
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index b3ae77c..9df3553 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -855,7 +855,7 @@ extern void locks_init_lock(struct file_lock *);
+ extern void locks_copy_lock(struct file_lock *, struct file_lock *);
+ extern void locks_remove_posix(struct file *, fl_owner_t);
+ extern void locks_remove_flock(struct file *);
+-extern int posix_test_lock(struct file *, struct file_lock *);
++extern void posix_test_lock(struct file *, struct file_lock *);
+ extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
+ extern int posix_lock_file_wait(struct file *, struct file_lock *);
+ extern int posix_unblock_lock(struct file *, struct file_lock *);
+diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
+index 246de1d..df008eb 100644
+--- a/include/linux/lockd/bind.h
++++ b/include/linux/lockd/bind.h
+@@ -27,6 +27,7 @@ struct nlmsvc_binding {
+ struct nfs_fh *,
+ struct file **);
+ void (*fclose)(struct file *);
++ time_t (*get_grace_period)(void);
+ };
+
+ extern struct nlmsvc_binding * nlmsvc_ops;
+@@ -38,4 +39,12 @@ extern int nlmclnt_proc(struct inode *, int, struct file_lock *);
+ extern int lockd_up(int proto);
+ extern void lockd_down(void);
+
++time_t get_nfs_grace_period(void);
++
++#ifdef CONFIG_NFSD_V4
++time_t get_nfs4_grace_period(void);
++#else
++static inline time_t get_nfs4_grace_period(void) {return 0;}
++#endif
++
+ #endif /* LINUX_LOCKD_BIND_H */
+diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
+index 9f62d61..78feb7b 100644
+--- a/include/linux/nfsd/export.h
++++ b/include/linux/nfsd/export.h
+@@ -42,6 +42,9 @@
+ #define NFSEXP_NOACL 0x8000 /* reserved for possible ACL related use */
+ #define NFSEXP_ALLFLAGS 0xFE3F
+
++/* The flags that may vary depending on security flavor: */
++#define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
++ | NFSEXP_ALLSQUASH)
+
+ #ifdef __KERNEL__
+
+@@ -64,6 +67,19 @@ struct nfsd4_fs_locations {
+ int migrated;
+ };
+
++/*
++ * We keep an array of pseudoflavors with the export, in order from most
++ * to least preferred. For the foreseeable future, we don't expect more
++ * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, spkm3,
++ * spkm3i, and spkm3p (and using all 8 at once should be rare).
++ */
++#define MAX_SECINFO_LIST 8
++
++struct exp_flavor_info {
++ u32 pseudoflavor;
++ u32 flags;
++};
++
+ struct svc_export {
+ struct cache_head h;
+ struct auth_domain * ex_client;
+@@ -76,6 +92,8 @@ struct svc_export {
+ int ex_fsid;
+ unsigned char * ex_uuid; /* 16 byte fsid */
+ struct nfsd4_fs_locations ex_fslocs;
++ int ex_nflavors;
++ struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST];
+ };
+
+ /* an "export key" (expkey) maps a filehandlefragement to an
+@@ -95,10 +113,22 @@ struct svc_expkey {
+
+ #define EX_SECURE(exp) (!((exp)->ex_flags & NFSEXP_INSECURE_PORT))
+ #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC))
+-#define EX_RDONLY(exp) ((exp)->ex_flags & NFSEXP_READONLY)
+ #define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE)
+ #define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
+
++static inline int EX_RDONLY(struct svc_export *exp, struct svc_rqst *rqstp)
++{
++ struct exp_flavor_info *f;
++ struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
++
++ for (f = exp->ex_flavors; f < end; f++) {
++ if (f->pseudoflavor == rqstp->rq_flavor)
++ return f->flags & NFSEXP_READONLY;
++ }
++ return exp->ex_flags & NFSEXP_READONLY;
++}
++
++__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
+
+ /*
+ * Function declarations
+@@ -112,13 +142,19 @@ struct svc_export * exp_get_by_name(struct auth_domain *clp,
+ struct vfsmount *mnt,
+ struct dentry *dentry,
+ struct cache_req *reqp);
++struct svc_export * rqst_exp_get_by_name(struct svc_rqst *,
++ struct vfsmount *,
++ struct dentry *);
+ struct svc_export * exp_parent(struct auth_domain *clp,
+ struct vfsmount *mnt,
+ struct dentry *dentry,
+ struct cache_req *reqp);
++struct svc_export * rqst_exp_parent(struct svc_rqst *,
++ struct vfsmount *mnt,
++ struct dentry *dentry);
+ int exp_rootfh(struct auth_domain *,
+ char *path, struct knfsd_fh *, int maxsize);
+-__be32 exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq);
++__be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
+ __be32 nfserrno(int errno);
+
+ extern struct cache_detail svc_export_cache;
+@@ -135,6 +171,7 @@ static inline void exp_get(struct svc_export *exp)
+ extern struct svc_export *
+ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv,
+ struct cache_req *reqp);
++struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
+
+ #endif /* __KERNEL__ */
+
+diff --git a/include/linux/nfsd/interface.h b/include/linux/nfsd/interface.h
+deleted file mode 100644
+index af09797..0000000
+--- a/include/linux/nfsd/interface.h
++++ /dev/null
+@@ -1,13 +0,0 @@
+-/*
+- * include/linux/nfsd/interface.h
+- *
+- * defines interface between nfsd and other bits of
+- * the kernel. Particularly filesystems (eventually).
+- *
+- * Copyright (C) 2000 Neil Brown <neilb@cse.unsw.edu.au>
+- */
+-
+-#ifndef LINUX_NFSD_INTERFACE_H
+-#define LINUX_NFSD_INTERFACE_H
+-
+-#endif /* LINUX_NFSD_INTERFACE_H */
+diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
+index 72feac5..e452256 100644
+--- a/include/linux/nfsd/nfsd.h
++++ b/include/linux/nfsd/nfsd.h
+@@ -22,7 +22,6 @@
+ #include <linux/nfsd/export.h>
+ #include <linux/nfsd/auth.h>
+ #include <linux/nfsd/stats.h>
+-#include <linux/nfsd/interface.h>
+ /*
+ * nfsd version
+ */
+@@ -72,6 +71,9 @@ int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+ struct svc_export **expp);
+ __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
+ const char *, int, struct svc_fh *);
++__be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
++ const char *, int,
++ struct svc_export **, struct dentry **);
+ __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
+ struct iattr *, int, time_t);
+ #ifdef CONFIG_NFSD_V4
+@@ -120,7 +122,8 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
+ struct kstatfs *);
+
+ int nfsd_notify_change(struct inode *, struct iattr *);
+-__be32 nfsd_permission(struct svc_export *, struct dentry *, int);
++__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
++ struct dentry *, int);
+ int nfsd_sync_dir(struct dentry *dp);
+
+ #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+@@ -149,6 +152,7 @@ extern int nfsd_max_blksize;
+ * NFSv4 State
+ */
+ #ifdef CONFIG_NFSD_V4
++extern unsigned int max_delegations;
+ void nfs4_state_init(void);
+ int nfs4_state_start(void);
+ void nfs4_state_shutdown(void);
+@@ -236,6 +240,7 @@ void nfsd_lockd_shutdown(void);
+ #define nfserr_badname __constant_htonl(NFSERR_BADNAME)
+ #define nfserr_cb_path_down __constant_htonl(NFSERR_CB_PATH_DOWN)
+ #define nfserr_locked __constant_htonl(NFSERR_LOCKED)
++#define nfserr_wrongsec __constant_htonl(NFSERR_WRONGSEC)
+ #define nfserr_replay_me __constant_htonl(NFSERR_REPLAY_ME)
+
+ /* error codes for internal use */
+diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
+index ab5c236..db348f7 100644
+--- a/include/linux/nfsd/state.h
++++ b/include/linux/nfsd/state.h
+@@ -67,7 +67,7 @@ struct nfs4_cb_recall {
+ int cbr_trunc;
+ stateid_t cbr_stateid;
+ u32 cbr_fhlen;
+- u32 cbr_fhval[NFS4_FHSIZE];
++ char cbr_fhval[NFS4_FHSIZE];
+ struct nfs4_delegation *cbr_dp;
+ };
+
+@@ -224,6 +224,7 @@ struct nfs4_file {
+ struct inode *fi_inode;
+ u32 fi_id; /* used with stateowner->so_id
+ * for stateid_hashtbl hash */
++ bool fi_had_conflict;
+ };
+
+ /*
+diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
+index 09799bc..1b65326 100644
+--- a/include/linux/nfsd/xdr4.h
++++ b/include/linux/nfsd/xdr4.h
+@@ -293,6 +293,12 @@ struct nfsd4_rename {
+ struct nfsd4_change_info rn_tinfo; /* response */
+ };
+
++struct nfsd4_secinfo {
++ u32 si_namelen; /* request; NOTE(review): presumably length of si_name - confirm */
++ char *si_name; /* request; NOTE(review): presumably the name to look up - confirm */
++ struct svc_export *si_exp; /* response */
++};
++
+ struct nfsd4_setattr {
+ stateid_t sa_stateid; /* request */
+ u32 sa_bmval[2]; /* request */
+@@ -365,6 +371,7 @@ struct nfsd4_op {
+ struct nfsd4_remove remove;
+ struct nfsd4_rename rename;
+ clientid_t renew;
++ struct nfsd4_secinfo secinfo;
+ struct nfsd4_setattr setattr;
+ struct nfsd4_setclientid setclientid;
+ struct nfsd4_setclientid_confirm setclientid_confirm;
+diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
+index 5eca9e4..f76f705 100644
+--- a/include/linux/sunrpc/gss_api.h
++++ b/include/linux/sunrpc/gss_api.h
+@@ -58,6 +58,7 @@ u32 gss_unwrap(
+ u32 gss_delete_sec_context(
+ struct gss_ctx **ctx_id);
+
++u32 gss_svc_to_pseudoflavor(struct gss_api_mech *, u32 service);
+ u32 gss_pseudoflavor_to_service(struct gss_api_mech *, u32 pseudoflavor);
+ char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service);
+
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 4a7ae8a..211f8da 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -212,6 +212,7 @@ struct svc_rqst {
+ struct svc_pool * rq_pool; /* thread pool */
+ struct svc_procedure * rq_procinfo; /* procedure info */
+ struct auth_ops * rq_authop; /* authentication flavour */
++ u32 rq_flavor; /* pseudoflavor */
+ struct svc_cred rq_cred; /* auth info */
+ struct sk_buff * rq_skbuff; /* fast recv inet buffer */
+ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
+@@ -248,6 +249,7 @@ struct svc_rqst {
+ */
+ /* Catering to nfsd */
+ struct auth_domain * rq_client; /* RPC peer info */
++ struct auth_domain * rq_gssclient; /* "gss/"-style peer info */
+ struct svc_cacherep * rq_cacherep; /* cache info */
+ struct knfsd_fh * rq_reffh; /* Referrence filehandle, used to
+ * determine what device number
+diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
+index de92619..22e1ef8 100644
+--- a/include/linux/sunrpc/svcauth.h
++++ b/include/linux/sunrpc/svcauth.h
+@@ -127,6 +127,7 @@ extern struct auth_domain *auth_unix_lookup(struct in_addr addr);
+ extern int auth_unix_forget_old(struct auth_domain *dom);
+ extern void svcauth_unix_purge(void);
+ extern void svcauth_unix_info_release(void *);
++extern int svcauth_unix_set_client(struct svc_rqst *rqstp);
+
+ static inline unsigned long hash_str(char *name, int bits)
+ {
+diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h
+index 5a5db16..442b061 100644
+--- a/include/linux/sunrpc/svcauth_gss.h
++++ b/include/linux/sunrpc/svcauth_gss.h
+@@ -22,6 +22,8 @@
+ int gss_svc_init(void);
+ void gss_svc_shutdown(void);
+ int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name);
++u32 svcauth_gss_flavor(struct auth_domain *dom);
++
+
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */
+diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
+index 7b19432..03e0b8b 100644
+--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
++++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
+@@ -231,6 +231,7 @@ static struct pf_desc gss_kerberos_pfs[] = {
+ static struct gss_api_mech gss_kerberos_mech = {
+ .gm_name = "krb5",
+ .gm_owner = THIS_MODULE,
++ .gm_oid = {9, (void *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02"},
+ .gm_ops = &gss_kerberos_ops,
+ .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs),
+ .gm_pfs = gss_kerberos_pfs,
+diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
+index 2687251..5c4e983 100644
+--- a/net/sunrpc/auth_gss/gss_mech_switch.c
++++ b/net/sunrpc/auth_gss/gss_mech_switch.c
+@@ -194,6 +194,21 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
+ EXPORT_SYMBOL(gss_mech_get_by_pseudoflavor);
+
+ u32
++gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
++{
++ int i; /* index into gm->gm_pfs[] */
++ /* Return the pseudoflavor of the first gm_pfs[] entry whose service matches. */
++ for (i = 0; i < gm->gm_pf_num; i++) {
++ if (gm->gm_pfs[i].service == service) {
++ return gm->gm_pfs[i].pseudoflavor;
++ }
++ }
++ return RPC_AUTH_MAXFLAVOR; /* no entry matched: illegal value */
++}
++
++EXPORT_SYMBOL(gss_svc_to_pseudoflavor);
++
++u32
+ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
+ {
+ int i;
+diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
+index 7e15aa6..9331119 100644
+--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
++++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
+@@ -217,6 +217,7 @@ static struct pf_desc gss_spkm3_pfs[] = {
+ static struct gss_api_mech gss_spkm3_mech = {
+ .gm_name = "spkm3",
+ .gm_owner = THIS_MODULE,
++ .gm_oid = {7, "\053\006\001\005\005\001\003"},
+ .gm_ops = &gss_spkm3_ops,
+ .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs),
+ .gm_pfs = gss_spkm3_pfs,
+diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
+index 099a983..34d6763 100644
+--- a/net/sunrpc/auth_gss/svcauth_gss.c
++++ b/net/sunrpc/auth_gss/svcauth_gss.c
+@@ -743,6 +743,15 @@ find_gss_auth_domain(struct gss_ctx *ctx, u32 svc)
+
+ static struct auth_ops svcauthops_gss;
+
++u32 svcauth_gss_flavor(struct auth_domain *dom)
++{
++ struct gss_domain *gd = container_of(dom, struct gss_domain, h);
++ /* Return the pseudoflavor of the gss_domain that embeds dom as member h. */
++ return gd->pseudoflavor;
++}
++
++EXPORT_SYMBOL(svcauth_gss_flavor);
++
+ int
+ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
+ {
+@@ -913,10 +922,23 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
+ struct gss_svc_data *svcdata = rqstp->rq_auth_data;
+ struct rsc *rsci = svcdata->rsci;
+ struct rpc_gss_wire_cred *gc = &svcdata->clcred;
++ int stat;
+
+- rqstp->rq_client = find_gss_auth_domain(rsci->mechctx, gc->gc_svc);
+- if (rqstp->rq_client == NULL)
++ /*
++ * A gss export can be specified either by:
++ * export *(sec=krb5,rw)
++ * or by
++ * export gss/krb5(rw)
++ * The latter is deprecated; but for backwards compatibility reasons
++ * the nfsd code will still fall back on trying it if the former
++ * doesn't work; so we try to make both available to nfsd, below.
++ */
++ rqstp->rq_gssclient = find_gss_auth_domain(rsci->mechctx, gc->gc_svc);
++ if (rqstp->rq_gssclient == NULL)
+ return SVC_DENIED;
++ stat = svcauth_unix_set_client(rqstp);
++ if (stat == SVC_DROP)
++ return stat;
+ return SVC_OK;
+ }
+
+@@ -1088,7 +1110,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
+ svc_putnl(resv, GSS_SEQ_WIN);
+ if (svc_safe_putnetobj(resv, &rsip->out_token))
+ goto drop;
+- rqstp->rq_client = NULL;
+ }
+ goto complete;
+ case RPC_GSS_PROC_DESTROY:
+@@ -1131,6 +1152,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
+ }
+ svcdata->rsci = rsci;
+ cache_get(&rsci->h);
++ rqstp->rq_flavor = gss_svc_to_pseudoflavor(
++ rsci->mechctx->mech_type, gc->gc_svc);
+ ret = SVC_OK;
+ goto out;
+ }
+@@ -1317,6 +1340,9 @@ out_err:
+ if (rqstp->rq_client)
+ auth_domain_put(rqstp->rq_client);
+ rqstp->rq_client = NULL;
++ if (rqstp->rq_gssclient)
++ auth_domain_put(rqstp->rq_gssclient);
++ rqstp->rq_gssclient = NULL;
+ if (rqstp->rq_cred.cr_group_info)
+ put_group_info(rqstp->rq_cred.cr_group_info);
+ rqstp->rq_cred.cr_group_info = NULL;
+diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
+index 07dcd20..4114794 100644
+--- a/net/sunrpc/svcauth_unix.c
++++ b/net/sunrpc/svcauth_unix.c
+@@ -5,6 +5,7 @@
+ #include <linux/sunrpc/xdr.h>
+ #include <linux/sunrpc/svcsock.h>
+ #include <linux/sunrpc/svcauth.h>
++#include <linux/sunrpc/gss_api.h>
+ #include <linux/err.h>
+ #include <linux/seq_file.h>
+ #include <linux/hash.h>
+@@ -637,7 +638,7 @@ static int unix_gid_find(uid_t uid, struct group_info **gip,
+ }
+ }
+
+-static int
++int
+ svcauth_unix_set_client(struct svc_rqst *rqstp)
+ {
+ struct sockaddr_in *sin = svc_addr_in(rqstp);
+@@ -672,6 +673,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
+ return SVC_OK;
+ }
+
++EXPORT_SYMBOL(svcauth_unix_set_client);
++
+ static int
+ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
+ {
+@@ -707,6 +710,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
+ svc_putnl(resv, RPC_AUTH_NULL);
+ svc_putnl(resv, 0);
+
++ rqstp->rq_flavor = RPC_AUTH_NULL;
+ return SVC_OK;
+ }
+
+@@ -784,6 +788,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
+ svc_putnl(resv, RPC_AUTH_NULL);
+ svc_putnl(resv, 0);
+
++ rqstp->rq_flavor = RPC_AUTH_UNIX;
+ return SVC_OK;
+
+ badcred: