NFSv4: Fix state recovery when the client runs over the grace period
If the client for some reason is not able to recover all its state within
the time allotted for the grace period, and the server reboots again, the
client is not allowed to recover the state that was 'lost' using reboot
recovery.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 300faba..e5cd8ca 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -821,6 +821,27 @@
nfs4_recover_state(clp);
}
+static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
+{
+ /* Flag @state for reboot reclaim; returns 1 if it was flagged, 0 if @state is already awaiting nograce recovery. */
+ set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
+ /* Don't recover state that expired before the reboot */
+ if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) {
+ clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); /* undo the flag set above: nograce wins */
+ return 0;
+ }
+ set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); /* client-level bit that reclaimer() tests before running reboot recovery */
+ return 1;
+}
+
+static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
+{ /* Queue @state for nograce recovery; always returns 1. */
+ set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
+ clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); /* drop any pending reboot-reclaim flag on this state */
+ set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); /* client-level bit that reclaimer() tests before running nograce recovery */
+ return 1;
+}
+
static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
{
struct inode *inode = state->inode;
@@ -869,6 +890,8 @@
* server that doesn't support a grace period.
*/
list_for_each_entry(state, &sp->so_states, open_states) {
+ if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
+ continue;
if (state->state == 0)
continue;
status = ops->recover_open(sp, state);
@@ -888,8 +911,7 @@
printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
__func__, status);
case -ENOENT:
- case -NFS4ERR_RECLAIM_BAD:
- case -NFS4ERR_RECLAIM_CONFLICT:
+ case -ESTALE:
/*
* Open state on this file cannot be recovered
* All we can do is revert to using the zero stateid.
@@ -899,8 +921,13 @@
/* Mark the file as being 'closed' */
state->state = 0;
break;
+ case -NFS4ERR_RECLAIM_BAD:
+ case -NFS4ERR_RECLAIM_CONFLICT:
+ nfs4_state_mark_reclaim_nograce(sp->so_client, state);
+ break;
case -NFS4ERR_EXPIRED:
case -NFS4ERR_NO_GRACE:
+ nfs4_state_mark_reclaim_nograce(sp->so_client, state);
case -NFS4ERR_STALE_CLIENTID:
goto out_err;
}
@@ -910,12 +937,26 @@
return status;
}
-static void nfs4_state_mark_reclaim(struct nfs_client *clp)
+static void nfs4_clear_open_state(struct nfs4_state *state)
+{
+ struct nfs4_lock_state *lock;
+ /* Clear the delegation and open-mode flags on @state, then reset every lock state hanging off it. */
+ clear_bit(NFS_DELEGATED_STATE, &state->flags);
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ clear_bit(NFS_O_RDWR_STATE, &state->flags);
+ list_for_each_entry(lock, &state->lock_states, ls_locks) {
+ lock->ls_seqid.counter = 0; /* lock sequence ids restart from zero */
+ lock->ls_seqid.flags = 0;
+ lock->ls_flags &= ~NFS_LOCK_INITIALIZED; /* lock state is no longer marked as initialized */
+ }
+}
+
+static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
{
struct nfs4_state_owner *sp;
struct rb_node *pos;
struct nfs4_state *state;
- struct nfs4_lock_state *lock;
/* Reset all sequence ids to zero */
for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
@@ -924,20 +965,60 @@
sp->so_seqid.flags = 0;
spin_lock(&sp->so_lock);
list_for_each_entry(state, &sp->so_states, open_states) {
- clear_bit(NFS_DELEGATED_STATE, &state->flags);
- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
- clear_bit(NFS_O_RDWR_STATE, &state->flags);
- list_for_each_entry(lock, &state->lock_states, ls_locks) {
- lock->ls_seqid.counter = 0;
- lock->ls_seqid.flags = 0;
- lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
- }
+ if (mark_reclaim(clp, state))
+ nfs4_clear_open_state(state);
}
spin_unlock(&sp->so_lock);
}
}
+static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
+{ /* Begin reboot recovery for @clp: mark its delegations, then its open states. */
+ /* Mark all delegations for reclaim */
+ nfs_delegation_mark_reclaim(clp);
+ nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); /* applies the reboot marker to each open state of @clp */
+}
+
+static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
+{
+ struct nfs4_state_owner *sp;
+ struct rb_node *pos;
+ struct nfs4_state *state;
+ /* Finish reboot reclaim for @clp: states still flagged for reboot reclaim are handed to nograce recovery. */
+ /* reclaimer() consumes this bit with test_and_clear_bit() before calling us, so returning early when it is clear would skip the cleanup. */
+ clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
+
+ for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+ sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+ spin_lock(&sp->so_lock);
+ list_for_each_entry(state, &sp->so_states, open_states) {
+ if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags))
+ continue;
+ nfs4_state_mark_reclaim_nograce(clp, state);
+ }
+ spin_unlock(&sp->so_lock);
+ }
+ /* Delegations that were marked for reclaim but not claimed are reaped here. */
+ nfs_delegation_reap_unclaimed(clp);
+}
+
+static void nfs_delegation_clear_all(struct nfs_client *clp)
+{ /* Mark every delegation of @clp for reclaim and reap them straight away. */
+ nfs_delegation_mark_reclaim(clp);
+ nfs_delegation_reap_unclaimed(clp); /* NOTE(review): nothing is claimed in between, so presumably all delegations are dropped - confirm */
+}
+
+static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
+{ /* Begin nograce recovery for @clp: clear its delegations, then mark its open states. */
+ nfs_delegation_clear_all(clp);
+ nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); /* applies the nograce marker to each open state of @clp */
+}
+
+static void nfs4_state_end_reclaim_nograce(struct nfs_client *clp)
+{ /* Nograce recovery is finished: clear the client-level request bit. */
+ clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
+}
+
static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
{
struct rb_node *pos;
@@ -964,11 +1045,25 @@
/* Yes there are: try to renew the old lease */
status = nfs4_proc_renew(clp, cred);
put_rpccred(cred);
+ switch (status) {
+ case -NFS4ERR_CB_PATH_DOWN:
+ set_bit(NFS4CLNT_CB_PATH_DOWN, &clp->cl_state);
+ break;
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_LEASE_MOVED:
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ nfs4_state_start_reclaim_reboot(clp);
+ break;
+ case -NFS4ERR_EXPIRED:
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ nfs4_state_start_reclaim_nograce(clp);
+ }
return status;
}
/* "reboot" to ensure we clear all state on the server */
clp->cl_boot_time = CURRENT_TIME;
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
return status;
}
@@ -993,7 +1088,6 @@
static int reclaimer(void *ptr)
{
struct nfs_client *clp = ptr;
- const struct nfs4_state_recovery_ops *ops;
int status = 0;
allow_signal(SIGKILL);
@@ -1001,47 +1095,51 @@
/* Ensure exclusive access to NFSv4 state */
down_write(&clp->cl_sem);
while (!list_empty(&clp->cl_superblocks)) {
- ops = &nfs4_network_partition_recovery_ops;
status = nfs4_check_lease(clp);
- switch (status) {
- case 0:
- case -NFS4ERR_CB_PATH_DOWN:
- goto out;
- case -NFS4ERR_STALE_CLIENTID:
- case -NFS4ERR_LEASE_MOVED:
- ops = &nfs4_reboot_recovery_ops;
- }
- /* We're going to have to re-establish a clientid */
- nfs4_state_mark_reclaim(clp);
-
- status = nfs4_reclaim_lease(clp);
- if (status) {
- if (status == -EAGAIN)
- continue;
- goto out_error;
- }
-
- /* Mark all delegations for reclaim */
- nfs_delegation_mark_reclaim(clp);
- /* Note: list is protected by exclusive lock on cl->cl_sem */
- status = nfs4_do_reclaim(clp, ops);
- if (status < 0) {
- if (status == -NFS4ERR_NO_GRACE) {
- ops = &nfs4_network_partition_recovery_ops;
- status = nfs4_do_reclaim(clp, ops);
+ if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
+ /* We're going to have to re-establish a clientid */
+ status = nfs4_reclaim_lease(clp);
+ if (status) {
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ if (status == -EAGAIN)
+ continue;
+ goto out_error;
}
- if (status == -NFS4ERR_STALE_CLIENTID)
- continue;
- if (status == -NFS4ERR_EXPIRED)
- continue;
}
- nfs_delegation_reap_unclaimed(clp);
+
+ /* First recover reboot state... */
+ if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
+ /* Note: list is protected by exclusive lock on cl->cl_sem */
+ status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops);
+ if (status == -NFS4ERR_STALE_CLIENTID) {
+ set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
+ continue;
+ }
+ nfs4_state_end_reclaim_reboot(clp);
+ continue;
+ }
+
+ /* Now recover expired state... */
+ if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
+ /* Note: list is protected by exclusive lock on cl->cl_sem */
+ status = nfs4_do_reclaim(clp, &nfs4_nograce_recovery_ops);
+ if (status < 0) {
+ set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
+ if (status == -NFS4ERR_STALE_CLIENTID)
+ continue;
+ if (status == -NFS4ERR_EXPIRED)
+ continue;
+ goto out_error;
+ } else
+ nfs4_state_end_reclaim_nograce(clp);
+ continue;
+ }
break;
}
out:
up_write(&clp->cl_sem);
- if (status == -NFS4ERR_CB_PATH_DOWN)
+ if (test_and_clear_bit(NFS4CLNT_CB_PATH_DOWN, &clp->cl_state))
nfs_handle_cb_pathdown(clp);
nfs4_clear_recover_bit(clp);
nfs_put_client(clp);
@@ -1050,7 +1148,8 @@
out_error:
printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %s"
" with error %d\n", clp->cl_hostname, -status);
- set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+ nfs4_state_end_reclaim_reboot(clp);
goto out;
}