patch-2.2.18 linux/fs/nfsd/vfs.c
Next file: linux/fs/nls/Config.in
Previous file: linux/fs/nfsd/stats.c
Back to the patch index
Back to the overall index
- Lines: 1386
- Date:
Sun Oct 15 21:15:17 2000
- Orig file:
v2.2.17/fs/nfsd/vfs.c
- Orig date:
Fri Apr 21 12:46:44 2000
diff -u --new-file --recursive --exclude-from /usr/src/exclude v2.2.17/fs/nfsd/vfs.c linux/fs/nfsd/vfs.c
@@ -31,6 +31,10 @@
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
+#ifdef CONFIG_NFSD_V3
+#include <linux/nfs3.h>
+#include <linux/nfsd/xdr3.h>
+#endif /* CONFIG_NFSD_V3 */
#include <linux/nfsd/nfsfh.h>
#include <linux/quotaops.h>
@@ -41,16 +45,15 @@
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
#define NFSD_PARANOIA
-/* Open mode for nfsd_open */
-#define OPEN_READ 0
-#define OPEN_WRITE 1
-
-/* Hack until we have a macro check for mandatory locks. */
-#ifndef IS_ISMNDLK
-#define IS_ISMNDLK(i) (((i)->i_mode & (S_ISGID|S_IXGRP|S_IFMT)) \
- == (S_ISGID|S_IFREG))
-#endif
+/* We must ignore files (but only files) which might have mandatory
+ * locks on them because there is no way to know if the accesser has
+ * the lock.
+ */
+/* MANDATORY_LOCK taken from 2.3 */
+#define MANDATORY_LOCK(inode) \
+ (IS_MANDLOCK(inode) && ((inode)->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i))
/* Time difference margin in seconds for comparison. It is a
dynamically-tunable parameter via /proc/fs/nfs/time-diff-margin.
*/
@@ -83,65 +86,58 @@
static struct raparms * raparml = NULL;
static struct raparms * raparm_cache = NULL;
+
+/*
+ * We need to do a check-parent every time
+ * after we have locked the parent - to verify
+ * that the parent is still our parent and
+ * that we are still hashed onto it..
+ *
+ * This is required in case two processes race
+ * on removing (or moving) the same entry: the
+ * parent lock will serialize them, but the
+ * other process will be too late..
+ *
+ * Note that this nfsd_check_parent is identical
+ * to the check_parent in linux/fs/namei.c.
+ */
+#define nfsd_check_parent(dir, dentry) \
+ ((dir) == (dentry)->d_parent && !list_empty(&dentry->d_hash))
+
/*
* Lock a parent directory following the VFS locking protocol.
*/
int
fh_lock_parent(struct svc_fh *parent_fh, struct dentry *dchild)
{
- int nfserr = 0;
-
fh_lock(parent_fh);
/*
* Make sure the parent->child relationship still holds,
* and that the child is still hashed.
*/
- if (dchild->d_parent != parent_fh->fh_dentry)
- goto out_not_parent;
- if (list_empty(&dchild->d_hash))
- goto out_not_hashed;
-out:
- return nfserr;
+ if (nfsd_check_parent(parent_fh->fh_dentry, dchild))
+ return 0;
-out_not_parent:
printk(KERN_WARNING
- "fh_lock_parent: %s/%s parent changed\n",
+ "fh_lock_parent: %s/%s parent changed or child unhashed\n",
dchild->d_parent->d_name.name, dchild->d_name.name);
- goto out_unlock;
-out_not_hashed:
- printk(KERN_WARNING
- "fh_lock_parent: %s/%s unhashed\n",
- dchild->d_parent->d_name.name, dchild->d_name.name);
-out_unlock:
- nfserr = nfserr_noent;
- fh_unlock(parent_fh);
- goto out;
-}
-/*
- * Deny access to certain file systems
- */
-static inline int
-fs_off_limits(struct super_block *sb)
-{
- return !sb || sb->s_magic == NFS_SUPER_MAGIC
- || sb->s_magic == PROC_SUPER_MAGIC;
+ fh_unlock(parent_fh);
+ return nfserr_noent;
}
-/*
- * Check whether directory is a mount point, but it is all right if
- * this is precisely the local mount point being exported.
- */
-static inline int
-nfsd_iscovered(struct dentry *dentry, struct svc_export *exp)
-{
- return (dentry != dentry->d_covers &&
- dentry != exp->ex_dentry);
-}
/*
* Look up one component of a pathname.
* N.B. After this call _both_ fhp and resfh need an fh_put
+ *
+ * If the lookup would cross a mountpoint, and the mounted filesystem
+ * is exported to the client with NFSEXP_CROSSMNT, then the lookup is
+ * accepted as it stands and the mounted directory is
+ * returned. Otherwise the covered directory is returned.
+ * NOTE: this mountpoint crossing is not supported properly by all
+ * clients and is explicitly disallowed for NFSv3
+ * NeilBrown <neilb@cse.unsw.edu.au>
*/
int
nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
@@ -166,33 +162,62 @@
if (err)
goto out;
#endif
- err = nfserr_noent;
- if (fs_off_limits(dparent->d_sb))
- goto out;
err = nfserr_acces;
- if (nfsd_iscovered(dparent, exp))
- goto out;
/* Lookup the name, but don't follow links */
- dchild = lookup_dentry(name, dget(dparent), 0);
- if (IS_ERR(dchild))
- goto out_nfserr;
- /*
- * check if we have crossed a mount point ...
- */
- if (dchild->d_sb != dparent->d_sb) {
- struct dentry *tdentry;
- tdentry = dchild->d_covers;
- if (tdentry == dchild)
- goto out_dput;
- dput(dchild);
- dchild = dget(tdentry);
- if (dchild->d_sb != dparent->d_sb) {
-printk("nfsd_lookup: %s/%s crossed mount point!\n", dparent->d_name.name, dchild->d_name.name);
- goto out_dput;
+ if (strcmp(name, "..")==0) {
+ /* checking mountpoint crossing is very different when stepping up */
+ if (dparent == exp->ex_dentry) {
+ if (!EX_CROSSMNT(exp))
+ dchild = dget(dparent); /* .. == . just like at / */
+ else
+ {
+ struct svc_export *exp2 = NULL;
+ struct dentry *dp;
+ dchild = dparent->d_covers->d_parent;
+ for (dp=dchild;
+ exp2 == NULL && dp->d_covers->d_parent != dp;
+ dp=dp->d_covers->d_parent)
+ exp2 = exp_get(exp->ex_client, dp->d_inode->i_dev, dp->d_inode->i_ino);
+ if (exp2==NULL || dchild->d_sb != exp2->ex_dentry->d_sb) {
+ dchild = dget(dparent);
+ } else {
+ dget(dchild);
+ exp = exp2;
+ }
+ }
+ } else
+ dchild = dget(dparent->d_parent);
+ } else {
+ dchild = lookup_dentry(name, dget(dparent), 0);
+ if (IS_ERR(dchild))
+ goto out_nfserr;
+ /*
+ * check if we have crossed a mount point ...
+ */
+ if (dchild->d_sb != dparent->d_sb) {
+ struct svc_export *exp2 = NULL;
+ exp2 = exp_get(rqstp->rq_client,
+ dchild->d_inode->i_dev,
+ dchild->d_inode->i_ino);
+ if (exp2 && EX_CROSSMNT(exp2))
+ /* successfully crossed mount point */
+ exp = exp2;
+ else if (dchild->d_covers->d_sb == dparent->d_sb) {
+ /* stay in the original filesystem */
+ struct dentry *tdentry = dget(dchild->d_covers);
+ dput(dchild);
+ dchild = tdentry;
+ } else {
+ /* This cannot possibly happen */
+ printk("nfsd_lookup: %s/%s impossible mount point!\n", dparent->d_name.name, dchild->d_name.name);
+ dput(dchild);
+ err = nfserr_acces;
+ goto out;
+
+ }
}
}
-
/*
* Note: we compose the file handle now, but as the
* dentry may be negative, it may need to be updated.
@@ -207,10 +232,6 @@
out_nfserr:
err = nfserrno(-PTR_ERR(dchild));
goto out;
-out_dput:
- dput(dchild);
- err = nfserr_acces;
- goto out;
}
/*
@@ -226,6 +247,8 @@
int ftype = 0;
int imode;
int err;
+ kernel_cap_t saved_cap = 0;
+ int size_change = 0;
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
accmode |= MAY_WRITE;
@@ -234,39 +257,43 @@
/* Get inode */
err = fh_verify(rqstp, fhp, ftype, accmode);
- if (err)
+ if (err || !iap->ia_valid)
goto out;
dentry = fhp->fh_dentry;
inode = dentry->d_inode;
err = inode_change_ok(inode, iap);
- if (err) {
- /* It is very tricky. When you are not the file owner,
- but have the write permission, you should be allowed
- to set atime and mtime to the current time on the
- server. However, the NFS V2 protocol doesn't support
- it. It has been fixed in V3. Here we do this: if the
- current server time and atime/mtime are close enough,
- we use the current server time. */
-#define CURRENT_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
- if (iap->ia_mtime == iap->ia_atime
- && ((iap->ia_valid & (CURRENT_TIME_SET))
- == CURRENT_TIME_SET)) {
- time_t now = CURRENT_TIME;
- time_t delta = iap->ia_atime - now;
- if (delta < 0) delta = -delta;
- if (delta <= nfsd_time_diff_margin) {
- iap->ia_valid &= ~CURRENT_TIME_SET;
- goto current_time_ok;
- }
+ /* could be a "touch" (utimes) request where the user is not the owner but does
+ * have write permission. In this case the user should be allowed to set
+ * both times to the current time. We could just assume any such SETATTR
+ * is intended to set the times to "now", but we do a couple of simple tests
+ * to increase our confidence.
+ */
+#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
+#define MAX_TOUCH_TIME_ERROR (30*60)
+ if (err
+ && (iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET
+ && iap->ia_mtime == iap->ia_ctime
+ ) {
+ /* looks good. now just make sure time is in the right ballpark.
+ * solaris, at least, doesn't seem to care what the time request is
+ */
+ time_t delta = iap->ia_atime - CURRENT_TIME;
+ if (delta<0) delta = -delta;
+ if (delta < MAX_TOUCH_TIME_ERROR) {
+ /* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME
+ * this will cause notify_change to set these times to "now"
+ */
+ iap->ia_valid &= ~BOTH_TIME_SET;
+ err = inode_change_ok(inode, iap);
}
- goto out_nfserr;
}
-current_time_ok:
+ if (err)
+ goto out_nfserr;
- /* The size case is special... */
+ /* The size case is special. It changes the file as well as the attributes. */
if (iap->ia_valid & ATTR_SIZE) {
if (!S_ISREG(inode->i_mode))
printk("nfsd_setattr: size change??\n");
@@ -275,22 +302,12 @@
if (err)
goto out;
}
- DQUOT_INIT(inode);
err = get_write_access(inode);
- if (err) {
- DQUOT_DROP(inode);
+ if (err)
goto out_nfserr;
- }
- /* N.B. Should we update the inode cache here? */
- inode->i_size = iap->ia_size;
- if (inode->i_op && inode->i_op->truncate)
- inode->i_op->truncate(inode);
- mark_inode_dirty(inode);
- put_write_access(inode);
- DQUOT_DROP(inode);
- iap->ia_valid &= ~ATTR_SIZE;
- iap->ia_valid |= ATTR_MTIME;
- iap->ia_mtime = CURRENT_TIME;
+ size_change = 1;
+
+ DQUOT_INIT(inode);
}
imode = inode->i_mode;
@@ -312,24 +329,53 @@
}
/* Change the attributes. */
- if (iap->ia_valid) {
- kernel_cap_t saved_cap = 0;
- iap->ia_valid |= ATTR_CTIME;
- iap->ia_ctime = CURRENT_TIME;
- if (current->fsuid != 0) {
- saved_cap = current->cap_effective;
- cap_clear(current->cap_effective);
- }
- err = notify_change(dentry, iap);
- if (current->fsuid != 0)
- current->cap_effective = saved_cap;
- if (err)
- goto out_nfserr;
- if (EX_ISSYNC(fhp->fh_export))
- write_inode_now(inode);
+
+ iap->ia_valid |= ATTR_CTIME;
+ if (current->fsuid != 0) {
+ saved_cap = current->cap_effective;
+ cap_clear(current->cap_effective);
+ }
+#ifdef CONFIG_QUOTA
+ /* DQUOT_TRANSFER needs both ia_uid and ia_gid defined */
+ if (iap->ia_valid & (ATTR_UID|ATTR_GID)) {
+ if (! (iap->ia_valid & ATTR_UID))
+ iap->ia_uid = inode->i_uid;
+ if (! (iap->ia_valid & ATTR_GID))
+ iap->ia_gid = inode->i_gid;
+ iap->ia_valid |= ATTR_UID|ATTR_GID;
}
+#endif /* CONFIG_QUOTA */
+
+ fh_lock(fhp);
+#ifdef CONFIG_QUOTA
+ if (iap->ia_valid & (ATTR_UID|ATTR_GID))
+ err = DQUOT_TRANSFER(dentry, iap);
+ else
+#endif
+ err = notify_change(dentry, iap);
+
+ if (size_change) {
+ if (!err) {
+ vmtruncate(inode,iap->ia_size);
+ if (inode->i_op && inode->i_op->truncate)
+ inode->i_op->truncate(inode);
+ }
+ fh_unlock(fhp);
+ put_write_access(inode);
+ } else
+ fh_unlock(fhp);
+
+ if (current->fsuid != 0)
+ current->cap_effective = saved_cap;
+ if (err)
+ goto out_nfserr;
+ if (EX_ISSYNC(fhp->fh_export))
+ write_inode_now(inode);
err = 0;
+
+ /* Don't unlock inode; the nfssvc_release functions are supposed
+ * to do this. */
out:
return err;
@@ -338,20 +384,107 @@
goto out;
}
+#ifdef CONFIG_NFSD_V3
+/*
+ * Check server access rights to a file system object
+ */
+struct accessmap {
+ u32 access;
+ int how;
+};
+static struct accessmap nfs3_regaccess[] = {
+ { NFS3_ACCESS_READ, MAY_READ },
+ { NFS3_ACCESS_EXECUTE, MAY_EXEC },
+ { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_TRUNC },
+ { NFS3_ACCESS_EXTEND, MAY_WRITE },
+
+ { 0, 0 }
+};
+
+static struct accessmap nfs3_diraccess[] = {
+ { NFS3_ACCESS_READ, MAY_READ },
+ { NFS3_ACCESS_LOOKUP, MAY_EXEC },
+ { NFS3_ACCESS_MODIFY, MAY_EXEC|MAY_WRITE|MAY_TRUNC },
+ { NFS3_ACCESS_EXTEND, MAY_EXEC|MAY_WRITE },
+ { NFS3_ACCESS_DELETE, MAY_REMOVE },
+
+ { 0, 0 }
+};
+
+static struct accessmap nfs3_anyaccess[] = {
+ /* XXX: should we try to cover read/write here for clients that
+ * rely on us to do their access checking for special files? */
+
+ { 0, 0 }
+};
+
+int
+nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access)
+{
+ struct accessmap *map;
+ struct svc_export *export;
+ struct dentry *dentry;
+ u32 query, result = 0;
+ int error;
+
+ error = fh_verify(rqstp, fhp, 0, MAY_NOP);
+ if (error)
+ goto out;
+
+ export = fhp->fh_export;
+ dentry = fhp->fh_dentry;
+
+ if (S_ISREG(dentry->d_inode->i_mode)) {
+ map = nfs3_regaccess;
+ } else if (S_ISDIR(dentry->d_inode->i_mode)) {
+ map = nfs3_diraccess;
+ } else {
+ map = nfs3_anyaccess;
+ }
+
+ query = *access;
+ while (map->access) {
+ if (map->access & query) {
+ error = nfsd_permission(export, dentry, (map->how | NO_OWNER_OVERRIDE));
+ if (error == 0)
+ result |= map->access;
+ else if ((error == nfserr_perm) || (error == nfserr_acces)) {
+ /*
+ * This access type is denied; but the
+ * access query itself succeeds.
+ */
+ error = 0;
+ } else {
+ /*
+ * Some fatal error. Fail the query.
+ */
+ goto out;
+ }
+ }
+ map++;
+ }
+ *access = result;
+
+out:
+ return error;
+}
+#endif
+
+
+
/*
* Open an existing file or directory.
- * The wflag argument indicates write access.
+ * The access argument indicates the type of open (read/write/lock)
* N.B. After this call fhp needs an fh_put
*/
int
nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
- int wflag, struct file *filp)
+ int access, struct file *filp)
{
struct dentry *dentry;
struct inode *inode;
- int access, err;
+ int err;
- access = wflag? MAY_WRITE : MAY_READ;
err = fh_verify(rqstp, fhp, type, access);
if (err)
goto out;
@@ -368,24 +501,27 @@
if (!inode->i_op || !inode->i_op->default_file_ops)
goto out;
- if (wflag && (err = get_write_access(inode)) != 0)
+ if ((access & MAY_WRITE) && (err = get_write_access(inode)) != 0)
goto out_nfserr;
memset(filp, 0, sizeof(*filp));
filp->f_op = inode->i_op->default_file_ops;
filp->f_count = 1;
- filp->f_flags = wflag? O_WRONLY : O_RDONLY;
- filp->f_mode = wflag? FMODE_WRITE : FMODE_READ;
filp->f_dentry = dentry;
-
- if (wflag)
+ if (access & MAY_WRITE) {
+ filp->f_flags = O_WRONLY;
+ filp->f_mode = FMODE_WRITE;
DQUOT_INIT(inode);
+ } else {
+ filp->f_flags = O_RDONLY;
+ filp->f_mode = FMODE_READ;
+ }
err = 0;
if (filp->f_op && filp->f_op->open) {
err = filp->f_op->open(inode, filp);
if (err) {
- if (wflag)
+ if (access & MAY_WRITE)
put_write_access(inode);
/* I nearly added put_filp() call here, but this filp
@@ -419,17 +555,33 @@
filp->f_op->release(inode, filp);
if (filp->f_mode & FMODE_WRITE) {
put_write_access(inode);
- DQUOT_DROP(inode);
}
}
/*
* Sync a file
+ * As this calls fsync (not fdatasync) there is no need for a write_inode
+ * after it.
*/
void
-nfsd_sync(struct inode *inode, struct file *filp)
+nfsd_sync(struct file *filp)
{
+ dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name);
+ down(&filp->f_dentry->d_inode->i_sem);
filp->f_op->fsync(filp, filp->f_dentry);
+ up(&filp->f_dentry->d_inode->i_sem);
+}
+
+void
+nfsd_sync_dir(struct dentry *dp)
+{
+ struct inode *inode = dp->d_inode;
+ int (*fsync) (struct file *, struct dentry *);
+
+ if (inode->i_op->default_file_ops
+ && (fsync = inode->i_op->default_file_ops->fsync)) {
+ fsync(NULL, dp);
+ }
}
/*
@@ -478,7 +630,7 @@
int err;
struct file file;
- err = nfsd_open(rqstp, fhp, S_IFREG, OPEN_READ, &file);
+ err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file);
if (err)
goto out;
err = nfserr_perm;
@@ -543,11 +695,11 @@
uid_t saved_euid;
#endif
- if (!cnt)
- goto out;
- err = nfsd_open(rqstp, fhp, S_IFREG, OPEN_WRITE, &file);
+ err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
if (err)
goto out;
+ if (!cnt)
+ goto out_close;
err = nfserr_perm;
if (!file.f_op->write)
goto out_close;
@@ -560,11 +712,21 @@
* Request sync writes if
* - the sync export option has been set, or
* - the client requested O_SYNC behavior (NFSv3 feature).
+ * - The file system doesn't support fsync().
* When gathered writes have been configured for this volume,
* flushing the data to disk is handled separately below.
*/
+#ifdef CONFIG_NFSD_V3
+ if (rqstp->rq_vers == 2)
+ stable = EX_ISSYNC(exp);
+ else if (file.f_op->fsync == 0)
+ stable = 1;
+ if (stable && !EX_WGATHER(exp))
+ file.f_flags |= O_SYNC;
+#else
if ((stable || (stable = EX_ISSYNC(exp))) && !EX_WGATHER(exp))
file.f_flags |= O_SYNC;
+#endif /* CONFIG_NFSD_V3 */
fh_lock(fhp); /* lock inode */
file.f_pos = offset; /* set write offset */
@@ -618,26 +780,25 @@
*/
if (EX_WGATHER(exp) && (inode->i_writecount > 1
|| (last_ino == inode->i_ino && last_dev == inode->i_dev))) {
-#if 0
- interruptible_sleep_on_timeout(&inode->i_wait, 10 * HZ / 1000);
-#else
dprintk("nfsd: write defer %d\n", current->pid);
+ current->state = TASK_UNINTERRUPTIBLE;
schedule_timeout((HZ+99)/100);
+ current->state = TASK_RUNNING;
dprintk("nfsd: write resume %d\n", current->pid);
-#endif
}
if (inode->i_state & I_DIRTY) {
dprintk("nfsd: write sync %d\n", current->pid);
- nfsd_sync(inode, &file);
- write_inode_now(inode);
+ nfsd_sync(&file);
}
+#if 0
wake_up(&inode->i_wait);
+#endif
last_ino = inode->i_ino;
last_dev = inode->i_dev;
}
- dprintk("nfsd: write complete\n");
+ dprintk("nfsd: write complete err=%d\n", err);
if (err >= 0)
err = 0;
else
@@ -648,6 +809,38 @@
return err;
}
+
+#ifdef CONFIG_NFSD_V3
+/*
+ * Commit all pending writes to stable storage.
+ * Strictly speaking, we could sync just the indicated file region here,
+ * but there's currently no way we can ask the VFS to do so.
+ *
+ * We lock the file to make sure we return full WCC data to the client.
+ */
+int
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ off_t offset, unsigned long count)
+{
+ struct file file;
+ int err;
+
+ if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0)
+ return err;
+
+ fh_lock(fhp);
+ if (file.f_op && file.f_op->fsync) {
+ file.f_op->fsync(&file, file.f_dentry);
+ } else {
+ err = nfserr_notsupp;
+ }
+ fh_unlock(fhp);
+
+ nfsd_close(&file);
+ return err;
+}
+#endif /* CONFIG_NFSD_V3 */
+
/*
* Create a file (regular, directory, device, fifo); UNIX sockets
* not yet implemented.
@@ -669,6 +862,7 @@
err = nfserr_perm;
if (!flen)
goto out;
+
err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
if (err)
goto out;
@@ -679,40 +873,42 @@
err = nfserr_notdir;
if(!dirp->i_op || !dirp->i_op->lookup)
goto out;
+
+ err = nfserr_exist;
+ if (isdotent(fname, flen))
+ goto out;
/*
* Check whether the response file handle has been verified yet.
* If it has, the parent directory should already be locked.
*/
- if (!resfhp->fh_dverified) {
+ if (!resfhp->fh_dentry) {
dchild = lookup_dentry(fname, dget(dentry), 0);
err = PTR_ERR(dchild);
if (IS_ERR(dchild))
goto out_nfserr;
fh_compose(resfhp, fhp->fh_export, dchild);
+ /* Lock the parent and check for errors ... */
+ err = fh_lock_parent(fhp, dchild);
+ if (err)
+ goto out;
} else {
dchild = resfhp->fh_dentry;
- if (!fhp->fh_locked)
+ if (!fhp->fh_locked) {
+ /* not actually possible */
printk(KERN_ERR
"nfsd_create: parent %s/%s not locked!\n",
dentry->d_parent->d_name.name,
dentry->d_name.name);
- }
- err = nfserr_exist;
- if (dchild->d_inode)
- goto out;
- if (!fhp->fh_locked) {
- /* Lock the parent and check for errors ... */
- err = fh_lock_parent(fhp, dchild);
- if (err)
+ err = -EIO;
goto out;
}
+ }
/*
* Make sure the child dentry is still negative ...
*/
err = nfserr_exist;
if (dchild->d_inode) {
- printk(KERN_WARNING
- "nfsd_create: dentry %s/%s not negative!\n",
+ dprintk("nfsd_create: dentry %s/%s not negative!\n",
dentry->d_name.name, dchild->d_name.name);
goto out;
}
@@ -739,24 +935,29 @@
case S_IFSOCK:
opfunc = dirp->i_op->mknod;
break;
+ default:
+ printk("nfsd: bad file type %o in nfsd_create\n", type);
+ err = nfserr_inval;
}
if (!opfunc)
goto out;
if (!(iap->ia_valid & ATTR_MODE))
iap->ia_mode = 0;
+ iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
/*
* Call the dir op function to create the object.
*/
DQUOT_INIT(dirp);
err = opfunc(dirp, dchild, iap->ia_mode, rdev);
- DQUOT_DROP(dirp);
if (err < 0)
goto out_nfserr;
- if (EX_ISSYNC(fhp->fh_export))
- write_inode_now(dirp);
+ if (EX_ISSYNC(fhp->fh_export)) {
+ nfsd_sync_dir(dentry);
+ write_inode_now(dchild->d_inode);
+ }
/*
* Update the file handle to get the new inode info.
@@ -779,6 +980,129 @@
goto out;
}
+#ifdef CONFIG_NFSD_V3
+/*
+ * NFSv3 version of nfsd_create
+ */
+int
+nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ char *fname, int flen, struct iattr *iap,
+ struct svc_fh *resfhp, int createmode, u32 *verifier)
+{
+ struct dentry *dentry, *dchild;
+ struct inode *dirp;
+ int err;
+
+ err = nfserr_perm;
+ if (!flen)
+ goto out;
+ if (!(iap->ia_valid & ATTR_MODE))
+ iap->ia_mode = 0;
+ err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+ if (err)
+ goto out;
+
+ dentry = fhp->fh_dentry;
+ dirp = dentry->d_inode;
+
+ /* Get all the sanity checks out of the way before
+ * we lock the parent. */
+ err = nfserr_notdir;
+ if(!dirp->i_op || !dirp->i_op->lookup)
+ goto out;
+ err = nfserr_perm;
+ if(!dirp->i_op->create)
+ goto out;
+
+ err = nfserr_exist;
+ if (isdotent(fname, flen))
+ goto out;
+ /*
+ * Compose the response file handle.
+ */
+ dchild = lookup_dentry(fname, dget(dentry), 0);
+ err = PTR_ERR(dchild);
+ if(IS_ERR(dchild))
+ goto out_nfserr;
+ fh_compose(resfhp, fhp->fh_export, dchild);
+
+ /*
+ * We must lock the directory before we check for the inode.
+ */
+ err = fh_lock_parent(fhp, dchild);
+ if (err)
+ goto out;
+
+ if (dchild->d_inode) {
+ err = 0;
+
+ if (resfhp->fh_handle.fh_ino == 0)
+ /* inode might have been instantiated while we slept */
+ fh_update(resfhp);
+
+ switch (createmode) {
+ case NFS3_CREATE_UNCHECKED:
+ if (! S_ISREG(dchild->d_inode->i_mode))
+ err = nfserr_exist;
+ else {
+ iap->ia_valid &= ATTR_SIZE;
+ goto set_attr;
+ }
+ break;
+ case NFS3_CREATE_EXCLUSIVE:
+ if ( dchild->d_inode->i_mtime == verifier[0]
+ && dchild->d_inode->i_atime == verifier[1]
+ && dchild->d_inode->i_mode == S_IFREG
+ && dchild->d_inode->i_size == 0 )
+ break;
+ /* fallthru */
+ case NFS3_CREATE_GUARDED:
+ err = nfserr_exist;
+ }
+ goto out;
+ }
+
+ err = dirp->i_op->create(dirp, dchild, iap->ia_mode);
+ if (err < 0)
+ goto out_nfserr;
+
+ if (EX_ISSYNC(fhp->fh_export)) {
+ nfsd_sync_dir(dentry);
+ /* setattr will sync the child (or not) */
+ }
+
+ /*
+ * Update the filehandle to get the new inode info.
+ */
+ fh_update(resfhp);
+ err = 0;
+
+ if (createmode == NFS3_CREATE_EXCLUSIVE) {
+ /* Cram the verifier into atime/mtime */
+ iap->ia_valid = ATTR_MTIME|ATTR_ATIME|ATTR_MTIME_SET|ATTR_ATIME_SET;
+ iap->ia_mtime = verifier[0];
+ iap->ia_atime = verifier[1];
+ }
+
+ /* Set file attributes. Mode has already been set and
+ * setting uid/gid works only for root. Irix appears to
+ * send along the gid when it tries to implement setgid
+ * directories via NFS. Clear out all that cruft.
+ */
+ set_attr:
+ if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0)
+ err = nfsd_setattr(rqstp, resfhp, iap);
+
+ out:
+ fh_unlock(fhp);
+ return err;
+
+ out_nfserr:
+ err = nfserrno(-err);
+ goto out;
+}
+#endif /* CONFIG_NFSD_V3 */
+
/*
* Truncate a file.
* The calling routines must make sure to update the ctime
@@ -817,15 +1141,16 @@
cap_clear(current->cap_effective);
}
err = notify_change(dentry, &newattrs);
- if (current->fsuid != 0)
- current->cap_effective = saved_cap;
if (!err) {
vmtruncate(inode, size);
if (inode->i_op && inode->i_op->truncate)
inode->i_op->truncate(inode);
}
+ if (current->fsuid != 0)
+ current->cap_effective = saved_cap;
put_write_access(inode);
- DQUOT_DROP(inode);
+ if (EX_ISSYNC(fhp->fh_export))
+ nfsd_sync_dir(dentry);
fh_unlock(fhp);
out_nfserr:
if (err)
@@ -859,7 +1184,10 @@
goto out;
UPDATE_ATIME(inode);
- /* N.B. Why does this call need a get_fs()?? */
+ /* N.B. Why does this call need a get_fs()??
+ * Remove the set_fs and watch the fireworks:-) --okir
+ */
+
oldfs = get_fs(); set_fs(KERNEL_DS);
err = inode->i_op->readlink(dentry, buf, *lenp);
set_fs(oldfs);
@@ -884,7 +1212,8 @@
nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
char *fname, int flen,
char *path, int plen,
- struct svc_fh *resfhp)
+ struct svc_fh *resfhp,
+ struct iattr *iap)
{
struct dentry *dentry, *dnew;
struct inode *dirp;
@@ -899,9 +1228,11 @@
goto out;
dentry = fhp->fh_dentry;
- err = nfserr_perm;
- if (nfsd_iscovered(dentry, fhp->fh_export))
+ err = nfserr_exist;
+ if (isdotent(fname, flen))
goto out;
+
+ err = nfserr_perm;
dirp = dentry->d_inode;
if (!dirp->i_op || !dirp->i_op->symlink)
goto out;
@@ -922,10 +1253,20 @@
if (!dnew->d_inode) {
DQUOT_INIT(dirp);
err = dirp->i_op->symlink(dirp, dnew, path);
- DQUOT_DROP(dirp);
if (!err) {
if (EX_ISSYNC(fhp->fh_export))
- write_inode_now(dirp);
+ nfsd_sync_dir(dentry);
+ if (iap) {
+ iap->ia_valid &= ATTR_MODE /* ~(ATTR_MODE|ATTR_UID|ATTR_GID)*/;
+ if (iap->ia_valid) {
+ iap->ia_valid |= ATTR_CTIME;
+ iap->ia_mode = (iap->ia_mode&S_IALLUGO)
+ | S_IFLNK;
+ err = notify_change(dnew, iap);
+ if (!err && EX_ISSYNC(fhp->fh_export))
+ write_inode_now(dentry->d_inode);
+ }
+ }
} else
err = nfserrno(-err);
}
@@ -949,7 +1290,7 @@
*/
int
nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
- char *fname, int len, struct svc_fh *tfhp)
+ char *fname, int flen, struct svc_fh *tfhp)
{
struct dentry *ddir, *dnew, *dold;
struct inode *dirp, *dest;
@@ -958,12 +1299,16 @@
err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE);
if (err)
goto out;
- err = fh_verify(rqstp, tfhp, S_IFREG, MAY_NOP);
+ err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP);
if (err)
goto out;
err = nfserr_perm;
- if (!len)
+ if (!flen)
+ goto out;
+
+ err = nfserr_exist;
+ if (isdotent(fname, flen))
goto out;
ddir = ffhp->fh_dentry;
@@ -987,10 +1332,7 @@
dold = tfhp->fh_dentry;
dest = dold->d_inode;
- err = nfserr_acces;
- if (nfsd_iscovered(ddir, ffhp->fh_export))
- goto out_unlock;
- /* FIXME: nxdev for NFSv3 */
+ err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
if (dirp->i_dev != dest->i_dev)
goto out_unlock;
@@ -1002,10 +1344,9 @@
DQUOT_INIT(dirp);
err = dirp->i_op->link(dold, dirp, dnew);
- DQUOT_DROP(dirp);
if (!err) {
if (EX_ISSYNC(ffhp->fh_export)) {
- write_inode_now(dirp);
+ nfsd_sync_dir(ddir);
write_inode_now(dest);
}
} else
@@ -1024,26 +1365,12 @@
}
/*
- * We need to do a check-parent every time
- * after we have locked the parent - to verify
- * that the parent is still our parent and
- * that we are still hashed onto it..
- *
- * This is requied in case two processes race
- * on removing (or moving) the same entry: the
- * parent lock will serialize them, but the
- * other process will be too late..
- */
-#define check_parent(dir, dentry) \
- ((dir) == (dentry)->d_parent->d_inode && !list_empty(&dentry->d_hash))
-
-/*
* This follows the model of double_lock() in the VFS.
*/
static inline void nfsd_double_down(struct semaphore *s1, struct semaphore *s2)
{
if (s1 != s2) {
- if ((unsigned long) s1 > (unsigned long) s2) {
+ if ((unsigned long) s1 < (unsigned long) s2) {
struct semaphore *tmp = s1;
s1 = s2;
s2 = tmp;
@@ -1085,12 +1412,12 @@
tdentry = tfhp->fh_dentry;
tdir = tdentry->d_inode;
- /* N.B. We shouldn't need this ... dentry layer handles it */
+ err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
+ if (fdir->i_dev != tdir->i_dev)
+ goto out;
+
err = nfserr_perm;
- if (!flen || (fname[0] == '.' &&
- (flen == 1 || (flen == 2 && fname[1] == '.'))) ||
- !tlen || (tname[0] == '.' &&
- (tlen == 1 || (tlen == 2 && tname[1] == '.'))))
+ if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
goto out;
odentry = lookup_dentry(fname, dget(fdentry), 0);
@@ -1111,31 +1438,36 @@
* Lock the parent directories.
*/
nfsd_double_down(&tdir->i_sem, &fdir->i_sem);
+
+#ifdef CONFIG_NFSD_V3
+ /* Fill in the pre-op attr for the wcc data for both
+ * tdir and fdir
+ */
+ fill_pre_wcc(ffhp);
+ fill_pre_wcc(tfhp);
+#endif /* CONFIG_NFSD_V3 */
+
err = -ENOENT;
/* GAM3 check for parent changes after locking. */
- if (check_parent(fdir, odentry) &&
- check_parent(tdir, ndentry)) {
+ if (nfsd_check_parent(fdentry, odentry) &&
+ nfsd_check_parent(tdentry, ndentry)) {
err = vfs_rename(fdir, odentry, tdir, ndentry);
if (!err && EX_ISSYNC(tfhp->fh_export)) {
- write_inode_now(fdir);
- write_inode_now(tdir);
+ nfsd_sync_dir(tdentry);
+ nfsd_sync_dir(fdentry);
}
} else
dprintk("nfsd: Caught race in nfsd_rename");
- DQUOT_DROP(fdir);
- DQUOT_DROP(tdir);
+#ifdef CONFIG_NFSD_V3
+ /* Fill in the post-op attr for the wcc data for both
+ * tdir and fdir
+ */
+ fill_post_wcc(ffhp);
+ fill_post_wcc(tfhp);
+#endif /* CONFIG_NFSD_V3 */
nfsd_double_up(&tdir->i_sem, &fdir->i_sem);
-
- if (!err && odentry->d_inode) {
- add_to_rename_cache(tdir->i_ino,
- odentry->d_inode->i_dev,
- fdir->i_ino,
- odentry->d_inode->i_ino);
- } else {
- printk(": no inode in rename or err: %d.\n", err);
- }
dput(ndentry);
out_dput_old:
@@ -1162,13 +1494,12 @@
struct inode *dirp;
int err;
- /* N.B. We shouldn't need this test ... handled by dentry layer */
- err = nfserr_acces;
- if (!flen || isdotent(fname, flen))
- goto out;
err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE);
if (err)
goto out;
+ err = nfserr_acces;
+ if (!flen || isdotent(fname, flen))
+ goto out;
dentry = fhp->fh_dentry;
dirp = dentry->d_inode;
@@ -1177,13 +1508,13 @@
err = PTR_ERR(rdentry);
if (IS_ERR(rdentry))
goto out_nfserr;
+
if (!rdentry->d_inode) {
dput(rdentry);
err = nfserr_noent;
goto out;
}
- expire_by_dentry(rdentry);
if (type != S_IFDIR) {
/* It's UNLINK */
@@ -1194,30 +1525,33 @@
err = vfs_unlink(dirp, rdentry);
- DQUOT_DROP(dirp);
fh_unlock(fhp);
dput(rdentry);
- expire_by_dentry(rdentry);
+
} else {
/* It's RMDIR */
/* See comments in fs/namei.c:do_rmdir */
rdentry->d_count++;
nfsd_double_down(&dirp->i_sem, &rdentry->d_inode->i_sem);
- if (!fhp->fh_pre_mtime)
- fhp->fh_pre_mtime = dirp->i_mtime;
+
+#ifdef CONFIG_NFSD_V3
+ fill_pre_wcc(fhp);
+#else
fhp->fh_locked = 1;
+#endif /* CONFIG_NFSD_V3 */
err = -ENOENT;
- if (check_parent(dirp, rdentry))
+ if (nfsd_check_parent(dentry, rdentry))
err = vfs_rmdir(dirp, rdentry);
rdentry->d_count--;
- DQUOT_DROP(dirp);
- if (!fhp->fh_post_version)
- fhp->fh_post_version = dirp->i_version;
+#ifdef CONFIG_NFSD_V3
+ fill_post_wcc(fhp);
+#else
fhp->fh_locked = 0;
+#endif /* CONFIG_NFSD_V3 */
nfsd_double_up(&dirp->i_sem, &rdentry->d_inode->i_sem);
dput(rdentry);
@@ -1225,9 +1559,11 @@
if (err)
goto out_nfserr;
- if (EX_ISSYNC(fhp->fh_export))
- write_inode_now(dirp);
-
+ if (EX_ISSYNC(fhp->fh_export)) {
+ down(&dentry->d_inode->i_sem);
+ nfsd_sync_dir(dentry);
+ up(&dentry->d_inode->i_sem);
+ }
out:
return err;
@@ -1238,10 +1574,11 @@
/*
* Read entries from a directory.
+ * The verifier is an NFSv3 thing we ignore for now.
*/
int
nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
- encode_dent_fn func, u32 *buffer, int *countp)
+ encode_dent_fn func, u32 *buffer, int *countp, u32 *verf)
{
struct inode *inode;
u32 *p;
@@ -1249,13 +1586,11 @@
struct file file;
struct readdir_cd cd;
- err = 0;
- if (offset > ~(u32) 0)
- goto out;
-
- err = nfsd_open(rqstp, fhp, S_IFDIR, OPEN_READ, &file);
+ err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file);
if (err)
goto out;
+ if (offset > ~(u32) 0)
+ goto out_close;
err = nfserr_notdir;
if (!file.f_op->readdir)
@@ -1267,6 +1602,7 @@
cd.rqstp = rqstp;
cd.buffer = buffer;
cd.buflen = *countp; /* count of words */
+ cd.dirfh = fhp;
/*
* Read the directory entries. This silly loop is necessary because
@@ -1296,8 +1632,14 @@
/* If we didn't fill the buffer completely, we're at EOF */
eof = !cd.eob;
- if (cd.offset)
- *cd.offset = htonl(file.f_pos);
+ if (cd.offset) {
+#ifdef CONFIG_NFSD_V3
+ if (rqstp->rq_vers == 3)
+ (void)enc64(cd.offset, file.f_pos);
+ else
+#endif /* CONFIG_NFSD_V3 */
+ *cd.offset = htonl(file.f_pos);
+ }
p = cd.buffer;
*p++ = 0; /* no more entries */
@@ -1360,17 +1702,33 @@
struct inode *inode = dentry->d_inode;
int err;
kernel_cap_t saved_cap = 0;
+ int owneraccess;
+
+ /*
+ * Check if we are to use "owner may always access" semantics,
+ * then clean out the flag bit which controls this. It might be
+ * clearer to reverse the logic of this flag, but I didn't
+ * want to change a lot of code in a stable kernel - dhiggen.
+ */
+
+ if (acc & NO_OWNER_OVERRIDE) {
+ owneraccess = 0;
+ acc &= ~NO_OWNER_OVERRIDE;
+ } else {
+ owneraccess = 1;
+ }
if (acc == MAY_NOP)
return 0;
#if 0
- dprintk("nfsd: permission 0x%x%s%s%s%s%s mode 0%o%s%s%s\n",
+ dprintk("nfsd: permission 0x%x%s%s%s%s%s%s mode 0%o%s%s%s\n",
acc,
(acc & MAY_READ)? " read" : "",
(acc & MAY_WRITE)? " write" : "",
(acc & MAY_EXEC)? " exec" : "",
(acc & MAY_SATTR)? " sattr" : "",
(acc & MAY_TRUNC)? " trunc" : "",
+ (acc & MAY_LOCK)? " lock" : "",
inode->i_mode,
IS_IMMUTABLE(inode)? " immut" : "",
IS_APPEND(inode)? " append" : "",
@@ -1378,33 +1736,40 @@
dprintk(" owner %d/%d user %d/%d\n",
inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
#endif
-#ifndef CONFIG_NFSD_SUN
- if (dentry->d_mounts != dentry) {
- return nfserr_perm;
- }
-#endif
if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
if (EX_RDONLY(exp) || IS_RDONLY(inode))
return nfserr_rofs;
- if (S_ISDIR(inode->i_mode) && nfsd_iscovered(dentry, exp))
- return nfserr_perm;
if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
return nfserr_perm;
}
if ((acc & MAY_TRUNC) && IS_APPEND(inode))
return nfserr_perm;
+ if (acc & MAY_LOCK) {
+ /* If we cannot rely on authentication in NLM requests,
+ * just allow locks, others require read permission
+ */
+ if (exp->ex_flags & NFSEXP_NOAUTHNLM)
+ return 0;
+ else
+ acc = MAY_READ;
+ }
/*
- * The file owner always gets access permission. This is to make
- * file access work even when the client has done a fchmod(fd, 0).
+ * The file owner always gets access permission (except in the
+ * special case of a V3 ACCESS call, which is used for checking at
+ * open() time). This is to make file access work even when the
+ * client has done a fchmod(fd, 0).
*
* However, `cp foo bar' should fail nevertheless when bar is
* readonly. A sensible way to do this might be to reject all
* attempts to truncate a read-only file, because a creat() call
* always implies file truncation.
+ * dhXXX we are not currently setting MAY_TRUNC from a SETATTR which
+ * changes the size, so this check is not enforced. It probably
+ * should be?
*/
- if (inode->i_uid == current->fsuid /* && !(acc & MAY_TRUNC) */)
+ if (owneraccess && inode->i_uid == current->fsuid /* && !(acc & MAY_TRUNC) */)
return 0;
if (current->fsuid != 0) {
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)