patch-2.1.107 linux/fs/ext2/truncate.c
Next file: linux/fs/fat/Makefile
Previous file: linux/fs/dquot.c
Back to the patch index
Back to the overall index
- Lines: 573
- Date:
Tue Jun 23 09:23:49 1998
- Orig file:
v2.1.106/linux/fs/ext2/truncate.c
- Orig date:
Thu Mar 26 15:57:04 1998
diff -u --recursive --new-file v2.1.106/linux/fs/ext2/truncate.c linux/fs/ext2/truncate.c
@@ -14,6 +14,8 @@
*
* Big-endian to little-endian byte-swapping/bitmaps by
* David S. Miller (davem@caip.rutgers.edu), 1995
+ *
+ * General cleanup and race fixes, wsh, 1998
*/
/*
@@ -47,6 +49,22 @@
#endif
/*
+ * Macros to return the block number for the inode size and offset.
+ * Currently we always hold the inode semaphore during truncate, so
+ * there's no need to test for changes during the operation.
+ */
+#define DIRECT_BLOCK(inode) \
+ ((inode->i_size + inode->i_sb->s_blocksize - 1) / \
+ inode->i_sb->s_blocksize)
+#define INDIRECT_BLOCK(inode,offset) ((int)DIRECT_BLOCK(inode) - offset)
+#define DINDIRECT_BLOCK(inode,offset) \
+ (INDIRECT_BLOCK(inode,offset) / addr_per_block)
+#define TINDIRECT_BLOCK(inode,offset) \
+ (INDIRECT_BLOCK(inode,offset) / (addr_per_block*addr_per_block))
+
+static u32 le32_zero = cpu_to_le32(0);
+
+/*
* Truncate has the most races in the whole filesystem: coding it is
* a pain in the a**. Especially as I don't do any locking...
*
@@ -57,53 +75,136 @@
*
* The new code handles normal truncates (size = 0) as well as the more
* general case (size = XXX). I hope.
+ *
+ *
+ * Truncate operations have been rewritten to avoid various races. The
+ * previous code was allowing blocking operations to precede a call to
+ * bforget(), possibly allowing the buffer to be used again.
+ *
+ * We now ensure that b_count == 1 before calling bforget() and that the
+ * parent buffer (if any) is unlocked before clearing the block pointer.
+ * The operations are always performed in this order:
+ * (1) Make sure that the parent buffer is unlocked.
+ * (2) Use find_buffer() to find the block buffer without blocking,
+ * and set 'retry' if the buffer is locked or b_count > 1.
+ * (3) Clear the block pointer in the parent (buffer or inode).
+ * (4) Update the inode block count and mark the inode dirty.
+ * (5) Forget the block buffer, if any. This call won't block, as
+ * we know the buffer is unlocked from (2).
+ * (6) If the block pointer is in a (parent) buffer, mark the buffer
+ * dirty. (Note that this can block on a loop device.)
+ * (7) Accumulate the blocks to free and/or update the block bitmap.
+ * (This operation will frequently block.)
+ *
+ * The requirement that parent buffers be unlocked follows from the general
+ * principle of not modifying a buffer that may be undergoing I/O. With the
+ * present kernels there's no problem with modifying a locked inode, as
+ * the I_DIRTY bit is cleared before setting I_LOCK.
+ * -- WSH, 1998
+ */
+
+/*
+ * Check whether any of the slots in an indirect block are
+ * still in use, and if not free the block.
*/
+static int check_block_empty(struct inode *inode, struct buffer_head *bh,
+ u32 *p, struct buffer_head *ind_bh)
+{
+ int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+ u32 * ind = (u32 *) bh->b_data;
+ int i, retry;
+
+ /* Make sure both buffers are unlocked */
+ do {
+ retry = 0;
+ if (buffer_locked(bh)) {
+ __wait_on_buffer(bh);
+ retry = 1;
+ }
+ if (ind_bh && buffer_locked(ind_bh)) {
+ __wait_on_buffer(ind_bh);
+ retry = 1;
+ }
+ } while (retry);
+
+ for (i = 0; i < addr_per_block; i++)
+ if (le32_to_cpu(*(ind++)))
+ goto in_use;
+
+ if (bh->b_count == 1) {
+ int tmp;
+ if (ind_bh) {
+ tmp = le32_to_cpu(*p);
+ *p = le32_zero;
+ } else {
+ tmp = *p;
+ *p = 0;
+ }
+ inode->i_blocks -= (inode->i_sb->s_blocksize / 512);
+ mark_inode_dirty(inode);
+ /*
+ * Forget the buffer, then mark the parent buffer dirty.
+ */
+ bforget(bh);
+ if (ind_bh)
+ mark_buffer_dirty(ind_bh, 1);
+ ext2_free_blocks (inode, tmp, 1);
+ goto out;
+ }
+ retry = 1;
+
+in_use:
+ if (IS_SYNC(inode) && buffer_dirty(bh)) {
+ ll_rw_block (WRITE, 1, &bh);
+ wait_on_buffer (bh);
+ }
+ brelse (bh);
+
+out:
+ return retry;
+}
static int trunc_direct (struct inode * inode)
{
- u32 * p;
- int i, tmp;
struct buffer_head * bh;
- unsigned long block_to_free = 0;
- unsigned long free_count = 0;
- int retry = 0;
+ int i, retry = 0;
+ unsigned long block_to_free = 0, free_count = 0;
int blocks = inode->i_sb->s_blocksize / 512;
-#define DIRECT_BLOCK ((inode->i_size + inode->i_sb->s_blocksize - 1) / \
- inode->i_sb->s_blocksize)
- int direct_block = DIRECT_BLOCK;
+ int direct_block = DIRECT_BLOCK(inode);
-repeat:
for (i = direct_block ; i < EXT2_NDIR_BLOCKS ; i++) {
- p = inode->u.ext2_i.i_data + i;
- tmp = *p;
+ u32 * p = inode->u.ext2_i.i_data + i;
+ int tmp = *p;
+
if (!tmp)
continue;
- bh = get_hash_table (inode->i_dev, tmp,
- inode->i_sb->s_blocksize);
- if (i < direct_block) {
- brelse (bh);
- goto repeat;
- }
- if ((bh && bh->b_count != 1) || tmp != *p) {
- retry = 1;
- brelse (bh);
- continue;
+
+ bh = find_buffer(inode->i_dev, tmp, inode->i_sb->s_blocksize);
+ if (bh) {
+ bh->b_count++;
+ if(bh->b_count != 1 || buffer_locked(bh)) {
+ brelse(bh);
+ retry = 1;
+ continue;
+ }
}
+
*p = 0;
inode->i_blocks -= blocks;
mark_inode_dirty(inode);
bforget(bh);
- if (free_count == 0) {
- block_to_free = tmp;
- free_count++;
- } else if (free_count > 0 && block_to_free == tmp - free_count)
+
+ /* accumulate blocks to free if they're contiguous */
+ if (free_count == 0)
+ goto free_this;
+ else if (block_to_free == tmp - free_count)
free_count++;
else {
ext2_free_blocks (inode, block_to_free, free_count);
+ free_this:
block_to_free = tmp;
free_count = 1;
}
-/* ext2_free_blocks (inode, tmp, 1); */
}
if (free_count > 0)
ext2_free_blocks (inode, block_to_free, free_count);
@@ -111,174 +212,146 @@
}
static int trunc_indirect (struct inode * inode, int offset, u32 * p,
- int in_inode)
+ struct buffer_head *dind_bh)
{
- int i, tmp;
- struct buffer_head * bh;
struct buffer_head * ind_bh;
- u32 * ind;
- unsigned long block_to_free = 0;
- unsigned long free_count = 0;
- int retry = 0;
- int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
- int blocks = inode->i_sb->s_blocksize / 512;
-#define INDIRECT_BLOCK ((int)DIRECT_BLOCK - offset)
- int indirect_block = INDIRECT_BLOCK;
+ int i, tmp, retry = 0;
+ unsigned long block_to_free = 0, free_count = 0;
+ int indirect_block, addr_per_block, blocks;
- tmp = in_inode ? *p : le32_to_cpu(*p);
+ tmp = dind_bh ? le32_to_cpu(*p) : *p;
if (!tmp)
return 0;
ind_bh = bread (inode->i_dev, tmp, inode->i_sb->s_blocksize);
- if (tmp != (in_inode ? *p : le32_to_cpu(*p))) {
+ if (tmp != (dind_bh ? le32_to_cpu(*p) : *p)) {
brelse (ind_bh);
return 1;
}
+ /* A read failure? Report error and clear slot (should be rare). */
if (!ind_bh) {
- *p = in_inode ? 0 : cpu_to_le32(0);
+ ext2_error(inode->i_sb, "trunc_indirect",
+ "Read failure, inode=%ld, block=%d",
+ inode->i_ino, tmp);
+ if (dind_bh) {
+ *p = le32_zero;
+ mark_buffer_dirty(dind_bh, 1);
+ } else {
+ *p = 0;
+ mark_inode_dirty(inode);
+ }
return 0;
}
-repeat:
+
+ blocks = inode->i_sb->s_blocksize / 512;
+ addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+ indirect_block = INDIRECT_BLOCK(inode, offset);
+ if (indirect_block < 0)
+ indirect_block = 0;
for (i = indirect_block ; i < addr_per_block ; i++) {
- if (i < 0)
- i = 0;
- if (i < indirect_block)
- goto repeat;
- ind = i + (u32 *) ind_bh->b_data;
+ u32 * ind = i + (u32 *) ind_bh->b_data;
+ struct buffer_head * bh;
+
+ wait_on_buffer(ind_bh);
tmp = le32_to_cpu(*ind);
if (!tmp)
continue;
- bh = get_hash_table (inode->i_dev, tmp,
- inode->i_sb->s_blocksize);
- if (i < indirect_block) {
- brelse (bh);
- goto repeat;
+ /*
+ * Use find_buffer so we don't block here.
+ */
+ bh = find_buffer(inode->i_dev, tmp, inode->i_sb->s_blocksize);
+ if (bh) {
+ bh->b_count++;
+ if (bh->b_count != 1 || buffer_locked(bh)) {
+ brelse (bh);
+ retry = 1;
+ continue;
+ }
}
- if ((bh && bh->b_count != 1) || tmp != le32_to_cpu(*ind)) {
- retry = 1;
- brelse (bh);
- continue;
- }
- *ind = cpu_to_le32(0);
- mark_buffer_dirty(ind_bh, 1);
+
+ *ind = le32_zero;
+ inode->i_blocks -= blocks;
+ mark_inode_dirty(inode);
bforget(bh);
- if (free_count == 0) {
- block_to_free = tmp;
- free_count++;
- } else if (free_count > 0 && block_to_free == tmp - free_count)
+ mark_buffer_dirty(ind_bh, 1);
+
+ /* accumulate blocks to free if they're contiguous */
+ if (free_count == 0)
+ goto free_this;
+ else if (block_to_free == tmp - free_count)
free_count++;
else {
ext2_free_blocks (inode, block_to_free, free_count);
+ free_this:
block_to_free = tmp;
free_count = 1;
}
-/* ext2_free_blocks (inode, tmp, 1); */
- inode->i_blocks -= blocks;
- mark_inode_dirty(inode);
}
if (free_count > 0)
ext2_free_blocks (inode, block_to_free, free_count);
- ind = (u32 *) ind_bh->b_data;
- for (i = 0; i < addr_per_block; i++)
- if (le32_to_cpu(*(ind++)))
- break;
- if (i >= addr_per_block) {
- if (ind_bh->b_count != 1)
- retry = 1;
- else {
- tmp = in_inode ? *p : le32_to_cpu(*p);
- *p = in_inode ? 0 : cpu_to_le32(0);
- inode->i_blocks -= blocks;
- mark_inode_dirty(inode);
- ext2_free_blocks (inode, tmp, 1);
- bforget(ind_bh);
- ind_bh = NULL;
- }
- }
- if (IS_SYNC(inode) && ind_bh && buffer_dirty(ind_bh)) {
- ll_rw_block (WRITE, 1, &ind_bh);
- wait_on_buffer (ind_bh);
- }
- brelse (ind_bh);
+ /*
+ * Check the block and dispose of the ind_bh buffer.
+ */
+ retry |= check_block_empty(inode, ind_bh, p, dind_bh);
+
return retry;
}
-static int trunc_dindirect (struct inode * inode, int offset,
- u32 * p, int in_inode)
+static int trunc_dindirect (struct inode * inode, int offset, u32 * p,
+ struct buffer_head * tind_bh)
{
- int i, tmp;
struct buffer_head * dind_bh;
- u32 * dind;
- int retry = 0;
- int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
- int blocks = inode->i_sb->s_blocksize / 512;
-#define DINDIRECT_BLOCK (((int)DIRECT_BLOCK - offset) / addr_per_block)
- int dindirect_block = DINDIRECT_BLOCK;
+ int i, tmp, retry = 0;
+ int dindirect_block, addr_per_block;
- tmp = in_inode ? *p : le32_to_cpu(*p);
+ tmp = tind_bh ? le32_to_cpu(*p) : *p;
if (!tmp)
return 0;
dind_bh = bread (inode->i_dev, tmp, inode->i_sb->s_blocksize);
- if (tmp != (in_inode ? *p : le32_to_cpu(*p))) {
+ if (tmp != (tind_bh ? le32_to_cpu(*p) : *p)) {
brelse (dind_bh);
return 1;
}
+ /* A read failure? Report error and clear slot (should be rare). */
if (!dind_bh) {
- *p = in_inode ? 0 : cpu_to_le32(0);
- return 0;
- }
-repeat:
- for (i = dindirect_block ; i < addr_per_block ; i++) {
- if (i < 0)
- i = 0;
- if (i < dindirect_block)
- goto repeat;
- dind = i + (u32 *) dind_bh->b_data;
- tmp = le32_to_cpu(*dind);
- if (!tmp)
- continue;
- retry |= trunc_indirect(inode, offset + (i * addr_per_block),
- dind, 0);
- mark_buffer_dirty(dind_bh, 1);
- }
- dind = (u32 *) dind_bh->b_data;
- for (i = 0; i < addr_per_block; i++)
- if (le32_to_cpu(*(dind++)))
- break;
- if (i >= addr_per_block) {
- if (dind_bh->b_count != 1)
- retry = 1;
- else {
- tmp = in_inode ? *p : le32_to_cpu(*p);
- *p = in_inode ? 0 : cpu_to_le32(0);
- inode->i_blocks -= blocks;
+ ext2_error(inode->i_sb, "trunc_dindirect",
+ "Read failure, inode=%ld, block=%d",
+ inode->i_ino, tmp);
+ if (tind_bh) {
+ *p = le32_zero;
+ mark_buffer_dirty(tind_bh, 1);
+ } else {
+ *p = 0;
mark_inode_dirty(inode);
- ext2_free_blocks (inode, tmp, 1);
- bforget(dind_bh);
- dind_bh = 0;
}
+ return 0;
}
- if (IS_SYNC(inode) && dind_bh && buffer_dirty(dind_bh)) {
- ll_rw_block (WRITE, 1, &dind_bh);
- wait_on_buffer (dind_bh);
+
+ addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+ dindirect_block = DINDIRECT_BLOCK(inode, offset);
+ if (dindirect_block < 0)
+ dindirect_block = 0;
+ for (i = dindirect_block ; i < addr_per_block ; i++) {
+ u32 * dind = i + (u32 *) dind_bh->b_data;
+
+ retry |= trunc_indirect(inode,
+ offset + (i * addr_per_block),
+ dind, dind_bh);
}
- brelse (dind_bh);
+ /*
+ * Check the block and dispose of the dind_bh buffer.
+ */
+ retry |= check_block_empty(inode, dind_bh, p, tind_bh);
+
return retry;
}
static int trunc_tindirect (struct inode * inode)
{
- int i, tmp;
+ u32 * p = inode->u.ext2_i.i_data + EXT2_TIND_BLOCK;
struct buffer_head * tind_bh;
- u32 * tind, * p;
- int retry = 0;
- int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
- int blocks = inode->i_sb->s_blocksize / 512;
-#define TINDIRECT_BLOCK (((int)DIRECT_BLOCK - (addr_per_block * addr_per_block + \
- addr_per_block + EXT2_NDIR_BLOCKS)) / \
- (addr_per_block * addr_per_block))
- int tindirect_block = TINDIRECT_BLOCK;
+ int i, tmp, retry = 0;
+ int tindirect_block, addr_per_block, offset;
- p = inode->u.ext2_i.i_data + EXT2_TIND_BLOCK;
if (!(tmp = *p))
return 0;
tind_bh = bread (inode->i_dev, tmp, inode->i_sb->s_blocksize);
@@ -286,53 +359,40 @@
brelse (tind_bh);
return 1;
}
+ /* A read failure? Report error and clear slot (should be rare). */
if (!tind_bh) {
+ ext2_error(inode->i_sb, "trunc_tindirect",
+ "Read failure, inode=%ld, block=%d",
+ inode->i_ino, tmp);
*p = 0;
+ mark_inode_dirty(inode);
return 0;
}
-repeat:
+
+ addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
+ offset = EXT2_NDIR_BLOCKS + addr_per_block +
+ (addr_per_block * addr_per_block);
+ tindirect_block = TINDIRECT_BLOCK(inode, offset);
+ if (tindirect_block < 0)
+ tindirect_block = 0;
for (i = tindirect_block ; i < addr_per_block ; i++) {
- if (i < 0)
- i = 0;
- if (i < tindirect_block)
- goto repeat;
- tind = i + (u32 *) tind_bh->b_data;
- retry |= trunc_dindirect(inode, EXT2_NDIR_BLOCKS +
- addr_per_block + (i + 1) * addr_per_block * addr_per_block,
- tind, 0);
- mark_buffer_dirty(tind_bh, 1);
- }
- tind = (u32 *) tind_bh->b_data;
- for (i = 0; i < addr_per_block; i++)
- if (le32_to_cpu(*(tind++)))
- break;
- if (i >= addr_per_block) {
- if (tind_bh->b_count != 1)
- retry = 1;
- else {
- tmp = *p;
- *p = 0;
- inode->i_blocks -= blocks;
- mark_inode_dirty(inode);
- ext2_free_blocks (inode, tmp, 1);
- bforget(tind_bh);
- tind_bh = 0;
- }
- }
- if (IS_SYNC(inode) && tind_bh && buffer_dirty(tind_bh)) {
- ll_rw_block (WRITE, 1, &tind_bh);
- wait_on_buffer (tind_bh);
+ u32 * tind = i + (u32 *) tind_bh->b_data;
+
+ retry |= trunc_dindirect(inode,
+ offset + (i * addr_per_block * addr_per_block),
+ tind, tind_bh);
}
- brelse (tind_bh);
+ /*
+ * Check the block and dispose of the tind_bh buffer.
+ */
+ retry |= check_block_empty(inode, tind_bh, p, NULL);
+
return retry;
}
void ext2_truncate (struct inode * inode)
{
- int retry;
- struct buffer_head * bh;
- int err;
- int offset;
+ int err, offset, retry;
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
@@ -342,11 +402,14 @@
ext2_discard_prealloc(inode);
while (1) {
retry = trunc_direct(inode);
- retry |= trunc_indirect (inode, EXT2_IND_BLOCK,
- (u32 *) &inode->u.ext2_i.i_data[EXT2_IND_BLOCK], 1);
- retry |= trunc_dindirect (inode, EXT2_IND_BLOCK +
- EXT2_ADDR_PER_BLOCK(inode->i_sb),
- (u32 *) &inode->u.ext2_i.i_data[EXT2_DIND_BLOCK], 1);
+ retry |= trunc_indirect (inode,
+ EXT2_IND_BLOCK,
+ (u32 *) &inode->u.ext2_i.i_data[EXT2_IND_BLOCK],
+ NULL);
+ retry |= trunc_dindirect (inode,
+ EXT2_IND_BLOCK+EXT2_ADDR_PER_BLOCK(inode->i_sb),
+ (u32 *)&inode->u.ext2_i.i_data[EXT2_DIND_BLOCK],
+ NULL);
retry |= trunc_tindirect (inode);
if (!retry)
break;
@@ -357,12 +420,13 @@
}
/*
* If the file is not being truncated to a block boundary, the
- * contents of the partial block following the end of the file must be
- * zeroed in case it ever becomes accessible again because of
- * subsequent file growth.
+ * contents of the partial block following the end of the file
+ * must be zeroed in case it ever becomes accessible again due
+ * to subsequent file growth.
*/
offset = inode->i_size & (inode->i_sb->s_blocksize - 1);
if (offset) {
+ struct buffer_head * bh;
bh = ext2_bread (inode,
inode->i_size >> EXT2_BLOCK_SIZE_BITS(inode->i_sb),
0, &err);
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov