patch-2.2.14 linux/fs/buffer.c
Next file: linux/fs/dcache.c
Previous file: linux/fs/block_dev.c
Back to the patch index
Back to the overall index
- Lines: 318
- Date:
Tue Jan 4 10:12:22 2000
- Orig file:
v2.2.13/linux/fs/buffer.c
- Orig date:
Mon Aug 9 16:05:57 1999
diff -u --recursive --new-file v2.2.13/linux/fs/buffer.c linux/fs/buffer.c
@@ -24,6 +24,9 @@
* - RMK
*/
+/* invalidate_buffers/set_blocksize/sync_dev race conditions and
+ fs corruption fixes, 1999, Andrea Arcangeli <andrea@suse.de> */
+
#include <linux/malloc.h>
#include <linux/locks.h>
#include <linux/errno.h>
@@ -58,11 +61,12 @@
/*
* Hash table mask..
*/
-static unsigned long bh_hash_mask = 0;
+static unsigned int bh_hash_mask = 0;
+static unsigned int bh_hash_shift = 0;
+static struct buffer_head ** hash_table = NULL;
static int grow_buffers(int size);
-static struct buffer_head ** hash_table;
static struct buffer_head * lru_list[NR_LIST] = {NULL, };
static struct buffer_head * free_list[NR_SIZES] = {NULL, };
@@ -79,7 +83,7 @@
static int nr_hashed_buffers = 0;
/* This is used by some architectures to estimate available memory. */
-int buffermem = 0;
+long buffermem = 0;
/* Here is the parameter block for the bdflush process. If you add or
* remove any of the parameters, make sure to update kernel/sysctl.c.
@@ -262,11 +266,14 @@
void sync_dev(kdev_t dev)
{
- sync_buffers(dev, 0);
sync_supers(dev);
sync_inodes(dev);
- sync_buffers(dev, 0);
DQUOT_SYNC(dev);
+ /* sync all the dirty buffers out to disk only _after_ all the
+ high level layers have finished generating dirty buffer data
+ (or we'd return with some buffers still dirty on the block device,
+ thus breaking the semantics of this call) */
+ sync_buffers(dev, 0);
/*
* FIXME(eric) we need to sync the physical devices here.
* This is because some (scsi) controllers have huge amounts of
@@ -395,33 +402,13 @@
return err;
}
-void invalidate_buffers(kdev_t dev)
-{
- int i;
- int nlist;
- struct buffer_head * bh;
-
- for(nlist = 0; nlist < NR_LIST; nlist++) {
- bh = lru_list[nlist];
- for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
- if (bh->b_dev != dev)
- continue;
- wait_on_buffer(bh);
- if (bh->b_dev != dev)
- continue;
- if (bh->b_count)
- continue;
- bh->b_flushtime = 0;
- clear_bit(BH_Protected, &bh->b_state);
- clear_bit(BH_Uptodate, &bh->b_state);
- clear_bit(BH_Dirty, &bh->b_state);
- clear_bit(BH_Req, &bh->b_state);
- }
- }
-}
-
-#define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block)) & bh_hash_mask)
-#define hash(dev,block) hash_table[_hashfn(dev,block)]
+/* After several hours of tedious analysis, the following hash
+ * function won. Do not mess with it... -DaveM
+ */
+#define _hashfn(dev,block) \
+ ((((dev)<<(bh_hash_shift - 6)) ^ ((dev)<<(bh_hash_shift - 9))) ^ \
+ (((block)<<(bh_hash_shift - 6)) ^ ((block) >> 13) ^ ((block) << (bh_hash_shift - 12))))
+#define hash(dev,block) hash_table[_hashfn(dev,block) & bh_hash_mask]
static inline void remove_from_hash_queue(struct buffer_head * bh)
{
@@ -434,8 +421,8 @@
}
*pprev = next;
bh->b_pprev = NULL;
+ nr_hashed_buffers--;
}
- nr_hashed_buffers--;
}
static inline void remove_from_lru_list(struct buffer_head * bh)
@@ -486,11 +473,14 @@
remove_from_lru_list(bh);
}
-static inline void put_last_free(struct buffer_head * bh)
+static void put_last_free(struct buffer_head * bh)
{
if (bh) {
struct buffer_head **bhp = &free_list[BUFSIZE_INDEX(bh->b_size)];
+ bh->b_count = 0;
+ bh->b_state = 0;
+ remove_from_queues(bh);
bh->b_dev = B_FREE; /* So it is obvious we are on the free list. */
/* Add to back of free list. */
@@ -510,7 +500,7 @@
{
/* put at end of free list */
if(bh->b_dev == B_FREE) {
- put_last_free(bh);
+ panic("B_FREE inserted into queues");
} else {
struct buffer_head **bhp = &lru_list[bh->b_list];
@@ -542,8 +532,8 @@
}
*bhp = bh;
bh->b_pprev = bhp;
+ nr_hashed_buffers++;
}
- nr_hashed_buffers++;
}
}
@@ -600,10 +590,61 @@
return 0;
}
+/* If invalidate_buffers() trashes dirty buffers, it means some kind
+ of fs corruption is going on. Trashing dirty data always implies losing
+ information that was supposed to be safely stored on the physical layer
+ by the user.
+
+ Thus invalidate_buffers in general usage is not allowed to trash dirty
+ buffers. For example ioctl(BLKFLSBUF) expects dirty data to be preserved.
+
+ NOTE: In the case where the user removed a removable-media disk while
+ there was still dirty data not synced to disk (due to a bug in the
+ device driver or to a user error), by not destroying the dirty buffers
+ we could also corrupt the next media inserted; thus a parameter is
+ necessary to handle this case in the safest way possible (trying
+ not to corrupt the newly inserted disk with the data belonging to
+ the old, now corrupted, disk). Also for the ramdisk the natural thing
+ to do in order to release the ramdisk memory is to destroy dirty buffers.
+
+ These are two special cases. Normal usage implies that the device driver
+ issues a sync on the device (without waiting for I/O completion) and
+ then an invalidate_buffers call that doesn't trash dirty buffers. */
+void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
+{
+ int i, nlist, slept;
+ struct buffer_head * bh, * bhnext;
+
+ again:
+ slept = 0;
+ for(nlist = 0; nlist < NR_LIST; nlist++) {
+ bh = lru_list[nlist];
+ if (!bh)
+ continue;
+ for (i = nr_buffers_type[nlist] ; i > 0 ;
+ bh = bhnext, i--)
+ {
+ bhnext = bh->b_next_free;
+ if (bh->b_dev != dev)
+ continue;
+ if (buffer_locked(bh))
+ {
+ slept = 1;
+ __wait_on_buffer(bh);
+ }
+ if (!bh->b_count &&
+ (destroy_dirty_buffers || !buffer_dirty(bh)))
+ put_last_free(bh);
+ if (slept)
+ goto again;
+ }
+ }
+}
+
void set_blocksize(kdev_t dev, int size)
{
extern int *blksize_size[];
- int i, nlist;
+ int i, nlist, slept;
struct buffer_head * bh, *bhnext;
if (!blksize_size[MAJOR(dev)])
@@ -625,27 +666,35 @@
/* We need to be quite careful how we do this - we are moving entries
* around on the free list, and we can get in a loop if we are not careful.
*/
- for(nlist = 0; nlist < NR_LIST; nlist++) {
+ again:
+ slept = 0;
+ for(nlist = 0; nlist < NR_LIST; nlist++) {
bh = lru_list[nlist];
- for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
- if(!bh)
- break;
-
- bhnext = bh->b_next_free;
- if (bh->b_dev != dev)
- continue;
- if (bh->b_size == size)
- continue;
- bhnext->b_count++;
- wait_on_buffer(bh);
- bhnext->b_count--;
- if (bh->b_dev == dev && bh->b_size != size) {
- clear_bit(BH_Dirty, &bh->b_state);
- clear_bit(BH_Uptodate, &bh->b_state);
- clear_bit(BH_Req, &bh->b_state);
- bh->b_flushtime = 0;
+ if (!bh)
+ continue;
+ for (i = nr_buffers_type[nlist] ; i > 0 ;
+ bh = bhnext, i--)
+ {
+ bhnext = bh->b_next_free;
+ if (bh->b_dev != dev || bh->b_size == size)
+ continue;
+ if (buffer_dirty(bh))
+ printk(KERN_ERR "set_blocksize: dev %s buffer_dirty %lu size %lu\n", kdevname(dev), bh->b_blocknr, bh->b_size);
+ if (buffer_locked(bh))
+ {
+ slept = 1;
+ wait_on_buffer(bh);
}
- remove_from_hash_queue(bh);
+ if (!bh->b_count)
+ put_last_free(bh);
+ else
+ printk(KERN_ERR
+ "set_blocksize: "
+ "b_count %d, dev %s, block %lu!\n",
+ bh->b_count, bdevname(bh->b_dev),
+ bh->b_blocknr);
+ if (slept)
+ goto again;
}
}
}
@@ -825,9 +874,6 @@
__brelse(buf);
return;
}
- buf->b_count = 0;
- buf->b_state = 0;
- remove_from_queues(buf);
put_last_free(buf);
}
@@ -1459,7 +1505,7 @@
int nlist;
static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","DIRTY"};
- printk("Buffer memory: %6dkB\n",buffermem>>10);
+ printk("Buffer memory: %8ldkB\n",buffermem>>10);
printk("Buffer heads: %6d\n",nr_buffer_heads);
printk("Buffer blocks: %6d\n",nr_buffers);
printk("Buffer hashed: %6d\n",nr_hashed_buffers);
@@ -1506,22 +1552,32 @@
fsync times (ext2) manageable, is the following */
memory_size >>= 20;
- for (order = 5; (1UL << order) < memory_size; order++);
+ for (order = 0; (1UL << order) < memory_size; order++);
/* try to allocate something until we get it or we're asking
for something that is really too small */
do {
+ unsigned long tmp;
+
nr_hash = (1UL << order) * PAGE_SIZE /
sizeof(struct buffer_head *);
+ bh_hash_mask = (nr_hash - 1);
+
+ tmp = nr_hash;
+ bh_hash_shift = 0;
+ while((tmp >>= 1UL) != 0UL)
+ bh_hash_shift++;
+
hash_table = (struct buffer_head **)
__get_free_pages(GFP_ATOMIC, order);
- } while (hash_table == NULL && --order > 4);
+ } while (hash_table == NULL && --order >= 0);
+ printk("Buffer cache hash table entries: %d (order %d, %ldk)\n",
+ nr_hash, order, (1UL<<order) * PAGE_SIZE / 1024);
if (!hash_table)
panic("Failed to allocate buffer hash table\n");
memset(hash_table, 0, nr_hash * sizeof(struct buffer_head *));
- bh_hash_mask = nr_hash-1;
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head),
@@ -1563,7 +1619,6 @@
return;
wake_up(&bdflush_wait);
if (wait) {
- run_task_queue(&tq_disk);
sleep_on(&bdflush_done);
}
}
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)