patch-2.3.43 linux/fs/buffer.c
- Lines: 1227
- Date: Thu Feb 10 12:16:58 2000
- Orig file: v2.3.42/linux/fs/buffer.c
- Orig date: Fri Jan 7 19:13:22 2000
diff -u --recursive --new-file v2.3.42/linux/fs/buffer.c linux/fs/buffer.c
@@ -94,6 +94,7 @@
kmem_cache_t *bh_cachep;
static int grow_buffers(int size);
+static void __refile_buffer(struct buffer_head *);
/* This is used by some architectures to estimate available memory. */
atomic_t buffermem_pages = ATOMIC_INIT(0);
@@ -277,11 +278,14 @@
void sync_dev(kdev_t dev)
{
- sync_buffers(dev, 0);
sync_supers(dev);
sync_inodes(dev);
- sync_buffers(dev, 0);
DQUOT_SYNC(dev);
+ /* sync all the dirty buffers out to disk only _after_ all the
+    high level layers have finished generating dirty buffer data
+    (or we would return with some buffers still dirty on the block
+    device, breaking the semantics of this call) */
+ sync_buffers(dev, 0);
/*
* FIXME(eric) we need to sync the physical devices here.
* This is because some (scsi) controllers have huge amounts of
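[Editorial sketch] The point of this reordering is that sync_supers() and sync_inodes() can themselves dirty buffers, so flushing first would leave those newly dirtied buffers behind. A minimal sketch of the resulting call order (same names as the patched function; error handling and the FIXME'd controller-level flush are omitted):

	void sync_dev(kdev_t dev)
	{
		sync_supers(dev);	/* may dirty superblock buffers */
		sync_inodes(dev);	/* may dirty inode and indirect blocks */
		DQUOT_SYNC(dev);	/* may dirty quota blocks */
		sync_buffers(dev, 0);	/* only now push everything to the device */
	}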
@@ -323,7 +327,9 @@
struct inode * inode = dentry->d_inode;
struct super_block * sb;
kdev_t dev;
+ int ret;
+ lock_kernel();
/* sync the inode to buffers */
write_inode_now(inode);
@@ -335,7 +341,9 @@
/* .. finally sync the buffers to disk */
dev = inode->i_dev;
- return sync_buffers(dev, 1);
+ ret = sync_buffers(dev, 1);
+ unlock_kernel();
+ return ret;
}
asmlinkage long sys_fsync(unsigned int fd)
@@ -345,7 +353,6 @@
struct inode * inode;
int err;
- lock_kernel();
err = -EBADF;
file = fget(fd);
if (!file)
@@ -371,7 +378,6 @@
out_putf:
fput(file);
out:
- unlock_kernel();
return err;
}
@@ -382,7 +388,6 @@
struct inode * inode;
int err;
- lock_kernel();
err = -EBADF;
file = fget(fd);
if (!file)
@@ -408,44 +413,9 @@
out_putf:
fput(file);
out:
- unlock_kernel();
return err;
}
-void invalidate_buffers(kdev_t dev)
-{
- int nlist;
-
- spin_lock(&lru_list_lock);
- for(nlist = 0; nlist < NR_LIST; nlist++) {
- struct buffer_head * bh;
- int i;
- retry:
- bh = lru_list[nlist];
- if (!bh)
- continue;
- for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
- if (bh->b_dev != dev)
- continue;
- if (buffer_locked(bh)) {
- atomic_inc(&bh->b_count);
- spin_unlock(&lru_list_lock);
- wait_on_buffer(bh);
- spin_lock(&lru_list_lock);
- atomic_dec(&bh->b_count);
- goto retry;
- }
- if (atomic_read(&bh->b_count))
- continue;
- clear_bit(BH_Protected, &bh->b_state);
- clear_bit(BH_Uptodate, &bh->b_state);
- clear_bit(BH_Dirty, &bh->b_state);
- clear_bit(BH_Req, &bh->b_state);
- }
- }
- spin_unlock(&lru_list_lock);
-}
-
/* After several hours of tedious analysis, the following hash
* function won. Do not mess with it... -DaveM
*/
@@ -464,10 +434,12 @@
static __inline__ void __hash_unlink(struct buffer_head *bh)
{
- if (bh->b_next)
- bh->b_next->b_pprev = bh->b_pprev;
- *(bh->b_pprev) = bh->b_next;
- bh->b_pprev = NULL;
+ if (bh->b_pprev) {
+ if (bh->b_next)
+ bh->b_next->b_pprev = bh->b_pprev;
+ *(bh->b_pprev) = bh->b_next;
+ bh->b_pprev = NULL;
+ }
}
static void __insert_into_lru_list(struct buffer_head * bh, int blist)
@@ -514,17 +486,12 @@
bh->b_next_free = bh->b_prev_free = NULL;
}
-/* The following two functions must operate atomically
- * because they control the visibility of a buffer head
- * to the rest of the kernel.
- */
-static __inline__ void __remove_from_queues(struct buffer_head *bh)
+/* must be called with both the hash_table_lock and the lru_list_lock
+ held */
+static void __remove_from_queues(struct buffer_head *bh)
{
- write_lock(&hash_table_lock);
- if (bh->b_pprev)
- __hash_unlink(bh);
+ __hash_unlink(bh);
__remove_from_lru_list(bh, bh->b_list);
- write_unlock(&hash_table_lock);
}
static void insert_into_queues(struct buffer_head *bh)
@@ -547,6 +514,8 @@
struct bh_free_head *head = &free_list[BUFSIZE_INDEX(bh->b_size)];
struct buffer_head **bhp = &head->list;
+ bh->b_state = 0;
+
spin_lock(&head->lock);
bh->b_dev = B_FREE;
if(!*bhp) {
@@ -604,11 +573,73 @@
return 0;
}
+/* If invalidate_buffers() trashes dirty buffers, it means some kind of
+ fs corruption is going on. Trashing dirty data always implies losing
+ information that the user had asked to be stored on the physical layer.
+
+ Thus invalidate_buffers() in general usage is not allowed to trash dirty
+ buffers. For example ioctl(FLSBLKBUF) expects dirty data to be preserved.
+
+ NOTE: if the user removes a removable-media disk while there is still
+ dirty data not synced to disk (due to a bug in the device driver or
+ to a user error), not destroying the dirty buffers could also corrupt
+ the next media inserted; a parameter is therefore needed to handle
+ this case as safely as possible (trying not to corrupt the newly
+ inserted disk with data belonging to the old, now corrupted, one).
+ For the ramdisk, on the other hand, the natural way to release the
+ ramdisk memory is to destroy its dirty buffers.
+
+ These are the two special cases. Normal usage implies that the device
+ driver issues a sync on the device (without waiting for I/O completion)
+ and then an invalidate_buffers call that does not trash dirty buffers. */
+void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
+{
+ int i, nlist, slept;
+ struct buffer_head * bh, * bh_next;
+
+ retry:
+ slept = 0;
+ spin_lock(&lru_list_lock);
+ for(nlist = 0; nlist < NR_LIST; nlist++) {
+ bh = lru_list[nlist];
+ if (!bh)
+ continue;
+ for (i = nr_buffers_type[nlist]; i > 0 ; bh = bh_next, i--) {
+ bh_next = bh->b_next_free;
+ if (bh->b_dev != dev)
+ continue;
+ if (buffer_locked(bh)) {
+ atomic_inc(&bh->b_count);
+ spin_unlock(&lru_list_lock);
+ wait_on_buffer(bh);
+ slept = 1;
+ spin_lock(&lru_list_lock);
+ atomic_dec(&bh->b_count);
+ }
+
+ write_lock(&hash_table_lock);
+ if (!atomic_read(&bh->b_count) &&
+ (destroy_dirty_buffers || !buffer_dirty(bh))) {
+ __remove_from_queues(bh);
+ put_last_free(bh);
+ }
+ write_unlock(&hash_table_lock);
+ if (slept)
+ goto out;
+ }
+ }
+out:
+ spin_unlock(&lru_list_lock);
+ if (slept)
+ goto retry;
+}
+
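[Editorial sketch] The destroy_dirty_buffers argument only matters for the two special cases named in the comment above. A hedged sketch of the expected calling conventions follows; the invalidate_buffers()/destroy_buffers() wrapper names are an assumption, since the corresponding header change is not part of this file:

	/* Assumed wrappers, presumably defined in the header (not in this diff): */
	#define invalidate_buffers(dev)	__invalidate_buffers((dev), 0)	/* keep dirty data */
	#define destroy_buffers(dev)	__invalidate_buffers((dev), 1)	/* ramdisk / media change */

	/* Normal device-driver pattern described in the comment: start
	   writeback without waiting, then drop only the clean buffers. */
	sync_buffers(dev, 0);
	invalidate_buffers(dev);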
void set_blocksize(kdev_t dev, int size)
{
extern int *blksize_size[];
- int i, nlist;
- struct buffer_head * bh, *bhnext;
+ int i, nlist, slept;
+ struct buffer_head * bh, * bh_next;
if (!blksize_size[MAJOR(dev)])
return;
@@ -626,41 +657,53 @@
sync_buffers(dev, 2);
blksize_size[MAJOR(dev)][MINOR(dev)] = size;
- /* We need to be quite careful how we do this - we are moving entries
- * around on the free list, and we can get in a loop if we are not careful.
- */
+ retry:
+ slept = 0;
+ spin_lock(&lru_list_lock);
for(nlist = 0; nlist < NR_LIST; nlist++) {
- repeat:
- spin_lock(&lru_list_lock);
bh = lru_list[nlist];
- for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
- if(!bh)
- break;
-
- bhnext = bh->b_next_free;
- if (bh->b_dev != dev)
- continue;
- if (bh->b_size == size)
- continue;
+ if (!bh)
+ continue;
+ for (i = nr_buffers_type[nlist]; i > 0 ; bh = bh_next, i--) {
+ bh_next = bh->b_next_free;
+ if (bh->b_dev != dev || bh->b_size == size)
+ continue;
if (buffer_locked(bh)) {
atomic_inc(&bh->b_count);
spin_unlock(&lru_list_lock);
wait_on_buffer(bh);
+ slept = 1;
+ spin_lock(&lru_list_lock);
atomic_dec(&bh->b_count);
- goto repeat;
- }
- if (bh->b_dev == dev && bh->b_size != size) {
- clear_bit(BH_Dirty, &bh->b_state);
- clear_bit(BH_Uptodate, &bh->b_state);
- clear_bit(BH_Req, &bh->b_state);
}
- if (atomic_read(&bh->b_count) == 0) {
+
+ write_lock(&hash_table_lock);
+ if (!atomic_read(&bh->b_count)) {
+ if (buffer_dirty(bh))
+ printk(KERN_WARNING
+ "set_blocksize: dev %s buffer_dirty %lu size %hu\n",
+ kdevname(dev), bh->b_blocknr, bh->b_size);
__remove_from_queues(bh);
put_last_free(bh);
+ } else {
+ if (atomic_set_buffer_clean(bh))
+ __refile_buffer(bh);
+ clear_bit(BH_Uptodate, &bh->b_state);
+ printk(KERN_WARNING
+ "set_blocksize: "
+ "b_count %d, dev %s, block %lu, from %p\n",
+ atomic_read(&bh->b_count), bdevname(bh->b_dev),
+ bh->b_blocknr, __builtin_return_address(0));
}
+ write_unlock(&hash_table_lock);
+ if (slept)
+ goto out;
}
- spin_unlock(&lru_list_lock);
}
+ out:
+ spin_unlock(&lru_list_lock);
+ if (slept)
+ goto retry;
}
/*
@@ -785,30 +828,31 @@
atomic_set(&bh->b_count, 1);
}
spin_unlock(&free_list[isize].lock);
- if (!bh)
- goto refill;
- /* OK, FINALLY we know that this buffer is the only one of its kind,
- * we hold a reference (b_count>0), it is unlocked, and it is clean.
+ /*
+ * OK, FINALLY we know that this buffer is the only one of
+ * its kind, we hold a reference (b_count>0), it is unlocked,
+ * and it is clean.
*/
- init_buffer(bh, end_buffer_io_sync, NULL);
- bh->b_dev = dev;
- bh->b_blocknr = block;
- bh->b_state = 1 << BH_Mapped;
+ if (bh) {
+ init_buffer(bh, end_buffer_io_sync, NULL);
+ bh->b_dev = dev;
+ bh->b_blocknr = block;
+ bh->b_state = 1 << BH_Mapped;
- /* Insert the buffer into the regular lists */
- insert_into_queues(bh);
- goto out;
+ /* Insert the buffer into the regular lists */
+ insert_into_queues(bh);
+ out:
+ touch_buffer(bh);
+ return bh;
+ }
/*
* If we block while refilling the free list, somebody may
* create the buffer first ... search the hashes again.
*/
-refill:
refill_freelist(size);
goto repeat;
-out:
- return bh;
}
/* -1 -> no need to flush
@@ -820,11 +864,13 @@
dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT;
tot = nr_free_buffer_pages();
- hard_dirty_limit = tot * bdf_prm.b_un.nfract / 100;
- soft_dirty_limit = hard_dirty_limit >> 1;
+ tot -= size_buffers_type[BUF_PROTECTED] >> PAGE_SHIFT;
- if (dirty > soft_dirty_limit)
- {
+ dirty *= 200;
+ soft_dirty_limit = tot * bdf_prm.b_un.nfract;
+ hard_dirty_limit = soft_dirty_limit * 2;
+
+ if (dirty > soft_dirty_limit) {
if (dirty > hard_dirty_limit)
return 1;
return 0;
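[Editorial sketch] The rewritten thresholds avoid the division while keeping the old percentages: dirty*200 > tot*nfract is the same test as dirty exceeding (nfract/2)% of tot (the old soft limit), and dirty*200 > 2*tot*nfract matches the old nfract% hard limit; the behavioural change is that BUF_PROTECTED pages are now subtracted from tot. A small userspace check of the equivalence, assuming nfract = 40 (the usual bdf_prm default) and ignoring the protected-page subtraction:

	#include <stdio.h>

	int main(void)
	{
		unsigned long tot = 12800, nfract = 40;		/* example values */
		unsigned long old_hard = tot * nfract / 100;	/* 5120 pages */
		unsigned long old_soft = old_hard >> 1;		/* 2560 pages */

		for (unsigned long dirty = 0; dirty <= tot; dirty++) {
			int old_state = dirty > old_soft ?
				(dirty > old_hard ? 1 : 0) : -1;
			unsigned long scaled = dirty * 200;
			int new_state = scaled > tot * nfract ?
				(scaled > 2 * tot * nfract ? 1 : 0) : -1;
			if (old_state != new_state)
				printf("mismatch at dirty=%lu\n", dirty);	/* never prints */
		}
		return 0;
	}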
@@ -848,29 +894,39 @@
wakeup_bdflush(state);
}
-static inline void __mark_dirty(struct buffer_head *bh, int flag)
+static __inline__ void __mark_dirty(struct buffer_head *bh, int flag)
{
bh->b_flushtime = jiffies + (flag ? bdf_prm.b_un.age_super : bdf_prm.b_un.age_buffer);
- clear_bit(BH_New, &bh->b_state);
refile_buffer(bh);
}
+/* atomic version, the user must call balance_dirty() by hand
+ as soon as it becomes possible to block */
void __mark_buffer_dirty(struct buffer_head *bh, int flag)
{
- __mark_dirty(bh, flag);
+ if (!atomic_set_buffer_dirty(bh))
+ __mark_dirty(bh, flag);
+}
+
+void mark_buffer_dirty(struct buffer_head *bh, int flag)
+{
+ __mark_buffer_dirty(bh, flag);
+ balance_dirty(bh->b_dev);
}
/*
* A buffer may need to be moved from one buffer list to another
* (e.g. in case it is not shared any more). Handle this.
*/
-static __inline__ void __refile_buffer(struct buffer_head *bh)
+static void __refile_buffer(struct buffer_head *bh)
{
int dispose = BUF_CLEAN;
if (buffer_locked(bh))
dispose = BUF_LOCKED;
if (buffer_dirty(bh))
dispose = BUF_DIRTY;
+ if (buffer_protected(bh))
+ dispose = BUF_PROTECTED;
if (dispose != bh->b_list) {
__remove_from_lru_list(bh, bh->b_list);
bh->b_list = dispose;
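[Editorial sketch] With the dirty bit now set atomically, __mark_buffer_dirty() can be used in contexts that must not block, and the possibly blocking balance_dirty() call is deferred. This is the pattern __block_commit_write() and __block_write_full_page() follow later in this patch, roughly:

	int need_balance_dirty = 0;

	/* per-buffer work, possibly with spinlocks held */
	if (!atomic_set_buffer_dirty(bh)) {
		__mark_dirty(bh, 0);		/* buffer was clean: queue and refile it */
		need_balance_dirty = 1;
	}

	/* ... once the locks are dropped and blocking is allowed ... */
	if (need_balance_dirty)
		balance_dirty(bh->b_dev);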
@@ -890,8 +946,6 @@
*/
void __brelse(struct buffer_head * buf)
{
- touch_buffer(buf);
-
if (atomic_read(&buf->b_count)) {
atomic_dec(&buf->b_count);
return;
@@ -912,12 +966,10 @@
write_lock(&hash_table_lock);
if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf))
goto in_use;
- if (buf->b_pprev)
- __hash_unlink(buf);
+ __hash_unlink(buf);
write_unlock(&hash_table_lock);
__remove_from_lru_list(buf, buf->b_list);
spin_unlock(&lru_list_lock);
- buf->b_state = 0;
put_last_free(buf);
return;
@@ -1218,13 +1270,13 @@
static void unmap_buffer(struct buffer_head * bh)
{
- if (buffer_mapped(bh))
- {
+ if (buffer_mapped(bh)) {
mark_buffer_clean(bh);
wait_on_buffer(bh);
clear_bit(BH_Uptodate, &bh->b_state);
clear_bit(BH_Mapped, &bh->b_state);
clear_bit(BH_Req, &bh->b_state);
+ clear_bit(BH_New, &bh->b_state);
}
}
@@ -1303,30 +1355,25 @@
static void unmap_underlying_metadata(struct buffer_head * bh)
{
-#if 0
- if (buffer_new(bh)) {
- struct buffer_head *old_bh;
+ struct buffer_head *old_bh;
- old_bh = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size);
- if (old_bh) {
- unmap_buffer(old_bh);
- /* Here we could run brelse or bforget. We use
- bforget because it will try to put the buffer
- in the freelist. */
- __bforget(old_bh);
- }
+ old_bh = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size);
+ if (old_bh) {
+ unmap_buffer(old_bh);
+ /* Here we could run brelse or bforget. We use
+ bforget because it will try to put the buffer
+ in the freelist. */
+ __bforget(old_bh);
}
-#endif
}
/*
* block_write_full_page() is SMP-safe - currently it's still
* being called with the kernel lock held, but the code is ready.
*/
-int block_write_full_page(struct dentry *dentry, struct page *page)
+static int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block)
{
- struct inode *inode = dentry->d_inode;
- int err, i;
+ int err, i, need_balance_dirty = 0;
unsigned long block;
struct buffer_head *bh, *head;
@@ -1337,17 +1384,11 @@
create_empty_buffers(page, inode, inode->i_sb->s_blocksize);
head = page->buffers;
- /* The page cache is now PAGE_CACHE_SIZE aligned, period. We handle old a.out
- * and others via unaligned private mappings.
- */
block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
bh = head;
i = 0;
do {
- if (!bh)
- BUG();
-
/*
* If the buffer isn't up-to-date, we can't be sure
* that the buffer has been initialized with the proper
@@ -1358,18 +1399,25 @@
*/
bh->b_end_io = end_buffer_io_sync;
if (!buffer_mapped(bh)) {
- err = inode->i_op->get_block(inode, block, bh, 1);
+ err = get_block(inode, block, bh, 1);
if (err)
goto out;
- unmap_underlying_metadata(bh);
+ if (buffer_new(bh))
+ unmap_underlying_metadata(bh);
}
set_bit(BH_Uptodate, &bh->b_state);
- mark_buffer_dirty(bh,0);
+ if (!atomic_set_buffer_dirty(bh)) {
+ __mark_dirty(bh, 0);
+ need_balance_dirty = 1;
+ }
bh = bh->b_this_page;
block++;
} while (bh != head);
+ if (need_balance_dirty)
+ balance_dirty(bh->b_dev);
+
SetPageUptodate(page);
return 0;
out:
@@ -1377,13 +1425,12 @@
return err;
}
-int block_write_zero_range(struct inode *inode, struct page *page,
- unsigned zerofrom, unsigned from, unsigned to,
- const char * buf)
+static int __block_prepare_write(struct inode *inode, struct page *page,
+ unsigned from, unsigned to, get_block_t *get_block)
{
- unsigned zeroto = 0, block_start, block_end;
+ unsigned block_start, block_end;
unsigned long block;
- int err = 0, partial = 0, need_balance_dirty = 0;
+ int err = 0;
unsigned blocksize, bbits;
struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
char *kaddr = (char *)kmap(page);
@@ -1396,35 +1443,31 @@
bbits = inode->i_sb->s_blocksize_bits;
block = page->index << (PAGE_CACHE_SHIFT - bbits);
- /*
- * First pass - map what needs to be mapped, initiate reads
- * on the boundaries if needed (i.e. if block is partially covered
- * _and_ is not up-to-date _and_ is not new).
- */
for(bh = head, block_start = 0; bh != head || !block_start;
block++, block_start=block_end, bh = bh->b_this_page) {
if (!bh)
BUG();
block_end = block_start+blocksize;
- if (block_end <= zerofrom)
+ if (block_end <= from)
continue;
if (block_start >= to)
break;
bh->b_end_io = end_buffer_io_sync;
if (!buffer_mapped(bh)) {
- err = inode->i_op->get_block(inode, block, bh, 1);
+ err = get_block(inode, block, bh, 1);
if (err)
goto out;
- unmap_underlying_metadata(bh);
- }
- if (buffer_new(bh)) {
- zeroto = block_end;
- if (block_start < zerofrom)
- zerofrom = block_start;
- continue;
+ if (buffer_new(bh)) {
+ unmap_underlying_metadata(bh);
+ if (block_end > to)
+ memset(kaddr+to, 0, block_end-to);
+ if (block_start < from)
+ memset(kaddr+block_start, 0, from-block_start);
+ continue;
+ }
}
if (!buffer_uptodate(bh) &&
- (block_start < zerofrom || block_end > to)) {
+ (block_start < from || block_end > to)) {
ll_rw_block(READ, 1, &bh);
*wait_bh++=bh;
}
@@ -1438,44 +1481,31 @@
if (!buffer_uptodate(*wait_bh))
goto out;
}
- /*
- * Now we can copy the data.
- */
- if (zerofrom < from)
- memset(kaddr+zerofrom, 0, from-zerofrom);
- if (from < to)
- err = copy_from_user(kaddr+from, buf, to-from);
- if (to < zeroto)
- memset(kaddr+to, 0, zeroto-to);
- else
- zeroto = to;
- if (err < 0)
- goto out;
- /*
- * Second pass: check if all out-of-range blocks are up-to-date
- * and mark the rest up-to-date and dirty.
- *
- * NOTE! This also does a direct dirty balace check,
- * rather than relying on bdflush just waking up every
- * once in a while. This is to catch (and slow down)
- * the processes that write tons of buffer..
- *
- * Note how we do NOT want to do this in the full block
- * case: full pages are flushed not by the people who
- * dirtied them, but by people who need memory. And we
- * should not penalize them for somebody else writing
- * lots of dirty pages.
- */
- for(bh = head, block_start = 0;
+ return 0;
+out:
+ return err;
+}
+
+static int __block_commit_write(struct inode *inode, struct page *page,
+ unsigned from, unsigned to)
+{
+ unsigned block_start, block_end;
+ int partial = 0, need_balance_dirty = 0;
+ unsigned blocksize;
+ struct buffer_head *bh, *head;
+
+ blocksize = inode->i_sb->s_blocksize;
+
+ for(bh = head = page->buffers, block_start = 0;
bh != head || !block_start;
block_start=block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize;
- if (block_end <= zerofrom || block_start >= zeroto) {
+ if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
partial = 1;
} else {
set_bit(BH_Uptodate, &bh->b_state);
- if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
+ if (!atomic_set_buffer_dirty(bh)) {
__mark_dirty(bh, 0);
need_balance_dirty = 1;
}
@@ -1492,51 +1522,202 @@
*/
if (!partial)
SetPageUptodate(page);
- kunmap(page);
return 0;
-out:
- ClearPageUptodate(page);
- kunmap(page);
- return err;
}
-int block_write_partial_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
+/*
+ * Generic "read page" function for block devices that have the normal
+ * get_block functionality. This is most of the block device filesystems.
+ * Reads the page asynchronously --- the unlock_buffer() and
+ * mark_buffer_uptodate() functions propagate buffer state into the
+ * page struct once IO has completed.
+ */
+static inline int __block_read_full_page(struct inode *inode, struct page *page,
+ get_block_t *get_block)
{
- struct inode *inode = file->f_dentry->d_inode;
- int err;
+ unsigned long iblock;
+ struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+ unsigned int blocksize, blocks;
+ unsigned long kaddr = 0;
+ int nr, i;
if (!PageLocked(page))
- BUG();
- if (offset < 0 || offset >= PAGE_SIZE)
- BUG();
- if (bytes+offset < 0 || bytes+offset > PAGE_SIZE)
- BUG();
+ PAGE_BUG(page);
+ blocksize = inode->i_sb->s_blocksize;
+ if (!page->buffers)
+ create_empty_buffers(page, inode, blocksize);
+ head = page->buffers;
+
+ blocks = PAGE_CACHE_SIZE >> inode->i_sb->s_blocksize_bits;
+ iblock = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+ bh = head;
+ nr = 0;
+ i = 0;
+
+ do {
+ if (buffer_uptodate(bh))
+ continue;
+
+ if (!buffer_mapped(bh)) {
+ get_block(inode, iblock, bh, 0);
+ if (!buffer_mapped(bh)) {
+ if (!kaddr)
+ kaddr = kmap(page);
+ memset((char *)(kaddr + i*blocksize), 0, blocksize);
+ set_bit(BH_Uptodate, &bh->b_state);
+ continue;
+ }
+ }
- err = block_write_range(inode, page, offset, bytes, buf);
- return err ? err : bytes;
+ init_buffer(bh, end_buffer_io_async, NULL);
+ atomic_inc(&bh->b_count);
+ arr[nr] = bh;
+ nr++;
+ } while (i++, iblock++, (bh = bh->b_this_page) != head);
+
+ ++current->maj_flt;
+ if (nr) {
+ if (Page_Uptodate(page))
+ BUG();
+ ll_rw_block(READ, nr, arr);
+ } else {
+ /*
+ * all buffers are uptodate - we can set the page
+ * uptodate as well.
+ */
+ SetPageUptodate(page);
+ UnlockPage(page);
+ }
+ if (kaddr)
+ kunmap(page);
+ return 0;
}
/*
* For moronic filesystems that do not allow holes in file.
- * we allow offset==PAGE_SIZE, bytes==0
+ * We may have to extend the file.
*/
-int block_write_cont_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
+int cont_prepare_write(struct page *page, unsigned offset, unsigned to, get_block_t *get_block, unsigned long *bytes)
{
- struct inode *inode = file->f_dentry->d_inode;
- int err;
- unsigned zerofrom = offset;
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = (struct inode*)mapping->host;
+ struct page *new_page;
+ unsigned long pgpos;
+ long status;
+ unsigned zerofrom;
+ unsigned blocksize = inode->i_sb->s_blocksize;
+ char *kaddr;
+
+ while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
+ status = -ENOMEM;
+ new_page = grab_cache_page(mapping, pgpos);
+ if (!new_page)
+ goto out;
+ /* we might sleep */
+ if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
+ UnlockPage(new_page);
+ page_cache_release(new_page);
+ continue;
+ }
+ zerofrom = *bytes & ~PAGE_CACHE_MASK;
+ if (zerofrom & (blocksize-1)) {
+ *bytes |= (blocksize-1);
+ (*bytes)++;
+ }
+ status = __block_prepare_write(inode, new_page, zerofrom,
+ PAGE_CACHE_SIZE, get_block);
+ if (status)
+ goto out_unmap;
+ kaddr = (char*)page_address(page);
+ memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
+ __block_commit_write(inode, new_page, zerofrom, to);
+ kunmap(new_page);
+ UnlockPage(new_page);
+ page_cache_release(new_page);
+ }
+
+ if (page->index < pgpos) {
+ /* completely inside the area */
+ zerofrom = offset;
+ } else {
+ /* page covers the boundary, find the boundary offset */
+ zerofrom = *bytes & ~PAGE_CACHE_MASK;
- if (page->index > (inode->i_size >> PAGE_CACHE_SHIFT))
- zerofrom = 0;
- else if (page->index == (inode->i_size >> PAGE_CACHE_SHIFT) &&
- offset > (inode->i_size & ~PAGE_CACHE_MASK))
- zerofrom = inode->i_size & ~PAGE_CACHE_MASK;
- err = block_write_zero_range(inode, page, zerofrom,offset,offset+bytes,
- buf);
- return err ? err : bytes;
+ /* if we are going to expand the file, the last block will be filled */
+ if (to > zerofrom && (zerofrom & (blocksize-1))) {
+ *bytes |= (blocksize-1);
+ (*bytes)++;
+ }
+
+ /* starting below the boundary? Nothing to zero out */
+ if (offset <= zerofrom)
+ zerofrom = offset;
+ }
+ status = __block_prepare_write(inode, page, zerofrom, to, get_block);
+ if (status)
+ goto out1;
+ kaddr = (char*)page_address(page);
+ if (zerofrom < offset) {
+ memset(kaddr+zerofrom, 0, offset-zerofrom);
+ __block_commit_write(inode, page, zerofrom, offset);
+ }
+ return 0;
+out1:
+ ClearPageUptodate(page);
+ kunmap(page);
+ return status;
+
+out_unmap:
+ ClearPageUptodate(new_page);
+ kunmap(new_page);
+ UnlockPage(new_page);
+ page_cache_release(new_page);
+out:
+ return status;
}
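[Editorial sketch] A hedged sketch of how a filesystem that cannot represent holes would call this from its prepare_write method; myfs_get_block and the mmu_private high-water mark are hypothetical names, and the exact prepare_write prototype at 2.3.43 may differ:

	static int myfs_prepare_write(struct page *page, unsigned from, unsigned to)
	{
		struct inode *inode = (struct inode *) page->mapping->host;

		/* mmu_private would track the highest byte already allocated;
		   cont_prepare_write() zero-fills from there up to `from'. */
		return cont_prepare_write(page, from, to, myfs_get_block,
					  &inode->u.myfs_i.mmu_private);
	}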
+int block_prepare_write(struct page *page, unsigned from, unsigned to,
+ get_block_t *get_block)
+{
+ struct inode *inode = (struct inode*)page->mapping->host;
+ int err = __block_prepare_write(inode, page, from, to, get_block);
+ if (err) {
+ ClearPageUptodate(page);
+ kunmap(page);
+ }
+ return err;
+}
+
+int generic_commit_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
+{
+ __block_commit_write((struct inode*)page->mapping->host,page,from,to);
+ kunmap(page);
+ return 0;
+}
+
+int block_write_full_page(struct page *page, get_block_t *get_block)
+{
+ struct inode *inode = (struct inode*)page->mapping->host;
+ return __block_write_full_page(inode, page, get_block);
+}
+
+int block_read_full_page(struct page *page, get_block_t *get_block)
+{
+ struct inode *inode = (struct inode*)page->mapping->host;
+ return __block_read_full_page(inode, page, get_block);
+}
+
+int generic_block_bmap(struct address_space *mapping, long block, get_block_t *get_block)
+{
+ struct buffer_head tmp;
+ struct inode *inode = (struct inode*)mapping->host;
+ tmp.b_state = 0;
+ tmp.b_blocknr = 0;
+ get_block(inode, block, &tmp, 0);
+ return tmp.b_blocknr;
+}
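[Editorial sketch] Taken together, these exports let a filesystem build its address_space operations around a single get_block routine. A hedged sketch of the wiring; the myfs_* names are hypothetical, and the first argument of readpage/writepage at this point in 2.3.x may be a dentry or a file:

	static int myfs_readpage(struct dentry *dentry, struct page *page)
	{
		return block_read_full_page(page, myfs_get_block);
	}

	static int myfs_writepage(struct dentry *dentry, struct page *page)
	{
		return block_write_full_page(page, myfs_get_block);
	}

	static int myfs_bmap(struct address_space *mapping, long block)
	{
		return generic_block_bmap(mapping, block, myfs_get_block);
	}

	static struct address_space_operations myfs_aops = {
		readpage:	myfs_readpage,
		writepage:	myfs_writepage,
		prepare_write:	myfs_prepare_write,	/* wraps block_prepare_write(), see above */
		commit_write:	generic_commit_write,
		bmap:		myfs_bmap,
	};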
/*
* IO completion routine for a buffer_head being used for kiobuf IO: we
@@ -1814,93 +1995,22 @@
return 0;
}
-/*
- * Generic "read page" function for block devices that have the normal
- * get_block functionality. This is most of the block device filesystems.
- * Reads the page asynchronously --- the unlock_buffer() and
- * mark_buffer_uptodate() functions propagate buffer state into the
- * page struct once IO has completed.
- */
-static inline int __block_read_full_page(struct inode *inode, struct page *page)
-{
- unsigned long iblock;
- struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
- unsigned int blocksize, blocks;
- unsigned long kaddr = 0;
- int nr, i;
-
- if (!PageLocked(page))
- PAGE_BUG(page);
- blocksize = inode->i_sb->s_blocksize;
- if (!page->buffers)
- create_empty_buffers(page, inode, blocksize);
- head = page->buffers;
-
- blocks = PAGE_CACHE_SIZE >> inode->i_sb->s_blocksize_bits;
- iblock = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
- bh = head;
- nr = 0;
- i = 0;
-
- do {
- if (buffer_uptodate(bh))
- continue;
-
- if (!buffer_mapped(bh)) {
- inode->i_op->get_block(inode, iblock, bh, 0);
- if (!buffer_mapped(bh)) {
- if (!kaddr)
- kaddr = kmap(page);
- memset((char *)(kaddr + i*blocksize), 0, blocksize);
- set_bit(BH_Uptodate, &bh->b_state);
- continue;
- }
- }
-
- init_buffer(bh, end_buffer_io_async, NULL);
- atomic_inc(&bh->b_count);
- arr[nr] = bh;
- nr++;
- } while (i++, iblock++, (bh = bh->b_this_page) != head);
-
- ++current->maj_flt;
- if (nr) {
- if (Page_Uptodate(page))
- BUG();
- ll_rw_block(READ, nr, arr);
- } else {
- /*
- * all buffers are uptodate - we can set the page
- * uptodate as well.
- */
- SetPageUptodate(page);
- UnlockPage(page);
- }
- if (kaddr)
- kunmap(page);
- return 0;
-}
-
-int block_read_full_page(struct dentry *dentry, struct page *page)
-{
- return __block_read_full_page(dentry->d_inode, page);
-}
-
int block_symlink(struct inode *inode, const char *symname, int len)
{
- struct page *page = grab_cache_page(&inode->i_data, 0);
- mm_segment_t fs;
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page = grab_cache_page(mapping, 0);
int err = -ENOMEM;
+ char *kaddr;
if (!page)
goto fail;
- fs = get_fs();
- set_fs(KERNEL_DS);
- err = block_write_range(inode, page, 0, len-1, symname);
- set_fs(fs);
- inode->i_size = len-1;
+ err = mapping->a_ops->prepare_write(page, 0, len-1);
if (err)
- goto fail_write;
+ goto fail_map;
+ kaddr = (char*)page_address(page);
+ memcpy(kaddr, symname, len-1);
+ mapping->a_ops->commit_write(NULL, page, 0, len-1);
+ inode->i_size = len-1;
/*
* Notice that we are _not_ going to block here - end of page is
* unmapped, so this will only try to map the rest of page, see
@@ -1908,14 +2018,15 @@
* ->i_size will be enough for everything) and zero it out.
* OTOH it's obviously correct and should make the page up-to-date.
*/
- err = __block_read_full_page(inode, page);
+ err = mapping->a_ops->readpage(NULL, page);
wait_on_page(page);
page_cache_release(page);
if (err < 0)
goto fail;
mark_inode_dirty(inode);
return 0;
-fail_write:
+fail_map:
+ inode->i_size = len-1;
UnlockPage(page);
page_cache_release(page);
fail:
@@ -2000,7 +2111,7 @@
*/
int try_to_free_buffers(struct page * page)
{
- struct buffer_head * tmp, * bh = page->buffers;
+ struct buffer_head * tmp, * p, * bh = page->buffers;
int index = BUFSIZE_INDEX(bh->b_size);
int ret;
@@ -2009,7 +2120,7 @@
spin_lock(&free_list[index].lock);
tmp = bh;
do {
- struct buffer_head * p = tmp;
+ p = tmp;
tmp = tmp->b_this_page;
if (buffer_busy(p))
@@ -2025,13 +2136,10 @@
/* The buffer can be either on the regular
* queues or on the free list..
*/
- if (p->b_dev == B_FREE) {
+ if (p->b_dev != B_FREE)
+ __remove_from_queues(p);
+ else
__remove_from_free_list(p, index);
- } else {
- if (p->b_pprev)
- __hash_unlink(p);
- __remove_from_lru_list(p, p->b_list);
- }
__put_unused_buffer_head(p);
} while (tmp != bh);
spin_unlock(&unused_list_lock);
@@ -2051,7 +2159,8 @@
busy_buffer_page:
/* Uhhuh, start writeback so that we don't end up with all dirty pages */
- wakeup_bdflush(0);
+ if (buffer_dirty(p))
+ wakeup_bdflush(0);
ret = 0;
goto out;
}
@@ -2065,7 +2174,7 @@
int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
int protected = 0;
int nlist;
- static char *buf_types[NR_LIST] = { "CLEAN", "LOCKED", "DIRTY" };
+ static char *buf_types[NR_LIST] = { "CLEAN", "LOCKED", "DIRTY", "PROTECTED", };
#endif
printk("Buffer memory: %6dkB\n",
@@ -2091,10 +2200,16 @@
used++, lastused = found;
bh = bh->b_next_free;
} while (bh != lru_list[nlist]);
- printk("%8s: %d buffers, %d used (last=%d), "
+ {
+ int tmp = nr_buffers_type[nlist];
+ if (found != tmp)
+ printk("%9s: BUG -> found %d, reported %d\n",
+ buf_types[nlist], found, tmp);
+ }
+ printk("%9s: %d buffers, %lu kbyte, %d used (last=%d), "
"%d locked, %d protected, %d dirty\n",
- buf_types[nlist], found, used, lastused,
- locked, protected, dirty);
+ buf_types[nlist], found, size_buffers_type[nlist]>>10,
+ used, lastused, locked, protected, dirty);
}
spin_unlock(&lru_list_lock);
#endif
@@ -2184,8 +2299,7 @@
if (current == bdflush_tsk)
return;
- if (!block)
- {
+ if (!block) {
wake_up_process(bdflush_tsk);
return;
}
@@ -2210,7 +2324,7 @@
as all dirty buffers live _only_ in the DIRTY lru list.
As we never browse the LOCKED and CLEAN lru lists they are in fact
completely useless. */
-static void flush_dirty_buffers(int check_flushtime)
+static int flush_dirty_buffers(int check_flushtime)
{
struct buffer_head * bh, *next;
int flushed = 0, i;
@@ -2220,29 +2334,24 @@
bh = lru_list[BUF_DIRTY];
if (!bh)
goto out_unlock;
- for (i = nr_buffers_type[BUF_DIRTY]; i-- > 0; bh = next)
- {
+ for (i = nr_buffers_type[BUF_DIRTY]; i-- > 0; bh = next) {
next = bh->b_next_free;
- if (!buffer_dirty(bh))
- {
+ if (!buffer_dirty(bh)) {
__refile_buffer(bh);
continue;
}
if (buffer_locked(bh))
continue;
- if (check_flushtime)
- {
- /* The dirty lru list is chronogical ordered so
+ if (check_flushtime) {
+ /* The dirty lru list is chronologically ordered so
if the current bh is not yet timed out,
then also all the following bhs
will be too young. */
if (time_before(jiffies, bh->b_flushtime))
goto out_unlock;
- }
- else
- {
+ } else {
if (++flushed > bdf_prm.b_un.ndirty)
goto out_unlock;
}
@@ -2259,6 +2368,8 @@
}
out_unlock:
spin_unlock(&lru_list_lock);
+
+ return flushed;
}
/*
@@ -2342,6 +2453,7 @@
*/
int bdflush(void * unused)
{
+ int flushed;
/*
* We have a bare-bones task_struct, and really should fill
* in a few more things so "top" and /proc/2/{exe,root,cwd}
@@ -2363,7 +2475,7 @@
for (;;) {
CHECK_EMERGENCY_SYNC
- flush_dirty_buffers(0);
+ flushed = flush_dirty_buffers(0);
/* If wakeup_bdflush will wakeup us
after our bdflush_done wakeup, then
@@ -2378,10 +2490,10 @@
/*
* If there are still a lot of dirty buffers around,
* skip the sleep and flush some more. Otherwise, we
- * sleep for a while.
+ * go to sleep waiting for a wakeup.
*/
- if (balance_dirty_state(NODEV) < 0)
- schedule_timeout(5*HZ);
+ if (!flushed || balance_dirty_state(NODEV) < 0)
+ schedule();
/* Remember to mark us as running otherwise
the next schedule will block. */
__set_current_state(TASK_RUNNING);
@@ -2413,24 +2525,19 @@
for (;;) {
/* update interval */
interval = bdf_prm.b_un.interval;
- if (interval)
- {
+ if (interval) {
tsk->state = TASK_INTERRUPTIBLE;
schedule_timeout(interval);
- }
- else
- {
+ } else {
stop_kupdate:
tsk->state = TASK_STOPPED;
schedule(); /* wait for SIGCONT */
}
/* check for sigstop */
- if (signal_pending(tsk))
- {
+ if (signal_pending(tsk)) {
int stopped = 0;
spin_lock_irq(&tsk->sigmask_lock);
- if (sigismember(&tsk->signal, SIGSTOP))
- {
+ if (sigismember(&tsk->signal, SIGSTOP)) {
sigdelset(&tsk->signal, SIGSTOP);
stopped = 1;
}