patch-2.4.21 linux-2.4.21/drivers/md/md.c

Next file: linux-2.4.21/drivers/md/multipath.c
Previous file: linux-2.4.21/drivers/md/lvm.c
Back to the patch index
Back to the overall index

diff -urN linux-2.4.20/drivers/md/md.c linux-2.4.21/drivers/md/md.c
@@ -445,21 +445,22 @@
 	if (rdev->sb)
 		MD_BUG();
 
-	rdev->sb = (mdp_super_t *) __get_free_page(GFP_KERNEL);
-	if (!rdev->sb) {
+	rdev->sb_page = alloc_page(GFP_KERNEL);
+	if (!rdev->sb_page) {
 		printk(OUT_OF_MEM);
 		return -EINVAL;
 	}
-	md_clear_page(rdev->sb);
+	rdev->sb = (mdp_super_t *) page_address(rdev->sb_page);
 
 	return 0;
 }
 
 static void free_disk_sb(mdk_rdev_t * rdev)
 {
-	if (rdev->sb) {
-		free_page((unsigned long) rdev->sb);
+	if (rdev->sb_page) {
+		page_cache_release(rdev->sb_page);
 		rdev->sb = NULL;
+		rdev->sb_page = NULL;
 		rdev->sb_offset = 0;
 		rdev->size = 0;
 	} else {
@@ -468,12 +469,43 @@
 	}
 }
 
+
+static void bh_complete(struct buffer_head *bh, int uptodate)
+{
+	/* End-of-I/O callback (installed via init_buffer() in sync_page_io): record the result, then wake the waiter. */
+	if (uptodate)
+		set_bit(BH_Uptodate, &bh->b_state);
+	/* b_private holds the submitter's struct completion; on failure BH_Uptodate is simply left untouched. */
+	complete((struct completion*)bh->b_private);
+}
+
+static int sync_page_io(kdev_t dev, unsigned long sector, int size,
+			struct page *page, int rw)
+{
+	struct buffer_head bh;	/* on-stack: safe only because we wait for completion before returning */
+	struct completion event;
+	/* Submit one request of 'size' bytes for 'page' at 'sector' on 'dev' and block until it finishes. */
+	init_completion(&event);
+	init_buffer(&bh, bh_complete, &event);	/* bh_complete() will signal 'event' */
+	bh.b_rdev = dev;
+	bh.b_rsector = sector;
+	bh.b_state	= (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock);	/* BH_Uptodate is not set here */
+	bh.b_size = size;
+	bh.b_page = page;
+	bh.b_reqnext = NULL;
+	bh.b_data = page_address(page);
+	generic_make_request(rw, &bh);	/* 'rw' is passed through; callers in this patch use READ or WRITE */
+	/* Run the tq_disk task queue, then sleep until bh_complete() signals the completion. */
+	run_task_queue(&tq_disk);
+	wait_for_completion(&event);
+	/* Non-zero iff bh_complete() was called with uptodate set; callers treat 0 as failure. */
+	return test_bit(BH_Uptodate, &bh.b_state);
+}
+
 static int read_disk_sb(mdk_rdev_t * rdev)
 {
 	int ret = -EINVAL;
-	struct buffer_head *bh = NULL;
 	kdev_t dev = rdev->dev;
-	mdp_super_t *sb;
 	unsigned long sb_offset;
 
 	if (!rdev->sb) {
@@ -487,22 +519,14 @@
 	 */
 	sb_offset = calc_dev_sboffset(rdev->dev, rdev->mddev, 1);
 	rdev->sb_offset = sb_offset;
-	fsync_dev(dev);
-	set_blocksize (dev, MD_SB_BYTES);
-	bh = bread (dev, sb_offset / MD_SB_BLOCKS, MD_SB_BYTES);
-
-	if (bh) {
-		sb = (mdp_super_t *) bh->b_data;
-		memcpy (rdev->sb, sb, MD_SB_BYTES);
-	} else {
-		printk(NO_SB,partition_name(rdev->dev));
-		goto abort;
+
+	if (!sync_page_io(dev, sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ)) {
+		printk(NO_SB,partition_name(dev));
+		return -EINVAL;
 	}
 	printk(KERN_INFO " [events: %08lx]\n", (unsigned long)rdev->sb->events_lo);
 	ret = 0;
 abort:
-	if (bh)
-		brelse (bh);
 	return ret;
 }
 
@@ -890,10 +914,8 @@
 
 static int write_disk_sb(mdk_rdev_t * rdev)
 {
-	struct buffer_head *bh;
 	kdev_t dev;
 	unsigned long sb_offset, size;
-	mdp_super_t *sb;
 
 	if (!rdev->sb) {
 		MD_BUG();
@@ -928,23 +950,11 @@
 	}
 
 	printk(KERN_INFO "(write) %s's sb offset: %ld\n", partition_name(dev), sb_offset);
-	fsync_dev(dev);
-	set_blocksize(dev, MD_SB_BYTES);
-	bh = getblk(dev, sb_offset / MD_SB_BLOCKS, MD_SB_BYTES);
-	if (!bh) {
-		printk(GETBLK_FAILED, partition_name(dev));
+
+	if (!sync_page_io(dev, sb_offset<<1, MD_SB_BYTES, rdev->sb_page, WRITE)) {
+		printk("md: write_disk_sb failed for device %s\n", partition_name(dev));
 		return 1;
 	}
-	memset(bh->b_data,0,bh->b_size);
-	sb = (mdp_super_t *) bh->b_data;
-	memcpy(sb, rdev->sb, MD_SB_BYTES);
-
-	mark_buffer_uptodate(bh, 1);
-	mark_buffer_dirty(bh);
-	ll_rw_block(WRITE, 1, &bh);
-	wait_on_buffer(bh);
-	brelse(bh);
-	fsync_dev(dev);
 skip:
 	return 0;
 }
@@ -1038,7 +1048,11 @@
 			printk("(skipping faulty ");
 		if (rdev->alias_device)
 			printk("(skipping alias ");
-
+		if (!rdev->faulty && disk_faulty(&rdev->sb->this_disk)) {
+			printk("(skipping new-faulty %s )\n",
+			       partition_name(rdev->dev));
+			continue;
+		}
 		printk("%s ", partition_name(rdev->dev));
 		if (!rdev->faulty && !rdev->alias_device) {
 			printk("[events: %08lx]",
@@ -1065,7 +1079,6 @@
  *   - the device is nonexistent (zero size)
  *   - the device has no valid superblock
  *
- * a faulty rdev _never_ has rdev->sb set.
  */
 static int md_import_device(kdev_t newdev, int on_disk)
 {
@@ -1137,8 +1150,6 @@
 	md_list_add(&rdev->all, &all_raid_disks);
 	MD_INIT_LIST_HEAD(&rdev->pending);
 
-	if (rdev->faulty && rdev->sb)
-		free_disk_sb(rdev);
 	return 0;
 
 abort_free:
@@ -2337,20 +2348,16 @@
 		return -EINVAL;
 	}
 	disk = &mddev->sb->disks[rdev->desc_nr];
-	if (disk_active(disk)) {
-		MD_BUG();
+	if (disk_active(disk))
 		goto busy;
-	}
-	if (disk_removed(disk)) {
-		MD_BUG();
+
+	if (disk_removed(disk))
 		return -EINVAL;
-	}
 
 	err = mddev->pers->diskop(mddev, &disk, DISKOP_HOT_REMOVE_DISK);
-	if (err == -EBUSY) {
-		MD_BUG();
+	if (err == -EBUSY)
 		goto busy;
-	}
+
 	if (err) {
 		MD_BUG();
 		return -EINVAL;
@@ -2388,13 +2395,6 @@
 	}
 
 	persistent = !mddev->sb->not_persistent;
-	size = calc_dev_size(dev, mddev, persistent);
-
-	if (size < mddev->sb->size) {
-		printk(KERN_WARNING "md%d: disk size %d blocks < array size %d\n",
-				mdidx(mddev), size, mddev->sb->size);
-		return -ENOSPC;
-	}
 
 	rdev = find_rdev(mddev, dev);
 	if (rdev)
@@ -2416,6 +2416,14 @@
 		err = -EINVAL;
 		goto abort_export;
 	}
+	size = calc_dev_size(dev, mddev, persistent);
+
+	if (size < mddev->sb->size) {
+		printk(KERN_WARNING "md%d: disk size %d blocks < array size %d\n",
+				mdidx(mddev), size, mddev->sb->size);
+		err = -ENOSPC;
+		goto abort_export;
+	}
 	bind_rdev_to_array(rdev, mddev);
 
 	/*
@@ -2610,21 +2618,8 @@
 			goto done;
 #endif
 
-		case BLKGETSIZE:	/* Return device size */
-			if (!arg) {
-				err = -EINVAL;
-				MD_BUG();
-				goto abort;
-			}
-			err = md_put_user(md_hd_struct[minor].nr_sects,
-						(unsigned long *) arg);
-			goto done;
-
-		case BLKGETSIZE64:	/* Return device size */
-			err = md_put_user((u64)md_hd_struct[minor].nr_sects << 9,
-						(u64 *) arg);
-			goto done;
-
+		case BLKGETSIZE:
+		case BLKGETSIZE64:
 		case BLKRAGET:
 		case BLKRASET:
 		case BLKFLSBUF:
@@ -3056,7 +3051,6 @@
 		return 0;
 	if (!mddev->pers->error_handler
 			|| mddev->pers->error_handler(mddev,rdev) <= 0) {
-		free_disk_sb(rrdev);
 		rrdev->faulty = 1;
 	} else
 		return 1;
@@ -3072,13 +3066,13 @@
 	return 0;
 }
 
-static int status_unused(char * page)
+static void status_unused(struct seq_file *seq)
 {
-	int sz = 0, i = 0;
+	int i = 0;
 	mdk_rdev_t *rdev;
 	struct md_list_head *tmp;
 
-	sz += sprintf(page + sz, "unused devices: ");
+	seq_printf(seq, "unused devices: ");
 
 	ITERATE_RDEV_ALL(rdev,tmp) {
 		if (!rdev->same_set.next && !rdev->same_set.prev) {
@@ -3086,21 +3080,19 @@
 			 * The device is not yet used by any array.
 			 */
 			i++;
-			sz += sprintf(page + sz, "%s ",
+			seq_printf(seq, "%s ",
 				partition_name(rdev->dev));
 		}
 	}
 	if (!i)
-		sz += sprintf(page + sz, "<none>");
+		seq_printf(seq, "<none>");
 
-	sz += sprintf(page + sz, "\n");
-	return sz;
+	seq_printf(seq, "\n");
 }
 
 
-static int status_resync(char * page, mddev_t * mddev)
+static void status_resync(struct seq_file *seq, mddev_t * mddev)
 {
-	int sz = 0;
 	unsigned long max_blocks, resync, res, dt, db, rt;
 
 	resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
@@ -3109,32 +3101,31 @@
 	/*
 	 * Should not happen.
 	 */
-	if (!max_blocks) {
+	if (!max_blocks)
 		MD_BUG();
-		return 0;
-	}
+
 	res = (resync/1024)*1000/(max_blocks/1024 + 1);
 	{
 		int i, x = res/50, y = 20-x;
-		sz += sprintf(page + sz, "[");
+		seq_printf(seq, "[");
 		for (i = 0; i < x; i++)
-			sz += sprintf(page + sz, "=");
-		sz += sprintf(page + sz, ">");
+			seq_printf(seq, "=");
+		seq_printf(seq, ">");
 		for (i = 0; i < y; i++)
-			sz += sprintf(page + sz, ".");
-		sz += sprintf(page + sz, "] ");
+			seq_printf(seq, ".");
+		seq_printf(seq, "] ");
 	}
 	if (!mddev->recovery_running)
 		/*
 		 * true resync
 		 */
-		sz += sprintf(page + sz, " resync =%3lu.%lu%% (%lu/%lu)",
+		seq_printf(seq, " resync =%3lu.%lu%% (%lu/%lu)",
 				res/10, res % 10, resync, max_blocks);
 	else
 		/*
 		 * recovery ...
 		 */
-		sz += sprintf(page + sz, " recovery =%3lu.%lu%% (%lu/%lu)",
+		seq_printf(seq, " recovery =%3lu.%lu%% (%lu/%lu)",
 				res/10, res % 10, resync, max_blocks);
 
 	/*
@@ -3151,84 +3142,156 @@
 	db = resync - (mddev->resync_mark_cnt/2);
 	rt = (dt * ((max_blocks-resync) / (db/100+1)))/100;
 
-	sz += sprintf(page + sz, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
+	seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
 
-	sz += sprintf(page + sz, " speed=%ldK/sec", db/dt);
+	seq_printf(seq, " speed=%ldK/sec", db/dt);
 
-	return sz;
 }
 
-static int md_status_read_proc(char *page, char **start, off_t off,
-			int count, int *eof, void *data)
+
+static void *md_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	int sz = 0, j, size;
-	struct md_list_head *tmp, *tmp2;
-	mdk_rdev_t *rdev;
+	struct list_head *tmp;
+	loff_t l = *pos;
 	mddev_t *mddev;
 
-	sz += sprintf(page + sz, "Personalities : ");
-	for (j = 0; j < MAX_PERSONALITY; j++)
-	if (pers[j])
-		sz += sprintf(page+sz, "[%s] ", pers[j]->name);
+	if (l > 0x10000)
+		return NULL;
+	if (!l--)
+		/* header */
+		return (void*)1;
 
-	sz += sprintf(page+sz, "\n");
+	list_for_each(tmp,&all_mddevs)
+		if (!l--) {
+			mddev = list_entry(tmp, mddev_t, all_mddevs);
+			return mddev;
+		}
+	return (void*)2;/* tail */
+}
 
+static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct list_head *tmp;
+	mddev_t *next_mddev, *mddev = v;
+	
+	++*pos;
+	if (v == (void*)2)
+		return NULL;
 
-	sz += sprintf(page+sz, "read_ahead ");
-	if (read_ahead[MD_MAJOR] == INT_MAX)
-		sz += sprintf(page+sz, "not set\n");
+	if (v == (void*)1)
+		tmp = all_mddevs.next;
 	else
-		sz += sprintf(page+sz, "%d sectors\n", read_ahead[MD_MAJOR]);
+		tmp = mddev->all_mddevs.next;
+	if (tmp != &all_mddevs)
+		next_mddev = list_entry(tmp,mddev_t,all_mddevs);
+	else {
+		next_mddev = (void*)2;
+		*pos = 0x10000;
+	}		
 
-	ITERATE_MDDEV(mddev,tmp) {
-		sz += sprintf(page + sz, "md%d : %sactive", mdidx(mddev),
-						mddev->pers ? "" : "in");
-		if (mddev->pers) {
-			if (mddev->ro)
-				sz += sprintf(page + sz, " (read-only)");
-			sz += sprintf(page + sz, " %s", mddev->pers->name);
-		}
+	return next_mddev;
 
-		size = 0;
-		ITERATE_RDEV(mddev,rdev,tmp2) {
-			sz += sprintf(page + sz, " %s[%d]",
-				partition_name(rdev->dev), rdev->desc_nr);
-			if (rdev->faulty) {
-				sz += sprintf(page + sz, "(F)");
-				continue;
-			}
-			size += rdev->size;
-		}
+}
 
-		if (mddev->nb_dev) {
-			if (mddev->pers)
-				sz += sprintf(page + sz, "\n      %d blocks",
-						 md_size[mdidx(mddev)]);
-			else
-				sz += sprintf(page + sz, "\n      %d blocks", size);
-		}
+static void md_seq_stop(struct seq_file *seq, void *v)
+{
+	/* Intentionally empty: md_seq_start()/md_seq_next() take no lock or reference that would need releasing here. */
+}
 
-		if (!mddev->pers) {
-			sz += sprintf(page+sz, "\n");
+static int md_seq_show(struct seq_file *seq, void *v)
+{
+	int j, size;
+	struct md_list_head *tmp2;
+	mdk_rdev_t *rdev;
+	mddev_t *mddev = v;
+
+	if (v == (void*)1) {
+		seq_printf(seq, "Personalities : ");
+		for (j = 0; j < MAX_PERSONALITY; j++)
+			if (pers[j])
+				seq_printf(seq, "[%s] ", pers[j]->name);
+
+		seq_printf(seq, "\n");
+		seq_printf(seq, "read_ahead ");
+		if (read_ahead[MD_MAJOR] == INT_MAX)
+			seq_printf(seq, "not set\n");
+		else
+			seq_printf(seq, "%d sectors\n", read_ahead[MD_MAJOR]);
+		return 0;
+	}
+	if (v == (void*)2) {
+		status_unused(seq);
+		return 0;
+	}
+
+	seq_printf(seq, "md%d : %sactive", mdidx(mddev),
+		   mddev->pers ? "" : "in");
+	if (mddev->pers) {
+		if (mddev->ro)
+			seq_printf(seq, " (read-only)");
+		seq_printf(seq, " %s", mddev->pers->name);
+	}
+	
+	size = 0;
+	ITERATE_RDEV(mddev,rdev,tmp2) {
+		seq_printf(seq, " %s[%d]",
+			   partition_name(rdev->dev), rdev->desc_nr);
+		if (rdev->faulty) {
+			seq_printf(seq, "(F)");
 			continue;
 		}
+		size += rdev->size;
+	}
 
-		sz += mddev->pers->status (page+sz, mddev);
+	if (mddev->nb_dev) {
+		if (mddev->pers)
+			seq_printf(seq, "\n      %d blocks",
+				   md_size[mdidx(mddev)]);
+		else
+			seq_printf(seq, "\n      %d blocks", size);
+	}
 
-		sz += sprintf(page+sz, "\n      ");
+	if (mddev->pers) {
+
+		mddev->pers->status (seq, mddev);
+
+		seq_printf(seq, "\n      ");
 		if (mddev->curr_resync) {
-			sz += status_resync (page+sz, mddev);
+			status_resync (seq, mddev);
 		} else {
 			if (sem_getcount(&mddev->resync_sem) != 1)
-				sz += sprintf(page + sz, "	resync=DELAYED");
+				seq_printf(seq, "	resync=DELAYED");
 		}
-		sz += sprintf(page + sz, "\n");
 	}
-	sz += status_unused(page + sz);
+	seq_printf(seq, "\n");
 
-	return sz;
+	return 0;
 }
 
+  
+static struct seq_operations md_seq_ops = {	/* iterator handed to seq_open() by md_seq_open() */
+	.start  = md_seq_start,	/* maps *pos to the header token (1), an mddev, or the tail token (2) */
+	.next   = md_seq_next,	/* steps to the following mddev on all_mddevs, then to the tail token */
+	.stop   = md_seq_stop,	/* empty */
+	.show   = md_seq_show,	/* prints the header, one array's status, or the unused-devices tail */
+};
+
+static int md_seq_open(struct inode *inode, struct file *file)
+{
+	int error;
+	/* Attach the md_seq_ops iterator to 'file'; seq_open()'s result is returned unchanged and 'inode' is unused. */
+	error = seq_open(file, &md_seq_ops);
+	return error;
+}
+
+static struct file_operations md_seq_fops = {	/* installed as proc_fops of the "mdstat" entry in md_geninit() */
+	.open           = md_seq_open,	/* binds md_seq_ops to the opened file */
+	.read           = seq_read,	/* the remaining hooks are seq_file's own helpers */
+	.llseek         = seq_lseek,
+	.release	= seq_release,
+};
+
+
 int register_md_personality(int pnum, mdk_personality_t *p)
 {
 	if (pnum >= MAX_PERSONALITY) {
@@ -3628,6 +3691,7 @@
 
 static void md_geninit(void)
 {
+	struct proc_dir_entry *p;
 	int i;
 
 	for(i = 0; i < MAX_MD_DEVS; i++) {
@@ -3644,7 +3708,9 @@
 	dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
 
 #ifdef CONFIG_PROC_FS
-	create_proc_read_entry("mdstat", 0, NULL, md_status_read_proc, NULL);
+	p = create_proc_entry("mdstat", S_IRUGO, NULL);
+	if (p)
+		p->proc_fops = &md_seq_fops;
 #endif
 }
 

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)