patch-2.3.46 linux/drivers/block/ll_rw_blk.c
- Lines: 623
- Date: Wed Feb 16 14:36:45 2000
- Orig file: v2.3.45/linux/drivers/block/ll_rw_blk.c
- Orig date: Thu Feb 10 17:11:07 2000
diff -u --recursive --new-file v2.3.45/linux/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 1994, Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
*/
/*
@@ -27,6 +28,8 @@
#include <linux/module.h>
+#define DEBUG_ELEVATOR
+
/*
* MAC Floppy IWM hooks
*/
@@ -147,6 +150,18 @@
return ret;
}
+static inline int get_request_latency(elevator_t * elevator, int rw)
+{
+ int latency;
+
+ if (rw != READ)
+ latency = elevator->write_latency;
+ else
+ latency = elevator->read_latency;
+
+ return latency;
+}
+
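The latency fetched here is a budget, measured in queue segments, that differs by transfer direction. elevator_t itself is declared in the headers rather than in this file; pieced together from its uses in this patch alone, it must look roughly like the sketch below (the field names are the ones referenced in this file; the rest is inference, not the real definition):

	typedef struct elevator_s {
		int sequence;		/* insertion clock, advanced per request and merge */
		int read_latency;	/* segment budget a queued READ may be passed by */
		int write_latency;	/* the same budget for WRITEs, normally larger */
		int max_bomb_segments;	/* WRITE growth cap while READs are pending */
		int read_pendings;	/* number of READ requests currently queued */
	} elevator_t;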
void blk_cleanup_queue(request_queue_t * q)
{
memset(q, 0, sizeof(*q));
@@ -167,55 +182,49 @@
q->make_request_fn = mfn;
}
-static int ll_merge_fn(request_queue_t *q, struct request *req,
- struct buffer_head *bh)
+static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
{
- if (req->bhtail->b_data + req->bhtail->b_size != bh->b_data) {
- if (req->nr_segments < MAX_SEGMENTS) {
- req->nr_segments++;
- return 1;
- }
- return 0;
+ if (req->nr_segments < max_segments) {
+ req->nr_segments++;
+ q->nr_segments++;
+ return 1;
}
- return 1;
+ return 0;
+}
+
+static int ll_back_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int max_segments)
+{
+ if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
+ return 1;
+ return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_front_merge_fn(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int max_segments)
+{
+ if (bh->b_data + bh->b_size == req->bh->b_data)
+ return 1;
+ return ll_new_segment(q, req, max_segments);
}
static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
- struct request *next)
+ struct request *next, int max_segments)
{
int total_segments = req->nr_segments + next->nr_segments;
- if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
+ if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) {
total_segments--;
+ q->nr_segments--;
+ }
- if (total_segments > MAX_SEGMENTS)
+ if (total_segments > max_segments)
return 0;
req->nr_segments = total_segments;
return 1;
}
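The split into back and front variants keeps one shared rule: a buffer that is physically contiguous with the request's tail (or head) merges without cost, while a discontiguous one needs a new segment, which ll_new_segment only grants while the request is under max_segments. A minimal userspace model of the contiguity test, with toy types standing in for buffer_head:

	#include <assert.h>
	#include <stddef.h>

	struct toy_bh { char *b_data; size_t b_size; };

	/* A buffer extends the tail segment only when its data starts
	 * exactly where the previous buffer ends. */
	static int contiguous(const struct toy_bh *tail, const struct toy_bh *bh)
	{
		return tail->b_data + tail->b_size == bh->b_data;
	}

	int main(void)
	{
		char page[8192];
		struct toy_bh a = { page, 4096 }, b = { page + 4096, 4096 };

		assert(contiguous(&a, &b));	/* back to back: free merge */
		assert(!contiguous(&b, &a));	/* reversed: costs a segment */
		return 0;
	}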
-void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
-{
- q->request_fn = rfn;
- q->current_request = NULL;
- q->merge_fn = ll_merge_fn;
- q->merge_requests_fn = ll_merge_requests_fn;
- q->make_request_fn = NULL;
- q->plug_tq.sync = 0;
- q->plug_tq.routine = &generic_unplug_device;
- q->plug_tq.data = q;
- q->plugged = 0;
- /*
- * These booleans describe the queue properties. We set the
- * default (and most common) values here. Other drivers can
- * use the appropriate functions to alter the queue properties
- * as appropriate.
- */
- q->plug_device_fn = NULL;
- q->head_active = 1;
-}
-
/*
* "plug" the device if there are no outstanding requests: this will
* force the transfer to start only after we have put all the requests
@@ -224,19 +233,44 @@
* This is called with interrupts off and no requests on the queue.
 * (and with the request spinlock acquired)
*/
-inline void generic_plug_device (request_queue_t *q, kdev_t dev)
+static void generic_plug_device (request_queue_t *q, kdev_t dev)
{
+#ifdef CONFIG_BLK_DEV_MD
if (MAJOR(dev) == MD_MAJOR) {
spin_unlock_irq(&io_request_lock);
BUG();
}
- if (q->current_request)
+#endif
+ if (!list_empty(&q->queue_head))
return;
q->plugged = 1;
queue_task(&q->plug_tq, &tq_disk);
}
+void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
+{
+ INIT_LIST_HEAD(&q->queue_head);
+ q->elevator = ELEVATOR_DEFAULTS;
+ q->request_fn = rfn;
+ q->back_merge_fn = ll_back_merge_fn;
+ q->front_merge_fn = ll_front_merge_fn;
+ q->merge_requests_fn = ll_merge_requests_fn;
+ q->make_request_fn = NULL;
+ q->plug_tq.sync = 0;
+ q->plug_tq.routine = &generic_unplug_device;
+ q->plug_tq.data = q;
+ q->plugged = 0;
+ /*
+ * These booleans describe the queue properties. We set the
+ * default (and most common) values here. Other drivers can
+ * use the appropriate functions to alter the queue properties
+ * as appropriate.
+ */
+ q->plug_device_fn = generic_plug_device;
+ q->head_active = 1;
+}
+
/*
* remove the plug and let it rip..
*/
@@ -248,7 +282,7 @@
spin_lock_irqsave(&io_request_lock,flags);
if (q->plugged) {
q->plugged = 0;
- if (q->current_request)
+ if (!list_empty(&q->queue_head))
(q->request_fn)(q);
}
spin_unlock_irqrestore(&io_request_lock,flags);
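Both the plug and unplug paths now test list_empty(&q->queue_head) instead of the old current_request pointer, but the idea is unchanged: defer calling the request function until a burst of requests has accumulated, then release them together. A userspace sketch of that batching pattern, with invented names:

	#include <stdio.h>

	struct toy_queue { int plugged; int pending; };

	static void toy_plug(struct toy_queue *q)
	{
		if (q->pending == 0)	/* only an empty queue gets plugged */
			q->plugged = 1;
	}

	static void toy_unplug(struct toy_queue *q)
	{
		if (q->plugged) {
			q->plugged = 0;
			if (q->pending)
				printf("dispatching %d batched requests\n",
				       q->pending);
		}
	}

	int main(void)
	{
		struct toy_queue q = { 0, 0 };

		toy_plug(&q);	/* queue empty: plug it */
		q.pending = 3;	/* requests pile up while plugged */
		toy_unplug(&q);	/* tq_disk runs: the batch goes out at once */
		return 0;
	}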
@@ -388,6 +422,124 @@
printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
}
+/* elevator */
+
+#define elevator_sequence_after(a,b) ((int)((b)-(a)) < 0)
+#define elevator_sequence_before(a,b) elevator_sequence_after(b,a)
+#define elevator_sequence_after_eq(a,b) ((int)((b)-(a)) <= 0)
+#define elevator_sequence_before_eq(a,b) elevator_sequence_after_eq(b,a)
+
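These macros compare sequence numbers through a signed subtraction, so the ordering stays correct even when the counter wraps around (the same trick TCP uses for its sequence numbers). A self-contained demonstration:

	#include <assert.h>

	#define seq_after(a, b)	((int)((b) - (a)) < 0)	/* same form as above */

	int main(void)
	{
		unsigned int s = 0xfffffffeu;

		assert(seq_after(s + 5, s));	/* s + 5 wraps to 3, still "after" s */
		assert(!seq_after(s, s + 5));
		return 0;
	}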
+static inline struct list_head * seek_to_not_starving_chunk(request_queue_t * q,
+ int * lat, int * starving)
+{
+ int sequence = q->elevator.sequence;
+ struct list_head * entry = q->queue_head.prev;
+ int pos = 0;
+
+ do {
+ struct request * req = blkdev_entry_to_request(entry);
+ if (elevator_sequence_before(req->elevator_sequence, sequence)) {
+ *lat -= q->nr_segments - pos;
+ *starving = 1;
+ return entry;
+ }
+ pos += req->nr_segments;
+ } while ((entry = entry->prev) != &q->queue_head);
+
+ *starving = 0;
+
+ return entry->next;
+}
+
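The scan runs backwards from the queue tail: the first request whose stamp has already fallen behind the elevator clock is starving, and it becomes a boundary the new request may not be sorted in front of; the caller's latency budget is also charged for every segment past that point. The backward walk, reduced to plain arrays:

	#include <stdio.h>

	int main(void)
	{
		int stamp[] = { 95, 104, 118, 130 };	/* queued requests, head first */
		int clock = 110;			/* current elevator sequence */
		int i;

		for (i = 3; i >= 0; i--)	/* tail to head, as above */
			if ((int)(clock - stamp[i]) > 0)
				break;		/* stamp[i] fell behind: starving */
		printf("no-pass boundary at request %d\n", i);	/* prints 1 */
		return 0;
	}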
+static inline void elevator_merge_requests(elevator_t * e, struct request * req, struct request * next)
+{
+ if (elevator_sequence_before(next->elevator_sequence, req->elevator_sequence))
+ req->elevator_sequence = next->elevator_sequence;
+ if (req->cmd == READ)
+ e->read_pendings--;
+
+}
+
+static inline int elevator_sequence(elevator_t * e, int latency)
+{
+ return latency + e->sequence;
+}
+
+#define elevator_merge_before(q, req, lat) __elevator_merge((q), (req), (lat), 0)
+#define elevator_merge_after(q, req, lat) __elevator_merge((q), (req), (lat), 1)
+static inline void __elevator_merge(request_queue_t * q, struct request * req, int latency, int after)
+{
+#ifdef DEBUG_ELEVATOR
+ int sequence = elevator_sequence(&q->elevator, latency);
+ if (after)
+ sequence -= req->nr_segments;
+ if (elevator_sequence_before(sequence, req->elevator_sequence)) {
+ static int warned = 0;
+ if (!warned) {
+ printk(KERN_WARNING __FUNCTION__
+ ": req latency %d new latency %d\n",
+ req->elevator_sequence - q->elevator.sequence,
+ sequence - q->elevator.sequence);
+ warned = 1;
+ }
+ req->elevator_sequence = sequence;
+ }
+#endif
+}
+
+static inline void elevator_queue(request_queue_t * q,
+ struct request * req,
+ struct list_head * entry,
+ int latency, int starving)
+{
+ struct request * tmp, * __tmp;
+ int __latency = latency;
+
+ __tmp = tmp = blkdev_entry_to_request(entry);
+
+ for (;; tmp = blkdev_next_request(tmp))
+ {
+ if ((latency -= tmp->nr_segments) <= 0)
+ {
+ tmp = __tmp;
+ latency = __latency;
+
+ if (starving)
+ break;
+
+ if (q->head_active && !q->plugged)
+ {
+ latency -= tmp->nr_segments;
+ break;
+ }
+
+ list_add(&req->queue, &q->queue_head);
+ goto after_link;
+ }
+
+ if (tmp->queue.next == &q->queue_head)
+ break;
+
+ {
+ const int after_current = IN_ORDER(tmp,req);
+ const int before_next = IN_ORDER(req,blkdev_next_request(tmp));
+
+ if (!IN_ORDER(tmp,blkdev_next_request(tmp))) {
+ if (after_current || before_next)
+ break;
+ } else {
+ if (after_current && before_next)
+ break;
+ }
+ }
+ }
+
+ list_add(&req->queue, &tmp->queue);
+
+ after_link:
+ req->elevator_sequence = elevator_sequence(&q->elevator, latency);
+}
+
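This is where the budget is spent: the new request walks the queue, paying each queued request's segment count out of its remaining latency, and as soon as the budget runs dry it is inserted right there instead of being sorted further back (with the head-active and starving cases special-cased above). That is what bounds how long any queued request can keep being passed over. The core of the walk as a runnable toy:

	#include <stdio.h>

	int main(void)
	{
		int nr_segments[] = { 4, 8, 2, 16 };	/* queued requests in order */
		int latency = 12;		/* newcomer's remaining budget */
		int i;

		for (i = 0; i < 4; i++)
			if ((latency -= nr_segments[i]) <= 0)
				break;	/* budget spent: insert at position i */
		printf("insert before queued request %d\n", i);	/* prints 1 */
		return 0;
	}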
/*
* add-request adds a request to the linked list.
 * It disables interrupts (acquires the request spinlock) so that it can muck
@@ -398,32 +550,20 @@
* which is important for drive_stat_acct() above.
*/
-static inline void __add_request(request_queue_t * q, struct request * req)
+static inline void __add_request(request_queue_t * q, struct request * req,
+ int empty, struct list_head * entry,
+ int latency, int starving)
{
- int major = MAJOR(req->rq_dev);
- struct request * tmp;
+ int major;
drive_stat_acct(req, req->nr_sectors, 1);
- req->next = NULL;
- if (!(tmp = q->current_request)) {
- q->current_request = req;
+ if (empty) {
+ req->elevator_sequence = elevator_sequence(&q->elevator, latency);
+ list_add(&req->queue, &q->queue_head);
return;
}
- for ( ; tmp->next ; tmp = tmp->next) {
- const int after_current = IN_ORDER(tmp,req);
- const int before_next = IN_ORDER(req,tmp->next);
-
- if (!IN_ORDER(tmp,tmp->next)) {
- if (after_current || before_next)
- break;
- } else {
- if (after_current && before_next)
- break;
- }
- }
- req->next = tmp->next;
- tmp->next = req;
+ elevator_queue(q, req, entry, latency, starving);
/*
* FIXME(eric) I don't understand why there is a need for this
@@ -432,6 +572,7 @@
* I am leaving this in here until I hear back from the COMPAQ
* people.
*/
+ major = MAJOR(req->rq_dev);
if (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7)
{
(q->request_fn)(q);
@@ -448,12 +589,14 @@
*/
static inline void attempt_merge (request_queue_t * q,
struct request *req,
- int max_sectors)
+ int max_sectors,
+ int max_segments)
{
- struct request *next = req->next;
-
- if (!next)
+ struct request *next;
+
+ if (req->queue.next == &q->queue_head)
return;
+ next = blkdev_next_request(req);
if (req->sector + req->nr_sectors != next->sector)
return;
if (next->sem || req->cmd != next->cmd || req->rq_dev != next->rq_dev || req->nr_sectors + next->nr_sectors > max_sectors)
@@ -464,25 +607,79 @@
* will have been updated to the appropriate number,
* and we shouldn't do it here too.
*/
- if(!(q->merge_requests_fn)(q, req, next))
+ if(!(q->merge_requests_fn)(q, req, next, max_segments))
return;
+ elevator_merge_requests(&q->elevator, req, next);
req->bhtail->b_reqnext = next->bh;
req->bhtail = next->bhtail;
req->nr_sectors += next->nr_sectors;
next->rq_status = RQ_INACTIVE;
- req->next = next->next;
+ list_del(&next->queue);
wake_up (&wait_for_request);
}
+static inline void elevator_debug(request_queue_t * q, kdev_t dev)
+{
+#ifdef DEBUG_ELEVATOR
+ int read_pendings = 0, nr_segments = 0;
+ elevator_t * elevator = &q->elevator;
+ struct list_head * entry = &q->queue_head;
+ static int counter;
+
+ if (counter++ % 100)
+ return;
+
+ while ((entry = entry->next) != &q->queue_head)
+ {
+ struct request * req;
+
+ req = blkdev_entry_to_request(entry);
+ if (!req->q)
+ continue;
+ if (req->cmd == READ)
+ read_pendings++;
+ nr_segments += req->nr_segments;
+ }
+
+ if (read_pendings != elevator->read_pendings)
+ {
+ printk(KERN_WARNING
+ "%s: elevator read_pendings %d should be %d\n",
+ kdevname(dev), elevator->read_pendings,
+ read_pendings);
+ elevator->read_pendings = read_pendings;
+ }
+ if (nr_segments != q->nr_segments)
+ {
+ printk(KERN_WARNING
+ "%s: elevator nr_segments %d should be %d\n",
+ kdevname(dev), q->nr_segments,
+ nr_segments);
+ q->nr_segments = nr_segments;
+ }
+#endif
+}
+
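elevator_debug() only exists under DEBUG_ELEVATOR (switched on near the top of this patch): every 100th call it recounts read_pendings and nr_segments straight from the queue, then reports and repairs any drift, keeping the incremental accounting honest during development. The sampling pattern in isolation:

	#include <stdio.h>

	static void check_invariants(void)
	{
		static int counter;

		if (counter++ % 100)	/* full recount only once per 100 calls */
			return;
		printf("recounting queue state\n");	/* expensive check here */
	}

	int main(void)
	{
		int i;

		for (i = 0; i < 250; i++)
			check_invariants();	/* prints on calls 0, 100, 200 */
		return 0;
	}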
+static inline void elevator_account_request(request_queue_t * q, struct request * req)
+{
+ q->elevator.sequence++;
+ if (req->cmd == READ)
+ q->elevator.read_pendings++;
+ q->nr_segments++;
+}
+
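Every new request advances the sequence clock by one here (and each merge adds another tick in __make_request below), so a request stamped at insertion with clock-plus-budget by elevator_sequence() turns starving once enough later traffic has pushed the clock past its stamp. Worked with illustrative numbers:

	#include <stdio.h>

	int main(void)
	{
		int sequence = 100;		/* elevator clock at insertion */
		int stamp = sequence + 8;	/* remaining budget was 8 */

		sequence += 10;			/* ten insertions/merges later */
		printf("starving: %s\n",
		       (int)(sequence - stamp) > 0 ? "yes" : "no");	/* yes */
		return 0;
	}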
static inline void __make_request(request_queue_t * q, int rw,
struct buffer_head * bh)
{
int major = MAJOR(bh->b_rdev);
unsigned int sector, count;
- struct request * req;
+ int max_segments = MAX_SEGMENTS;
+ struct request * req, * prev;
int rw_ahead, max_req, max_sectors;
unsigned long flags;
+ int orig_latency, latency, __latency, starving, __starving, empty;
+ struct list_head * entry, * __entry;
count = bh->b_size >> 9;
sector = bh->b_rsector;
@@ -569,21 +766,33 @@
*/
max_sectors = get_max_sectors(bh->b_rdev);
+ __latency = orig_latency = get_request_latency(&q->elevator, rw);
+
/*
* Now we acquire the request spinlock, we have to be mega careful
* not to schedule or do something nonatomic
*/
spin_lock_irqsave(&io_request_lock,flags);
- req = q->current_request;
- if (!req) {
- /* MD and loop can't handle plugging without deadlocking */
- if (q->plug_device_fn)
- q->plug_device_fn(q, bh->b_rdev); /* is atomic */
- else
- generic_plug_device(q, bh->b_rdev); /* is atomic */
+ elevator_debug(q, bh->b_rdev);
+
+ empty = 0;
+ if (list_empty(&q->queue_head)) {
+ empty = 1;
+ q->plug_device_fn(q, bh->b_rdev); /* is atomic */
goto get_rq;
}
+ /* avoid write-bombs to not hurt interactiveness of reads */
+ if (rw != READ && q->elevator.read_pendings)
+ max_segments = q->elevator.max_bomb_segments;
+
+ entry = seek_to_not_starving_chunk(q, &__latency, &starving);
+
+ __entry = entry;
+ __starving = starving;
+
+ latency = __latency;
+
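The bomb cap is the one anti-starvation measure aimed at merging rather than sorting: while any READ is pending, a WRITE request may only grow to max_bomb_segments instead of the usual MAX_SEGMENTS, so a long streaming write cannot keep swallowing buffers ahead of waiting reads. Reduced to a standalone function (kernel names, toy context):

	#define READ 0	/* matches the kernel's direction constant */

	static int choose_max_segments(int rw, int read_pendings,
				       int max_segments, int max_bomb_segments)
	{
		if (rw != READ && read_pendings)
			return max_bomb_segments;
		return max_segments;
	}

	int main(void)
	{
		/* a WRITE (rw = 1) with one READ pending gets the small cap */
		return choose_max_segments(1, 1, 128, 4) == 4 ? 0 : 1;
	}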
if (q->head_active && !q->plugged) {
/*
* The scsi disk and cdrom drivers completely remove the request
@@ -595,11 +804,18 @@
* entry may be busy being processed and we thus can't change
* it.
*/
- if ((req = req->next) == NULL)
- goto get_rq;
+ if (entry == q->queue_head.next) {
+ latency -= blkdev_entry_to_request(entry)->nr_segments;
+ if ((entry = entry->next) == &q->queue_head)
+ goto get_rq;
+ starving = 0;
+ }
}
+ prev = NULL;
do {
+ req = blkdev_entry_to_request(entry);
+
if (req->sem)
continue;
if (req->cmd != rw)
@@ -610,6 +826,8 @@
continue;
/* Can we add it to the end of this request? */
if (req->sector + req->nr_sectors == sector) {
+ if (latency - req->nr_segments < 0)
+ break;
/*
* The merge_fn is a more advanced way
* of accomplishing the same task. Instead
@@ -622,16 +840,21 @@
* may suggest that we shouldn't merge
* this
*/
- if(!(q->merge_fn)(q, req, bh))
+ if(!(q->back_merge_fn)(q, req, bh, max_segments))
continue;
req->bhtail->b_reqnext = bh;
req->bhtail = bh;
req->nr_sectors += count;
drive_stat_acct(req, count, 0);
+
+ elevator_merge_after(q, req, latency);
+
/* Can we now merge this req with the next? */
- attempt_merge(q, req, max_sectors);
+ attempt_merge(q, req, max_sectors, max_segments);
/* or to the beginning? */
} else if (req->sector - count == sector) {
+ if (!prev && starving)
+ continue;
/*
* The merge_fn is a more advanced way
* of accomplishing the same task. Instead
@@ -644,7 +867,7 @@
* may suggest that we shouldn't merge
* this
*/
- if(!(q->merge_fn)(q, req, bh))
+ if(!(q->front_merge_fn)(q, req, bh, max_segments))
continue;
bh->b_reqnext = req->bh;
req->bh = bh;
@@ -653,13 +876,21 @@
req->sector = sector;
req->nr_sectors += count;
drive_stat_acct(req, count, 0);
+
+ elevator_merge_before(q, req, latency);
+
+ if (prev)
+ attempt_merge(q, prev, max_sectors, max_segments);
} else
continue;
+ q->elevator.sequence++;
spin_unlock_irqrestore(&io_request_lock,flags);
return;
- } while ((req = req->next) != NULL);
+ } while (prev = req,
+ (latency -= req->nr_segments) >= 0 &&
+ (entry = entry->next) != &q->queue_head);
/* find an unused request. */
get_rq:
@@ -675,6 +906,14 @@
goto end_io;
req = __get_request_wait(max_req, bh->b_rdev);
spin_lock_irqsave(&io_request_lock,flags);
+
+ /* lock got dropped so revalidate elevator */
+ empty = 1;
+ if (!list_empty(&q->queue_head)) {
+ empty = 0;
+ __latency = orig_latency;
+ __entry = seek_to_not_starving_chunk(q, &__latency, &__starving);
+ }
}
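__get_request_wait() can sleep, and sleeping means io_request_lock was dropped and retaken, so everything computed under the lock earlier (empty, the insertion entry, the remaining latency) is stale and has to be derived again, which is exactly what this block does. The same discipline in generic userspace form:

	#include <pthread.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static int shared_state;

	static int blocking_user(void)
	{
		int seen;

		pthread_mutex_lock(&lock);
		seen = shared_state;	/* valid only while the lock is held */
		pthread_mutex_unlock(&lock);

		/* ...anything that can sleep happens here... */

		pthread_mutex_lock(&lock);
		seen = shared_state;	/* stale after the gap: re-read it */
		pthread_mutex_unlock(&lock);
		return seen;
	}

	int main(void)
	{
		return blocking_user();
	}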
/*
 * Don't start the IO if the buffer has been
@@ -707,8 +946,10 @@
req->sem = NULL;
req->bh = bh;
req->bhtail = bh;
- req->next = NULL;
- __add_request(q, req);
+ req->q = q;
+ __add_request(q, req, empty, __entry, __latency, __starving);
+ elevator_account_request(q, req);
+
spin_unlock_irqrestore(&io_request_lock, flags);
return;
@@ -867,6 +1108,8 @@
void end_that_request_last(struct request *req)
{
+ if (req->q)
+ BUG();
if (req->sem != NULL)
up(req->sem);
req->rq_status = RQ_INACTIVE;
@@ -886,7 +1129,6 @@
req = all_requests + NR_REQUEST;
while (--req >= all_requests) {
req->rq_status = RQ_INACTIVE;
- req->next = NULL;
}
memset(ro_bits,0,sizeof(ro_bits));
memset(max_readahead, 0, sizeof(max_readahead));