Merge tag 'v3.15-rc1' into for-3.16/core
We don't like doing this, but the branch has diverged from the blk-mq fixes
that went into 3.15-rc1, so merge it in.
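
For reviewers, the one interface change worth calling out up front: the
kblockd helpers lose their unused request_queue argument, and a CPU-targeted
variant is added so a hardware queue can be kicked on one of its mapped CPUs.
A minimal before/after sketch of a caller (q, my_work, and my_dwork are
placeholder names, not from this patch):

  /* Before: the request_queue argument was accepted but never used. */
  kblockd_schedule_work(q, &my_work);
  kblockd_schedule_delayed_work(q, &my_dwork, 0);

  /* After: no queue argument, plus an _on() variant targeting a CPU. */
  kblockd_schedule_work(&my_work);
  kblockd_schedule_delayed_work(&my_dwork, 0);
  kblockd_schedule_delayed_work_on(cpumask_first(hctx->cpumask),
                                   &my_dwork, 0);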
diff --git a/block/blk-core.c b/block/blk-core.c
index a0e3096..1fe9ff6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1654,7 +1654,7 @@
struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
NULL, &fail_make_request);
- return IS_ERR(dir) ? PTR_ERR(dir) : 0;
+ return PTR_ERR_OR_ZERO(dir);
}
late_initcall(fail_make_request_debugfs);
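
PTR_ERR_OR_ZERO() is the stock helper for exactly this pattern; its
definition in include/linux/err.h is roughly:

  static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
  {
          if (IS_ERR(ptr))
                  return PTR_ERR(ptr);    /* propagate the encoded errno */
          return 0;
  }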
@@ -2904,19 +2904,26 @@
}
EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
-int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
+int kblockd_schedule_work(struct work_struct *work)
{
return queue_work(kblockd_workqueue, work);
}
EXPORT_SYMBOL(kblockd_schedule_work);
-int kblockd_schedule_delayed_work(struct request_queue *q,
- struct delayed_work *dwork, unsigned long delay)
+int kblockd_schedule_delayed_work(struct delayed_work *dwork,
+ unsigned long delay)
{
return queue_delayed_work(kblockd_workqueue, dwork, delay);
}
EXPORT_SYMBOL(kblockd_schedule_delayed_work);
+int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
+ unsigned long delay)
+{
+ return queue_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
+}
+EXPORT_SYMBOL(kblockd_schedule_delayed_work_on);
+
#define PLUG_MAGIC 0x91827364
/**
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 43e6b47..77f2045 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -144,7 +144,7 @@
{
if (rq->q->mq_ops) {
INIT_WORK(&rq->mq_flush_work, mq_flush_run);
- kblockd_schedule_work(rq->q, &rq->mq_flush_work);
+ kblockd_schedule_work(&rq->mq_flush_work);
return false;
} else {
if (add_front)
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index b0ba264..9176a69 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -246,16 +246,12 @@
static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
{
- unsigned int i, queue_num, first = 1;
+ unsigned int i, first = 1;
ssize_t ret = 0;
blk_mq_disable_hotplug();
- for_each_online_cpu(i) {
- queue_num = hctx->queue->mq_map[i];
- if (queue_num != hctx->queue_num)
- continue;
-
+ for_each_cpu(i, hctx->cpumask) {
if (first)
ret += sprintf(ret + page, "%u", i);
else
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1d2a9bd..5455ed1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -209,11 +209,14 @@
break;
}
- blk_mq_put_ctx(ctx);
- if (!(gfp & __GFP_WAIT))
+ if (gfp & __GFP_WAIT) {
+ __blk_mq_run_hw_queue(hctx);
+ blk_mq_put_ctx(ctx);
+ } else {
+ blk_mq_put_ctx(ctx);
break;
+ }
- __blk_mq_run_hw_queue(hctx);
blk_mq_wait_for_tags(hctx->tags);
} while (1);
@@ -514,6 +517,8 @@
LIST_HEAD(rq_list);
int bit, queued;
+ WARN_ON(!preempt_count());
+
if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
return;
@@ -606,12 +611,21 @@
if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
return;
- if (!async)
+ if (!async && cpumask_test_cpu(smp_processor_id(), hctx->cpumask))
__blk_mq_run_hw_queue(hctx);
+ else if (hctx->queue->nr_hw_queues == 1)
+ kblockd_schedule_delayed_work(&hctx->delayed_work, 0);
else {
- struct request_queue *q = hctx->queue;
+ unsigned int cpu;
- kblockd_schedule_delayed_work(q, &hctx->delayed_work, 0);
+ /*
+ * It'd be great if the workqueue API had a way to pass
+ * in a mask and had some smarts for more clever placement
+ * than the first CPU. Or we could round-robin here. For now,
+ * just queue on the first CPU.
+ */
+ cpu = cpumask_first(hctx->cpumask);
+ kblockd_schedule_delayed_work_on(cpu, &hctx->delayed_work, 0);
}
}
@@ -626,7 +640,9 @@
test_bit(BLK_MQ_S_STOPPED, &hctx->state))
continue;
+ preempt_disable();
blk_mq_run_hw_queue(hctx, async);
+ preempt_enable();
}
}
EXPORT_SYMBOL(blk_mq_run_queues);
@@ -651,7 +667,10 @@
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
{
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+
+ preempt_disable();
__blk_mq_run_hw_queue(hctx);
+ preempt_enable();
}
EXPORT_SYMBOL(blk_mq_start_hw_queue);
@@ -665,7 +684,9 @@
continue;
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+ preempt_disable();
blk_mq_run_hw_queue(hctx, true);
+ preempt_enable();
}
}
EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
@@ -675,7 +696,10 @@
struct blk_mq_hw_ctx *hctx;
hctx = container_of(work, struct blk_mq_hw_ctx, delayed_work.work);
+
+ preempt_disable();
__blk_mq_run_hw_queue(hctx);
+ preempt_enable();
}
static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
@@ -719,10 +743,10 @@
spin_unlock(&ctx->lock);
}
- blk_mq_put_ctx(current_ctx);
-
if (run_queue)
blk_mq_run_hw_queue(hctx, async);
+
+ blk_mq_put_ctx(current_ctx);
}
static void blk_mq_insert_requests(struct request_queue *q,
@@ -758,9 +782,8 @@
}
spin_unlock(&ctx->lock);
- blk_mq_put_ctx(current_ctx);
-
blk_mq_run_hw_queue(hctx, from_schedule);
+ blk_mq_put_ctx(current_ctx);
}
static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -879,7 +902,6 @@
if (unlikely(is_flush_fua)) {
blk_mq_bio_to_request(rq, bio);
- blk_mq_put_ctx(ctx);
blk_insert_flush(rq);
goto run_queue;
}
@@ -917,7 +939,6 @@
}
spin_unlock(&ctx->lock);
- blk_mq_put_ctx(ctx);
/*
* For a SYNC request, send it to the hardware immediately. For an
@@ -926,6 +947,7 @@
*/
run_queue:
blk_mq_run_hw_queue(hctx, !is_sync || is_flush_fua);
+ blk_mq_put_ctx(ctx);
}
/*
@@ -993,9 +1015,9 @@
blk_mq_hctx_mark_pending(hctx, ctx);
spin_unlock(&ctx->lock);
- blk_mq_put_ctx(ctx);
blk_mq_run_hw_queue(hctx, true);
+ blk_mq_put_ctx(ctx);
}
static int blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx,
@@ -1258,12 +1280,13 @@
__ctx->queue = q;
/* If the cpu isn't online, the cpu is mapped to first hctx */
- hctx = q->mq_ops->map_queue(q, i);
- hctx->nr_ctx++;
-
if (!cpu_online(i))
continue;
+ hctx = q->mq_ops->map_queue(q, i);
+ cpumask_set_cpu(i, hctx->cpumask);
+ hctx->nr_ctx++;
+
/*
* Set local node, IFF we have more than one hw queue. If
* not, we remain on the home node of the device
@@ -1280,6 +1303,7 @@
struct blk_mq_ctx *ctx;
queue_for_each_hw_ctx(q, hctx, i) {
+ cpumask_clear(hctx->cpumask);
hctx->nr_ctx = 0;
}
@@ -1288,7 +1312,11 @@
*/
queue_for_each_ctx(q, ctx, i) {
/* If the cpu isn't online, the cpu is mapped to first hctx */
+ if (!cpu_online(i))
+ continue;
+
hctx = q->mq_ops->map_queue(q, i);
+ cpumask_set_cpu(i, hctx->cpumask);
ctx->index_hw = hctx->nr_ctx;
hctx->ctxs[hctx->nr_ctx++] = ctx;
}
@@ -1332,6 +1360,9 @@
if (!hctxs[i])
goto err_hctxs;
+ if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL))
+ goto err_hctxs;
+
hctxs[i]->numa_node = NUMA_NO_NODE;
hctxs[i]->queue_num = i;
}
@@ -1395,6 +1426,7 @@
for (i = 0; i < reg->nr_hw_queues; i++) {
if (!hctxs[i])
break;
+ free_cpumask_var(hctxs[i]->cpumask);
reg->ops->free_hctx(hctxs[i], i);
}
kfree(hctxs);
@@ -1416,6 +1448,7 @@
blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
if (q->mq_ops->exit_hctx)
q->mq_ops->exit_hctx(hctx, i);
+ free_cpumask_var(hctx->cpumask);
q->mq_ops->free_hctx(hctx, i);
}
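
The recurring shape of the blk-mq.c changes above: keep the software context
pinned (preemption disabled) until after the hardware queue has been run, so
the run path can tell whether the current CPU is one of the queue's mapped
CPUs. Condensed into a sketch (not a literal excerpt; the nr_hw_queues == 1
shortcut is elided, and hctx stands for any mapped hardware context):

  preempt_disable();                      /* pin the current CPU */
  if (cpumask_test_cpu(smp_processor_id(), hctx->cpumask)) {
          __blk_mq_run_hw_queue(hctx);    /* safe: we're on a mapped CPU */
  } else {
          /* punt to kblockd on a CPU this hctx is actually mapped to */
          kblockd_schedule_delayed_work_on(cpumask_first(hctx->cpumask),
                                           &hctx->delayed_work, 0);
  }
  preempt_enable();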
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e0985f1..5063a0b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -908,7 +908,7 @@
{
if (cfqd->busy_queues) {
cfq_log(cfqd, "schedule dispatch");
- kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
+ kblockd_schedule_work(&cfqd->unplug_work);
}
}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 65a123d..0f3bddc 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -139,7 +139,7 @@
*/
spin_lock_irqsave(q->queue_lock, flags);
blk_requeue_request(q, cmd->request);
- kblockd_schedule_work(q, &device->requeue_work);
+ kblockd_schedule_work(&device->requeue_work);
spin_unlock_irqrestore(q->queue_lock, flags);
}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0120451..b6ee487 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -19,6 +19,7 @@
unsigned long state; /* BLK_MQ_S_* flags */
struct delayed_work delayed_work;
+ cpumask_var_t cpumask;
unsigned long flags; /* BLK_MQ_F_* flags */
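
One note on the new cpumask_var_t member: with CONFIG_CPUMASK_OFFSTACK the
mask is heap-allocated, which is why the blk-mq.c hunks pair every hctx with
zalloc_cpumask_var()/free_cpumask_var() on both the error-unwind and teardown
paths. The lifecycle, in sketch form:

  if (!zalloc_cpumask_var(&hctx->cpumask, GFP_KERNEL))
          goto err_hctxs;                 /* allocation can fail off-stack */
  ...
  free_cpumask_var(hctx->cpumask);        /* always before free_hctx() */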
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0d84981..86a8df1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1370,8 +1370,9 @@
}
struct work_struct;
-int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
-int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay);
+int kblockd_schedule_work(struct work_struct *work);
+int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
+int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
#ifdef CONFIG_BLK_CGROUP
/*