xen/mce: schedule a workqueue to avoid sleep in atomic context
copy_to_user() might sleep, and the kernel prints a stack trace if it is
called in atomic context (e.g. while holding a spinlock), like this:
(XEN) CMCI: send CMCI to DOM0 through virq
BUG: sleeping function called from invalid context at /home/konradinux/kernel.h:199
in_atomic(): 1, irqs_disabled(): 0, pid: 4581, name: mcelog
Pid: 4581, comm: mcelog Tainted: G O 3.5.0-rc1upstream-00003-g149000b-dirty #1
[<ffffffff8109ad9a>] __might_sleep+0xda/0x100
[<ffffffff81329b0b>] xen_mce_chrdev_read+0xab/0x140
[<ffffffff81148945>] vfs_read+0xc5/0x190
[<ffffffff81148b0c>] sys_read+0x4c/0x90
[<ffffffff815bd039>] system_call_fastpath+0x16
This patch schedules a work item from the IRQ handler to poll the data,
and uses a mutex instead of a spinlock, so that copy_to_user() is never
called in atomic context.
Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Suggested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c
index 72e87d2..804aa3c 100644
--- a/drivers/xen/mcelog.c
+++ b/drivers/xen/mcelog.c
@@ -55,7 +55,7 @@
static struct mcinfo_logical_cpu *g_physinfo;
static uint32_t ncpus;
-static DEFINE_SPINLOCK(mcelog_lock);
+static DEFINE_MUTEX(mcelog_lock);
static struct xen_mce_log xen_mcelog = {
.signature = XEN_MCE_LOG_SIGNATURE,
@@ -106,7 +106,7 @@
unsigned num;
int i, err;
- spin_lock(&mcelog_lock);
+ mutex_lock(&mcelog_lock);
num = xen_mcelog.next;
@@ -130,7 +130,7 @@
err = -EFAULT;
out:
- spin_unlock(&mcelog_lock);
+ mutex_unlock(&mcelog_lock);
return err ? err : buf - ubuf;
}
@@ -310,12 +310,11 @@
}
/* virq handler for machine check error info*/
-static irqreturn_t xen_mce_interrupt(int irq, void *dev_id)
+static void xen_mce_work_fn(struct work_struct *work)
{
int err;
- unsigned long tmp;
- spin_lock_irqsave(&mcelog_lock, tmp);
+ mutex_lock(&mcelog_lock);
/* urgent mc_info */
err = mc_queue_handle(XEN_MC_URGENT);
@@ -330,8 +329,13 @@
pr_err(XEN_MCELOG
"Failed to handle nonurgent mc_info queue.\n");
- spin_unlock_irqrestore(&mcelog_lock, tmp);
+ mutex_unlock(&mcelog_lock);
+}
+static DECLARE_WORK(xen_mce_work, xen_mce_work_fn);
+static irqreturn_t xen_mce_interrupt(int irq, void *dev_id)
+{
+ schedule_work(&xen_mce_work);
return IRQ_HANDLED;
}