|  | /* | 
|  | * Driver giving user-space access to the kernel's xenbus connection | 
|  | * to xenstore. | 
|  | * | 
|  | * Copyright (c) 2005, Christian Limpach | 
|  | * Copyright (c) 2005, Rusty Russell, IBM Corporation | 
|  | * | 
|  | * This program is free software; you can redistribute it and/or | 
|  | * modify it under the terms of the GNU General Public License version 2 | 
|  | * as published by the Free Software Foundation; or, when distributed | 
|  | * separately from the Linux kernel or incorporated into other | 
|  | * software packages, subject to the following license: | 
|  | * | 
|  | * Permission is hereby granted, free of charge, to any person obtaining a copy | 
|  | * of this source file (the "Software"), to deal in the Software without | 
|  | * restriction, including without limitation the rights to use, copy, modify, | 
|  | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | 
|  | * and to permit persons to whom the Software is furnished to do so, subject to | 
|  | * the following conditions: | 
|  | * | 
|  | * The above copyright notice and this permission notice shall be included in | 
|  | * all copies or substantial portions of the Software. | 
|  | * | 
|  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|  | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|  | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
|  | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
|  | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 
|  | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 
|  | * IN THE SOFTWARE. | 
|  | * | 
|  | * Changes: | 
|  | * 2008-10-07  Alex Zeffertt    Replaced /proc/xen/xenbus with xenfs filesystem | 
|  | *                              and /proc/xen compatibility mount point. | 
|  | *                              Turned xenfs into a loadable module. | 
|  | */ | 
|  |  | 
|  | #include <linux/kernel.h> | 
|  | #include <linux/errno.h> | 
|  | #include <linux/uio.h> | 
|  | #include <linux/notifier.h> | 
|  | #include <linux/wait.h> | 
|  | #include <linux/fs.h> | 
|  | #include <linux/poll.h> | 
|  | #include <linux/mutex.h> | 
|  | #include <linux/sched.h> | 
|  | #include <linux/spinlock.h> | 
|  | #include <linux/mount.h> | 
|  | #include <linux/pagemap.h> | 
|  | #include <linux/uaccess.h> | 
|  | #include <linux/init.h> | 
|  | #include <linux/namei.h> | 
|  | #include <linux/string.h> | 
|  | #include <linux/slab.h> | 
|  | #include <linux/miscdevice.h> | 
|  | #include <linux/module.h> | 
|  |  | 
|  | #include "xenbus_comms.h" | 
|  |  | 
|  | #include <xen/xenbus.h> | 
|  | #include <xen/xen.h> | 
|  | #include <asm/xen/hypervisor.h> | 
|  |  | 
|  | MODULE_LICENSE("GPL"); | 
|  |  | 
|  | /* | 
|  | * An element of a list of outstanding transactions, for which we're | 
|  | * still waiting a reply. | 
|  | */ | 
|  | struct xenbus_transaction_holder { | 
|  | struct list_head list; | 
|  | struct xenbus_transaction handle; | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * A buffer of data on the queue. | 
|  | */ | 
|  | struct read_buffer { | 
|  | struct list_head list; | 
|  | unsigned int cons; | 
|  | unsigned int len; | 
|  | char msg[]; | 
|  | }; | 
|  |  | 
|  | struct xenbus_file_priv { | 
|  | /* | 
|  | * msgbuffer_mutex is held while partial requests are built up | 
|  | * and complete requests are acted on.  It therefore protects | 
|  | * the "transactions" and "watches" lists, and the partial | 
|  | * request length and buffer. | 
|  | * | 
|  | * reply_mutex protects the reply being built up to return to | 
|  | * usermode.  It nests inside msgbuffer_mutex but may be held | 
|  | * alone during a watch callback. | 
|  | */ | 
|  | struct mutex msgbuffer_mutex; | 
|  |  | 
|  | /* In-progress transactions */ | 
|  | struct list_head transactions; | 
|  |  | 
|  | /* Active watches. */ | 
|  | struct list_head watches; | 
|  |  | 
|  | /* Partial request. */ | 
|  | unsigned int len; | 
|  | union { | 
|  | struct xsd_sockmsg msg; | 
|  | char buffer[XENSTORE_PAYLOAD_MAX]; | 
|  | } u; | 
|  |  | 
|  | /* Response queue. */ | 
|  | struct mutex reply_mutex; | 
|  | struct list_head read_buffers; | 
|  | wait_queue_head_t read_waitq; | 
|  |  | 
|  | }; | 
|  |  | 
|  | /* Read out any raw xenbus messages queued up. */ | 
|  | static ssize_t xenbus_file_read(struct file *filp, | 
|  | char __user *ubuf, | 
|  | size_t len, loff_t *ppos) | 
|  | { | 
|  | struct xenbus_file_priv *u = filp->private_data; | 
|  | struct read_buffer *rb; | 
|  | unsigned i; | 
|  | int ret; | 
|  |  | 
|  | mutex_lock(&u->reply_mutex); | 
|  | again: | 
|  | while (list_empty(&u->read_buffers)) { | 
|  | mutex_unlock(&u->reply_mutex); | 
|  | if (filp->f_flags & O_NONBLOCK) | 
|  | return -EAGAIN; | 
|  |  | 
|  | ret = wait_event_interruptible(u->read_waitq, | 
|  | !list_empty(&u->read_buffers)); | 
|  | if (ret) | 
|  | return ret; | 
|  | mutex_lock(&u->reply_mutex); | 
|  | } | 
|  |  | 
|  | rb = list_entry(u->read_buffers.next, struct read_buffer, list); | 
|  | i = 0; | 
|  | while (i < len) { | 
|  | unsigned sz = min((unsigned)len - i, rb->len - rb->cons); | 
|  |  | 
|  | ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz); | 
|  |  | 
|  | i += sz - ret; | 
|  | rb->cons += sz - ret; | 
|  |  | 
|  | if (ret != 0) { | 
|  | if (i == 0) | 
|  | i = -EFAULT; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* Clear out buffer if it has been consumed */ | 
|  | if (rb->cons == rb->len) { | 
|  | list_del(&rb->list); | 
|  | kfree(rb); | 
|  | if (list_empty(&u->read_buffers)) | 
|  | break; | 
|  | rb = list_entry(u->read_buffers.next, | 
|  | struct read_buffer, list); | 
|  | } | 
|  | } | 
|  | if (i == 0) | 
|  | goto again; | 
|  |  | 
|  | out: | 
|  | mutex_unlock(&u->reply_mutex); | 
|  | return i; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Add a buffer to the queue.  Caller must hold the appropriate lock | 
|  | * if the queue is not local.  (Commonly the caller will build up | 
|  | * multiple queued buffers on a temporary local list, and then add it | 
|  | * to the appropriate list under lock once all the buffers have een | 
|  | * successfully allocated.) | 
|  | */ | 
|  | static int queue_reply(struct list_head *queue, const void *data, size_t len) | 
|  | { | 
|  | struct read_buffer *rb; | 
|  |  | 
|  | if (len == 0) | 
|  | return 0; | 
|  |  | 
|  | rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); | 
|  | if (rb == NULL) | 
|  | return -ENOMEM; | 
|  |  | 
|  | rb->cons = 0; | 
|  | rb->len = len; | 
|  |  | 
|  | memcpy(rb->msg, data, len); | 
|  |  | 
|  | list_add_tail(&rb->list, queue); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Free all the read_buffer s on a list. | 
|  | * Caller must have sole reference to list. | 
|  | */ | 
|  | static void queue_cleanup(struct list_head *list) | 
|  | { | 
|  | struct read_buffer *rb; | 
|  |  | 
|  | while (!list_empty(list)) { | 
|  | rb = list_entry(list->next, struct read_buffer, list); | 
|  | list_del(list->next); | 
|  | kfree(rb); | 
|  | } | 
|  | } | 
|  |  | 
|  | struct watch_adapter { | 
|  | struct list_head list; | 
|  | struct xenbus_watch watch; | 
|  | struct xenbus_file_priv *dev_data; | 
|  | char *token; | 
|  | }; | 
|  |  | 
|  | static void free_watch_adapter(struct watch_adapter *watch) | 
|  | { | 
|  | kfree(watch->watch.node); | 
|  | kfree(watch->token); | 
|  | kfree(watch); | 
|  | } | 
|  |  | 
|  | static struct watch_adapter *alloc_watch_adapter(const char *path, | 
|  | const char *token) | 
|  | { | 
|  | struct watch_adapter *watch; | 
|  |  | 
|  | watch = kzalloc(sizeof(*watch), GFP_KERNEL); | 
|  | if (watch == NULL) | 
|  | goto out_fail; | 
|  |  | 
|  | watch->watch.node = kstrdup(path, GFP_KERNEL); | 
|  | if (watch->watch.node == NULL) | 
|  | goto out_free; | 
|  |  | 
|  | watch->token = kstrdup(token, GFP_KERNEL); | 
|  | if (watch->token == NULL) | 
|  | goto out_free; | 
|  |  | 
|  | return watch; | 
|  |  | 
|  | out_free: | 
|  | free_watch_adapter(watch); | 
|  |  | 
|  | out_fail: | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static void watch_fired(struct xenbus_watch *watch, | 
|  | const char **vec, | 
|  | unsigned int len) | 
|  | { | 
|  | struct watch_adapter *adap; | 
|  | struct xsd_sockmsg hdr; | 
|  | const char *path, *token; | 
|  | int path_len, tok_len, body_len, data_len = 0; | 
|  | int ret; | 
|  | LIST_HEAD(staging_q); | 
|  |  | 
|  | adap = container_of(watch, struct watch_adapter, watch); | 
|  |  | 
|  | path = vec[XS_WATCH_PATH]; | 
|  | token = adap->token; | 
|  |  | 
|  | path_len = strlen(path) + 1; | 
|  | tok_len = strlen(token) + 1; | 
|  | if (len > 2) | 
|  | data_len = vec[len] - vec[2] + 1; | 
|  | body_len = path_len + tok_len + data_len; | 
|  |  | 
|  | hdr.type = XS_WATCH_EVENT; | 
|  | hdr.len = body_len; | 
|  |  | 
|  | mutex_lock(&adap->dev_data->reply_mutex); | 
|  |  | 
|  | ret = queue_reply(&staging_q, &hdr, sizeof(hdr)); | 
|  | if (!ret) | 
|  | ret = queue_reply(&staging_q, path, path_len); | 
|  | if (!ret) | 
|  | ret = queue_reply(&staging_q, token, tok_len); | 
|  | if (!ret && len > 2) | 
|  | ret = queue_reply(&staging_q, vec[2], data_len); | 
|  |  | 
|  | if (!ret) { | 
|  | /* success: pass reply list onto watcher */ | 
|  | list_splice_tail(&staging_q, &adap->dev_data->read_buffers); | 
|  | wake_up(&adap->dev_data->read_waitq); | 
|  | } else | 
|  | queue_cleanup(&staging_q); | 
|  |  | 
|  | mutex_unlock(&adap->dev_data->reply_mutex); | 
|  | } | 
|  |  | 
|  | static int xenbus_write_transaction(unsigned msg_type, | 
|  | struct xenbus_file_priv *u) | 
|  | { | 
|  | int rc; | 
|  | void *reply; | 
|  | struct xenbus_transaction_holder *trans = NULL; | 
|  | LIST_HEAD(staging_q); | 
|  |  | 
|  | if (msg_type == XS_TRANSACTION_START) { | 
|  | trans = kmalloc(sizeof(*trans), GFP_KERNEL); | 
|  | if (!trans) { | 
|  | rc = -ENOMEM; | 
|  | goto out; | 
|  | } | 
|  | } | 
|  |  | 
|  | reply = xenbus_dev_request_and_reply(&u->u.msg); | 
|  | if (IS_ERR(reply)) { | 
|  | kfree(trans); | 
|  | rc = PTR_ERR(reply); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | if (msg_type == XS_TRANSACTION_START) { | 
|  | trans->handle.id = simple_strtoul(reply, NULL, 0); | 
|  |  | 
|  | list_add(&trans->list, &u->transactions); | 
|  | } else if (msg_type == XS_TRANSACTION_END) { | 
|  | list_for_each_entry(trans, &u->transactions, list) | 
|  | if (trans->handle.id == u->u.msg.tx_id) | 
|  | break; | 
|  | BUG_ON(&trans->list == &u->transactions); | 
|  | list_del(&trans->list); | 
|  |  | 
|  | kfree(trans); | 
|  | } | 
|  |  | 
|  | mutex_lock(&u->reply_mutex); | 
|  | rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg)); | 
|  | if (!rc) | 
|  | rc = queue_reply(&staging_q, reply, u->u.msg.len); | 
|  | if (!rc) { | 
|  | list_splice_tail(&staging_q, &u->read_buffers); | 
|  | wake_up(&u->read_waitq); | 
|  | } else { | 
|  | queue_cleanup(&staging_q); | 
|  | } | 
|  | mutex_unlock(&u->reply_mutex); | 
|  |  | 
|  | kfree(reply); | 
|  |  | 
|  | out: | 
|  | return rc; | 
|  | } | 
|  |  | 
|  | static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) | 
|  | { | 
|  | struct watch_adapter *watch, *tmp_watch; | 
|  | char *path, *token; | 
|  | int err, rc; | 
|  | LIST_HEAD(staging_q); | 
|  |  | 
|  | path = u->u.buffer + sizeof(u->u.msg); | 
|  | token = memchr(path, 0, u->u.msg.len); | 
|  | if (token == NULL) { | 
|  | rc = -EILSEQ; | 
|  | goto out; | 
|  | } | 
|  | token++; | 
|  |  | 
|  | if (msg_type == XS_WATCH) { | 
|  | watch = alloc_watch_adapter(path, token); | 
|  | if (watch == NULL) { | 
|  | rc = -ENOMEM; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | watch->watch.callback = watch_fired; | 
|  | watch->dev_data = u; | 
|  |  | 
|  | err = register_xenbus_watch(&watch->watch); | 
|  | if (err) { | 
|  | free_watch_adapter(watch); | 
|  | rc = err; | 
|  | goto out; | 
|  | } | 
|  | list_add(&watch->list, &u->watches); | 
|  | } else { | 
|  | list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { | 
|  | if (!strcmp(watch->token, token) && | 
|  | !strcmp(watch->watch.node, path)) { | 
|  | unregister_xenbus_watch(&watch->watch); | 
|  | list_del(&watch->list); | 
|  | free_watch_adapter(watch); | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /* Success.  Synthesize a reply to say all is OK. */ | 
|  | { | 
|  | struct { | 
|  | struct xsd_sockmsg hdr; | 
|  | char body[3]; | 
|  | } __packed reply = { | 
|  | { | 
|  | .type = msg_type, | 
|  | .len = sizeof(reply.body) | 
|  | }, | 
|  | "OK" | 
|  | }; | 
|  |  | 
|  | mutex_lock(&u->reply_mutex); | 
|  | rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); | 
|  | wake_up(&u->read_waitq); | 
|  | mutex_unlock(&u->reply_mutex); | 
|  | } | 
|  |  | 
|  | out: | 
|  | return rc; | 
|  | } | 
|  |  | 
|  | static ssize_t xenbus_file_write(struct file *filp, | 
|  | const char __user *ubuf, | 
|  | size_t len, loff_t *ppos) | 
|  | { | 
|  | struct xenbus_file_priv *u = filp->private_data; | 
|  | uint32_t msg_type; | 
|  | int rc = len; | 
|  | int ret; | 
|  | LIST_HEAD(staging_q); | 
|  |  | 
|  | /* | 
|  | * We're expecting usermode to be writing properly formed | 
|  | * xenbus messages.  If they write an incomplete message we | 
|  | * buffer it up.  Once it is complete, we act on it. | 
|  | */ | 
|  |  | 
|  | /* | 
|  | * Make sure concurrent writers can't stomp all over each | 
|  | * other's messages and make a mess of our partial message | 
|  | * buffer.  We don't make any attemppt to stop multiple | 
|  | * writers from making a mess of each other's incomplete | 
|  | * messages; we're just trying to guarantee our own internal | 
|  | * consistency and make sure that single writes are handled | 
|  | * atomically. | 
|  | */ | 
|  | mutex_lock(&u->msgbuffer_mutex); | 
|  |  | 
|  | /* Get this out of the way early to avoid confusion */ | 
|  | if (len == 0) | 
|  | goto out; | 
|  |  | 
|  | /* Can't write a xenbus message larger we can buffer */ | 
|  | if ((len + u->len) > sizeof(u->u.buffer)) { | 
|  | /* On error, dump existing buffer */ | 
|  | u->len = 0; | 
|  | rc = -EINVAL; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | ret = copy_from_user(u->u.buffer + u->len, ubuf, len); | 
|  |  | 
|  | if (ret != 0) { | 
|  | rc = -EFAULT; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* Deal with a partial copy. */ | 
|  | len -= ret; | 
|  | rc = len; | 
|  |  | 
|  | u->len += len; | 
|  |  | 
|  | /* Return if we haven't got a full message yet */ | 
|  | if (u->len < sizeof(u->u.msg)) | 
|  | goto out;	/* not even the header yet */ | 
|  |  | 
|  | /* If we're expecting a message that's larger than we can | 
|  | possibly send, dump what we have and return an error. */ | 
|  | if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) { | 
|  | rc = -E2BIG; | 
|  | u->len = 0; | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) | 
|  | goto out;	/* incomplete data portion */ | 
|  |  | 
|  | /* | 
|  | * OK, now we have a complete message.  Do something with it. | 
|  | */ | 
|  |  | 
|  | msg_type = u->u.msg.type; | 
|  |  | 
|  | switch (msg_type) { | 
|  | case XS_WATCH: | 
|  | case XS_UNWATCH: | 
|  | /* (Un)Ask for some path to be watched for changes */ | 
|  | ret = xenbus_write_watch(msg_type, u); | 
|  | break; | 
|  |  | 
|  | default: | 
|  | /* Send out a transaction */ | 
|  | ret = xenbus_write_transaction(msg_type, u); | 
|  | break; | 
|  | } | 
|  | if (ret != 0) | 
|  | rc = ret; | 
|  |  | 
|  | /* Buffered message consumed */ | 
|  | u->len = 0; | 
|  |  | 
|  | out: | 
|  | mutex_unlock(&u->msgbuffer_mutex); | 
|  | return rc; | 
|  | } | 
|  |  | 
|  | static int xenbus_file_open(struct inode *inode, struct file *filp) | 
|  | { | 
|  | struct xenbus_file_priv *u; | 
|  |  | 
|  | if (xen_store_evtchn == 0) | 
|  | return -ENOENT; | 
|  |  | 
|  | nonseekable_open(inode, filp); | 
|  |  | 
|  | u = kzalloc(sizeof(*u), GFP_KERNEL); | 
|  | if (u == NULL) | 
|  | return -ENOMEM; | 
|  |  | 
|  | INIT_LIST_HEAD(&u->transactions); | 
|  | INIT_LIST_HEAD(&u->watches); | 
|  | INIT_LIST_HEAD(&u->read_buffers); | 
|  | init_waitqueue_head(&u->read_waitq); | 
|  |  | 
|  | mutex_init(&u->reply_mutex); | 
|  | mutex_init(&u->msgbuffer_mutex); | 
|  |  | 
|  | filp->private_data = u; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int xenbus_file_release(struct inode *inode, struct file *filp) | 
|  | { | 
|  | struct xenbus_file_priv *u = filp->private_data; | 
|  | struct xenbus_transaction_holder *trans, *tmp; | 
|  | struct watch_adapter *watch, *tmp_watch; | 
|  | struct read_buffer *rb, *tmp_rb; | 
|  |  | 
|  | /* | 
|  | * No need for locking here because there are no other users, | 
|  | * by definition. | 
|  | */ | 
|  |  | 
|  | list_for_each_entry_safe(trans, tmp, &u->transactions, list) { | 
|  | xenbus_transaction_end(trans->handle, 1); | 
|  | list_del(&trans->list); | 
|  | kfree(trans); | 
|  | } | 
|  |  | 
|  | list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { | 
|  | unregister_xenbus_watch(&watch->watch); | 
|  | list_del(&watch->list); | 
|  | free_watch_adapter(watch); | 
|  | } | 
|  |  | 
|  | list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { | 
|  | list_del(&rb->list); | 
|  | kfree(rb); | 
|  | } | 
|  | kfree(u); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static unsigned int xenbus_file_poll(struct file *file, poll_table *wait) | 
|  | { | 
|  | struct xenbus_file_priv *u = file->private_data; | 
|  |  | 
|  | poll_wait(file, &u->read_waitq, wait); | 
|  | if (!list_empty(&u->read_buffers)) | 
|  | return POLLIN | POLLRDNORM; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | const struct file_operations xen_xenbus_fops = { | 
|  | .read = xenbus_file_read, | 
|  | .write = xenbus_file_write, | 
|  | .open = xenbus_file_open, | 
|  | .release = xenbus_file_release, | 
|  | .poll = xenbus_file_poll, | 
|  | .llseek = no_llseek, | 
|  | }; | 
|  | EXPORT_SYMBOL_GPL(xen_xenbus_fops); | 
|  |  | 
|  | static struct miscdevice xenbus_dev = { | 
|  | .minor = MISC_DYNAMIC_MINOR, | 
|  | .name = "xen/xenbus", | 
|  | .fops = &xen_xenbus_fops, | 
|  | }; | 
|  |  | 
|  | static int __init xenbus_init(void) | 
|  | { | 
|  | int err; | 
|  |  | 
|  | if (!xen_domain()) | 
|  | return -ENODEV; | 
|  |  | 
|  | err = misc_register(&xenbus_dev); | 
|  | if (err) | 
|  | printk(KERN_ERR "Could not register xenbus frontend device\n"); | 
|  | return err; | 
|  | } | 
|  |  | 
|  | static void __exit xenbus_exit(void) | 
|  | { | 
|  | misc_deregister(&xenbus_dev); | 
|  | } | 
|  |  | 
|  | module_init(xenbus_init); | 
|  | module_exit(xenbus_exit); |