| /* | 
 |  * kvm trace | 
 |  * | 
 |  * It is designed to allow debugging traces of kvm to be generated | 
 |  * on UP / SMP machines.  Each trace entry can be timestamped so that | 
 |  * it's possible to reconstruct a chronological record of trace events. | 
 |  * The implementation refers to blktrace kernel support. | 
 |  * | 
 |  * Copyright (c) 2008 Intel Corporation | 
 |  * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> | 
 |  * | 
 |  * Authors: Feng(Eric) Liu, eric.e.liu@intel.com | 
 |  * | 
 |  * Date:    Feb 2008 | 
 |  */ | 
 |  | 
 | #include <linux/module.h> | 
 | #include <linux/relay.h> | 
 | #include <linux/debugfs.h> | 
 | #include <linux/ktime.h> | 
 |  | 
 | #include <linux/kvm_host.h> | 
 |  | 
/*
 * Values stored in kvm_trace.trace_state.
 *
 * RUNNING: the only state in which kvm_add_trace() emits records.
 * PAUSE:   entered from RUNNING by kvm_trace_pause().
 * CLEARUP: set by kvm_trace_cleanup() while the session is torn down.
 */
#define KVM_TRACE_STATE_RUNNING 	(1 << 0)
#define KVM_TRACE_STATE_PAUSE 		(1 << 1)
#define KVM_TRACE_STATE_CLEARUP 	(1 << 2)
 |  | 
/* State of one tracing session, allocated by do_kvm_trace_enable(). */
struct kvm_trace {
	/* one of the KVM_TRACE_STATE_* values */
	int trace_state;
	/* relay channel the trace records are written to */
	struct rchan *rchan;
	/* debugfs "lost_records" file exposing lost_records */
	struct dentry *lost_file;
	/* incremented by the sub-buffer callback when the buffer is full */
	atomic_t lost_records;
};
/* The current tracing session; read by the probe and the ioctl handlers. */
static struct kvm_trace *kvm_trace;
 |  | 
/*
 * Description of one marker probe.  Instances are initialised
 * positionally in kvm_trace_probes[], so the field order matters.
 */
struct kvm_trace_probe {
	/* marker name passed to marker_probe_register() */
	const char *name;
	/* marker format string passed to marker_probe_register() */
	const char *format;
	/* non-zero if records from this probe carry a timestamp */
	u32 timestamp_in;
	/* handler invoked when the marker fires */
	marker_probe_func *probe_func;
};
 |  | 
 | static inline int calc_rec_size(int timestamp, int extra) | 
 | { | 
 | 	int rec_size = KVM_TRC_HEAD_SIZE; | 
 |  | 
 | 	rec_size += extra; | 
 | 	return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; | 
 | } | 
 |  | 
/*
 * Marker probe handler: build one trace record from the marker's
 * arguments and write it to the relay channel.
 *
 * @probe_private: the struct kvm_trace_probe the marker was registered with
 * @call_data:     unused here
 * @format:        unused here
 * @args:          marker arguments, consumed in this order: event id (u32),
 *                 vcpu pointer, number of extra values (u32), then that
 *                 many u32 extra values
 *
 * Nothing is recorded unless the session is in KVM_TRACE_STATE_RUNNING.
 * NOTE(review): kvm_trace is dereferenced without a NULL check; this
 * presumably relies on probes only being registered while a session
 * exists -- confirm.
 */
static void kvm_add_trace(void *probe_private, void *call_data,
			  const char *format, va_list *args)
{
	struct kvm_trace_probe *p = probe_private;
	struct kvm_trace *kt = kvm_trace;
	struct kvm_trace_rec rec;
	struct kvm_vcpu *vcpu;
	int    i, size;
	u32    extra;

	if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
		return;

	/* The va_arg() calls below must stay in marker-argument order. */
	rec.rec_val	= TRACE_REC_EVENT_ID(va_arg(*args, u32));
	vcpu		= va_arg(*args, struct kvm_vcpu *);
	rec.pid		= current->tgid;
	rec.vcpu_id	= vcpu->vcpu_id;

	/* Warn about an oversized count, then clamp it so the copy stays in bounds. */
	extra   	= va_arg(*args, u32);
	WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
	extra 		= min_t(u32, extra, KVM_TRC_EXTRA_MAX);

	rec.rec_val |= TRACE_REC_TCS(p->timestamp_in)
			| TRACE_REC_NUM_DATA_ARGS(extra);

	/* The payload lives in a different union member with and without a timestamp. */
	if (p->timestamp_in) {
		rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());

		for (i = 0; i < extra; i++)
			rec.u.timestamp.extra_u32[i] = va_arg(*args, u32);
	} else {
		for (i = 0; i < extra; i++)
			rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32);
	}

	/* Only the populated prefix of rec is written, not sizeof(rec). */
	size = calc_rec_size(p->timestamp_in, extra * sizeof(u32));
	relay_write(kt->rchan, &rec, size);
}
 |  | 
/*
 * Markers hooked by the tracer: { name, format, timestamp_in, probe_func }.
 * Both use kvm_add_trace(); only "kvm_trace_entryexit" records carry a
 * timestamp.  The format matches what kvm_add_trace() reads: event id,
 * vcpu pointer, extra count, then u32 extra values.
 */
static struct kvm_trace_probe kvm_trace_probes[] = {
	{ "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
	{ "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
};
 |  | 
 | static int lost_records_get(void *data, u64 *val) | 
 | { | 
 | 	struct kvm_trace *kt = data; | 
 |  | 
 | 	*val = atomic_read(&kt->lost_records); | 
 | 	return 0; | 
 | } | 
 |  | 
 | DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n"); | 
 |  | 
 | /* | 
 |  *  The relay channel is used in "no-overwrite" mode, it keeps trace of how | 
 |  *  many times we encountered a full subbuffer, to tell user space app the | 
 |  *  lost records there were. | 
 |  */ | 
 | static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, | 
 | 				     void *prev_subbuf, size_t prev_padding) | 
 | { | 
 | 	struct kvm_trace *kt; | 
 |  | 
 | 	if (!relay_buf_full(buf)) { | 
 | 		if (!prev_subbuf) { | 
 | 			/* | 
 | 			 * executed only once when the channel is opened | 
 | 			 * save metadata as first record | 
 | 			 */ | 
 | 			subbuf_start_reserve(buf, sizeof(u32)); | 
 | 			*(u32 *)subbuf = 0x12345678; | 
 | 		} | 
 |  | 
 | 		return 1; | 
 | 	} | 
 |  | 
 | 	kt = buf->chan->private_data; | 
 | 	atomic_inc(&kt->lost_records); | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | static struct dentry *kvm_create_buf_file_callack(const char *filename, | 
 | 						 struct dentry *parent, | 
 | 						 int mode, | 
 | 						 struct rchan_buf *buf, | 
 | 						 int *is_global) | 
 | { | 
 | 	return debugfs_create_file(filename, mode, parent, buf, | 
 | 				   &relay_file_operations); | 
 | } | 
 |  | 
/*
 * relay remove_buf_file hook: remove the debugfs entry @dentry.
 * Always returns 0.
 */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);
	return 0;
}
 |  | 
/* Callback table handed to relay_open() in do_kvm_trace_enable(). */
static struct rchan_callbacks kvm_relay_callbacks = {
	.subbuf_start 		= kvm_subbuf_start_callback,
	.create_buf_file 	= kvm_create_buf_file_callack,
	.remove_buf_file 	= kvm_remove_buf_file_callback,
};
 |  | 
 | static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts) | 
 | { | 
 | 	struct kvm_trace *kt; | 
 | 	int i, r = -ENOMEM; | 
 |  | 
 | 	if (!kuts->buf_size || !kuts->buf_nr) | 
 | 		return -EINVAL; | 
 |  | 
 | 	kt = kzalloc(sizeof(*kt), GFP_KERNEL); | 
 | 	if (!kt) | 
 | 		goto err; | 
 |  | 
 | 	r = -EIO; | 
 | 	atomic_set(&kt->lost_records, 0); | 
 | 	kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir, | 
 | 					    kt, &kvm_trace_lost_ops); | 
 | 	if (!kt->lost_file) | 
 | 		goto err; | 
 |  | 
 | 	kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size, | 
 | 				kuts->buf_nr, &kvm_relay_callbacks, kt); | 
 | 	if (!kt->rchan) | 
 | 		goto err; | 
 |  | 
 | 	kvm_trace = kt; | 
 |  | 
 | 	for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | 
 | 		struct kvm_trace_probe *p = &kvm_trace_probes[i]; | 
 |  | 
 | 		r = marker_probe_register(p->name, p->format, p->probe_func, p); | 
 | 		if (r) | 
 | 			printk(KERN_INFO "Unable to register probe %s\n", | 
 | 			       p->name); | 
 | 	} | 
 |  | 
 | 	kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING; | 
 |  | 
 | 	return 0; | 
 | err: | 
 | 	if (kt) { | 
 | 		if (kt->lost_file) | 
 | 			debugfs_remove(kt->lost_file); | 
 | 		if (kt->rchan) | 
 | 			relay_close(kt->rchan); | 
 | 		kfree(kt); | 
 | 	} | 
 | 	return r; | 
 | } | 
 |  | 
 | static int kvm_trace_enable(char __user *arg) | 
 | { | 
 | 	struct kvm_user_trace_setup kuts; | 
 | 	int ret; | 
 |  | 
 | 	ret = copy_from_user(&kuts, arg, sizeof(kuts)); | 
 | 	if (ret) | 
 | 		return -EFAULT; | 
 |  | 
 | 	ret = do_kvm_trace_enable(&kuts); | 
 | 	if (ret) | 
 | 		return ret; | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | static int kvm_trace_pause(void) | 
 | { | 
 | 	struct kvm_trace *kt = kvm_trace; | 
 | 	int r = -EINVAL; | 
 |  | 
 | 	if (kt == NULL) | 
 | 		return r; | 
 |  | 
 | 	if (kt->trace_state == KVM_TRACE_STATE_RUNNING) { | 
 | 		kt->trace_state = KVM_TRACE_STATE_PAUSE; | 
 | 		relay_flush(kt->rchan); | 
 | 		r = 0; | 
 | 	} | 
 |  | 
 | 	return r; | 
 | } | 
 |  | 
 | void kvm_trace_cleanup(void) | 
 | { | 
 | 	struct kvm_trace *kt = kvm_trace; | 
 | 	int i; | 
 |  | 
 | 	if (kt == NULL) | 
 | 		return; | 
 |  | 
 | 	if (kt->trace_state == KVM_TRACE_STATE_RUNNING || | 
 | 	    kt->trace_state == KVM_TRACE_STATE_PAUSE) { | 
 |  | 
 | 		kt->trace_state = KVM_TRACE_STATE_CLEARUP; | 
 |  | 
 | 		for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | 
 | 			struct kvm_trace_probe *p = &kvm_trace_probes[i]; | 
 | 			marker_probe_unregister(p->name, p->probe_func, p); | 
 | 		} | 
 | 		marker_synchronize_unregister(); | 
 |  | 
 | 		relay_close(kt->rchan); | 
 | 		debugfs_remove(kt->lost_file); | 
 | 		kfree(kt); | 
 | 	} | 
 | } | 
 |  | 
 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) | 
 | { | 
 | 	void __user *argp = (void __user *)arg; | 
 | 	long r = -EINVAL; | 
 |  | 
 | 	if (!capable(CAP_SYS_ADMIN)) | 
 | 		return -EPERM; | 
 |  | 
 | 	switch (ioctl) { | 
 | 	case KVM_TRACE_ENABLE: | 
 | 		r = kvm_trace_enable(argp); | 
 | 		break; | 
 | 	case KVM_TRACE_PAUSE: | 
 | 		r = kvm_trace_pause(); | 
 | 		break; | 
 | 	case KVM_TRACE_DISABLE: | 
 | 		r = 0; | 
 | 		kvm_trace_cleanup(); | 
 | 		break; | 
 | 	} | 
 |  | 
 | 	return r; | 
 | } |