|  | /* | 
|  | * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru> | 
|  | * All rights reserved. | 
|  | * | 
|  | * This program is free software; you can redistribute it and/or modify | 
|  | * it under the terms of the GNU General Public License as published by | 
|  | * the Free Software Foundation; either version 2 of the License, or | 
|  | * (at your option) any later version. | 
|  | * | 
|  | * This program is distributed in the hope that it will be useful, | 
|  | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | * GNU General Public License for more details. | 
|  | */ | 
|  |  | 
|  | #ifndef __DST_H | 
|  | #define __DST_H | 
|  |  | 
|  | #include <linux/types.h> | 
|  | #include <linux/connector.h> | 
|  |  | 
|  | /* Size, in bytes, of the fixed-length name buffers declared in this header */ | 
|  | #define DST_NAMELEN		32 | 
|  | /* Subsystem name string (not referenced elsewhere in this header) */ | 
|  | #define DST_NAME		"dst" | 
|  |  | 
|  | /* | 
|  | * Control commands, carried in the cmd field of struct dst_ctl. | 
|  | * Values are assigned sequentially starting from zero. | 
|  | */ | 
|  | enum { | 
|  | /* Remove node with given id from storage */ | 
|  | DST_DEL_NODE	= 0, | 
|  | /* Add remote node with given id to the storage */ | 
|  | DST_ADD_REMOTE, | 
|  | /* Add local node with given id to the storage to be exported and used by remote peers */ | 
|  | DST_ADD_EXPORT, | 
|  | /* Crypto initialization command (hash/cipher used to protect the connection) */ | 
|  | DST_CRYPTO, | 
|  | /* Security attributes for given connection (permissions for example) */ | 
|  | DST_SECURITY, | 
|  | /* Register given node in the block layer subsystem */ | 
|  | DST_START, | 
|  | /* Number of control commands defined above; not a command itself */ | 
|  | DST_CMD_MAX | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Control request header: names the storage, selects one of the control | 
|  | * commands above and carries per-device tunables. | 
|  | * NOTE(review): presumably delivered over the kernel connector, since the | 
|  | * reply structure embeds struct cn_msg - confirm against the implementation. | 
|  | */ | 
|  | struct dst_ctl | 
|  | { | 
|  | /* Storage name */ | 
|  | char			name[DST_NAMELEN]; | 
|  | /* Command flags */ | 
|  | __u32			flags; | 
|  | /* Command itself (one of the DST_* control commands above) */ | 
|  | __u32			cmd; | 
|  | /* Maximum number of pages per single request in this device */ | 
|  | __u32			max_pages; | 
|  | /* Stale/error transaction scanning timeout in milliseconds */ | 
|  | __u32			trans_scan_timeout; | 
|  | /* Maximum number of retry sends before completing transaction as broken */ | 
|  | __u32			trans_max_retries; | 
|  | /* Storage size */ | 
|  | __u64			size; | 
|  | }; | 
|  |  | 
|  | /* Reply to a control command; carries the completion status */ | 
|  | struct dst_ctl_ack | 
|  | { | 
|  | /* Connector message header (struct cn_msg comes from <linux/connector.h>) */ | 
|  | struct cn_msg		msg; | 
|  | /* Completion status of the acknowledged command */ | 
|  | int			error; | 
|  | /* Not used, as the name says */ | 
|  | int			unused[3]; | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Unfortunately the socket address structure is not exported to userspace | 
|  | * and is redefined there, so DST declares its own fixed-size variant. | 
|  | */ | 
|  | #define SADDR_MAX_DATA	128 | 
|  |  | 
|  | struct saddr { | 
|  | /* Address family, AF_xxx */ | 
|  | unsigned short		sa_family; | 
|  | /* Protocol address; the buffer holds up to SADDR_MAX_DATA bytes */ | 
|  | char			sa_data[SADDR_MAX_DATA]; | 
|  | /* Number of bytes used in sa_data */ | 
|  | unsigned short		sa_data_len; | 
|  | }; | 
|  |  | 
|  | /* Network address description: socket type, protocol and peer address */ | 
|  | struct dst_network_ctl | 
|  | { | 
|  | /* Socket type: datagram, stream... */ | 
|  | unsigned int		type; | 
|  | /* | 
|  | * Protocol. | 
|  | * NOTE(review): presumably the protocol number used when the socket | 
|  | * is created - confirm against the socket setup code. | 
|  | */ | 
|  | unsigned int		proto; | 
|  | /* Peer's address */ | 
|  | struct saddr		addr; | 
|  | }; | 
|  |  | 
|  | /* Crypto setup: algorithm names, key sizes and number of crypto threads */ | 
|  | struct dst_crypto_ctl | 
|  | { | 
|  | /* Cipher and hash names; each buffer is DST_NAMELEN bytes */ | 
|  | char			cipher_algo[DST_NAMELEN]; | 
|  | char			hash_algo[DST_NAMELEN]; | 
|  |  | 
|  | /* Key sizes. Can be zero, for a plain digest for example */ | 
|  | unsigned int		cipher_keysize, hash_keysize; | 
|  | /* | 
|  | * Alignment. Calculated by the DST itself. | 
|  | * NOTE(review): the name suggests the size of crypto data attached | 
|  | * to a command - confirm against the crypto code. | 
|  | */ | 
|  | unsigned int		crypto_attached_size; | 
|  | /* Number of threads to perform crypto operations */ | 
|  | int			thread_num; | 
|  | }; | 
|  |  | 
|  | /* Export security attributes: these bits are checked when a client connects */ | 
|  | #define DST_PERM_READ		(1<<0) | 
|  | #define DST_PERM_WRITE		(1<<1) | 
|  |  | 
|  | /* | 
|  | * Right now it is a simple model, where each remote address | 
|  | * is assigned the set of operations it is allowed to perform. | 
|  | * In the real world a block device does not know anything but | 
|  | * reading and writing, so it should be more than enough. | 
|  | */ | 
|  | struct dst_secure_user | 
|  | { | 
|  | /* Bitmask of DST_PERM_* values */ | 
|  | unsigned int		permissions; | 
|  | /* Remote address the permissions apply to */ | 
|  | struct saddr		addr; | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Export control command: device to export and network address on which | 
|  | * to accept clients that will work with the given device. | 
|  | */ | 
|  | struct dst_export_ctl | 
|  | { | 
|  | /* Name of the device to export */ | 
|  | char			device[DST_NAMELEN]; | 
|  | /* Network address to accept clients on */ | 
|  | struct dst_network_ctl	ctl; | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Network commands, carried in the cmd field of struct dst_cmd. | 
|  | * Numbering starts at one. | 
|  | */ | 
|  | enum { | 
|  | DST_CFG	= 1, 		/* Request remote configuration */ | 
|  | DST_IO,			/* IO command */ | 
|  | DST_IO_RESPONSE,	/* IO response */ | 
|  | DST_PING,		/* Keepalive message */ | 
|  | DST_NCMD_MAX,		/* One past the last network command; not a command itself */ | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Network command header. dst_convert_cmd() below byte-swaps its | 
|  | * multi-byte fields (except reserved) to/from network byte order. | 
|  | */ | 
|  | struct dst_cmd | 
|  | { | 
|  | /* Network command itself (one of DST_CFG, DST_IO, ... above) */ | 
|  | __u32			cmd; | 
|  | /* | 
|  | * Size of the attached data | 
|  | * (in most cases, for READ command it means how many bytes were requested) | 
|  | */ | 
|  | __u32			size; | 
|  | /* Crypto size: number of attached bytes with digest/hmac */ | 
|  | __u32			csize; | 
|  | /* Reserved; not touched by dst_convert_cmd() */ | 
|  | __u32			reserved; | 
|  | /* Read/write bits, see how they are encoded in bio structure */ | 
|  | __u64			rw; | 
|  | /* BIO flags */ | 
|  | __u64			flags; | 
|  | /* Unique command id (like transaction ID) */ | 
|  | __u64			id; | 
|  | /* Sector to start IO from */ | 
|  | __u64			sector; | 
|  | /* Hash data is placed after this header (zero-length trailing array) */ | 
|  | __u8			hash[0]; | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Byte-swap a command header in place between CPU and big-endian | 
|  | * (network) order. The same routine is used for both directions. | 
|  | * | 
|  | * The hton*() helpers are avoided because they have no 64-bit variant. | 
|  | * The reserved field and the trailing hash bytes are left untouched. | 
|  | */ | 
|  | static inline void dst_convert_cmd(struct dst_cmd *c) | 
|  | { | 
|  | 	/* 32-bit fields */ | 
|  | 	c->cmd = __cpu_to_be32(c->cmd); | 
|  | 	c->size = __cpu_to_be32(c->size); | 
|  | 	c->csize = __cpu_to_be32(c->csize); | 
|  |  | 
|  | 	/* 64-bit fields */ | 
|  | 	c->rw = __cpu_to_be64(c->rw); | 
|  | 	c->flags = __cpu_to_be64(c->flags); | 
|  | 	c->id = __cpu_to_be64(c->id); | 
|  | 	c->sector = __cpu_to_be64(c->sector); | 
|  | } | 
|  |  | 
|  | /* Transaction id type; 64 bits wide, like the id field of struct dst_cmd */ | 
|  | typedef __u64 dst_gen_t; | 
|  |  | 
|  | #ifdef __KERNEL__ | 
|  |  | 
|  | #include <linux/blkdev.h> | 
|  | #include <linux/bio.h> | 
|  | #include <linux/device.h> | 
|  | #include <linux/mempool.h> | 
|  | #include <linux/net.h> | 
|  | #include <linux/poll.h> | 
|  | #include <linux/rbtree.h> | 
|  |  | 
|  | /* | 
|  | * Debug logging helper. | 
|  | * With CONFIG_DST_DEBUG it expands to printk() at KERN_NOTICE level; | 
|  | * otherwise it is an empty inline function that still lets the compiler | 
|  | * check the format string against the arguments. | 
|  | */ | 
|  | #ifdef CONFIG_DST_DEBUG | 
|  | #define dprintk(fmt, args...) printk(KERN_NOTICE fmt, ##args) | 
|  | #else | 
|  | static inline void __attribute__ ((format (printf, 1, 2))) | 
|  | dprintk(const char *fmt, ...) | 
|  | { | 
|  | } | 
|  | #endif | 
|  |  | 
|  | /* Forward declaration; the full definition follows further below */ | 
|  | struct dst_node; | 
|  |  | 
|  | /* | 
|  | * A single transaction: one block IO request together with the network | 
|  | * command that describes it. | 
|  | */ | 
|  | struct dst_trans | 
|  | { | 
|  | /* DST node we are working with */ | 
|  | struct dst_node		*n; | 
|  |  | 
|  | /* Entry inside transaction tree */ | 
|  | struct rb_node		trans_entry; | 
|  |  | 
|  | /* Reference counter; the transaction is destroyed when it drops to zero */ | 
|  | atomic_t		refcnt; | 
|  |  | 
|  | /* How this transaction should be processed by crypto engine */ | 
|  | short			enc; | 
|  | /* How many times this transaction was resent */ | 
|  | short			retries; | 
|  | /* Completion status */ | 
|  | int			error; | 
|  |  | 
|  | /* When did we send it to the remote peer */ | 
|  | long			send_time; | 
|  |  | 
|  | /* Unique id of this transaction */ | 
|  | dst_gen_t		gen; | 
|  |  | 
|  | /* Block IO we are working with */ | 
|  | struct bio		*bio; | 
|  |  | 
|  | /* Network command for above block IO request */ | 
|  | struct dst_cmd		cmd; | 
|  | }; | 
|  |  | 
|  | /* State used to hash and/or encrypt block requests */ | 
|  | struct dst_crypto_engine | 
|  | { | 
|  | /* Hash and cipher transforms applied to block requests */ | 
|  | struct crypto_hash	*hash; | 
|  | struct crypto_ablkcipher	*cipher; | 
|  |  | 
|  | /* Pool of pages used to encrypt data into before sending */ | 
|  | int			page_num; | 
|  | struct page		**pages; | 
|  |  | 
|  | /* What to do with current request */ | 
|  | int			enc; | 
|  | /* Source and destination scatterlists */ | 
|  | struct scatterlist	*src, *dst; | 
|  |  | 
|  | /* Maximum timeout waiting for encryption to be completed */ | 
|  | long			timeout; | 
|  | /* IV is a 64-bit sequential counter */ | 
|  | u64			iv; | 
|  |  | 
|  | /* Private data pointer (its owner is not visible in this header) */ | 
|  | void			*private; | 
|  |  | 
|  | /* Cached temporary data lives here, together with its size */ | 
|  | int			size; | 
|  | void			*data; | 
|  | }; | 
|  |  | 
|  | /* Network state for one connection: socket, locking and the current command */ | 
|  | struct dst_state | 
|  | { | 
|  | /* The main state protection; taken by dst_state_lock()/dst_state_unlock() */ | 
|  | struct mutex		state_lock; | 
|  |  | 
|  | /* Polling machinery for sockets */ | 
|  | wait_queue_t 		wait; | 
|  | wait_queue_head_t 	*whead; | 
|  | /* Most of events are being waited here */ | 
|  | wait_queue_head_t 	thread_wait; | 
|  |  | 
|  | /* Node that owns this state */ | 
|  | struct dst_node		*node; | 
|  |  | 
|  | /* Network address for this state */ | 
|  | struct dst_network_ctl	ctl; | 
|  |  | 
|  | /* Permissions to work with: read-only or rw connection */ | 
|  | u32			permissions; | 
|  |  | 
|  | /* Called when we need to clean private data */ | 
|  | void			(* cleanup)(struct dst_state *st); | 
|  |  | 
|  | /* Used by the server: BIO completion queues BIOs here */ | 
|  | struct list_head	request_list; | 
|  | spinlock_t		request_lock; | 
|  |  | 
|  | /* Reference counter, incremented by dst_state_get() */ | 
|  | atomic_t		refcnt; | 
|  |  | 
|  | /* This flag is set when connection should be dropped */ | 
|  | int			need_exit; | 
|  |  | 
|  | /* | 
|  | * Socket to work with. Second pointer is used for | 
|  | * lockless check if socket was changed before performing | 
|  | * next action (like working with cached polling result) | 
|  | */ | 
|  | struct socket		*socket, *read_socket; | 
|  |  | 
|  | /* Cached preallocated data and its size */ | 
|  | void			*data; | 
|  | unsigned int		size; | 
|  |  | 
|  | /* Currently processed command */ | 
|  | struct dst_cmd		cmd; | 
|  | }; | 
|  |  | 
|  | /* Node information exposed to userspace (referenced from struct dst_node) */ | 
|  | struct dst_info | 
|  | { | 
|  | /* Device size */ | 
|  | u64			size; | 
|  |  | 
|  | /* Local device name for export devices */ | 
|  | char			local[DST_NAMELEN]; | 
|  |  | 
|  | /* Network setup */ | 
|  | struct dst_network_ctl	net; | 
|  |  | 
|  | /* Sysfs bits use this */ | 
|  | struct device		device; | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Central per-storage object: ties together the block device side, | 
|  | * the network state, crypto settings and the transaction machinery. | 
|  | */ | 
|  | struct dst_node | 
|  | { | 
|  | /* List linkage (the list head it hangs on is not visible in this header) */ | 
|  | struct list_head	node_entry; | 
|  |  | 
|  | /* Node name */ | 
|  | char			name[DST_NAMELEN]; | 
|  | /* | 
|  | * Cache name. | 
|  | * NOTE(review): presumably the name given to trans_cache below - confirm. | 
|  | */ | 
|  | char			cache_name[DST_NAMELEN]; | 
|  |  | 
|  | /* Block device attached to given node. | 
|  | * Only valid for exporting nodes */ | 
|  | struct block_device 	*bdev; | 
|  | /* Network state machine for given peer */ | 
|  | struct dst_state	*state; | 
|  |  | 
|  | /* Block IO machinery */ | 
|  | struct request_queue	*queue; | 
|  | struct gendisk		*disk; | 
|  |  | 
|  | /* Number of threads in processing pool */ | 
|  | int			thread_num; | 
|  | /* Maximum number of pages in single IO */ | 
|  | int			max_pages; | 
|  |  | 
|  | /* Node size in bytes */ | 
|  | loff_t			size; | 
|  |  | 
|  | /* Exported to userspace node information */ | 
|  | struct dst_info		*info; | 
|  |  | 
|  | /* | 
|  | * Security attribute list. | 
|  | * Used only by exporting node currently. | 
|  | */ | 
|  | struct list_head	security_list; | 
|  | struct mutex		security_lock; | 
|  |  | 
|  | /* | 
|  | * Reference counter, incremented by dst_node_get(). | 
|  | * It is not expected to underflow below zero, since the node | 
|  | * is freed when the counter reaches zero. | 
|  | */ | 
|  | atomic_t		refcnt; | 
|  |  | 
|  | /* Start callback: how this particular node should be started */ | 
|  | int 			(*start)(struct dst_node *); | 
|  |  | 
|  | /* Crypto capabilities and key buffers */ | 
|  | struct dst_crypto_ctl	crypto; | 
|  | u8			*hash_key; | 
|  | u8			*cipher_key; | 
|  |  | 
|  | /* Pool of processing thread */ | 
|  | struct thread_pool	*pool; | 
|  |  | 
|  | /* Source of transaction IDs */ | 
|  | atomic_long_t		gen; | 
|  |  | 
|  | /* | 
|  | * How frequently and how many times transaction | 
|  | * tree should be scanned to drop stale objects. | 
|  | */ | 
|  | long			trans_scan_timeout; | 
|  | int			trans_max_retries; | 
|  |  | 
|  | /* | 
|  | * Root of the transaction tree. | 
|  | * NOTE(review): trans_lock presumably protects the tree - confirm. | 
|  | */ | 
|  | struct rb_root		trans_root; | 
|  | struct mutex		trans_lock; | 
|  |  | 
|  | /* | 
|  | * Transaction cache/memory pool. | 
|  | * It is big enough to contain not only transaction | 
|  | * itself, but additional crypto data (digest/hmac). | 
|  | */ | 
|  | struct kmem_cache	*trans_cache; | 
|  | mempool_t		*trans_pool; | 
|  |  | 
|  | /* This entity scans transaction tree */ | 
|  | struct delayed_work 	trans_work; | 
|  |  | 
|  | /* Wait queue (what is waited for here is not visible in this header) */ | 
|  | wait_queue_head_t	wait; | 
|  | }; | 
|  |  | 
|  | /* Kernel representation of the security attribute */ | 
|  | struct dst_secure | 
|  | { | 
|  | /* List linkage; NOTE(review): presumably on dst_node.security_list - confirm */ | 
|  | struct list_head	sec_entry; | 
|  | /* The userspace-visible attribute: permissions plus remote address */ | 
|  | struct dst_secure_user	sec; | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Implemented elsewhere; only the prototypes are visible here. | 
|  | * NOTE(review): names suggest bio submission for a node and node setup | 
|  | * for the connecting (client) and listening (export) roles - confirm. | 
|  | */ | 
|  | int dst_process_bio(struct dst_node *n, struct bio *bio); | 
|  |  | 
|  | int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r); | 
|  | int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le); | 
|  |  | 
|  | /* | 
|  | * Take an additional reference on @st and hand the same pointer back. | 
|  | * | 
|  | * Triggers BUG_ON() when the counter is already zero at the time of | 
|  | * the check, i.e. the caller does not hold a live reference. | 
|  | */ | 
|  | static inline struct dst_state *dst_state_get(struct dst_state *st) | 
|  | { | 
|  | 	atomic_t *refs = &st->refcnt; | 
|  |  | 
|  | 	BUG_ON(!atomic_read(refs)); | 
|  | 	atomic_inc(refs); | 
|  |  | 
|  | 	return st; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * State management entry points implemented elsewhere; only the | 
|  | * prototypes are visible here. | 
|  | * NOTE(review): dst_state_put() is presumably the counterpart of | 
|  | * dst_state_get() that drops a reference - confirm. | 
|  | */ | 
|  | void dst_state_put(struct dst_state *st); | 
|  |  | 
|  | struct dst_state *dst_state_alloc(struct dst_node *n); | 
|  | int dst_state_socket_create(struct dst_state *st); | 
|  | void dst_state_socket_release(struct dst_state *st); | 
|  |  | 
|  | void dst_state_exit_connected(struct dst_state *st); | 
|  |  | 
|  | int dst_state_schedule_receiver(struct dst_state *st); | 
|  |  | 
|  | /* NOTE(review): presumably formats address @sa into @str; buffer size is not passed - confirm */ | 
|  | void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str); | 
|  |  | 
|  | /* Acquire the state mutex of @st */ | 
|  | static inline void dst_state_lock(struct dst_state *st) | 
|  | { | 
|  | mutex_lock(&st->state_lock); | 
|  | } | 
|  |  | 
|  | /* Release the state mutex of @st */ | 
|  | static inline void dst_state_unlock(struct dst_state *st) | 
|  | { | 
|  | mutex_unlock(&st->state_lock); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Polling setup/teardown, implemented elsewhere. | 
|  | * NOTE(review): presumably manage the wait/whead members of | 
|  | * struct dst_state - confirm. | 
|  | */ | 
|  | void dst_poll_exit(struct dst_state *st); | 
|  | int dst_poll_init(struct dst_state *st); | 
|  |  | 
|  | /* | 
|  | * Query the poll mask of the socket attached to @st. | 
|  | * | 
|  | * The state lock is held while the socket pointer is checked and its | 
|  | * ->poll() operation is invoked with NULL file and poll-table arguments. | 
|  | * Returns the mask reported by the socket, or POLLHUP | POLLERR when no | 
|  | * socket is attached. | 
|  | */ | 
|  | static inline unsigned int dst_state_poll(struct dst_state *st) | 
|  | { | 
|  | 	struct socket *sock; | 
|  | 	unsigned int mask; | 
|  |  | 
|  | 	dst_state_lock(st); | 
|  | 	sock = st->socket; | 
|  | 	if (sock) | 
|  | 		mask = sock->ops->poll(NULL, sock, NULL); | 
|  | 	else | 
|  | 		mask = POLLHUP | POLLERR; | 
|  | 	dst_state_unlock(st); | 
|  |  | 
|  | 	return mask; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * No-op setup callback: ignores both arguments and always returns 0. | 
|  | * Its signature matches the setup parameter of thread_pool_schedule(). | 
|  | */ | 
|  | static inline int dst_thread_setup(void *private, void *data) | 
|  | { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Implemented elsewhere. | 
|  | * NOTE(review): presumably drops a reference taken by dst_node_get() - confirm. | 
|  | */ | 
|  | void dst_node_put(struct dst_node *n); | 
|  |  | 
|  | /* | 
|  | * Take an additional reference on @n and return the same pointer. | 
|  | * Unlike dst_state_get(), the counter is not checked for zero first. | 
|  | */ | 
|  | static inline struct dst_node *dst_node_get(struct dst_node *n) | 
|  | { | 
|  | atomic_inc(&n->refcnt); | 
|  | return n; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Data transfer and export entry points implemented elsewhere; only the | 
|  | * prototypes are visible here, so return-value conventions must be | 
|  | * checked in the implementation. | 
|  | */ | 
|  | int dst_data_recv(struct dst_state *st, void *data, unsigned int size); | 
|  | int dst_recv_cdata(struct dst_state *st, void *cdata); | 
|  | int dst_data_send_header(struct socket *sock, | 
|  | void *data, unsigned int size, int more); | 
|  |  | 
|  | int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio); | 
|  |  | 
|  | int dst_process_io(struct dst_state *st); | 
|  | int dst_export_crypto(struct dst_node *n, struct bio *bio); | 
|  | int dst_export_send_bio(struct bio *bio); | 
|  | int dst_start_export(struct dst_node *n); | 
|  |  | 
|  | /* Export subsystem init/exit; the init routine is marked __init */ | 
|  | int __init dst_export_init(void); | 
|  | void dst_export_exit(void); | 
|  |  | 
|  | /* Private structure for export block IO requests */ | 
|  | struct dst_export_priv | 
|  | { | 
|  | /* List linkage; NOTE(review): presumably on dst_state.request_list - confirm */ | 
|  | struct list_head		request_entry; | 
|  | /* Network state this request belongs to */ | 
|  | struct dst_state		*state; | 
|  | /* The block IO request itself */ | 
|  | struct bio			*bio; | 
|  | /* Network command associated with the request */ | 
|  | struct dst_cmd			cmd; | 
|  | }; | 
|  |  | 
|  | /* Take an additional reference on transaction @t */ | 
|  | static inline void dst_trans_get(struct dst_trans *t) | 
|  | { | 
|  | atomic_inc(&t->refcnt); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Transaction tree helpers implemented elsewhere. | 
|  | * NOTE(review): the _nolock variant presumably expects the caller to | 
|  | * hold the node's trans_lock, and dst_trans_put() presumably drops a | 
|  | * reference taken by dst_trans_get() - confirm. | 
|  | */ | 
|  | struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen); | 
|  | int dst_trans_remove(struct dst_trans *t); | 
|  | int dst_trans_remove_nolock(struct dst_trans *t); | 
|  | void dst_trans_put(struct dst_trans *t); | 
|  |  | 
|  | /* | 
|  | * Convert bio into network command. | 
|  | * | 
|  | * @bio:     block IO request to describe | 
|  | * @cmd:     command header to fill in | 
|  | * @command: network command code stored in cmd->cmd | 
|  | * @id:      transaction id stored in cmd->id | 
|  | * | 
|  | * Values are stored in CPU byte order. csize is reset to zero. | 
|  | * The reserved field and the trailing hash are not written here. | 
|  | */ | 
|  | static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd, | 
|  | 		u32 command, u64 id) | 
|  | { | 
|  | 	cmd->cmd = command; | 
|  | 	/* | 
|  | 	 * Shifting left and back right clears the top BIO_POOL_BITS bits | 
|  | 	 * of bi_flags (assumes an unsigned bi_flags - confirm). | 
|  | 	 */ | 
|  | 	cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS; | 
|  | 	cmd->rw = bio->bi_rw; | 
|  | 	cmd->size = bio->bi_size; | 
|  | 	cmd->csize = 0; | 
|  | 	cmd->id = id; | 
|  | 	cmd->sector = bio->bi_sector; | 
|  | } | 
|  |  | 
|  | /* Transaction send/crypto and node crypto setup; implemented elsewhere */ | 
|  | int dst_trans_send(struct dst_trans *t); | 
|  | int dst_trans_crypto(struct dst_trans *t); | 
|  |  | 
|  | int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl); | 
|  | void dst_node_crypto_exit(struct dst_node *n); | 
|  |  | 
|  | /* | 
|  | * Tell whether node @n has any crypto algorithm configured. | 
|  | * | 
|  | * Returns non-zero when the hash name or the cipher name is non-empty | 
|  | * (first character is not NUL), zero otherwise. | 
|  | */ | 
|  | static inline int dst_need_crypto(struct dst_node *n) | 
|  | { | 
|  | 	const char hash_first = n->crypto.hash_algo[0]; | 
|  | 	const char cipher_first = n->crypto.cipher_algo[0]; | 
|  |  | 
|  | 	/* | 
|  | 	 * A logical OR would express the intent, but the bitwise OR is | 
|  | 	 * used on purpose: it was chosen for producing more optimal code. | 
|  | 	 */ | 
|  | 	return hash_first | cipher_first; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Transaction machinery setup/teardown for a node; implemented elsewhere. | 
|  | * NOTE(review): @size is presumably the per-transaction allocation size | 
|  | * used for trans_cache/trans_pool - confirm. | 
|  | */ | 
|  | int dst_node_trans_init(struct dst_node *n, unsigned int size); | 
|  | void dst_node_trans_exit(struct dst_node *n); | 
|  |  | 
|  | /* | 
|  | * Pool of threads. | 
|  | * The ready list contains threads currently free to be used, | 
|  | * the active list contains threads with some work scheduled for them. | 
|  | * A caller can wait on the given queue until a thread is ready. | 
|  | */ | 
|  | struct thread_pool | 
|  | { | 
|  | /* Number of threads in the pool */ | 
|  | int			thread_num; | 
|  | /* NOTE(review): presumably protects the two lists below - confirm */ | 
|  | struct mutex		thread_lock; | 
|  | struct list_head	ready_list, active_list; | 
|  |  | 
|  | /* Callers wait here for a thread to become ready */ | 
|  | wait_queue_head_t	wait; | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * Thread pool API, implemented elsewhere; only the prototypes are | 
|  | * visible here. | 
|  | * | 
|  | * Workers are added with an init callback that returns a void pointer | 
|  | * and a cleanup callback that takes one. | 
|  | * NOTE(review): init's return value is presumably the per-thread private | 
|  | * data later passed to cleanup and to the setup/action callbacks - confirm. | 
|  | */ | 
|  | void thread_pool_del_worker(struct thread_pool *p); | 
|  | void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id); | 
|  | int thread_pool_add_worker(struct thread_pool *p, | 
|  | char *name, | 
|  | unsigned int id, | 
|  | void *(* init)(void *data), | 
|  | void (* cleanup)(void *data), | 
|  | void *data); | 
|  |  | 
|  | void thread_pool_destroy(struct thread_pool *p); | 
|  | struct thread_pool *thread_pool_create(int num, char *name, | 
|  | void *(* init)(void *data), | 
|  | void (* cleanup)(void *data), | 
|  | void *data); | 
|  |  | 
|  | /* | 
|  | * Scheduling: both variants take a setup and an action callback with the | 
|  | * same (private, data) signature plus a timeout. | 
|  | * NOTE(review): the _private variant's @id presumably selects a specific | 
|  | * worker; timeout units are not visible here - confirm. | 
|  | */ | 
|  | int thread_pool_schedule(struct thread_pool *p, | 
|  | int (* setup)(void *stored_private, void *setup_data), | 
|  | int (* action)(void *stored_private, void *setup_data), | 
|  | void *setup_data, long timeout); | 
|  | int thread_pool_schedule_private(struct thread_pool *p, | 
|  | int (* setup)(void *private, void *data), | 
|  | int (* action)(void *private, void *data), | 
|  | void *data, long timeout, void *id); | 
|  |  | 
|  | #endif /* __KERNEL__ */ | 
|  | #endif /* __DST_H */ |