fs: introduce iomap infrastructure

Add infrastructure for multipage buffered writes.  This is implemented
using an main iterator that applies an actor function to a range that
can be written.

This infrastucture is used to implement a buffered write helper, one
to zero file ranges and one to implement the ->page_mkwrite VM
operations.  All of them borrow a fair amount of code from fs/buffers.
for now by using an internal version of __block_write_begin that
gets passed an iomap and builds the corresponding buffer head.

The file system is gets a set of paired ->iomap_begin and ->iomap_end
calls which allow it to map/reserve a range and get a notification
once the write code is finished with it.

Based on earlier code from Dave Chinner.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>


diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 1b22197..d2f469a 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -3,19 +3,65 @@
 
 #include <linux/types.h>
 
-/* types of block ranges for multipage write mappings. */
+struct inode;
+struct iov_iter;
+struct kiocb;
+struct vm_area_struct;
+struct vm_fault;
+
+/*
+ * Types of block ranges for iomap mappings:
+ */
 #define IOMAP_HOLE	0x01	/* no blocks allocated, need allocation */
 #define IOMAP_DELALLOC	0x02	/* delayed allocation blocks */
 #define IOMAP_MAPPED	0x03	/* blocks allocated @blkno */
 #define IOMAP_UNWRITTEN	0x04	/* blocks allocated @blkno in unwritten state */
 
+/*
+ * Magic value for blkno:
+ */
 #define IOMAP_NULL_BLOCK -1LL	/* blkno is not valid */
 
 struct iomap {
-	sector_t	blkno;	/* first sector of mapping */
-	loff_t		offset;	/* file offset of mapping, bytes */
-	u64		length;	/* length of mapping, bytes */
-	int		type;	/* type of mapping */
+	sector_t		blkno;	/* 1st sector of mapping, 512b units */
+	loff_t			offset;	/* file offset of mapping, bytes */
+	u64			length;	/* length of mapping, bytes */
+	int			type;	/* type of mapping */
+	struct block_device	*bdev;	/* block device for I/O */
 };
 
+/*
+ * Flags for iomap_begin / iomap_end.  No flag implies a read.
+ */
+#define IOMAP_WRITE		(1 << 0)
+#define IOMAP_ZERO		(1 << 1)
+
+struct iomap_ops {
+	/*
+	 * Return the existing mapping at pos, or reserve space starting at
+	 * pos for up to length, as long as we can do it as a single mapping.
+	 * The actual length is returned in iomap->length.
+	 */
+	int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length,
+			unsigned flags, struct iomap *iomap);
+
+	/*
+	 * Commit and/or unreserve space previous allocated using iomap_begin.
+	 * Written indicates the length of the successful write operation which
+	 * needs to be commited, while the rest needs to be unreserved.
+	 * Written might be zero if no data was written.
+	 */
+	int (*iomap_end)(struct inode *inode, loff_t pos, loff_t length,
+			ssize_t written, unsigned flags, struct iomap *iomap);
+};
+
+ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
+		struct iomap_ops *ops);
+int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
+		bool *did_zero, struct iomap_ops *ops);
+int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
+		struct iomap_ops *ops);
+int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
+		struct iomap_ops *ops);
+
 #endif /* LINUX_IOMAP_H */