ocfs2: Support creation of unwritten extents
This can now be trivially supported with re-use of our existing extend code.
ocfs2_allocate_unwritten_extents() takes a start offset and a byte length
and iterates over the inode, adding extents (marked as unwritten) until len
is reached. Existing extents are skipped over.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 0db6a1f..2e38983 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3726,6 +3726,7 @@
u32 cpos,
u64 start_blk,
u32 new_clusters,
+ u8 flags,
struct ocfs2_alloc_context *meta_ac)
{
int status;
@@ -3749,6 +3750,7 @@
rec.e_cpos = cpu_to_le32(cpos);
rec.e_blkno = cpu_to_le64(start_blk);
rec.e_leaf_clusters = cpu_to_le16(new_clusters);
+ rec.e_flags = flags;
status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
&insert);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index d3acf45..e3284f3 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -34,6 +34,7 @@
u32 cpos,
u64 start_blk,
u32 new_clusters,
+ u8 flags,
struct ocfs2_alloc_context *meta_ac);
struct ocfs2_cached_dealloc_ctxt;
int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8af9233..ec8b606 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1136,7 +1136,7 @@
*/
tmp_pos = cpos;
ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode,
- &tmp_pos, 1, wc->w_di_bh,
+ &tmp_pos, 1, 0, wc->w_di_bh,
wc->w_handle, data_ac,
meta_ac, NULL);
/*
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c441ef1..0d5fdde 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -368,7 +368,7 @@
u32 offset = OCFS2_I(dir)->ip_clusters;
status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset,
- 1, parent_fe_bh, handle,
+ 1, 0, parent_fe_bh, handle,
data_ac, meta_ac, NULL);
BUG_ON(status == -EAGAIN);
if (status < 0) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6745086..3e21ad9 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -425,6 +425,7 @@
struct inode *inode,
u32 *logical_offset,
u32 clusters_to_add,
+ int mark_unwritten,
struct buffer_head *fe_bh,
handle_t *handle,
struct ocfs2_alloc_context *data_ac,
@@ -437,9 +438,13 @@
enum ocfs2_alloc_restarted reason = RESTART_NONE;
u32 bit_off, num_bits;
u64 block;
+ u8 flags = 0;
BUG_ON(!clusters_to_add);
+ if (mark_unwritten)
+ flags = OCFS2_EXT_UNWRITTEN;
+
free_extents = ocfs2_num_free_extents(osb, inode, fe);
if (free_extents < 0) {
status = free_extents;
@@ -489,7 +494,7 @@
num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
*logical_offset, block, num_bits,
- meta_ac);
+ flags, meta_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
@@ -522,9 +527,11 @@
* For a given allocation, determine which allocators will need to be
* accessed, and lock them, reserving the appropriate number of bits.
*
- * Called from ocfs2_extend_allocation() for file systems which don't
- * support holes, and from ocfs2_write() for file systems which
- * understand sparse inodes.
+ * Sparse file systems call this from ocfs2_write_begin_nolock()
+ * and ocfs2_allocate_unwritten_extents().
+ *
+ * File systems which don't support holes call this from
+ * ocfs2_extend_allocation().
*/
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
u32 clusters_to_add, u32 extents_to_split,
@@ -595,14 +602,13 @@
return ret;
}
-static int ocfs2_extend_allocation(struct inode *inode,
- u32 clusters_to_add)
+static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
+ u32 clusters_to_add, int mark_unwritten)
{
int status = 0;
int restart_func = 0;
- int drop_alloc_sem = 0;
int credits;
- u32 prev_clusters, logical_start;
+ u32 prev_clusters;
struct buffer_head *bh = NULL;
struct ocfs2_dinode *fe = NULL;
handle_t *handle = NULL;
@@ -617,7 +623,7 @@
* This function only exists for file systems which don't
* support holes.
*/
- BUG_ON(ocfs2_sparse_alloc(osb));
+ BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
OCFS2_BH_CACHED, inode);
@@ -633,18 +639,9 @@
goto leave;
}
- logical_start = OCFS2_I(inode)->ip_clusters;
-
restart_all:
BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
- /* blocks peope in read/write from reading our allocation
- * until we're done changing it. We depend on i_mutex to block
- * other extend/truncate calls while we're here. Ordering wrt
- * start_trans is important here -- always do it before! */
- down_write(&OCFS2_I(inode)->ip_alloc_sem);
- drop_alloc_sem = 1;
-
status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac,
&meta_ac);
if (status) {
@@ -678,6 +675,7 @@
inode,
&logical_start,
clusters_to_add,
+ mark_unwritten,
bh,
handle,
data_ac,
@@ -730,10 +728,6 @@
OCFS2_I(inode)->ip_clusters, i_size_read(inode));
leave:
- if (drop_alloc_sem) {
- up_write(&OCFS2_I(inode)->ip_alloc_sem);
- drop_alloc_sem = 0;
- }
if (handle) {
ocfs2_commit_trans(osb, handle);
handle = NULL;
@@ -759,6 +753,25 @@
return status;
}
+static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
+ u32 clusters_to_add, int mark_unwritten)
+{
+ int ret;
+
+ /*
+ * The alloc sem blocks people in read/write from reading our
+ * allocation until we're done changing it. We depend on
+ * i_mutex to block other extend/truncate calls while we're
+ * here.
+ */
+ down_write(&OCFS2_I(inode)->ip_alloc_sem);
+ ret = __ocfs2_extend_allocation(inode, logical_start, clusters_to_add,
+ mark_unwritten);
+ up_write(&OCFS2_I(inode)->ip_alloc_sem);
+
+ return ret;
+}
+
/* Some parts of this taken from generic_cont_expand, which turned out
* to be too fragile to do exactly what we need without us having to
* worry about recursive locking in ->prepare_write() and
@@ -900,7 +913,9 @@
}
if (clusters_to_add) {
- ret = ocfs2_extend_allocation(inode, clusters_to_add);
+ ret = ocfs2_extend_allocation(inode,
+ OCFS2_I(inode)->ip_clusters,
+ clusters_to_add, 0);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
@@ -1176,6 +1191,64 @@
return ret;
}
+/*
+ * Allocate enough extents to cover the region starting at byte offset
+ * start for len bytes. Existing extents are skipped, any extents added
+ * are marked as "unwritten". Returns 0 or the first error encountered.
+ */
+static int ocfs2_allocate_unwritten_extents(struct inode *inode,
+ u64 start, u64 len)
+{
+ int ret;
+ u32 cpos, phys_cpos, clusters, alloc_size;
+
+ /*
+ * We consider both start and len to be inclusive.
+ */
+ cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+ clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
+ clusters -= cpos;
+
+ while (clusters) {
+ ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
+ &alloc_size, NULL);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /*
+ * Hole or existing extent len can be arbitrary, so
+ * cap it to our own allocation request.
+ */
+ if (alloc_size > clusters)
+ alloc_size = clusters;
+
+ if (phys_cpos) {
+ /*
+ * We already have an allocation at this
+ * region so we can safely skip it.
+ */
+ goto next;
+ }
+
+ ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
+ if (ret) {
+ if (ret != -ENOSPC)
+ mlog_errno(ret);
+ goto out;
+ }
+
+next:
+ cpos += alloc_size;
+ clusters -= alloc_size;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
loff_t *ppos,
size_t count,
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 54df3c4..79115c9 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -39,13 +39,14 @@
};
int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
struct inode *inode,
- u32 *cluster_start,
+ u32 *logical_offset,
u32 clusters_to_add,
+ int mark_unwritten,
struct buffer_head *fe_bh,
handle_t *handle,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
- enum ocfs2_alloc_restarted *reason);
+ enum ocfs2_alloc_restarted *reason_ret);
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 36289e6..d430fda 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1674,7 +1674,7 @@
u32 offset = 0;
inode->i_op = &ocfs2_symlink_inode_operations;
- status = ocfs2_do_extend_allocation(osb, inode, &offset, 1,
+ status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0,
new_fe_bh,
handle, data_ac, NULL,
NULL);