make_disk_image: Add support for sparse files.

This is based on code from avbtool.

Bug: 31931311
Test: New unit test and all unit tests pass.
Change-Id: Ib174b42c5843b1eadf07563e6994db962e44e886
diff --git a/bpt_unittest.py b/bpt_unittest.py
index cc1d925..57a58ba 100755
--- a/bpt_unittest.py
+++ b/bpt_unittest.py
@@ -19,6 +19,8 @@
 
 
 import imp
+import json
+import os
 import sys
 import tempfile
 import unittest
@@ -225,6 +227,47 @@
     except bpttool.BptError as e:
       assert 'exceeds the partition size' in e.message
 
+  def testSparseImage(self):
+    """Checks that sparse input is unsparsified."""
+    bpt_file = open('test/test_sparse_image.bpt', 'r')
+    bpt_json, _ = self.bpt.make_table([bpt_file])
+    bpt_json_file = tempfile.NamedTemporaryFile()
+    bpt_json_file.write(bpt_json)
+    bpt_json_file.seek(0)
+    partitions, _ = self.bpt._read_json([bpt_json_file])
+
+    # Generate a disk image where one of the inputs is a sparse disk
+    # image. See below for details about test/test_file.bin and
+    # test/test_file.bin.sparse.
+    generated_disk_image = tempfile.NamedTemporaryFile()
+    bpt_json_file.seek(0)
+    self.bpt.make_disk_image(generated_disk_image,
+                             bpt_json_file,
+                             ['sparse_data:test/test_file.bin.sparse'])
+
+    # Get offset and size of the generated partition.
+    part = json.loads(bpt_json)['partitions'][0]
+    part_offset = int(part['offset'])
+    part_size = int(part['size'])
+
+    # Load the unsparsed data.
+    unsparse_file = open('test/test_file.bin', 'r')
+    unsparse_data = unsparse_file.read()
+    unsparse_size = unsparse_file.tell()
+
+    # Check that the unsparse image doesn't take up all the space.
+    self.assertLess(unsparse_size, part_size)
+
+    # Check that the sparse image was unsparsified correctly.
+    generated_disk_image.seek(part_offset)
+    disk_image_data = generated_disk_image.read(unsparse_size)
+    self.assertItemsEqual(disk_image_data, unsparse_data)
+
+    # Check that the remainder of the partition has zeroes.
+    trailing_size = part_size - unsparse_size
+    trailing_data = generated_disk_image.read(trailing_size)
+    self.assertItemsEqual(trailing_data, '\0'*trailing_size)
+
 
 class MakeTableTest(unittest.TestCase):
   """Unit tests for 'bpttool make_table'."""
@@ -450,5 +493,175 @@
         '1073741824')
 
 
+# The file test_file.bin and test_file.bin.sparse are generated using
+# the following python code:
+#
+#  with open('test_file.bin', 'w+b') as f:
+#    f.write('Barfoo43'*128*12)
+#  os.system('img2simg test_file.bin test_file.bin.sparse')
+#  image = bpttool.ImageHandler('test_file.bin.sparse')
+#  image.append_dont_care(12*1024)
+#  image.append_fill('\x01\x02\x03\x04', 12*1024)
+#  image.append_raw('Foobar42'*128*12)
+#  image.append_dont_care(12*1024)
+#  del image
+#  os.system('rm -f test_file.bin')
+#  os.system('simg2img test_file.bin.sparse test_file.bin')
+#
+# and manually verified to be correct. The content of the raw and
+# sparse files are as follows (the line with "Fill with 0x04030201" is
+# a simg_dump.py bug):
+#
+# $ hexdump -C test_file.bin
+# 00000000  42 61 72 66 6f 6f 34 33  42 61 72 66 6f 6f 34 33  |Barfoo43Barfoo43|
+# *
+# 00003000  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+# *
+# 00006000  01 02 03 04 01 02 03 04  01 02 03 04 01 02 03 04  |................|
+# *
+# 00009000  46 6f 6f 62 61 72 34 32  46 6f 6f 62 61 72 34 32  |Foobar42Foobar42|
+# *
+# 0000c000  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+# *
+# 0000f000
+#
+# $ system/core/libsparse/simg_dump.py -v test_file.bin.sparse
+# test_file.bin.sparse: Total of 15 4096-byte output blocks in 5 input chunks.
+#             input_bytes      output_blocks
+# chunk    offset     number  offset  number
+#    1         40      12288       0       3 Raw data
+#    2      12340          0       3       3 Don't care
+#    3      12352          4       6       3 Fill with 0x04030201
+#    4      12368      12288       9       3 Raw data
+#    5      24668          0      12       3 Don't care
+#           24668                 15         End
+#
+class ImageHandler(unittest.TestCase):
+
+  TEST_FILE_SPARSE_PATH = 'test/test_file.bin.sparse'
+  TEST_FILE_PATH = 'test/test_file.bin'
+  TEST_FILE_SIZE = 61440
+  TEST_FILE_BLOCK_SIZE = 4096
+
+  def _file_contents_equal(self, path1, path2, size):
+    f1 = open(path1, 'r')
+    f2 = open(path2, 'r')
+    if f1.read(size) != f2.read(size):
+      return False
+    return True
+
+  def _file_size(self, f):
+    old_pos = f.tell()
+    f.seek(0, os.SEEK_END)
+    size = f.tell()
+    f.seek(old_pos)
+    return size
+
+  def _clone_sparse_file(self):
+    f = tempfile.NamedTemporaryFile()
+    f.write(open(self.TEST_FILE_SPARSE_PATH).read())
+    f.flush()
+    return f
+
+  def _unsparsify(self, path):
+    f = tempfile.NamedTemporaryFile()
+    os.system('simg2img {} {}'.format(path, f.name))
+    return f
+
+  def testRead(self):
+    """Checks that reading from a sparse file works as intended."""
+    ih = bpttool.ImageHandler(self.TEST_FILE_SPARSE_PATH)
+
+    # Check that we start at offset 0.
+    self.assertEqual(ih.tell(), 0)
+
+    # Check that reading advances the cursor.
+    self.assertEqual(ih.read(14), bytearray('Barfoo43Barfoo'))
+    self.assertEqual(ih.tell(), 14)
+    self.assertEqual(ih.read(2), bytearray('43'))
+    self.assertEqual(ih.tell(), 16)
+
+    # Check reading in the middle of a fill chunk gets the right data.
+    ih.seek(0x6000 + 1)
+    self.assertEqual(ih.read(4), bytearray('\x02\x03\x04\x01'))
+
+    # Check we can cross the chunk boundary correctly.
+    ih.seek(0x3000 - 10)
+    self.assertEqual(ih.read(12), bytearray('43Barfoo43\x00\x00'))
+    ih.seek(0x9000 - 3)
+    self.assertEqual(ih.read(5), bytearray('\x02\x03\x04Fo'))
+
+    # Check reading at end of file is a partial read.
+    ih.seek(0xf000 - 2)
+    self.assertEqual(ih.read(16), bytearray('\x00\x00'))
+
+  def testTruncate(self):
+    """Checks that we can truncate a sparse file correctly."""
+    # Check truncation at all possible boundaries (including start and end).
+    for size in range(0, self.TEST_FILE_SIZE + self.TEST_FILE_BLOCK_SIZE,
+                      self.TEST_FILE_BLOCK_SIZE):
+      sparse_file = self._clone_sparse_file()
+      ih = bpttool.ImageHandler(sparse_file.name)
+      ih.truncate(size)
+      unsparse_file = self._unsparsify(sparse_file.name)
+      self.assertEqual(self._file_size(unsparse_file), size)
+      self.assertTrue(self._file_contents_equal(unsparse_file.name,
+                                                self.TEST_FILE_PATH,
+                                                size))
+
+    # Check truncation to grow the file.
+    grow_size = 8192
+    sparse_file = self._clone_sparse_file()
+    ih = bpttool.ImageHandler(sparse_file.name)
+    ih.truncate(self.TEST_FILE_SIZE + grow_size)
+    unsparse_file = self._unsparsify(sparse_file.name)
+    self.assertEqual(self._file_size(unsparse_file),
+                     self.TEST_FILE_SIZE + grow_size)
+    self.assertTrue(self._file_contents_equal(unsparse_file.name,
+                                              self.TEST_FILE_PATH,
+                                              self.TEST_FILE_SIZE))
+    unsparse_file.seek(self.TEST_FILE_SIZE)
+    self.assertEqual(unsparse_file.read(), '\0'*grow_size)
+
+  def testAppendRaw(self):
+    """Checks that we can append raw data correctly."""
+    sparse_file = self._clone_sparse_file()
+    ih = bpttool.ImageHandler(sparse_file.name)
+    data = 'SomeData'*4096
+    ih.append_raw(data)
+    unsparse_file = self._unsparsify(sparse_file.name)
+    self.assertTrue(self._file_contents_equal(unsparse_file.name,
+                                              self.TEST_FILE_PATH,
+                                              self.TEST_FILE_SIZE))
+    unsparse_file.seek(self.TEST_FILE_SIZE)
+    self.assertEqual(unsparse_file.read(), data)
+
+  def testAppendFill(self):
+    """Checks that we can append fill data correctly."""
+    sparse_file = self._clone_sparse_file()
+    ih = bpttool.ImageHandler(sparse_file.name)
+    data = 'ABCD'*4096
+    ih.append_fill('ABCD', len(data))
+    unsparse_file = self._unsparsify(sparse_file.name)
+    self.assertTrue(self._file_contents_equal(unsparse_file.name,
+                                              self.TEST_FILE_PATH,
+                                              self.TEST_FILE_SIZE))
+    unsparse_file.seek(self.TEST_FILE_SIZE)
+    self.assertEqual(unsparse_file.read(), data)
+
+  def testDontCare(self):
+    """Checks that we can append DONT_CARE data correctly."""
+    sparse_file = self._clone_sparse_file()
+    ih = bpttool.ImageHandler(sparse_file.name)
+    data = '\0'*40960
+    ih.append_dont_care(len(data))
+    unsparse_file = self._unsparsify(sparse_file.name)
+    self.assertTrue(self._file_contents_equal(unsparse_file.name,
+                                              self.TEST_FILE_PATH,
+                                              self.TEST_FILE_SIZE))
+    unsparse_file.seek(self.TEST_FILE_SIZE)
+    self.assertEqual(unsparse_file.read(), data)
+
+
 if __name__ == '__main__':
   unittest.main()
diff --git a/bpttool b/bpttool
index 09c39fb..85f3244 100755
--- a/bpttool
+++ b/bpttool
@@ -19,10 +19,12 @@
 
 
 import argparse
+import bisect
 import copy
 import json
 import math
 import numbers
+import os
 import struct
 import sys
 import uuid
@@ -195,6 +197,473 @@
   return value
 
 
+class ImageChunk(object):
+  """Data structure used for representing chunks in Android sparse files.
+
+  Attributes:
+    chunk_type: One of TYPE_RAW, TYPE_FILL, or TYPE_DONT_CARE.
+    chunk_offset: Offset in the sparse file where this chunk begins.
+    output_offset: Offset in de-sparsified file where output begins.
+    output_size: Number of bytes in output.
+    input_offset: Offset in sparse file for data if TYPE_RAW otherwise None.
+    fill_data: Blob with data to fill if TYPE_FILL otherwise None.
+  """
+
+  FORMAT = '<2H2I'
+  TYPE_RAW = 0xcac1
+  TYPE_FILL = 0xcac2
+  TYPE_DONT_CARE = 0xcac3
+  TYPE_CRC32 = 0xcac4
+
+  def __init__(self, chunk_type, chunk_offset, output_offset, output_size,
+               input_offset, fill_data):
+    """Initializes an ImageChunk object.
+
+    Arguments:
+      chunk_type: One of TYPE_RAW, TYPE_FILL, or TYPE_DONT_CARE.
+      chunk_offset: Offset in the sparse file where this chunk begins.
+      output_offset: Offset in de-sparsified file.
+      output_size: Number of bytes in output.
+      input_offset: Offset in sparse file if TYPE_RAW otherwise None.
+      fill_data: Blob with data to fill if TYPE_FILL otherwise None.
+
+    Raises:
+      ValueError: If data is not well-formed.
+    """
+    self.chunk_type = chunk_type
+    self.chunk_offset = chunk_offset
+    self.output_offset = output_offset
+    self.output_size = output_size
+    self.input_offset = input_offset
+    self.fill_data = fill_data
+    # Check invariants.
+    if self.chunk_type == self.TYPE_RAW:
+      if self.fill_data is not None:
+        raise ValueError('RAW chunk cannot have fill_data set.')
+      if not self.input_offset:
+        raise ValueError('RAW chunk must have input_offset set.')
+    elif self.chunk_type == self.TYPE_FILL:
+      if self.fill_data is None:
+        raise ValueError('FILL chunk must have fill_data set.')
+      if self.input_offset:
+        raise ValueError('FILL chunk cannot have input_offset set.')
+    elif self.chunk_type == self.TYPE_DONT_CARE:
+      if self.fill_data is not None:
+        raise ValueError('DONT_CARE chunk cannot have fill_data set.')
+      if self.input_offset:
+        raise ValueError('DONT_CARE chunk cannot have input_offset set.')
+    else:
+      raise ValueError('Invalid chunk type')
+
+
+class ImageHandler(object):
+  """Abstraction for image I/O with support for Android sparse images.
+
+  This class provides an interface for working with image files that
+  may be using the Android Sparse Image format. When an instance is
+  constructed, we test whether it's an Android sparse file. If so,
+  operations will be on the sparse file by interpreting the sparse
+  format, otherwise they will be directly on the file. Either way the
+  operations do the same.
+
+  For reading, this interface mimics a file object - it has seek(),
+  tell(), and read() methods. For writing, only truncation
+  (truncate()) and appending is supported (append_raw(),
+  append_fill(), and append_dont_care()). Additionally, data can only
+  be written in units of the block size.
+
+  Attributes:
+    is_sparse: Whether the file being operated on is sparse.
+    block_size: The block size, typically 4096.
+    image_size: The size of the unsparsified file.
+
+  """
+  # See system/core/libsparse/sparse_format.h for details.
+  MAGIC = 0xed26ff3a
+  HEADER_FORMAT = '<I4H4I'
+
+  # This is the format and offset of just the |total_chunks| and
+  # |total_blocks| fields.
+  NUM_CHUNKS_AND_BLOCKS_FORMAT = '<II'
+  NUM_CHUNKS_AND_BLOCKS_OFFSET = 16
+
+  def __init__(self, image_filename):
+    """Initializes an image handler.
+
+    Arguments:
+      image_filename: The name of the file to operate on.
+
+    Raises:
+      ValueError: If data in the file is invalid.
+    """
+    self._image_filename = image_filename
+    self._read_header()
+
+  def _read_header(self):
+    """Initializes internal data structures used for reading file.
+
+    This may be called multiple times and is typically called after
+    modifying the file (e.g. appending, truncation).
+
+    Raises:
+      ValueError: If data in the file is invalid.
+    """
+    self.is_sparse = False
+    self.block_size = 4096
+    self._file_pos = 0
+    self._image = open(self._image_filename, 'r+b')
+    self._image.seek(0, os.SEEK_END)
+    self.image_size = self._image.tell()
+
+    self._image.seek(0, os.SEEK_SET)
+    header_bin = self._image.read(struct.calcsize(self.HEADER_FORMAT))
+    if len(header_bin) < struct.calcsize(self.HEADER_FORMAT):
+      # Not a sparse image, our job here is done.
+      return
+    (magic, major_version, minor_version, file_hdr_sz, chunk_hdr_sz,
+     block_size, self._num_total_blocks, self._num_total_chunks,
+     _) = struct.unpack(self.HEADER_FORMAT, header_bin)
+    if magic != self.MAGIC:
+      # Not a sparse image, our job here is done.
+      return
+    if not (major_version == 1 and minor_version == 0):
+      raise ValueError('Encountered sparse image format version {}.{} but '
+                       'only 1.0 is supported'.format(major_version,
+                                                      minor_version))
+    if file_hdr_sz != struct.calcsize(self.HEADER_FORMAT):
+      raise ValueError('Unexpected file_hdr_sz value {}.'.
+                       format(file_hdr_sz))
+    if chunk_hdr_sz != struct.calcsize(ImageChunk.FORMAT):
+      raise ValueError('Unexpected chunk_hdr_sz value {}.'.
+                       format(chunk_hdr_sz))
+
+    self.block_size = block_size
+
+    # Build a list of chunks by parsing the file.
+    self._chunks = []
+
+    # Find the smallest offset where only "Don't care" chunks
+    # follow. This will be the size of the content in the sparse
+    # image.
+    offset = 0
+    output_offset = 0
+    for _ in xrange(1, self._num_total_chunks + 1):
+      chunk_offset = self._image.tell()
+
+      header_bin = self._image.read(struct.calcsize(ImageChunk.FORMAT))
+      (chunk_type, _, chunk_sz, total_sz) = struct.unpack(ImageChunk.FORMAT,
+                                                          header_bin)
+      data_sz = total_sz - struct.calcsize(ImageChunk.FORMAT)
+
+      if chunk_type == ImageChunk.TYPE_RAW:
+        if data_sz != (chunk_sz * self.block_size):
+          raise ValueError('Raw chunk input size ({}) does not match output '
+                           'size ({})'.
+                           format(data_sz, chunk_sz*self.block_size))
+        self._chunks.append(ImageChunk(ImageChunk.TYPE_RAW,
+                                       chunk_offset,
+                                       output_offset,
+                                       chunk_sz*self.block_size,
+                                       self._image.tell(),
+                                       None))
+        self._image.read(data_sz)
+
+      elif chunk_type == ImageChunk.TYPE_FILL:
+        if data_sz != 4:
+          raise ValueError('Fill chunk should have 4 bytes of fill, but this '
+                           'has {}'.format(data_sz))
+        fill_data = self._image.read(4)
+        self._chunks.append(ImageChunk(ImageChunk.TYPE_FILL,
+                                       chunk_offset,
+                                       output_offset,
+                                       chunk_sz*self.block_size,
+                                       None,
+                                       fill_data))
+      elif chunk_type == ImageChunk.TYPE_DONT_CARE:
+        if data_sz != 0:
+          raise ValueError('Don\'t care chunk input size is non-zero ({})'.
+                           format(data_sz))
+        self._chunks.append(ImageChunk(ImageChunk.TYPE_DONT_CARE,
+                                       chunk_offset,
+                                       output_offset,
+                                       chunk_sz*self.block_size,
+                                       None,
+                                       None))
+      elif chunk_type == ImageChunk.TYPE_CRC32:
+        if data_sz != 4:
+          raise ValueError('CRC32 chunk should have 4 bytes of CRC, but '
+                           'this has {}'.format(data_sz))
+        self._image.read(4)
+      else:
+        raise ValueError('Unknown chunk type {}'.format(chunk_type))
+
+      offset += chunk_sz
+      output_offset += chunk_sz * self.block_size
+
+    # Record where the sparse data ends.
+    self._sparse_end = self._image.tell()
+
+    # Now that we've traversed all chunks, sanity check.
+    if self._num_total_blocks != offset:
+      raise ValueError('The header said we should have {} output blocks, '
+                       'but we saw {}'.format(self._num_total_blocks, offset))
+    junk_len = len(self._image.read())
+    if junk_len > 0:
+      raise ValueError('There were {} bytes of extra data at the end of the '
+                       'file.'.format(junk_len))
+
+    # Assign |image_size|.
+    self.image_size = output_offset
+
+    # This is used when bisecting in read() to find the initial slice.
+    self._chunk_output_offsets = [i.output_offset for i in self._chunks]
+
+    self.is_sparse = True
+
+  def _update_chunks_and_blocks(self):
+    """Helper function to update the image header.
+
+    The |total_chunks| and |total_blocks| fields in the header
+    will be set to the values of the |_num_total_chunks| and
+    |_num_total_blocks| attributes.
+
+    """
+    self._image.seek(self.NUM_CHUNKS_AND_BLOCKS_OFFSET, os.SEEK_SET)
+    self._image.write(struct.pack(self.NUM_CHUNKS_AND_BLOCKS_FORMAT,
+                                  self._num_total_blocks,
+                                  self._num_total_chunks))
+
+  def append_dont_care(self, num_bytes):
+    """Appends a DONT_CARE chunk to the sparse file.
+
+    The given number of bytes must be a multiple of the block size.
+
+    Arguments:
+      num_bytes: Size in number of bytes of the DONT_CARE chunk.
+    """
+    assert num_bytes % self.block_size == 0
+
+    if not self.is_sparse:
+      self._image.seek(0, os.SEEK_END)
+      # This is more efficient than writing NUL bytes since it'll add
+      # a hole on file systems that support sparse files (native
+      # sparse, not Android sparse).
+      self._image.truncate(self._image.tell() + num_bytes)
+      self._read_header()
+      return
+
+    self._num_total_chunks += 1
+    self._num_total_blocks += num_bytes / self.block_size
+    self._update_chunks_and_blocks()
+
+    self._image.seek(self._sparse_end, os.SEEK_SET)
+    self._image.write(struct.pack(ImageChunk.FORMAT,
+                                  ImageChunk.TYPE_DONT_CARE,
+                                  0,  # Reserved
+                                  num_bytes / self.block_size,
+                                  struct.calcsize(ImageChunk.FORMAT)))
+    self._read_header()
+
+  def append_raw(self, data):
+    """Appends a RAW chunk to the sparse file.
+
+    The length of the given data must be a multiple of the block size.
+
+    Arguments:
+      data: Data to append.
+    """
+    assert len(data) % self.block_size == 0
+
+    if not self.is_sparse:
+      self._image.seek(0, os.SEEK_END)
+      self._image.write(data)
+      self._read_header()
+      return
+
+    self._num_total_chunks += 1
+    self._num_total_blocks += len(data) / self.block_size
+    self._update_chunks_and_blocks()
+
+    self._image.seek(self._sparse_end, os.SEEK_SET)
+    self._image.write(struct.pack(ImageChunk.FORMAT,
+                                  ImageChunk.TYPE_RAW,
+                                  0,  # Reserved
+                                  len(data) / self.block_size,
+                                  len(data) +
+                                  struct.calcsize(ImageChunk.FORMAT)))
+    self._image.write(data)
+    self._read_header()
+
+  def append_fill(self, fill_data, size):
+    """Appends a fill chunk to the sparse file.
+
+    The total length of the fill data must be a multiple of the block size.
+
+    Arguments:
+      fill_data: Fill data to append - must be four bytes.
+      size: Size in bytes - must be a multiple of four and the block size.
+    """
+    assert len(fill_data) == 4
+    assert size % 4 == 0
+    assert size % self.block_size == 0
+
+    if not self.is_sparse:
+      self._image.seek(0, os.SEEK_END)
+      self._image.write(fill_data * (size/4))
+      self._read_header()
+      return
+
+    self._num_total_chunks += 1
+    self._num_total_blocks += size / self.block_size
+    self._update_chunks_and_blocks()
+
+    self._image.seek(self._sparse_end, os.SEEK_SET)
+    self._image.write(struct.pack(ImageChunk.FORMAT,
+                                  ImageChunk.TYPE_FILL,
+                                  0,  # Reserved
+                                  size / self.block_size,
+                                  4 + struct.calcsize(ImageChunk.FORMAT)))
+    self._image.write(fill_data)
+    self._read_header()
+
+  def seek(self, offset):
+    """Sets the cursor position for reading from unsparsified file.
+
+    Arguments:
+      offset: Offset to seek to from the beginning of the file.
+    """
+    self._file_pos = offset
+
+  def read(self, size):
+    """Reads data from the unsparsified file.
+
+    This method may return fewer than |size| bytes of data if the end
+    of the file was encountered.
+
+    The file cursor for reading is advanced by the number of bytes
+    read.
+
+    Arguments:
+      size: Number of bytes to read.
+
+    Returns:
+      The data.
+
+    """
+    if not self.is_sparse:
+      self._image.seek(self._file_pos)
+      data = self._image.read(size)
+      self._file_pos += len(data)
+      return data
+
+    # Iterate over all chunks.
+    chunk_idx = bisect.bisect_right(self._chunk_output_offsets,
+                                    self._file_pos) - 1
+    data = bytearray()
+    to_go = size
+    while to_go > 0:
+      chunk = self._chunks[chunk_idx]
+      chunk_pos_offset = self._file_pos - chunk.output_offset
+      chunk_pos_to_go = min(chunk.output_size - chunk_pos_offset, to_go)
+
+      if chunk.chunk_type == ImageChunk.TYPE_RAW:
+        self._image.seek(chunk.input_offset + chunk_pos_offset)
+        data.extend(self._image.read(chunk_pos_to_go))
+      elif chunk.chunk_type == ImageChunk.TYPE_FILL:
+        all_data = chunk.fill_data*(chunk_pos_to_go/len(chunk.fill_data) + 2)
+        offset_mod = chunk_pos_offset % len(chunk.fill_data)
+        data.extend(all_data[offset_mod:(offset_mod + chunk_pos_to_go)])
+      else:
+        assert chunk.chunk_type == ImageChunk.TYPE_DONT_CARE
+        data.extend('\0' * chunk_pos_to_go)
+
+      to_go -= chunk_pos_to_go
+      self._file_pos += chunk_pos_to_go
+      chunk_idx += 1
+      # Generate partial read in case of EOF.
+      if chunk_idx >= len(self._chunks):
+        break
+
+    return data
+
+  def tell(self):
+    """Returns the file cursor position for reading from unsparsified file.
+
+    Returns:
+      The file cursor position for reading.
+    """
+    return self._file_pos
+
+  def truncate(self, size):
+    """Truncates the unsparsified file.
+
+    Arguments:
+      size: Desired size of unsparsified file.
+
+    Raises:
+      ValueError: If desired size isn't a multiple of the block size.
+    """
+    if not self.is_sparse:
+      self._image.truncate(size)
+      self._read_header()
+      return
+
+    if size % self.block_size != 0:
+      raise ValueError('Cannot truncate to a size which is not a multiple '
+                       'of the block size')
+
+    if size == self.image_size:
+      # Trivial case where there's nothing to do.
+      return
+    elif size < self.image_size:
+      chunk_idx = bisect.bisect_right(self._chunk_output_offsets, size) - 1
+      chunk = self._chunks[chunk_idx]
+      if chunk.output_offset != size:
+        # Truncation in the middle of a chunk - need to keep the chunk
+        # and modify it.
+        chunk_idx_for_update = chunk_idx + 1
+        num_to_keep = size - chunk.output_offset
+        assert num_to_keep % self.block_size == 0
+        if chunk.chunk_type == ImageChunk.TYPE_RAW:
+          truncate_at = (chunk.chunk_offset +
+                         struct.calcsize(ImageChunk.FORMAT) + num_to_keep)
+          data_sz = num_to_keep
+        elif chunk.chunk_type == ImageChunk.TYPE_FILL:
+          truncate_at = (chunk.chunk_offset +
+                         struct.calcsize(ImageChunk.FORMAT) + 4)
+          data_sz = 4
+        else:
+          assert chunk.chunk_type == ImageChunk.TYPE_DONT_CARE
+          truncate_at = chunk.chunk_offset + struct.calcsize(ImageChunk.FORMAT)
+          data_sz = 0
+        chunk_sz = num_to_keep/self.block_size
+        total_sz = data_sz + struct.calcsize(ImageChunk.FORMAT)
+        self._image.seek(chunk.chunk_offset)
+        self._image.write(struct.pack(ImageChunk.FORMAT,
+                                      chunk.chunk_type,
+                                      0,  # Reserved
+                                      chunk_sz,
+                                      total_sz))
+        chunk.output_size = num_to_keep
+      else:
+        # Truncation at chunk boundary.
+        truncate_at = chunk.chunk_offset
+        chunk_idx_for_update = chunk_idx
+
+      self._num_total_chunks = chunk_idx_for_update
+      self._num_total_blocks = 0
+      for i in range(0, chunk_idx_for_update):
+        self._num_total_blocks += self._chunks[i].output_size / self.block_size
+      self._update_chunks_and_blocks()
+      self._image.truncate(truncate_at)
+
+      # We've modified the file so re-read all data.
+      self._read_header()
+    else:
+      # Truncating to grow - just add a DONT_CARE section.
+      self.append_dont_care(size - self.image_size)
+
+
 class GuidGenerator(object):
   """An interface for obtaining strings that are GUIDs.
 
@@ -914,13 +1383,10 @@
           raise BptParsingError(bpt.name, 'No content specified for partition'
                                 ' with label {}'.format(p.label))
 
-      with open(image_file_names[p.label], 'rb') as partition_image:
-        output.seek(p.offset)
-        partition_blob = partition_image.read()
-        if len(partition_blob) > p.size:
-          raise BptError('Partition image content with label "{}" exceeds the '
-                         'partition size.'.format(p.label))
-        output.write(partition_blob)
+      input_image = ImageHandler(image_file_names[p.label])
+      output.seek(p.offset)
+      partition_blob = input_image.read(p.size)
+      output.write(partition_blob)
 
     # Put secondary GPT and end of disk.
     output.seek(settings.disk_size - len(secondary_gpt))
diff --git a/test/test_file.bin b/test/test_file.bin
new file mode 100644
index 0000000..a3fc7fb
--- /dev/null
+++ b/test/test_file.bin
Binary files differ
diff --git a/test/test_file.bin.sparse b/test/test_file.bin.sparse
new file mode 100644
index 0000000..c4962b2
--- /dev/null
+++ b/test/test_file.bin.sparse
Binary files differ
diff --git a/test/test_sparse_image.bpt b/test/test_sparse_image.bpt
new file mode 100644
index 0000000..872f5a6
--- /dev/null
+++ b/test/test_sparse_image.bpt
@@ -0,0 +1,11 @@
+{
+  "settings": {
+    "disk_size": "128 KiB"
+  },
+  "partitions": [
+    {
+      "label": "sparse_data",
+      "grow": true
+    }
+  ]
+}