From 3944d27833cc4774a4563d6515b6dbcae018b125 Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Tue, 26 Apr 2022 16:15:03 +0800 Subject: [PATCH] refactor python reader to take a get_bytes function; add reader tests [#38] --- python/bin/pmtiles-show | 5 ++-- python/pmtiles/reader.py | 54 +++++++++++++++++++++----------------- python/pmtiles/writer.py | 27 ++++++++++--------- python/test/test_reader.py | 32 ++++++++++++++++++++++ 4 files changed, 79 insertions(+), 39 deletions(-) create mode 100644 python/test/test_reader.py diff --git a/python/bin/pmtiles-show b/python/bin/pmtiles-show index 7031276..2e9caca 100755 --- a/python/bin/pmtiles-show +++ b/python/bin/pmtiles-show @@ -1,7 +1,7 @@ #!/usr/bin/env python import sys -from pmtiles.reader import read +from pmtiles.reader import Reader, MmapSource if len(sys.argv) <= 1: print("Usage: pmtiles-show PMTILES_FILE") @@ -9,7 +9,8 @@ if len(sys.argv) <= 1: print("Usage: pmtiles-show PMTILES_FILE list") exit(1) -with read(sys.argv[1]) as reader: +with open(sys.argv[1],'r+b') as f: + reader = Reader(MmapSource(f)) spec_version = reader.version if len(sys.argv) == 2: print("spec version: ", spec_version) diff --git a/python/pmtiles/reader.py b/python/pmtiles/reader.py index 21a16be..813b97a 100644 --- a/python/pmtiles/reader.py +++ b/python/pmtiles/reader.py @@ -3,20 +3,26 @@ import mmap from contextlib import contextmanager -@contextmanager -def read(fname): - r = Reader(fname) - try: - yield r - finally: - r.close() +def MmapSource(f): + mapping = mmap.mmap(f.fileno(), 0) + + def get_bytes(offset, length): + return mapping[offset : offset + length] + + return get_bytes + + +def MemorySource(buf): + def get_bytes(offset, length): + return buf[offset : offset + length] + + return get_bytes class Reader: - def __init__(self, fname): - self.f = open(fname, "r+b") - self.mmap = mmap.mmap(self.f.fileno(), 0) - assert int.from_bytes(self.mmap[0:2], byteorder="little") == 0x4D50 + def __init__(self, get_bytes): + self.get_bytes = get_bytes + assert int.from_bytes(self.get_bytes(0, 2), byteorder="little") == 0x4D50 first_entry_idx = 10 + self.metadata_len self.root_dir, self.leaves = self.load_directory( first_entry_idx, self.root_entries @@ -26,11 +32,11 @@ class Reader: directory = {} leaves = {} for i in range(offset, offset + num_entries * 17, 17): - z = int.from_bytes(self.mmap[i : i + 1], byteorder="little") - x = int.from_bytes(self.mmap[i + 1 : i + 4], byteorder="little") - y = int.from_bytes(self.mmap[i + 4 : i + 7], byteorder="little") - tile_off = int.from_bytes(self.mmap[i + 7 : i + 13], byteorder="little") - tile_len = int.from_bytes(self.mmap[i + 13 : i + 17], byteorder="little") + z = int.from_bytes(self.get_bytes(i, 1), byteorder="little") + x = int.from_bytes(self.get_bytes(i + 1, 3), byteorder="little") + y = int.from_bytes(self.get_bytes(i + 4, 3), byteorder="little") + tile_off = int.from_bytes(self.get_bytes(i + 7, 6), byteorder="little") + tile_len = int.from_bytes(self.get_bytes(i + 13, 4), byteorder="little") if z & 0b10000000: leaves[(z & 0b01111111, x, y)] = (tile_off, tile_len) else: @@ -42,20 +48,20 @@ class Reader: @property def metadata_len(self): - return int.from_bytes(self.mmap[4:8], byteorder="little") + return int.from_bytes(self.get_bytes(4, 4), byteorder="little") @property def metadata(self): - s = self.mmap[10 : 10 + self.metadata_len] + s = self.get_bytes(10, self.metadata_len) return json.loads(s) @property def version(self): - return int.from_bytes(self.mmap[2:4], byteorder="little") + return int.from_bytes(self.get_bytes(2, 2), byteorder="little") @property def root_entries(self): - return int.from_bytes(self.mmap[8:10], byteorder="little") + return int.from_bytes(self.get_bytes(8, 2), byteorder="little") @property def leaf_level(self): @@ -64,7 +70,7 @@ class Reader: def get(self, z, x, y): val = self.root_dir.get((z, x, y)) if val: - return self.mmap[val[0] : val[0] + val[1]] + return self.get_bytes(val[0], val[1]) else: if len(self.leaves) > 0: level_diff = z - self.leaf_level @@ -74,12 +80,12 @@ class Reader: directory, _ = self.load_directory(val[0], val[1] // 17) val = directory.get((z, x, y)) if val: - return self.mmap[val[0] : val[0] + val[1]] + return self.get_bytes(val[0], val[1]) def tiles(self): for k, v in self.root_dir.items(): - yield (k, self.mmap[v[0] : v[0] + v[1]]) + yield (k, self.get_bytes(v[0], v[1])) for val in self.leaves.values(): leaf_dir, _ = self.load_directory(val[0], val[1] // 17) for k, v in leaf_dir.items(): - yield (k, self.mmap[v[0] : v[0] + v[1]]) + yield (k, self.get_bytes(v[0], v[1])) diff --git a/python/pmtiles/writer.py b/python/pmtiles/writer.py index 295e8bc..c559860 100644 --- a/python/pmtiles/writer.py +++ b/python/pmtiles/writer.py @@ -80,20 +80,22 @@ def make_pyramid(tile_entries, start_leaf_offset, max_dir_size=21845): @contextmanager def write(fname): - w = Writer(fname) + f = open(fname, "wb") + w = Writer(f) try: yield w finally: - w.close() + f.close() class Writer: - def __init__(self, fname): - self.f = open(fname, "wb") + def __init__(self, f, max_dir_size): self.offset = 512000 + self.f = f self.f.write(b"\0" * self.offset) self.tile_entries = [] self.hash_to_offset = {} + self.max_dir_size = max_dir_size def write_tile(self, z, x, y, data): hsh = hash(data) @@ -107,7 +109,7 @@ class Writer: self.hash_to_offset[hsh] = self.offset self.offset = self.offset + len(data) - def write_entry(self, entry): + def _write_entry(self, entry): if entry.is_dir: z_bytes = 0b10000000 | entry.z else: @@ -118,7 +120,7 @@ class Writer: self.f.write(entry.offset.to_bytes(6, byteorder="little")) self.f.write(entry.length.to_bytes(4, byteorder="little")) - def write_header(self, metadata, root_entries_len): + def _write_header(self, metadata, root_entries_len): self.f.write((0x4D50).to_bytes(2, byteorder="little")) self.f.write((2).to_bytes(2, byteorder="little")) metadata_serialized = json.dumps(metadata) @@ -129,24 +131,23 @@ class Writer: self.f.write(metadata_serialized.encode("utf-8")) def finalize(self, metadata={}): - root_dir, leaf_dirs = make_pyramid(self.tile_entries, self.offset) + root_dir, leaf_dirs = make_pyramid( + self.tile_entries, self.offset, self.max_dir_size + ) if len(leaf_dirs) > 0: for leaf_dir in leaf_dirs: for entry in leaf_dir: - self.write_entry(entry) + self._write_entry(entry) self.f.seek(0) - self.write_header(metadata, len(root_dir)) + self._write_header(metadata, len(root_dir)) for entry in root_dir: - self.write_entry(entry) + self._write_entry(entry) return { "num_tiles": len(self.tile_entries), "num_unique_tiles": len(self.hash_to_offset), "num_leaves": len(leaf_dirs), } - - def close(self): - self.f.close() diff --git a/python/test/test_reader.py b/python/test/test_reader.py new file mode 100644 index 0000000..a237d0e --- /dev/null +++ b/python/test/test_reader.py @@ -0,0 +1,32 @@ +import unittest +from io import BytesIO +from pmtiles.writer import Writer +from pmtiles.reader import Reader, MemorySource + + +class TestReader(unittest.TestCase): + def test_roundtrip(self): + buf = BytesIO() + writer = Writer(buf, 7) + writer.write_tile(1, 0, 0, b"0") + writer.write_tile(1, 0, 1, b"1") + writer.write_tile(1, 1, 0, b"2") + writer.write_tile(1, 1, 1, b"3") + writer.write_tile(2, 0, 0, b"4") + writer.write_tile(3, 0, 0, b"5") + writer.write_tile(2, 0, 1, b"6") + writer.write_tile(3, 0, 2, b"7") + writer.finalize({"key": "value"}) + + reader = Reader(MemorySource(buf.getvalue())) + self.assertEqual(reader.version, 2) + self.assertEqual(reader.root_entries, 6) + self.assertEqual(reader.metadata["key"], "value") + self.assertEqual(reader.get(1, 0, 0), b"0") + self.assertEqual(reader.get(1, 0, 1), b"1") + self.assertEqual(reader.get(1, 1, 0), b"2") + self.assertEqual(reader.get(1, 1, 1), b"3") + self.assertEqual(reader.get(2, 0, 0), b"4") + self.assertEqual(reader.get(3, 0, 0), b"5") + self.assertEqual(reader.get(2, 0, 1), b"6") + self.assertEqual(reader.get(3, 0, 2), b"7")