refactor python reader to take a get_bytes function; add reader tests [#38]

This commit is contained in:
Brandon Liu
2022-04-26 16:15:03 +08:00
parent adb6f896a3
commit 3944d27833
4 changed files with 79 additions and 39 deletions

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
from pmtiles.reader import read from pmtiles.reader import Reader, MmapSource
if len(sys.argv) <= 1: if len(sys.argv) <= 1:
print("Usage: pmtiles-show PMTILES_FILE") print("Usage: pmtiles-show PMTILES_FILE")
@@ -9,7 +9,8 @@ if len(sys.argv) <= 1:
print("Usage: pmtiles-show PMTILES_FILE list") print("Usage: pmtiles-show PMTILES_FILE list")
exit(1) exit(1)
with read(sys.argv[1]) as reader: with open(sys.argv[1],'r+b') as f:
reader = Reader(MmapSource(f))
spec_version = reader.version spec_version = reader.version
if len(sys.argv) == 2: if len(sys.argv) == 2:
print("spec version: ", spec_version) print("spec version: ", spec_version)

View File

@@ -3,20 +3,26 @@ import mmap
from contextlib import contextmanager from contextlib import contextmanager
def MmapSource(f):
    """Build a ``get_bytes(offset, length)`` reader backed by a memory map of *f*.

    The file is mapped read-only, so a handle opened with ``"rb"`` works as
    well as one opened with ``"r+b"``; the reader never writes to the map.

    Args:
        f: An open binary file object exposing ``fileno()``.

    Returns:
        A function ``get_bytes(offset, length)`` returning the ``bytes`` found
        at ``[offset, offset + length)``. Ranges running past the end of the
        file are truncated, following normal slice semantics.

    Raises:
        ValueError: If the file is empty (``mmap`` cannot map zero bytes).
    """
    # ACCESS_READ avoids requiring write permission on the underlying file.
    mapping = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

    def get_bytes(offset, length):
        return mapping[offset : offset + length]

    # The mapping is kept alive by the closure for as long as the reader is.
    return get_bytes
def MemorySource(buf):
    """Wrap an in-memory buffer as a ``get_bytes(offset, length)`` reader.

    Args:
        buf: Any sliceable byte container (for example ``bytes``).

    Returns:
        A function that returns ``buf[offset : offset + length]``.
    """

    def get_bytes(offset, length):
        end = offset + length
        return buf[offset:end]

    return get_bytes
class Reader: class Reader:
def __init__(self, fname): def __init__(self, get_bytes):
self.f = open(fname, "r+b") self.get_bytes = get_bytes
self.mmap = mmap.mmap(self.f.fileno(), 0) assert int.from_bytes(self.get_bytes(0, 2), byteorder="little") == 0x4D50
assert int.from_bytes(self.mmap[0:2], byteorder="little") == 0x4D50
first_entry_idx = 10 + self.metadata_len first_entry_idx = 10 + self.metadata_len
self.root_dir, self.leaves = self.load_directory( self.root_dir, self.leaves = self.load_directory(
first_entry_idx, self.root_entries first_entry_idx, self.root_entries
@@ -26,11 +32,11 @@ class Reader:
directory = {} directory = {}
leaves = {} leaves = {}
for i in range(offset, offset + num_entries * 17, 17): for i in range(offset, offset + num_entries * 17, 17):
z = int.from_bytes(self.mmap[i : i + 1], byteorder="little") z = int.from_bytes(self.get_bytes(i, 1), byteorder="little")
x = int.from_bytes(self.mmap[i + 1 : i + 4], byteorder="little") x = int.from_bytes(self.get_bytes(i + 1, 3), byteorder="little")
y = int.from_bytes(self.mmap[i + 4 : i + 7], byteorder="little") y = int.from_bytes(self.get_bytes(i + 4, 3), byteorder="little")
tile_off = int.from_bytes(self.mmap[i + 7 : i + 13], byteorder="little") tile_off = int.from_bytes(self.get_bytes(i + 7, 6), byteorder="little")
tile_len = int.from_bytes(self.mmap[i + 13 : i + 17], byteorder="little") tile_len = int.from_bytes(self.get_bytes(i + 13, 4), byteorder="little")
if z & 0b10000000: if z & 0b10000000:
leaves[(z & 0b01111111, x, y)] = (tile_off, tile_len) leaves[(z & 0b01111111, x, y)] = (tile_off, tile_len)
else: else:
@@ -42,20 +48,20 @@ class Reader:
@property @property
def metadata_len(self): def metadata_len(self):
return int.from_bytes(self.mmap[4:8], byteorder="little") return int.from_bytes(self.get_bytes(4, 4), byteorder="little")
@property @property
def metadata(self): def metadata(self):
s = self.mmap[10 : 10 + self.metadata_len] s = self.get_bytes(10, self.metadata_len)
return json.loads(s) return json.loads(s)
@property @property
def version(self): def version(self):
return int.from_bytes(self.mmap[2:4], byteorder="little") return int.from_bytes(self.get_bytes(2, 2), byteorder="little")
@property @property
def root_entries(self): def root_entries(self):
return int.from_bytes(self.mmap[8:10], byteorder="little") return int.from_bytes(self.get_bytes(8, 2), byteorder="little")
@property @property
def leaf_level(self): def leaf_level(self):
@@ -64,7 +70,7 @@ class Reader:
def get(self, z, x, y): def get(self, z, x, y):
val = self.root_dir.get((z, x, y)) val = self.root_dir.get((z, x, y))
if val: if val:
return self.mmap[val[0] : val[0] + val[1]] return self.get_bytes(val[0], val[1])
else: else:
if len(self.leaves) > 0: if len(self.leaves) > 0:
level_diff = z - self.leaf_level level_diff = z - self.leaf_level
@@ -74,12 +80,12 @@ class Reader:
directory, _ = self.load_directory(val[0], val[1] // 17) directory, _ = self.load_directory(val[0], val[1] // 17)
val = directory.get((z, x, y)) val = directory.get((z, x, y))
if val: if val:
return self.mmap[val[0] : val[0] + val[1]] return self.get_bytes(val[0], val[1])
def tiles(self): def tiles(self):
for k, v in self.root_dir.items(): for k, v in self.root_dir.items():
yield (k, self.mmap[v[0] : v[0] + v[1]]) yield (k, self.get_bytes(v[0], v[1]))
for val in self.leaves.values(): for val in self.leaves.values():
leaf_dir, _ = self.load_directory(val[0], val[1] // 17) leaf_dir, _ = self.load_directory(val[0], val[1] // 17)
for k, v in leaf_dir.items(): for k, v in leaf_dir.items():
yield (k, self.mmap[v[0] : v[0] + v[1]]) yield (k, self.get_bytes(v[0], v[1]))

View File

@@ -80,20 +80,22 @@ def make_pyramid(tile_entries, start_leaf_offset, max_dir_size=21845):
@contextmanager
def write(fname, max_dir_size=21845):
    """Open *fname* for writing and yield a ``Writer`` bound to it.

    The file is closed when the ``with`` block exits, including when the
    block or the ``Writer`` constructor raises.

    Args:
        fname: Path of the archive to create (opened in ``"wb"`` mode).
        max_dir_size: Directory size limit handed to ``Writer``; the default
            matches the default of ``make_pyramid``.

    Yields:
        The ``Writer`` instance writing into the opened file.
    """
    # Writer requires max_dir_size, so it is passed explicitly; constructing
    # the Writer inside the ``with`` keeps the handle from leaking on failure.
    with open(fname, "wb") as f:
        yield Writer(f, max_dir_size)
class Writer: class Writer:
def __init__(self, fname): def __init__(self, f, max_dir_size):
self.f = open(fname, "wb")
self.offset = 512000 self.offset = 512000
self.f = f
self.f.write(b"\0" * self.offset) self.f.write(b"\0" * self.offset)
self.tile_entries = [] self.tile_entries = []
self.hash_to_offset = {} self.hash_to_offset = {}
self.max_dir_size = max_dir_size
def write_tile(self, z, x, y, data): def write_tile(self, z, x, y, data):
hsh = hash(data) hsh = hash(data)
@@ -107,7 +109,7 @@ class Writer:
self.hash_to_offset[hsh] = self.offset self.hash_to_offset[hsh] = self.offset
self.offset = self.offset + len(data) self.offset = self.offset + len(data)
def write_entry(self, entry): def _write_entry(self, entry):
if entry.is_dir: if entry.is_dir:
z_bytes = 0b10000000 | entry.z z_bytes = 0b10000000 | entry.z
else: else:
@@ -118,7 +120,7 @@ class Writer:
self.f.write(entry.offset.to_bytes(6, byteorder="little")) self.f.write(entry.offset.to_bytes(6, byteorder="little"))
self.f.write(entry.length.to_bytes(4, byteorder="little")) self.f.write(entry.length.to_bytes(4, byteorder="little"))
def write_header(self, metadata, root_entries_len): def _write_header(self, metadata, root_entries_len):
self.f.write((0x4D50).to_bytes(2, byteorder="little")) self.f.write((0x4D50).to_bytes(2, byteorder="little"))
self.f.write((2).to_bytes(2, byteorder="little")) self.f.write((2).to_bytes(2, byteorder="little"))
metadata_serialized = json.dumps(metadata) metadata_serialized = json.dumps(metadata)
@@ -129,24 +131,23 @@ class Writer:
self.f.write(metadata_serialized.encode("utf-8")) self.f.write(metadata_serialized.encode("utf-8"))
def finalize(self, metadata=None):
    """Write leaf directories, the header and the root directory.

    Leaf directory entries are written at the current file position; the
    file is then rewound to offset 0 for the header and root directory.

    Args:
        metadata: JSON-serializable metadata for the header. ``None`` (the
            default) is treated as an empty dict.

    Returns:
        A dict with the counts ``num_tiles``, ``num_unique_tiles`` and
        ``num_leaves``.
    """
    # Use None rather than a shared mutable default argument.
    if metadata is None:
        metadata = {}

    root_dir, leaf_dirs = make_pyramid(
        self.tile_entries, self.offset, self.max_dir_size
    )

    # Iterating an empty list is a no-op, so no length guard is needed.
    for leaf_dir in leaf_dirs:
        for entry in leaf_dir:
            self._write_entry(entry)

    self.f.seek(0)
    self._write_header(metadata, len(root_dir))
    for entry in root_dir:
        self._write_entry(entry)

    return {
        "num_tiles": len(self.tile_entries),
        "num_unique_tiles": len(self.hash_to_offset),
        "num_leaves": len(leaf_dirs),
    }
def close(self):
self.f.close()

View File

@@ -0,0 +1,32 @@
import unittest
from io import BytesIO
from pmtiles.writer import Writer
from pmtiles.reader import Reader, MemorySource
class TestReader(unittest.TestCase):
    """Round-trip checks: archives produced by Writer must be readable by Reader."""

    def test_roundtrip(self):
        """Write eight one-byte tiles to memory, then read each one back."""
        # Order matters: tiles are written with zoom levels interleaved.
        tiles = [
            ((1, 0, 0), b"0"),
            ((1, 0, 1), b"1"),
            ((1, 1, 0), b"2"),
            ((1, 1, 1), b"3"),
            ((2, 0, 0), b"4"),
            ((3, 0, 0), b"5"),
            ((2, 0, 1), b"6"),
            ((3, 0, 2), b"7"),
        ]

        buf = BytesIO()
        writer = Writer(buf, 7)
        for (z, x, y), data in tiles:
            writer.write_tile(z, x, y, data)
        writer.finalize({"key": "value"})

        reader = Reader(MemorySource(buf.getvalue()))
        self.assertEqual(reader.version, 2)
        self.assertEqual(reader.root_entries, 6)
        self.assertEqual(reader.metadata["key"], "value")
        for (z, x, y), data in tiles:
            self.assertEqual(reader.get(z, x, y), data)