From 80c0e2b436271d0d2cb76a42a1f5e91d9d8bd893 Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Thu, 7 Jul 2022 21:57:28 +0800 Subject: [PATCH] Python API changed to do I/O in larger blocks --- python/.gitignore | 2 + python/bin/pmtiles-serve | 15 ++--- python/bin/pmtiles-show | 21 +++---- python/pmtiles/convert.py | 8 ++- python/pmtiles/reader.py | 108 +++++++++++++++++++----------------- python/pmtiles/writer.py | 2 +- python/test/test_convert.py | 39 +++++++++++++ python/test/test_reader.py | 5 +- 8 files changed, 125 insertions(+), 75 deletions(-) create mode 100644 python/test/test_convert.py diff --git a/python/.gitignore b/python/.gitignore index f63128c..0419051 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -2,3 +2,5 @@ __pycache__ build dist +*.pmtiles +*.mbtiles diff --git a/python/bin/pmtiles-serve b/python/bin/pmtiles-serve index 1ad18dd..24c2da3 100755 --- a/python/bin/pmtiles-serve +++ b/python/bin/pmtiles-serve @@ -6,7 +6,7 @@ import json import re from socketserver import ThreadingMixIn import sys -from pmtiles.reader import read +from pmtiles.reader import Reader, MmapSource # https://docs.python.org/3/library/http.server.html @@ -14,9 +14,7 @@ class ThreadingSimpleServer(ThreadingMixIn, http.server.HTTPServer): pass -parser = argparse.ArgumentParser( - description="HTTP server for PMTiles archives." -) +parser = argparse.ArgumentParser(description="HTTP server for PMTiles archives.") parser.add_argument("pmtiles_file", help="PMTiles archive to serve") parser.add_argument("port", help="Port to bind to") parser.add_argument("--bind", help="Address to bind server to: default localhost") @@ -27,8 +25,11 @@ parser.add_argument( ) args = parser.parse_args() -with read(args.pmtiles_file) as reader: - fmt = reader.metadata["format"] +with open(args.pmtiles_file, "r+b") as f: + source = MmapSource(f) + reader = Reader(source) + + fmt = reader.header().metadata["format"] class Handler(http.server.SimpleHTTPRequestHandler): def do_GET(self): @@ -37,7 +38,7 @@ with read(args.pmtiles_file) as reader: if args.cors_allow_all: self.send_header("Access-Control-Allow-Origin", "*") self.end_headers() - self.wfile.write(json.dumps(reader.metadata).encode("utf-8")) + self.wfile.write(json.dumps(reader.header().metadata).encode("utf-8")) return match = re.match("/(\d+)/(\d+)/(\d+)." + fmt, self.path) if not match: diff --git a/python/bin/pmtiles-show b/python/bin/pmtiles-show index 2e9caca..df236ef 100755 --- a/python/bin/pmtiles-show +++ b/python/bin/pmtiles-show @@ -1,7 +1,7 @@ #!/usr/bin/env python import sys -from pmtiles.reader import Reader, MmapSource +from pmtiles.reader import Reader, MmapSource, load_directory if len(sys.argv) <= 1: print("Usage: pmtiles-show PMTILES_FILE") @@ -9,32 +9,33 @@ if len(sys.argv) <= 1: print("Usage: pmtiles-show PMTILES_FILE list") exit(1) -with open(sys.argv[1],'r+b') as f: +with open(sys.argv[1], "r+b") as f: reader = Reader(MmapSource(f)) - spec_version = reader.version + spec_version = reader.header().version if len(sys.argv) == 2: print("spec version: ", spec_version) print("metadata:") - for k, v in reader.metadata.items(): + for k, v in reader.header().metadata.items(): print(k, "=", v) - print("root entries:", reader.root_entries) - print("leaf directories:", len(set(reader.leaves.values()))) + print("root dir tiles:", len(reader.header().root_dir)) + print("leaf directories:", len(set(reader.header().leaves.values()))) elif len(sys.argv) == 3: last_val = None - for k, v in reader.root_dir.items(): + for k, v in reader.header().root_dir.items(): print(f"{k[0]} {k[1]} {k[2]} {v[0]} {v[1]}") if last_val and k <= last_val: raise Exception("Error: directory entries not sorted") last_val = k - for val in set(reader.leaves.values()): - leaf_dir, _ = reader.load_directory(val[0], val[1] // 17) + for val in set(reader.header().leaves.values()): + dir_bytes = reader.get_bytes(val[0], val[1]) + leaf_dir, _ = load_directory(dir_bytes, 0, val[1] // 17) last_val = None for k, v in leaf_dir.items(): print(f"{k[0]} {k[1]} {k[2]} {v[0]} {v[1]}") if last_val and k <= last_val: - raise Exception("Error: irectory entries not sorted") + raise Exception("Error: directory entries not sorted") last_val = k else: z = int(sys.argv[2]) diff --git a/python/pmtiles/convert.py b/python/pmtiles/convert.py index e3096d4..877b0cb 100644 --- a/python/pmtiles/convert.py +++ b/python/pmtiles/convert.py @@ -3,8 +3,8 @@ import gzip import json import os import sqlite3 -from pmtiles.reader import read from pmtiles.writer import write +from pmtiles.reader import Reader, MmapSource # if the tile is GZIP-encoded, it won't work with range queries # until transfer-encoding: gzip is well supported. @@ -61,8 +61,10 @@ def pmtiles_to_mbtiles(input, output, gzip): "CREATE TABLE tiles (zoom_level integer, tile_column integer, tile_row integer, tile_data blob);" ) - with read(input) as reader: - metadata = reader.metadata + with open(input, "r+b") as f: + source = MmapSource(f) + reader = Reader(source) + metadata = reader.header().metadata metadata = set_metadata_compression(metadata, gzip) for k, v in metadata.items(): cursor.execute("INSERT INTO metadata VALUES(?,?)", (k, v)) diff --git a/python/pmtiles/reader.py b/python/pmtiles/reader.py index 712217d..f5aa366 100644 --- a/python/pmtiles/reader.py +++ b/python/pmtiles/reader.py @@ -1,6 +1,7 @@ import json import mmap from contextlib import contextmanager +from collections import namedtuple def MmapSource(f): @@ -18,73 +19,78 @@ def MemorySource(buf): return get_bytes + +def load_directory(data_bytes, offset, num_entries): + tile_entries = {} + leaves = {} + for i in range(offset, offset + num_entries * 17, 17): + z = int.from_bytes(data_bytes[i : i + 1], byteorder="little") + x = int.from_bytes(data_bytes[i + 1 : i + 4], byteorder="little") + y = int.from_bytes(data_bytes[i + 4 : i + 7], byteorder="little") + tile_off = int.from_bytes(data_bytes[i + 7 : i + 13], byteorder="little") + tile_len = int.from_bytes(data_bytes[i + 13 : i + 17], byteorder="little") + if z & 0b10000000: + leaves[(z & 0b01111111, x, y)] = (tile_off, tile_len) + else: + tile_entries[(z, x, y)] = (tile_off, tile_len) + return tile_entries, leaves + + +Header = namedtuple("Header", ["version", "metadata", "root_dir", "leaves"]) + + class Reader: def __init__(self, get_bytes): self.get_bytes = get_bytes - assert int.from_bytes(self.get_bytes(0, 2), byteorder="little") == 0x4D50 - first_entry_idx = 10 + self.metadata_len - self.root_dir, self.leaves = self.load_directory( - first_entry_idx, self.root_entries - ) + self._header = None - def load_directory(self, offset, num_entries): - directory = {} - leaves = {} - for i in range(offset, offset + num_entries * 17, 17): - z = int.from_bytes(self.get_bytes(i, 1), byteorder="little") - x = int.from_bytes(self.get_bytes(i + 1, 3), byteorder="little") - y = int.from_bytes(self.get_bytes(i + 4, 3), byteorder="little") - tile_off = int.from_bytes(self.get_bytes(i + 7, 6), byteorder="little") - tile_len = int.from_bytes(self.get_bytes(i + 13, 4), byteorder="little") - if z & 0b10000000: - leaves[(z & 0b01111111, x, y)] = (tile_off, tile_len) - else: - directory[(z, x, y)] = (tile_off, tile_len) - return (directory, leaves) + def header(self): + if self._header: + return self._header + else: + header_bytes = self.get_bytes(0, 512000) + assert int.from_bytes(header_bytes[0:2], byteorder="little") == 0x4D50 + version = int.from_bytes(header_bytes[2:4], byteorder="little") + metadata_len = int.from_bytes(header_bytes[4:8], byteorder="little") + metadata = json.loads(header_bytes[10 : 10 + metadata_len]) + num_entries = int.from_bytes(header_bytes[8:10], byteorder="little") + root_dir, leaves = load_directory( + header_bytes, 10 + metadata_len, num_entries + ) + self._header = Header(version, metadata, root_dir, leaves) + return self._header - def close(self): - self.f.close() - - @property - def metadata_len(self): - return int.from_bytes(self.get_bytes(4, 4), byteorder="little") - - @property - def metadata(self): - s = self.get_bytes(10, self.metadata_len) - return json.loads(s) - - @property - def version(self): - return int.from_bytes(self.get_bytes(2, 2), byteorder="little") - - @property - def root_entries(self): - return int.from_bytes(self.get_bytes(8, 2), byteorder="little") - - @property - def leaf_level(self): - return next(iter(self.leaves))[0] + def _leaf_level(self): + h = self.header() + return next(iter(h.leaves))[0] def get(self, z, x, y): - val = self.root_dir.get((z, x, y)) + h = self.header() + val = h.root_dir.get((z, x, y)) if val: return self.get_bytes(val[0], val[1]) else: - if len(self.leaves) > 0: - level_diff = z - self.leaf_level - leaf = (self.leaf_level, x // (1 << level_diff), y // (1 << level_diff)) - val = self.leaves.get(leaf) + if len(self.header().leaves) > 0: + level_diff = z - self._leaf_level() + leaf = ( + self._leaf_level(), + x // (1 << level_diff), + y // (1 << level_diff), + ) + val = h.leaves.get(leaf) if val: - directory, _ = self.load_directory(val[0], val[1] // 17) + dir_bytes = self.get_bytes(val[0], val[1]) + directory, _ = load_directory(dir_bytes, 0, val[1] // 17) val = directory.get((z, x, y)) if val: return self.get_bytes(val[0], val[1]) def tiles(self): - for k, v in self.root_dir.items(): + h = self.header() + for k, v in h.root_dir.items(): yield (k, self.get_bytes(v[0], v[1])) - for val in self.leaves.values(): - leaf_dir, _ = self.load_directory(val[0], val[1] // 17) + for val in set(h.leaves.values()): + dir_bytes = self.get_bytes(val[0], val[1]) + leaf_dir, _ = load_directory(dir_bytes, 0, val[1] // 17) for k, v in leaf_dir.items(): yield (k, self.get_bytes(v[0], v[1])) diff --git a/python/pmtiles/writer.py b/python/pmtiles/writer.py index c559860..4522b80 100644 --- a/python/pmtiles/writer.py +++ b/python/pmtiles/writer.py @@ -81,7 +81,7 @@ def make_pyramid(tile_entries, start_leaf_offset, max_dir_size=21845): @contextmanager def write(fname): f = open(fname, "wb") - w = Writer(f) + w = Writer(f, 21845) try: yield w finally: diff --git a/python/test/test_convert.py b/python/test/test_convert.py new file mode 100644 index 0000000..7a95226 --- /dev/null +++ b/python/test/test_convert.py @@ -0,0 +1,39 @@ +import unittest +from io import BytesIO +import os +from pmtiles.writer import Writer +from pmtiles.reader import Reader, MemorySource +from pmtiles.convert import pmtiles_to_mbtiles, mbtiles_to_pmtiles + + +class TestConvert(unittest.TestCase): + def tearDown(self): + try: + os.remove("test_tmp.pmtiles") + except: + pass + try: + os.remove("test_tmp.mbtiles") + except: + pass + try: + os.remove("test_tmp_2.mbtiles") + except: + pass + + def test_roundtrip(self): + + with open("test_tmp.pmtiles", "wb") as f: + writer = Writer(f, 7) + writer.write_tile(1, 0, 0, b"0") + writer.write_tile(1, 0, 1, b"1") + writer.write_tile(1, 1, 0, b"2") + writer.write_tile(1, 1, 1, b"3") + writer.write_tile(2, 0, 0, b"4") + writer.write_tile(3, 0, 0, b"5") + writer.write_tile(2, 0, 1, b"6") + writer.write_tile(3, 0, 2, b"7") + writer.finalize({"key": "value"}) + + pmtiles_to_mbtiles("test_tmp.pmtiles", "test_tmp.mbtiles", False) + mbtiles_to_pmtiles("test_tmp.mbtiles", "test_tmp_2.pmtiles", 3, False) diff --git a/python/test/test_reader.py b/python/test/test_reader.py index a237d0e..5e57a9d 100644 --- a/python/test/test_reader.py +++ b/python/test/test_reader.py @@ -19,9 +19,8 @@ class TestReader(unittest.TestCase): writer.finalize({"key": "value"}) reader = Reader(MemorySource(buf.getvalue())) - self.assertEqual(reader.version, 2) - self.assertEqual(reader.root_entries, 6) - self.assertEqual(reader.metadata["key"], "value") + self.assertEqual(reader.header().version, 2) + self.assertEqual(reader.header().metadata["key"], "value") self.assertEqual(reader.get(1, 0, 0), b"0") self.assertEqual(reader.get(1, 0, 1), b"1") self.assertEqual(reader.get(1, 1, 0), b"2")