From 1940b4eff6bf3e26c7842ffb19a6f402c213d705 Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Fri, 23 Jun 2023 11:18:07 +0800 Subject: [PATCH] python writer improvements * flesh out roundtrip test case * detect unordered tile writes and set clustered flag appropriately [#115] --- python/pmtiles/tile.py | 18 +++++++----- python/pmtiles/writer.py | 20 +++++++++++-- python/test/test_reader.py | 30 ------------------- python/test/test_reader_writer.py | 48 +++++++++++++++++++++++++++++++ python/test/test_writer.py | 1 - 5 files changed, 76 insertions(+), 41 deletions(-) delete mode 100644 python/test/test_reader.py create mode 100644 python/test/test_reader_writer.py delete mode 100644 python/test/test_writer.py diff --git a/python/pmtiles/tile.py b/python/pmtiles/tile.py index b50729a..306fb63 100644 --- a/python/pmtiles/tile.py +++ b/python/pmtiles/tile.py @@ -273,12 +273,16 @@ def serialize_header(h): write_uint8(h["tile_type"].value) write_uint8(h["min_zoom"]) write_uint8(h["max_zoom"]) - write_int32(h["min_lon_e7"]) - write_int32(h["min_lat_e7"]) - write_int32(h["max_lon_e7"]) - write_int32(h["max_lat_e7"]) - write_uint8(h["center_zoom"]) - write_int32(h["center_lon_e7"]) - write_int32(h["center_lat_e7"]) + min_lon_e7 = h.get("min_lon_e7",-180) + write_int32(min_lon_e7) + min_lat_e7 = h.get("min_lat_e7",-90) + write_int32(min_lat_e7) + max_lon_e7 = h.get("max_lon_e7",180) + write_int32(max_lon_e7) + max_lat_e7 = h.get("max_lat_e7",90) + write_int32(max_lat_e7) + write_uint8(h.get("center_zoom",h["min_zoom"])) + write_int32(h.get("center_lon_e7", round((min_lon_e7 + max_lon_e7) / 2))) + write_int32(h.get("center_lat_e7", round((min_lat_e7 + max_lat_e7) / 2))) return b_io.getvalue() diff --git a/python/pmtiles/writer.py b/python/pmtiles/writer.py index d3fd776..6090850 100644 --- a/python/pmtiles/writer.py +++ b/python/pmtiles/writer.py @@ -3,7 +3,13 @@ import tempfile import gzip import shutil from contextlib import contextmanager -from .tile import Entry, serialize_directory, Compression, serialize_header +from .tile import ( + Entry, + serialize_directory, + Compression, + serialize_header, + tileid_to_zxy, +) @contextmanager @@ -55,9 +61,12 @@ class Writer: self.tile_f = tempfile.TemporaryFile() self.offset = 0 self.addressed_tiles = 0 + self.clustered = True - # TODO enforce ordered writes def write_tile(self, tileid, data): + if len(self.tile_entries) > 0 and tileid < self.tile_entries[-1].tile_id: + self.clustered = False + hsh = hash(data) if hsh in self.hash_to_offset: last = self.tile_entries[-1] @@ -83,6 +92,11 @@ class Writer: header["tile_entries_count"] = len(self.tile_entries) header["tile_contents_count"] = len(self.hash_to_offset) + self.tile_entries = sorted(self.tile_entries, key=lambda e: e.tile_id) + + header["min_zoom"] = tileid_to_zxy(self.tile_entries[0].tile_id)[0] + header["max_zoom"] = tileid_to_zxy(self.tile_entries[-1].tile_id)[0] + root_bytes, leaves_bytes, num_leaves = optimize_directories( self.tile_entries, 16384 - 127 ) @@ -105,7 +119,7 @@ class Writer: ) compressed_metadata = gzip.compress(json.dumps(metadata).encode()) - header["clustered"] = True + header["clustered"] = self.clustered header["internal_compression"] = Compression.GZIP header["root_offset"] = 127 header["root_length"] = len(root_bytes) diff --git a/python/test/test_reader.py b/python/test/test_reader.py deleted file mode 100644 index 5eef744..0000000 --- a/python/test/test_reader.py +++ /dev/null @@ -1,30 +0,0 @@ -import unittest -from io import BytesIO -from pmtiles.writer import Writer -from pmtiles.reader import Reader, MemorySource - - -class TestReader(unittest.TestCase): - def test_roundtrip(self): - buf = BytesIO() - # writer = Writer(buf, 5) - # writer.write_tile(1, 0, 0, b"0") - # writer.write_tile(1, 0, 1, b"1") - # writer.write_tile(1, 1, 0, b"2") - # writer.write_tile(2, 0, 0, b"4") - # writer.write_tile(3, 0, 0, b"5") - # writer.write_tile(2, 0, 1, b"6") - # writer.write_tile(3, 0, 2, b"7") - # writer.finalize({"key": "value"}) - - # reader = Reader(MemorySource(buf.getvalue())) - # self.assertEqual(reader.header().version, 2) - # self.assertEqual(reader.header().metadata["key"], "value") - # self.assertEqual(reader.get(1, 0, 0), b"0") - # self.assertEqual(reader.get(1, 0, 1), b"1") - # self.assertEqual(reader.get(1, 1, 0), b"2") - # self.assertEqual(reader.get(2, 0, 0), b"4") - # self.assertEqual(reader.get(3, 0, 0), b"5") - # self.assertEqual(reader.get(2, 0, 1), b"6") - # self.assertEqual(reader.get(3, 0, 2), b"7") - # self.assertEqual(reader.get(1, 1, 1), None) diff --git a/python/test/test_reader_writer.py b/python/test/test_reader_writer.py new file mode 100644 index 0000000..ec7647b --- /dev/null +++ b/python/test/test_reader_writer.py @@ -0,0 +1,48 @@ +import unittest +from io import BytesIO +from pmtiles.writer import Writer +from pmtiles.reader import Reader, MemorySource +from pmtiles.tile import Compression, TileType, tileid_to_zxy, zxy_to_tileid + + +class TestReaderWriter(unittest.TestCase): + def test_roundtrip(self): + buf = BytesIO() + writer = Writer(buf) + writer.write_tile(zxy_to_tileid(0, 0, 0), b"1") + writer.write_tile(zxy_to_tileid(1, 0, 0), b"2") + writer.write_tile(zxy_to_tileid(2, 0, 0), b"3") + writer.finalize( + { + "tile_compression": Compression.UNKNOWN, + "tile_type": TileType.UNKNOWN, + }, + {"key": "value"}, + ) + + reader = Reader(MemorySource(buf.getvalue())) + self.assertEqual(reader.header()["version"], 3) + self.assertEqual(reader.header()["min_zoom"], 0) + self.assertEqual(reader.header()["max_zoom"], 2) + self.assertEqual(reader.header()["clustered"], True) + self.assertEqual(reader.metadata()["key"], "value") + self.assertEqual(reader.get(0, 0, 0), b"1") + self.assertEqual(reader.get(1, 0, 0), b"2") + self.assertEqual(reader.get(2, 0, 0), b"3") + self.assertEqual(reader.get(3, 0, 0), None) + + def test_roundtrip_unclustered(self): + buf = BytesIO() + writer = Writer(buf) + writer.write_tile(zxy_to_tileid(1, 0, 0), b"2") + writer.write_tile(zxy_to_tileid(0, 0, 0), b"1") + writer.finalize( + { + "tile_compression": Compression.UNKNOWN, + "tile_type": TileType.UNKNOWN, + }, + {}, + ) + + reader = Reader(MemorySource(buf.getvalue())) + self.assertEqual(reader.header()["clustered"], False) diff --git a/python/test/test_writer.py b/python/test/test_writer.py deleted file mode 100644 index 10e6159..0000000 --- a/python/test/test_writer.py +++ /dev/null @@ -1 +0,0 @@ -import unittest