Python API changed to do I/O in larger blocks

This commit is contained in:
Brandon Liu
2022-07-07 21:57:28 +08:00
parent 3811ff9b1b
commit 80c0e2b436
8 changed files with 125 additions and 75 deletions

2
python/.gitignore vendored
View File

@@ -2,3 +2,5 @@
__pycache__ __pycache__
build build
dist dist
*.pmtiles
*.mbtiles

View File

@@ -6,7 +6,7 @@ import json
import re import re
from socketserver import ThreadingMixIn from socketserver import ThreadingMixIn
import sys import sys
from pmtiles.reader import read from pmtiles.reader import Reader, MmapSource
# https://docs.python.org/3/library/http.server.html # https://docs.python.org/3/library/http.server.html
@@ -14,9 +14,7 @@ class ThreadingSimpleServer(ThreadingMixIn, http.server.HTTPServer):
pass pass
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(description="HTTP server for PMTiles archives.")
description="HTTP server for PMTiles archives."
)
parser.add_argument("pmtiles_file", help="PMTiles archive to serve") parser.add_argument("pmtiles_file", help="PMTiles archive to serve")
parser.add_argument("port", help="Port to bind to") parser.add_argument("port", help="Port to bind to")
parser.add_argument("--bind", help="Address to bind server to: default localhost") parser.add_argument("--bind", help="Address to bind server to: default localhost")
@@ -27,8 +25,11 @@ parser.add_argument(
) )
args = parser.parse_args() args = parser.parse_args()
with read(args.pmtiles_file) as reader: with open(args.pmtiles_file, "r+b") as f:
fmt = reader.metadata["format"] source = MmapSource(f)
reader = Reader(source)
fmt = reader.header().metadata["format"]
class Handler(http.server.SimpleHTTPRequestHandler): class Handler(http.server.SimpleHTTPRequestHandler):
def do_GET(self): def do_GET(self):
@@ -37,7 +38,7 @@ with read(args.pmtiles_file) as reader:
if args.cors_allow_all: if args.cors_allow_all:
self.send_header("Access-Control-Allow-Origin", "*") self.send_header("Access-Control-Allow-Origin", "*")
self.end_headers() self.end_headers()
self.wfile.write(json.dumps(reader.metadata).encode("utf-8")) self.wfile.write(json.dumps(reader.header().metadata).encode("utf-8"))
return return
match = re.match("/(\d+)/(\d+)/(\d+)." + fmt, self.path) match = re.match("/(\d+)/(\d+)/(\d+)." + fmt, self.path)
if not match: if not match:

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
from pmtiles.reader import Reader, MmapSource from pmtiles.reader import Reader, MmapSource, load_directory
if len(sys.argv) <= 1: if len(sys.argv) <= 1:
print("Usage: pmtiles-show PMTILES_FILE") print("Usage: pmtiles-show PMTILES_FILE")
@@ -9,32 +9,33 @@ if len(sys.argv) <= 1:
print("Usage: pmtiles-show PMTILES_FILE list") print("Usage: pmtiles-show PMTILES_FILE list")
exit(1) exit(1)
with open(sys.argv[1],'r+b') as f: with open(sys.argv[1], "r+b") as f:
reader = Reader(MmapSource(f)) reader = Reader(MmapSource(f))
spec_version = reader.version spec_version = reader.header().version
if len(sys.argv) == 2: if len(sys.argv) == 2:
print("spec version: ", spec_version) print("spec version: ", spec_version)
print("metadata:") print("metadata:")
for k, v in reader.metadata.items(): for k, v in reader.header().metadata.items():
print(k, "=", v) print(k, "=", v)
print("root entries:", reader.root_entries) print("root dir tiles:", len(reader.header().root_dir))
print("leaf directories:", len(set(reader.leaves.values()))) print("leaf directories:", len(set(reader.header().leaves.values())))
elif len(sys.argv) == 3: elif len(sys.argv) == 3:
last_val = None last_val = None
for k, v in reader.root_dir.items(): for k, v in reader.header().root_dir.items():
print(f"{k[0]} {k[1]} {k[2]} {v[0]} {v[1]}") print(f"{k[0]} {k[1]} {k[2]} {v[0]} {v[1]}")
if last_val and k <= last_val: if last_val and k <= last_val:
raise Exception("Error: directory entries not sorted") raise Exception("Error: directory entries not sorted")
last_val = k last_val = k
for val in set(reader.leaves.values()): for val in set(reader.header().leaves.values()):
leaf_dir, _ = reader.load_directory(val[0], val[1] // 17) dir_bytes = reader.get_bytes(val[0], val[1])
leaf_dir, _ = load_directory(dir_bytes, 0, val[1] // 17)
last_val = None last_val = None
for k, v in leaf_dir.items(): for k, v in leaf_dir.items():
print(f"{k[0]} {k[1]} {k[2]} {v[0]} {v[1]}") print(f"{k[0]} {k[1]} {k[2]} {v[0]} {v[1]}")
if last_val and k <= last_val: if last_val and k <= last_val:
raise Exception("Error: irectory entries not sorted") raise Exception("Error: directory entries not sorted")
last_val = k last_val = k
else: else:
z = int(sys.argv[2]) z = int(sys.argv[2])

View File

@@ -3,8 +3,8 @@ import gzip
import json import json
import os import os
import sqlite3 import sqlite3
from pmtiles.reader import read
from pmtiles.writer import write from pmtiles.writer import write
from pmtiles.reader import Reader, MmapSource
# if the tile is GZIP-encoded, it won't work with range queries # if the tile is GZIP-encoded, it won't work with range queries
# until transfer-encoding: gzip is well supported. # until transfer-encoding: gzip is well supported.
@@ -61,8 +61,10 @@ def pmtiles_to_mbtiles(input, output, gzip):
"CREATE TABLE tiles (zoom_level integer, tile_column integer, tile_row integer, tile_data blob);" "CREATE TABLE tiles (zoom_level integer, tile_column integer, tile_row integer, tile_data blob);"
) )
with read(input) as reader: with open(input, "r+b") as f:
metadata = reader.metadata source = MmapSource(f)
reader = Reader(source)
metadata = reader.header().metadata
metadata = set_metadata_compression(metadata, gzip) metadata = set_metadata_compression(metadata, gzip)
for k, v in metadata.items(): for k, v in metadata.items():
cursor.execute("INSERT INTO metadata VALUES(?,?)", (k, v)) cursor.execute("INSERT INTO metadata VALUES(?,?)", (k, v))

View File

@@ -1,6 +1,7 @@
import json import json
import mmap import mmap
from contextlib import contextmanager from contextlib import contextmanager
from collections import namedtuple
def MmapSource(f): def MmapSource(f):
@@ -18,73 +19,78 @@ def MemorySource(buf):
return get_bytes return get_bytes
class Reader:
def __init__(self, get_bytes):
self.get_bytes = get_bytes
assert int.from_bytes(self.get_bytes(0, 2), byteorder="little") == 0x4D50
first_entry_idx = 10 + self.metadata_len
self.root_dir, self.leaves = self.load_directory(
first_entry_idx, self.root_entries
)
def load_directory(self, offset, num_entries): def load_directory(data_bytes, offset, num_entries):
directory = {} tile_entries = {}
leaves = {} leaves = {}
for i in range(offset, offset + num_entries * 17, 17): for i in range(offset, offset + num_entries * 17, 17):
z = int.from_bytes(self.get_bytes(i, 1), byteorder="little") z = int.from_bytes(data_bytes[i : i + 1], byteorder="little")
x = int.from_bytes(self.get_bytes(i + 1, 3), byteorder="little") x = int.from_bytes(data_bytes[i + 1 : i + 4], byteorder="little")
y = int.from_bytes(self.get_bytes(i + 4, 3), byteorder="little") y = int.from_bytes(data_bytes[i + 4 : i + 7], byteorder="little")
tile_off = int.from_bytes(self.get_bytes(i + 7, 6), byteorder="little") tile_off = int.from_bytes(data_bytes[i + 7 : i + 13], byteorder="little")
tile_len = int.from_bytes(self.get_bytes(i + 13, 4), byteorder="little") tile_len = int.from_bytes(data_bytes[i + 13 : i + 17], byteorder="little")
if z & 0b10000000: if z & 0b10000000:
leaves[(z & 0b01111111, x, y)] = (tile_off, tile_len) leaves[(z & 0b01111111, x, y)] = (tile_off, tile_len)
else: else:
directory[(z, x, y)] = (tile_off, tile_len) tile_entries[(z, x, y)] = (tile_off, tile_len)
return (directory, leaves) return tile_entries, leaves
def close(self):
self.f.close()
@property Header = namedtuple("Header", ["version", "metadata", "root_dir", "leaves"])
def metadata_len(self):
return int.from_bytes(self.get_bytes(4, 4), byteorder="little")
@property
def metadata(self):
s = self.get_bytes(10, self.metadata_len)
return json.loads(s)
@property class Reader:
def version(self): def __init__(self, get_bytes):
return int.from_bytes(self.get_bytes(2, 2), byteorder="little") self.get_bytes = get_bytes
self._header = None
@property def header(self):
def root_entries(self): if self._header:
return int.from_bytes(self.get_bytes(8, 2), byteorder="little") return self._header
else:
header_bytes = self.get_bytes(0, 512000)
assert int.from_bytes(header_bytes[0:2], byteorder="little") == 0x4D50
version = int.from_bytes(header_bytes[2:4], byteorder="little")
metadata_len = int.from_bytes(header_bytes[4:8], byteorder="little")
metadata = json.loads(header_bytes[10 : 10 + metadata_len])
num_entries = int.from_bytes(header_bytes[8:10], byteorder="little")
root_dir, leaves = load_directory(
header_bytes, 10 + metadata_len, num_entries
)
self._header = Header(version, metadata, root_dir, leaves)
return self._header
@property def _leaf_level(self):
def leaf_level(self): h = self.header()
return next(iter(self.leaves))[0] return next(iter(h.leaves))[0]
def get(self, z, x, y): def get(self, z, x, y):
val = self.root_dir.get((z, x, y)) h = self.header()
val = h.root_dir.get((z, x, y))
if val: if val:
return self.get_bytes(val[0], val[1]) return self.get_bytes(val[0], val[1])
else: else:
if len(self.leaves) > 0: if len(self.header().leaves) > 0:
level_diff = z - self.leaf_level level_diff = z - self._leaf_level()
leaf = (self.leaf_level, x // (1 << level_diff), y // (1 << level_diff)) leaf = (
val = self.leaves.get(leaf) self._leaf_level(),
x // (1 << level_diff),
y // (1 << level_diff),
)
val = h.leaves.get(leaf)
if val: if val:
directory, _ = self.load_directory(val[0], val[1] // 17) dir_bytes = self.get_bytes(val[0], val[1])
directory, _ = load_directory(dir_bytes, 0, val[1] // 17)
val = directory.get((z, x, y)) val = directory.get((z, x, y))
if val: if val:
return self.get_bytes(val[0], val[1]) return self.get_bytes(val[0], val[1])
def tiles(self): def tiles(self):
for k, v in self.root_dir.items(): h = self.header()
for k, v in h.root_dir.items():
yield (k, self.get_bytes(v[0], v[1])) yield (k, self.get_bytes(v[0], v[1]))
for val in self.leaves.values(): for val in set(h.leaves.values()):
leaf_dir, _ = self.load_directory(val[0], val[1] // 17) dir_bytes = self.get_bytes(val[0], val[1])
leaf_dir, _ = load_directory(dir_bytes, 0, val[1] // 17)
for k, v in leaf_dir.items(): for k, v in leaf_dir.items():
yield (k, self.get_bytes(v[0], v[1])) yield (k, self.get_bytes(v[0], v[1]))

View File

@@ -81,7 +81,7 @@ def make_pyramid(tile_entries, start_leaf_offset, max_dir_size=21845):
@contextmanager @contextmanager
def write(fname): def write(fname):
f = open(fname, "wb") f = open(fname, "wb")
w = Writer(f) w = Writer(f, 21845)
try: try:
yield w yield w
finally: finally:

View File

@@ -0,0 +1,39 @@
import unittest
from io import BytesIO
import os
from pmtiles.writer import Writer
from pmtiles.reader import Reader, MemorySource
from pmtiles.convert import pmtiles_to_mbtiles, mbtiles_to_pmtiles
class TestConvert(unittest.TestCase):
    """Smoke test for converting PMTiles -> MBTiles -> PMTiles.

    The test writes scratch files into the current working directory and
    removes them again in ``tearDown``.
    """

    # Scratch files this test case may leave in the working directory.
    # "test_tmp_2.pmtiles" is the file test_roundtrip actually produces as its
    # final output; "test_tmp_2.mbtiles" is retained from the original cleanup
    # list even though nothing visible here creates it.
    TMP_FILES = (
        "test_tmp.pmtiles",
        "test_tmp.mbtiles",
        "test_tmp_2.pmtiles",
        "test_tmp_2.mbtiles",
    )

    def tearDown(self):
        """Delete every scratch file, ignoring ones that do not exist."""
        for path in self.TMP_FILES:
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup: the file may never have been created
                # (the test failed early) or may not be removable right now.
                # Only filesystem errors are ignored, so KeyboardInterrupt and
                # programming errors still propagate.
                pass

    def test_roundtrip(self):
        """Write a small archive and run it through both converters.

        There are no assertions on the output; the test passes when neither
        conversion raises.
        """
        with open("test_tmp.pmtiles", "wb") as f:
            # NOTE(review): the second Writer argument looks like the maximum
            # directory size; 7 is smaller than the 8 tiles written below,
            # presumably to force leaf directories -- confirm against Writer.
            writer = Writer(f, 7)
            writer.write_tile(1, 0, 0, b"0")
            writer.write_tile(1, 0, 1, b"1")
            writer.write_tile(1, 1, 0, b"2")
            writer.write_tile(1, 1, 1, b"3")
            writer.write_tile(2, 0, 0, b"4")
            writer.write_tile(3, 0, 0, b"5")
            writer.write_tile(2, 0, 1, b"6")
            writer.write_tile(3, 0, 2, b"7")
            writer.finalize({"key": "value"})

        # Convert only after the archive file has been closed by the with-block.
        pmtiles_to_mbtiles("test_tmp.pmtiles", "test_tmp.mbtiles", False)
        mbtiles_to_pmtiles("test_tmp.mbtiles", "test_tmp_2.pmtiles", 3, False)

View File

@@ -19,9 +19,8 @@ class TestReader(unittest.TestCase):
writer.finalize({"key": "value"}) writer.finalize({"key": "value"})
reader = Reader(MemorySource(buf.getvalue())) reader = Reader(MemorySource(buf.getvalue()))
self.assertEqual(reader.version, 2) self.assertEqual(reader.header().version, 2)
self.assertEqual(reader.root_entries, 6) self.assertEqual(reader.header().metadata["key"], "value")
self.assertEqual(reader.metadata["key"], "value")
self.assertEqual(reader.get(1, 0, 0), b"0") self.assertEqual(reader.get(1, 0, 0), b"0")
self.assertEqual(reader.get(1, 0, 1), b"1") self.assertEqual(reader.get(1, 0, 1), b"1")
self.assertEqual(reader.get(1, 1, 0), b"2") self.assertEqual(reader.get(1, 1, 0), b"2")