mirror of
https://github.com/protomaps/PMTiles.git
synced 2026-02-04 10:51:07 +00:00
python: pmtiles-convert from mbtiles writes v3 spec
This commit is contained in:
@@ -5,97 +5,135 @@ import os
|
||||
import sqlite3
|
||||
from pmtiles.writer import write
|
||||
from pmtiles.reader import Reader, MmapSource
|
||||
|
||||
# if the tile is GZIP-encoded, it won't work with range queries
|
||||
# until transfer-encoding: gzip is well supported.
|
||||
def force_compress(data, compress):
    """Return ``data`` gzip-compressed or decompressed to match ``compress``.

    The first two bytes are compared against the gzip magic number
    (``1f 8b``) to decide whether ``data`` is already compressed.

    Args:
        data: Tile payload as a bytes-like object.
        compress: Truthy to guarantee gzip-compressed output, falsy to
            guarantee decompressed output.

    Returns:
        ``data`` itself when it is already in the requested state, otherwise
        the result of ``gzip.compress`` / ``gzip.decompress``.
    """
    is_gzipped = data[0:2] == b"\x1f\x8b"
    if compress and not is_gzipped:
        return gzip.compress(data)
    if not compress and is_gzipped:
        return gzip.decompress(data)
    return data
|
||||
from .tile import zxy_to_tileid, tileid_to_zxy, TileType, Compression
|
||||
|
||||
|
||||
def set_metadata_compression(metadata, gzip):
|
||||
if gzip:
|
||||
metadata["compression"] = "gzip"
|
||||
def mbtiles_to_header_json(mbtiles_metadata):
    """Split MBTiles metadata into header fields and leftover metadata.

    ``minzoom``, ``maxzoom``, ``bounds`` and ``center`` are read and then
    deleted from ``mbtiles_metadata`` (the dict is modified in place).
    ``format`` and ``compression`` are read but left in the dict.

    Args:
        mbtiles_metadata: Dict mapping MBTiles metadata names to string
            values.

    Returns:
        ``(header, mbtiles_metadata)``: the dict of header fields built here
        and the same metadata dict with the consumed keys removed.

    Raises:
        KeyError: if ``minzoom``, ``maxzoom``, ``bounds``, ``center`` or
            ``format`` is missing.
        ValueError: if a numeric field cannot be parsed.
        IndexError: if ``bounds`` / ``center`` has too few comma-separated
            parts.
    """

    def to_e7(text):
        # round() rather than int(): binary float error can leave the product
        # a hair below the intended integer, which int() would truncate.
        return round(float(text) * 10000000)

    header = {}

    header["min_zoom"] = int(mbtiles_metadata["minzoom"])
    del mbtiles_metadata["minzoom"]

    header["max_zoom"] = int(mbtiles_metadata["maxzoom"])
    del mbtiles_metadata["maxzoom"]

    # bounds is "min_lon,min_lat,max_lon,max_lat"
    bounds = mbtiles_metadata["bounds"].split(",")
    header["min_lon_e7"] = to_e7(bounds[0])
    header["min_lat_e7"] = to_e7(bounds[1])
    header["max_lon_e7"] = to_e7(bounds[2])
    header["max_lat_e7"] = to_e7(bounds[3])
    del mbtiles_metadata["bounds"]

    # center is "lon,lat,zoom"
    center = mbtiles_metadata["center"].split(",")
    header["center_lon_e7"] = to_e7(center[0])
    header["center_lat_e7"] = to_e7(center[1])
    header["center_zoom"] = int(center[2])
    del mbtiles_metadata["center"]

    tile_format = mbtiles_metadata["format"]
    if tile_format == "pbf":
        header["tile_type"] = TileType.MVT
    elif tile_format == "png":
        header["tile_type"] = TileType.PNG
    elif tile_format in ("jpeg", "jpg"):
        # the MBTiles spec spells this format "jpg"; "jpeg" kept for
        # backward compatibility
        header["tile_type"] = TileType.JPEG
    elif tile_format == "webp":
        header["tile_type"] = TileType.WEBP
    else:
        header["tile_type"] = TileType.UNKNOWN

    if mbtiles_metadata.get("compression") == "gzip":
        header["tile_compression"] = Compression.GZIP  # TODO: does this ever matter?
    else:
        header["tile_compression"] = Compression.UNKNOWN

    return header, mbtiles_metadata
|
||||
|
||||
|
||||
def mbtiles_to_pmtiles(input, output, maxzoom, gzip):
|
||||
def mbtiles_to_pmtiles(input, output, maxzoom):
|
||||
conn = sqlite3.connect(input)
|
||||
cursor = conn.cursor()
|
||||
|
||||
with write(output) as writer:
|
||||
|
||||
# collect a set of all tile IDs
|
||||
tileid_set = []
|
||||
for row in cursor.execute(
|
||||
"SELECT zoom_level,tile_column,tile_row,tile_data FROM tiles WHERE zoom_level <= ? ORDER BY zoom_level,tile_column,tile_row ASC",
|
||||
"SELECT zoom_level,tile_column,tile_row FROM tiles WHERE zoom_level <= ?",
|
||||
(maxzoom or 99,),
|
||||
):
|
||||
flipped = (1 << row[0]) - 1 - row[2]
|
||||
writer.write_tile(row[0], row[1], flipped, force_compress(row[3], gzip))
|
||||
tileid_set.append(zxy_to_tileid(row[0], row[1], flipped))
|
||||
|
||||
metadata = {}
|
||||
for row in cursor.execute("SELECT name,value FROM metadata"):
|
||||
metadata[row[0]] = row[1]
|
||||
if maxzoom:
|
||||
metadata["maxzoom"] = str(maxzoom)
|
||||
metadata = set_metadata_compression(metadata, gzip)
|
||||
result = writer.finalize(metadata)
|
||||
print("Num tiles:", result["num_tiles"])
|
||||
print("Num unique tiles:", result["num_unique_tiles"])
|
||||
print("Num leaves:", result["num_leaves"])
|
||||
tileid_set.sort()
|
||||
|
||||
conn.close()
|
||||
|
||||
|
||||
def pmtiles_to_mbtiles(input, output, gzip):
|
||||
conn = sqlite3.connect(output)
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("CREATE TABLE metadata (name text, value text);")
|
||||
cursor.execute(
|
||||
"CREATE TABLE tiles (zoom_level integer, tile_column integer, tile_row integer, tile_data blob);"
|
||||
)
|
||||
|
||||
with open(input, "r+b") as f:
|
||||
source = MmapSource(f)
|
||||
reader = Reader(source)
|
||||
metadata = reader.header().metadata
|
||||
metadata = set_metadata_compression(metadata, gzip)
|
||||
for k, v in metadata.items():
|
||||
cursor.execute("INSERT INTO metadata VALUES(?,?)", (k, v))
|
||||
for tile, data in reader.tiles():
|
||||
flipped = (1 << tile[0]) - 1 - tile[2]
|
||||
cursor.execute(
|
||||
"INSERT INTO tiles VALUES(?,?,?,?)",
|
||||
(tile[0], tile[1], flipped, force_compress(data, gzip)),
|
||||
# query the db in ascending tile order
|
||||
for tileid in tileid_set:
|
||||
z, x, y = tileid_to_zxy(tileid)
|
||||
flipped = (1 << z) - 1 - y
|
||||
res = cursor.execute(
|
||||
"SELECT tile_data FROM tiles WHERE zoom_level = ? AND tile_column = ? AND tile_row = ?",
|
||||
(z, x, flipped),
|
||||
)
|
||||
data = res.fetchone()[0]
|
||||
# force gzip compression only for vector
|
||||
if data[0:2] != b"\x1f\x8b":
|
||||
data = gzip.compress(data)
|
||||
writer.write_tile(tileid, data)
|
||||
|
||||
mbtiles_metadata = {}
|
||||
for row in cursor.execute("SELECT name,value FROM metadata"):
|
||||
mbtiles_metadata[row[0]] = row[1]
|
||||
|
||||
pmtiles_header, pmtiles_metadata = mbtiles_to_header_json(mbtiles_metadata)
|
||||
result = writer.finalize(pmtiles_header, pmtiles_metadata)
|
||||
|
||||
cursor.execute(
|
||||
"CREATE UNIQUE INDEX tile_index on tiles (zoom_level, tile_column, tile_row);"
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
|
||||
def pmtiles_to_dir(input, output, gzip):
|
||||
os.makedirs(output)
|
||||
def pmtiles_to_mbtiles(input, output):
|
||||
pass
|
||||
# conn = sqlite3.connect(output)
|
||||
# cursor = conn.cursor()
|
||||
# cursor.execute("CREATE TABLE metadata (name text, value text);")
|
||||
# cursor.execute(
|
||||
# "CREATE TABLE tiles (zoom_level integer, tile_column integer, tile_row integer, tile_data blob);"
|
||||
# )
|
||||
|
||||
with open(input, "r+b") as f:
|
||||
source = MmapSource(f)
|
||||
reader = Reader(source)
|
||||
metadata = reader.header().metadata
|
||||
metadata = set_metadata_compression(metadata, gzip)
|
||||
with open(os.path.join(output, "metadata.json"), "w") as f:
|
||||
f.write(json.dumps(metadata))
|
||||
# with open(input, "r+b") as f:
|
||||
# source = MmapSource(f)
|
||||
# reader = Reader(source)
|
||||
# metadata = reader.header().metadata
|
||||
# for k, v in metadata.items():
|
||||
# cursor.execute("INSERT INTO metadata VALUES(?,?)", (k, v))
|
||||
# for tile, data in reader.tiles():
|
||||
# flipped = (1 << tile[0]) - 1 - tile[2]
|
||||
# cursor.execute(
|
||||
# "INSERT INTO tiles VALUES(?,?,?,?)",
|
||||
# (tile[0], tile[1], flipped, force_compress(data, gzip)),
|
||||
# )
|
||||
|
||||
for tile, data in reader.tiles():
|
||||
directory = os.path.join(output, str(tile[0]), str(tile[1]))
|
||||
path = os.path.join(directory, str(tile[2]) + "." + metadata["format"])
|
||||
os.makedirs(directory, exist_ok=True)
|
||||
with open(path, "wb") as f:
|
||||
f.write(force_compress(data, gzip))
|
||||
# cursor.execute(
|
||||
# "CREATE UNIQUE INDEX tile_index on tiles (zoom_level, tile_column, tile_row);"
|
||||
# )
|
||||
# conn.commit()
|
||||
# conn.close()
|
||||
|
||||
|
||||
def pmtiles_to_dir(input, output):
|
||||
pass
|
||||
# os.makedirs(output)
|
||||
|
||||
# with open(input, "r+b") as f:
|
||||
# source = MmapSource(f)
|
||||
# reader = Reader(source)
|
||||
# metadata = reader.header().metadata
|
||||
# with open(os.path.join(output, "metadata.json"), "w") as f:
|
||||
# f.write(json.dumps(metadata))
|
||||
|
||||
# for tile, data in reader.tiles():
|
||||
# directory = os.path.join(output, str(tile[0]), str(tile[1]))
|
||||
# path = os.path.join(directory, str(tile[2]) + "." + metadata["format"])
|
||||
# os.makedirs(directory, exist_ok=True)
|
||||
# with open(path, "wb") as f:
|
||||
# f.write(force_compress(data, gzip))
|
||||
|
||||
@@ -194,7 +194,7 @@ def deserialize_header(buf):
|
||||
return int.from_bytes(buf[pos : pos + 8], byteorder="little")
|
||||
|
||||
def read_int32(pos):
|
||||
return int.from_bytes(buf[pos : pos + 4], byteorder="little")
|
||||
return int.from_bytes(buf[pos : pos + 4], byteorder="little", signed=True)
|
||||
|
||||
return {
|
||||
"root_offset": read_uint64(8),
|
||||
@@ -231,7 +231,7 @@ def serialize_header(h):
|
||||
b_io.write(i.to_bytes(8, byteorder="little"))
|
||||
|
||||
def write_int32(i):
|
||||
b_io.write(i.to_bytes(4, byteorder="little"))
|
||||
b_io.write(i.to_bytes(4, byteorder="little", signed=True))
|
||||
|
||||
def write_uint8(i):
|
||||
b_io.write(i.to_bytes(1, byteorder="little"))
|
||||
|
||||
@@ -1,153 +1,133 @@
|
||||
import itertools
|
||||
import json
|
||||
import tempfile
|
||||
import gzip
|
||||
import shutil
|
||||
from contextlib import contextmanager
|
||||
from pmtiles import Entry
|
||||
|
||||
|
||||
def entrysort(t):
    """Sort key for an entry: order by ``z``, then ``x``, then ``y``.

    Args:
        t: Object exposing ``z``, ``x`` and ``y`` attributes.

    Returns:
        The tuple ``(t.z, t.x, t.y)``.
    """
    return (t.z, t.x, t.y)
|
||||
|
||||
|
||||
# Find best base zoom to avoid extra indirection for as many tiles as we can
|
||||
# precondition: entries is sorted, only tile entries, len(entries) > max_dir_size
|
||||
def find_leaf_level(entries, max_dir_size):
    """Return the zoom level just above the entry at index ``max_dir_size``.

    Precondition: ``entries`` is sorted, holds only tile entries, and
    ``len(entries) > max_dir_size``.

    Args:
        entries: Sequence of objects exposing a ``z`` attribute.
        max_dir_size: Index of the first entry that no longer fits.

    Returns:
        ``entries[max_dir_size].z - 1``.

    Raises:
        IndexError: if ``len(entries) <= max_dir_size``.
    """
    first_overflow = entries[max_dir_size]
    return first_overflow.z - 1
|
||||
|
||||
|
||||
def make_pyramid(tile_entries, start_leaf_offset, max_dir_size=21845):
    """Split tile entries into a root directory and a list of leaf directories.

    If all entries fit within ``max_dir_size`` they are returned, sorted by
    ``entrysort``, as the root directory with no leaves. Otherwise:

    * entries with ``z`` below the leaf level stay in the root;
    * the remaining entries are grouped by their ancestor tile at the leaf
      level, and consecutive groups are packed into leaf directories of at
      most ``max_dir_size`` entries (a single group larger than that still
      becomes one leaf on its own);
    * for every group root packed into a leaf, a directory entry pointing at
      that leaf is appended to the root directory.

    Args:
        tile_entries: Iterable of tile entries exposing ``z``, ``x``, ``y``.
        start_leaf_offset: Offset assigned to the first leaf directory.
        max_dir_size: Maximum number of entries per directory.

    Returns:
        ``(root_entries, leaf_dirs)`` where ``leaf_dirs`` is a list of entry
        lists, each sorted by ``entrysort``.
    """
    sorted_entries = sorted(tile_entries, key=entrysort)
    if len(sorted_entries) <= max_dir_size:
        return (sorted_entries, [])

    # determine root leaf level
    leaf_level = find_leaf_level(sorted_entries, max_dir_size)

    def by_parent(e):
        # ancestor of e at leaf_level, as a (z, x, y) tuple
        level_diff = e.z - leaf_level
        return (leaf_level, e.x // (1 << level_diff), e.y // (1 << level_diff))

    root_entries = [e for e in sorted_entries if e.z < leaf_level]
    # all entries at or below the leaf level, grouped by parent (stable sort)
    entries_in_leaves = [e for e in sorted_entries if e.z >= leaf_level]
    entries_in_leaves.sort(key=by_parent)

    leaf_dirs = []

    def flush(roots, packed, offset):
        # Emit one leaf directory and the root entries that point at it;
        # returns the offset of the next leaf.
        # 17 = serialized bytes per entry (1 z + 3 x + 3 y + 6 offset +
        # 4 length).
        leaf_bytes = 17 * len(packed)
        for z, x, y in roots:
            # NOTE(review): Entry is assumed to take
            # (z, x, y, offset, length, is_dir) -- confirm against pmtiles.Entry
            root_entries.append(Entry(z, x, y, offset, leaf_bytes, True))
        # re-sort the packed entries by ZXY order
        packed.sort(key=entrysort)
        leaf_dirs.append(packed)
        return offset + leaf_bytes

    current_offset = start_leaf_offset
    packed_entries = []
    packed_roots = []

    for root, group in itertools.groupby(entries_in_leaves, key=by_parent):
        subpyramid_entries = list(group)
        if len(packed_entries) + len(subpyramid_entries) <= max_dir_size:
            packed_entries.extend(subpyramid_entries)
            packed_roots.append(root)
        else:
            # Flush what has been packed so far. Skipped when nothing is
            # packed yet (first group alone exceeds max_dir_size) so that no
            # empty leaf directory is emitted.
            if packed_entries:
                current_offset = flush(packed_roots, packed_entries, current_offset)
            packed_entries = subpyramid_entries
            packed_roots = [root]

    # finalize the last set
    if packed_entries:
        flush(packed_roots, packed_entries, current_offset)

    return (root_entries, leaf_dirs)
|
||||
from .tile import Entry, serialize_directory, Compression, serialize_header
|
||||
|
||||
|
||||
@contextmanager
def write(fname):
    """Context manager yielding a ``Writer`` bound to ``fname``.

    ``fname`` is opened for binary writing (truncating any existing file) and
    is closed when the ``with`` block exits, normally or by exception. Using
    ``with open`` also closes the file if constructing the ``Writer`` fails.

    Args:
        fname: Path of the output file.

    Yields:
        A ``Writer`` wrapping the open file object.
    """
    with open(fname, "wb") as f:
        yield Writer(f)
|
||||
|
||||
|
||||
def build_roots_leaves(entries, leaf_size):
    """Chunk ``entries`` into leaf directories and build the root over them.

    ``entries`` is cut into consecutive slices of ``leaf_size`` entries. Each
    slice is serialized with ``serialize_directory`` and the serialized
    leaves are concatenated. For every leaf, one root entry is created that
    carries the ``tile_id`` of the leaf's first entry, the leaf's byte offset
    within the concatenated leaves, and its serialized length.

    Args:
        entries: Sequence of entries exposing ``tile_id``.
        leaf_size: Number of entries per leaf directory (must be positive).

    Returns:
        ``(root_bytes, leaves_bytes, num_leaves)``: the serialized root
        directory, the concatenated serialized leaves, and the leaf count.
    """
    root_entries = []
    leaf_chunks = []
    leaves_len = 0  # running byte offset of the next leaf

    for start in range(0, len(entries), leaf_size):
        serialized = serialize_directory(entries[start : start + leaf_size])
        # Key the root entry on the first tile of *this* leaf (not of the
        # whole list), otherwise every root entry has the same tile_id.
        # NOTE(review): the 4th Entry argument is presumably run_length, with
        # 0 marking a pointer to a leaf directory -- confirm against the spec.
        root_entries.append(
            Entry(entries[start].tile_id, leaves_len, len(serialized), 0)
        )
        leaf_chunks.append(serialized)
        leaves_len += len(serialized)

    return serialize_directory(root_entries), b"".join(leaf_chunks), len(leaf_chunks)
|
||||
|
||||
|
||||
def optimize_directories(entries, target_root_len):
    """Serialize ``entries`` so the root directory is under ``target_root_len``.

    If the whole directory serializes to fewer than ``target_root_len`` bytes
    it is returned as the root with no leaves. Otherwise the entries are
    split into leaves via ``build_roots_leaves``, starting at 4096 entries
    per leaf and doubling until the serialized root is small enough.

    Args:
        entries: Sequence of directory entries.
        target_root_len: Exclusive upper bound on the root's byte length.

    Returns:
        ``(root_bytes, leaves_bytes, num_leaves)``.

    Raises:
        ValueError: if the root cannot fit even with a single leaf (without
            this check the doubling loop would never terminate).
    """
    test_bytes = serialize_directory(entries)
    if len(test_bytes) < target_root_len:
        return test_bytes, b"", 0

    leaf_size = 4096
    while True:
        root_bytes, leaves_bytes, num_leaves = build_roots_leaves(entries, leaf_size)
        if len(root_bytes) < target_root_len:
            return root_bytes, leaves_bytes, num_leaves
        if leaf_size >= len(entries):
            # Already at most one leaf; a larger leaf_size cannot shrink the
            # root any further.
            raise ValueError("root directory cannot fit in target_root_len")
        leaf_size *= 2
|
||||
|
||||
|
||||
class Writer:
|
||||
def __init__(self, f, max_dir_size):
|
||||
self.offset = 512000
|
||||
def __init__(self, f):
|
||||
self.f = f
|
||||
self.f.write(b"\0" * self.offset)
|
||||
self.tile_entries = []
|
||||
self.hash_to_offset = {}
|
||||
self.max_dir_size = max_dir_size
|
||||
self.tile_f = tempfile.TemporaryFile()
|
||||
self.offset = 0
|
||||
self.addressed_tiles = 0
|
||||
|
||||
def write_tile(self, z, x, y, data):
|
||||
# TODO enforce ordered writes
|
||||
def write_tile(self, tileid, data):
|
||||
hsh = hash(data)
|
||||
if hsh in self.hash_to_offset:
|
||||
self.tile_entries.append(
|
||||
Entry(z, x, y, self.hash_to_offset[hsh], len(data), False)
|
||||
)
|
||||
last = self.tile_entries[-1]
|
||||
found = self.hash_to_offset[hsh]
|
||||
if tileid == last.tile_id + last.run_length and last.offset == found:
|
||||
self.tile_entries[-1].run_length += 1
|
||||
else:
|
||||
self.tile_entries.append(Entry(tileid, found, len(data), 1))
|
||||
else:
|
||||
self.f.write(data)
|
||||
self.tile_entries.append(Entry(z, x, y, self.offset, len(data), False))
|
||||
self.tile_f.write(data)
|
||||
self.tile_entries.append(Entry(tileid, self.offset, len(data), 1))
|
||||
self.hash_to_offset[hsh] = self.offset
|
||||
self.offset = self.offset + len(data)
|
||||
self.offset += len(data)
|
||||
|
||||
def _write_entry(self, entry):
|
||||
if entry.is_dir:
|
||||
z_bytes = 0b10000000 | entry.z
|
||||
else:
|
||||
z_bytes = entry.z
|
||||
self.f.write(z_bytes.to_bytes(1, byteorder="little"))
|
||||
self.f.write(entry.x.to_bytes(3, byteorder="little"))
|
||||
self.f.write(entry.y.to_bytes(3, byteorder="little"))
|
||||
self.f.write(entry.offset.to_bytes(6, byteorder="little"))
|
||||
self.f.write(entry.length.to_bytes(4, byteorder="little"))
|
||||
self.addressed_tiles += 1
|
||||
|
||||
def _write_header(self, metadata, root_entries_len):
|
||||
self.f.write((0x4D50).to_bytes(2, byteorder="little"))
|
||||
self.f.write((2).to_bytes(2, byteorder="little"))
|
||||
metadata_serialized = json.dumps(metadata)
|
||||
# 512000 - (17 * 21845) - 2 (magic) - 2 (version) - 4 (jsonlen) - 2 (dictentries) = 140625
|
||||
assert len(metadata_serialized) < 140625
|
||||
self.f.write(len(metadata_serialized).to_bytes(4, byteorder="little"))
|
||||
self.f.write(root_entries_len.to_bytes(2, byteorder="little"))
|
||||
self.f.write(metadata_serialized.encode("utf-8"))
|
||||
def finalize(self, header, metadata):
|
||||
print("# of addressed tiles:", self.addressed_tiles)
|
||||
print("# of tile entries (after RLE):", len(self.tile_entries))
|
||||
print("# of tile contents:", len(self.hash_to_offset))
|
||||
|
||||
def finalize(self, metadata={}):
|
||||
root_dir, leaf_dirs = make_pyramid(
|
||||
self.tile_entries, self.offset, self.max_dir_size
|
||||
header["addressed_tiles_count"] = self.addressed_tiles
|
||||
header["tile_entries_count"] = len(self.tile_entries)
|
||||
header["tile_contents_count"] = len(self.hash_to_offset)
|
||||
|
||||
root_bytes, leaves_bytes, num_leaves = optimize_directories(
|
||||
self.tile_entries, 16384 - 127
|
||||
)
|
||||
|
||||
if len(leaf_dirs) > 0:
|
||||
for leaf_dir in leaf_dirs:
|
||||
for entry in leaf_dir:
|
||||
self._write_entry(entry)
|
||||
if num_leaves > 0:
|
||||
print("Root dir bytes:", len(root_bytes))
|
||||
print("Leaves dir bytes:", len(leaves_bytes))
|
||||
print("Num leaf dirs:", num_leaves)
|
||||
print("Total dir bytes:", len(root_bytes) + len(leaves_bytes))
|
||||
print("Average leaf dir bytes:", len(leaves_bytes) / num_leaves)
|
||||
print(
|
||||
"Average bytes per entry:",
|
||||
(len(root_bytes) + len(leaves_bytes)) / self.addressed_tiles,
|
||||
)
|
||||
else:
|
||||
print("Total dir bytes:", len(root_bytes))
|
||||
print(
|
||||
"Average bytes per addressed tile:",
|
||||
len(root_bytes) / self.addressed_tiles,
|
||||
)
|
||||
|
||||
self.f.seek(0)
|
||||
self._write_header(metadata, len(root_dir))
|
||||
compressed_metadata = gzip.compress(json.dumps(metadata).encode())
|
||||
header["clustered"] = True
|
||||
header["internal_compression"] = Compression.GZIP
|
||||
header[
|
||||
"tile_compression"
|
||||
] = Compression.GZIP # TODO: not necessarily true for non-vector
|
||||
header["root_offset"] = 127
|
||||
header["root_length"] = len(root_bytes)
|
||||
header["metadata_offset"] = header["root_offset"] + header["root_length"]
|
||||
header["metadata_length"] = len(compressed_metadata)
|
||||
header["leaf_directory_offset"] = (
|
||||
header["metadata_offset"] + header["metadata_length"]
|
||||
)
|
||||
header["leaf_directory_length"] = len(leaves_bytes)
|
||||
header["tile_data_offset"] = (
|
||||
header["leaf_directory_offset"] + header["leaf_directory_length"]
|
||||
)
|
||||
header["tile_data_length"] = self.offset
|
||||
|
||||
for entry in root_dir:
|
||||
self._write_entry(entry)
|
||||
header_bytes = serialize_header(header)
|
||||
|
||||
return {
|
||||
"num_tiles": len(self.tile_entries),
|
||||
"num_unique_tiles": len(self.hash_to_offset),
|
||||
"num_leaves": len(leaf_dirs),
|
||||
}
|
||||
self.f.write(header_bytes)
|
||||
self.f.write(root_bytes)
|
||||
self.f.write(compressed_metadata)
|
||||
self.f.write(leaves_bytes)
|
||||
self.tile_f.seek(0)
|
||||
shutil.copyfileobj(self.tile_f, self.f)
|
||||
|
||||
Reference in New Issue
Block a user