From 0aae8344fca947434aa5a9d782fd79d4ca37378a Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Sat, 30 Jul 2022 11:27:48 +0800 Subject: [PATCH] add basic header-only c++ writer implementation (hardcoded leaf level) --- cpp/pmtiles.hpp | 155 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/cpp/pmtiles.hpp b/cpp/pmtiles.hpp index e69de29..acfc2c9 100644 --- a/cpp/pmtiles.hpp +++ b/cpp/pmtiles.hpp @@ -0,0 +1,155 @@ +#include +#include +#include +#include +#include "xxhash.h" +#include + +void writePmtilesHeader(std::ostream &outfile, const std::string &metadata, uint16_t root_entries_len) { + uint16_t MAGIC = 0x4d50; + outfile.write((char *)&MAGIC,2); + uint16_t version = 2; + outfile.write((char *)&version,2); + uint32_t metadata_size = metadata.size(); + outfile.write((char *)&metadata_size,4); + outfile.write((char *)&root_entries_len,2); + outfile << metadata; +} + +void writeEntry(std::ostream &outfile, const std::tuple &tile, bool is_directory = false) { + uint8_t z_val = std::get<0>(tile); + if (is_directory) z_val |= 0b10000000; + outfile.write((char *)&z_val,1); + outfile.write((char *)&std::get<1>(tile),3); + outfile.write((char *)&std::get<2>(tile),3); + outfile.write((char *)&std::get<3>(tile),6); + outfile.write((char *)&std::get<4>(tile),4); +} + +struct pmtiles_v2_writer { + std::vector> entries{}; + std::ofstream ostream; + uint64_t offset = 0; + std::map hash_to_offset; +}; + +pmtiles_v2_writer *pmtiles_v2_open(const char *filename) { + pmtiles_v2_writer *w = new pmtiles_v2_writer; + w->ostream.open(filename,std::ios::out | std::ios::binary); + + w->offset = 512000; + + for (int i = 0; i < w->offset; ++i) { + char zero = 0; + w->ostream.write(&zero,sizeof(char)); + } + + return w; +} + +void pmtiles_v2_write_tile(pmtiles_v2_writer *w, int z, int x, int y, const std::string &data) { + XXH64_hash_t hash = XXH64(data.data(),data.size(),3857); + if (w->hash_to_offset.count(hash) > 0) { + w->entries.emplace_back(z,x,y,w->hash_to_offset[hash],data.size()); + } else { + w->ostream << data; + w->entries.emplace_back(z,x,y,w->offset,data.size()); + w->hash_to_offset[hash] = w->offset; + w->offset += data.size(); + } +} + +struct TileCompare { + bool operator()(std::tuple const &lhs, std::tuple const &rhs) const + { + uint8_t zl = std::get<0>(lhs); + uint8_t zr = std::get<0>(rhs); + if (zl != zr) return zl < zr; + uint32_t xl = std::get<1>(lhs); + uint32_t xr = std::get<1>(rhs); + if (xl != xr) return xl < xr; + uint32_t yl = std::get<2>(lhs); + uint32_t yr = std::get<2>(rhs); + return yl < yr; + } +}; + +void pmtiles_v2_finalize(pmtiles_v2_writer *w, const std::string serialized_metadata) { + if (w->entries.size() < 21845) { + w->ostream.seekp(0); + writePmtilesHeader(w->ostream,serialized_metadata,w->entries.size()); + sort(begin(w->entries),end(w->entries),TileCompare()); + + for (auto const &entry : w->entries) { + writeEntry(w->ostream,entry); + } + } else { + // this eats too much ram + std::map,std::vector>> by_z7; + for (auto const &entry : w->entries) { + if (std::get<0>(entry) >= 7) { + int level_diff = std::get<0>(entry) - 7; + std::tuple z7_tile{7,std::get<1>(entry)/(1 << level_diff),std::get<2>(entry)/(1 << level_diff)}; + if (by_z7.count(z7_tile) > 0) { + by_z7[z7_tile].push_back(entry); + } else { + by_z7[z7_tile] = {entry}; + } + } + } + + std::vector> leaves; + std::vector> leafdir_z7s; + int leafdir_size = 0; + + for (auto const &group : by_z7) { + auto key = group.first; + if (leafdir_size + group.second.size() <= 21845) { + leafdir_z7s.push_back(key); + leafdir_size += group.second.size(); + } else { + for (auto const &k : leafdir_z7s) { + leaves.emplace_back(std::get<0>(k),std::get<1>(k),std::get<2>(k),w->offset,17*leafdir_size); + auto to_sort = by_z7[k]; + sort(begin(to_sort),end(to_sort),TileCompare()); + for (auto const &entry : to_sort) writeEntry(w->ostream,entry); + } + w->offset += 17 * leafdir_size; + leafdir_z7s = {key}; + leafdir_size = group.second.size(); + } + } + + if (leafdir_size > 0) { + for (auto const &k : leafdir_z7s) { + leaves.emplace_back(std::get<0>(k),std::get<1>(k),std::get<2>(k),w->offset,17*leafdir_size); + auto to_sort = by_z7[k]; + sort(begin(to_sort),end(to_sort),TileCompare()); + for (auto const &entry : to_sort) writeEntry(w->ostream,entry); + } + } + + std::vector> root_entries; + for (auto const &entry : w->entries) { + if (std::get<0>(entry) < 7) root_entries.push_back(entry); + } + + w->ostream.seekp(0); + writePmtilesHeader(w->ostream,serialized_metadata,root_entries.size() + leaves.size()); + + std::sort(begin(root_entries),end(root_entries),TileCompare()); + for (auto const &entry : root_entries) { + writeEntry(w->ostream,entry); + } + std::sort(begin(leaves),end(leaves),TileCompare()); + for (auto const & leaf : leaves) { + writeEntry(w->ostream,leaf,true); + } + } + + // cout << "Num tiles: " << tiles.size() << endl; + // cout << "Num unique tiles: " << hash_to_offset.size() << endl; + + w->ostream.close(); + +} \ No newline at end of file