pmtiles.hpp: multiple warning and robustness fixes

This fixes a number of warnings found when integrating it in GDAL
builds, which are done with quite pedantic warning levels.

- add explicit casts when shortening variables
- add support for big-endian targets if defining PMTILES_MSB
- add sanity checks when reading directories, to guard against corrupt/hostile data
- replace an exit() call with an exception
This commit is contained in:
Even Rouault
2023-06-09 19:37:56 +02:00
parent fec0d1136c
commit 6cd1c3a39b

View File

@@ -7,6 +7,7 @@
#include <tuple> #include <tuple>
#include <functional> #include <functional>
#include <algorithm> #include <algorithm>
#include <limits> // for std::numeric_limits<>
namespace pmtiles { namespace pmtiles {
@@ -22,6 +23,32 @@ const uint8_t COMPRESSION_GZIP = 0x2;
const uint8_t COMPRESSION_BROTLI = 0x3; const uint8_t COMPRESSION_BROTLI = 0x3;
const uint8_t COMPRESSION_ZSTD = 0x4; const uint8_t COMPRESSION_ZSTD = 0x4;
// Reverses, in place, the bytes of the object pointed to by ptr.
// Always available; swap_byte_order_if_msb() decides whether it is applied.
template<class T>
inline void swap_byte_order(T* ptr) {
	unsigned char* bytes = reinterpret_cast<unsigned char*>(ptr);
	std::reverse(bytes, bytes + sizeof(T));
}

// Reverses the bytes of *ptr only when the build defines PMTILES_MSB
// (big-endian target); otherwise leaves *ptr untouched.
//
// The function is defined under the same name in both configurations, because
// the (de)serialization helpers call swap_byte_order_if_msb() unconditionally.
template<class T>
inline void swap_byte_order_if_msb(T* ptr) {
#ifdef PMTILES_MSB
	swap_byte_order(ptr);
#else
	(void)ptr; // PMTILES_MSB not defined: no reordering requested
#endif
}
// Appends the sizeof(T) raw bytes of val to ss, after passing the value
// through swap_byte_order_if_msb().
template<class T>
inline void copy_to_lsb(std::stringstream& ss, T val) {
	// val is a by-value copy, so reordering it never affects the caller's data.
	swap_byte_order_if_msb(&val);
	const char* raw = reinterpret_cast<const char*>(&val);
	ss.write(raw, sizeof(val));
}

// Single-byte specialization: one byte has no byte order, so it is written
// as-is without going through the swap helper.
template<>
inline void copy_to_lsb<uint8_t>(std::stringstream& ss, uint8_t val) {
	const char* raw = reinterpret_cast<const char*>(&val);
	ss.write(raw, sizeof(uint8_t));
}
struct headerv3 { struct headerv3 {
uint64_t root_dir_offset; uint64_t root_dir_offset;
uint64_t root_dir_bytes; uint64_t root_dir_bytes;
@@ -53,37 +80,37 @@ struct headerv3 {
std::stringstream ss; std::stringstream ss;
ss << "PMTiles"; ss << "PMTiles";
uint8_t version = 3; uint8_t version = 3;
ss.write((char *) &version, 1); copy_to_lsb(ss, version);
ss.write((char *) &root_dir_offset, 8); copy_to_lsb(ss, root_dir_offset);
ss.write((char *) &root_dir_bytes, 8); copy_to_lsb(ss, root_dir_bytes);
ss.write((char *) &json_metadata_offset, 8); copy_to_lsb(ss, json_metadata_offset);
ss.write((char *) &json_metadata_bytes, 8); copy_to_lsb(ss, json_metadata_bytes);
ss.write((char *) &leaf_dirs_offset, 8); copy_to_lsb(ss, leaf_dirs_offset);
ss.write((char *) &leaf_dirs_bytes, 8); copy_to_lsb(ss, leaf_dirs_bytes);
ss.write((char *) &tile_data_offset, 8); copy_to_lsb(ss, tile_data_offset);
ss.write((char *) &tile_data_bytes, 8); copy_to_lsb(ss, tile_data_bytes);
ss.write((char *) &addressed_tiles_count, 8); copy_to_lsb(ss, addressed_tiles_count);
ss.write((char *) &tile_entries_count, 8); copy_to_lsb(ss, tile_entries_count);
ss.write((char *) &tile_contents_count, 8); copy_to_lsb(ss, tile_contents_count);
uint8_t clustered_val = 0x0; uint8_t clustered_val = 0x0;
if (clustered) { if (clustered) {
clustered_val = 0x1; clustered_val = 0x1;
} }
ss.write((char *) &clustered_val, 1); copy_to_lsb(ss, clustered_val);
ss.write((char *) &internal_compression, 1); copy_to_lsb(ss, internal_compression);
ss.write((char *) &tile_compression, 1); copy_to_lsb(ss, tile_compression);
ss.write((char *) &tile_type, 1); copy_to_lsb(ss, tile_type);
ss.write((char *) &min_zoom, 1); copy_to_lsb(ss, min_zoom);
ss.write((char *) &max_zoom, 1); copy_to_lsb(ss, max_zoom);
ss.write((char *) &min_lon_e7, 4); copy_to_lsb(ss, min_lon_e7);
ss.write((char *) &min_lat_e7, 4); copy_to_lsb(ss, min_lat_e7);
ss.write((char *) &max_lon_e7, 4); copy_to_lsb(ss, max_lon_e7);
ss.write((char *) &max_lat_e7, 4); copy_to_lsb(ss, max_lat_e7);
ss.write((char *) &center_zoom, 1); copy_to_lsb(ss, center_zoom);
ss.write((char *) &center_lon_e7, 4); copy_to_lsb(ss, center_lon_e7);
ss.write((char *) &center_lat_e7, 4); copy_to_lsb(ss, center_lat_e7);
return ss.str(); return ss.str();
} }
@@ -101,6 +128,12 @@ struct pmtiles_version_exception : std::exception {
} }
}; };
// Reads sizeof(T) bytes of s, starting at byte position offset, into *ptr and
// then passes the value through swap_byte_order_if_msb().
//
// Throws std::out_of_range when s does not contain sizeof(T) bytes at offset.
template<class T>
inline void copy_from_lsb(T* ptr, const std::string &s, size_t offset) {
	// std::string::copy() silently copies fewer characters when the string is
	// too short, which would leave the tail of *ptr unset (and then byte-swapped).
	// Reject truncated input instead of producing a partially-filled value.
	if (offset > s.size() || s.size() - offset < sizeof(T)) {
		throw std::out_of_range("pmtiles: not enough bytes to read value");
	}
	s.copy(reinterpret_cast<char *>(ptr), sizeof(T), offset);
	swap_byte_order_if_msb(ptr);
}
inline headerv3 deserialize_header(const std::string &s) { inline headerv3 deserialize_header(const std::string &s) {
if (s.substr(0, 7) != "PMTiles") { if (s.substr(0, 7) != "PMTiles") {
throw pmtiles_magic_number_exception{}; throw pmtiles_magic_number_exception{};
@@ -109,17 +142,17 @@ inline headerv3 deserialize_header(const std::string &s) {
throw pmtiles_version_exception{}; throw pmtiles_version_exception{};
} }
headerv3 h; headerv3 h;
s.copy((char *) &h.root_dir_offset, 8, 8); copy_from_lsb(&h.root_dir_offset, s, 8);
s.copy((char *) &h.root_dir_bytes, 8, 16); copy_from_lsb(&h.root_dir_bytes, s, 16);
s.copy((char *) &h.json_metadata_offset, 8, 24); copy_from_lsb(&h.json_metadata_offset, s, 24);
s.copy((char *) &h.json_metadata_bytes, 8, 32); copy_from_lsb(&h.json_metadata_bytes, s, 32);
s.copy((char *) &h.leaf_dirs_offset, 8, 40); copy_from_lsb(&h.leaf_dirs_offset, s, 40);
s.copy((char *) &h.leaf_dirs_bytes, 8, 48); copy_from_lsb(&h.leaf_dirs_bytes, s, 48);
s.copy((char *) &h.tile_data_offset, 8, 56); copy_from_lsb(&h.tile_data_offset, s, 56);
s.copy((char *) &h.tile_data_bytes, 8, 64); copy_from_lsb(&h.tile_data_bytes, s, 64);
s.copy((char *) &h.addressed_tiles_count, 8, 72); copy_from_lsb(&h.addressed_tiles_count, s, 72);
s.copy((char *) &h.tile_entries_count, 8, 80); copy_from_lsb(&h.tile_entries_count, s, 80);
s.copy((char *) &h.tile_contents_count, 8, 88); copy_from_lsb(&h.tile_contents_count, s, 88);
if (s[96] == 0x1) { if (s[96] == 0x1) {
h.clustered = true; h.clustered = true;
} else { } else {
@@ -130,13 +163,13 @@ inline headerv3 deserialize_header(const std::string &s) {
h.tile_type = s[99]; h.tile_type = s[99];
h.min_zoom = s[100]; h.min_zoom = s[100];
h.max_zoom = s[101]; h.max_zoom = s[101];
s.copy((char *) &h.min_lon_e7, 4, 102); copy_from_lsb(&h.min_lon_e7, s, 102);
s.copy((char *) &h.min_lat_e7, 4, 106); copy_from_lsb(&h.min_lat_e7, s, 106);
s.copy((char *) &h.max_lon_e7, 4, 110); copy_from_lsb(&h.max_lon_e7, s, 110);
s.copy((char *) &h.max_lat_e7, 4, 114); copy_from_lsb(&h.max_lat_e7, s, 114);
h.center_zoom = s[118]; h.center_zoom = s[118];
s.copy((char *) &h.center_lon_e7, 4, 119); copy_from_lsb(&h.center_lon_e7, s, 119);
s.copy((char *) &h.center_lat_e7, 4, 123); copy_from_lsb(&h.center_lat_e7, s, 123);
return h; return h;
} }
@@ -145,7 +178,7 @@ struct zxy {
uint32_t x; uint32_t x;
uint32_t y; uint32_t y;
zxy(int _z, int _x, int _y) zxy(uint8_t _z, int _x, int _y)
: z(_z), x(_x), y(_y) { : z(_z), x(_x), y(_y) {
} }
}; };
@@ -312,7 +345,7 @@ zxy t_on_level(uint8_t z, uint64_t pos) {
ty += s * ry; ty += s * ry;
t /= 4; t /= 4;
} }
return zxy(z, tx, ty); return zxy(z, static_cast<int>(tx), static_cast<int>(ty));
} }
int write_varint(std::back_insert_iterator<std::string> data, uint64_t value) { int write_varint(std::back_insert_iterator<std::string> data, uint64_t value) {
@@ -344,13 +377,12 @@ struct {
// use a 0 length entry as a null value. // use a 0 length entry as a null value.
entryv3 find_tile(const std::vector<entryv3> &entries, uint64_t tile_id) { entryv3 find_tile(const std::vector<entryv3> &entries, uint64_t tile_id) {
int m = 0; int m = 0;
int n = entries.size() - 1; int n = static_cast<int>(entries.size()) - 1;
while (m <= n) { while (m <= n) {
int k = (n + m) >> 1; int k = (n + m) >> 1;
int cmp = tile_id - entries[k].tile_id; if (tile_id > entries[k].tile_id) {
if (cmp > 0) {
m = k + 1; m = k + 1;
} else if (cmp < 0) { } else if (tile_id < entries[k].tile_id) {
n = k - 1; n = k - 1;
} else { } else {
return entries[k]; return entries[k];
@@ -387,7 +419,7 @@ inline uint64_t zxy_to_tileid(uint8_t z, uint32_t x, uint32_t y) {
if (z > 31) { if (z > 31) {
throw std::overflow_error("tile zoom exceeds 64-bit limit"); throw std::overflow_error("tile zoom exceeds 64-bit limit");
} }
if (x > (1 << z) - 1 || y > (1 << z) - 1) { if (x > (1U << z) - 1U || y > (1U << z) - 1U) {
throw std::overflow_error("tile x/y outside zoom level bounds"); throw std::overflow_error("tile x/y outside zoom level bounds");
} }
uint64_t acc = 0; uint64_t acc = 0;
@@ -436,12 +468,24 @@ inline std::string serialize_directory(const std::vector<entryv3> &entries) {
return data; return data;
} }
// Exception thrown when directory data fails a validation check while being
// read (for example an out-of-range value or leftover bytes).
struct malformed_directory_exception : public std::exception {
	// Returns a fixed, static description of the error.
	auto what() const noexcept -> const char * override {
		return "malformed directory exception";
	}
};
// takes an uncompressed byte buffer // takes an uncompressed byte buffer
inline std::vector<entryv3> deserialize_directory(const std::string &decompressed) { inline std::vector<entryv3> deserialize_directory(const std::string &decompressed) {
const char *t = decompressed.data(); const char *t = decompressed.data();
const char *end = t + decompressed.size(); const char *end = t + decompressed.size();
uint64_t num_entries = decode_varint(&t, end); const uint64_t num_entries_64bit = decode_varint(&t, end);
// Sanity check to avoid excessive memory allocation attempt:
// each directory entry takes at least 4 bytes
if (num_entries_64bit / 4U > decompressed.size()) {
throw malformed_directory_exception();
}
const size_t num_entries = static_cast<size_t>(num_entries_64bit);
std::vector<entryv3> result; std::vector<entryv3> result;
result.resize(num_entries); result.resize(num_entries);
@@ -454,17 +498,28 @@ inline std::vector<entryv3> deserialize_directory(const std::string &decompresse
} }
for (size_t i = 0; i < num_entries; i++) { for (size_t i = 0; i < num_entries; i++) {
result[i].run_length = decode_varint(&t, end); const uint64_t val = decode_varint(&t, end);
if (val > std::numeric_limits<uint32_t>::max()) {
throw malformed_directory_exception();
}
result[i].run_length = static_cast<uint32_t>(val);
} }
for (size_t i = 0; i < num_entries; i++) { for (size_t i = 0; i < num_entries; i++) {
result[i].length = decode_varint(&t, end); const uint64_t val = decode_varint(&t, end);
if (val > std::numeric_limits<uint32_t>::max()) {
throw malformed_directory_exception();
}
result[i].length = static_cast<uint32_t>(val);
} }
for (size_t i = 0; i < num_entries; i++) { for (size_t i = 0; i < num_entries; i++) {
uint64_t tmp = decode_varint(&t, end); uint64_t tmp = decode_varint(&t, end);
if (i > 0 && tmp == 0) { if (i > 0 && tmp == 0) {
if (result[i - 1].offset > std::numeric_limits<uint64_t>::max() - result[i - 1].length) {
throw malformed_directory_exception();
}
result[i].offset = result[i - 1].offset + result[i - 1].length; result[i].offset = result[i - 1].offset + result[i - 1].length;
} else { } else {
result[i].offset = tmp - 1; result[i].offset = tmp - 1;
@@ -473,8 +528,7 @@ inline std::vector<entryv3> deserialize_directory(const std::string &decompresse
// assert the directory has been fully consumed // assert the directory has been fully consumed
if (t != end) { if (t != end) {
fprintf(stderr, "Error: malformed pmtiles directory\n"); throw malformed_directory_exception();
exit(EXIT_FAILURE);
} }
return result; return result;
@@ -486,14 +540,14 @@ inline std::tuple<std::string, std::string, int> build_root_leaves(const std::fu
int num_leaves = 0; int num_leaves = 0;
for (size_t i = 0; i < entries.size(); i += leaf_size) { for (size_t i = 0; i < entries.size(); i += leaf_size) {
num_leaves++; num_leaves++;
int end = i + leaf_size; size_t end = i + leaf_size;
if (i + leaf_size > entries.size()) { if (i + leaf_size > entries.size()) {
end = entries.size(); end = entries.size();
} }
std::vector<pmtiles::entryv3> subentries = {entries.begin() + i, entries.begin() + end}; std::vector<pmtiles::entryv3> subentries = {entries.begin() + i, entries.begin() + end};
auto uncompressed_leaf = pmtiles::serialize_directory(subentries); auto uncompressed_leaf = pmtiles::serialize_directory(subentries);
auto compressed_leaf = mycompress(uncompressed_leaf, compression); auto compressed_leaf = mycompress(uncompressed_leaf, compression);
root_entries.emplace_back(entries[i].tile_id, leaves_bytes.size(), compressed_leaf.size(), 0); root_entries.emplace_back(entries[i].tile_id, leaves_bytes.size(), static_cast<uint32_t>(compressed_leaf.size()), 0);
leaves_bytes += compressed_leaf; leaves_bytes += compressed_leaf;
} }
auto uncompressed_root = pmtiles::serialize_directory(root_entries); auto uncompressed_root = pmtiles::serialize_directory(root_entries);
@@ -521,7 +575,7 @@ inline std::tuple<std::string, std::string, int> make_root_leaves(const std::fun
} }
inline void collect_entries(const std::function<std::string(const std::string &, uint8_t)> decompress, std::vector<entry_zxy> &tile_entries, const char *pmtiles_map, const headerv3 &h, uint64_t dir_offset, uint64_t dir_len) { inline void collect_entries(const std::function<std::string(const std::string &, uint8_t)> decompress, std::vector<entry_zxy> &tile_entries, const char *pmtiles_map, const headerv3 &h, uint64_t dir_offset, uint64_t dir_len) {
std::string dir_s{pmtiles_map + dir_offset, dir_len}; std::string dir_s{pmtiles_map + dir_offset, static_cast<size_t>(dir_len)};
std::string decompressed_dir = decompress(dir_s, h.internal_compression); std::string decompressed_dir = decompress(dir_s, h.internal_compression);
auto dir_entries = pmtiles::deserialize_directory(decompressed_dir); auto dir_entries = pmtiles::deserialize_directory(decompressed_dir);
@@ -555,7 +609,10 @@ inline std::pair<uint64_t, uint32_t> get_tile(const std::function<std::string(co
auto h = pmtiles::deserialize_header(header_s); auto h = pmtiles::deserialize_header(header_s);
uint64_t dir_offset = h.root_dir_offset; uint64_t dir_offset = h.root_dir_offset;
uint32_t dir_length = h.root_dir_bytes; if (h.root_dir_bytes > std::numeric_limits<uint32_t>::max()) {
throw malformed_directory_exception();
}
uint32_t dir_length = static_cast<uint32_t>(h.root_dir_bytes);
for (int depth = 0; depth <= 3; depth++) { for (int depth = 0; depth <= 3; depth++) {
std::string dir_s{pmtiles_map + dir_offset, dir_length}; std::string dir_s{pmtiles_map + dir_offset, dir_length};
std::string decompressed_dir = decompress(dir_s, h.internal_compression); std::string decompressed_dir = decompress(dir_s, h.internal_compression);