/usr/include/osmium/utils/stringtable.hpp is in libosmium-dev 0.0~20111213-g7f3500a-1build2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 | #ifndef OSMIUM_UTILS_STRINGTABLE_HPP
#define OSMIUM_UTILS_STRINGTABLE_HPP
/*
Copyright 2011 Jochen Topf <jochen@topf.org> and others (see README).
This file is part of Osmium (https://github.com/joto/osmium).
Osmium is free software: you can redistribute it and/or modify it under the
terms of the GNU Lesser General Public License or (at your option) the GNU
General Public License as published by the Free Software Foundation, either
version 3 of the Licenses, or (at your option) any later version.
Osmium is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU Lesser General Public License and the GNU
General Public License for more details.
You should have received a copy of the Licenses along with Osmium. If not, see
<http://www.gnu.org/licenses/>.
*/
#include <stdint.h>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <limits>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
namespace Osmium {
/**
* StringTable management for PBF writer
*
* All strings are stored as indexes to rows in a StringTable. The StringTable contains
* one row for each used string, so strings that are used multiple times need to be
* stored only once. The StringTable is sorted by usage-count, so the most often used
* string is stored at index 1.
*/
class StringTable {

    /// Type for string IDs (interim and final).
    typedef uint16_t string_id_t;

    /**
     * Bookkeeping record stored as the value part of the strings map.
     *
     * When a string is recorded for the first time its count is set to 0
     * and it gets the next free interim id (interim ids start at 1; the
     * value 0 is never handed out). The interim id is what callers store
     * into the pbf objects while a block is being assembled.
     *
     * Before the block is serialized, store_stringtable() orders all
     * entries by descending count and assigns the final ids, so that
     * frequently used strings get small ids. Afterwards callers translate
     * interim ids into final ids with map_string_id().
     */
    struct string_info {

        /**
         * Number of times the string was recorded again after its first
         * occurrence (i.e. occurrences minus one). Saturates at the
         * maximum of uint16_t instead of wrapping around.
         */
        uint16_t count;

        /**
         * Intermediate id assigned when the string was first recorded.
         */
        string_id_t interim_id;
    };

    /**
     * Ordering used as the key comparison of the multimap in
     * store_stringtable(). Deliberately inverted: an entry with a HIGHER
     * count compares as "less", so iterating the multimap yields the most
     * used strings first. Entries with equal counts are equivalent.
     */
    friend bool operator<(const string_info& lhs, const string_info& rhs) {
        return lhs.count > rhs.count;
    }

    /**
     * Interim StringTable: every string recorded for the current block,
     * together with its usage count and interim id.
     */
    typedef std::map<std::string, string_info> string2string_info_t;
    string2string_info_t m_strings;

    /**
     * Lookup table from interim id (the index) to final StringTable id.
     * Filled by store_stringtable(); index 0 is never assigned.
     */
    typedef std::vector<string_id_t> interim_id2id_t;
    interim_id2id_t m_id2id_map;

    /// Number of distinct strings recorded so far == highest interim id in use.
    int m_size;

public:

    StringTable() : m_strings(), m_id2id_map(), m_size(0) {
    }

    /**
     * Record a string in the interim StringTable. If the string is already
     * known only its usage counter is increased, otherwise a new entry with
     * a fresh interim id is created.
     *
     * @param string the string to record
     * @return the interim id assigned to the string (always >= 1)
     * @throws std::length_error if the string is new and all interim ids
     *         representable in string_id_t are already in use. Nothing is
     *         inserted in that case.
     */
    string_id_t record_string(const std::string& string) {
        // lower_bound gives us both the "is it there?" answer and an
        // insertion hint, so a new string costs only one tree descent.
        string2string_info_t::iterator pos = m_strings.lower_bound(string);
        if (pos != m_strings.end() && !m_strings.key_comp()(string, pos->first)) {
            // Known string: bump the counter, but saturate rather than wrap,
            // because a wrapped counter would sort a very frequent string last.
            if (pos->second.count != std::numeric_limits<uint16_t>::max()) {
                ++pos->second.count;
            }
            return pos->second.interim_id;
        }

        // New string: refuse to hand out an id that would not fit into
        // string_id_t. Truncation would make ids collide (and id 0 would be
        // mistaken for "not yet assigned").
        if (static_cast<uint32_t>(m_size) >= std::numeric_limits<string_id_t>::max()) {
            throw std::length_error("StringTable is full: too many distinct strings");
        }

        string_info info;
        info.count = 0;
        info.interim_id = static_cast<string_id_t>(++m_size);
        m_strings.insert(pos, string2string_info_t::value_type(string, info));
        return info.interim_id;
    }

    /**
     * Helper that swaps the two members of a pair. Used to turn the
     * (string, info) entries of the interim table into (info, string)
     * entries that can be ordered by count.
     */
    template<typename A, typename B>
    static std::pair<B,A> flip_pair(const std::pair<A,B>& p) {
        return std::pair<B,A>(p.second, p.first);
    }

    /**
     * Order the interim StringTable by usage count and append its strings
     * to the given protobuf StringTable. While doing so, fill the id map
     * so that map_string_id() can translate interim ids into final ids.
     *
     * Final ids are handed out starting at 1 in the order the strings are
     * appended. NOTE(review): this presumably relies on the caller having
     * put one entry at index 0 of st beforehand - confirm against the caller.
     *
     * Strings are ordered by descending count. The relative order of
     * strings with the same count is determined by how the standard
     * library resolves hinted inserts of equivalent keys into a multimap
     * and should not be relied upon.
     *
     * @param st the protobuf StringTable to append to; must not be null
     */
    void store_stringtable(OSMPBF::StringTable* st) {
        typedef std::multimap<string_info, std::string> cmap;
        cmap sortedbycount;

        // One slot per interim id plus the unused slot 0.
        m_id2id_map.resize(m_size+1);

        std::transform(m_strings.begin(), m_strings.end(),
                       std::inserter(sortedbycount, sortedbycount.begin()), flip_pair<std::string, string_info>);

        int n=0;
        cmap::const_iterator end=sortedbycount.end();
        for (cmap::const_iterator it = sortedbycount.begin(); it != end; ++it) {
            // add the string of the current item to the pbf StringTable
            st->add_s(it->second);

            // store the mapping from the interim-id to the real id
            m_id2id_map[it->first.interim_id] = static_cast<string_id_t>(++n);
        }
    }

    /**
     * Map from an interim ID to a real string ID.
     *
     * Only meaningful after store_stringtable() has been called for the
     * strings recorded so far.
     *
     * @param interim_id an id previously returned by record_string()
     * @return the final id assigned by store_stringtable()
     * @throws std::out_of_range if interim_id is outside the id map, e.g.
     *         because store_stringtable() has not been called yet
     */
    string_id_t map_string_id(const string_id_t interim_id) const {
        return m_id2id_map.at(interim_id);
    }

    /**
     * Clear the stringtable, preparing for the next block.
     *
     * Only the recorded strings and the id counter are reset; the id map
     * keeps its contents until the next call to store_stringtable().
     */
    void clear() {
        m_strings.clear();
        m_size = 0;
    }

}; // class StringTable
} // namespace Osmium
#endif // OSMIUM_UTILS_STRINGTABLE_HPP
|