libscid 0.1.0
Chess applications made easy.
Loading...
Searching...
No Matches
namebase.h
1/*
2 * Copyright (C) 2014-2017 Fulvio Benini
3
4 * This file is part of Scid (Shane's Chess Information Database).
5 *
6 * Scid is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation.
9 *
10 * Scid is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with Scid. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef SCID_NAMEBASE_H
20#define SCID_NAMEBASE_H
21
22#include "scid/database/game_id.h"
23#include "scid/database/index.h"
24#include "scid/database/indexentry.h"
25#include "scid/database/misc.h"
26#include <algorithm>
27#include <array>
28#include <limits>
29#include <map>
30#include <memory>
31#include <string_view>
32#include <vector>
33
34namespace scid::database {
35
/// Integral type used to select one of the name categories listed below.
using nameT = unsigned int;

/// Name categories. The first four are valid categories and double as array
/// indexes; their numeric values are spelled out because they are used to
/// index per-category containers.
enum {
    NAME_PLAYER = 0,
    NAME_EVENT = 1,
    NAME_SITE = 2,
    NAME_ROUND = 3,
    NUM_NAME_TYPES = 4, // count of valid categories (not a category itself)
    NAME_INVALID = 99   // sentinel: "not a valid category"
};
45
50class NameBase {
51 std::vector<std::unique_ptr<const char[]>> names_[NUM_NAME_TYPES];
52 struct idxCmp {
53 bool operator()(const char* str1, const char* str2) const {
54 // *** Compatibility ***
55 // Older code used a custom StrTree class with a peculiar sorting:
56 // - the first char was interpreted as an unsigned char;
57 // - the remaining part was compared with the function
58 // strComapare(),
59 // which converts the chars to ints, and is not consistent with
60 // the standard function strcmp().
61 // The old StrTree class did also have unpredictable behaviors when
62 // fed with names not sorted according to that criteria, for example
63 // it could create Namebase objects with duplicate entries.
64 // ***
65 if (*str1 == *str2)
66 return strCompare(str1, str2) < 0;
67
68 return static_cast<uint32_t>(*str1) < static_cast<uint32_t>(*str2);
69 }
70 };
71 std::map<const char*, idNumberT, idxCmp> idx_[NUM_NAME_TYPES];
72
73public:
74 // Add a name (string) to the NameBase.
75 // @param nt: @e nameT type of the name to add.
76 // @param name: the name to add.
77 // @return the ID assigned to @e name.
78 idNumberT namebase_add(
79 nameT nt, std::string_view name,
80 std::map<const char*, idNumberT, idxCmp>::iterator* hint = nullptr) {
81 ASSERT(IsValidNameType(nt));
82 ASSERT(names_[nt].size() <= std::numeric_limits<idNumberT>::max());
83
84 char* alloc = new char[name.size() + 1];
85 std::copy_n(name.data(), name.size(), alloc);
86 alloc[name.size()] = '\0';
87 idNumberT newID = static_cast<idNumberT>(names_[nt].size());
88 names_[nt].emplace_back(alloc);
89 if (hint) {
90 idx_[nt].emplace_hint(*hint, alloc, newID);
91 } else {
92 idx_[nt].emplace(alloc, newID);
93 }
94 return newID;
95 }
96
97 // Return the ID corresponding to @e name.
98 // Add the name to the NameBase if it doesn't exists.
99 // @param nt: @e nameT type of the name.
100 idNumberT namebase_find_or_add(nameT nt, const char* name) {
101 ASSERT(IsValidNameType(nt));
102
103 auto& nb = idx_[nt];
104 auto it = nb.lower_bound(name);
105 if (it != nb.end() && !nb.key_comp()(name, it->first))
106 return it->second;
107
108 return namebase_add(nt, name, &it);
109 }
110
111 // Return the number of names stored in the NameBase.
112 // @param nt: a valid @e nameT type.
113 size_t namebase_size(nameT nt) const {
114 ASSERT(IsValidNameType(nt));
115
116 return names_[nt].size();
117 }
118
125 bool insert(const char* name, size_t nameLen, nameT nt, idNumberT id) {
126 if (id >= names_[nt].size())
127 names_[nt].resize(id + size_t{1});
128
129 if (names_[nt][id]) // A name with the same ID already exists
130 return false;
131
132 char* buf = new char[nameLen + 1];
133 std::copy_n(name, nameLen, buf);
134 buf[nameLen] = '\0';
135 names_[nt][id].reset(buf);
136 auto it = idx_[nt].emplace_hint(idx_[nt].end(), buf, id);
137 return it->second == id; // Check that the name doesn't already exists
138 }
139
143 void Clear() { *this = NameBase(); }
144
152 std::vector<idNumberT> getFirstMatches(nameT nt, const char* str,
153 size_t maxMatches) const {
154 ASSERT(IsValidNameType(nt) && str != NULL);
155
156 std::vector<idNumberT> res;
157 size_t len = strlen(str);
158 for (auto it = idx_[nt].lower_bound(str);
159 it != idx_[nt].end() && res.size() < maxMatches; ++it) {
160 const char* s = it->first;
161 if (strlen(s) < len || !std::equal(str, str + len, s))
162 break;
163 res.emplace_back(it->second);
164 }
165 return res;
166 }
167
174 const char* GetName(nameT nt, idNumberT id) const {
175 ASSERT(IsValidNameType(nt) && id < GetNumNames(nt));
176 return names_[nt][id].get();
177 }
178
183 const decltype(idx_)& getNames() const { return idx_; }
184
190 idNumberT GetNumNames(nameT nt) const {
191 ASSERT(IsValidNameType(nt));
192 return static_cast<idNumberT>(names_[nt].size());
193 }
194
202 scid::core::errorT FindExactName(nameT nt, const char* str, idNumberT* idPtr) const {
203 ASSERT(IsValidNameType(nt) && str != NULL && idPtr != NULL);
204
205 auto it = idx_[nt].find(str);
206 if (it != idx_[nt].end()) {
207 *idPtr = (*it).second;
208 return scid::core::OK;
209 }
210 return scid::core::ERROR_NameNotFound;
211 }
212
218 std::vector<uint32_t> generateHashMap(nameT nt) const {
219 std::vector<uint32_t> res(names_[nt].size());
220 std::transform(
221 names_[nt].begin(), names_[nt].end(), res.begin(),
222 [](auto const& name) { return strStartHash(name.get()); });
223 return res;
224 }
225
230 std::array<std::vector<int>, NUM_NAME_TYPES>
231 calcNameFreq(Index const& idx) const {
232 std::array<std::vector<int>, NUM_NAME_TYPES> resVec;
233 for (nameT n = NAME_PLAYER; n < NUM_NAME_TYPES; n++) {
234 resVec[n].resize(GetNumNames(n), 0);
235 }
236 for (gamenumT i = 0, n = idx.GetNumGames(); i < n; i++) {
237 const IndexEntry* ie = idx.GetEntry(i);
238 resVec[NAME_PLAYER][ie->GetWhite()] += 1;
239 resVec[NAME_PLAYER][ie->GetBlack()] += 1;
240 resVec[NAME_EVENT][ie->GetEvent()] += 1;
241 resVec[NAME_SITE][ie->GetSite()] += 1;
242 resVec[NAME_ROUND][ie->GetRound()] += 1;
243 }
244 return resVec;
245 }
246
251 size_t count_invalid_ids(Index const& idx) const {
252 size_t n_invalid = 0;
253 std::array<size_t, NUM_NAME_TYPES> maxID;
254 for (auto n = nameT{}; n < NUM_NAME_TYPES; n++) {
255 maxID[n] = GetNumNames(n);
256 }
257 for (gamenumT i = 0, n = idx.GetNumGames(); i < n; i++) {
258 const IndexEntry* ie = idx.GetEntry(i);
259 n_invalid += ie->GetWhite() < maxID[NAME_PLAYER] ? 0 : 1;
260 n_invalid += ie->GetBlack() < maxID[NAME_PLAYER] ? 0 : 1;
261 n_invalid += ie->GetEvent() < maxID[NAME_EVENT] ? 0 : 1;
262 n_invalid += ie->GetSite() < maxID[NAME_SITE] ? 0 : 1;
263 n_invalid += ie->GetRound() < maxID[NAME_ROUND] ? 0 : 1;
264 }
265 return n_invalid;
266 }
267
273 static bool IsValidNameType(nameT nt) { return (nt < NUM_NAME_TYPES); }
274
282 static nameT NameTypeFromString(const char* str) {
283 if (*str == '\0')
284 return NAME_INVALID;
285 if (strIsAlphaPrefix(str, "player"))
286 return NAME_PLAYER;
287 if (strIsAlphaPrefix(str, "event"))
288 return NAME_EVENT;
289 if (strIsAlphaPrefix(str, "site"))
290 return NAME_SITE;
291 if (strIsAlphaPrefix(str, "round"))
292 return NAME_ROUND;
293 if (strIsAlphaPrefix("player", str))
294 return NAME_PLAYER;
295 if (strIsAlphaPrefix("event", str))
296 return NAME_EVENT;
297 if (strIsAlphaPrefix("site", str))
298 return NAME_SITE;
299 if (strIsAlphaPrefix("round", str))
300 return NAME_ROUND;
301 return NAME_INVALID;
302 }
303};
304
308struct TagRoster {
309 const char* event;
310 const char* site;
311 const char* round;
312 const char* white;
313 const char* black;
314
315 template <typename TEntry>
316 static TagRoster make(TEntry const& ie, NameBase const& nb) {
317 TagRoster res;
318 res.event = nb.GetName(NAME_EVENT, ie.GetEvent());
319 res.site = nb.GetName(NAME_SITE, ie.GetSite());
320 res.white = nb.GetName(NAME_PLAYER, ie.GetWhite());
321 res.black = nb.GetName(NAME_PLAYER, ie.GetBlack());
322 res.round = nb.GetName(NAME_ROUND, ie.GetRound());
323 return res;
324 }
325
326 template <typename TEntry, typename Fn>
327 auto map(TEntry& dest, Fn getID) const {
328 {
329 auto [err, id] = getID(NAME_EVENT, event);
330 if (err)
331 return err;
332 dest.SetEvent(id);
333 }
334 {
335 auto [err, id] = getID(NAME_SITE, site);
336 if (err)
337 return err;
338 dest.SetSite(id);
339 }
340 {
341 auto [err, id] = getID(NAME_ROUND, round);
342 if (err)
343 return err;
344 dest.SetRound(id);
345 }
346 {
347 auto [err, id] = getID(NAME_PLAYER, white);
348 if (err)
349 return err;
350 dest.SetWhite(id);
351 }
352 {
353 auto [err, id] = getID(NAME_PLAYER, black);
354 if (!err)
355 dest.SetBlack(id);
356
357 return err;
358 }
359 }
360};
361
362
363} // namespace scid::database
364#endif // SCID_NAMEBASE_H
Definition indexentry.h:51
Definition index.h:36
gamenumT GetNumGames() const
Header getter functions.
This class stores the database's names (players, events, sites and rounds).
Definition namebase.h:50
const char * GetName(nameT nt, idNumberT id) const
Retrieve a name.
Definition namebase.h:174
const decltype(idx_) & getNames() const
Definition namebase.h:183
std::array< std::vector< int >, NUM_NAME_TYPES > calcNameFreq(Index const &idx) const
Counts how many times each name is used.
Definition namebase.h:231
idNumberT GetNumNames(nameT nt) const
Definition namebase.h:190
std::vector< idNumberT > getFirstMatches(nameT nt, const char *str, size_t maxMatches) const
Get the first few matches of a name prefix.
Definition namebase.h:152
size_t count_invalid_ids(Index const &idx) const
Counts how many invalid IDs (references to names that do not exist in this NameBase) are present in i...
Definition namebase.h:251
bool insert(const char *name, size_t nameLen, nameT nt, idNumberT id)
DEPRECATED Add a name (string) and its associated id to the NameBase.
Definition namebase.h:125
scid::core::errorT FindExactName(nameT nt, const char *str, idNumberT *idPtr) const
Finds an exact full, case-sensitive name.
Definition namebase.h:202
std::vector< uint32_t > generateHashMap(nameT nt) const
For every name generates a 32bit hash with the first 4 chars.
Definition namebase.h:218
static nameT NameTypeFromString(const char *str)
Match a string to a nameT.
Definition namebase.h:282
void Clear()
Frees memory, leaving the object empty.
Definition namebase.h:143
static bool IsValidNameType(nameT nt)
Validate a nameT type.
Definition namebase.h:273
class StrRange - parse a string interpreting its content as 1 or 2 integers separated by whitespace.
Definition common.h:30
The Seven Tag Roster defined in the PGN standard is stored in the IndexEntry, but 5 are indexes that ...
Definition namebase.h:308