libscid 0.1.0
Chess applications made easy.
Loading...
Searching...
No Matches
encode.h
Go to the documentation of this file.
1/*
2 * Copyright (C) 2022 Fulvio Benini.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
20 * THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 */
22
27#pragma once
28
29#include "scid/core/game.h"
30#include "scid/core/nags.h"
31
32#include <algorithm>
33#include <cstdint>
34#include <optional>
35#include <span>
36#include <string>
37#include <string_view>
38#include <vector>
39
40namespace scid::core::pgn {
41
/// Options controlling how a game is rendered as PGN text.
struct EncodeOptions {
    /// Passed to nagToString() for every NAG that is written.
    bool symbolicNags = false;
    /// When false only the seven core tag pairs (plus FEN, if the game has a
    /// non-standard start) are written: ratings, ECO, EventDate and the
    /// extra tags are skipped.
    bool includeSupplementalTags = true;
    /// When false neither comments nor NAGs are written.
    bool includeComments = true;
    /// When false the child variations of every move are skipped.
    bool includeVariations = true;
    /// When set, encode() wraps lines at this width instead of using its
    /// compile-time default; a width of 0 disables the wrapping.
    std::optional<unsigned> lineWidth = std::nullopt;
};
49
// We want to split the PGN text in lines to make it more readable, but we do
// not want to insert extra newline chars inside comments or tag values.
// This implies that even a very long comment would stay on a single line if
// the user didn't insert newline chars himself.
// However it is possible to set @e hard_len (i.e. to 1024) to allow converting
// spaces to newline chars in lines longer than @e hard_len.
//
// Every @e breakpoint_char in [begin, end) is overwritten: with a newline when
// it is the last breakpoint before a line that would exceed @e desired_len,
// with a space otherwise.
// @param begin, end: the range to be processed (random access iterators).
// @returns an iterator to the first char of the last line.
template <int desired_len = 80, char breakpoint_char = '\0', int hard_len = 0,
          typename Iter>
Iter break_lines(Iter begin, Iter end) {
    auto line_first_char = begin;
    auto last_breakpoint = begin;
    auto it = begin;
    while (true) {
        // The std:: qualification is required: an unqualified call is found
        // only via ADL and does not compile when Iter is a raw pointer.
        it = std::find_if(it, end, [](char ch) {
            return ch == '\n' || ch == breakpoint_char;
        });

        // Change the last breakpoint to newline char if the line would exceed
        // the desired length and there weren't newline chars (for example in
        // comments) between this and the last breakpoint.
        if (std::distance(line_first_char, it) > desired_len &&
            last_breakpoint > line_first_char) {
            *last_breakpoint = '\n';
            line_first_char = last_breakpoint + 1;
        }

        // If a secondary line length was requested, try to convert spaces to
        // newline chars (this is not desirable, but old software may use
        // limited fixed size buffer when reading PGNs).
        if constexpr (hard_len != 0) {
            if (std::distance(line_first_char, it) > hard_len) {
                line_first_char =
                    break_lines<hard_len, ' '>(line_first_char, it);
            }
        }

        if (it == end)
            break;

        if (*it == '\n') {
            line_first_char = ++it;
        } else /* *it == breakpoint_char */ {
            last_breakpoint = it;
            *it++ = ' ';
        }
    }
    return line_first_char;
}
95
// Same as the break_lines() overload with compile-time widths, but the desired
// line length is chosen at run time and the breakpoint char is always '\0'.
// Every '\0' in [begin, end) is overwritten: with a newline when it is the
// last breakpoint before a line that would exceed @e desired_len, with a space
// otherwise.
// @param begin, end:  the range to be processed (random access iterators).
// @param desired_len: the preferred maximum line length; 0 disables the
//                     wrapping (breakpoints are only converted to spaces).
// @returns an iterator to the first char of the last line.
template <typename Iter>
Iter break_lines(Iter begin, Iter end, unsigned desired_len) {
    // Compare as signed values: std::distance() returns a signed type.
    const auto max_len = static_cast<long long>(desired_len);

    auto line_first_char = begin;
    auto last_breakpoint = begin;
    auto it = begin;
    while (true) {
        // The std:: qualification is required: an unqualified call is found
        // only via ADL and does not compile when Iter is a raw pointer.
        it = std::find_if(it, end, [](char ch) {
            return ch == '\n' || ch == '\0';
        });

        // Break at the last breakpoint if the line would become too long and
        // no newline char was found after that breakpoint.
        if (desired_len != 0 &&
            std::distance(line_first_char, it) > max_len &&
            last_breakpoint > line_first_char) {
            *last_breakpoint = '\n';
            line_first_char = last_breakpoint + 1;
        }

        if (it == end)
            break;

        if (*it == '\n') {
            line_first_char = ++it;
        } else /* *it == '\0' */ {
            last_breakpoint = it;
            *it++ = ' ';
        }
    }
    return line_first_char;
}
125
// Escape quote and backslash chars according to the PGN standard:
// "A quote inside a string is represented by the backslash immediately followed
// by a quote. A backslash inside a string is represented by two adjacent
// backslashes."
// Chars before @e pos are left untouched.
// @param str: the string containing the chars to be escaped.
// @param pos: start of the substring of @e str to be processed.
template <typename TCont>
void escape_string(TCont& str, typename TCont::size_type pos) {
    const auto needs_escape = [](char ch) { return ch == '\"' || ch == '\\'; };

    for (auto cur = std::find_if(str.begin() + pos, str.end(), needs_escape);
         cur != str.end();
         cur = std::find_if(cur, str.end(), needs_escape)) {
        // Put a backslash in front of the char, then step over both the
        // backslash and the char that has just been escaped.
        cur = str.insert(cur, '\\');
        cur += 2;
    }
}
144
// Encode a tag pair according to the PGN standard.
// "A tag pair is composed of four consecutive tokens: a left bracket token, a
// symbol token, a string token, and a right bracket token. The symbol token is
// the tag name and the string token is the tag value associated with the tag
// name. There are no white space characters between the left bracket and the
// tag name, there are no white space characters between the tag value and the
// right bracket, and there is a single space character between the tag name and
// the tag value."
// The separator between name and value is written as '\0' (the breakpoint char
// that break_lines() later replaces). Quotes and backslashes inside the value
// are escaped with escape_string().
// @param unknown_to_question_mark: if true, and the value of the Event, Site,
// Round, White or Black tag is empty, the value is written as a single
// question mark.
template <bool unknown_to_question_mark = false, typename TCont>
void encode_tag_pair(std::string_view tag, std::string_view value,
                     TCont& dest) {
    bool use_question_mark = false;
    if (unknown_to_question_mark && value.empty()) {
        constexpr std::string_view placeholder_tags[] = {
            "Event", "Site", "Round", "White", "Black"};
        for (const auto name : placeholder_tags) {
            if (tag == name) {
                use_question_mark = true;
                break;
            }
        }
    }

    // Left bracket, tag name, separator.
    dest.push_back('[');
    dest.insert(dest.end(), tag.begin(), tag.end());
    dest.push_back('\0');

    // Quoted tag value.
    dest.push_back('"');
    if (use_question_mark) {
        dest.push_back('?');
    } else {
        const auto value_begin = dest.size();
        dest.insert(dest.end(), value.begin(), value.end());
        escape_string(dest, value_begin);
    }
    dest.push_back('"');

    // "Each tag pair should appear left justified on a line by itself"
    dest.push_back(']');
    dest.push_back('\n');
}
178
// Encode a comment as "rest of the line": this comment type starts with a
// semicolon character and continues to the end of the line.
// A '\0' separator is written before the semicolon, unless @e dest is empty or
// already ends with '\0' or a newline.
// If @e comment includes any newline or '\0' char (or if @e hard_len is not 0
// and the comment is not shorter than @e hard_len) the comment cannot be
// encoded in this way: it returns false and does not modify @e dest.
template <int hard_len = 0, typename TCont>
[[nodiscard]] bool encode_comment_rest_of_line(std::string_view comment,
                                               TCont& dest) {
    const bool too_long = hard_len != 0 && comment.size() >= hard_len;
    if (too_long)
        return false;

    for (const char ch : comment) {
        if (ch == '\n' || ch == '\0')
            return false;
    }

    const bool needs_separator =
        !dest.empty() && dest.back() != '\0' && dest.back() != '\n';
    if (needs_separator)
        dest.push_back('\0');

    dest.push_back(';');
    dest.insert(dest.end(), comment.begin(), comment.end());
    dest.push_back('\n');
    return true;
}
199
200// Encode a comment in one of the two kinds specified by PGN standard.
201// The kind that "starts with a left brace character and continues to the next
202// right brace character" is preferred and used if the comment do not contains
203// curly braces itself. If the comments contains both curly braces and newline
204// or line break chars the curly braces inside the comment are replaced with
205// UTF-8 fullwidth curly braces.
206template <int hard_len = 0, typename TCont>
207static void encode_comment(std::string_view comment, TCont& dest) {
208 auto is_curly = [](char ch) { return ch == '{' || ch == '}'; };
209 auto it_curly = std::find_if(comment.begin(), comment.end(), is_curly);
210 if (it_curly != comment.end() &&
211 encode_comment_rest_of_line<hard_len>(comment, dest))
212 return;
213
214 dest.push_back('{');
215 dest.insert(dest.end(), comment.begin(), comment.end());
216 if (it_curly != comment.end()) {
217 // Replace curly braces with UTF-8 fullwidth curly braces U+FF5B
218 // (ef bd 9b) or U+FF5D (ef bd 9d).
219 auto it = dest.end() - std::distance(it_curly, comment.end());
220 do {
221 auto replace_char = (*it == '{') ? u8"\uFF5B" : u8"\uFF5D";
222 static_assert(std::u8string_view(u8"\uFF5D").size() == 3);
223 it = dest.insert(it, 2, '\0');
224 it = std::copy_n(replace_char, 3, it);
225
226 it = std::find_if(it, dest.end(), is_curly);
227 } while (it != dest.end());
228 }
229 dest.push_back('}');
230 dest.push_back('\0');
231}
232
233namespace detail {
234
// The kind of element that encode_movetext_entry() is asked to write.
enum class MovetextEntryKind {
    // Only the (non empty) comment is written.
    InitialComment,

    // Opens a variation: a '(' is written, followed by the comment (if any).
    VariationStart,

    // Closes a variation: a ')' is written.
    VariationEnd,

    // A move: move number (when needed), SAN, NAGs and comment.
    Move
};
241
243 MovetextEntryKind kind;
244 std::string_view san;
245 std::string_view comment;
246 std::span<const Nag> nags;
247};
248
249inline std::string san_for_move(scid::core::Position& position,
250 const Move& move,
251 scid::core::sanFlagT flag) {
252 if (!move.san.empty())
253 return move.san;
254
255 return position.makeSan(move.spec, flag);
256}
257
258template <int hard_len = 0, typename TCont>
259void encode_movetext_entry(MovetextEntry const& entry,
260 std::vector<long long>& ply,
261 typename TCont::size_type& move_end,
262 TCont& dest,
263 EncodeOptions options) {
264 switch (entry.kind) {
265 case MovetextEntryKind::InitialComment:
266 if (options.includeComments && !entry.comment.empty())
267 encode_comment<hard_len>(entry.comment, dest);
268 break;
269
270 case MovetextEntryKind::VariationStart:
271 ply.push_back(ply.back() - 1);
272 dest.push_back('(');
273 if (options.includeComments && !entry.comment.empty())
274 encode_comment<hard_len>(entry.comment, dest);
275 break;
276
277 case MovetextEntryKind::VariationEnd:
278 ply.pop_back();
279 if (dest.back() == '\0') {
280 dest.back() = ')';
281 } else {
282 dest.push_back(')');
283 }
284 dest.push_back('\0');
285 break;
286
287 case MovetextEntryKind::Move: {
288 auto white_to_move = (ply.back() % 2) == 0;
289 if (white_to_move || move_end != dest.size()) {
290 auto move_number = std::to_string(ply.back() / 2 + 1);
291 move_number.append(white_to_move ? 1 : 3, '.');
292 dest.insert(dest.end(), move_number.begin(), move_number.end());
293 }
294 dest.insert(dest.end(), entry.san.begin(), entry.san.end());
295 dest.push_back('\0');
296 move_end = dest.size();
297 ply.back()++;
298
299 if (options.includeComments) {
300 for (auto nag : entry.nags) {
301 auto nag_str = nagToString(nag, options.symbolicNags);
302 dest.insert(dest.end(), nag_str.begin(), nag_str.end());
303 dest.push_back('\0');
304 }
305 }
306 if (options.includeComments && !entry.comment.empty())
307 encode_comment<hard_len>(entry.comment, dest);
308 break;
309 }
310 }
311}
312
313} // namespace detail
314
315template <int hard_len = 0, typename TCont>
316void encode_core_line(MoveSequence const& line,
317 scid::core::Position position,
318 std::vector<long long>& ply,
319 typename TCont::size_type& move_end, TCont& dest,
320 EncodeOptions options = {}) {
321 for (std::size_t i = 0; i < line.moves.size(); ++i) {
322 auto const& move = line.moves[i];
323 auto position_before_move = position;
324 const auto sanFlag = i + 1 == line.moves.size()
325 ? scid::core::SAN_MATETEST
326 : scid::core::SAN_CHECKTEST;
327 const auto san = detail::san_for_move(position, move, sanFlag);
328
329 detail::encode_movetext_entry<hard_len>(
330 {detail::MovetextEntryKind::Move,
331 san,
332 move.metadata.comment,
333 {move.metadata.nags.data(), move.metadata.nags.size()}},
334 ply, move_end, dest, options);
335
336 if (options.includeVariations) {
337 for (auto const& variation : move.childVariations) {
338 detail::encode_movetext_entry<hard_len>(
339 {detail::MovetextEntryKind::VariationStart,
340 {},
341 variation.initialComment,
342 {}},
343 ply, move_end, dest, options);
344 encode_core_line<hard_len>(variation.line, position_before_move,
345 ply, move_end, dest, options);
346 detail::encode_movetext_entry<hard_len>(
347 {detail::MovetextEntryKind::VariationEnd, {}, {}, {}},
348 ply, move_end, dest, options);
349 }
350 }
351
352 (void)position.applyMove(move.spec);
353 }
354}
355
356template <int hard_len = 0, typename TCont>
357void encode_movetext(Game const& game, TCont& dest,
358 EncodeOptions options = {}) {
359 std::vector<long long> ply = {game.initialPlyCounter()};
360 auto move_end = dest.size();
361 dest.push_back('\n');
362
363 if (options.includeComments && !game.initialComment().empty()) {
364 detail::encode_movetext_entry<hard_len>(
365 {detail::MovetextEntryKind::InitialComment,
366 {},
367 game.initialComment(),
368 {}},
369 ply, move_end, dest, options);
370 }
371
372 auto position = game.startPosition() ? *game.startPosition()
373 : scid::core::Position::getStdStart();
374 encode_core_line<hard_len>(game.movetext().mainline, position, ply,
375 move_end, dest, options);
376
377 if (dest.back() == '\0')
378 dest.back() = '\n';
379}
380
381template <typename TCont>
382void encode_core_tag_pairs(Game const& game, TCont& dest,
383 EncodeOptions options = {}) {
384 char str_buf[256];
385 encode_tag_pair("Event", game.event(), dest);
386 encode_tag_pair("Site", game.site(), dest);
387 scid::core::date_DecodeToString(game.date(), str_buf);
388 encode_tag_pair("Date", str_buf, dest);
389 encode_tag_pair("Round", game.round(), dest);
390 encode_tag_pair("White", game.white().name, dest);
391 encode_tag_pair("Black", game.black().name, dest);
392 encode_tag_pair("Result", game.resultString(), dest);
393
394 if (options.includeSupplementalTags) {
395 if (auto rating = game.white().rating.value) {
396 std::string tag = "White";
397 tag.append(scid::core::ratingTypeNames[game.white().rating.type]);
398 encode_tag_pair(tag, std::to_string(rating), dest);
399 }
400 if (auto rating = game.black().rating.value) {
401 std::string tag = "Black";
402 tag.append(scid::core::ratingTypeNames[game.black().rating.type]);
403 encode_tag_pair(tag, std::to_string(rating), dest);
404 }
405 if (!game.eco().empty())
406 encode_tag_pair("ECO", game.eco(), dest);
407 if (game.eventDate() != scid::core::ZERO_DATE) {
408 scid::core::date_DecodeToString(game.eventDate(), str_buf);
409 encode_tag_pair("EventDate", str_buf, dest);
410 }
411 for (auto const& tag : game.extraTags())
412 encode_tag_pair(tag.first, tag.second, dest);
413 }
414 if (game.hasNonStandardStart(str_buf, sizeof(str_buf)))
415 encode_tag_pair("FEN", str_buf, dest);
416}
417
418template <int hard_len = 0, typename TCont>
419void encode_game(Game const& game, TCont& dest, EncodeOptions options = {}) {
420 encode_core_tag_pairs(game, dest, options);
421 encode_movetext<hard_len>(game, dest, options);
422
423 auto result = game.resultString();
424 dest.insert(dest.end(), result.begin(), result.end());
425 dest.push_back('\n');
426}
427
428// Encode a game according to the PGN standard, adding newline chars to make it
429// more readable.
430// @param game: the game to be encoded.
431// @param dest: the container where the PGN Game will be appended.
432template <int desired_len = 80, typename TGame, typename TCont>
433void encode(TGame const& game, TCont& dest, EncodeOptions options = {}) {
434 auto begin = dest.size();
435 encode_game(game, dest, options);
436 if (options.lineWidth) {
437 break_lines(dest.begin() + begin, dest.end(), *options.lineWidth);
438 } else {
439 break_lines<desired_len>(dest.begin() + begin, dest.end());
440 }
441}
442
443} // namespace scid::core::pgn
Definition position.h:44
Definition game.h:60
Definition encode.h:42