Line data Source code
1 : #include "com.hpp"
2 : #include "definition.hpp"
3 : #include "dynamicConfig.hpp"
4 : #include "learn_tools.hpp"
5 : #include "logging.hpp"
6 : #include "material.hpp"
7 : #include "moveGen.hpp"
8 : #include "pieceTools.hpp"
9 : #include "position.hpp"
10 : #include "positionTools.hpp"
11 : #include "searcher.hpp"
12 :
13 : #ifdef WITH_DATA2BIN
14 :
15 : // Mostly copy/paste from nodchip Stockfish repository and adapted to Minic
16 : // Tools for handling various learning data format
17 :
18 : namespace {
19 :
// Translates the (still quoted) value of a PGN "Result" tag into a numeric outcome:
//   "1-0" (white win) -->  1
//   "0-1" (black win) --> -1
//   anything else     -->  0 (handled as a draw)
int parse_game_result_from_pgn_extract(const std::string & result) {
   const bool whiteWins = (result == "\"1-0\"");
   if (whiteWins) return 1;

   const bool blackWins = (result == "\"0-1\"");
   return blackWins ? -1 : 0;
}
34 :
35 : // Here
36 : // #-4 --> mated in 4
37 : // #3 --> mating in 3
38 : // -M4 --> mated in 4
39 : // +M3 --> mating in 3
40 0 : ScoreType parse_score_from_pgn_extract(const std::string & eval, bool& success) {
41 0 : success = true;
42 :
43 0 : if (eval.substr(0, 1) == "#") {
44 0 : if (eval.substr(1, 1) == "-") {
45 0 : return matedScore(static_cast<DepthType>(stoi(eval.substr(2, eval.length() - 2))));
46 : }
47 : else {
48 0 : return matingScore(static_cast<DepthType>(stoi(eval.substr(1, eval.length() - 1))));
49 : }
50 : }
51 0 : else if (eval.substr(0, 2) == "-M") {
52 0 : return matedScore(static_cast<DepthType>(stoi(eval.substr(2, eval.length() - 2))));
53 : }
54 0 : else if (eval.substr(0, 2) == "+M") {
55 0 : return matingScore(static_cast<DepthType>(stoi(eval.substr(2, eval.length() - 2))));
56 : }
57 : else {
58 : char* endptr;
59 0 : double value = strtod(eval.c_str(), &endptr);
60 0 : if (*endptr != '\0') {
61 0 : success = false;
62 0 : return 0;
63 : }
64 : else {
65 0 : return static_cast<ScoreType>(value * 100);
66 : }
67 : }
68 : }
69 :
// Removes leading whitespace from `s`, in place.
inline void ltrim(std::string& s) {
   // std::isspace is only defined for values representable as unsigned char (or EOF).
   // Taking the character as unsigned char avoids undefined behaviour for bytes >= 0x80
   // when plain char is signed.
   const auto firstKept = std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !std::isspace(ch); });
   s.erase(s.begin(), firstKept);
}
73 :
// Removes trailing whitespace from `s`, in place.
inline void rtrim(std::string& s) {
   // std::isspace is only defined for values representable as unsigned char (or EOF).
   // Taking the character as unsigned char avoids undefined behaviour for bytes >= 0x80
   // when plain char is signed.
   const auto lastKept = std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); });
   // base() designates the element right after the last kept character
   s.erase(lastKept.base(), s.end());
}
77 :
78 : inline void trim(std::string& s) {
79 0 : ltrim(s);
80 0 : rtrim(s);
81 0 : }
82 :
83 0 : inline bool is_like_fen(const std::string & fen) {
84 : auto count_space = std::count(fen.cbegin(), fen.cend(), ' ');
85 : auto count_slash = std::count(fen.cbegin(), fen.cend(), '/');
86 0 : return count_space == 5 && count_slash == 7;
87 : }
88 :
89 : } // anonymous namespace
90 :
91 0 : bool convert_plain_to_bin(const std::vector<std::string>& filenames,
92 : const std::string& output_file_name,
93 : const int ply_minimum,
94 : const int ply_maximum) {
95 :
96 0 : std::fstream fs;
97 : uint64_t data_size = 0;
98 : uint64_t filtered_size = 0;
99 : uint64_t filtered_size_fen = 0;
100 : uint64_t filtered_size_move = 0;
101 : uint64_t filtered_size_ply = 0;
102 :
103 0 : fs.open(output_file_name, std::ios::app | std::ios::binary);
104 :
105 0 : for (const auto & filename : filenames) {
106 : std::cout << "converting " << filename << " from plain to binary format... " << std::endl;
107 : std::string line;
108 : std::string line2;
109 0 : std::ifstream ifs;
110 0 : ifs.open(filename);
111 : PackedSfenValue p;
112 0 : RootPosition pos;
113 : #if defined(WITH_NNUE)
114 0 : NNUEEvaluator evaluator;
115 : pos.associateEvaluator(evaluator);
116 : #endif
117 : data_size = 0;
118 : filtered_size = 0;
119 : filtered_size_fen = 0;
120 : filtered_size_move = 0;
121 : filtered_size_ply = 0;
122 0 : p.gamePly = 1; // Not included in apery format. Should be initialized
123 0 : bool ignore_flag_fen = false;
124 0 : bool ignore_flag_move = false;
125 0 : bool ignore_flag_ply = false;
126 0 : bool scored = false;
127 0 : bool hasPos = false;
128 0 : unsigned int count = 0;
129 0 : while (std::getline(ifs, line)) {
130 0 : ++count;
131 0 : if ((count % 1000000 == 0)) { std::cout << "lines " << count << ", skipped pos " << filtered_size << std::endl; }
132 0 : std::stringstream ss(line);
133 : std::string token;
134 : std::string value;
135 :
136 0 : auto skipit = [&]() {
137 0 : std::cout << "skipping from line " << count << std::endl;
138 0 : while (std::getline(ifs, line2)) {
139 0 : std::cout << "skipped line " << line2 << std::endl;
140 0 : std::stringstream sss(line2);
141 0 : sss >> token;
142 0 : if (token == "e") {
143 0 : ignore_flag_fen = false;
144 0 : ignore_flag_move = false;
145 0 : ignore_flag_ply = false;
146 0 : scored = false;
147 0 : hasPos = false;
148 : break;
149 : }
150 0 : }
151 0 : };
152 :
153 0 : ss >> token;
154 0 : if (token == "fen") {
155 0 : std::string input_fen = line.substr(4);
156 0 : if (readFEN(input_fen, pos, true, true)) {
157 0 : sfen_pack(pos, p.sfen);
158 0 : hasPos = true;
159 : }
160 : else {
161 0 : skipit();
162 : }
163 : }
164 0 : else if (token == "move") {
165 0 : ss >> value;
166 0 : Square from = INVALIDSQUARE;
167 0 : Square to = INVALIDSQUARE;
168 0 : MType type = T_std;
169 : // Here we forbid castling moves
170 : // beware this will create a "discontinuity" in binpack
171 : // interpretation of the move sequence
172 0 : if (hasPos && readMove(pos, value, from, to, type, true)) {
173 0 : p.move = ToSFMove(pos, from, to, type); // use SF style move encoding
174 : //p.move = ToMove(from,to,type); // use Minic style move encoding
175 : }
176 : else {
177 0 : skipit();
178 : }
179 : }
180 0 : else if (token == "score") {
181 : int16_t score;
182 0 : ss >> score;
183 0 : p.score = std::min(std::max(score, int16_t(-MATE)), int16_t(MATE));
184 : }
185 0 : else if (token == "ply") {
186 : int temp;
187 0 : ss >> temp;
188 0 : if (temp < ply_minimum || temp > ply_maximum) {
189 0 : ignore_flag_ply = true;
190 0 : ++filtered_size_ply;
191 : }
192 0 : p.gamePly = uint16_t(temp); // No cast here?
193 : }
194 0 : else if (token == "result") {
195 : int temp;
196 0 : ss >> temp;
197 0 : p.game_result = int8_t(temp); // Do you need a cast here?
198 0 : scored = true;
199 : }
200 0 : else if (token == "e") {
201 0 : if (!(ignore_flag_fen || ignore_flag_move || ignore_flag_ply) && scored && hasPos) {
202 0 : fs.write((char*)&p, sizeof(PackedSfenValue));
203 0 : data_size += 1;
204 : }
205 : else {
206 0 : ++filtered_size;
207 : }
208 0 : ignore_flag_fen = false;
209 0 : ignore_flag_move = false;
210 0 : ignore_flag_ply = false;
211 0 : scored = false;
212 0 : hasPos = false;
213 : }
214 : else {
215 : std::cout << "bad token : " << token << std::endl;
216 0 : skipit();
217 : }
218 0 : }
219 : std::cout << "done " << data_size << " parsed " << filtered_size << " is filtered"
220 : << " (illegal fen:" << filtered_size_fen << ", illegal move:" << filtered_size_move << ", illegal ply:" << filtered_size_ply << ")"
221 : << std::endl;
222 0 : ifs.close();
223 0 : }
224 : std::cout << "all done" << std::endl;
225 0 : fs.close();
226 0 : return true;
227 0 : }
228 :
229 : // First pgn must be converted using this command
230 : // pgn-extract --fencomments -Wlalg --nochecks --nomovenumbers --noresults -w500000 -N -V -o data.plain games.pgn
231 0 : bool convert_bin_from_pgn_extract(const std::vector<std::string>& filenames,
232 : const std::string& output_file_name,
233 : const bool pgn_eval_side_to_move,
234 : const bool convert_no_eval_fens_as_score_zero) {
235 : std::cout << "pgn_eval_side_to_move=" << pgn_eval_side_to_move << std::endl;
236 : std::cout << "convert_no_eval_fens_as_score_zero=" << convert_no_eval_fens_as_score_zero << std::endl;
237 :
238 0 : RootPosition pos;
239 : #if defined(WITH_NNUE)
240 0 : NNUEEvaluator evaluator;
241 : pos.associateEvaluator(evaluator);
242 : #endif
243 :
244 0 : std::fstream ofs;
245 0 : ofs.open(output_file_name, std::ios::out | std::ios::binary);
246 :
247 : int game_count = 0;
248 : int fen_count = 0;
249 :
250 0 : for (const auto & filename : filenames) {
251 : //std::cout << " convert " << filename << std::endl;
252 0 : std::ifstream ifs;
253 0 : ifs.open(filename);
254 :
255 : int game_result = 0;
256 :
257 : std::string line;
258 0 : while (std::getline(ifs, line)) {
259 : //std::cout << "line : " << line << std::endl;
260 :
261 0 : if (line.empty()) { continue; }
262 :
263 0 : else if (line.substr(0, 1) == "[") {
264 0 : std::regex pattern_result(R"(\[Result (.+?)\])");
265 : std::smatch match;
266 :
267 : // example: [Result "1-0"]
268 0 : if (std::regex_search(line, match, pattern_result)) {
269 0 : game_result = parse_game_result_from_pgn_extract(match.str(1));
270 0 : ++game_count;
271 0 : if (game_count % 10000 == 0) { std::cout << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl; }
272 : }
273 :
274 : continue;
275 0 : }
276 :
277 : else {
278 : int gamePly = 1;
279 : auto itr = line.cbegin();
280 :
281 : while (true) {
282 0 : ++gamePly;
283 :
284 : PackedSfenValue psv;
285 : memset((char*)&psv, 0, sizeof(PackedSfenValue));
286 :
287 : // fen
288 : {
289 : bool fen_found = false;
290 :
291 0 : while (!fen_found) {
292 0 : std::regex pattern_bracket(R"(\{(.+?)\})");
293 : std::smatch match;
294 0 : if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) { break; }
295 :
296 0 : itr += match.position(0) + match.length(0) - 1;
297 0 : std::string str_fen = match.str(1);
298 : trim(str_fen);
299 :
300 : //std::cout << "possible fen " << str_fen << std::endl;
301 0 : if (is_like_fen(str_fen)) {
302 : //std::cout << "validated fen " << str_fen << std::endl;
303 : fen_found = true;
304 0 : readFEN(str_fen, pos, true, true);
305 0 : sfen_pack(pos, psv.sfen);
306 : }
307 0 : }
308 :
309 0 : if (!fen_found) {
310 : //std::cout << "fen not found" << std::endl;
311 : break;
312 : }
313 : }
314 :
315 : // move
316 : {
317 0 : std::regex pattern_move(R"(\}(.+?)\{)");
318 : std::smatch match;
319 0 : if (!std::regex_search(itr, line.cend(), match, pattern_move)) {
320 : //std::cout << "move not found" << std::endl;
321 : break;
322 : }
323 :
324 0 : itr += match.position(0) + match.length(0) - 1;
325 0 : std::string str_move = match.str(1);
326 : //std::cout << "move " << str_move << std::endl;
327 : trim(str_move);
328 0 : Square from = INVALIDSQUARE;
329 0 : Square to = INVALIDSQUARE;
330 0 : MType type = T_std;
331 0 : if (readMove(pos, str_move, from, to, type)) {
332 0 : psv.move = ToSFMove(pos, from, to, type); // use SF style move encoding
333 : }
334 0 : }
335 :
336 : // eval
337 : bool eval_found = false;
338 : {
339 0 : std::regex pattern_bracket(R"(\{(.+?)\})");
340 : std::smatch match;
341 0 : if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) {
342 : //std::cout << "eval not found" << std::endl;
343 : break;
344 : }
345 :
346 0 : std::string str_eval_clk = match.str(1);
347 : trim(str_eval_clk);
348 : //std::cout << "eval " << str_eval_clk << std::endl;
349 :
350 : // example: { [%eval 0.25] [%clk 0:10:00] }
351 : // example: { [%eval #-4] [%clk 0:10:00] }
352 : // example: { [%eval #3] [%clk 0:10:00] }
353 : // example: { +0.71/22 1.2s }
354 : // example: { -M4/7 0.003s }
355 : // example: { M3/245 0.017s }
356 : // example: { +M1/245 0.010s, White mates }
357 : // example: { 0.60 }
358 : // example: { book }
359 : // example: { rnbqkb1r/pp3ppp/2p1pn2/3p4/2PP4/2N2N2/PP2PPPP/R1BQKB1R w KQkq - 0 5 }
360 :
361 : // Considering the absence of eval
362 0 : if (!is_like_fen(str_eval_clk)) {
363 0 : itr += match.position(0) + match.length(0) - 1;
364 :
365 0 : if (str_eval_clk != "book") {
366 0 : std::regex pattern_eval1(R"(\[\%eval (.+?)\])");
367 0 : std::regex pattern_eval2(R"((.+?)\/)");
368 :
369 : std::string str_eval;
370 0 : if (std::regex_search(str_eval_clk, match, pattern_eval1) || std::regex_search(str_eval_clk, match, pattern_eval2)) {
371 0 : str_eval = match.str(1);
372 : trim(str_eval);
373 : }
374 : else {
375 : str_eval = str_eval_clk;
376 : }
377 :
378 : bool success = false;
379 0 : ScoreType value = parse_score_from_pgn_extract(str_eval, success);
380 0 : if (success) {
381 : eval_found = true;
382 0 : psv.score = std::clamp(value, ScoreType(-MATE), ScoreType(MATE));
383 : }
384 0 : }
385 : }
386 0 : }
387 :
388 : // write
389 0 : if (eval_found || convert_no_eval_fens_as_score_zero) {
390 0 : if (!eval_found && convert_no_eval_fens_as_score_zero) { psv.score = 0; }
391 :
392 0 : psv.gamePly = static_cast<uint16_t>(gamePly);
393 0 : psv.game_result = static_cast<uint8_t>(game_result);
394 :
395 0 : if (pos.c == Co_Black) {
396 0 : if (!pgn_eval_side_to_move) { psv.score *= -1; }
397 0 : psv.game_result *= -1;
398 : }
399 :
400 0 : ofs.write((char*)&psv, sizeof(PackedSfenValue));
401 :
402 0 : ++fen_count;
403 : }
404 0 : }
405 :
406 : game_result = 0;
407 : }
408 : }
409 0 : }
410 :
411 0 : std::cout << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl;
412 : std::cout << " all done" << std::endl;
413 0 : ofs.close();
414 0 : return true;
415 0 : }
416 :
417 0 : bool convert_bin_to_plain(const std::vector<std::string>& filenames, const std::string& output_file_name) {
418 0 : std::ofstream ofs;
419 0 : ofs.open(output_file_name, std::ios::app);
420 0 : for (const auto & filename : filenames) {
421 : std::cout << "convert " << filename << " ... " << std::endl;
422 : // Just convert packedsfenvalue to text
423 0 : std::fstream fs;
424 0 : fs.open(filename, std::ios::in | std::ios::binary);
425 : PackedSfenValue p;
426 : while (true) {
427 0 : if (fs.read((char*)&p, sizeof(PackedSfenValue))) {
428 0 : Position tpos; // fully empty position !
429 0 : set_from_packed_sfen(tpos, p.sfen);
430 : // write as plain text
431 0 : ofs << "fen " << GetFEN(tpos) << std::endl;
432 0 : ofs << "move " << ToString(FromSFMove(tpos, p.move)) << std::endl;
433 0 : ofs << "score " << p.score << std::endl;
434 0 : ofs << "ply " << static_cast<int>(p.gamePly) << std::endl;
435 0 : ofs << "result " << static_cast<int>(p.game_result) << std::endl;
436 : ofs << "e" << std::endl;
437 0 : }
438 : else {
439 : break;
440 : }
441 0 : }
442 0 : fs.close();
443 : std::cout << "done" << std::endl;
444 0 : }
445 0 : ofs.close();
446 : std::cout << "all done" << std::endl;
447 :
448 0 : return true;
449 0 : }
450 :
451 : //#define DEBUG_MTRACE
452 : #ifdef DEBUG_MTRACE
453 : #include <mcheck.h>
454 : #endif
455 :
456 0 : bool rescore(const std::vector<std::string>& filenames, const std::string& output_file_name) {
457 :
458 : #ifdef DEBUG_MTRACE
459 : mtrace();
460 : // use LD_PRELOAD=libc_malloc_debug.so MALLOC_TRACE=output.txt
461 : #endif
462 :
463 0 : std::ofstream ofs;
464 0 : ofs.open(output_file_name, std::ios::app);
465 :
466 0 : Searcher& cos = Searcher::getCoSearcher(0);
467 0 : cos.clearSearch(true);
468 :
469 : // init sub search
470 0 : const int oldMinOutLvl = DynamicConfig::minOutputLevel;
471 0 : const unsigned int oldLevel = DynamicConfig::level;
472 0 : const unsigned int oldRandomOpen = DynamicConfig::randomOpen;
473 0 : const unsigned int oldRandomPly = DynamicConfig::randomPly;
474 :
475 0 : DynamicConfig::minOutputLevel = Logging::logMax;
476 0 : DynamicConfig::level = 100;
477 0 : DynamicConfig::randomOpen = 0;
478 0 : DynamicConfig::randomPly = 0;
479 :
480 : uint64_t count = 0;
481 :
482 0 : TimeMan::isDynamic = false;
483 0 : TimeMan::nbMoveInTC = -1;
484 0 : TimeMan::msecPerMove = INFINITETIME;
485 0 : TimeMan::msecInTC = -1;
486 0 : TimeMan::msecInc = -1;
487 0 : TimeMan::msecUntilNextTC = -1;
488 :
489 0 : RootPosition tpos;
490 : #ifdef WITH_NNUE
491 0 : NNUEEvaluator evaluator;
492 : tpos.associateEvaluator(evaluator);
493 : #endif
494 :
495 0 : for (const auto & filename : filenames) {
496 : std::cout << "rescoring " << filename << " ... " << std::endl;
497 : // Just convert packedsfenvalue to text
498 0 : std::fstream fs;
499 0 : fs.open(filename, std::ios::in | std::ios::binary);
500 : PackedSfenValue p;
501 : while (true) {
502 0 : if ((++count % 100000) == 0) { std::cout << count << std::endl; }
503 0 : if (fs.read((char*)&p, sizeof(PackedSfenValue))) {
504 : //std::cout << p.score << std::endl;
505 : tpos.clear(); // fully empty position !
506 0 : set_from_packed_sfen(tpos, p.sfen);
507 : //std::cout << GetFEN(tpos) << std::endl;
508 : #ifdef WITH_NNUE
509 0 : tpos.resetNNUEEvaluator(evaluator);
510 : #endif
511 0 : ThreadData data;
512 : DepthType depth = MAX_DEPTH;
513 : // only if not constrainted by maxnodes we compute a max depth depending on gamephase
514 0 : if (TimeMan::maxNodes == 0 ){
515 0 : const Hash matHash = MaterialHash::getMaterialHash(tpos.mat);
516 : float gp = 1;
517 0 : if (matHash != nullHash) {
518 : const MaterialHash::MaterialHashEntry& MEntry = MaterialHash::materialHashTable[matHash];
519 : gp = MEntry.gamePhase();
520 : }
521 : // don't worry about "else" here ...
522 0 : depth = static_cast<DepthType>(clampDepth(DynamicConfig::genFenDepth) * gp + clampDepth(DynamicConfig::genFenDepthEG) * (1.f - gp));
523 : }
524 0 : DynamicConfig::randomPly = 0;
525 0 : data.p = tpos;
526 0 : data.depth = depth;
527 0 : cos.setData(data);
528 0 : cos.stopFlag = false;
529 0 : cos.currentMoveMs = INFINITETIME;
530 : // do not update COM::position here
531 0 : cos.searchDriver(false);
532 0 : data = cos.getData();
533 : //std::cout << data.score << std::endl;
534 : // only write quiet moves
535 0 : if (!isCapture(data.best)){
536 0 : p.score = data.score;
537 0 : ofs.write((char*)&p, sizeof(PackedSfenValue));
538 : }
539 : }
540 : else {
541 : std::cout << "read error" << std::endl;
542 : break;
543 : }
544 0 : }
545 0 : fs.close();
546 : std::cout << "done" << std::endl;
547 0 : }
548 0 : ofs.close();
549 : std::cout << "all done" << std::endl;
550 :
551 0 : DynamicConfig::minOutputLevel = oldMinOutLvl;
552 0 : DynamicConfig::level = oldLevel;
553 0 : DynamicConfig::randomOpen = oldRandomOpen;
554 0 : DynamicConfig::randomPly = oldRandomPly;
555 :
556 : #ifdef DEBUG_MTRACE
557 : muntrace();
558 : #endif
559 :
560 0 : return true;
561 0 : }
562 :
563 : #endif // WITH_DATA2BIN
|