Line data Source code
1 : #pragma once
2 :
3 : #include "definition.hpp"
4 :
5 : #ifdef WITH_NNUE
6 :
7 : // quantization constantes
8 :
// By default, quantization is disabled: values are float and only the
// output layer is scaled.
//
// Type aliases (NOTE(review): meaning inferred from the names, confirm against the users of this policy):
//  - WT  / BT  : weight / bias type
//  - WIT / BIT : weight / bias "internal" type
// In this non-quantized policy all four are float.
template<bool Q> struct Quantization {
   using WT  = float;
   using WIT = float;
   using BT  = float;
   using BIT = float;
   // Scale is 1 in the float policy.
   static constexpr WT scale {1};
   // Output factor (the "scale only output layer" part of the policy).
   static constexpr float outFactor {600.f};
   // Identity: returns x unchanged, no rounding in the float policy.
   static float round(const float& x) { return x; }
};
19 :
20 : // When quantization is activated (on read) we try to store weights and
21 : // do computations using only integers, smaller being best for speed
22 : // but there is some constrains : see doc/
23 : template<> struct Quantization<true> {
24 : using WT = float;
25 : using WIT = int16_t;
26 : using BT = float;
27 : using BIT = int16_t;
28 : static constexpr WT scale {512}; // 32*512 = 16384 and |weight| & |bias| < 0.6
29 : static constexpr float outFactor {600.f};
30 830489088 : static float round(const float& x) { return std::round(x); }
31 : };
32 :
33 22 : template<bool Q> inline void quantizationInfo() {
34 : if constexpr (Q) {
35 22 : Logging::LogIt(Logging::logInfo) << "Quantization info :";
36 22 : Logging::LogIt(Logging::logInfo) << "scale " << Quantization<true>::scale;
37 : }
38 : else {
39 : Logging::LogIt(Logging::logInfo) << "No quantization, using float net";
40 : }
41 22 : }
42 :
43 : #endif // WITH_NNUE
|