From 1fc3bfaa68e7b31593b7a951795d716dcce854f6 Mon Sep 17 00:00:00 2001
From: PikaCat <760758491@qq.com>
Date: Sun, 5 Jun 2022 17:43:32 +0800
Subject: [PATCH] =?UTF-8?q?=E5=AE=9E=E7=8E=B0=E4=BA=86NNUE=E5=B1=80?=
 =?UTF-8?q?=E9=9D=A2=E8=AF=84=E5=88=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ChineseChess.pro                        |  10 +
 ChineseChess.pro.user                   |   2 +-
 README.md                               |  37 ++-
 app.rc                                  |   2 +-
 src/board/chessboard.cpp                | 109 +++++---
 src/board/chessboard.h                  |  28 +-
 src/evaluate/accumulator.h              |  25 ++
 src/evaluate/evaluate.cpp               | 330 ++++++------------------
 src/evaluate/evaluate.h                 |  92 +++++++
 src/evaluate/layer/clippedrelu.h        |  90 +++++++
 src/evaluate/layer/dense.h              | 151 +++++++++++
 src/evaluate/layer/featuretransformer.h | 204 +++++++++++++++
 src/evaluate/layer/input.h              |  33 +++
 src/evaluate/model.h                    | 106 ++++++++
 src/global.h                            | 302 +++-------------------
 src/move/historymove.h                  |   4 +
 src/search/chessengine.cpp              |   9 +-
 src/search/searchinstance.cpp           |  60 ++---
 src/table/pregen.cpp                    |  11 +-
 19 files changed, 959 insertions(+), 646 deletions(-)
 create mode 100644 src/evaluate/accumulator.h
 create mode 100644 src/evaluate/evaluate.h
 create mode 100644 src/evaluate/layer/clippedrelu.h
 create mode 100644 src/evaluate/layer/dense.h
 create mode 100644 src/evaluate/layer/featuretransformer.h
 create mode 100644 src/evaluate/layer/input.h
 create mode 100644 src/evaluate/model.h
diff --git a/ChineseChess.pro b/ChineseChess.pro
index c8f37a6..eed4f9f 100644
--- a/ChineseChess.pro
+++ b/ChineseChess.pro
@@ -2,6 +2,7 @@ QT       += core gui network # testlib
 
 greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
 
+QMAKE_LFLAGS_WINDOWS += -Wl,--stack,32000000
 QMAKE_CXXFLAGS += -std=gnu++2b -march=native -masm=intel -fopenmp
 QMAKE_CXXFLAGS_RELEASE += -Ofast -flto
 # The following define makes your compiler emit warnings if you use
@@ -19,6 +20,13 @@ HEADERS += \
     src/GUI/dialog.h \
     src/board/bitboard.h \
     src/board/chessboard.h \
+    src/evaluate/accumulator.h \
+    src/evaluate/evaluate.h \
+    src/evaluate/layer/clippedrelu.h \
+    src/evaluate/layer/dense.h \
+    src/evaluate/layer/featuretransformer.h \
+    src/evaluate/layer/input.h \
+    src/evaluate/model.h \
     src/global.h \
     src/machine/searchmachine.h \
     src/machine/searchquiescencemachine.h \
@@ -59,6 +67,8 @@ INCLUDEPATH += src \
     src/search \
     src/table \
     src/move \
+    src/evaluate \
+    src/evaluate/layer \
 #    test
 
 LIBS += -fopenmp
diff --git a/ChineseChess.pro.user b/ChineseChess.pro.user
index 1a385dd..c4035e5 100644
--- a/ChineseChess.pro.user
+++ b/ChineseChess.pro.user
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE QtCreatorProject>
-<!-- Written by QtCreator 7.0.2, 2022-05-26T12:53:40. -->
+<!-- Written by QtCreator 7.0.2, 2022-06-06T19:53:51. -->
 <qtcreator>
  <data>
   <variable>EnvironmentId</variable>
diff --git a/README.md b/README.md
index 11938fb..7a04454 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,11 @@
 #### 介绍
 + 个人项目，中国象棋Qt界面与AI象棋引擎
 + 棋盘结构为 **PEXT位棋盘** ，使用CPU中128位寄存器的低90位来存储棋盘，对应C++的数据结构为__m128i
-+ 使用了 **POPCNT指令，BMI位操作指令集中的PEXT与TZCNT指令，SSE指令集中的与、或、非、异或、零测试** 等指令来进行走法预生成与快速运算，需要相应的CPU支持
++ 使用了 **POPCNT指令，BMI位操作指令集中的PEXT与TZCNT指令，SSE指令集中的与、或、非、异或、零测试，AVX2指令集** 等指令来进行走法预生成与快速运算，需要相应的CPU支持
 + 引擎算法基于超出边界（Fail-Soft）的AlphaBeta剪枝，使用迭代加深（含内部迭代加深）的搜索方式
-+ 在局面评价上使用渐进式的评估方法对进行局面评估，考虑了棋子的子力价值、位置分、王安全分（包括空头炮、炮镇窝心马、沉底炮等危险棋形的评估）
++ 在局面评价上使用NNUE（可快速更新的神经网络）对局面进行评估
 + 支持历史表启发，杀手启发，吃子启发，有良好的走法排序器
-+ 支持基于SSE的无锁置换表裁剪、带验证的空着裁剪、落后着法衰减、杀棋步数裁剪、剃刀裁剪、无用裁剪、差值裁剪
++ 支持基于SSE的无锁置换表裁剪、带验证的空着裁剪、落后着法衰减、杀棋步数裁剪、无用裁剪、差值裁剪
 + 支持将军延伸和重复局面检测（支持长将检测和部分长捉检测）
 + 支持主要变例搜索、使用OpenMP与QtConcurrent并发库进行Lazy-SMP多线程搜索
 + 联网的情况下支持ChessDB提供的开局库、对局库和残局库，大约可提升引擎200ELO左右
@@ -22,21 +22,21 @@
 #### 语言标准
 + C++最新标准，开启GNU最新的语言级别扩展特性
 
-#### 引擎棋力（使用云库、CPU:i5-8265U）
-+ 足以应对一般的纯人，但由于搜索速度和评分函数知识上的缺陷，暂不足以应对其他优秀的象棋软件(如佳佳象棋与象棋旋风)。
+#### 引擎棋力（非NNUE版本，使用云库、CPU:i5-8265U）
++ 足以应对一般的纯人，但由于搜索速度上的缺陷，暂不足以应对其他优秀的象棋软件(如象棋旋风)。
 + 与一般的象棋引擎的对战评测在b站：
     + 对战悟空象棋引擎：https://www.bilibili.com/video/BV1TF41147Do/ 
     + 对战象棋小巫师引擎：https://www.bilibili.com/video/BV1va411h7yo/ 
     + 对战象眼引擎：https://www.bilibili.com/video/BV1Q34y1b7PA/
 
-#### 天天象棋测试（使用云库、CPU:i5-8265U）
+#### 天天象棋测试（非NNUE版本，使用云库、CPU:i5-8265U）
 + 可战胜业8-3纯人，得出本软件ELO大约为2000左右
 + 天天象棋人机对战可以战胜精英级别电脑（天天象棋分析12层），由此可得本软件大致与新版天天象棋分析13层相当。
 + 实战测试结果最高等级如下(该账号仅用于测试软件棋力，由于达到业余9-1后，再往后的测试需要实名认证，鉴于已经达到了测试的目的，所以该账号现已注销)：
 ![评测最高等级](https://images.gitee.com/uploads/images/2021/0823/185211_45f94b91_7628839.jpeg "QQ图片20210823185009.jpg")
 + 更多实战测试的内容在：https://www.bilibili.com/video/BV1eR4y1j777
 
-#### JJ象棋测试（使用云库、CPU:i5-8265U）
+#### JJ象棋测试（非NNUE版本，使用云库、CPU:i5-8265U）
 + 实战测试可战胜特大等级纯人，最高达到荣誉顶级，100盘胜率94%，有1盘掉线，1盘与其他软件作和，4盘输给其他软件，其余与纯人对战都赢了
 + 该账号仅用于测试软件棋力，由于特大等级的小部分人和荣誉顶级的绝大部分人都是软件，由于本软件不具备与其他软件对撕的能力，鉴于已经达到了测试的目的，故不再往后测试
 ![评测最高等级](https://images.gitee.com/uploads/images/2021/0921/212032_434c1039_7628839.jpeg "Screenshot_2021-09-21-21-16-53-960_cn.jj.chess.mi.jpg")
@@ -49,12 +49,11 @@
 
 #### 未来愿景
 + 这个引擎目前还有很多不完善的地方((＞﹏＜)一大堆捏~)：
-    1. 没有任何的审局眼光，虽然有云库不会开局落入飞刀局面，但是中局脱库后极其容易跳水，且无法识别官和局面(如双车对车士象全)。(这点将在NNUE版本推出后极大改善，但NNUE版本什么时候才能出来不确定，要看我什么时候能学完NNUE)
-    2. 没有发挥出位棋盘该有的速度，相比于数组棋盘提升幅度不是很大，所以对应的程序实现还有很多未被发现的Bug没有解决。
-    3. 搜索速度不快，剪枝力度不够大，NPS比不上免费的佳佳象棋引擎，更不用说商业引擎了。
-    4. 没有UCI协议支持，目前无法使用命令模式将引擎与界面解耦。
-    5. 没有引擎ELO测评平台，如CCRL。
-    6. 没有测试平台，如fishtest。
+    1. 没有发挥出位棋盘该有的速度，相比于数组棋盘提升幅度不是很大，所以对应的程序实现还有很多未被发现的Bug没有解决。
+    2. 搜索速度不快，剪枝力度不够大。
+    3. 没有UCI协议支持，目前无法使用命令模式将引擎与界面解耦。
+    4. 没有引擎ELO测评平台，如CCRL。
+    5. 没有测试平台，如fishtest。
 
 + 建立这个仓库的初心是看到国际象棋Stockfish引擎的开源仓库及其开源社区支持的强大支持，于是想着能不能在国内也建立一个这样的仓库，让更多象棋引擎爱好者参与引擎的改进，更新，提issue，提pull requests，众人拾柴火焰高。就像Stockfish超过商业引擎Komodo一样，有一天我们也能够媲美象棋旋风。
 + 我曾经看到过一句话，我很喜欢：If you love something, set it free. 来自虚幻引擎的官网。这里的free有两种意思，免费与自由。所以如果你喜欢一样东西，想让它变好，就让它免费吧，让它可以被它人自由获取吧！这也是我为什么要开源的原因，这也是我为什么使用WTFPL的原因。
@@ -63,13 +62,23 @@
 #### 云开局库、残局库
 + https://www.chessdb.cn/query/
 
+#### 特别感谢
++ 特别感谢ianfab编写的NNUE工具链以及Belzedar94提供的权重文件，让皮卡喵象棋引擎搭上了NNUE的时代快车
++ 特别感谢ianfab耐心解答我的疑惑，使得皮卡喵NNUE成为可能。https://github.com/ianfab/Fairy-Stockfish/discussions/491
++ 以下是ianfab提供的NNUE工具链：
+    1. 训练数据生成器：https://github.com/ianfab/variant-nnue-tools
+    2. NNUE网络训练器：https://github.com/ianfab/variant-nnue-pytorch
++ NNUE的最新参数文件（皮卡喵象棋的nnue文件会与其保持同步更新）：https://fairy-stockfish.github.io/nnue/#current-best-nnue-networks
+
 #### 参考文献
 1. 象棋百科全书：https://www.xqbase.com/computer.htm
 2. 象棋编程维基百科：https://www.chessprogramming.org/Main_Page
 3. Shark象棋引擎论文：http://rportal.lib.ntnu.edu.tw/bitstream/20.500.12235/106625/1/n060147070s01.pdf
+4. NNUE神经网络手册：https://github.com/glinscott/nnue-pytorch/blob/master/docs/nnue.md
 
 #### 参考代码
 1. 象棋小巫师: https://github.com/xqbase/xqwlight
 2. 象眼: https://github.com/xqbase/eleeye
 3. 国际象棋位棋盘: https://github.com/maksimKorzh/bbc
-4. 佳佳象棋：https://github.com/leedavid/NewGG
\ No newline at end of file
+4. 佳佳象棋：https://github.com/leedavid/NewGG
+5. Fairy-Stockfish：https://github.com/ianfab/Fairy-Stockfish
\ No newline at end of file
diff --git a/app.rc b/app.rc
index 5d57c87..34eedb7 100644
--- a/app.rc
+++ b/app.rc
@@ -1 +1 @@
-IDI_ICON1 ICON "ChessImage/ChessIcon.ico"
+IDI_ICON1 ICON "ChessImage/ChessIcon.ico"
\ No newline at end of file
diff --git a/src/board/chessboard.cpp b/src/board/chessboard.cpp
index 12e1756..b13e720 100644
--- a/src/board/chessboard.cpp
+++ b/src/board/chessboard.cpp
@@ -29,6 +29,7 @@ void Chessboard::parseFen(const QString &fen) {
   this->m_redOccupancy.clearAllBits();
   this->m_blackOccupancy.clearAllBits();
   this->m_occupancy.clearAllBits();
+  this->m_piece = 0;
   memset(this->m_helperBoard, EMPTY, sizeof(this->m_helperBoard));
 
   // 分割为棋盘和选边两部分
@@ -42,6 +43,7 @@ void Chessboard::parseFen(const QString &fen) {
     case '/': continue;
     default:
       if (ch.isNumber()) { count += (ch.toLatin1() - '0'); continue; }
+      ++this->m_piece;
       this->m_bitboards[FEN_MAP[ch]].setBit(count);
       this->m_helperBoard[count] = FEN_MAP[ch];
       break;
@@ -59,8 +61,21 @@ void Chessboard::parseFen(const QString &fen) {
   // 重置步数计数器
   this->m_historyMovesCount = 1;
 
-  // 调用预计算函数
-  this->preCalculateScores();
+  // 刷新双方的初始累加器
+  Accumulator &acc { this->getLastMove().m_acc };
+  qint32 featureIndexes[33];
+
+  // 刷新对方的累加器
+  this->m_side ^= OPP_SIDE;
+  acc.kingPos[this->m_side] = this->m_bitboards[KING + this->m_side].getLastBitIndex();
+  this->getAllFeatures(featureIndexes);
+  featureTransformer->refreshAccumulator(acc, this->m_side, featureIndexes);
+
+  // 刷新自己的累加器
+  this->m_side ^= OPP_SIDE;
+  acc.kingPos[this->m_side] = this->m_bitboards[KING + this->m_side].getLastBitIndex();
+  this->getAllFeatures(featureIndexes);
+  featureTransformer->refreshAccumulator(acc, this->m_side, featureIndexes);
 }
 
 QString Chessboard::getFen() const {
@@ -160,6 +175,23 @@ quint8 Chessboard::genNonCapMoves(ValuedMove *moveList) const {
   return total;
 }
 
+void Chessboard::getAllFeatures(qint32 *featureIndexes) const {
+  // Collect feature indexes for the side to move, using its king square from the latest accumulator
+  quint8 kingPos { this->getLastMove().m_acc.kingPos[this->m_side] };
+
+  // Walk a copy of the occupancy bitboard and emit one feature index per occupied square
+  Bitboard occupancy { this->m_occupancy };
+
+  quint8 index;
+  while ((index = occupancy.getLastBitIndex()) < 90) {  // loop ends once the index is >= 90
+    occupancy.clearBit(index);
+    *featureIndexes++ = FeatureIndex(this->m_side, index, this->m_helperBoard[index], kingPos);
+  }
+
+  // Terminate the list with -1; the caller's buffer must have room for this extra entry
+  *featureIndexes = -1;
+}
+
 bool Chessboard::isChecked() const {
   // 获取对方的选边
   quint8 oppSide = this->m_side ^ OPP_SIDE;
@@ -247,19 +279,11 @@ std::optional<qint16> Chessboard::getRepeatScore(quint8 distance) const {
       if (move->zobrist() == this->m_zobrist) {
         myFlag = (myFlag & 0x3fff) == 0 ? myFlag : 0x3fff;
         oppFlag = (oppFlag & 0x3fff) == 0 ? oppFlag : 0x3fff;
-        // 我方长打返回负分，对方长打返回正分
-        qint16 score { 0 };
-        if (myFlag > oppFlag) score = BAN_SCORE_LOSS + distance;
-        else if (myFlag < oppFlag) score = BAN_SCORE_MATE - distance;
-
-        /* 如果双方都长打或者双方都没有长打但是有重复局面就返回和棋的分数
-         * 但无论如何都要使得和棋对于第一层的那一方来说是不利的，是负分
-         * distance & 1 的作用是确定现在在那一层
-         * 说明evaluate的那一层和第一层是同一方
-         * 同一方返回负值，不同方返回正值，这样正值上到第一层就会变成负值 */
-        if (score == 0) return distance & 1 ? DRAW_SCORE : -DRAW_SCORE;
-        // 有一方长打
-        else return score;
+
+        // 我方长打返回负分，对方长打返回正分，双方长打返回0分
+        if (myFlag > oppFlag) return BAN_SCORE_LOSS + distance;
+        else if (myFlag < oppFlag) return BAN_SCORE_MATE - distance;
+        else return 0;
       }
     }
     // 如果是对方，更新对方的将军信息
@@ -282,6 +306,8 @@ bool Chessboard::makeMove(Move &move) {
     if (RED == this->m_side) this->m_blackOccupancy.clearBit(move.to());
     else this->m_redOccupancy.clearBit(move.to());
     this->m_bitboards[move.victim()].clearBit(move.to());
+    // 存活的子少了一个
+    --this->m_piece;
     // 注意，这里不用移除occupancy中move.to()位，因为攻击的棋子会移动过来
   }
 
@@ -305,6 +331,9 @@ bool Chessboard::makeMove(Move &move) {
   this->m_helperBoard[move.from()] = EMPTY;
   this->m_helperBoard[move.to()] = move.chess();
 
+  // 获取上一个累加器
+  const Accumulator &lastAcc { this->getLastMove().m_acc };
+
   // 在历史走法表中记录这一个走法
   HistoryMove &historyMove { this->m_historyMoves[this->m_historyMovesCount++] };
 
@@ -318,27 +347,25 @@ bool Chessboard::makeMove(Move &move) {
   this->m_zobrist ^= PRE_GEN.getSideZobrist();
   this->m_zobrist ^= PRE_GEN.getZobrist(move.chess(), move.from());
   this->m_zobrist ^= PRE_GEN.getZobrist(move.chess(), move.to());
-
-  if (move.isCapture()) {
-    // 吃子步需要把被吃的子的zobrist去除
-    this->m_zobrist ^= PRE_GEN.getZobrist(move.victim(), move.to());
-    // 顺便计算吃子得分
-    if (RED == this->m_side) this->m_blackScore -= VALUE[move.victim()][move.to()];
-    else this->m_redScore -= VALUE[move.victim()][move.to()];
-  }
-
-  // 计算得分
-  if (RED == this->m_side) {
-    this->m_redScore -= VALUE[move.chess()][move.from()];
-    this->m_redScore += VALUE[move.chess()][move.to()];
-  } else {
-    this->m_blackScore -= VALUE[move.chess()][move.from()];
-    this->m_blackScore += VALUE[move.chess()][move.to()];
+  // 吃子步需要把被吃的子的zobrist去除
+  if (move.isCapture()) this->m_zobrist ^= PRE_GEN.getZobrist(move.victim(), move.to());
+
+  // 如果走动的是将，就刷新自己的累加器
+  if (move.chess() == KING + this->m_side) {
+    historyMove.m_acc.kingPos[this->m_side] = move.to();
+    qint32 featureIndexes[33];
+    this->getAllFeatures(featureIndexes);
+    featureTransformer->refreshAccumulator(historyMove.m_acc, this->m_side, featureIndexes);
   }
+  // 否则就更新自己的累加器
+  else featureTransformer->updateAccumulator(lastAcc, historyMove.m_acc, this->m_side, move);
 
   // 换边
   this->m_side ^= OPP_SIDE;
 
+  // 不要忘记另一边累加器的也要更新
+  featureTransformer->updateAccumulator(lastAcc, historyMove.m_acc, this->m_side, move);
+
   // 补充对应的将军捉子信息
   if (isChecked()) historyMove.setChecked();
   else historyMove.setChase(this->getChase());
@@ -361,26 +388,18 @@ void Chessboard::unMakeMove() {
   // 还原原来的Zobrist值
   this->m_zobrist = move.zobrist();
 
-  // 还原原来的得分
-  if (RED == this->m_side) {
-    this->m_redScore -= VALUE[move.chess()][move.to()];
-    this->m_redScore += VALUE[move.chess()][move.from()];
-    if (move.isCapture()) this->m_blackScore += VALUE[move.victim()][move.to()];
-  } else {
-    this->m_blackScore -= VALUE[move.chess()][move.to()];
-    this->m_blackScore += VALUE[move.chess()][move.from()];
-    if (move.isCapture()) this->m_redScore += VALUE[move.victim()][move.to()];
-  }
-
   // 撤销这个走法
   undoMove(move);
 }
 
 void Chessboard::makeNullMove() {
   // 获取历史走法表项，并将自增走法历史表的大小
+  const Accumulator &lastAcc { this->getLastMove().m_acc };
   HistoryMove &move = this->m_historyMoves[this->m_historyMovesCount++];
   // 设置空步信息
   move.setNullMove();
+  // 复制上一个累加器的内容
+  this->getLastMove().m_acc.copyFrom(lastAcc);
   // 换边
   this->m_side ^= OPP_SIDE;
   // 计算新的Zobrist值
@@ -400,6 +419,10 @@ void Chessboard::updateHistoryValue(const Move &move, quint8 depth) {
   this->m_historyTable.updateValue(move, depth);
 }
 
+HistoryMove &Chessboard::getLastMove() {
+  return this->m_historyMoves[this->m_historyMovesCount - 1];  // mutable ref to the newest entry
+}
+
 const HistoryMove &Chessboard::getLastMove() const {
   return this->m_historyMoves[this->m_historyMovesCount - 1];
 }
@@ -421,6 +444,8 @@ void Chessboard::undoMove(const Move &move) {
     if (RED == this->m_side) this->m_blackOccupancy.setBit(move.to());
     else this->m_redOccupancy.setBit(move.to());
     this->m_bitboards[move.victim()].setBit(move.to());
+    // 恢复存活子
+    ++this->m_piece;
     // 注意，如果是吃子步则不用清空to，因为这里原来有一个棋子
   }
 
diff --git a/src/board/chessboard.h b/src/board/chessboard.h
index 1ca58b3..cc824f5 100644
--- a/src/board/chessboard.h
+++ b/src/board/chessboard.h
@@ -3,6 +3,7 @@
 #include "historymove.h"
 #include "valuedmove.h"
 #include "historytable.h"
+#include "evaluate.h"
 
 namespace PikaChess {
 class Chessboard final {
@@ -26,6 +27,12 @@ class Chessboard final {
    */
   quint8 genNonCapMoves(ValuedMove *moveList) const;
 
+  /**
+   * @brief Collects all active features for the side to move
+   * @param featureIndexes output array that receives the active feature indexes
+   */
+  void getAllFeatures(qint32 *featureIndexes) const;
+
   /** 当前是否被将军 */
   bool isChecked() const;
 
@@ -75,6 +82,7 @@ class Chessboard final {
   void updateHistoryValue(const Move &move, quint8 depth);
 
   /** 获得最后一个走法 */
+  HistoryMove &getLastMove();
   const HistoryMove &getLastMove() const;
 
   void setSide(quint8 newSide);
@@ -83,14 +91,8 @@ class Chessboard final {
 
   quint8 side() const;
 
-  /** 评价分预计算，根据局面情况预计算局面分，引擎棋力的主要来源 */
-  void preCalculateScores();
-
-  /** 局面的静态评分，只包括子力的位置分 */
-  qint16 staticScore() const;
-
   /** 获得当前局面的评分 */
-  qint16 score() const;
+  qint16 score();
 
 protected:
   /**
@@ -105,13 +107,6 @@ class Chessboard final {
    */
   quint16 getChase();
 
-  /** 王安全分，包括空头炮，炮镇窝心马，沉底炮，车封锁将门 */
-  qint16 kingSafety() const;
-
-  /** 计算王安全分的帮助函数 */
-  qint16 kingSafety_helper(quint8 side, quint8 center,
-                           quint8 left, quint8 middle, quint8 right) const;
-
 private:
   /** 用来辅助走法生成的辅助数组棋盘 */
   quint8 m_helperBoard[90];
@@ -129,9 +124,8 @@ class Chessboard final {
   /** 当前局面的Zobrist值 */
   quint64 m_zobrist;
 
-  /** 当前局面的红黑方得分 */
-  quint16 m_redScore;
-  quint16 m_blackScore;
+  /** Number of pieces remaining on the board in the current position */
+  quint8 m_piece;
 
   /** 走棋的历史记录 */
   HistoryMove m_historyMoves[256];
diff --git a/src/evaluate/accumulator.h b/src/evaluate/accumulator.h
new file mode 100644
index 0000000..977989f
--- /dev/null
+++ b/src/evaluate/accumulator.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "model.h"
+
+namespace PikaChess {
+/** Accumulator: holds the output values of the feature transformer layer */
+struct Accumulator {
+  /** King squares of both sides (indexed by RED / BLACK) */
+  quint8 kingPos[8];
+  /** Feature transformer output; these values are fed into dense layer 1 */
+  alignas(CACHE_LINE_SIZE) qint16 accumulation[8][512];
+  /** Output of the PSQT part; these values are used directly in the score */
+  alignas(CACHE_LINE_SIZE) qint32 psqtAccumulation[8][PSQT_BUCKETS];
+  /** Copies the RED and BLACK rows of both arrays and both king squares from acc */
+  void copyFrom(const Accumulator &acc) {
+    memmove(accumulation[RED], acc.accumulation[RED], sizeof(accumulation[RED]));
+    memmove(accumulation[BLACK], acc.accumulation[BLACK], sizeof(accumulation[BLACK]));
+    memmove(psqtAccumulation[RED], acc.psqtAccumulation[RED], sizeof(psqtAccumulation[RED]));
+    memmove(psqtAccumulation[BLACK], acc.psqtAccumulation[BLACK],
+            sizeof(psqtAccumulation[BLACK]));
+    kingPos[RED] = acc.kingPos[RED];
+    kingPos[BLACK] = acc.kingPos[BLACK];
+  }
+};
+}
diff --git a/src/evaluate/evaluate.cpp b/src/evaluate/evaluate.cpp
index 0de7efe..9d5a33c 100644
--- a/src/evaluate/evaluate.cpp
+++ b/src/evaluate/evaluate.cpp
@@ -1,284 +1,98 @@
 #include "chessboard.h"
+#include <fstream>
 
 namespace PikaChess {
-/** 真实用于评分的价值表 */
-/** 各个子力的价值表 */
-qint16 VALUE[14][90];
-/** 空头炮的罚分表 */
-qint16 HOLLOW_THREAT_PENALTY[2][90];
-/** 沉底炮的罚分表 */
-qint16 BOTTOM_THREAT_PENALTY[90];
-/** 缺士的罚分值 */
-qint16 ADVISOR_LEAKAGE_PENALTY[2];
-/** 先行棋的分数 */
-qint16 ADVANCED_SCORE;
-/** 炮镇窝心马的罚分表 */
-constexpr qint16 CENTER_KNIGHT_PENALTY[2][90] {{
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0, 50,  0,  0,  0,  0,
-    0,  0,  0,  0, 45,  0,  0,  0,  0,
-    0,  0,  0,  0, 40,  0,  0,  0,  0,
-    0,  0,  0,  0, 35,  0,  0,  0,  0,
-    0,  0,  0,  0, 30,  0,  0,  0,  0,
-    0,  0,  0,  0, 30,  0,  0,  0,  0,
-    0,  0,  0,  0, 30,  0,  0,  0,  0
-}, {
-    0,  0,  0,  0, 30,  0,  0,  0,  0,
-    0,  0,  0,  0, 30,  0,  0,  0,  0,
-    0,  0,  0,  0, 30,  0,  0,  0,  0,
-    0,  0,  0,  0, 35,  0,  0,  0,  0,
-    0,  0,  0,  0, 40,  0,  0,  0,  0,
-    0,  0,  0,  0, 45,  0,  0,  0,  0,
-    0,  0,  0,  0, 50,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-}};
-/** 中炮的罚分表 */
-qint16 CENTER_THREAT_PENALTY[2][90] {{
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0, 12,  0,  0,  0,  0,
-    0,  0,  0,  0, 11,  0,  0,  0,  0,
-    0,  0,  0,  0, 10,  0,  0,  0,  0,
-    0,  0,  0,  0,  8,  0,  0,  0,  0,
-    0,  0,  0,  0,  7,  0,  0,  0,  0,
-    0,  0,  0,  0,  7,  0,  0,  0,  0,
-    0,  0,  0,  0,  7,  0,  0,  0,  0
-}, {
-    0,  0,  0,  0,  7,  0,  0,  0,  0,
-    0,  0,  0,  0,  7,  0,  0,  0,  0,
-    0,  0,  0,  0,  7,  0,  0,  0,  0,
-    0,  0,  0,  0,  8,  0,  0,  0,  0,
-    0,  0,  0,  0, 10,  0,  0,  0,  0,
-    0,  0,  0,  0, 11,  0,  0,  0,  0,
-    0,  0,  0,  0, 12,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-}};
+/** Input feature transformer; it holds the parameters, so it is managed through LargePagePtr */
+LargePagePtr<FeatureTransformer> featureTransformer;
 
-qint16 Chessboard::score() const {
-  return staticScore() + kingSafety();
-}
-
-qint16 Chessboard::staticScore() const {
-  /*
-    为何此处要额外加上一个ADVANCED_SCORE先行棋分？因为执行该函数时本身是轮到该层玩家走棋，
-    但是因为各种原因只能搜索到这里了，该层玩家没有走棋而直接返回了这个局面下自己的评分!
-    实际上这样对他的评价是不够正确的，所以加上一个补偿分数，代表下一步是该玩家先行，使得评价更公正一些!
-  */
-  if (RED == this->m_side) {
-    return this->m_redScore - this->m_blackScore + ADVANCED_SCORE;
-  } else return this->m_blackScore - this->m_redScore + ADVANCED_SCORE;
-}
+/** The remaining parts of the model are held through AlignedPtr, one entry per layer stack */
+AlignedPtr<Model> model[LAYER_STACKS];
 
-qint16 Chessboard::kingSafety() const {
-  // 红黑双方的王安全分
-  qint16 redSafety { kingSafety_helper(RED, 76, 84, 85, 86) };
-  qint16 blackSafety { kingSafety_helper(BLACK, 13, 3, 4, 5) };
+/** File name of the NNUE network and the network description text read from its header */
+std::string fileName { "xiangqi.nnue" };
+std::string netDescription;
 
-  // 根据选边返回王安全分
-  if (RED == this->m_side) return redSafety - blackSafety;
-  else return blackSafety - redSafety;
+/** Allocates storage for a T with _mm_malloc at alignof(T) and fills it with zero bytes */
+template <typename T>
+void ZeroParameters(AlignedPtr<T> &pointer) {
+  pointer.reset((T*)_mm_malloc(sizeof(T), alignof(T)));  // NOTE(review): not null-checked
+  std::memset(pointer.get(), 0, sizeof(T));
 }
 
-qint16 Chessboard::kingSafety_helper(quint8 side, quint8 center,
-                                     quint8 left, quint8 middle, quint8 right) const {
-  bool red { side == RED };
-  quint8 oppSide = side ^ OPP_SIDE;
-
-  // 获取士的形状
-  quint8 shape { SHAPE_NONE };
-  // 缺士
-  if (2 not_eq this->m_bitboards[ADVISOR + side].countBits(red)) shape = SHAPE_LEAK;
-  // 将不在中间
-  else if (KING + side not_eq this->m_helperBoard[middle]) shape = SHAPE_HOLLOW;
-  // 左右士
-  else if (this->m_helperBoard[left] == ADVISOR + side and
-           this->m_helperBoard[left] == this->m_helperBoard[right]) {
-    shape = SHAPE_CENTER;
-  // 确认花心有一个士，左或右有一个士
-  } else if (this->m_helperBoard[center] == ADVISOR + side and
-           (this->m_helperBoard[left] == ADVISOR + side or
-            this->m_helperBoard[right] == ADVISOR + side)) shape = SHAPE_LR;
-
-  // 根据士的形状来决定逻辑
-  quint8 index;
-  qint16 safety { 0 };
-  switch(shape) {
-  case SHAPE_LEAK:
-    // 缺士怕双车
-    if (2 == this->m_bitboards[ROOK + oppSide].countBits()) {
-      safety -= ADVISOR_LEAKAGE_PENALTY[red];
-    }
-    break;
-
-  case SHAPE_HOLLOW:
-    // 有双士，但将占领花心
-    if (KING + side == this->m_helperBoard[center]) safety -= 20;
-    break;
-
-  case SHAPE_LR:
-    // 中士加左或右士，查看将的另一边有无沉底炮
-    index = (PRE_GEN.getRookAttack(middle, this->m_occupancy) &
-             this->m_bitboards[CANNON + oppSide]).getLastBitIndex();
-    if (index < 90) safety -= BOTTOM_THREAT_PENALTY[index];
-    // 计算中炮威胁
-    index = (PRE_GEN.getCannonChase(center, false, this->m_occupancy) &
-             this->m_bitboards[CANNON + oppSide]).getLastBitIndex();
-    if (index < 90) {
-      safety -= CENTER_THREAT_PENALTY[red][index];
-      // 将门被对方控制，给予一定的罚分
-      index = ADVISOR + side == this->m_helperBoard[right] ? left : right;
-      if (this->isProtected(index, side) or
-          PRE_GEN.getRookAttack(index, this->m_occupancy) & this->m_bitboards[KING + oppSide]) {
-        safety -= 20;
-      }
-      // 如果车在底线保护将，给予更大的罚分
-      if (PRE_GEN.getRookAttack(middle, this->m_occupancy) &
-          this->m_bitboards[ROOK + side]) safety -= 80;
-    }
-    break;
-
-  case SHAPE_CENTER:
-    // 两边士，查看有无空头炮
-    index = (PRE_GEN.getRookAttack(middle, this->m_occupancy) &
-             this->m_bitboards[CANNON + oppSide]).getLastBitIndex();
-    if (index < 90) safety -= HOLLOW_THREAT_PENALTY[red][index];
-    // 如果不存在空头炮，就计算是否存在炮镇窝心马
-    else if (KNIGHT + side == this->m_helperBoard[center]) {
-      index = (PRE_GEN.getCannonChase(center, false, this->m_occupancy) &
-               this->m_bitboards[CANNON + oppSide]).getLastBitIndex();
-      if (index < 90) safety -= CENTER_KNIGHT_PENALTY[red][index];
-    }
-    break;
-
-  default: break;
-  }
-
-  return safety;
+template <typename T>
+void ZeroParameters(LargePagePtr<T> &pointer) {  // variant taking a LargePagePtr
+  pointer.reset((T*)(AlignedLargePageAlloc(sizeof(T))));  // storage from AlignedLargePageAlloc
+  std::memset(pointer.get(), 0, sizeof(T));  // zero-fill; NOTE(review): no null check first
 }
 
-void Chessboard::preCalculateScores() {
-  qint16 pawnAttacking[90], pawnAttackless[90];
+void ZeroParameters() {  // allocates and zero-fills the feature transformer and every model
+  ZeroParameters(featureTransformer);
+  for (quint8 i = 0; i < LAYER_STACKS; ++i) ZeroParameters(model[i]);  // one per layer stack
+}
 
-  // 首先判断局势处于开中局还是残局阶段，方法是计算各种棋子的数量，按照车=6、马炮=3、其它=1相加。
-  qint16 midgameValue = (this->m_bitboards[RED_ADVISOR] | this->m_bitboards[BLACK_ADVISOR] |
-                         this->m_bitboards[RED_BISHOP] | this->m_bitboards[BLACK_BISHOP] |
-                         this->m_bitboards[RED_PAWN] | this->m_bitboards[BLACK_PAWN])
-                            .countBits();
-  midgameValue += (this->m_bitboards[RED_KNIGHT] | this->m_bitboards[BLACK_KNIGHT] |
-                   this->m_bitboards[RED_CANNON] | this->m_bitboards[BLACK_CANNON])
-                      .countBits() * 3;
-  midgameValue += (this->m_bitboards[RED_ROOK] | this->m_bitboards[BLACK_ROOK]).countBits() * 6;
-  // 使用二次函数，子力很少时才认为接近残局
-  midgameValue = (132 - midgameValue) * midgameValue / 66;
-  ADVANCED_SCORE = (4 * midgameValue + 2) / 66;
+/** Reads the NNUE file header; returns false on a version/hash mismatch or a failed read */
+bool ReadHeader(std::istream &stream) {
+  // Read the version field first and reject a mismatch
+  if (ReadInt<quint32>(stream) not_eq VERSION) return false;
 
-  auto intLerp { [&] (const qint16 a, const qint16 b) {
-    return (a * midgameValue + b * (66 - midgameValue)) / 66;
-  } };
+  // Then read the file-level hash value and reject a mismatch
+  if (ReadInt<quint32>(stream) not_eq HASH_VALUE_FILE) return false;
 
-  for (quint8 index { 0 }; index < 90; ++index) {
-    // 计算过渡性的分数，首先回填将的分数
-    VALUE[RED_KING][index] = VALUE[BLACK_KING][89 - index] =
-        intLerp(CONST_KING_MIDGAME[index], CONST_KING_ENDGAME[index]);
-    // 然后回填马的分数
-    VALUE[RED_KNIGHT][index] = VALUE[BLACK_KNIGHT][89 - index] =
-        intLerp(CONST_KNIGHT_MIDGAME[index], CONST_KNIGHT_ENDGAME[index]);
-    // 然后回填车的分数
-    VALUE[RED_ROOK][index] = VALUE[BLACK_ROOK][89 - index] =
-        intLerp(CONST_ROOK_MIDGAME[index], CONST_ROOK_ENDGAME[index]);
-    // 然后回填炮的分数
-    VALUE[RED_CANNON][index] = VALUE[BLACK_CANNON][89 - index] =
-        intLerp(CONST_CANNON_MIDGAME[index], CONST_CANNON_ENDGAME[index]);
-    // 最后计算兵的分数
-    pawnAttacking[index] =
-        intLerp(CONST_PAWN_ATTACKING_MIDGAME[index], CONST_PAWN_ATTACKING_ENDGAME[index]);
-    pawnAttackless[index] =
-        intLerp(CONST_PAWN_ATTACKLESS_MIDGAME[index], CONST_PAWN_ATTACKLESS_ENDGAME[index]);
-  }
+  // Read the length of the network description text
+  quint32 length = ReadInt<quint32>(stream);
+  netDescription.resize(length);
+  // Read the text into the string's own buffer: data() is writable, c_str() must not be written
+  stream.read(netDescription.data(), length);
 
-  // 计算空头炮罚分
-  for (quint8 index { 0 }; index < 90; ++index) {
-    HOLLOW_THREAT_PENALTY[1][index] = HOLLOW_THREAT_PENALTY[0][89 - index] =
-        CONST_HOLLOW_THREAT_PENALTY[index] * (midgameValue + 66) / 132;
-  }
+  return not stream.fail();
+}
 
-  // 然后判断各方是否处于进攻状态，方法是计算各种过河棋子的数量，按照车马2炮兵1相加。
-  qint16 redAttacks { 0 }, blackAttacks { 0 };
-  redAttacks += 2 * ((this->m_bitboards[RED_ROOK] | this->m_bitboards[RED_KNIGHT]) &
-                     PRE_GEN.getBlackSide()).countBits();
-  redAttacks += ((this->m_bitboards[RED_CANNON] | this->m_bitboards[RED_PAWN]) &
-                 PRE_GEN.getBlackSide()).countBits();
-  blackAttacks += 2 * ((this->m_bitboards[BLACK_ROOK] | this->m_bitboards[BLACK_KNIGHT]) &
-                       PRE_GEN.getRedSide()).countBits();
-  blackAttacks += ((this->m_bitboards[BLACK_CANNON] | this->m_bitboards[BLACK_PAWN]) &
-                   PRE_GEN.getRedSide()).countBits();
+/** Reads the weights and biases of one network layer; returns false on any failure */
+template <typename T>
+bool ReadParameters(std::istream &stream, T &layer) {
+  // Read the layer's hash value first and check it against T::getHashValue()
+  if (not stream or ReadInt<quint32>(stream) not_eq T::getHashValue()) return false;
+  // Then delegate to the layer's own readParameters method
+  return layer.readParameters(stream);
+}
 
-  // 如果本方轻子数比对方多，那么每多一个轻子(车算2个轻子)威胁值加2。威胁值最多不超过8。
-  qint16 redLights = 2 * this->m_bitboards[RED_ROOK].countBits();
-  redLights += (this->m_bitboards[RED_KNIGHT] | this->m_bitboards[RED_CANNON]).countBits();
-  qint16 blackLights = 2 * this->m_bitboards[BLACK_ROOK].countBits();
-  blackLights += (this->m_bitboards[BLACK_KNIGHT] | this->m_bitboards[BLACK_CANNON]).countBits();
-  if (redLights > blackLights) redAttacks += (redLights - blackLights) * 2;
-  else blackAttacks += (blackLights - redLights) * 2;
-  redAttacks = std::min(redAttacks, qint16(8));
-  blackAttacks = std::min(blackAttacks, qint16(8));
+/** Reads the header plus the weights and biases of every network part; false on any failure */
+bool ReadParameters(std::istream& stream) {
+  // Read the header first
+  if (not ReadHeader(stream)) return false;
 
-  // 填写红黑双方的缺士罚分
-  ADVISOR_LEAKAGE_PENALTY[0] = 10 * redAttacks;
-  ADVISOR_LEAKAGE_PENALTY[1] = 10 * blackAttacks;
+  // Then read the weights and biases of the feature transformer
+  if (not ReadParameters(stream, *featureTransformer)) return false;
 
-  // 计算沉底炮的威胁值
-  for (quint8 index : { 0, 1, 7, 8 }) {
-    BOTTOM_THREAT_PENALTY[index] = CONST_BOTTOM_THREAT_PENALTY[index] * redAttacks / 8;
-  }
-  for (quint8 index : { 81, 82, 88, 89 }) {
-    BOTTOM_THREAT_PENALTY[index] = CONST_BOTTOM_THREAT_PENALTY[index] * blackAttacks / 8;
+  // Then read the weights and biases of each layer stack
+  for (quint8 i = 0; i < LAYER_STACKS; ++i) {
+    if (not ReadParameters(stream, *model[i])) return false;
   }
 
-  auto redLerp { [&] (const qint16 a, const qint16 b) {
-    return (a * redAttacks + b * (8 - redAttacks)) / 8;
-  } };
+  // Finally require that the stream is still good and that the next character is EOF
+  return stream and stream.peek() == std::ios::traits_type::eof();
+}
 
-  auto blackLerp { [&] (const qint16 a, const qint16 b) {
-    return (a * blackAttacks + b * (8 - blackAttacks)) / 8;
-  } };
+/** Initializes everything from the NNUE file; throws a string literal if reading it fails */
+void NNUEInit() {
+  ZeroParameters();
+  std::ifstream nnueFile { fileName, std::ios::binary };
+  if (not ReadParameters(nnueFile)) throw "读取NNUE神经网络参数失败";
+}
 
-  // 计算象士兵的分值
-  for (quint8 index { 0 }; index < 90; ++index) {
-    VALUE[RED_BISHOP][index] =
-        blackLerp(CONST_BISHOP_THREATENED[index], CONST_BISHOP_THREATLESS[index]);
-    VALUE[BLACK_BISHOP][89 - index] =
-        redLerp(CONST_BISHOP_THREATENED[index], CONST_BISHOP_THREATLESS[index]);
-    VALUE[RED_ADVISOR][index] =
-        blackLerp(CONST_ADVISOR_THREATENED[index], CONST_ADVISOR_THREATLESS[index]);
-    VALUE[BLACK_ADVISOR][89 - index] =
-        redLerp(CONST_ADVISOR_THREATENED[index], CONST_ADVISOR_THREATLESS[index]);
-    VALUE[RED_PAWN][index] = redLerp(pawnAttacking[index], pawnAttackless[index]);
-    VALUE[BLACK_PAWN][89 - index] = blackLerp(pawnAttacking[index], pawnAttackless[index]);
-  }
+/** Returns the evaluation of the current position for the side to move */
+qint16 Chessboard::score() {
+  // Scratch space for intermediate results
+  alignas(CACHE_LINE_SIZE) quint8 transformedFeatures[FeatureTransformer::BufferSize];
+  alignas(CACHE_LINE_SIZE) char buffer[Model::BufferSize];
 
-  // 调整不受威胁方少掉的士、象分值
-  this->m_redScore = 10 * (8 - blackAttacks);
-  this->m_blackScore = 10 * (8 - redAttacks);
+  /* The bucket resembles the game phase (opening -> middlegame -> endgame) of the former tapered
+   * evaluation; HalfKAv2 has 8 small models, one per phase, chosen from the piece count below */
+  const quint8 bucket = (this->m_piece - 1) / 4;
+  const auto psqt = featureTransformer->transform(this->getLastMove().m_acc, this->m_side,
+                                                  transformedFeatures, bucket);
+  const auto output = model[bucket]->propagate(transformedFeatures, buffer);
 
-  // 最后重新计算子力位置分
-  quint8 index;
-  Bitboard redOccupancy { this->m_redOccupancy };
-  while ((index = redOccupancy.getLastBitIndex()) < 90) {
-    this->m_redScore += VALUE[this->m_helperBoard[index]][index];
-    redOccupancy.clearBit(index);
-  }
-  Bitboard blackOccupancy { this->m_blackOccupancy };
-  while ((index = blackOccupancy.getLastBitIndex()) < 90) {
-    this->m_blackScore += VALUE[this->m_helperBoard[index]][index];
-    blackOccupancy.clearBit(index);
-  }
+  return (psqt + output[0]) >> OUTPUT_SCALE_BITS;
 }
 }
diff --git a/src/evaluate/evaluate.h b/src/evaluate/evaluate.h
new file mode 100644
index 0000000..60768cf
--- /dev/null
+++ b/src/evaluate/evaluate.h
@@ -0,0 +1,92 @@
+#pragma once
+#include "featuretransformer.h"
+
+#include "windows.h"
+#include <memory>
+
+namespace PikaChess {
+/** Hash value of the NNUE file: the feature transformer's hash XOR the model's hash */
+constexpr quint32 HASH_VALUE_FILE = FeatureTransformer::getHashValue() ^ Model::getHashValue();
+
+/** Allocates allocSize bytes, trying Windows large pages first and falling back to a regular VirtualAlloc; returns nullptr when large pages are unsupported or the process token cannot be opened */
+inline void *AlignedLargePageAlloc(quint64 allocSize) {
+  HANDLE hProcessToken { };
+  LUID luid { };
+  void* mem = nullptr;
+
+  const quint64 largePageSize = GetLargePageMinimum();
+  if (not largePageSize) return nullptr;
+
+  // Open the process token so that the SeLockMemory privilege can be enabled
+  if (not OpenProcessToken(GetCurrentProcess(),
+                           TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) return nullptr;
+
+  if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid)) {
+    TOKEN_PRIVILEGES tp { };
+    TOKEN_PRIVILEGES prevTp { };
+    DWORD prevTpLen = 0;
+
+    tp.PrivilegeCount = 1;
+    tp.Privileges[0].Luid = luid;
+    tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+    // Adjust the token privileges, saving the previous state in prevTp
+    AdjustTokenPrivileges(
+        hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen);
+
+    // Only continue with large pages if the privilege was actually obtained
+    if (GetLastError() == ERROR_SUCCESS) {
+      // Round the size up to a multiple of the large page size and allocate the pages
+      allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
+      mem = VirtualAlloc(
+          NULL, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
+
+      // Restore the previous token privileges
+      AdjustTokenPrivileges(hProcessToken, FALSE, &prevTp, 0, NULL, NULL);
+    }
+  }
+
+  CloseHandle(hProcessToken);
+
+  // Return the large-page address on success; otherwise allocate again with the ordinary API
+  return mem ? mem : VirtualAlloc(NULL, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+}
+
+void NNUEInit();  // declaration only; the definition is not part of this header
+
+/** Deleter used by AlignedPtr: runs the destructor, then releases the storage with _mm_free */
+template <typename T>
+struct AlignedDeleter {
+  void operator()(T* ptr) const {
+    ptr->~T();
+    _mm_free(ptr);
+  }
+};
+
+/** Deleter used by LargePagePtr: destroys the object, then releases its pages with VirtualFree; a failed release is reported on stderr and terminates the process */
+template <typename T>
+struct LargePageDeleter {
+  void operator()(T* ptr) const {
+    if (not ptr) return;  // guard before touching the object: nothing to destroy or release
+    ptr->~T();
+    if (not VirtualFree(ptr, 0, MEM_RELEASE)) {
+      const DWORD err = GetLastError();
+      std::cerr << "无法释放大页内存， 错误代码: 0x"
+                << std::hex << err << std::dec << std::endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+};
+
+template <typename T>
+using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
+
+template <typename T>
+using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
+
+/** The input feature transformer; it holds the parameters, so it is managed through large pages */
+extern LargePagePtr<FeatureTransformer> featureTransformer;
+
+/** The remaining part of the model (one instance per layer stack) uses aligned allocation */
+extern AlignedPtr<Model> model[LAYER_STACKS];
+}
diff --git a/src/evaluate/layer/clippedrelu.h b/src/evaluate/layer/clippedrelu.h
new file mode 100644
index 0000000..ddcbb0f
--- /dev/null
+++ b/src/evaluate/layer/clippedrelu.h
@@ -0,0 +1,90 @@
+#pragma once
+#include "global.h"
+
+namespace PikaChess {
+/** ClippedReLU layer of the neural network, <previous layer> */
+template <typename PreviousLayer>
+class ClippedReLU {
+public:
+  /** Input and output element types */
+  using InputType = typename PreviousLayer::OutputType;
+  using OutputType = quint8;
+
+  /** Input and output tensor dimensions; they are equal because the activation is element-wise */
+  static constexpr quint32 InputDimensions = PreviousLayer::OutputDimensions;
+  static constexpr quint32 OutputDimensions = InputDimensions;
+
+  /** Buffer size needed by this layer alone, in bytes, rounded up to the CPU cache line size */
+  static constexpr quint32 SelfBufferSize =
+      CeilToMultiple<quint32>(OutputDimensions * sizeof(OutputType), CACHE_LINE_SIZE);
+
+  /** Total buffer size up to and including this layer: the previous layers' buffers plus this layer's own */
+  static constexpr quint32 BufferSize = PreviousLayer::BufferSize + SelfBufferSize;
+
+  /** Hash value embedded in the NNUE network file */
+  static constexpr std::uint32_t getHashValue() {
+    quint32 hashValue = 0x538D24C7u;
+    hashValue += PreviousLayer::getHashValue();
+    return hashValue;
+  }
+
+  /** Reads weights and biases from the file; this layer has none, so it only forwards to the previous layer */
+  bool readParameters(std::istream& stream) { return previousLayer.readParameters(stream); }
+
+  /** Forward propagation: writes the activated values to the start of buffer and returns that pointer */
+  const OutputType* propagate(const quint8 *transformedFeatures, char* buffer) const {
+    // Run the previous layer first (it uses the buffer region behind ours) to obtain this layer's input
+    const auto input = previousLayer.propagate(transformedFeatures, buffer + SelfBufferSize);
+    // Output pointer
+    const auto output = reinterpret_cast<OutputType*>(buffer);
+
+    // Input dimension is a multiple of SIMD_WIDTH: treated as 32 inputs, processed with __m256i
+    if constexpr (InputDimensions % SIMD_WIDTH == 0) {
+      // Lower bound of the clamp
+      const __m256i Zero = _mm256_setzero_si256();
+      // Permutation that restores the element order after the packs
+      const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
+
+      // Input and output pointers in units of __m256i
+      const auto in = reinterpret_cast<const __m256i*>(input);
+      const auto out = reinterpret_cast<__m256i*>(output);
+
+      // Two __m256i at a time: saturating pack 32-bit -> 16-bit, then arithmetic right shift by WEIGHTS_SCALE_BITS
+      const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
+                                                   _mm256_load_si256(&in[0]),
+                                                   _mm256_load_si256(&in[1])),
+                                               WEIGHTS_SCALE_BITS);
+      const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
+                                                   _mm256_load_si256(&in[2]),
+                                                   _mm256_load_si256(&in[3])),
+                                               WEIGHTS_SCALE_BITS);
+
+      // Saturating pack 16-bit -> 8-bit caps at 127 and max with zero gives the lower bound, i.e. clamp(0, 127); the packs shuffle the order, so permute at the end
+      _mm256_store_si256(out, _mm256_permutevar8x32_epi32(
+                                  _mm256_max_epi8(_mm256_packs_epi16(words0, words1),
+                                                  Zero), Offsets));
+    }
+    // Otherwise the input is treated as 16 wide: same steps with __m128i, but the order is kept, so no permutation is needed
+    else {
+      const __m128i Zero = _mm_setzero_si128();
+      const auto in = reinterpret_cast<const __m128i*>(input);
+      const auto out = reinterpret_cast<__m128i*>(output);
+      const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
+                                                _mm_load_si128(&in[0]),
+                                                _mm_load_si128(&in[1])),
+                                            WEIGHTS_SCALE_BITS);
+      const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
+                                                _mm_load_si128(&in[2]),
+                                                _mm_load_si128(&in[3])),
+                                            WEIGHTS_SCALE_BITS);
+      _mm_store_si128(out, _mm_max_epi8(_mm_packs_epi16(words0, words1), Zero));
+    }
+
+    return output;
+  }
+
+private:
+  /** The previous layer */
+  PreviousLayer previousLayer;
+};
+}
diff --git a/src/evaluate/layer/dense.h b/src/evaluate/layer/dense.h
new file mode 100644
index 0000000..43dcf74
--- /dev/null
+++ b/src/evaluate/layer/dense.h
@@ -0,0 +1,151 @@
+#pragma once
+#include "global.h"
+
+namespace PikaChess {
+/** Dense (fully connected) layer of the neural network <previous layer, output tensor dimension 1 x OutDims of this layer> */
+template <typename PreviousLayer, quint32 OutDims>
+class Dense {
+public:
+  /** Element types of the input tensor and of the output tensor */
+  using InputType = typename PreviousLayer::OutputType;
+  using OutputType = qint32;
+
+  /** Input tensor dimension, i.e. the output tensor dimension of the previous layer */
+  static constexpr quint32 InputDimensions = PreviousLayer::OutputDimensions;
+  /** Input tensor dimension rounded up to a multiple of MAX_SIMD_WIDTH */
+  static constexpr quint32 PaddedInputDimensions =
+      CeilToMultiple<quint32>(InputDimensions, MAX_SIMD_WIDTH);
+  /** Output tensor dimension */
+  static constexpr quint32 OutputDimensions = OutDims;
+  /** Number of output elements one SIMD operation can handle:
+   *  with AVX2 (__m256i, 32 bytes) and a 4-byte qint32 output type that is at most 32 / 4 = 8 output elements */
+  static constexpr quint32 OutputSIMDWidth = SIMD_WIDTH / 4;
+
+  /** Buffer size needed by this layer alone, in bytes, rounded up to the CPU cache line size */
+  static constexpr quint64 SelfBufferSize =
+      CeilToMultiple<quint32>(OutputDimensions * sizeof(OutputType), CACHE_LINE_SIZE);
+
+  /** Total buffer size up to and including this layer: the previous layers' buffers plus this layer's own */
+  static constexpr quint64 BufferSize = PreviousLayer::BufferSize + SelfBufferSize;
+
+  /** Constant multiplier (all 16-bit lanes set to 1) used with _mm256_madd_epi16 */
+  static inline const __m256i Ones256 = _mm256_set1_epi16(1);
+
+  /** Hash value embedded in the NNUE network file */
+  static constexpr quint32 getHashValue() {
+    quint32 hashValue = 0xCC03DAE4u;
+    hashValue += OutputDimensions;
+    hashValue ^= PreviousLayer::getHashValue() >> 1;
+    hashValue ^= PreviousLayer::getHashValue() << 31;
+    return hashValue;
+  }
+
+  /** Reads the weights and biases of the network from the file into memory; returns false on a stream failure */
+  bool readParameters(std::istream& stream) {
+    // First let the previous layer read its own parameters (recursive call)
+    if (!previousLayer.readParameters(stream)) return false;
+    // Then read the biases, one per output
+    for (quint64 i = 0; i < OutputDimensions; ++i) {
+      biases[i] = ReadInt<BiasType>(stream);
+    }
+    // Then read the weights, regrouped for the SIMD code: with out = i / PaddedInputDimensions and in = i % PaddedInputDimensions the target index is (in / 4) * OutputDimensions * 4 + out * 4 + in % 4
+    for (quint64 i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) {
+      weights[
+          (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
+          i / PaddedInputDimensions * 4 +
+          i % 4
+      ] = ReadInt<WeightType>(stream);
+    }
+    return !stream.fail();
+  }
+
+  /** Forward propagation: writes this layer's outputs to the start of buffer and returns that pointer */
+  const OutputType *propagate(const quint8* transformedFeatures, char *buffer) const {
+    // Run the previous layer first (it uses the buffer region behind ours) to obtain this layer's input
+    const auto input = previousLayer.propagate(transformedFeatures, buffer + SelfBufferSize);
+
+    // Pointer to the output array
+    const auto output = (OutputType*)(buffer);
+
+    /* Only an output dimension that is a multiple of OutputSIMDWidth, or exactly 1 (the last layer), is handled; for any other value nothing is written to output.
+     * constexpr if leaves it to the compiler to keep only the branch that applies */
+    if constexpr (OutputDimensions % OutputSIMDWidth == 0) {
+      // Inputs are consumed in chunks of 4 bytes; this is the number of such chunks
+      constexpr qint32 NumChunks = InputDimensions / 4;
+
+      // View the input as 32-bit values so that _mm256_set1_epi32 below broadcasts 4 input bytes at once
+      const auto input32 = (const qint32*)(input);
+      // Output pointer in units of __m256i
+      __m256i *outptr = (__m256i*)(output);
+      // Start from the biases: copy them into output
+      std::memmove(output, biases, OutputDimensions * sizeof(OutputType));
+
+      /* Each iteration consumes four chunks (i .. i + 3);
+       * the bound NumChunks - 3 keeps i + 3 inside the NumChunks available chunks,
+       * so a trailing group of fewer than four chunks would not be processed */
+      for (qint32 i = 0; i < NumChunks - 3; i += 4) {
+        const __m256i in0 = _mm256_set1_epi32(input32[i + 0]);
+        const __m256i in1 = _mm256_set1_epi32(input32[i + 1]);
+        const __m256i in2 = _mm256_set1_epi32(input32[i + 2]);
+        const __m256i in3 = _mm256_set1_epi32(input32[i + 3]);
+        const auto col0 = (const __m256i*)(&weights[(i + 0) * OutputDimensions * 4]);
+        const auto col1 = (const __m256i*)(&weights[(i + 1) * OutputDimensions * 4]);
+        const auto col2 = (const __m256i*)(&weights[(i + 2) * OutputDimensions * 4]);
+        const auto col3 = (const __m256i*)(&weights[(i + 3) * OutputDimensions * 4]);
+        // Multiply and add matching positions in groups of four 8-bit values, four __m256i per step
+        for (qint32 j = 0; j * OutputSIMDWidth < OutputDimensions; ++j) {
+          __m256i product0 = _mm256_maddubs_epi16(in0, col0[j]);
+          __m256i product1 = _mm256_maddubs_epi16(in1, col1[j]);
+          __m256i product2 = _mm256_maddubs_epi16(in2, col2[j]);
+          __m256i product3 = _mm256_maddubs_epi16(in3, col3[j]);
+          product0 = _mm256_adds_epi16(product0, product1);
+          product0 = _mm256_madd_epi16(product0, Ones256);
+          product2 = _mm256_adds_epi16(product2, product3);
+          product2 = _mm256_madd_epi16(product2, Ones256);
+          outptr[j] = _mm256_add_epi32(outptr[j], _mm256_add_epi32(product0, product2));
+        }
+      }
+    }
+    // With a single output no regrouping is needed: multiply inputs and weights position by position and sum them up
+    else if constexpr (OutputDimensions == 1) {
+      const auto inputVector = (const __m256i*)(input);
+
+      // Number of SIMD steps needed to consume the whole (padded) input
+      constexpr qint32 NumChunks = PaddedInputDimensions / SIMD_WIDTH;
+      __m256i sum0 = _mm256_setzero_si256();
+      const auto row0 = (const __m256i*)(&weights[0]);
+
+      for (qint32 j = 0; j < NumChunks; ++j) {
+        // Multiply and add matching positions in groups of four 8-bit values
+        sum0 = _mm256_add_epi32(sum0, _mm256_madd_epi16(_mm256_maddubs_epi16(
+                                                            inputVector[j], row0[j]), Ones256));
+      }
+
+      // Horizontal sum of the 32-bit lanes (the bias is added last): first add the two 128-bit halves lane by lane
+      __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum0),
+                                     _mm256_extracti128_si256(sum0, 1));
+      // A+B B+A D+C C+D
+      sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
+      // B+A+D+C A+B+C+D C+D+A+B D+C+B+A
+      sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
+      // D+C+B+A+bias
+      output[0] = _mm_cvtsi128_si32(sum128) + biases[0];
+    }
+
+    return output;
+  }
+
+private:
+  /** The bias type matches the output type of the layer */
+  using BiasType = OutputType;
+  /** The weight type has the width of the input type, but is signed so weights can be negative */
+  using WeightType = qint8;
+
+  /** The previous layer */
+  PreviousLayer previousLayer;
+
+  /** Biases and weights, aligned to the cache line size to speed up the SIMD code */
+  alignas(CACHE_LINE_SIZE) BiasType biases[OutputDimensions];
+  alignas(CACHE_LINE_SIZE) WeightType weights[OutputDimensions * PaddedInputDimensions];
+};
+}
diff --git a/src/evaluate/layer/featuretransformer.h b/src/evaluate/layer/featuretransformer.h
new file mode 100644
index 0000000..b3da680
--- /dev/null
+++ b/src/evaluate/layer/featuretransformer.h
@@ -0,0 +1,204 @@
+#pragma once
+#include "global.h"
+#include "model.h"
+#include "move.h"
+#include "accumulator.h"
+
+namespace PikaChess {
+/** Type of the biases */
+using BiasType = qint16;
+/** Types of the weights and of the PSQT weights */
+using WeightType = qint16;
+using PSQTWeightType = qint32;
+
+/** Number of registers used per tile with AVX2; the CPU has 16 YMM registers in total */
+static constexpr quint8 NUM_REGS = 16;
+
+/** The feature transformer */
+class FeatureTransformer {
+private:
+  /** Dimension of the transformed features of one side */
+  static constexpr quint32 HalfDimensions = TRANSFORMED_FEATURE_DIMENSIONS;
+  /** Number of values handled per tile: 16 registers with 16 values each, i.e. 256 values at a time */
+  static constexpr quint32 TileHeight = 256;
+
+  /** Hash value tied to this layer in the NNUE file */
+  static constexpr quint32 HASH_VALUE = 0x5f234cb8u;
+
+public:
+  /** Output type of the feature transformation */
+  using OutputType = quint8;
+
+  /** Input dimension and output dimension (both sides together) */
+  static constexpr quint32 InputDimensions = INPUT_DIMENSION;
+  static constexpr quint32 OutputDimensions = HalfDimensions * 2;
+
+  /** Buffer size needed by this layer, in bytes */
+  static constexpr quint64 BufferSize = OutputDimensions * sizeof(OutputType);
+
+  /** Hash value embedded in the NNUE network file */
+  static constexpr quint32 getHashValue() { return HASH_VALUE ^ OutputDimensions; }
+
+  /** Reads biases, weights and PSQT weights from the file into memory; returns false on a stream failure */
+  bool readParameters(std::istream& stream) {
+    ReadInt<BiasType>(stream, biases, HalfDimensions);
+    ReadInt<WeightType>(stream, weights, HalfDimensions * INPUT_DIMENSION);
+    ReadInt<PSQTWeightType>(stream, psqtWeights, PSQT_BUCKETS * INPUT_DIMENSION);
+    return !stream.fail();
+  }
+
+  /** Converts the accumulator of a position into the clamped 8-bit features in output and returns the PSQT part of the score */
+  qint32 transform(Accumulator &accumulator, quint8 side, OutputType* output, int bucket) const {
+    const quint8 perspectives[2] = { side, quint8(side ^ OPP_SIDE) };
+    const auto &accumulation = accumulator.accumulation;
+    const auto &psqtAccumulation = accumulator.psqtAccumulation;
+
+    // PSQT part of the score: half the difference between the two perspectives' PSQT accumulators for this bucket
+    const auto psqt = (psqtAccumulation[perspectives[0]][bucket]
+                       - psqtAccumulation[perspectives[1]][bucket]) >> 1;
+
+    // Number of output chunks (one __m256i each) per perspective
+    constexpr quint32 NumChunks = HalfDimensions / SIMD_WIDTH;
+    // Lower bound of the ClippedReLU
+    const __m256i Zero = _mm256_setzero_si256();
+
+    // Apply the ClippedReLU to the transformed features of both perspectives; the `side` perspective is written first
+    for (quint8 p = 0; p < 2; ++p) {
+      const quint32 offset = HalfDimensions * p;
+      auto out = reinterpret_cast<__m256i*>(&output[offset]);
+      for (quint8 j = 0; j < NumChunks; ++j)
+      {
+        __m256i sum0 = _mm256_load_si256(&reinterpret_cast<const __m256i*>
+                                         (accumulation[perspectives[p]])[j * 2 + 0]);
+        __m256i sum1 = _mm256_load_si256(&reinterpret_cast<const __m256i*>
+                                         (accumulation[perspectives[p]])[j * 2 + 1]);
+
+        _mm256_store_si256(&out[j], _mm256_permute4x64_epi64(
+                                        _mm256_max_epi8(_mm256_packs_epi16(sum0, sum1), Zero),
+                                        0b11011000));
+      }
+    }
+
+    return psqt;
+  }
+
+  /** Incrementally derives newAcc from oldAcc for the `side` perspective after `move`; normally called twice per move, once for each side's accumulator */
+  void updateAccumulator(const Accumulator &oldAcc, Accumulator &newAcc,
+                         quint8 side, const Move& move) const {
+    // Working registers
+    __m256i acc[NUM_REGS];
+    __m256i psqt;
+
+    // Carry the king position over first
+    newAcc.kingPos[side] = oldAcc.kingPos[side];
+
+    // A move adds exactly one feature: the moving piece on its destination square
+    quint32 added { FeatureIndex(side, move.to(), move.chess(), newAcc.kingPos[side]) };
+
+    // A move removes one or two features: the moving piece on its origin square and, for a capture, the captured piece on the destination square
+    qint32 removed[2];
+    // The square the moving piece left
+    removed[0] = FeatureIndex(side, move.from(), move.chess(), newAcc.kingPos[side]);
+    // A capture contributes the second removed feature; otherwise -1 marks the slot as unused
+    if (move.isCapture()) {
+      removed[1] = FeatureIndex(side, move.to(), move.victim(), newAcc.kingPos[side]);
+    } else removed[1] = -1;
+
+    // First handle the feature-transformation part, one tile of TileHeight values at a time
+    for (quint32 j = 0; j < HalfDimensions / TileHeight; ++j) {
+      // Load the tile from the old accumulator into the registers
+      auto accTile = (__m256i*)(&oldAcc.accumulation[side][j * TileHeight]);
+      for (quint32 k = 0; k < NUM_REGS; ++k) acc[k] = _mm256_load_si256(&accTile[k]);
+
+      // Subtract the weight columns of the removed features
+      for (qint32 index : removed) {
+        if (-1 not_eq index) {
+          quint32 offset = HalfDimensions * index + j * TileHeight;
+          auto column = (const __m256i*)(&weights[offset]);
+          for (quint32 k = 0; k < NUM_REGS; ++k) acc[k] = _mm256_sub_epi16(acc[k], column[k]);
+        }
+      }
+
+      // Add the weight column of the added feature, i.e. the square moved to
+      quint32 offset = HalfDimensions * added + j * TileHeight;
+      auto column = (const __m256i*)(&weights[offset]);
+      for (quint32 k = 0; k < NUM_REGS; ++k) acc[k] = _mm256_add_epi16(acc[k], column[k]);
+
+      // Store the result into the new accumulator
+      accTile = (__m256i*)(&newAcc.accumulation[side][j * TileHeight]);
+      for (quint32 k = 0; k < NUM_REGS; ++k) _mm256_store_si256(&accTile[k], acc[k]);
+    }
+
+    // Then handle the PSQT part: load the old PSQT accumulator into a register
+    auto accTilePsqt = (__m256i*)(&oldAcc.psqtAccumulation[side][0]);
+    psqt = _mm256_load_si256(accTilePsqt);
+
+    // Subtract the PSQT weights of the removed features
+    for (const auto index : removed) {
+      if (-1 not_eq index) {
+        const quint32 offset = PSQT_BUCKETS * index;
+        auto columnPsqt = (const __m256i*)(&psqtWeights[offset]);
+        psqt = _mm256_sub_epi32(psqt, columnPsqt[0]);
+      }
+    }
+
+    // Add the PSQT weights of the added feature, i.e. the square moved to
+    const quint32 offset = PSQT_BUCKETS * added;
+    auto columnPsqt = (const __m256i*)(&psqtWeights[offset]);
+    psqt = _mm256_add_epi32(psqt, *columnPsqt);
+
+    // Store the result into the new accumulator
+    accTilePsqt = (__m256i*)(&newAcc.psqtAccumulation[side][0]);
+    _mm256_store_si256(accTilePsqt, psqt);
+  }
+
+  /** Refreshes the accumulator: rebuilds the whole `side` accumulator from featureIndexes (a list terminated by -1), for that side only.
+   *  Meant for the case where the king has moved; the other side's accumulator then has to be updated with updateAccumulator */
+  void refreshAccumulator(Accumulator &accumulator, quint8 side, qint32 *featureIndexes) const {
+    // Working registers
+    __m256i acc[NUM_REGS];
+    __m256i psqt;
+
+    // First handle the feature-transformation part, one tile of TileHeight values at a time
+    for (quint32 j = 0; j < HalfDimensions / TileHeight; ++j) {
+      // Start from the biases: copy them into the registers
+      auto biasesTile = (const __m256i*)(&biases[j * TileHeight]);
+      for (quint32 k = 0; k < NUM_REGS; ++k) acc[k] = biasesTile[k];
+
+      // Add the weight column of every listed feature until the -1 terminator is reached
+      qint32 *now = featureIndexes;
+      while (-1 not_eq *now) {
+        quint32 index = *now++;
+        const quint32 offset = HalfDimensions * index + j * TileHeight;
+        auto column = (const __m256i*)(&weights[offset]);
+
+        for (quint8 k = 0; k < NUM_REGS; ++k) acc[k] = _mm256_add_epi16(acc[k], column[k]);
+      }
+
+      // Store the result back into the accumulator
+      auto accTile = (__m256i*)(&accumulator.accumulation[side][j * TileHeight]);
+      for (quint8 k = 0; k < NUM_REGS; ++k) _mm256_store_si256(&accTile[k], acc[k]);
+    }
+
+    // Then handle the PSQT part, starting from zero
+    psqt = _mm256_setzero_si256();
+
+    // Add the PSQT weights of every listed feature (this walk advances featureIndexes itself)
+    while (-1 not_eq *featureIndexes) {
+      quint32 index = *featureIndexes++;
+      const quint32 offset = PSQT_BUCKETS * index;
+      auto columnPsqt = (const __m256i*)(&psqtWeights[offset]);
+      psqt = _mm256_add_epi32(psqt, *columnPsqt);
+    }
+
+    // Store the result back into the accumulator
+    auto accTilePsqt = (__m256i*)(&accumulator.psqtAccumulation[side][0]);
+    _mm256_store_si256(accTilePsqt, psqt);
+  }
+
+  /** Biases, weights and PSQT weights of this layer, aligned to the CPU cache line size */
+  alignas(CACHE_LINE_SIZE) BiasType biases[HalfDimensions];
+  alignas(CACHE_LINE_SIZE) WeightType weights[HalfDimensions * InputDimensions];
+  alignas(CACHE_LINE_SIZE) PSQTWeightType psqtWeights[InputDimensions * PSQT_BUCKETS];
+};
+}
diff --git a/src/evaluate/layer/input.h b/src/evaluate/layer/input.h
new file mode 100644
index 0000000..24ae7a8
--- /dev/null
+++ b/src/evaluate/layer/input.h
@@ -0,0 +1,33 @@
+#pragma once
+#include "global.h"
+
+namespace PikaChess {
+/** Input layer of the neural network <output tensor dimension 1 x OutDims of this layer> */
+template <quint32 OutDims>
+class Input {
+ public:
+  /** Output element type */
+  using OutputType = quint8;
+
+  /** Output dimension */
+  static constexpr quint32 OutputDimensions = OutDims;
+
+  /** The input layer needs no buffer */
+  static constexpr quint32 BufferSize = 0;
+
+  /** Hash value embedded in the NNUE network file */
+  static constexpr quint32 getHashValue() {
+    quint32 hashValue = 0xEC42E90Du;
+    hashValue ^= OutputDimensions;
+    return hashValue;
+  }
+
+  /** Reads the network weights; the input layer has none, so it returns true right away */
+  bool readParameters(std::istream&) { return true; }
+
+  /** Forward propagation: simply returns the transformed input features; the buffer is not used */
+  const OutputType *propagate(const quint8 *transformedFeatures, char*) const {
+    return transformedFeatures;
+  }
+};
+}
diff --git a/src/evaluate/model.h b/src/evaluate/model.h
new file mode 100644
index 0000000..c89fb92
--- /dev/null
+++ b/src/evaluate/model.h
@@ -0,0 +1,106 @@
+#pragma once
+#include "global.h"
+
+#include "input.h"
+#include "dense.h"
+#include "clippedrelu.h"
+
+namespace PikaChess {
+/** The HalfKAv2 model
+ *  9 (king squares) * 13 (piece kinds {2 (sides) * 7 (piece types per side) - 1 (the two kings' squares never overlap, so they share one feature)}) *
+ *  90 (squares a piece can stand on) -> feature transformation ->
+ *  (512 (transformed features) + 8 (PSQT part)) x 2 -> fully connected layer 1 -> ClippedReLU ->
+ *  16 -> fully connected layer 2 -> ClippedReLU ->
+ *  32 -> output layer
+ *  -> 1 (network part of the evaluation), combined with the PSQT part above -> final position score
+*/
+static constexpr const char *MODEL_NAME = "HalfKAv2(Friend)";
+
+/** Offset of every piece kind in the input layer (A feature), plus the offset of each king-square block in the input layer (K feature) */
+enum {
+  PS_R_ROOK = 0 * 90, PS_B_ROOK = 1 * 90,
+  PS_R_ADVISOR = 2 * 90, PS_B_ADVISOR = 3 * 90,
+  PS_R_CANNON = 4 * 90, PS_B_CANNON = 5 * 90,
+  PS_R_PAWN = 6 * 90, PS_B_PAWN = 7 * 90,
+  PS_R_KNIGHT = 8 * 90, PS_B_KNIGHT = 9 * 90,
+  PS_R_BISHOP = 10 * 90, PS_B_BISHOP = 11 * 90,
+  PS_KING = 12 * 90, PS_ALL = 13 * 90,
+  PS_KING_0 = 0 * PS_ALL, PS_KING_1 = 1 * PS_ALL, PS_KING_2 = 2 * PS_ALL,
+  PS_KING_3 = 3 * PS_ALL, PS_KING_4 = 4 * PS_ALL, PS_KING_5 = 5 * PS_ALL,
+  PS_KING_6 = 6 * PS_ALL, PS_KING_7 = 7 * PS_ALL, PS_KING_8 = 8 * PS_ALL
+};
+
+/** Input dimension of the whole model: 9 king-square blocks of PS_ALL features each */
+static constexpr quint32 INPUT_DIMENSION = 9 * PS_ALL;
+/** Dimension of one side after the feature-transformation layer; both sides together give 512 x 2 = 1024 */
+constexpr quint32 TRANSFORMED_FEATURE_DIMENSIONS = 512;
+/** Number of PSQT features of one side after the feature transformation */
+constexpr quint32 PSQT_BUCKETS = 8;
+/** Number of fully connected sub-networks (layer stacks) of the model */
+constexpr quint32 LAYER_STACKS = 8;
+
+/** Input of fully connected layer 1: 512 x 2 = 1024 features after the feature transformation */
+using InputLayer = Input<TRANSFORMED_FEATURE_DIMENSIONS * 2>;
+/** Fully connected layer 1 with ClippedReLU as activation */
+using HiddenLayer1 = ClippedReLU<Dense<InputLayer, 16>>;
+/** Fully connected layer 2 with ClippedReLU as activation */
+using HiddenLayer2 = ClippedReLU<Dense<HiddenLayer1, 32>>;
+/** Output layer; it has no activation function */
+using OutputLayer = Dense<HiddenLayer2, 1>;
+
+/** Architecture of the model without the feature-transformation layer, which is kept separate so that it can be updated quickly */
+using Model = OutputLayer;
+
+/** Maps [side][piece id] to the piece's feature offset (A feature); only rows 0 and 7 are filled, and red and black are swapped between them to meet NNUE's flipping requirement */
+static constexpr quint32 PIECE_FEATURE_INDEX[8][14] {
+    { PS_R_ROOK, PS_R_KNIGHT, PS_R_CANNON, PS_R_BISHOP, PS_R_PAWN, PS_R_ADVISOR, PS_KING,
+     PS_B_ROOK, PS_B_KNIGHT, PS_B_CANNON, PS_B_BISHOP, PS_B_PAWN, PS_B_ADVISOR, PS_KING },
+    {}, {}, {}, {}, {}, {},
+    { PS_B_ROOK, PS_B_KNIGHT, PS_B_CANNON, PS_B_BISHOP, PS_B_PAWN, PS_B_ADVISOR, PS_KING,
+     PS_R_ROOK, PS_R_KNIGHT, PS_R_CANNON, PS_R_BISHOP, PS_R_PAWN, PS_R_ADVISOR, PS_KING }
+};
+
+/** Maps the king's square to the offset of its feature block (K feature); squares outside the listed 3 x 3 areas map to 0 */
+static constexpr uint32_t KING_FEATURE_INDEX[90] {
+    0, 0, 0, PS_KING_0, PS_KING_1, PS_KING_2, 0, 0, 0,
+    0, 0, 0, PS_KING_3, PS_KING_4, PS_KING_5, 0, 0, 0,
+    0, 0, 0, PS_KING_6, PS_KING_7, PS_KING_8, 0, 0, 0,
+    0, 0, 0,         0,         0,         0, 0, 0, 0,
+    0, 0, 0,         0,         0,         0, 0, 0, 0,
+    0, 0, 0,         0,         0,         0, 0, 0, 0,
+    0, 0, 0,         0,         0,         0, 0, 0, 0,
+    0, 0, 0, PS_KING_6, PS_KING_7, PS_KING_8, 0, 0, 0,
+    0, 0, 0, PS_KING_3, PS_KING_4, PS_KING_5, 0, 0, 0,
+    0, 0, 0, PS_KING_0, PS_KING_1, PS_KING_2, 0, 0, 0,
+};
+
+/** Maps [side][square] to the square as seen by that side, to meet NNUE's flipping requirement: row 0 swaps the ranks top to bottom, row 7 is the identity */
+static constexpr quint8 ORIENT[8][90] {
+    {   81, 82, 83, 84, 85, 86, 87, 88, 89,
+        72, 73, 74, 75, 76, 77, 78, 79, 80,
+        63, 64, 65, 66, 67, 68, 69, 70, 71,
+        54, 55, 56, 57, 58, 59, 60, 61, 62,
+        45, 46, 47, 48, 49, 50, 51, 52, 53,
+        36, 37, 38, 39, 40, 41, 42, 43, 44,
+        27, 28, 29, 30, 31, 32, 33, 34, 35,
+        18, 19, 20, 21, 22, 23, 24, 25, 26,
+         9, 10, 11, 12, 13, 14, 15, 16, 17,
+         0,  1,  2,  3,  4,  5,  6,  7,  8, },
+    {}, {}, {}, {}, {}, {},
+    {    0,  1,  2,  3,  4,  5,  6,  7,  8,
+         9, 10, 11, 12, 13, 14, 15, 16, 17,
+        18, 19, 20, 21, 22, 23, 24, 25, 26,
+        27, 28, 29, 30, 31, 32, 33, 34, 35,
+        36, 37, 38, 39, 40, 41, 42, 43, 44,
+        45, 46, 47, 48, 49, 50, 51, 52, 53,
+        54, 55, 56, 57, 58, 59, 60, 61, 62,
+        63, 64, 65, 66, 67, 68, 69, 70, 71,
+        72, 73, 74, 75, 76, 77, 78, 79, 80,
+        81, 82, 83, 84, 85, 86, 87, 88, 89, }
+};
+
+/** Returns the input index of a feature in the feature-transformation layer (KA feature): king-block offset + piece offset + oriented square, for the given side, piece square, piece id and king square */
+inline quint32 FeatureIndex(quint8 side, quint8 index, quint8 chess, quint8 kingIndex) {
+  return KING_FEATURE_INDEX[kingIndex] + PIECE_FEATURE_INDEX[side][chess] + ORIENT[side][index];
+}
+}
diff --git a/src/global.h b/src/global.h
index 9bde2bd..90063d4 100644
--- a/src/global.h
+++ b/src/global.h
@@ -19,12 +19,12 @@ constexpr quint8 RED_PAWN { 4 }, RED_ADVISOR { 5 }, RED_KING { 6 };
 constexpr quint8 BLACK_ROOK { 7 }, BLACK_KNIGHT { 8 }, BLACK_CANNON { 9 }, BLACK_BISHOP { 10 };
 constexpr quint8 BLACK_PAWN { 11 }, BLACK_ADVISOR { 12 }, BLACK_KING { 13 };
 
-/** 定义赢棋和棋输棋的分数 */
-constexpr qint16 MATE_SCORE { 10000 }, DRAW_SCORE { 20 }, LOSS_SCORE { -10000 };
+/** Scores for winning (mate) and for losing */
+constexpr qint16 MATE_SCORE { 30000 }, LOSS_SCORE { -30000 };
 /** 长将判负的分值，在该值之内则不写入置换表 */
-constexpr qint16 BAN_SCORE_MATE { 9500 }, BAN_SCORE_LOSS { -9500 };
+constexpr qint16 BAN_SCORE_MATE { 29500 }, BAN_SCORE_LOSS { -29500 };
 /** 搜索出赢棋和输棋的分值界限，超出此值就说明已经搜索出杀棋了 */
-constexpr qint16 WIN_SCORE { 9000 }, LOST_SCORE { -9000 };
+constexpr qint16 WIN_SCORE { 29000 }, LOST_SCORE { -29000 };
 
 /** 搜索状态机的阶段 */
 constexpr quint8 PHASE_HASH { 0 };
@@ -64,273 +64,43 @@ constexpr quint16 CHESS_FLAG[14] {
     1 << 10, 1 << 11, 1 << 12, 1 << 13
 };
 
-/** 士的形状 */
-// 缺士
-constexpr quint8 SHAPE_LEAK { 0 };
-// 将不在中间
-constexpr quint8 SHAPE_HOLLOW { 1 };
-// 中士加左士或右士 <^ 或 ^>
-constexpr quint8 SHAPE_LR { 2 };
-// 左右士 <>
-constexpr quint8 SHAPE_CENTER { 3 };
-// 不成形状
-constexpr quint8 SHAPE_NONE { 4 };
-
-/** 以下是用于预计算的固定分值 */
-/** 开中局的将 */
-constexpr qint16 CONST_KING_MIDGAME[90] {
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  1,  1,  1,  0,  0,  0,
-     0,  0,  0,  2,  2,  2,  0,  0,  0,
-     0,  0,  0, 11, 15, 11,  0,  0,  0,
+/** Endgame values of the pieces, used for delta pruning; order: rook, knight, cannon, bishop, pawn, advisor, king */
+constexpr quint16 PIECE_VALUE[14] {
+    1380, 800, 700, 300, 270, 450, 0,
+    1380, 800, 700, 300, 270, 450, 0
 };
 
-/** 残局的将 */
-constexpr qint16 CONST_KING_ENDGAME[90] {
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  5, 15,  5,  0,  0,  0,
-     0,  0,  0,  3, 13,  3,  0,  0,  0,
-     0,  0,  0,  1, 11,  1,  0,  0,  0,
-};
+/** Version of the NNUE file */
+constexpr quint32 VERSION = 0x7AF32F20u;
+/** Scaling factor (a shift count in bits) applied to the output when evaluating */
+constexpr quint8 OUTPUT_SCALE_BITS = 4;
+/** Scaling factor (a shift count in bits) of the weights */
+constexpr quint8 WEIGHTS_SCALE_BITS = 6;
 
-/** 没受威胁的士 */
-constexpr qint16 CONST_ADVISOR_THREATLESS[90] {
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0, 20,  0, 20,  0,  0,  0,
-     0,  0,  0,  0, 23,  0,  0,  0,  0,
-     0,  0,  0, 20,  0, 20,  0,  0,  0,
-};
+/** Size of a CPU cache line, in bytes */
+constexpr quint8 CACHE_LINE_SIZE = 64;
 
-/** 受到威胁的士 */
-constexpr qint16 CONST_ADVISOR_THREATENED[90] {
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0, 40,  0, 40,  0,  0,  0,
-     0,  0,  0,  0, 43,  0,  0,  0,  0,
-     0,  0,  0, 40,  0, 40,  0,  0,  0,
-};
+/** Width of a SIMD register, in bytes (32 bytes = one __m256i) */
+constexpr quint8 SIMD_WIDTH = 32;
+constexpr quint8 MAX_SIMD_WIDTH = 32;
 
-/** 没受威胁的象 */
-constexpr qint16 CONST_BISHOP_THREATLESS[90] {
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0, 20,  0,  0,  0, 20,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-    18,  0,  0,  0, 23,  0,  0,  0, 18,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0, 20,  0,  0,  0, 20,  0,  0,
-};
-
-/** 受威胁的象 */
-constexpr qint16 CONST_BISHOP_THREATENED[90] {
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0, 40,  0,  0,  0, 40,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-    38,  0,  0,  0, 43,  0,  0,  0, 38,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0, 40,  0,  0,  0, 40,  0,  0,
-};
-
-/** 开中局的马 */
-constexpr qint16 CONST_KNIGHT_MIDGAME[90] {
-    90, 90, 90, 96, 90, 96, 90, 90, 90,
-    90, 96,103, 97, 94, 97,103, 96, 90,
-    92, 98, 99,103, 99,103, 99, 98, 92,
-    93,108,100,107,100,107,100,108, 93,
-    90,100, 99,103,104,103, 99,100, 90,
-    90, 98,101,102,103,102,101, 98, 90,
-    92, 94, 98, 95, 98, 95, 98, 94, 92,
-    93, 92, 94, 95, 92, 95, 94, 92, 93,
-    85, 90, 92, 93, 78, 93, 92, 90, 85,
-    88, 85, 90, 88, 90, 88, 90, 85, 88,
-};
-
-/** 残局的马 */
-constexpr qint16 CONST_KNIGHT_ENDGAME[90] {
-    92, 94, 96, 96, 96, 96, 96, 94, 92,
-    94, 96, 98, 98, 98, 98, 98, 96, 94,
-    96, 98,100,100,100,100,100, 98, 96,
-    96, 98,100,100,100,100,100, 98, 96,
-    96, 98,100,100,100,100,100, 98, 96,
-    94, 96, 98, 98, 98, 98, 98, 96, 94,
-    94, 96, 98, 98, 98, 98, 98, 96, 94,
-    92, 94, 96, 96, 96, 96, 96, 94, 92,
-    90, 92, 94, 92, 92, 92, 94, 92, 90,
-    88, 90, 92, 90, 90, 90, 92, 90, 88,
-};
-
-/** 开中局的车 */
-constexpr qint16 CONST_ROOK_MIDGAME[90] {
-    206,208,207,213,214,213,207,208,206,
-    206,212,209,216,233,216,209,212,206,
-    206,208,207,214,216,214,207,208,206,
-    206,213,213,216,216,216,213,213,206,
-    208,211,211,214,215,214,211,211,208,
-    208,212,212,214,215,214,212,212,208,
-    204,209,204,212,214,212,204,209,204,
-    198,208,204,212,212,212,204,208,198,
-    200,208,206,212,200,212,206,208,200,
-    194,206,204,212,200,212,204,206,194,
-};
-
-/** 残局的车 */
-constexpr qint16 CONST_ROOK_ENDGAME[90] {
-    182,182,182,184,186,184,182,182,182,
-    184,184,184,186,190,186,184,184,184,
-    182,182,182,184,186,184,182,182,182,
-    180,180,180,182,184,182,180,180,180,
-    180,180,180,182,184,182,180,180,180,
-    180,180,180,182,184,182,180,180,180,
-    180,180,180,182,184,182,180,180,180,
-    180,180,180,182,184,182,180,180,180,
-    180,180,180,182,184,182,180,180,180,
-    180,180,180,182,184,182,180,180,180,
-};
-
-/** 开中局的炮 */
-constexpr qint16 CONST_CANNON_MIDGAME[90] {
-    100,100, 96, 91, 90, 91, 96,100,100,
-     98, 98, 96, 92, 89, 92, 96, 98, 98,
-     97, 97, 96, 91, 92, 91, 96, 97, 97,
-     96, 99, 99, 98,100, 98, 99, 99, 96,
-     96, 96, 96, 96,100, 96, 96, 96, 96,
-     95, 96, 99, 96,100, 96, 99, 96, 95,
-     96, 96, 96, 96, 96, 96, 96, 96, 96,
-     97, 96,100, 99,101, 99,100, 96, 97,
-     96, 97, 98, 98, 98, 98, 98, 97, 96,
-     96, 96, 97, 99, 99, 99, 97, 96, 96,
-};
-
-/** 残局的炮 */
-constexpr qint16 CONST_CANNON_ENDGAME[90] {
-    100,100,100,100,100,100,100,100,100,
-    100,100,100,100,100,100,100,100,100,
-    100,100,100,100,100,100,100,100,100,
-    100,100,100,102,104,102,100,100,100,
-    100,100,100,102,104,102,100,100,100,
-    100,100,100,102,104,102,100,100,100,
-    100,100,100,102,104,102,100,100,100,
-    100,100,100,102,104,102,100,100,100,
-    100,100,100,104,106,104,100,100,100,
-    100,100,100,104,106,104,100,100,100,
-};
-
-/** 开中局、有进攻机会的兵 */
-constexpr qint16 CONST_PAWN_ATTACKING_MIDGAME[90] {
-     9,  9,  9, 11, 13, 11,  9,  9,  9,
-    39, 49, 69, 84, 89, 84, 69, 49, 39,
-    39, 49, 64, 74, 74, 74, 64, 49, 39,
-    39, 46, 54, 59, 61, 59, 54, 46, 39,
-    29, 37, 41, 54, 59, 54, 41, 37, 29,
-     7,  0, 13,  0, 16,  0, 13,  0,  7,
-     7,  0,  7,  0, 15,  0,  7,  0,  7,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-};
-
-/** 开中局、没有进攻机会的兵 */
-constexpr qint16 CONST_PAWN_ATTACKLESS_MIDGAME[90] {
-     9,  9,  9, 11, 13, 11,  9,  9,  9,
-    19, 24, 34, 42, 44, 42, 34, 24, 19,
-    19, 24, 32, 37, 37, 37, 32, 24, 19,
-    19, 23, 27, 29, 30, 29, 27, 23, 19,
-    14, 18, 20, 27, 29, 27, 20, 18, 14,
-     7,  0, 13,  0, 16,  0, 13,  0,  7,
-     7,  0,  7,  0, 15,  0,  7,  0,  7,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-};
-
-/** 残局、有进攻机会的兵 */
-constexpr qint16 CONST_PAWN_ATTACKING_ENDGAME[90] {
-    10, 10, 10, 15, 15, 15, 10, 10, 10,
-    50, 55, 60, 85,100, 85, 60, 55, 50,
-    65, 70, 70, 75, 75, 75, 70, 70, 65,
-    75, 80, 80, 80, 80, 80, 80, 80, 75,
-    70, 70, 65, 70, 70, 70, 65, 70, 70,
-    45,  0, 40, 45, 45, 45, 40,  0, 45,
-    40,  0, 35, 40, 40, 40, 35,  0, 40,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-};
-
-/** 残局、没有进攻机会的兵 */
-constexpr qint16 CONST_PAWN_ATTACKLESS_ENDGAME[90] {
-    10, 10, 10, 15, 15, 15, 10, 10, 10,
-    10, 15, 20, 45, 60, 45, 20, 15, 10,
-    25, 30, 30, 35, 35, 35, 30, 30, 25,
-    35, 40, 40, 45, 45, 45, 40, 40, 35,
-    25, 30, 30, 35, 35, 35, 30, 30, 25,
-    25,  0, 25, 25, 25, 25, 25,  0, 25,
-    20,  0, 20, 20, 20, 20, 20,  0, 20,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-};
-
-/** 空头炮的罚分，大体上空头炮位置越高威胁越大 */
-constexpr qint16 CONST_HOLLOW_THREAT_PENALTY[90] {
-     0,  0,  0,  0, 80,  0,  0,  0,  0,
-     0,  0,  0,  0, 80,  0,  0,  0,  0,
-     0,  0,  0,  0, 80,  0,  0,  0,  0,
-     0,  0,  0,  0, 75,  0,  0,  0,  0,
-     0,  0,  0,  0, 70,  0,  0,  0,  0,
-     0,  0,  0,  0, 65,  0,  0,  0,  0,
-     0,  0,  0,  0, 60,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-     0,  0,  0,  0,  0,  0,  0,  0,  0,
-};
+/** Rounds n up to a multiple of base via integer division (presumably n >= 0 and base > 0 — confirm with callers). */
+template <typename IntType>
+constexpr IntType CeilToMultiple(IntType n, IntType base) {
+  return (n + base - 1) / base * base;
+}
 
-/** 沉底炮的罚分，大体上越靠近边线威胁越大 */
-constexpr qint16 CONST_BOTTOM_THREAT_PENALTY[90] {
-   40, 30,  0,  0,  0,  0,  0, 30, 40,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,
-   40, 30,  0,  0,  0,  0,  0, 30, 40,
-};
+/** Reads one IntType from the NNUE stream as sizeof(IntType) raw bytes (no byte-order conversion); bytes that could not be read stay zero. */
+template <typename IntType>
+inline IntType ReadInt(std::istream &stream) {
+  IntType result {};  // value-initialised so a failed or short read never returns an indeterminate value
+  stream.read(reinterpret_cast<char *>(&result), sizeof(IntType));
+  return result;
+}
 
-/** 先行棋的分数 */
-extern qint16 ADVANCED_SCORE;
-/** 各个子力的价值表 */
-extern qint16 VALUE[14][90];
+/** Reads count consecutive IntType values from the NNUE stream into out as raw bytes; no error is reported here, so callers must check the stream state. */
+template <typename IntType>
+inline void ReadInt(std::istream &stream, IntType *out, quint64 count) {
+  stream.read(reinterpret_cast<char *>(out), sizeof(IntType) * count);
+}
 }
diff --git a/src/move/historymove.h b/src/move/historymove.h
index e266495..7304ef4 100644
--- a/src/move/historymove.h
+++ b/src/move/historymove.h
@@ -1,6 +1,7 @@
 #pragma once
 #include "move.h"
 #include "bitboard.h"
+#include "accumulator.h"
 
 namespace PikaChess {
 class HistoryMove : public Move {
@@ -48,6 +49,9 @@ class HistoryMove : public Move {
   /** 设置一个走法 */
   void setMove(const Move &move);
 
+  /** Accumulator corresponding to this move */
+  Accumulator m_acc;
+
 private:
   /** 走该步之前的Zobrist值 */
   quint64 m_zobrist;
diff --git a/src/search/chessengine.cpp b/src/search/chessengine.cpp
index d94b9d9..8394cff 100644
--- a/src/search/chessengine.cpp
+++ b/src/search/chessengine.cpp
@@ -2,7 +2,12 @@
 #include "searchinstance.h"
 
 namespace PikaChess {
-ChessEngine::ChessEngine() { reset(); }
+ChessEngine::ChessEngine() {
+  // Load the neural network (done before the board reset below; presumably reset() relies on it — confirm)
+  NNUEInit();
+  // Initialise the chessboard
+  reset();
+}
 
 void ChessEngine::reset() {
   // 初始局面
@@ -12,8 +17,6 @@ void ChessEngine::reset() {
 void ChessEngine::search() {
   // 重置信息
   this->m_hashTable.reset();
-  // 重新计算分值表和分值
-  this->m_chessboard.preCalculateScores();
   // 迭代加深，重置深度
   this->m_currentDepth = 1;
 
diff --git a/src/search/searchinstance.cpp b/src/search/searchinstance.cpp
index 62a1e26..d1302ef 100644
--- a/src/search/searchinstance.cpp
+++ b/src/search/searchinstance.cpp
@@ -1,8 +1,8 @@
 #include "searchinstance.h"
 
 namespace PikaChess {
-/** 搜索的衰减层数 [是否是CUT Node][第几层][第几个走法] */
-quint8 REDUCTIONS[2][64][128];
+/** Depth reductions applied by the search, indexed as [depth][move count] */
+quint16 REDUCTIONS[64][128];
 
 SearchInstance::SearchInstance(const Chessboard &chessboard, HashTable &hashTable)
     : m_chessboard { chessboard }, m_hashTable { hashTable } { }
@@ -39,7 +39,7 @@ void SearchInstance::searchRoot(const qint8 depth) {
         // 对于延迟走法的处理，要求没有被将军，没有将军别人，该步不是吃子步
         if (depth >= 3 and notInCheck and newDepth not_eq depth and not lastMove.isCapture()) {
           tryScore = -searchFull(-bestScore - 1, -bestScore,
-                                 newDepth - REDUCTIONS[false][depth][moveCount]);
+                                 newDepth - REDUCTIONS[depth][moveCount]);
         }
         // 如果不满足条件则不衰减层数
         else tryScore = -searchFull(-bestScore - 1, -bestScore, newDepth);
@@ -91,45 +91,20 @@ qint16 SearchInstance::searchFull(qint16 alpha, const qint16 beta,
 
   // 不被将军时可以进行一些裁剪
   bool notInCheck { not this->m_chessboard.getLastMove().isChecked() };
-  bool notPVNode { beta - alpha <= 1 };
   if (notInCheck) {
-    qint16 staticEval { this->m_chessboard.staticScore() };
+    qint16 staticEval { this->m_chessboard.score() };
 
     // 无用裁剪
-    if (depth < 7 and abs(beta) < WIN_SCORE) {
+    if (depth < 9 and abs(beta) < WIN_SCORE) {
       // 裁剪的边界
-      quint8 futilityMargin = 40 * depth;
+      qint16 futilityMargin = 214 * depth;
 
       // 如果放弃一定的分值还是超出边界就返回
       if (staticEval - futilityMargin >= beta) return staticEval - futilityMargin;
     }
 
     // 适用于非PV节点的前期裁剪
-    if (notPVNode) {
-      // 剃刀裁剪
-      if (depth <= 3) {
-        // 给静态评价加上第一个边界
-        tryScore = staticEval + 40;
-
-        // 如果超出边界
-        if (tryScore < beta) {
-          // 第一层直接返回评分和静态搜索的最大值
-          if (depth == 1) return std::max(tryScore, searchQuiescence(alpha, beta));
-
-          // 其余情况加上第二个边界
-          tryScore += 60;
-
-          // 如果还是超出边界
-          if (tryScore < beta and depth <= 2) {
-            // 获得静态评分
-            qint16 newScore { searchQuiescence(alpha, beta) };
-
-            // 如果静态评分也超出边界，返回评分和静态搜索的最大值
-            if (newScore < beta) return std::max(tryScore, newScore);
-          }
-        }
-      }
-
+    if (beta - alpha <= 1) {
       /* 进行空步裁剪，不能连着走两步空步，被将军时不能走空步，层数较大时，需要进行检验
          根节点的Beta值是"MATE_SCORE"，所以不可能发生空步裁剪 */
       if (nullOk) {
@@ -140,7 +115,7 @@ qint16 SearchInstance::searchFull(qint16 alpha, const qint16 beta,
         // 撤销空步
         unMakeNullMove();
         // 如果足够好就可以发生截断，层数较大时要注意进行校验
-        if (tryScore >= beta and ((depth < 12 and abs(beta) < WIN_SCORE) or
+        if (tryScore >= beta and ((depth < 14 and abs(beta) < WIN_SCORE) or
                                   searchFull(beta - 1, beta, depth - 2, NO_NULL) >= beta)) {
           return tryScore;
         }
@@ -180,7 +155,7 @@ qint16 SearchInstance::searchFull(qint16 alpha, const qint16 beta,
       // PVS，对于延迟走法的处理，要求没有被将军，没有将军别人，该步不是吃子步
       if (depth >= 3 and notInCheck and newDepth not_eq depth and not lastMove.isCapture()) {
         tryScore = -searchFull(-alpha - 1, -alpha,
-                               newDepth - REDUCTIONS[notPVNode][depth][moveCount]);
+                               newDepth - REDUCTIONS[depth][moveCount]);
       }
       // 如果不满足条件就不衰减层数
       else tryScore = -searchFull(-alpha - 1, -alpha, newDepth);
@@ -239,6 +214,9 @@ qint16 SearchInstance::searchQuiescence(qint16 alpha, const qint16 beta) {
 
   qint16 bestScore { LOSS_SCORE };
 
+  // 差值裁剪的边界值
+  qint16 deltaBase { LOSS_SCORE };
+
   // 如果不被将军，先做局面评价，如果局面评价没有截断，再生成吃子走法
   bool notInCheck { not this->m_chessboard.getLastMove().isChecked() };
   if (notInCheck) {
@@ -248,12 +226,12 @@ qint16 SearchInstance::searchQuiescence(qint16 alpha, const qint16 beta) {
       // Beta截断
       if (tryScore >= beta) return tryScore;
 
-      // 差值(delta)裁剪，如果吃一个车都无法超过alpha就裁剪
-      if (tryScore + 200 < alpha) return tryScore;
-
       // 缩小Alpha-Beta边界
       if (tryScore > alpha) alpha = tryScore;
     }
+
+    // 调整差值裁剪的边界
+    deltaBase = bestScore + 155;
   }
 
   // 静态搜索有限状态机
@@ -264,8 +242,12 @@ qint16 SearchInstance::searchQuiescence(qint16 alpha, const qint16 beta) {
   while ((move = search.getNextMove()).isVaild()) {
     // 如果被将军了就不搜索这一步
     if (makeMove(move)) {
-      // 不然就获得评分并更新最好的分数
-      tryScore = -searchQuiescence(-beta, -alpha);
+      // 首先进行差值裁剪，如果加上一定的值都不能超过alpha，就认为这个走法是无用的
+      if (notInCheck and deltaBase + PIECE_VALUE[move.victim()] <= alpha) {
+        tryScore = deltaBase + PIECE_VALUE[move.victim()];
+      }
+      // 否则就获得评分并更新最好的分数
+      else tryScore = -searchQuiescence(-beta, -alpha);
 
       // 撤销走棋
       unMakeMove();
diff --git a/src/table/pregen.cpp b/src/table/pregen.cpp
index 64980f6..0b34b40 100644
--- a/src/table/pregen.cpp
+++ b/src/table/pregen.cpp
@@ -9,7 +9,7 @@ extern __m128i BITBOARD_MASK[90];
 extern __m128i BITBOARD_NOT_MASK[90];
 
 /** 延迟走法衰减的衰减层数 */
-extern quint8 REDUCTIONS[2][64][128];
+extern quint16 REDUCTIONS[64][128];
 
 PreGen::PreGen() {
   // 位棋盘掩码初始化
@@ -86,12 +86,13 @@ PreGen::PreGen() {
   genZobristValues();
 
   // 生成LMR的衰减层数数据
+  quint16 reduce[128];
+  for (quint8 i { 1 }; i < 128; ++i) reduce[i] = int(21.9 * std::log(i));
+
   for (quint8 depth = 1; depth < 64; ++depth) {
     for (quint8 moveCount = 1; moveCount < 128; ++moveCount) {
-      double reduce = log(depth) * log(moveCount) / 1.95;
-      REDUCTIONS[true][depth][moveCount] = int(std::round(reduce));
-      REDUCTIONS[false][depth][moveCount] =
-          std::max(REDUCTIONS[true][depth][moveCount] - 1, 0);
+      int r = reduce[depth] * reduce[moveCount];
+      REDUCTIONS[depth][moveCount] = (r + 534) / 1024;
     }
   }
 }