From 57ea5f7d501be0d4eaa6579b632c4e39adc69b7e Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Tue, 27 Aug 2024 18:50:52 +0800 Subject: [PATCH 01/41] fpga: Add pcie XDMA framework Co-Author: xushuoxiang --- Makefile | 6 +++ src/test/csrc/fpga/xdma.cpp | 63 +++++++++++++++++++++++++++ src/test/csrc/fpga/xdma.h | 71 +++++++++++++++++++++++++++++++ src/test/csrc/fpga/xdma_mpool.cpp | 0 4 files changed, 140 insertions(+) create mode 100644 src/test/csrc/fpga/xdma.cpp create mode 100644 src/test/csrc/fpga/xdma.h create mode 100644 src/test/csrc/fpga/xdma_mpool.cpp diff --git a/Makefile b/Makefile index bf9300dc8..f09fd3d1c 100644 --- a/Makefile +++ b/Makefile @@ -78,6 +78,12 @@ SIM_VSRC = $(shell find $(VSRC_DIR) -name "*.v" -or -name "*.sv") # DiffTest support DIFFTEST_CSRC_DIR = $(abspath ./src/test/csrc/difftest) +# FPGA-Difftest support +FPGA ?= 0 +ifeq ($(FPGA),1) +DIFFTEST_CSRC_DIR += $(abspath ./src/test/csrc/fpga) +endif + DIFFTEST_CXXFILES = $(shell find $(DIFFTEST_CSRC_DIR) -name "*.cpp") ifeq ($(NO_DIFF), 1) SIM_CXXFLAGS += -DCONFIG_NO_DIFFTEST diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp new file mode 100644 index 000000000..f55f58ab5 --- /dev/null +++ b/src/test/csrc/fpga/xdma.cpp @@ -0,0 +1,63 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* +* DiffTest is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ +#include "xdma.h" + +FpgaXdma::FpgaXdma() { + signal(SIGINT, handle_sigint); + fd_c2h = open("/dev/xdma0_c2h_0", O_RDWR); + set_dma_fd_block(); +} + +void FpgaXdma::handle_sigint(int sig) { + printf("Unlink sem success, exit success!\n"); + exit(1); +} + +void FpgaXdma::set_dma_fd_block() { + int flags = fcntl(fd, F_GETFL, 0); + if (flags == -1) { + perror("fcntl get error"); + return; + } + // Clear the O NONBLOCK flag and set it to blocking mode + flags &= ~O_NONBLOCK; + if (fcntl(fd, F_SETFL, flags) == -1) { + perror("fcntl set error"); + return; + } +} + +void FpgaXdma::thread_read_xdma() { + while (running) { + char *memory = memory_pool.get_free_chunk(); + read(fd_c2h, memory, recv_size); + memory_pool.set_busy_chunk(); + } +} + +void FpgaXdma::write_difftest_thread() { + while (running) { + const char *memory = memory_pool.get_busy_chunk(); + memcpy(&diffteststate, memory, sizeof(diffteststate)); + + stream_receiver_cout ++; + memory_pool.set_free_chunk(); + +// Notify difftest to run the next beat + + + } +} diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h new file mode 100644 index 000000000..223dda98d --- /dev/null +++ b/src/test/csrc/fpga/xdma.h @@ -0,0 +1,71 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* +* DiffTest is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ +#ifndef __XDMA_H__ +#define __XDMA_H__ + +#include "common.h" +#include +#include +#include +#include +#include +#include + +#include "diffstate.h" + +#define MAX_DATA_LEN 1024 * 8 - 1 +#define HEAD_DATA_LEN 7 +#define BUFSIZE 1024 * 8 * 8 +#define SHMSZ 27 +#define WAIT_RECV_SLEEP 5 + +typedef struct FpgaPackgeHead { + struct DiffTestState difftestinfo; + unsigned int sequence : 16; + unsigned int message_size : 16; + unsigned long data[HEAD_DATA_LEN]; +} FpgaPackgeHead; + +class FpgaXdma { +public: + struct FpgaPackgeHead *shmadd_recv; + + int shmid_recv; + int ret_recv; + key_t key_recv; + + int fd_c2h; + int fd_interrupt; + + struct FpgaPackgeHead recv_buffer; + unsigned long buffer[8]; + unsigned int recv_size = sizeof(FpgaPackgeHead); + unsigned long old_exec_instr = 0; + + FpgaXdma(); + ~FpgaXdma() {}; + + void set_dma_fd_block(); + void handle_sigint(int sig); + void read_xdma_thread(); + void write_difftest_thread(); + +protected: + std::mutex test_mtx; + std::condition_variable test_cv; +}; + +#endif diff --git a/src/test/csrc/fpga/xdma_mpool.cpp b/src/test/csrc/fpga/xdma_mpool.cpp new file mode 100644 index 000000000..e69de29bb From 6ab769b179b64d8e62819786912a0d19f82fd99a Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Wed, 28 Aug 2024 10:54:16 +0800 Subject: [PATCH 02/41] fpga: add mpool --- src/test/csrc/fpga/mpool.cpp | 72 +++++++++++++++++++++++++++ src/test/csrc/fpga/mpool.h | 83 +++++++++++++++++++++++++++++++ src/test/csrc/fpga/xdma_mpool.cpp | 0 3 files changed, 155 insertions(+) create mode 100644 src/test/csrc/fpga/mpool.cpp create mode 100644 src/test/csrc/fpga/mpool.h delete mode 100644 src/test/csrc/fpga/xdma_mpool.cpp diff --git a/src/test/csrc/fpga/mpool.cpp b/src/test/csrc/fpga/mpool.cpp new file mode 100644 index 000000000..0e6d2122c --- /dev/null +++ b/src/test/csrc/fpga/mpool.cpp @@ -0,0 +1,72 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* +* DiffTest is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ +#include "mpool.h" + +void MemoryPool::init_memory_pool() { + memory_pool.reserve(NUM_BLOCKS); + for (size_t i = 0; i < NUM_BLOCKS; ++i) { + memory_pool.emplace_back(); + block_mutexes[i].unlock(); + } +} + +void MemoryPool::cleanup_memory_pool() { + cv_empty.notify_all(); + cv_filled.notify_all(); + memory_pool.clear(); +} + +void MemoryPool::unlock_thread() { + cv_empty.notify_all(); + cv_filled.notify_all(); +} + +char *MemoryPool::get_free_chunk() { + page_head = (write_index++) & REM_NUM_BLOCKS; + { + std::unique_lock lock(block_mutexes[page_head]); + cv_empty.wait(lock, [this] { return empty_blocks > 0; }); + } + + --empty_blocks; + block_mutexes[page_head].lock(); + return memory_pool[page_head].data.get(); +} + +void MemoryPool::set_busy_chunk() { + memory_pool[page_head].is_free = false; + block_mutexes[page_head].unlock(); + cv_filled.notify_one(); + ++filled_blocks; +} + +const char *MemoryPool::get_busy_chunk() { + page_end = (read_index++) & REM_NUM_BLOCKS; + { + std::unique_lock lock(block_mutexes[page_end]); + cv_filled.wait(lock, [this] { return filled_blocks > 0; }); + } + --filled_blocks; + block_mutexes[page_end].lock(); + return memory_pool[page_end].data.get(); +} + +void MemoryPool::set_free_chunk() { + memory_pool[page_end].is_free = true; + block_mutexes[page_end].unlock(); + cv_empty.notify_one(); + ++empty_blocks; +} diff --git a/src/test/csrc/fpga/mpool.h b/src/test/csrc/fpga/mpool.h new file mode 100644 index 000000000..7c3242588 --- /dev/null +++ b/src/test/csrc/fpga/mpool.h @@ -0,0 +1,83 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* +* DiffTest is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ +#include +#include +#include +#include +#include +#include + +#define MEMPOOL_SIZE 4096 * 1024 // 4M page +#define MEMBLOCK_SIZE 4096 // 4K packge +#define NUM_BLOCKS (MEMPOOL_SIZE / MEMBLOCK_SIZE) +#define REM_NUM_BLOCKS (NUM_BLOCKS - 1) + +extern bool running; +class MemoryPool { +public: + // Constructor to allocate aligned memory blocks + MemoryPool() { + init_memory_pool(); + } + + ~MemoryPool() { + cleanup_memory_pool(); + } + // Disable copy constructors and copy assignment operators + MemoryPool(const MemoryPool &) = delete; + MemoryPool &operator=(const MemoryPool &) = delete; + + void init_memory_pool(); + + // Cleaning up memory pools + void cleanup_memory_pool(); + // Releasing locks manually + void unlock_thread(); + + // Detect a free block and lock the memory that returns the free block + char *get_free_chunk(); + // Set block data valid and locked + void set_busy_chunk(); + + // Gets the latest block of memory + const char *get_busy_chunk(); + // Invalidate and lock the block + void set_free_chunk(); + +private: + struct MemoryBlock { + std::unique_ptr> data; + bool is_free; + + MemoryBlock() : is_free(true) { + void *ptr = nullptr; + if (posix_memalign(&ptr, MEMBLOCK_SIZE, MEMBLOCK_SIZE * 2) != 0) { + throw std::runtime_error("Failed to allocate aligned memory"); + } + data = std::unique_ptr>(static_cast(ptr), [](char *p) { free(p); }); + } + }; + std::vector memory_pool; // Mempool + std::vector block_mutexes{NUM_BLOCKS}; // Partition lock array + std::atomic empty_blocks = NUM_BLOCKS; // Free block count + std::atomic filled_blocks; // Filled blocks count + std::atomic write_index; + std::atomic read_index; + std::condition_variable cv_empty; // Free block condition variable + std::condition_variable cv_filled; // Filled block condition variable + size_t page_head = 0; + size_t page_end = 0; +}; diff --git a/src/test/csrc/fpga/xdma_mpool.cpp b/src/test/csrc/fpga/xdma_mpool.cpp deleted file mode 100644 index e69de29bb..000000000 From e989cc1b7eb6684ce610c19be9f8661395adf772 Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Wed, 28 Aug 2024 15:53:21 +0800 Subject: [PATCH 03/41] difftest: Move mempool to common code --- src/test/csrc/{fpga => common}/mpool.cpp | 0 src/test/csrc/{fpga => common}/mpool.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/test/csrc/{fpga => common}/mpool.cpp (100%) rename src/test/csrc/{fpga => common}/mpool.h (100%) diff --git a/src/test/csrc/fpga/mpool.cpp b/src/test/csrc/common/mpool.cpp similarity index 100% rename from src/test/csrc/fpga/mpool.cpp rename to src/test/csrc/common/mpool.cpp diff --git a/src/test/csrc/fpga/mpool.h b/src/test/csrc/common/mpool.h similarity index 100% rename from src/test/csrc/fpga/mpool.h rename to src/test/csrc/common/mpool.h From 25cd18b2a7aa16dac11b5fe9e1f8bf224ed26501 Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Thu, 29 Aug 2024 11:18:48 +0800 Subject: [PATCH 04/41] fpga: Add function of difftest through dma interface --- src/test/csrc/common/mpool.h | 5 ++ src/test/csrc/difftest/difftest.cpp | 2 + src/test/csrc/fpga/fpga_main.cpp | 103 ++++++++++++++++++++++++++++ src/test/csrc/fpga/xdma.cpp | 42 ++++++++---- src/test/csrc/fpga/xdma.h | 31 +++++---- 5 files changed, 154 insertions(+), 29 deletions(-) create mode 100644 src/test/csrc/fpga/fpga_main.cpp diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 7c3242588..2aafdea48 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -13,6 +13,9 @@ * * See the Mulan PSL v2 for more details. ***************************************************************************************/ +#ifndef __MPOOL_H__ +#define __MPOOL_H__ + #include #include #include @@ -81,3 +84,5 @@ class MemoryPool { size_t page_head = 0; size_t page_end = 0; }; + +#endif diff --git a/src/test/csrc/difftest/difftest.cpp b/src/test/csrc/difftest/difftest.cpp index 475db164d..3c85f464f 100644 --- a/src/test/csrc/difftest/difftest.cpp +++ b/src/test/csrc/difftest/difftest.cpp @@ -94,7 +94,9 @@ void difftest_set_dut() { } } int difftest_step() { +#ifndef WITH_FPGA difftest_set_dut(); +#endif for (int i = 0; i < NUM_CORES; i++) { int ret = difftest[i]->step(); if (ret) { diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp new file mode 100644 index 000000000..864590915 --- /dev/null +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -0,0 +1,103 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* +* DiffTest is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ + +#include "difftest.h" +#include "diffstate.h" +#include "mpool.h" +#include "xdma.h" + +#define XDMA_C2H_DEVICE "/dev/xdma0_c2h_0" + +enum { + SIMV_RUN, + SIMV_DONE, + SIMV_FAIL, +} simv_state; + +static uint8_t simv_result = SIMV_RUN; +static uint64_t max_instrs = 0; + +struct core_end_info_t { + bool core_trap[NUM_CORES]; + double core_cpi[NUM_CORES]; + uint8_t core_trap_num; +}; +static core_end_info_t core_end_info; + +void simv_init(); +void simv_step(); +void cpu_endtime_check(); +void set_dut_from_xdma(); + +FpgaXdma *xdma_device = NULL; + +int main(int argc, char *argv[]) { + + simv_init(); + + while (simv_result == SIMV_RUN) { + // get xdma data + set_dut_from_xdma(); + + // run difftest + simv_step(); + cpu_endtime_check(); + } +} + +void set_dut_from_xdma() { + { + std::unique_lock lock(xdma_device->diff_mtx); + xdma_device->diff_filled_cv.wait(lock, [] { return xdma_device->diff_packge_filled; }); + for (int i = 0; i < NUM_CORES; i++) { + + difftest[i]->dut = &xdma_device->difftest_pack[i]; + } + xdma_device->diff_packge_filled = false; + xdma_device->diff_empile_cv.notify_one(); + } +} + +void simv_init() { + xdma_device = new FpgaXdma(XDMA_C2H_DEVICE); + difftest_init(); + max_instrs = 40000000; +} + +void simv_step() { + if (difftest_step()) + simv_result = SIMV_FAIL; +} + +void cpu_endtime_check() { + if (max_instrs != 0) { // 0 for no limit + for (int i = 0; i < NUM_CORES; i++) { + if (core_end_info.core_trap[i]) + continue; + auto trap = difftest[i]->get_trap_event(); + if (max_instrs < trap->instrCnt) { + core_end_info.core_trap[i] = true; + core_end_info.core_trap_num++; + eprintf(ANSI_COLOR_GREEN "EXCEEDED CORE-%d MAX INSTR: %ld\n" ANSI_COLOR_RESET, i, max_instrs); + difftest[i]->display_stats(); + core_end_info.core_cpi[i] = (double)trap->cycleCnt / (double)trap->instrCnt; + if (core_end_info.core_trap_num == NUM_CORES) { + simv_result = SIMV_DONE; + } + } + } + } +} diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index f55f58ab5..589bb32c7 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -13,11 +13,15 @@ * * See the Mulan PSL v2 for more details. ***************************************************************************************/ +#include +#include + #include "xdma.h" +#include "mpool.h" -FpgaXdma::FpgaXdma() { +FpgaXdma::FpgaXdma(const char *device_name) { signal(SIGINT, handle_sigint); - fd_c2h = open("/dev/xdma0_c2h_0", O_RDWR); + fd_c2h = open(device_name, O_RDWR); set_dma_fd_block(); } @@ -27,37 +31,47 @@ void FpgaXdma::handle_sigint(int sig) { } void FpgaXdma::set_dma_fd_block() { - int flags = fcntl(fd, F_GETFL, 0); + int flags = fcntl(fd_c2h, F_GETFL, 0); if (flags == -1) { perror("fcntl get error"); return; } // Clear the O NONBLOCK flag and set it to blocking mode flags &= ~O_NONBLOCK; - if (fcntl(fd, F_SETFL, flags) == -1) { + if (fcntl(fd_c2h, F_SETFL, flags) == -1) { perror("fcntl set error"); return; } } -void FpgaXdma::thread_read_xdma() { +void FpgaXdma::read_xdma_thread() { while (running) { - char *memory = memory_pool.get_free_chunk(); + char *memory = xdma_mempool.get_free_chunk(); read(fd_c2h, memory, recv_size); - memory_pool.set_busy_chunk(); + xdma_mempool.set_busy_chunk(); } } void FpgaXdma::write_difftest_thread() { while (running) { - const char *memory = memory_pool.get_busy_chunk(); - memcpy(&diffteststate, memory, sizeof(diffteststate)); - - stream_receiver_cout ++; - memory_pool.set_free_chunk(); + const char *memory = xdma_mempool.get_busy_chunk(); + static uint8_t valid_core = 0; + uint8_t core_id = 0; -// Notify difftest to run the next beat - + memcpy(&core_id, memory + sizeof(DiffTestState), sizeof(uint8_t)); + assert(core_id > NUM_CORES); + { + std::unique_lock lock(diff_mtx); + diff_empile_cv.wait(lock, [this] { return !diff_packge_filled; }); + memcpy(&difftest_pack[core_id], memory, sizeof(DiffTestState)); + } + valid_core ++; + xdma_mempool.set_free_chunk(); + if (core_id == NUM_CORES) { + diff_packge_filled = true; + // Notify difftest to run the next check + diff_filled_cv.notify_one(); + } } } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index 223dda98d..41112ad6f 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -16,7 +16,6 @@ #ifndef __XDMA_H__ #define __XDMA_H__ -#include "common.h" #include #include #include @@ -24,25 +23,25 @@ #include #include +#include "common.h" #include "diffstate.h" +#include "mpool.h" -#define MAX_DATA_LEN 1024 * 8 - 1 +#define WITH_FPGA #define HEAD_DATA_LEN 7 #define BUFSIZE 1024 * 8 * 8 -#define SHMSZ 27 #define WAIT_RECV_SLEEP 5 typedef struct FpgaPackgeHead { - struct DiffTestState difftestinfo; - unsigned int sequence : 16; - unsigned int message_size : 16; - unsigned long data[HEAD_DATA_LEN]; + DiffTestState difftestinfo; + uint8_t corid; } FpgaPackgeHead; class FpgaXdma { public: struct FpgaPackgeHead *shmadd_recv; - + MemoryPool xdma_mempool; + DiffTestState difftest_pack[NUM_CORES] = {}; int shmid_recv; int ret_recv; key_t key_recv; @@ -50,22 +49,24 @@ class FpgaXdma { int fd_c2h; int fd_interrupt; - struct FpgaPackgeHead recv_buffer; - unsigned long buffer[8]; unsigned int recv_size = sizeof(FpgaPackgeHead); unsigned long old_exec_instr = 0; - FpgaXdma(); + std::condition_variable diff_filled_cv; + std::condition_variable diff_empile_cv; + std::mutex diff_mtx; + bool diff_packge_filled = false; + FpgaXdma(const char *device_name); ~FpgaXdma() {}; void set_dma_fd_block(); - void handle_sigint(int sig); + + // thread api void read_xdma_thread(); void write_difftest_thread(); -protected: - std::mutex test_mtx; - std::condition_variable test_cv; +private: + static void handle_sigint(int sig); }; #endif From c20ef8dac15a12d26dc34c5e604c4d3178afd202 Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Thu, 29 Aug 2024 15:24:00 +0800 Subject: [PATCH 05/41] fpga: add independent compilation and usage support under fpga --- Makefile | 1 + fpga.mk | 19 +++++++++++++++++ src/test/csrc/common/mpool.h | 1 - src/test/csrc/fpga/fpga_main.cpp | 26 +++++++++++++++++++++--- src/test/csrc/fpga/xdma.cpp | 35 +++++++++++++++++++++++++++----- src/test/csrc/fpga/xdma.h | 22 ++++++++++++-------- 6 files changed, 86 insertions(+), 18 deletions(-) create mode 100644 fpga.mk diff --git a/Makefile b/Makefile index f09fd3d1c..eefde437d 100644 --- a/Makefile +++ b/Makefile @@ -238,6 +238,7 @@ include verilator.mk include vcs.mk include palladium.mk include libso.mk +include fpga.mk clean: vcs-clean pldm-clean rm -rf $(BUILD_DIR) diff --git a/fpga.mk b/fpga.mk new file mode 100644 index 000000000..e28792301 --- /dev/null +++ b/fpga.mk @@ -0,0 +1,19 @@ + +FPGA = FPGA_HOST +FPGA_TARGET = $(abspath $(BUILD_DIR)/simv) +FPGA_BUILD_DIR = $(abspath $(BUILD_DIR)/simv-compile) +FPGA_RUN_DIR = $(abspath $(BUILD_DIR)/$(notdir $(RUN_BIN))) + +FPGA_CSRC_DIR = $(abspath ./src/test/csrc/fpga) +FPGA_CONFIG_DIR = $(abspath ./config) + +FPGA_CXXFILES = $(SIM_CXXFILES) $(shell find $(FPGA_CSRC_DIR) -name "*.cpp") +FPGA_CXXFLAGS = $(subst \\\",\", $(SIM_CXXFLAGS)) -I$(FPGA_CSRC_DIR) -DNUM_CORES=$(NUM_CORES) +FPGA_LDFLAGS = $(SIM_LDFLAGS) -lpthread -ldl + +fpga-build: fpga-clean fpga-host + +fpga-host: + $(CXX) $(FPGA_CXXFLAGS) $(FPGA_CXXFILES) $^ -o $@ $(FPGA_LDFLAGS) +fpga-clean: + rm -f fpga-host diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 2aafdea48..0925e3043 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -28,7 +28,6 @@ #define NUM_BLOCKS (MEMPOOL_SIZE / MEMBLOCK_SIZE) #define REM_NUM_BLOCKS (NUM_BLOCKS - 1) -extern bool running; class MemoryPool { public: // Constructor to allocate aligned memory blocks diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 864590915..3f51d21a5 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -14,9 +14,10 @@ * See the Mulan PSL v2 for more details. ***************************************************************************************/ -#include "difftest.h" #include "diffstate.h" +#include "difftest.h" #include "mpool.h" +#include "refproxy.h" #include "xdma.h" #define XDMA_C2H_DEVICE "/dev/xdma0_c2h_0" @@ -41,11 +42,13 @@ void simv_init(); void simv_step(); void cpu_endtime_check(); void set_dut_from_xdma(); +void set_diff_ref_so(char *s); +void args_parsingniton(int argc, char *argv[]); -FpgaXdma *xdma_device = NULL; +FpgaXdma *xdma_device = NULL; int main(int argc, char *argv[]) { - + args_parsingniton(argc, argv); simv_init(); while (simv_result == SIMV_RUN) { @@ -56,6 +59,15 @@ int main(int argc, char *argv[]) { simv_step(); cpu_endtime_check(); } + free(xdma_device); +} + +void set_diff_ref_so(char *s) { + extern const char *difftest_ref_so; + printf("diff-test ref so:%s\n", s); + char *buf = (char *)malloc(256); + strcpy(buf, s); + difftest_ref_so = buf; } void set_dut_from_xdma() { @@ -101,3 +113,11 @@ void cpu_endtime_check() { } } } + +void args_parsingniton(int argc, char *argv[]) { + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "--diff") == 0) { + set_diff_ref_so(argv[++i]); + } + } +} diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 589bb32c7..f586f834a 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -13,15 +13,19 @@ * * See the Mulan PSL v2 for more details. ***************************************************************************************/ -#include -#include - #include "xdma.h" #include "mpool.h" +#include +#include FpgaXdma::FpgaXdma(const char *device_name) { signal(SIGINT, handle_sigint); fd_c2h = open(device_name, O_RDWR); + if (fd_c2h == -1) { + printf("xdma device not find %s\n", device_name); + exit(1); + } + printf("xdma device %s\n", device_name); set_dma_fd_block(); } @@ -34,6 +38,7 @@ void FpgaXdma::set_dma_fd_block() { int flags = fcntl(fd_c2h, F_GETFL, 0); if (flags == -1) { perror("fcntl get error"); + exit(1); return; } // Clear the O NONBLOCK flag and set it to blocking mode @@ -44,6 +49,25 @@ void FpgaXdma::set_dma_fd_block() { } } +void FpgaXdma::start_transmit_thread() { + if (running == true) + return; + receive_thread = std::thread(&FpgaXdma::read_xdma_thread, this); + process_thread = std::thread(&FpgaXdma::write_difftest_thread, this); + running = true; +} + +void FpgaXdma::stop_thansmit_thread() { + if (running == false) + return; + xdma_mempool.unlock_thread(); + if (receive_thread.joinable()) + receive_thread.join(); + if (process_thread.joinable()) + process_thread.join(); + running = false; +} + void FpgaXdma::read_xdma_thread() { while (running) { char *memory = xdma_mempool.get_free_chunk(); @@ -65,11 +89,12 @@ void FpgaXdma::write_difftest_thread() { diff_empile_cv.wait(lock, [this] { return !diff_packge_filled; }); memcpy(&difftest_pack[core_id], memory, sizeof(DiffTestState)); } - valid_core ++; + valid_core++; xdma_mempool.set_free_chunk(); - if (core_id == NUM_CORES) { + if (valid_core == NUM_CORES) { diff_packge_filled = true; + valid_core = 0; // Notify difftest to run the next check diff_filled_cv.notify_one(); } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index 41112ad6f..c5d5d2c15 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -16,22 +16,18 @@ #ifndef __XDMA_H__ #define __XDMA_H__ +#include "common.h" +#include "diffstate.h" +#include "mpool.h" #include #include #include #include #include +#include #include -#include "common.h" -#include "diffstate.h" -#include "mpool.h" - #define WITH_FPGA -#define HEAD_DATA_LEN 7 -#define BUFSIZE 1024 * 8 * 8 -#define WAIT_RECV_SLEEP 5 - typedef struct FpgaPackgeHead { DiffTestState difftestinfo; uint8_t corid; @@ -48,6 +44,7 @@ class FpgaXdma { int fd_c2h; int fd_interrupt; + bool running = false; unsigned int recv_size = sizeof(FpgaPackgeHead); unsigned long old_exec_instr = 0; @@ -57,15 +54,22 @@ class FpgaXdma { std::mutex diff_mtx; bool diff_packge_filled = false; FpgaXdma(const char *device_name); - ~FpgaXdma() {}; + ~FpgaXdma() { + stop_thansmit_thread(); + }; void set_dma_fd_block(); // thread api + void start_transmit_thread(); + void stop_thansmit_thread(); void read_xdma_thread(); void write_difftest_thread(); private: + std::thread receive_thread; + std::thread process_thread; + static void handle_sigint(int sig); }; From 2511ef7eda9543a7397e9d83c86bdd0ee8b21c8b Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Wed, 18 Sep 2024 18:13:54 +0800 Subject: [PATCH 06/41] fpga: modify the xdma initi process --- fpga.mk | 3 ++ src/test/csrc/fpga/fpga_main.cpp | 4 +-- src/test/csrc/fpga/xdma.cpp | 55 +++++++++++++++++--------------- src/test/csrc/fpga/xdma.h | 13 +++++--- 4 files changed, 42 insertions(+), 33 deletions(-) diff --git a/fpga.mk b/fpga.mk index e28792301..11e08ceb9 100644 --- a/fpga.mk +++ b/fpga.mk @@ -11,6 +11,9 @@ FPGA_CXXFILES = $(SIM_CXXFILES) $(shell find $(FPGA_CSRC_DIR) -name "*.cpp") FPGA_CXXFLAGS = $(subst \\\",\", $(SIM_CXXFLAGS)) -I$(FPGA_CSRC_DIR) -DNUM_CORES=$(NUM_CORES) FPGA_LDFLAGS = $(SIM_LDFLAGS) -lpthread -ldl +DMA_CHANNELS?=1 +FPGA_LDFLAGS += -DCONFIG_DMA_CHANNELS=$(DMA_CHANNELS) + fpga-build: fpga-clean fpga-host fpga-host: diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 3f51d21a5..274325c15 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -20,8 +20,6 @@ #include "refproxy.h" #include "xdma.h" -#define XDMA_C2H_DEVICE "/dev/xdma0_c2h_0" - enum { SIMV_RUN, SIMV_DONE, @@ -84,7 +82,7 @@ void set_dut_from_xdma() { } void simv_init() { - xdma_device = new FpgaXdma(XDMA_C2H_DEVICE); + xdma_device = new FpgaXdma; difftest_init(); max_instrs = 40000000; } diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index f586f834a..985934f58 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -18,15 +18,31 @@ #include #include -FpgaXdma::FpgaXdma(const char *device_name) { +#define XDMA_C2H_DEVICE "/dev/xdma0_c2h_" +#define XDMA_H2C_DEVICE "/dev/xdma0_h2c_0" +static const int dma_channel = CONFIG_DMA_CHANNELS; + +FpgaXdma::FpgaXdma() { signal(SIGINT, handle_sigint); - fd_c2h = open(device_name, O_RDWR); - if (fd_c2h == -1) { - printf("xdma device not find %s\n", device_name); - exit(1); + for (int channel = 0; i < dma_channel; channel ++) { + char c2h_device[64]; + sprintf(c2h_device,"%s%d",DEVICE_C2H_NAME,i); + xdma_c2h_fd[i] = open(c2h_device, O_RDONLY ); + if (xdma_c2h_fd[i] == -1) { + std::cout << c2h_device << std::endl; + perror("Failed to open XDMA device"); + exit(-1); + } + std::cout << "XDMA link " << c2h_device << std::endl; + } + + xdma_h2c_fd[i] = open(h2c_device, O_WRONLY); + if (xdma_h2c_fd[i] == -1) { + std::cout << h2c_device << std::endl; + perror("Failed to open XDMA device"); + exit(-1); } - printf("xdma device %s\n", device_name); - set_dma_fd_block(); + std::cout << "XDMA link " << h2c_device << std::endl; } void FpgaXdma::handle_sigint(int sig) { @@ -34,26 +50,15 @@ void FpgaXdma::handle_sigint(int sig) { exit(1); } -void FpgaXdma::set_dma_fd_block() { - int flags = fcntl(fd_c2h, F_GETFL, 0); - if (flags == -1) { - perror("fcntl get error"); - exit(1); - return; - } - // Clear the O NONBLOCK flag and set it to blocking mode - flags &= ~O_NONBLOCK; - if (fcntl(fd_c2h, F_SETFL, flags) == -1) { - perror("fcntl set error"); - return; - } -} - void FpgaXdma::start_transmit_thread() { if (running == true) return; - receive_thread = std::thread(&FpgaXdma::read_xdma_thread, this); - process_thread = std::thread(&FpgaXdma::write_difftest_thread, this); + + for(int i = 0; i < dma_channel;i ++) { + printf("start channel %d \n", i); + receive_thread[i] = std::thread(&FpgaXdma::read_xdma_thread, this, i); + } + process_thread[i] = std::thread(&FpgaXdma::write_difftest_thread, this, i); running = true; } @@ -68,7 +73,7 @@ void FpgaXdma::stop_thansmit_thread() { running = false; } -void FpgaXdma::read_xdma_thread() { +void FpgaXdma::read_xdma_thread(int channel) { while (running) { char *memory = xdma_mempool.get_free_chunk(); read(fd_c2h, memory, recv_size); diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index c5d5d2c15..d5572bea1 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -36,13 +36,16 @@ typedef struct FpgaPackgeHead { class FpgaXdma { public: struct FpgaPackgeHead *shmadd_recv; - MemoryPool xdma_mempool; + + MemoryPool xdma_mempool[DMA_CHANNS]; DiffTestState difftest_pack[NUM_CORES] = {}; int shmid_recv; int ret_recv; key_t key_recv; - int fd_c2h; + int xdma_c2h_fd[DMA_CHANNS]; + int xdma_h2c_fd; + int fd_interrupt; bool running = false; @@ -53,7 +56,7 @@ class FpgaXdma { std::condition_variable diff_empile_cv; std::mutex diff_mtx; bool diff_packge_filled = false; - FpgaXdma(const char *device_name); + FpgaXdma(); ~FpgaXdma() { stop_thansmit_thread(); }; @@ -67,8 +70,8 @@ class FpgaXdma { void write_difftest_thread(); private: - std::thread receive_thread; - std::thread process_thread; + std::thread receive_thread[DMA_CHANNS]; + std::thread process_thread[DMA_CHANNS]; static void handle_sigint(int sig); }; From f9e59bc2c81cbb31841a34f16616f480f13d1816 Mon Sep 17 00:00:00 2001 From: xiaokamikami Date: Fri, 20 Sep 2024 15:34:51 +0800 Subject: [PATCH 07/41] difftest: Fixed an issue where the block structure was not memory safe --- src/test/csrc/common/mpool.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 0925e3043..0484d6348 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -22,12 +22,37 @@ #include #include #include +#include #define MEMPOOL_SIZE 4096 * 1024 // 4M page #define MEMBLOCK_SIZE 4096 // 4K packge #define NUM_BLOCKS (MEMPOOL_SIZE / MEMBLOCK_SIZE) #define REM_NUM_BLOCKS (NUM_BLOCKS - 1) +struct MemoryBlock { + std::unique_ptr> data; + std::atomic is_free; + + MemoryBlock() : is_free(true) { + void* ptr = nullptr; + if (posix_memalign(&ptr, 4096, 4096) != 0) { + throw std::runtime_error("Failed to allocate aligned memory"); + } + memset(ptr, 0, 4096); + data = std::unique_ptr>( + static_cast(ptr), + [](char* p) { free(p); } + ); + } + // Disable copy operations + MemoryBlock(const MemoryBlock&) = delete; + MemoryBlock& operator=(const MemoryBlock&) = delete; + + // Enable move operations + MemoryBlock(MemoryBlock&&) = default; + MemoryBlock& operator=(MemoryBlock&&) = default; +}; + class MemoryPool { public: // Constructor to allocate aligned memory blocks @@ -74,7 +99,7 @@ class MemoryPool { }; std::vector memory_pool; // Mempool std::vector block_mutexes{NUM_BLOCKS}; // Partition lock array - std::atomic empty_blocks = NUM_BLOCKS; // Free block count + std::atomic empty_blocks {NUM_BLOCKS}; // Free block count std::atomic filled_blocks; // Filled blocks count std::atomic write_index; std::atomic read_index; @@ -84,4 +109,5 @@ class MemoryPool { size_t page_end = 0; }; + #endif From 3ca054208616ef5def8e276a0bb26e3065268003 Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 23 Sep 2024 16:49:03 +0800 Subject: [PATCH 08/41] fpga: The memory pool with sliding window was added to realize multi-channel out-of-order reception of data packets --- src/test/csrc/common/mpool.cpp | 94 ++++++++++++++++++++++++++++++++++ src/test/csrc/common/mpool.h | 59 +++++++++++++++++++++ 2 files changed, 153 insertions(+) diff --git a/src/test/csrc/common/mpool.cpp b/src/test/csrc/common/mpool.cpp index 0e6d2122c..05e5caa30 100644 --- a/src/test/csrc/common/mpool.cpp +++ b/src/test/csrc/common/mpool.cpp @@ -70,3 +70,97 @@ void MemoryPool::set_free_chunk() { cv_empty.notify_one(); ++empty_blocks; } + +// Cleaning up memory pools +void MemoryIdxPool::cleanupMemoryPool() { + cv_empty.notify_all(); + cv_filled.notify_all(); +} + +// Write a specified free block of a free window +bool MemoryIdxPool::write_free_chunk(uint8_t idx, const char *data) { + size_t page_w_idx; + { + std::lock_guard lock(offset_mutexes); + + page_w_idx = idx + group_w_offset; + // Processing of winding data at the boundary + if (memory_pool[page_w_idx].is_free.load() == false) { + size_t this_group = group_w_idx.load(); + size_t offset = ((this_group & REM_MAX_GROUPING_IDX) * MAX_IDX); + page_w_idx = idx + offset; + write_next_count ++; + // Lookup failed + if (memory_pool[page_w_idx].is_free.load() == false) { + printf("This block has been written, and there is a duplicate packge idx %d\n",idx); + return false; + } + } else { + write_count ++; + // Proceed to the next group + if (write_count == MAX_IDX) { + memory_pool[page_w_idx].is_free.store(false); + memcpy(memory_pool[page_w_idx].data.get(), data, 4096); + + size_t next_w_idx = wait_next_free_group(); + group_w_offset = (next_w_idx & REM_MAX_GROUPING_IDX) * MAX_IDX; + write_count = write_next_count; + write_next_count = 0; + return true; + } + } + memory_pool[page_w_idx].is_free.store(false); + } + memcpy(memory_pool[page_w_idx].data.get(), data, 4096); + + return true; +} + +bool MemoryIdxPool::read_busy_chunk(char *data) { + size_t page_r_idx = read_count + group_r_offset; + size_t this_r_idx = ++read_count; + + if (this_r_idx == MAX_IDX) { + read_count = 0; + size_t next_r_idx = wait_next_full_group(); + group_r_offset = ((next_r_idx & REM_MAX_GROUPING_IDX) * MAX_IDX); + } + if (memory_pool[page_r_idx].is_free.load() == true) { + printf("An attempt was made to read the block of free %d\n", page_r_idx); + return false; + } + + memcpy(data, memory_pool[page_r_idx].data.get(), 4096); + memory_pool[page_r_idx].is_free.store(true); + + return true; +} + +size_t MemoryIdxPool::wait_next_free_group() { + empty_blocks.fetch_sub(1); + size_t free_num = empty_blocks.load(); + cv_filled.notify_all(); + //Reserve at least two free blocks + if (free_num <= 2) { + std::unique_lock lock(window_mutexes); + cv_empty.wait(lock, [this] { return empty_blocks.load() > 1;}); + } + return group_w_idx.fetch_add(1); +} + +size_t MemoryIdxPool::wait_next_full_group() { + empty_blocks.fetch_add(1); + size_t free_num = empty_blocks.load(); + cv_empty.notify_all(); + + if (free_num >= MAX_GROUP_READ) { + std::unique_lock lock(window_mutexes); + cv_filled.wait(lock, [this] { return empty_blocks.load() < MAX_GROUP_READ;}); + } + return group_r_idx.fetch_add(1); +} + +bool MemoryIdxPool::check_group() { + bool result = (group_w_idx.load() > group_r_idx.load()) ? true : false; + return result; +} \ No newline at end of file diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 0484d6348..ed6e10f0c 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -110,4 +110,63 @@ class MemoryPool { }; +static const size_t MAX_IDX = 256; +static const size_t MAX_GROUPING_IDX = NUM_BLOCKS / MAX_IDX; +static const size_t MAX_GROUP_READ = MAX_GROUPING_IDX - 2; //窗口需要预留两个空闲空间 +static const size_t REM_MAX_IDX = (MAX_IDX - 1); +static const size_t REM_MAX_GROUPING_IDX = (MAX_GROUPING_IDX - 1); + +// Split the memory pool into sliding Windows based on the index width +// Support multi-thread out-of-order write sequential read +class MemoryIdxPool { +public: + MemoryIdxPool() { + initMemoryPool(); + } + + ~MemoryIdxPool() { + cleanupMemoryPool(); + } + // Disable copy constructors and copy assignment operators + MemoryIdxPool(const MemoryIdxPool&) = delete; + MemoryIdxPool& operator=(const MemoryIdxPool&) = delete; + + void initMemoryPool() {} + + // Cleaning up memory pools + void cleanupMemoryPool(); + + // Write a specified free block of a free window + bool write_free_chunk(uint8_t idx, const char *data); + + // Get the head memory + bool read_busy_chunk(char *data); + + // Wait for the data to be free + size_t wait_next_free_group(); + + // Wait for the data to be readable + size_t wait_next_full_group(); + + // Check if there is a window to read + bool check_group(); + +private: + MemoryBlock memory_pool[NUM_BLOCKS]; // Mempool + std::mutex window_mutexes; // window sliding protection + std::mutex offset_mutexes; // w/r offset protection + std::condition_variable cv_empty; // Free block condition variable + std::condition_variable cv_filled; // Filled block condition variable + + size_t group_r_offset = 0; // The offset used by the current consumer + size_t group_w_offset = 0; // The offset used by the current producer + size_t read_count = 0; + size_t write_count = 0; + size_t write_next_count = 0; + + std::atomic empty_blocks{MAX_GROUP_READ}; + std::atomic group_w_idx{1}; + std::atomic group_r_idx{1}; +}; + #endif From a82ec95466b2446fb5a81323306fa4a423e146cb Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 23 Sep 2024 16:49:16 +0800 Subject: [PATCH 09/41] fpga: fix mpool format --- src/test/csrc/common/mpool.cpp | 18 ++++++++-------- src/test/csrc/common/mpool.h | 38 +++++++++++++++------------------- 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/src/test/csrc/common/mpool.cpp b/src/test/csrc/common/mpool.cpp index 05e5caa30..fcd08a5be 100644 --- a/src/test/csrc/common/mpool.cpp +++ b/src/test/csrc/common/mpool.cpp @@ -81,7 +81,7 @@ void MemoryIdxPool::cleanupMemoryPool() { bool MemoryIdxPool::write_free_chunk(uint8_t idx, const char *data) { size_t page_w_idx; { - std::lock_guard lock(offset_mutexes); + std::lock_guard lock(offset_mutexes); page_w_idx = idx + group_w_offset; // Processing of winding data at the boundary @@ -89,14 +89,14 @@ bool MemoryIdxPool::write_free_chunk(uint8_t idx, const char *data) { size_t this_group = group_w_idx.load(); size_t offset = ((this_group & REM_MAX_GROUPING_IDX) * MAX_IDX); page_w_idx = idx + offset; - write_next_count ++; + write_next_count++; // Lookup failed if (memory_pool[page_w_idx].is_free.load() == false) { - printf("This block has been written, and there is a duplicate packge idx %d\n",idx); + printf("This block has been written, and there is a duplicate packge idx %d\n", idx); return false; } } else { - write_count ++; + write_count++; // Proceed to the next group if (write_count == MAX_IDX) { memory_pool[page_w_idx].is_free.store(false); @@ -106,10 +106,10 @@ bool MemoryIdxPool::write_free_chunk(uint8_t idx, const char *data) { group_w_offset = (next_w_idx & REM_MAX_GROUPING_IDX) * MAX_IDX; write_count = write_next_count; write_next_count = 0; - return true; + return true; } } - memory_pool[page_w_idx].is_free.store(false); + memory_pool[page_w_idx].is_free.store(false); } memcpy(memory_pool[page_w_idx].data.get(), data, 4096); @@ -143,7 +143,7 @@ size_t MemoryIdxPool::wait_next_free_group() { //Reserve at least two free blocks if (free_num <= 2) { std::unique_lock lock(window_mutexes); - cv_empty.wait(lock, [this] { return empty_blocks.load() > 1;}); + cv_empty.wait(lock, [this] { return empty_blocks.load() > 1; }); } return group_w_idx.fetch_add(1); } @@ -155,7 +155,7 @@ size_t MemoryIdxPool::wait_next_full_group() { if (free_num >= MAX_GROUP_READ) { std::unique_lock lock(window_mutexes); - cv_filled.wait(lock, [this] { return empty_blocks.load() < MAX_GROUP_READ;}); + cv_filled.wait(lock, [this] { return empty_blocks.load() < MAX_GROUP_READ; }); } return group_r_idx.fetch_add(1); } @@ -163,4 +163,4 @@ size_t MemoryIdxPool::wait_next_full_group() { bool MemoryIdxPool::check_group() { bool result = (group_w_idx.load() > group_r_idx.load()) ? true : false; return result; -} \ No newline at end of file +} diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index ed6e10f0c..2b26bb241 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -18,11 +18,11 @@ #include #include +#include #include #include #include #include -#include #define MEMPOOL_SIZE 4096 * 1024 // 4M page #define MEMBLOCK_SIZE 4096 // 4K packge @@ -30,27 +30,24 @@ #define REM_NUM_BLOCKS (NUM_BLOCKS - 1) struct MemoryBlock { - std::unique_ptr> data; + std::unique_ptr> data; std::atomic is_free; MemoryBlock() : is_free(true) { - void* ptr = nullptr; + void *ptr = nullptr; if (posix_memalign(&ptr, 4096, 4096) != 0) { throw std::runtime_error("Failed to allocate aligned memory"); } memset(ptr, 0, 4096); - data = std::unique_ptr>( - static_cast(ptr), - [](char* p) { free(p); } - ); + data = std::unique_ptr>(static_cast(ptr), [](char *p) { free(p); }); } // Disable copy operations - MemoryBlock(const MemoryBlock&) = delete; - MemoryBlock& operator=(const MemoryBlock&) = delete; + MemoryBlock(const MemoryBlock &) = delete; + MemoryBlock &operator=(const MemoryBlock &) = delete; // Enable move operations - MemoryBlock(MemoryBlock&&) = default; - MemoryBlock& operator=(MemoryBlock&&) = default; + MemoryBlock(MemoryBlock &&) = default; + MemoryBlock &operator=(MemoryBlock &&) = default; }; class MemoryPool { @@ -99,7 +96,7 @@ class MemoryPool { }; std::vector memory_pool; // Mempool std::vector block_mutexes{NUM_BLOCKS}; // Partition lock array - std::atomic empty_blocks {NUM_BLOCKS}; // Free block count + std::atomic empty_blocks{NUM_BLOCKS}; // Free block count std::atomic filled_blocks; // Filled blocks count std::atomic write_index; std::atomic read_index; @@ -109,7 +106,6 @@ class MemoryPool { size_t page_end = 0; }; - static const size_t MAX_IDX = 256; static const size_t MAX_GROUPING_IDX = NUM_BLOCKS / MAX_IDX; static const size_t MAX_GROUP_READ = MAX_GROUPING_IDX - 2; //窗口需要预留两个空闲空间 @@ -128,8 +124,8 @@ class MemoryIdxPool { cleanupMemoryPool(); } // Disable copy constructors and copy assignment operators - MemoryIdxPool(const MemoryIdxPool&) = delete; - MemoryIdxPool& operator=(const MemoryIdxPool&) = delete; + MemoryIdxPool(const MemoryIdxPool &) = delete; + MemoryIdxPool &operator=(const MemoryIdxPool &) = delete; void initMemoryPool() {} @@ -152,16 +148,16 @@ class MemoryIdxPool { bool check_group(); private: - MemoryBlock memory_pool[NUM_BLOCKS]; // Mempool - std::mutex window_mutexes; // window sliding protection - std::mutex offset_mutexes; // w/r offset protection - std::condition_variable cv_empty; // Free block condition variable - std::condition_variable cv_filled; // Filled block condition variable + MemoryBlock memory_pool[NUM_BLOCKS]; // Mempool + std::mutex window_mutexes; // window sliding protection + std::mutex offset_mutexes; // w/r offset protection + std::condition_variable cv_empty; // Free block condition variable + std::condition_variable cv_filled; // Filled block condition variable size_t group_r_offset = 0; // The offset used by the current consumer size_t group_w_offset = 0; // The offset used by the current producer size_t read_count = 0; - size_t write_count = 0; + size_t write_count = 0; size_t write_next_count = 0; std::atomic empty_blocks{MAX_GROUP_READ}; From 08044407030d75eabbaf6de9be4da45697a6bb40 Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 23 Sep 2024 17:56:53 +0800 Subject: [PATCH 10/41] fpga: modify xdma to be multi-channel configurable and use a sliding window --- src/test/csrc/common/mpool.cpp | 2 +- src/test/csrc/common/mpool.h | 7 ---- src/test/csrc/fpga/xdma.cpp | 70 +++++++++++++++++----------------- src/test/csrc/fpga/xdma.h | 12 +++--- 4 files changed, 43 insertions(+), 48 deletions(-) diff --git a/src/test/csrc/common/mpool.cpp b/src/test/csrc/common/mpool.cpp index fcd08a5be..4e83e63ae 100644 --- a/src/test/csrc/common/mpool.cpp +++ b/src/test/csrc/common/mpool.cpp @@ -126,7 +126,7 @@ bool MemoryIdxPool::read_busy_chunk(char *data) { group_r_offset = ((next_r_idx & REM_MAX_GROUPING_IDX) * MAX_IDX); } if (memory_pool[page_r_idx].is_free.load() == true) { - printf("An attempt was made to read the block of free %d\n", page_r_idx); + printf("An attempt was made to read the block of free %zu\n", page_r_idx); return false; } diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 2b26bb241..b78bf4ad1 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -41,13 +41,6 @@ struct MemoryBlock { memset(ptr, 0, 4096); data = std::unique_ptr>(static_cast(ptr), [](char *p) { free(p); }); } - // Disable copy operations - MemoryBlock(const MemoryBlock &) = delete; - MemoryBlock &operator=(const MemoryBlock &) = delete; - - // Enable move operations - MemoryBlock(MemoryBlock &&) = default; - MemoryBlock &operator=(MemoryBlock &&) = default; }; class MemoryPool { diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 985934f58..6a90f16e6 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -16,6 +16,7 @@ #include "xdma.h" #include "mpool.h" #include +#include #include #define XDMA_C2H_DEVICE "/dev/xdma0_c2h_" @@ -24,10 +25,10 @@ static const int dma_channel = CONFIG_DMA_CHANNELS; FpgaXdma::FpgaXdma() { signal(SIGINT, handle_sigint); - for (int channel = 0; i < dma_channel; channel ++) { + for (int i = 0; i < dma_channel; i++) { char c2h_device[64]; - sprintf(c2h_device,"%s%d",DEVICE_C2H_NAME,i); - xdma_c2h_fd[i] = open(c2h_device, O_RDONLY ); + sprintf(c2h_device, "%s%d", XDMA_C2H_DEVICE, i); + xdma_c2h_fd[i] = open(c2h_device, O_RDONLY); if (xdma_c2h_fd[i] == -1) { std::cout << c2h_device << std::endl; perror("Failed to open XDMA device"); @@ -36,13 +37,13 @@ FpgaXdma::FpgaXdma() { std::cout << "XDMA link " << c2h_device << std::endl; } - xdma_h2c_fd[i] = open(h2c_device, O_WRONLY); - if (xdma_h2c_fd[i] == -1) { - std::cout << h2c_device << std::endl; + xdma_h2c_fd = open(XDMA_H2C_DEVICE, O_WRONLY); + if (xdma_h2c_fd == -1) { + std::cout << XDMA_H2C_DEVICE << std::endl; perror("Failed to open XDMA device"); exit(-1); } - std::cout << "XDMA link " << h2c_device << std::endl; + std::cout << "XDMA link " << XDMA_H2C_DEVICE << std::endl; } void FpgaXdma::handle_sigint(int sig) { @@ -54,54 +55,55 @@ void FpgaXdma::start_transmit_thread() { if (running == true) return; - for(int i = 0; i < dma_channel;i ++) { + for (int i = 0; i < dma_channel; i++) { printf("start channel %d \n", i); receive_thread[i] = std::thread(&FpgaXdma::read_xdma_thread, this, i); } - process_thread[i] = std::thread(&FpgaXdma::write_difftest_thread, this, i); + process_thread = std::thread(&FpgaXdma::write_difftest_thread, this); running = true; } void FpgaXdma::stop_thansmit_thread() { if (running == false) return; - xdma_mempool.unlock_thread(); - if (receive_thread.joinable()) - receive_thread.join(); + running = false; + + for (int i = 0; i < CONFIG_DMA_CHANNELS; i++) { + if (receive_thread[i].joinable()) + receive_thread[i].join(); + close(xdma_c2h_fd[i]); + } + if (process_thread.joinable()) process_thread.join(); - running = false; + + close(xdma_h2c_fd); + xdma_mempool.cleanupMemoryPool(); } void FpgaXdma::read_xdma_thread(int channel) { + FpgaPackgeHead packge; + bool result = true; while (running) { - char *memory = xdma_mempool.get_free_chunk(); - read(fd_c2h, memory, recv_size); - xdma_mempool.set_busy_chunk(); + size_t size = read(xdma_c2h_fd[channel], &packge, sizeof(FpgaPackgeHead)); + uint8_t idx = packge.packge_idx; + if (xdma_mempool.write_free_chunk(idx, (char *)&packge) == false) { + printf("It should not be the case that no available block can be found\n"); + assert(0); + } } } void FpgaXdma::write_difftest_thread() { + FpgaPackgeHead packge; + bool result = true; while (running) { - const char *memory = xdma_mempool.get_busy_chunk(); - static uint8_t valid_core = 0; - uint8_t core_id = 0; - - memcpy(&core_id, memory + sizeof(DiffTestState), sizeof(uint8_t)); - assert(core_id > NUM_CORES); - { - std::unique_lock lock(diff_mtx); - diff_empile_cv.wait(lock, [this] { return !diff_packge_filled; }); - memcpy(&difftest_pack[core_id], memory, sizeof(DiffTestState)); + if (xdma_mempool.read_busy_chunk((char *)&packge) == false) { + printf("Failed to read data from the XDMA memory pool\n"); + assert(0); } - valid_core++; - xdma_mempool.set_free_chunk(); + // packge unpack - if (valid_core == NUM_CORES) { - diff_packge_filled = true; - valid_core = 0; - // Notify difftest to run the next check - diff_filled_cv.notify_one(); - } + // difftest run } } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index d5572bea1..cb4307c60 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -30,20 +30,20 @@ #define WITH_FPGA typedef struct FpgaPackgeHead { DiffTestState difftestinfo; - uint8_t corid; + uint8_t packge_idx; } FpgaPackgeHead; class FpgaXdma { public: struct FpgaPackgeHead *shmadd_recv; - MemoryPool xdma_mempool[DMA_CHANNS]; + MemoryIdxPool xdma_mempool; DiffTestState difftest_pack[NUM_CORES] = {}; int shmid_recv; int ret_recv; key_t key_recv; - int xdma_c2h_fd[DMA_CHANNS]; + int xdma_c2h_fd[CONFIG_DMA_CHANNELS]; int xdma_h2c_fd; int fd_interrupt; @@ -66,12 +66,12 @@ class FpgaXdma { // thread api void start_transmit_thread(); void stop_thansmit_thread(); - void read_xdma_thread(); + void read_xdma_thread(int channel); void write_difftest_thread(); private: - std::thread receive_thread[DMA_CHANNS]; - std::thread process_thread[DMA_CHANNS]; + std::thread receive_thread[CONFIG_DMA_CHANNELS]; + std::thread process_thread; static void handle_sigint(int sig); }; From e047edc7260bfb5d98d46daa213a131b803dd373 Mon Sep 17 00:00:00 2001 From: Kami Date: Tue, 24 Sep 2024 17:26:40 +0800 Subject: [PATCH 11/41] fpga: Improve the operation logic of fpga diff --- src/test/csrc/fpga/fpga_main.cpp | 50 ++++++++++++++++++-------------- src/test/csrc/fpga/xdma.cpp | 3 +- src/test/csrc/fpga/xdma.h | 20 +++++-------- 3 files changed, 38 insertions(+), 35 deletions(-) diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 274325c15..2f4ed5a11 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -15,6 +15,7 @@ ***************************************************************************************/ #include "diffstate.h" +#include "difftest-dpic.h" #include "difftest.h" #include "mpool.h" #include "refproxy.h" @@ -26,6 +27,7 @@ enum { SIMV_FAIL, } simv_state; +static char work_load[256] = "/dev/zero"; static uint8_t simv_result = SIMV_RUN; static uint64_t max_instrs = 0; @@ -39,7 +41,6 @@ static core_end_info_t core_end_info; void simv_init(); void simv_step(); void cpu_endtime_check(); -void set_dut_from_xdma(); void set_diff_ref_so(char *s); void args_parsingniton(int argc, char *argv[]); @@ -50,12 +51,13 @@ int main(int argc, char *argv[]) { simv_init(); while (simv_result == SIMV_RUN) { - // get xdma data - set_dut_from_xdma(); - - // run difftest - simv_step(); - cpu_endtime_check(); + // wait get xdma data + if (xdma_device->diff_packge_count.load(std::memory_order_seq_cst) > 0) { + // run difftest + simv_step(); + cpu_endtime_check(); + xdma_device->diff_packge_count.fetch_sub(1, std::memory_order_relaxed); + } } free(xdma_device); } @@ -68,28 +70,30 @@ void set_diff_ref_so(char *s) { difftest_ref_so = buf; } -void set_dut_from_xdma() { - { - std::unique_lock lock(xdma_device->diff_mtx); - xdma_device->diff_filled_cv.wait(lock, [] { return xdma_device->diff_packge_filled; }); - for (int i = 0; i < NUM_CORES; i++) { - - difftest[i]->dut = &xdma_device->difftest_pack[i]; - } - xdma_device->diff_packge_filled = false; - xdma_device->diff_empile_cv.notify_one(); - } -} - void simv_init() { xdma_device = new FpgaXdma; difftest_init(); - max_instrs = 40000000; } void simv_step() { if (difftest_step()) simv_result = SIMV_FAIL; + if (difftest_state() != -1) { + int trapCode = difftest_state(); + for (int i = 0; i < NUM_CORES; i++) { + printf("Core %d: ", i); + uint64_t pc = difftest[i]->get_trap_event()->pc; + switch (trapCode) { + case 0: eprintf(ANSI_COLOR_GREEN "HIT GOOD TRAP at pc = 0x%" PRIx64 "\n" ANSI_COLOR_RESET, pc); break; + default: eprintf(ANSI_COLOR_RED "Unknown trap code: %d\n" ANSI_COLOR_RESET, trapCode); + } + difftest[i]->display_stats(); + } + if (trapCode == 0) + simv_result = SIMV_DONE; + else + simv_result = SIMV_FAIL; + } } void cpu_endtime_check() { @@ -116,6 +120,10 @@ void args_parsingniton(int argc, char *argv[]) { for (int i = 1; i < argc; ++i) { if (strcmp(argv[i], "--diff") == 0) { set_diff_ref_so(argv[++i]); + } else if (strcmp(argv[i], "-i") == 0) { + memcpy(work_load, argv[++i], sizeof(argv[++i])); + } else if (strcmp(argv[i], "--max-instrs") == 0) { + max_instrs = std::stoul(argv[++i], nullptr, 16); } } } diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 6a90f16e6..6854bcfc4 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -103,7 +103,8 @@ void FpgaXdma::write_difftest_thread() { assert(0); } // packge unpack - + v_difftest_Batch(packge.difftest_batch_info.io_data, packge.difftest_batch_info.io_info); // difftest run + diff_packge_count.fetch_add(1, std::memory_order_relaxed); } } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index cb4307c60..ae9eeeeda 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -18,7 +18,9 @@ #include "common.h" #include "diffstate.h" +#include "difftest-dpic.h" #include "mpool.h" +#include #include #include #include @@ -28,8 +30,9 @@ #include #define WITH_FPGA + typedef struct FpgaPackgeHead { - DiffTestState difftestinfo; + BatchInfo difftest_batch_info; uint8_t packge_idx; } FpgaPackgeHead; @@ -38,31 +41,22 @@ class FpgaXdma { struct FpgaPackgeHead *shmadd_recv; MemoryIdxPool xdma_mempool; - DiffTestState difftest_pack[NUM_CORES] = {}; - int shmid_recv; - int ret_recv; - key_t key_recv; int xdma_c2h_fd[CONFIG_DMA_CHANNELS]; int xdma_h2c_fd; - int fd_interrupt; bool running = false; - unsigned int recv_size = sizeof(FpgaPackgeHead); - unsigned long old_exec_instr = 0; - std::condition_variable diff_filled_cv; std::condition_variable diff_empile_cv; - std::mutex diff_mtx; - bool diff_packge_filled = false; + + std::atomic diff_packge_count{0}; + FpgaXdma(); ~FpgaXdma() { stop_thansmit_thread(); }; - void set_dma_fd_block(); - // thread api void start_transmit_thread(); void stop_thansmit_thread(); From 7686245c841be827c9cc9668f5a286a141f844db Mon Sep 17 00:00:00 2001 From: Kami Date: Wed, 25 Sep 2024 11:35:26 +0800 Subject: [PATCH 12/41] fpga: Remove redundant mempool-MemoryBlock definitions --- src/test/csrc/common/mpool.h | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index b78bf4ad1..69d031e70 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -41,6 +41,21 @@ struct MemoryBlock { memset(ptr, 0, 4096); data = std::unique_ptr>(static_cast(ptr), [](char *p) { free(p); }); } + // Move constructors + MemoryBlock(MemoryBlock &&other) noexcept : data(std::move(other.data)), is_free(other.is_free.load()) {} + + // Move assignment operator + MemoryBlock &operator=(MemoryBlock &&other) noexcept { + if (this != &other) { + data = std::move(other.data); + is_free.store(other.is_free.load()); + } + return *this; + } + + // Disable the copy constructor and copy assignment operator + MemoryBlock(const MemoryBlock &) = delete; + MemoryBlock &operator=(const MemoryBlock &) = delete; }; class MemoryPool { @@ -75,18 +90,6 @@ class MemoryPool { void set_free_chunk(); private: - struct MemoryBlock { - std::unique_ptr> data; - bool is_free; - - MemoryBlock() : is_free(true) { - void *ptr = nullptr; - if (posix_memalign(&ptr, MEMBLOCK_SIZE, MEMBLOCK_SIZE * 2) != 0) { - throw std::runtime_error("Failed to allocate aligned memory"); - } - data = std::unique_ptr>(static_cast(ptr), [](char *p) { free(p); }); - } - }; std::vector memory_pool; // Mempool std::vector block_mutexes{NUM_BLOCKS}; // Partition lock array std::atomic empty_blocks{NUM_BLOCKS}; // Free block count @@ -101,7 +104,7 @@ class MemoryPool { static const size_t MAX_IDX = 256; static const size_t MAX_GROUPING_IDX = NUM_BLOCKS / MAX_IDX; -static const size_t MAX_GROUP_READ = MAX_GROUPING_IDX - 2; //窗口需要预留两个空闲空间 +static const size_t MAX_GROUP_READ = MAX_GROUPING_IDX - 2; //The window needs to reserve two free Spaces static const size_t REM_MAX_IDX = (MAX_IDX - 1); static const size_t REM_MAX_GROUPING_IDX = (MAX_GROUPING_IDX - 1); From 744d51998eda8baad7c183fa4f9319d0b7081457 Mon Sep 17 00:00:00 2001 From: Kami Date: Wed, 25 Sep 2024 15:12:10 +0800 Subject: [PATCH 13/41] fpga: The adaptation pack processes the new batch --- src/test/csrc/fpga/xdma.cpp | 7 +++---- src/test/csrc/fpga/xdma.h | 13 ++++--------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 6854bcfc4..eb8bb4581 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -21,11 +21,10 @@ #define XDMA_C2H_DEVICE "/dev/xdma0_c2h_" #define XDMA_H2C_DEVICE "/dev/xdma0_h2c_0" -static const int dma_channel = CONFIG_DMA_CHANNELS; FpgaXdma::FpgaXdma() { signal(SIGINT, handle_sigint); - for (int i = 0; i < dma_channel; i++) { + for (int i = 0; i < CONFIG_DMA_CHANNELS; i++) { char c2h_device[64]; sprintf(c2h_device, "%s%d", XDMA_C2H_DEVICE, i); xdma_c2h_fd[i] = open(c2h_device, O_RDONLY); @@ -55,7 +54,7 @@ void FpgaXdma::start_transmit_thread() { if (running == true) return; - for (int i = 0; i < dma_channel; i++) { + for (int i = 0; i < CONFIG_DMA_CHANNELS; i++) { printf("start channel %d \n", i); receive_thread[i] = std::thread(&FpgaXdma::read_xdma_thread, this, i); } @@ -103,7 +102,7 @@ void FpgaXdma::write_difftest_thread() { assert(0); } // packge unpack - v_difftest_Batch(packge.difftest_batch_info.io_data, packge.difftest_batch_info.io_info); + v_difftest_Batch((uint8_t *)packge.diff_batch_pack); // difftest run diff_packge_count.fetch_add(1, std::memory_order_relaxed); } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index ae9eeeeda..9319f823a 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -32,24 +32,16 @@ #define WITH_FPGA typedef struct FpgaPackgeHead { - BatchInfo difftest_batch_info; uint8_t packge_idx; + char diff_batch_pack[CONFIG_DIFFTEST_BATCH_BYTELEN]; } FpgaPackgeHead; class FpgaXdma { public: - struct FpgaPackgeHead *shmadd_recv; - MemoryIdxPool xdma_mempool; - int xdma_c2h_fd[CONFIG_DMA_CHANNELS]; - int xdma_h2c_fd; - bool running = false; - std::condition_variable diff_filled_cv; - std::condition_variable diff_empile_cv; - std::atomic diff_packge_count{0}; FpgaXdma(); @@ -67,6 +59,9 @@ class FpgaXdma { std::thread receive_thread[CONFIG_DMA_CHANNELS]; std::thread process_thread; + int xdma_c2h_fd[CONFIG_DMA_CHANNELS]; + int xdma_h2c_fd; + static void handle_sigint(int sig); }; From 9a976d550e4dc0db1b1f897091a4983e2e08eca7 Mon Sep 17 00:00:00 2001 From: Kami Date: Wed, 25 Sep 2024 18:42:25 +0800 Subject: [PATCH 14/41] CI: add fpga-diff compile ci --- .github/workflows/main.yml | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 611255f0d..7279bbdaf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -306,3 +306,45 @@ jobs: make simv VCS=verilator -j2 ./build/simv +workload=$WORKLOAD +b=0 +e=-1 +diff=$REF_SO +max-instrs=5000 +warmup_instr=1000 make clean + ./build/simv +workload=./ready-to-run/microbench.bin +b=0 +e=-1 +diff=./ready-to-run/riscv64-nemu-interpreter-so +max-instrs=5000 +warmup_instr=1000 + + # test-difftest-fpga: + # runs-on: ubuntu-22.04 + + # needs: test-difftest-main + + # steps: + # - uses: actions/checkout@v4 + + # - name: Prepare environment + # run: | + # cd $GITHUB_WORKSPACE/.. + # git config --global url."https://github.com/".insteadOf git@github.com: + # git config --global url."https://".insteadOf git:// + # git clone https://github.com/OpenXiangShan/xs-env + # cd xs-env + # sudo -s ./setup-tools.sh + # source ./setup.sh + + # - name: Prepare NutShell + # run: | + # cd $GITHUB_WORKSPACE/../xs-env + # rm -r NutShell + # git clone -b dev-difftest --single-branch https://github.com/OSCPU/NutShell.git + # cd NutShell && git submodule update --init + # rm -r difftest + # cp -r $GITHUB_WORKSPACE . + + # - name: Enable -Werror for EMU Build + # run: | + # echo "CXX_NO_WARNING=1" >> $GITHUB_ENV + + # - name: FPGA-difftest Build + # run: | + # cd $GITHUB_WORKSPACE/../xs-env + # source ./env.sh + # cd $GITHUB_WORKSPACE/../xs-env/NutShell + # source ./env.sh + # make clean + # make sim-verilog MILL_ARGS="--difftest-config ENBF" -j2 + # make fpga-build DMA_CHANNELS=2 WITH_CHISELDB=0 WITH_CONSTANTIN=0 From 9c51fdcd31bcfe58e3a556d50527d0633089aabf Mon Sep 17 00:00:00 2001 From: Kami Date: Thu, 26 Sep 2024 11:22:58 +0800 Subject: [PATCH 15/41] fpga: svdpi.h is not referenced when fpga is used --- src/main/scala/DPIC.scala | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/scala/DPIC.scala b/src/main/scala/DPIC.scala index 93e8f4faa..d631707a1 100644 --- a/src/main/scala/DPIC.scala +++ b/src/main/scala/DPIC.scala @@ -296,6 +296,7 @@ private class DummyDPICBatchWrapper( object DPIC { val interfaces = ListBuffer.empty[(String, String, String)] + var defMacros = new StringBuilder() def apply(control: GatewaySinkControl, io: Valid[DifftestBundle], config: GatewayConfig): Unit = { val module = Module(new DummyDPICWrapper(chiselTypeOf(io), config)) @@ -313,6 +314,12 @@ object DPIC { module.control := control module.io := io val dpic = module.dpic + if (!config.isFPGA) + defMacros ++= + s""" + |#ifdef CONFIG_DIFFTEST_BATCH + |#include "svdpi.h" + |#endif // CONFIG_DIFFTEST_BATCH""".stripMargin interfaces += ((dpic.dpicFuncName, dpic.dpicFuncProto, dpic.dpicFunc)) } @@ -327,12 +334,10 @@ object DPIC { interfaceCpp += "" interfaceCpp += "#include " interfaceCpp += "#include \"diffstate.h\"" - interfaceCpp += "#ifdef CONFIG_DIFFTEST_BATCH" - interfaceCpp += "#include \"svdpi.h\"" - interfaceCpp += "#endif // CONFIG_DIFFTEST_BATCH" interfaceCpp += "#ifdef CONFIG_DIFFTEST_PERFCNT" interfaceCpp += "#include \"perf.h\"" interfaceCpp += "#endif // CONFIG_DIFFTEST_PERFCNT" + interfaceCpp += defMacros.toString() interfaceCpp += "" interfaceCpp += """ From 97ab23c30ff90d5465f83f461d4906674778a712 Mon Sep 17 00:00:00 2001 From: Kami Date: Thu, 26 Sep 2024 17:02:42 +0800 Subject: [PATCH 16/41] fpga: Burn workload to fpga ddr at boot time --- src/test/csrc/fpga/fpga_main.cpp | 2 +- src/test/csrc/fpga/xdma.cpp | 49 +++++++++++++++++++++++++++++++- src/test/csrc/fpga/xdma.h | 18 +++++++++++- 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 2f4ed5a11..689dfbf4b 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -71,7 +71,7 @@ void set_diff_ref_so(char *s) { } void simv_init() { - xdma_device = new FpgaXdma; + xdma_device = new FpgaXdma(work_load); difftest_init(); } diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index eb8bb4581..c7e578f7d 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -16,14 +16,20 @@ #include "xdma.h" #include "mpool.h" #include +#include #include #include +#include +#define XDMA_USER "/dev/xdma0_user" +#define XDMA_BYPASS "/dev/xdma0_bypass" #define XDMA_C2H_DEVICE "/dev/xdma0_c2h_" #define XDMA_H2C_DEVICE "/dev/xdma0_h2c_0" -FpgaXdma::FpgaXdma() { +FpgaXdma::FpgaXdma(const char *workload) { signal(SIGINT, handle_sigint); + ddr_load_workload(workload); + for (int i = 0; i < CONFIG_DMA_CHANNELS; i++) { char c2h_device[64]; sprintf(c2h_device, "%s%d", XDMA_C2H_DEVICE, i); @@ -50,6 +56,47 @@ void FpgaXdma::handle_sigint(int sig) { exit(1); } +// write xdma_bypass memory or xdma_user +int FpgaXdma::device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value) { + uint64_t pg_size = sysconf(_SC_PAGE_SIZE); + uint64_t size = !is_bypass ? 0x1000 : 0x10000; + uint64_t aligned_size = (size + 0xffful) & ~0xffful; + uint64_t base = addr & ~0xffful; + uint32_t offset = addr & 0xfffu; + std::ifstream workload_fd; + int fd = -1; + + if (base % pg_size != 0) { + printf("base must be a multiple of system page size\n"); + return -1; + } + + if (is_bypass) + fd = open(XDMA_BYPASS, O_RDWR | O_SYNC); + else + fd = open(XDMA_USER, O_RDWR | O_SYNC); + if (fd < 0) { + printf("failed to open %s\n", is_bypass ? XDMA_BYPASS : XDMA_USER); + return -1; + } + + void *m_ptr = mmap(nullptr, aligned_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, base); + if (m_ptr == MAP_FAILED) { + close(fd); + printf("failed to mmap\n"); + return -1; + } + + if (is_bypass) { + workload_fd.read(((char *)m_ptr) + offset, size); + } else { + ((volatile uint32_t *)m_ptr)[offset >> 2] = value; + } + + munmap(m_ptr, aligned_size); + close(fd); +} + void FpgaXdma::start_transmit_thread() { if (running == true) return; diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index 9319f823a..86fef90e4 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -44,11 +44,27 @@ class FpgaXdma { std::atomic diff_packge_count{0}; - FpgaXdma(); + FpgaXdma(const char *workload); ~FpgaXdma() { stop_thansmit_thread(); }; + int core_reset() { + device_write(false, nullptr, 0x100000, 0x1); + device_write(false, nullptr, 0x10000, 0x8); + } + + int core_restart() { + device_write(false, nullptr, 0x100000, 0); + } + + int ddr_load_workload(const char *workload) { + core_reset(); + device_write(true, workload, 0, 0); + core_restart(); + } + + int device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value); // thread api void start_transmit_thread(); void stop_thansmit_thread(); From 4effd8cd042080097a986e805067be3f47b1b333 Mon Sep 17 00:00:00 2001 From: Kami Date: Fri, 27 Sep 2024 15:34:30 +0800 Subject: [PATCH 17/41] fpga: Load memory for the ref module --- src/test/csrc/common/ram.h | 3 +++ src/test/csrc/fpga/fpga_main.cpp | 4 ++++ src/test/csrc/fpga/xdma.cpp | 17 +++++++++++------ src/test/csrc/fpga/xdma.h | 9 +++++---- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/test/csrc/common/ram.h b/src/test/csrc/common/ram.h index f4cbce014..2c74e48c3 100644 --- a/src/test/csrc/common/ram.h +++ b/src/test/csrc/common/ram.h @@ -112,6 +112,9 @@ class SimMemory { uint64_t get_size() { return memory_size; } + uint64_t get_load_img_size() { + return get_img_size(); + } bool in_range_u8(uint64_t address) { return address < memory_size; } diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 689dfbf4b..f35589d3b 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -18,6 +18,7 @@ #include "difftest-dpic.h" #include "difftest.h" #include "mpool.h" +#include "ram.h" #include "refproxy.h" #include "xdma.h" @@ -60,6 +61,8 @@ int main(int argc, char *argv[]) { } } free(xdma_device); + printf("difftest releases the fpga device and exits\n"); + exit(0); } void set_diff_ref_so(char *s) { @@ -72,6 +75,7 @@ void set_diff_ref_so(char *s) { void simv_init() { xdma_device = new FpgaXdma(work_load); + init_ram(work_load, DEFAULT_EMU_RAM_SIZE); difftest_init(); } diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index c7e578f7d..2ad8840c0 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -15,6 +15,7 @@ ***************************************************************************************/ #include "xdma.h" #include "mpool.h" +#include "ram.h" #include #include #include @@ -57,18 +58,17 @@ void FpgaXdma::handle_sigint(int sig) { } // write xdma_bypass memory or xdma_user -int FpgaXdma::device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value) { +void FpgaXdma::device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value) { uint64_t pg_size = sysconf(_SC_PAGE_SIZE); uint64_t size = !is_bypass ? 0x1000 : 0x10000; uint64_t aligned_size = (size + 0xffful) & ~0xffful; uint64_t base = addr & ~0xffful; uint32_t offset = addr & 0xfffu; - std::ifstream workload_fd; int fd = -1; if (base % pg_size != 0) { printf("base must be a multiple of system page size\n"); - return -1; + exit(-1); } if (is_bypass) @@ -77,18 +77,23 @@ int FpgaXdma::device_write(bool is_bypass, const char *workload, uint64_t addr, fd = open(XDMA_USER, O_RDWR | O_SYNC); if (fd < 0) { printf("failed to open %s\n", is_bypass ? XDMA_BYPASS : XDMA_USER); - return -1; + exit(-1); } void *m_ptr = mmap(nullptr, aligned_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, base); if (m_ptr == MAP_FAILED) { close(fd); printf("failed to mmap\n"); - return -1; + exit(-1); } if (is_bypass) { - workload_fd.read(((char *)m_ptr) + offset, size); + if (simMemory->get_load_img_size() > aligned_size) { + printf("The loaded workload size exceeds the xdma bypass size"); + exit(-1); + } + memcpy(static_cast(m_ptr) + offset, static_cast(simMemory->as_ptr()), + simMemory->get_load_img_size()); } else { ((volatile uint32_t *)m_ptr)[offset >> 2] = value; } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index 86fef90e4..ceca1e8e2 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -49,22 +49,23 @@ class FpgaXdma { stop_thansmit_thread(); }; - int core_reset() { + void core_reset() { device_write(false, nullptr, 0x100000, 0x1); device_write(false, nullptr, 0x10000, 0x8); } - int core_restart() { + void core_restart() { device_write(false, nullptr, 0x100000, 0); } - int ddr_load_workload(const char *workload) { + void ddr_load_workload(const char *workload) { core_reset(); device_write(true, workload, 0, 0); core_restart(); } - int device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value); + void device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value); + // thread api void start_transmit_thread(); void stop_thansmit_thread(); From 71a9032272ae2648e05616fc50c786a61c2d37b1 Mon Sep 17 00:00:00 2001 From: Kami Date: Fri, 1 Nov 2024 18:38:49 +0800 Subject: [PATCH 18/41] fpga: The optimal parameters of mpool are adjusted --- src/test/csrc/common/mpool.cpp | 10 +++------- src/test/csrc/common/mpool.h | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/test/csrc/common/mpool.cpp b/src/test/csrc/common/mpool.cpp index 4e83e63ae..1e0ec19d5 100644 --- a/src/test/csrc/common/mpool.cpp +++ b/src/test/csrc/common/mpool.cpp @@ -99,9 +99,8 @@ bool MemoryIdxPool::write_free_chunk(uint8_t idx, const char *data) { write_count++; // Proceed to the next group if (write_count == MAX_IDX) { - memory_pool[page_w_idx].is_free.store(false); memcpy(memory_pool[page_w_idx].data.get(), data, 4096); - + memory_pool[page_w_idx].is_free.store(false); size_t next_w_idx = wait_next_free_group(); group_w_offset = (next_w_idx & REM_MAX_GROUPING_IDX) * MAX_IDX; write_count = write_next_count; @@ -132,13 +131,11 @@ bool MemoryIdxPool::read_busy_chunk(char *data) { memcpy(data, memory_pool[page_r_idx].data.get(), 4096); memory_pool[page_r_idx].is_free.store(true); - return true; } size_t MemoryIdxPool::wait_next_free_group() { - empty_blocks.fetch_sub(1); - size_t free_num = empty_blocks.load(); + size_t free_num = empty_blocks.fetch_sub(1, std::memory_order_relaxed) + 1; cv_filled.notify_all(); //Reserve at least two free blocks if (free_num <= 2) { @@ -149,8 +146,7 @@ size_t MemoryIdxPool::wait_next_free_group() { } size_t MemoryIdxPool::wait_next_full_group() { - empty_blocks.fetch_add(1); - size_t free_num = empty_blocks.load(); + size_t free_num = empty_blocks.fetch_add(1, std::memory_order_relaxed) + 1; cv_empty.notify_all(); if (free_num >= MAX_GROUP_READ) { diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 69d031e70..149ffd83c 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -24,8 +24,8 @@ #include #include -#define MEMPOOL_SIZE 4096 * 1024 // 4M page -#define MEMBLOCK_SIZE 4096 // 4K packge +#define MEMPOOL_SIZE 16384 * 1024 // 16M memory +#define MEMBLOCK_SIZE 4096 // 4K packge #define NUM_BLOCKS (MEMPOOL_SIZE / MEMBLOCK_SIZE) #define REM_NUM_BLOCKS (NUM_BLOCKS - 1) @@ -35,7 +35,7 @@ struct MemoryBlock { MemoryBlock() : is_free(true) { void *ptr = nullptr; - if (posix_memalign(&ptr, 4096, 4096) != 0) { + if (posix_memalign(&ptr, 4096, 4096 + MEMBLOCK_SIZE) != 0) { throw std::runtime_error("Failed to allocate aligned memory"); } memset(ptr, 0, 4096); @@ -102,15 +102,16 @@ class MemoryPool { size_t page_end = 0; }; -static const size_t MAX_IDX = 256; -static const size_t MAX_GROUPING_IDX = NUM_BLOCKS / MAX_IDX; -static const size_t MAX_GROUP_READ = MAX_GROUPING_IDX - 2; //The window needs to reserve two free Spaces -static const size_t REM_MAX_IDX = (MAX_IDX - 1); -static const size_t REM_MAX_GROUPING_IDX = (MAX_GROUPING_IDX - 1); - // Split the memory pool into sliding Windows based on the index width // Support multi-thread out-of-order write sequential read class MemoryIdxPool { +private: + const size_t MAX_IDX = 256; + const size_t MAX_GROUPING_IDX = NUM_BLOCKS / MAX_IDX; + const size_t MAX_GROUP_READ = MAX_GROUPING_IDX - 2; //The window needs to reserve two free Spaces + const size_t REM_MAX_IDX = (MAX_IDX - 1); + const size_t REM_MAX_GROUPING_IDX = (MAX_GROUPING_IDX - 1); + public: MemoryIdxPool() { initMemoryPool(); From 4e7deac8949000e75976a2bf0b5062cbbcd70697 Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 4 Nov 2024 16:48:44 +0800 Subject: [PATCH 19/41] fpga: batch processing on fpga diff data is used by default --- fpga.mk | 4 ++-- src/test/csrc/fpga/fpga_main.cpp | 41 ++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/fpga.mk b/fpga.mk index 11e08ceb9..c1213bc5e 100644 --- a/fpga.mk +++ b/fpga.mk @@ -7,8 +7,8 @@ FPGA_RUN_DIR = $(abspath $(BUILD_DIR)/$(notdir $(RUN_BIN))) FPGA_CSRC_DIR = $(abspath ./src/test/csrc/fpga) FPGA_CONFIG_DIR = $(abspath ./config) -FPGA_CXXFILES = $(SIM_CXXFILES) $(shell find $(FPGA_CSRC_DIR) -name "*.cpp") -FPGA_CXXFLAGS = $(subst \\\",\", $(SIM_CXXFLAGS)) -I$(FPGA_CSRC_DIR) -DNUM_CORES=$(NUM_CORES) +FPGA_CXXFILES = $(SIM_CXXFILES) $(shell find $(FPGA_CSRC_DIR) -name "*.cpp") -include cstring +FPGA_CXXFLAGS = $(subst \\\",\", $(SIM_CXXFLAGS)) -I$(FPGA_CSRC_DIR) -DNUM_CORES=$(NUM_CORES) -O2 FPGA_LDFLAGS = $(SIM_LDFLAGS) -lpthread -ldl DMA_CHANNELS?=1 diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index f35589d3b..67b571466 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -14,6 +14,8 @@ * See the Mulan PSL v2 for more details. ***************************************************************************************/ +#include +#include #include "diffstate.h" #include "difftest-dpic.h" #include "difftest.h" @@ -29,7 +31,9 @@ enum { } simv_state; static char work_load[256] = "/dev/zero"; -static uint8_t simv_result = SIMV_RUN; +static std::atomic simv_result {SIMV_RUN}; +static std::mutex simv_mtx; +static std::condition_variable simv_cv; static uint64_t max_instrs = 0; struct core_end_info_t { @@ -49,17 +53,13 @@ FpgaXdma *xdma_device = NULL; int main(int argc, char *argv[]) { args_parsingniton(argc, argv); + simv_init(); - - while (simv_result == SIMV_RUN) { - // wait get xdma data - if (xdma_device->diff_packge_count.load(std::memory_order_seq_cst) > 0) { - // run difftest - simv_step(); - cpu_endtime_check(); - xdma_device->diff_packge_count.fetch_sub(1, std::memory_order_relaxed); - } + std::unique_lock lock(simv_mtx); + while (simv_result.load() == SIMV_RUN) { + simv_cv.wait(lock); } + xdma_device->running = false; free(xdma_device); printf("difftest releases the fpga device and exits\n"); exit(0); @@ -79,9 +79,17 @@ void simv_init() { difftest_init(); } +extern "C" void simv_nstep(uint8_t step) { + for (int i = 0; i < step; i++) { + simv_step(); + } +} + void simv_step() { - if (difftest_step()) - simv_result = SIMV_FAIL; + if (difftest_step()) { + simv_result.store(SIMV_FAIL); + simv_cv.notify_one(); + } if (difftest_state() != -1) { int trapCode = difftest_state(); for (int i = 0; i < NUM_CORES; i++) { @@ -94,10 +102,12 @@ void simv_step() { difftest[i]->display_stats(); } if (trapCode == 0) - simv_result = SIMV_DONE; + simv_result.store(SIMV_DONE); else - simv_result = SIMV_FAIL; + simv_result.store(SIMV_FAIL); + simv_cv.notify_one(); } + cpu_endtime_check(); } void cpu_endtime_check() { @@ -113,7 +123,8 @@ void cpu_endtime_check() { difftest[i]->display_stats(); core_end_info.core_cpi[i] = (double)trap->cycleCnt / (double)trap->instrCnt; if (core_end_info.core_trap_num == NUM_CORES) { - simv_result = SIMV_DONE; + simv_result.store(SIMV_DONE); + simv_cv.notify_one(); } } } From 15bbadb3f4afb3e8e08078bd437cf7e46d2a20d3 Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 4 Nov 2024 17:19:01 +0800 Subject: [PATCH 20/41] fpga: remove the redundant include and disable DB on fpga --- Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index eefde437d..7e021f69d 100644 --- a/Makefile +++ b/Makefile @@ -80,9 +80,6 @@ SIM_VSRC = $(shell find $(VSRC_DIR) -name "*.v" -or -name "*.sv") DIFFTEST_CSRC_DIR = $(abspath ./src/test/csrc/difftest) # FPGA-Difftest support FPGA ?= 0 -ifeq ($(FPGA),1) -DIFFTEST_CSRC_DIR += $(abspath ./src/test/csrc/fpga) -endif DIFFTEST_CXXFILES = $(shell find $(DIFFTEST_CSRC_DIR) -name "*.cpp") ifeq ($(NO_DIFF), 1) @@ -96,7 +93,9 @@ endif endif # ChiselDB +ifneq ($(FPGA),1) WITH_CHISELDB ?= 1 +endif ifeq ($(WITH_CHISELDB), 1) SIM_CXXFILES += $(BUILD_DIR)/chisel_db.cpp SIM_CXXFLAGS += -I$(BUILD_DIR) -DENABLE_CHISEL_DB @@ -104,7 +103,9 @@ SIM_LDFLAGS += -lsqlite3 endif # ConstantIn +ifneq ($(FPGA),1) WITH_CONSTANTIN ?= 1 +endif ifeq ($(WITH_CONSTANTIN), 1) SIM_CXXFILES += $(BUILD_DIR)/constantin.cpp SIM_CXXFLAGS += -I$(BUILD_DIR) -DENABLE_CONSTANTIN From f5986f3256ab2f35bc62cbf853b767066aa99dc1 Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 4 Nov 2024 18:47:31 +0800 Subject: [PATCH 21/41] fpga: fix formate --- src/test/csrc/fpga/fpga_main.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 67b571466..3d6d166eb 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -14,8 +14,6 @@ * See the Mulan PSL v2 for more details. ***************************************************************************************/ -#include -#include #include "diffstate.h" #include "difftest-dpic.h" #include "difftest.h" @@ -23,6 +21,8 @@ #include "ram.h" #include "refproxy.h" #include "xdma.h" +#include +#include enum { SIMV_RUN, @@ -31,7 +31,7 @@ enum { } simv_state; static char work_load[256] = "/dev/zero"; -static std::atomic simv_result {SIMV_RUN}; +static std::atomic simv_result{SIMV_RUN}; static std::mutex simv_mtx; static std::condition_variable simv_cv; static uint64_t max_instrs = 0; @@ -53,9 +53,9 @@ FpgaXdma *xdma_device = NULL; int main(int argc, char *argv[]) { args_parsingniton(argc, argv); - + simv_init(); - std::unique_lock lock(simv_mtx); + std::unique_lock lock(simv_mtx); while (simv_result.load() == SIMV_RUN) { simv_cv.wait(lock); } From c991c3d905ec7607a29baf7549f417f94fedf189 Mon Sep 17 00:00:00 2001 From: Kami Date: Wed, 6 Nov 2024 15:05:33 +0800 Subject: [PATCH 22/41] fpga: fix a stuck startup bug --- src/test/csrc/common/mpool.h | 2 +- src/test/csrc/fpga/fpga_main.cpp | 12 ++++++++---- src/test/csrc/fpga/xdma.cpp | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 149ffd83c..3ff77f34b 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -153,7 +153,7 @@ class MemoryIdxPool { size_t group_r_offset = 0; // The offset used by the current consumer size_t group_w_offset = 0; // The offset used by the current producer - size_t read_count = 0; + size_t read_count = MAX_IDX - 1; size_t write_count = 0; size_t write_next_count = 0; diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 3d6d166eb..ca6507895 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -55,9 +55,12 @@ int main(int argc, char *argv[]) { args_parsingniton(argc, argv); simv_init(); - std::unique_lock lock(simv_mtx); - while (simv_result.load() == SIMV_RUN) { - simv_cv.wait(lock); + { + std::unique_lock lock(simv_mtx); + xdma_device->start_transmit_thread(); + while (simv_result.load() == SIMV_RUN) { + simv_cv.wait(lock); + } } xdma_device->running = false; free(xdma_device); @@ -136,7 +139,8 @@ void args_parsingniton(int argc, char *argv[]) { if (strcmp(argv[i], "--diff") == 0) { set_diff_ref_so(argv[++i]); } else if (strcmp(argv[i], "-i") == 0) { - memcpy(work_load, argv[++i], sizeof(argv[++i])); + i++; + memcpy(work_load, argv[i], strlen(argv[i])); } else if (strcmp(argv[i], "--max-instrs") == 0) { max_instrs = std::stoul(argv[++i], nullptr, 16); } diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 2ad8840c0..872bf6568 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -76,7 +76,7 @@ void FpgaXdma::device_write(bool is_bypass, const char *workload, uint64_t addr, else fd = open(XDMA_USER, O_RDWR | O_SYNC); if (fd < 0) { - printf("failed to open %s\n", is_bypass ? XDMA_BYPASS : XDMA_USER); + printf("Failed to open %s\n", is_bypass ? XDMA_BYPASS : XDMA_USER); exit(-1); } From 327d1351cd144cb073f675e8aef21fe50f4c7148 Mon Sep 17 00:00:00 2001 From: Kami Date: Fri, 8 Nov 2024 10:41:17 +0800 Subject: [PATCH 23/41] fpga: Add idx verification for dma packets and initiate process repair for consumers --- src/test/csrc/common/mpool.cpp | 5 +++++ src/test/csrc/common/mpool.h | 4 +++- src/test/csrc/fpga/xdma.cpp | 11 +++++++---- src/test/csrc/fpga/xdma.h | 2 -- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/test/csrc/common/mpool.cpp b/src/test/csrc/common/mpool.cpp index 1e0ec19d5..03c364e6d 100644 --- a/src/test/csrc/common/mpool.cpp +++ b/src/test/csrc/common/mpool.cpp @@ -115,6 +115,11 @@ bool MemoryIdxPool::write_free_chunk(uint8_t idx, const char *data) { return true; } +void MemoryIdxPool::wait_mempool_start() { + std::unique_lock lock(window_mutexes); + cv_filled.wait(lock); +} + bool MemoryIdxPool::read_busy_chunk(char *data) { size_t page_r_idx = read_count + group_r_offset; size_t this_r_idx = ++read_count; diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 3ff77f34b..59f2fe445 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -144,6 +144,8 @@ class MemoryIdxPool { // Check if there is a window to read bool check_group(); + // Wait mempool have data + void wait_mempool_start(); private: MemoryBlock memory_pool[NUM_BLOCKS]; // Mempool std::mutex window_mutexes; // window sliding protection @@ -153,7 +155,7 @@ class MemoryIdxPool { size_t group_r_offset = 0; // The offset used by the current consumer size_t group_w_offset = 0; // The offset used by the current producer - size_t read_count = MAX_IDX - 1; + size_t read_count = 0; size_t write_count = 0; size_t write_next_count = 0; diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 872bf6568..1c78cacac 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -134,7 +134,6 @@ void FpgaXdma::stop_thansmit_thread() { void FpgaXdma::read_xdma_thread(int channel) { FpgaPackgeHead packge; - bool result = true; while (running) { size_t size = read(xdma_c2h_fd[channel], &packge, sizeof(FpgaPackgeHead)); uint8_t idx = packge.packge_idx; @@ -147,15 +146,19 @@ void FpgaXdma::read_xdma_thread(int channel) { void FpgaXdma::write_difftest_thread() { FpgaPackgeHead packge; - bool result = true; + uint8_t recv_count = 0; + xdma_mempool.wait_mempool_start(); while (running) { if (xdma_mempool.read_busy_chunk((char *)&packge) == false) { printf("Failed to read data from the XDMA memory pool\n"); assert(0); } + if (packge.packge_idx != recv_count) { + printf("read mempool idx failed\n"); + assert(0); + } + recv_count ++; // packge unpack v_difftest_Batch((uint8_t *)packge.diff_batch_pack); - // difftest run - diff_packge_count.fetch_add(1, std::memory_order_relaxed); } } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index ceca1e8e2..17749d976 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -42,8 +42,6 @@ class FpgaXdma { bool running = false; - std::atomic diff_packge_count{0}; - FpgaXdma(const char *workload); ~FpgaXdma() { stop_thansmit_thread(); From 6d38a2e699dd21e498587f65d2abf912ef4402e6 Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 11 Nov 2024 17:30:01 +0800 Subject: [PATCH 24/41] fpga: mpool fix computation of free_num --- src/test/csrc/common/mpool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/csrc/common/mpool.cpp b/src/test/csrc/common/mpool.cpp index 03c364e6d..5dcbd30f6 100644 --- a/src/test/csrc/common/mpool.cpp +++ b/src/test/csrc/common/mpool.cpp @@ -140,7 +140,7 @@ bool MemoryIdxPool::read_busy_chunk(char *data) { } size_t MemoryIdxPool::wait_next_free_group() { - size_t free_num = empty_blocks.fetch_sub(1, std::memory_order_relaxed) + 1; + size_t free_num = empty_blocks.fetch_sub(1, std::memory_order_relaxed) - 1; cv_filled.notify_all(); //Reserve at least two free blocks if (free_num <= 2) { From 76be72cb7e79f9f800f2cc06e97676e8866a18f3 Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 18 Nov 2024 18:17:22 +0800 Subject: [PATCH 25/41] difftest: Turn off h2c when it is not needed --- src/test/csrc/common/mpool.h | 1 + src/test/csrc/fpga/xdma.cpp | 6 ++++-- src/test/csrc/fpga/xdma.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 59f2fe445..25fe02c8b 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -146,6 +146,7 @@ class MemoryIdxPool { // Wait mempool have data void wait_mempool_start(); + private: MemoryBlock memory_pool[NUM_BLOCKS]; // Mempool std::mutex window_mutexes; // window sliding protection diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 1c78cacac..52e1a0070 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -42,7 +42,7 @@ FpgaXdma::FpgaXdma(const char *workload) { } std::cout << "XDMA link " << c2h_device << std::endl; } - +#ifdef CONFIG_USE_XDMA_H2C xdma_h2c_fd = open(XDMA_H2C_DEVICE, O_WRONLY); if (xdma_h2c_fd == -1) { std::cout << XDMA_H2C_DEVICE << std::endl; @@ -50,6 +50,7 @@ FpgaXdma::FpgaXdma(const char *workload) { exit(-1); } std::cout << "XDMA link " << XDMA_H2C_DEVICE << std::endl; +#endif } void FpgaXdma::handle_sigint(int sig) { @@ -127,8 +128,9 @@ void FpgaXdma::stop_thansmit_thread() { if (process_thread.joinable()) process_thread.join(); - +#ifdef CONFIG_USE_XDMA_H2C close(xdma_h2c_fd); +#endif xdma_mempool.cleanupMemoryPool(); } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index 17749d976..0b2ff5da1 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -75,7 +75,9 @@ class FpgaXdma { std::thread process_thread; int xdma_c2h_fd[CONFIG_DMA_CHANNELS]; +#ifdef CONFIG_USE_XDMA_H2C int xdma_h2c_fd; +#endif static void handle_sigint(int sig); }; From 7a346c801e8ada746dc8c904fc4cbbad8105e558 Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 16 Dec 2024 18:53:47 +0800 Subject: [PATCH 26/41] difftest: Added packet parsing under squash for XDMA --- src/test/csrc/fpga/diff_unpack.cpp | 224 +++++++++++++++++++++++++++++ src/test/csrc/fpga/diff_unpack.h | 23 +++ 2 files changed, 247 insertions(+) create mode 100644 src/test/csrc/fpga/diff_unpack.cpp create mode 100644 src/test/csrc/fpga/diff_unpack.h diff --git a/src/test/csrc/fpga/diff_unpack.cpp b/src/test/csrc/fpga/diff_unpack.cpp new file mode 100644 index 000000000..0300a4c3b --- /dev/null +++ b/src/test/csrc/fpga/diff_unpack.cpp @@ -0,0 +1,224 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* +* DiffTest is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ +#include +#include "diffstate.h" +#include "difftest-dpic.h" +#include "diff_unpack.h" + +typedef struct { + uint8_t io_index; + uint8_t io_coreid; + uint64_t io_data; + uint8_t io_valid; +} SquashCommitData; + +typedef struct { + uint8_t io_coreid; + uint8_t io_success; + uint8_t io_bits_valid; + uint8_t io_valid; +} SquashLrScEvent; + +typedef struct { + uint8_t io_coreid; + uint64_t io_vsscratch; + uint64_t io_vsatp; + uint64_t io_vstval; + uint64_t io_vscause; + uint64_t io_vsepc; + uint64_t io_vstvec; + uint64_t io_vsstatus; + uint64_t io_hgatp; + uint64_t io_htinst; + uint64_t io_htval; + uint64_t io_hcounteren; + uint64_t io_hedele; + uint64_t io_hideleg; + uint64_t io_hstatus; + uint64_t io_mtinst; + uint64_t io_mtval2; + uint64_t io_virtMode; + uint8_t io_valid; +} SquashHCSRState; + +typedef struct { + uint8_t io_coreid; + uint64_t io_fcsr; + uint8_t io_valid; +} SquashFpCSRState; + +typedef struct { + uint8_t io_coreid; + uint64_t io_value[32]; + uint8_t io_valid; +} SquashArchFpRegState; + +typedef struct { + uint8_t io_coreid; + uint64_t io_value[32]; + uint8_t io_valid; +} SquashArchIntRegState; + +typedef struct { + uint8_t io_coreid; + uint64_t io_medeleg; + uint64_t io_mideleg; + uint64_t io_sscratch; + uint64_t io_mscratch; + uint64_t io_mie; + uint64_t io_mip; + uint64_t io_satp; + uint64_t io_scause; + uint64_t io_mcause; + uint64_t io_stvec; + uint64_t io_mtvec; + uint64_t io_stval; + uint64_t io_mtval; + uint64_t io_sepc; + uint64_t io_mepc; + uint64_t io_sstatus; + uint64_t io_mstatus; + uint64_t io_privilegeMode; + uint8_t io_valid; +} SquashCSRState; + +typedef struct { + uint8_t io_coreid; + uint32_t io_interrupt; + uint32_t io_exception; + uint64_t io_exceptionPC; + uint32_t io_exceptionInst; + uint8_t io_hasNMI; + uint8_t io_virtualInterruptIsHvictlInject; + uint8_t io_bits_valid; + uint8_t io_valid; +} SquashArchEvent; + +typedef struct { + uint8_t coreid; + uint64_t pc; + uint64_t code; + uint8_t hasWFI; + uint64_t instrCnt; + uint64_t cycleCnt; + uint8_t hasTrap; + uint8_t io_valid; +} SquashTrapEvent; + +typedef struct { + uint8_t index; + uint8_t coreid; + uint8_t special; + uint8_t nFused; + uint8_t isStore; + uint8_t isLoad; + uint8_t sqIdx; + uint8_t lqIdx; + uint16_t robIdx; + uint32_t instr; + uint64_t pc; + uint8_t wdest; + uint8_t wpdest; + uint8_t vecwen; + uint8_t fpwen; + uint8_t rfwen; + uint8_t isRVC; + uint8_t skip; + uint8_t valid; +} SquashInstrCommit; + + +void squash_unpackge(uint8_t *packge) { + for (size_t i = 0; i < CONFIG_DIFF_COMMIT_DATA_WIDTH; i++) { + SquashCommitData temp; + memcpy(&temp, packge, sizeof(SquashCommitData)); + packge += sizeof(SquashCommitData); + v_difftest_CommitData(temp.io_data, temp.io_coreid, temp.io_index); + } + { + SquashLrScEvent temp; + memcpy(&temp, packge, sizeof(SquashLrScEvent)); + packge += sizeof(SquashLrScEvent); + v_difftest_LrScEvent(temp.io_success, temp.io_coreid); + } + { + SquashHCSRState temp; + memcpy(&temp, packge, sizeof(SquashHCSRState)); + packge += sizeof(SquashHCSRState); + v_difftest_HCSRState(temp.io_virtMode, temp.io_mtval2, temp.io_mtinst, + temp.io_hstatus, temp.io_hideleg, temp.io_hedele, temp.io_hcounteren, + temp. io_htval, temp.io_htinst, temp.io_hgatp, temp.io_vsstatus, + temp.io_vstvec, temp.io_vsepc, temp.io_vscause, temp.io_vstval, + temp.io_vsatp, temp.io_vsscratch, temp.io_coreid); + } + { + SquashFpCSRState temp; + memcpy(&temp, packge, sizeof(SquashFpCSRState)); + packge += sizeof(SquashFpCSRState); + v_difftest_FpCSRState(temp.io_fcsr, temp.io_coreid); + } + { + SquashCSRState temp; + memcpy(&temp, packge, sizeof(SquashCSRState)); + packge += sizeof(SquashCSRState); + v_difftest_CSRState(temp.io_privilegeMode, temp.io_mstatus, temp.io_sstatus, temp.io_mepc, + temp.io_sepc, temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, + temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, temp.io_medeleg, + temp.io_coreid); + } + { + SquashArchEvent temp; + memcpy(&temp, packge, sizeof(SquashArchEvent)); + packge += sizeof(SquashArchEvent); + v_difftest_ArchEvent(temp.io_interrupt, temp.io_exception, temp.io_exceptionPC, temp.io_exceptionInst, + temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); + } + { + SquashArchIntRegState temp; + memcpy(&temp, packge, sizeof(SquashArchIntRegState)); + packge += sizeof(SquashArchIntRegState); + v_difftest_ArchIntRegState(temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], + temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], temp.io_value[12], + temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], temp.io_value[17], temp.io_value[18], temp.io_value[19], + temp.io_value[20], temp.io_value[21], temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], + temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); + } + { + SquashArchFpRegState temp; + memcpy(&temp, packge, sizeof(SquashArchFpRegState)); + packge += sizeof(SquashArchFpRegState); + v_difftest_ArchFpRegState(temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], + temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], temp.io_value[12], + temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], temp.io_value[17], temp.io_value[18], temp.io_value[19], + temp.io_value[20], temp.io_value[21], temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], + temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); + } + { + SquashTrapEvent temp; + memcpy(&temp, packge, sizeof(SquashTrapEvent)); + packge += sizeof(SquashTrapEvent); + v_difftest_TrapEvent(temp.hasTrap, temp.cycleCnt, temp.instrCnt, temp.hasWFI, temp.code, + temp.pc, temp.coreid); + } + for (size_t i = 0; i < CONFIG_DIFF_COMMIT_WIDTH; i++) { + SquashInstrCommit temp; + memcpy(&temp, packge, sizeof(SquashInstrCommit)); + packge += sizeof(SquashInstrCommit); + v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, + temp.pc, temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, temp.nFused, temp.special, + temp.coreid, temp.index); + } +} \ No newline at end of file diff --git a/src/test/csrc/fpga/diff_unpack.h b/src/test/csrc/fpga/diff_unpack.h new file mode 100644 index 000000000..343ba1e0c --- /dev/null +++ b/src/test/csrc/fpga/diff_unpack.h @@ -0,0 +1,23 @@ +/*************************************************************************************** +* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* +* DiffTest is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ +#ifndef __DIFF_UNPACK_H__ +#define __DIFF_UNPACK_H__ +#include "diffstate.h" +#include "difftest-dpic.h" +void squash_unpackge(uint8_t *packge); + + +#endif From 0ec41287779bc4a70b9093ba647a5cdeaddfe465 Mon Sep 17 00:00:00 2001 From: Kami Date: Tue, 17 Dec 2024 15:19:20 +0800 Subject: [PATCH 27/41] fpga: The packet adapted to squash format runs FPGA-diff --- fpga.mk | 2 +- src/test/csrc/difftest/difftest.cpp | 4 + src/test/csrc/fpga/diff_unpack.cpp | 164 +++++++++++++++------------- src/test/csrc/fpga/diff_unpack.h | 1 - src/test/csrc/fpga/fpga_main.cpp | 3 +- src/test/csrc/fpga/xdma.cpp | 17 ++- src/test/csrc/fpga/xdma.h | 14 ++- 7 files changed, 120 insertions(+), 85 deletions(-) diff --git a/fpga.mk b/fpga.mk index c1213bc5e..0f0de4a73 100644 --- a/fpga.mk +++ b/fpga.mk @@ -8,7 +8,7 @@ FPGA_CSRC_DIR = $(abspath ./src/test/csrc/fpga) FPGA_CONFIG_DIR = $(abspath ./config) FPGA_CXXFILES = $(SIM_CXXFILES) $(shell find $(FPGA_CSRC_DIR) -name "*.cpp") -include cstring -FPGA_CXXFLAGS = $(subst \\\",\", $(SIM_CXXFLAGS)) -I$(FPGA_CSRC_DIR) -DNUM_CORES=$(NUM_CORES) -O2 +FPGA_CXXFLAGS = $(subst \\\",\", $(SIM_CXXFLAGS)) -I$(FPGA_CSRC_DIR) -DNUM_CORES=$(NUM_CORES) -DWITH_FPGA -O2 FPGA_LDFLAGS = $(SIM_LDFLAGS) -lpthread -ldl DMA_CHANNELS?=1 diff --git a/src/test/csrc/difftest/difftest.cpp b/src/test/csrc/difftest/difftest.cpp index 3c85f464f..8a0b897d9 100644 --- a/src/test/csrc/difftest/difftest.cpp +++ b/src/test/csrc/difftest/difftest.cpp @@ -22,7 +22,9 @@ #include "ram.h" #include "spikedasm.h" #ifdef CONFIG_DIFFTEST_SQUASH +#ifndef WITH_FPGA #include "svdpi.h" +#endif // WITH_FPGA #endif // CONFIG_DIFFTEST_SQUASH #ifdef CONFIG_DIFFTEST_PERFCNT #include "perf.h" @@ -135,6 +137,7 @@ void difftest_finish() { } #ifdef CONFIG_DIFFTEST_SQUASH +#ifndef WITH_FPGA svScope squashScope; void set_squash_scope() { squashScope = svGetScope(); @@ -149,6 +152,7 @@ void difftest_squash_enable(int enable) { svSetScope(squashScope); set_squash_enable(enable); } +#endif // WITH_FPGA #endif // CONFIG_DIFFTEST_SQUASH #ifdef CONFIG_DIFFTEST_REPLAY diff --git a/src/test/csrc/fpga/diff_unpack.cpp b/src/test/csrc/fpga/diff_unpack.cpp index 0300a4c3b..11e6fac88 100644 --- a/src/test/csrc/fpga/diff_unpack.cpp +++ b/src/test/csrc/fpga/diff_unpack.cpp @@ -13,27 +13,29 @@ * * See the Mulan PSL v2 for more details. ***************************************************************************************/ -#include +#include "diff_unpack.h" #include "diffstate.h" #include "difftest-dpic.h" -#include "diff_unpack.h" +#include + +extern void simv_nstep(uint8_t step); -typedef struct { - uint8_t io_index; - uint8_t io_coreid; +typedef struct { + uint8_t io_index; + uint8_t io_coreid; uint64_t io_data; - uint8_t io_valid; + uint8_t io_valid; } SquashCommitData; -typedef struct { +typedef struct { uint8_t io_coreid; uint8_t io_success; uint8_t io_bits_valid; uint8_t io_valid; } SquashLrScEvent; -typedef struct { - uint8_t io_coreid; +typedef struct { + uint8_t io_coreid; uint64_t io_vsscratch; uint64_t io_vsatp; uint64_t io_vstval; @@ -51,29 +53,29 @@ typedef struct { uint64_t io_mtinst; uint64_t io_mtval2; uint64_t io_virtMode; - uint8_t io_valid; + uint8_t io_valid; } SquashHCSRState; -typedef struct { - uint8_t io_coreid; +typedef struct { + uint8_t io_coreid; uint64_t io_fcsr; - uint8_t io_valid; + uint8_t io_valid; } SquashFpCSRState; -typedef struct { - uint8_t io_coreid; +typedef struct { + uint8_t io_coreid; uint64_t io_value[32]; - uint8_t io_valid; + uint8_t io_valid; } SquashArchFpRegState; -typedef struct { - uint8_t io_coreid; +typedef struct { + uint8_t io_coreid; uint64_t io_value[32]; - uint8_t io_valid; + uint8_t io_valid; } SquashArchIntRegState; -typedef struct { - uint8_t io_coreid; +typedef struct { + uint8_t io_coreid; uint64_t io_medeleg; uint64_t io_mideleg; uint64_t io_sscratch; @@ -92,54 +94,57 @@ typedef struct { uint64_t io_sstatus; uint64_t io_mstatus; uint64_t io_privilegeMode; - uint8_t io_valid; + uint8_t io_valid; } SquashCSRState; -typedef struct { - uint8_t io_coreid; +typedef struct { + uint8_t io_coreid; uint32_t io_interrupt; uint32_t io_exception; uint64_t io_exceptionPC; uint32_t io_exceptionInst; - uint8_t io_hasNMI; - uint8_t io_virtualInterruptIsHvictlInject; - uint8_t io_bits_valid; - uint8_t io_valid; + uint8_t io_hasNMI; + uint8_t io_virtualInterruptIsHvictlInject; + uint8_t io_bits_valid; + uint8_t io_valid; } SquashArchEvent; -typedef struct { - uint8_t coreid; +typedef struct { + uint8_t coreid; uint64_t pc; uint64_t code; - uint8_t hasWFI; + uint8_t hasWFI; uint64_t instrCnt; uint64_t cycleCnt; - uint8_t hasTrap; - uint8_t io_valid; + uint8_t hasTrap; + uint8_t io_valid; } SquashTrapEvent; -typedef struct { - uint8_t index; - uint8_t coreid; - uint8_t special; - uint8_t nFused; - uint8_t isStore; - uint8_t isLoad; - uint8_t sqIdx; - uint8_t lqIdx; +typedef struct { + uint8_t index; + uint8_t coreid; + uint8_t special; + uint8_t nFused; + uint8_t isStore; + uint8_t isLoad; + uint8_t sqIdx; + uint8_t lqIdx; uint16_t robIdx; uint32_t instr; uint64_t pc; - uint8_t wdest; - uint8_t wpdest; - uint8_t vecwen; - uint8_t fpwen; - uint8_t rfwen; - uint8_t isRVC; - uint8_t skip; - uint8_t valid; + uint8_t wdest; + uint8_t wpdest; + uint8_t vecwen; + uint8_t fpwen; + uint8_t rfwen; + uint8_t isRVC; + uint8_t skip; + uint8_t valid; } SquashInstrCommit; +typedef struct { + uint8_t step; +} SquashDiffStep; void squash_unpackge(uint8_t *packge) { for (size_t i = 0; i < CONFIG_DIFF_COMMIT_DATA_WIDTH; i++) { @@ -158,11 +163,10 @@ void squash_unpackge(uint8_t *packge) { SquashHCSRState temp; memcpy(&temp, packge, sizeof(SquashHCSRState)); packge += sizeof(SquashHCSRState); - v_difftest_HCSRState(temp.io_virtMode, temp.io_mtval2, temp.io_mtinst, - temp.io_hstatus, temp.io_hideleg, temp.io_hedele, temp.io_hcounteren, - temp. io_htval, temp.io_htinst, temp.io_hgatp, temp.io_vsstatus, - temp.io_vstvec, temp.io_vsepc, temp.io_vscause, temp.io_vstval, - temp.io_vsatp, temp.io_vsscratch, temp.io_coreid); + v_difftest_HCSRState(temp.io_virtMode, temp.io_mtval2, temp.io_mtinst, temp.io_hstatus, temp.io_hideleg, + temp.io_hedele, temp.io_hcounteren, temp.io_htval, temp.io_htinst, temp.io_hgatp, + temp.io_vsstatus, temp.io_vstvec, temp.io_vsepc, temp.io_vscause, temp.io_vstval, + temp.io_vsatp, temp.io_vsscratch, temp.io_coreid); } { SquashFpCSRState temp; @@ -174,51 +178,59 @@ void squash_unpackge(uint8_t *packge) { SquashCSRState temp; memcpy(&temp, packge, sizeof(SquashCSRState)); packge += sizeof(SquashCSRState); - v_difftest_CSRState(temp.io_privilegeMode, temp.io_mstatus, temp.io_sstatus, temp.io_mepc, - temp.io_sepc, temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, - temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, temp.io_medeleg, - temp.io_coreid); + v_difftest_CSRState(temp.io_privilegeMode, temp.io_mstatus, temp.io_sstatus, temp.io_mepc, temp.io_sepc, + temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, + temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, + temp.io_medeleg, temp.io_coreid); } { SquashArchEvent temp; memcpy(&temp, packge, sizeof(SquashArchEvent)); packge += sizeof(SquashArchEvent); v_difftest_ArchEvent(temp.io_interrupt, temp.io_exception, temp.io_exceptionPC, temp.io_exceptionInst, - temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); + temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); } { SquashArchIntRegState temp; memcpy(&temp, packge, sizeof(SquashArchIntRegState)); packge += sizeof(SquashArchIntRegState); - v_difftest_ArchIntRegState(temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], - temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], temp.io_value[12], - temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], temp.io_value[17], temp.io_value[18], temp.io_value[19], - temp.io_value[20], temp.io_value[21], temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], - temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); + v_difftest_ArchIntRegState( + temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], + temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], + temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], + temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], + temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], + temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); } { SquashArchFpRegState temp; memcpy(&temp, packge, sizeof(SquashArchFpRegState)); packge += sizeof(SquashArchFpRegState); - v_difftest_ArchFpRegState(temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], - temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], temp.io_value[12], - temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], temp.io_value[17], temp.io_value[18], temp.io_value[19], - temp.io_value[20], temp.io_value[21], temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], - temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); + v_difftest_ArchFpRegState( + temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], + temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], + temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], + temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], + temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], + temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); } { SquashTrapEvent temp; memcpy(&temp, packge, sizeof(SquashTrapEvent)); packge += sizeof(SquashTrapEvent); - v_difftest_TrapEvent(temp.hasTrap, temp.cycleCnt, temp.instrCnt, temp.hasWFI, temp.code, - temp.pc, temp.coreid); + v_difftest_TrapEvent(temp.hasTrap, temp.cycleCnt, temp.instrCnt, temp.hasWFI, temp.code, temp.pc, temp.coreid); } for (size_t i = 0; i < CONFIG_DIFF_COMMIT_WIDTH; i++) { SquashInstrCommit temp; memcpy(&temp, packge, sizeof(SquashInstrCommit)); packge += sizeof(SquashInstrCommit); - v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, - temp.pc, temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, temp.nFused, temp.special, - temp.coreid, temp.index); + v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, temp.pc, + temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, temp.nFused, + temp.special, temp.coreid, temp.index); + } + { + SquashDiffStep temp; + memcpy(&temp, packge, sizeof(SquashDiffStep)); + simv_nstep(temp.step); } -} \ No newline at end of file +} diff --git a/src/test/csrc/fpga/diff_unpack.h b/src/test/csrc/fpga/diff_unpack.h index 343ba1e0c..1f88fd5a6 100644 --- a/src/test/csrc/fpga/diff_unpack.h +++ b/src/test/csrc/fpga/diff_unpack.h @@ -19,5 +19,4 @@ #include "difftest-dpic.h" void squash_unpackge(uint8_t *packge); - #endif diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index ca6507895..82e7587d1 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -55,6 +55,7 @@ int main(int argc, char *argv[]) { args_parsingniton(argc, argv); simv_init(); + printf("simv init\n"); { std::unique_lock lock(simv_mtx); xdma_device->start_transmit_thread(); @@ -82,7 +83,7 @@ void simv_init() { difftest_init(); } -extern "C" void simv_nstep(uint8_t step) { +void simv_nstep(uint8_t step) { for (int i = 0; i < step; i++) { simv_step(); } diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 52e1a0070..bbf4e9192 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -20,8 +20,15 @@ #include #include #include +#include +#include +#include #include +#ifdef CONFIG_DIFFTEST_SQUASH +#include "diff_unpack.h" +#endif // CONFIG_DIFFTEST_SQUASH + #define XDMA_USER "/dev/xdma0_user" #define XDMA_BYPASS "/dev/xdma0_bypass" #define XDMA_C2H_DEVICE "/dev/xdma0_c2h_" @@ -61,7 +68,7 @@ void FpgaXdma::handle_sigint(int sig) { // write xdma_bypass memory or xdma_user void FpgaXdma::device_write(bool is_bypass, const char *workload, uint64_t addr, uint64_t value) { uint64_t pg_size = sysconf(_SC_PAGE_SIZE); - uint64_t size = !is_bypass ? 0x1000 : 0x10000; + uint64_t size = !is_bypass ? 0x1000 : 0x100000; uint64_t aligned_size = (size + 0xffful) & ~0xffful; uint64_t base = addr & ~0xffful; uint32_t offset = addr & 0xfffu; @@ -159,8 +166,12 @@ void FpgaXdma::write_difftest_thread() { printf("read mempool idx failed\n"); assert(0); } - recv_count ++; + recv_count++; // packge unpack - v_difftest_Batch((uint8_t *)packge.diff_batch_pack); +#ifdef CONFIG_DIFFTEST_BATCH + v_difftest_Batch(packge.diff_packge); +#elif defined(CONFIG_DIFFTEST_SQUASH) + squash_unpackge(packge.diff_packge); +#endif } } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index 0b2ff5da1..f106084ea 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -29,11 +29,17 @@ #include #include -#define WITH_FPGA - +#ifdef CONFIG_DIFFTEST_BATCH +#define DMA_DIFF_PACKGE_LEN CONFIG_DIFFTEST_BATCH_BYTELEN +#elif defined(CONFIG_DIFFTEST_SQUASH) +#define DMA_DIFF_PACKGE_LEN 1216 +#endif typedef struct FpgaPackgeHead { uint8_t packge_idx; - char diff_batch_pack[CONFIG_DIFFTEST_BATCH_BYTELEN]; + uint8_t diff_packge[DMA_DIFF_PACKGE_LEN]; +#ifdef CONFIG_DIFFTEST_BATCH + uint8_t zero[95]; +#endif // CONFIG_DIFFTEST_BATCH } FpgaPackgeHead; class FpgaXdma { @@ -48,11 +54,13 @@ class FpgaXdma { }; void core_reset() { + device_write(false, nullptr, 0x20000, 0x1); device_write(false, nullptr, 0x100000, 0x1); device_write(false, nullptr, 0x10000, 0x8); } void core_restart() { + device_write(false, nullptr, 0x20000, 0); device_write(false, nullptr, 0x100000, 0); } From 25b8b9ad345e2f6ac420c9262e9180738e018e51 Mon Sep 17 00:00:00 2001 From: klin02 Date: Thu, 12 Dec 2024 20:16:20 +0800 Subject: [PATCH 28/41] WIP: expose IO for FPGA no batch --- src/main/scala/Gateway.scala | 18 +++++++++++++++++- src/test/vsrc/vcs/DifftestEndpoint.v | 4 ++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/main/scala/Gateway.scala b/src/main/scala/Gateway.scala index 481099e5d..75c781a73 100644 --- a/src/main/scala/Gateway.scala +++ b/src/main/scala/Gateway.scala @@ -254,11 +254,13 @@ class GatewayEndpoint(instanceWithDelay: Seq[(DifftestBundle, Int)], config: Gat } object GatewaySink { + private val ports = ListBuffer.empty[Valid[DifftestBundle]] def apply(control: GatewaySinkControl, io: Valid[DifftestBundle], config: GatewayConfig): Unit = { config.style match { case "dpic" => DPIC(control, io, config) case _ => DPIC(control, io, config) // Default: DPI-C } + ports += io } def batch(template: Seq[DifftestBundle], control: GatewaySinkControl, io: BatchIO, config: GatewayConfig): Unit = { @@ -280,9 +282,23 @@ object GatewaySink { } def collect(config: GatewayConfig): GatewayResult = { + val collected = MixedVecInit( + ports.toSeq.map { gen => Seq(gen.valid.asTypeOf(UInt(8.W)), gen.bits.getByteAlign) }.flatten.toSeq + ).asUInt + val out = Option.when(config.isFPGA && !config.isBatch) { + IO(new Bundle { + val data = Output(UInt(collected.getWidth.W)) + val enable = Output(Bool()) + }) + } + if (config.isFPGA && !config.isBatch) { + out.get.data := collected + out.get.enable := VecInit(ports.toSeq.map(_.valid)).asUInt.orR + dontTouch(out.get) + } config.style match { case "dpic" => DPIC.collect() - case _ => DPIC.collect() // Default: DPI-C + case _ => DPIC.collect() // Default: DPI-C } } } diff --git a/src/test/vsrc/vcs/DifftestEndpoint.v b/src/test/vsrc/vcs/DifftestEndpoint.v index ff58e99a6..26aea698f 100644 --- a/src/test/vsrc/vcs/DifftestEndpoint.v +++ b/src/test/vsrc/vcs/DifftestEndpoint.v @@ -237,12 +237,16 @@ assign difftest_perfCtrl_dump = 0; reg [63:0] n_cycles; reg [63:0] stuck_timer; +`define MACRO aa.bb.cc always @(posedge clock) begin if (reset) begin n_cycles <= 64'h0; stuck_timer <= 64'h0; end else begin + if (n_cycles == 64'h1) begin + $display("suf `MACRO suf"); + end n_cycles <= n_cycles + 64'h1; // max cycles From 9ed3eb6d4183b5e76842287e7554c0f791b927b0 Mon Sep 17 00:00:00 2001 From: Kami Date: Sat, 4 Jan 2025 17:56:05 +0800 Subject: [PATCH 29/41] fpga: fix squash packge memory sequence --- src/test/csrc/fpga/diff_unpack.cpp | 335 ++++++++++++++++++----------- 1 file changed, 215 insertions(+), 120 deletions(-) diff --git a/src/test/csrc/fpga/diff_unpack.cpp b/src/test/csrc/fpga/diff_unpack.cpp index 11e6fac88..e9dae77b9 100644 --- a/src/test/csrc/fpga/diff_unpack.cpp +++ b/src/test/csrc/fpga/diff_unpack.cpp @@ -17,25 +17,28 @@ #include "diffstate.h" #include "difftest-dpic.h" #include - +#include +#include +#include extern void simv_nstep(uint8_t step); typedef struct { - uint8_t io_index; - uint8_t io_coreid; - uint64_t io_data; uint8_t io_valid; + uint8_t io_bits_valid; + uint64_t io_data; + uint8_t io_coreid; + uint8_t io_index; } SquashCommitData; typedef struct { - uint8_t io_coreid; + uint8_t io_valid; uint8_t io_success; uint8_t io_bits_valid; - uint8_t io_valid; + uint8_t io_coreid; } SquashLrScEvent; typedef struct { - uint8_t io_coreid; + uint8_t io_valid; uint64_t io_vsscratch; uint64_t io_vsatp; uint64_t io_vstval; @@ -53,184 +56,276 @@ typedef struct { uint64_t io_mtinst; uint64_t io_mtval2; uint64_t io_virtMode; - uint8_t io_valid; + uint8_t io_coreid; } SquashHCSRState; typedef struct { - uint8_t io_coreid; - uint64_t io_fcsr; uint8_t io_valid; + uint64_t io_fcsr; + uint8_t io_coreid; } SquashFpCSRState; typedef struct { - uint8_t io_coreid; - uint64_t io_value[32]; uint8_t io_valid; + uint64_t io_value[32]; + uint8_t io_coreid; } SquashArchFpRegState; typedef struct { - uint8_t io_coreid; - uint64_t io_value[32]; uint8_t io_valid; + uint64_t io_value[32]; + uint8_t io_coreid; } SquashArchIntRegState; typedef struct { - uint8_t io_coreid; - uint64_t io_medeleg; - uint64_t io_mideleg; - uint64_t io_sscratch; - uint64_t io_mscratch; - uint64_t io_mie; - uint64_t io_mip; - uint64_t io_satp; - uint64_t io_scause; - uint64_t io_mcause; - uint64_t io_stvec; - uint64_t io_mtvec; - uint64_t io_stval; - uint64_t io_mtval; - uint64_t io_sepc; - uint64_t io_mepc; - uint64_t io_sstatus; - uint64_t io_mstatus; - uint64_t io_privilegeMode; uint8_t io_valid; + uint64_t io_privilegeMode; + uint64_t io_mstatus; + uint64_t io_sstatus; + uint64_t io_mepc; + uint64_t io_sepc; + uint64_t io_mtval; + uint64_t io_stval; + uint64_t io_mtvec; + uint64_t io_stvec; + uint64_t io_mcause; + uint64_t io_scause; + uint64_t io_satp; + uint64_t io_mip; + uint64_t io_mie; + uint64_t io_mscratch; + uint64_t io_sscratch; + uint64_t io_mideleg; + uint64_t io_medeleg; + uint8_t io_coreid; } SquashCSRState; typedef struct { - uint8_t io_coreid; + uint8_t io_valid; + uint8_t io_bits_valid; uint32_t io_interrupt; uint32_t io_exception; uint64_t io_exceptionPC; uint32_t io_exceptionInst; uint8_t io_hasNMI; uint8_t io_virtualInterruptIsHvictlInject; - uint8_t io_bits_valid; - uint8_t io_valid; + uint8_t io_coreid; } SquashArchEvent; typedef struct { - uint8_t coreid; - uint64_t pc; - uint64_t code; - uint8_t hasWFI; - uint64_t instrCnt; - uint64_t cycleCnt; - uint8_t hasTrap; uint8_t io_valid; + uint8_t hasTrap; + uint64_t cycleCnt; + uint64_t instrCnt; + uint8_t hasWFI; + uint64_t code; + uint64_t pc; + uint8_t coreid; } SquashTrapEvent; typedef struct { - uint8_t index; - uint8_t coreid; - uint8_t special; - uint8_t nFused; - uint8_t isStore; - uint8_t isLoad; - uint8_t sqIdx; - uint8_t lqIdx; - uint16_t robIdx; - uint32_t instr; - uint64_t pc; - uint8_t wdest; - uint8_t wpdest; - uint8_t vecwen; - uint8_t fpwen; - uint8_t rfwen; - uint8_t isRVC; - uint8_t skip; uint8_t valid; + uint8_t bits_valid; + uint8_t skip; + uint8_t isRVC; + uint8_t rfwen; + uint8_t fpwen; + uint8_t vecwen; + uint8_t wpdest; + uint8_t wdest; + uint64_t pc; + uint32_t instr; + uint16_t robIdx; + uint8_t lqIdx; + uint8_t sqIdx; + uint8_t isLoad; + uint8_t isStore; + uint8_t nFused; + uint8_t special; + uint8_t coreid; + uint8_t index; } SquashInstrCommit; -typedef struct { - uint8_t step; -} SquashDiffStep; +// XIANG SHAN +// void squash_unpackge(uint8_t *packge) { +// uint8_t have_step = 0; +// // PACKGE HEAD +// { +// memcpy(&have_step, packge, sizeof(uint8_t)); +// } +// for (size_t i = 0; i < CONFIG_DIFF_COMMIT_WIDTH; i++) { +// SquashInstrCommit temp; +// memcpy(&temp, packge, sizeof(SquashInstrCommit)); +// packge += sizeof(SquashInstrCommit); +// v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, temp.pc, +// temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, temp.nFused, +// temp.special, temp.coreid, temp.index); +// } +// { +// SquashTrapEvent temp; +// memcpy(&temp, packge, sizeof(SquashTrapEvent)); +// packge += sizeof(SquashTrapEvent); +// v_difftest_TrapEvent(temp.hasTrap, temp.cycleCnt, temp.instrCnt, temp.hasWFI, temp.code, temp.pc, temp.coreid); +// } +// { +// SquashArchFpRegState temp; +// memcpy(&temp, packge, sizeof(SquashArchFpRegState)); +// packge += sizeof(SquashArchFpRegState); +// v_difftest_ArchFpRegState( +// temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], +// temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], +// temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], +// temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], +// temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], +// temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); +// } +// { +// SquashArchIntRegState temp; +// memcpy(&temp, packge, sizeof(SquashArchIntRegState)); +// packge += sizeof(SquashArchIntRegState); +// v_difftest_ArchIntRegState( +// temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], +// temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], +// temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], +// temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], +// temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], +// temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); +// } +// { +// SquashArchEvent temp; +// memcpy(&temp, packge, sizeof(SquashArchEvent)); +// packge += sizeof(SquashArchEvent); +// v_difftest_ArchEvent(temp.io_interrupt, temp.io_exception, temp.io_exceptionPC, temp.io_exceptionInst, +// temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); +// } +// { +// SquashCSRState temp; +// memcpy(&temp, packge, sizeof(SquashCSRState)); +// packge += sizeof(SquashCSRState); +// v_difftest_CSRState(temp.io_privilegeMode, temp.io_mstatus, temp.io_sstatus, temp.io_mepc, temp.io_sepc, +// temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, +// temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, +// temp.io_medeleg, temp.io_coreid); +// } +// { +// SquashFpCSRState temp; +// memcpy(&temp, packge, sizeof(SquashFpCSRState)); +// packge += sizeof(SquashFpCSRState); +// v_difftest_FpCSRState(temp.io_fcsr, temp.io_coreid); +// } +// { +// SquashHCSRState temp; +// memcpy(&temp, packge, sizeof(SquashHCSRState)); +// packge += sizeof(SquashHCSRState); +// v_difftest_HCSRState(temp.io_virtMode, temp.io_mtval2, temp.io_mtinst, temp.io_hstatus, temp.io_hideleg, +// temp.io_hedele, temp.io_hcounteren, temp.io_htval, temp.io_htinst, temp.io_hgatp, +// temp.io_vsstatus, temp.io_vstvec, temp.io_vsepc, temp.io_vscause, temp.io_vstval, +// temp.io_vsatp, temp.io_vsscratch, temp.io_coreid); +// } +// { +// SquashLrScEvent temp; +// memcpy(&temp, packge, sizeof(SquashLrScEvent)); +// packge += sizeof(SquashLrScEvent); +// v_difftest_LrScEvent(temp.io_success, temp.io_coreid); +// } +// for (size_t i = 0; i < CONFIG_DIFF_COMMIT_DATA_WIDTH; i++) { +// SquashCommitData temp; +// memcpy(&temp, packge, sizeof(SquashCommitData)); +// packge += sizeof(SquashCommitData); +// v_difftest_CommitData(temp.io_data, temp.io_coreid, temp.io_index); +// } +// // PACKGE END +// if (have_step != 0) { +// simv_nstep(have_step); +// } +// } +//NUT SHELL void squash_unpackge(uint8_t *packge) { - for (size_t i = 0; i < CONFIG_DIFF_COMMIT_DATA_WIDTH; i++) { - SquashCommitData temp; - memcpy(&temp, packge, sizeof(SquashCommitData)); - packge += sizeof(SquashCommitData); - v_difftest_CommitData(temp.io_data, temp.io_coreid, temp.io_index); - } - { - SquashLrScEvent temp; - memcpy(&temp, packge, sizeof(SquashLrScEvent)); - packge += sizeof(SquashLrScEvent); - v_difftest_LrScEvent(temp.io_success, temp.io_coreid); - } + uint8_t have_step = 0; + printf("squash packge size sum %lx \n", sizeof(SquashArchIntRegState) + sizeof(SquashCSRState) + sizeof(SquashArchEvent) + sizeof(SquashTrapEvent) + sizeof(SquashCommitData) + sizeof(SquashInstrCommit) + 1); + // PACKGE HEAD { - SquashHCSRState temp; - memcpy(&temp, packge, sizeof(SquashHCSRState)); - packge += sizeof(SquashHCSRState); - v_difftest_HCSRState(temp.io_virtMode, temp.io_mtval2, temp.io_mtinst, temp.io_hstatus, temp.io_hideleg, - temp.io_hedele, temp.io_hcounteren, temp.io_htval, temp.io_htinst, temp.io_hgatp, - temp.io_vsstatus, temp.io_vstvec, temp.io_vsepc, temp.io_vscause, temp.io_vstval, - temp.io_vsatp, temp.io_vsscratch, temp.io_coreid); + memcpy(&have_step, packge, sizeof(uint8_t)); + packge += 1; } { - SquashFpCSRState temp; - memcpy(&temp, packge, sizeof(SquashFpCSRState)); - packge += sizeof(SquashFpCSRState); - v_difftest_FpCSRState(temp.io_fcsr, temp.io_coreid); + SquashArchIntRegState temp; + memcpy(&temp, packge, sizeof(SquashArchIntRegState)); + packge += sizeof(SquashArchIntRegState); + + if (temp.io_coreid + 1 > NUM_CORES) { + assert(0); + } + if (temp.io_valid) { + v_difftest_ArchIntRegState( + temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], + temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], + temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], + temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], + temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], + temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); + for (size_t i = 0; i < 32; i++) { + printf("get Int-Reg[%d]:%lx\n", i, temp.io_value[i]); + } + } } { SquashCSRState temp; memcpy(&temp, packge, sizeof(SquashCSRState)); packge += sizeof(SquashCSRState); - v_difftest_CSRState(temp.io_privilegeMode, temp.io_mstatus, temp.io_sstatus, temp.io_mepc, temp.io_sepc, + if (temp.io_valid) { + v_difftest_CSRState(temp.io_privilegeMode, temp.io_mstatus, temp.io_sstatus, temp.io_mepc, temp.io_sepc, temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, temp.io_medeleg, temp.io_coreid); + printf("get SquashCSRState CORE_ID:%x\n", temp.io_coreid); + } } { SquashArchEvent temp; memcpy(&temp, packge, sizeof(SquashArchEvent)); packge += sizeof(SquashArchEvent); - v_difftest_ArchEvent(temp.io_interrupt, temp.io_exception, temp.io_exceptionPC, temp.io_exceptionInst, + if (temp.io_valid) { + v_difftest_ArchEvent(temp.io_interrupt, temp.io_exception, temp.io_exceptionPC, temp.io_exceptionInst, temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); - } - { - SquashArchIntRegState temp; - memcpy(&temp, packge, sizeof(SquashArchIntRegState)); - packge += sizeof(SquashArchIntRegState); - v_difftest_ArchIntRegState( - temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], - temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], - temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], - temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], - temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], - temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); - } - { - SquashArchFpRegState temp; - memcpy(&temp, packge, sizeof(SquashArchFpRegState)); - packge += sizeof(SquashArchFpRegState); - v_difftest_ArchFpRegState( - temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], - temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], - temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], - temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], - temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], - temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); + printf("get SquashArchEvent CORE_ID:%x\n", temp.io_coreid); + } } { SquashTrapEvent temp; memcpy(&temp, packge, sizeof(SquashTrapEvent)); packge += sizeof(SquashTrapEvent); - v_difftest_TrapEvent(temp.hasTrap, temp.cycleCnt, temp.instrCnt, temp.hasWFI, temp.code, temp.pc, temp.coreid); + if (temp.io_valid) { + v_difftest_TrapEvent(temp.hasTrap, temp.cycleCnt, temp.instrCnt, temp.hasWFI, temp.code, temp.pc, temp.coreid); + printf("get SquashTrapEvent PC = %lx, cyclecnt = %lx\n",temp.pc, temp.cycleCnt); + } } for (size_t i = 0; i < CONFIG_DIFF_COMMIT_WIDTH; i++) { SquashInstrCommit temp; memcpy(&temp, packge, sizeof(SquashInstrCommit)); packge += sizeof(SquashInstrCommit); - v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, temp.pc, + if (temp.valid) { + v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, temp.pc, temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, temp.nFused, temp.special, temp.coreid, temp.index); + printf("get SquashInstrCommit\n"); + } } - { - SquashDiffStep temp; - memcpy(&temp, packge, sizeof(SquashDiffStep)); - simv_nstep(temp.step); + for (size_t i = 0; i < CONFIG_DIFF_COMMIT_DATA_WIDTH; i++) { + SquashCommitData temp; + memcpy(&temp, packge, sizeof(SquashCommitData)); + packge += sizeof(SquashCommitData); + if (temp.io_valid) { + v_difftest_CommitData(temp.io_data, temp.io_coreid, temp.io_index); + printf("get SquashCommitData\n"); + } + } + // PACKGE END + if (have_step != 0) { + printf("run step\n"); + simv_nstep(have_step); } -} + sleep(1); + printf("end squash\n"); +} \ No newline at end of file From 51e9b9f6bf8ac213d9bf44a32b00a3f2c324fffd Mon Sep 17 00:00:00 2001 From: Kami Date: Sat, 4 Jan 2025 17:56:05 +0800 Subject: [PATCH 30/41] =?UTF-8?q?fpga:=20fix=20suqash=20struct=20=20memory?= =?UTF-8?q?=20alignment=EF=BC=8CSupport=20for=20switching=20parsing=20func?= =?UTF-8?q?tions=20according=20to=20CPU?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/test/csrc/fpga/diff_unpack.cpp | 213 ++++++++++++++--------------- 1 file changed, 100 insertions(+), 113 deletions(-) diff --git a/src/test/csrc/fpga/diff_unpack.cpp b/src/test/csrc/fpga/diff_unpack.cpp index e9dae77b9..e515c8ed0 100644 --- a/src/test/csrc/fpga/diff_unpack.cpp +++ b/src/test/csrc/fpga/diff_unpack.cpp @@ -21,6 +21,10 @@ #include #include extern void simv_nstep(uint8_t step); +#pragma pack(1) +typedef struct { + uint8_t io_step; +} SquashStep; typedef struct { uint8_t io_valid; @@ -145,118 +149,104 @@ typedef struct { uint8_t coreid; uint8_t index; } SquashInstrCommit; +#pragma pack() -// XIANG SHAN -// void squash_unpackge(uint8_t *packge) { -// uint8_t have_step = 0; -// // PACKGE HEAD -// { -// memcpy(&have_step, packge, sizeof(uint8_t)); -// } -// for (size_t i = 0; i < CONFIG_DIFF_COMMIT_WIDTH; i++) { -// SquashInstrCommit temp; -// memcpy(&temp, packge, sizeof(SquashInstrCommit)); -// packge += sizeof(SquashInstrCommit); -// v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, temp.pc, -// temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, temp.nFused, -// temp.special, temp.coreid, temp.index); -// } -// { -// SquashTrapEvent temp; -// memcpy(&temp, packge, sizeof(SquashTrapEvent)); -// packge += sizeof(SquashTrapEvent); -// v_difftest_TrapEvent(temp.hasTrap, temp.cycleCnt, temp.instrCnt, temp.hasWFI, temp.code, temp.pc, temp.coreid); -// } -// { -// SquashArchFpRegState temp; -// memcpy(&temp, packge, sizeof(SquashArchFpRegState)); -// packge += sizeof(SquashArchFpRegState); -// v_difftest_ArchFpRegState( -// temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], -// temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], -// temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], -// temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], -// temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], -// temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); -// } -// { -// SquashArchIntRegState temp; -// memcpy(&temp, packge, sizeof(SquashArchIntRegState)); -// packge += sizeof(SquashArchIntRegState); -// v_difftest_ArchIntRegState( -// temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], -// temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], -// temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], -// temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], -// temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], -// temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); -// } -// { -// SquashArchEvent temp; -// memcpy(&temp, packge, sizeof(SquashArchEvent)); -// packge += sizeof(SquashArchEvent); -// v_difftest_ArchEvent(temp.io_interrupt, temp.io_exception, temp.io_exceptionPC, temp.io_exceptionInst, -// temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); -// } -// { -// SquashCSRState temp; -// memcpy(&temp, packge, sizeof(SquashCSRState)); -// packge += sizeof(SquashCSRState); -// v_difftest_CSRState(temp.io_privilegeMode, temp.io_mstatus, temp.io_sstatus, temp.io_mepc, temp.io_sepc, -// temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, -// temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, -// temp.io_medeleg, temp.io_coreid); -// } -// { -// SquashFpCSRState temp; -// memcpy(&temp, packge, sizeof(SquashFpCSRState)); -// packge += sizeof(SquashFpCSRState); -// v_difftest_FpCSRState(temp.io_fcsr, temp.io_coreid); -// } -// { -// SquashHCSRState temp; -// memcpy(&temp, packge, sizeof(SquashHCSRState)); -// packge += sizeof(SquashHCSRState); -// v_difftest_HCSRState(temp.io_virtMode, temp.io_mtval2, temp.io_mtinst, temp.io_hstatus, temp.io_hideleg, -// temp.io_hedele, temp.io_hcounteren, temp.io_htval, temp.io_htinst, temp.io_hgatp, -// temp.io_vsstatus, temp.io_vstvec, temp.io_vsepc, temp.io_vscause, temp.io_vstval, -// temp.io_vsatp, temp.io_vsscratch, temp.io_coreid); -// } -// { -// SquashLrScEvent temp; -// memcpy(&temp, packge, sizeof(SquashLrScEvent)); -// packge += sizeof(SquashLrScEvent); -// v_difftest_LrScEvent(temp.io_success, temp.io_coreid); -// } -// for (size_t i = 0; i < CONFIG_DIFF_COMMIT_DATA_WIDTH; i++) { -// SquashCommitData temp; -// memcpy(&temp, packge, sizeof(SquashCommitData)); -// packge += sizeof(SquashCommitData); -// v_difftest_CommitData(temp.io_data, temp.io_coreid, temp.io_index); -// } -// // PACKGE END -// if (have_step != 0) { -// simv_nstep(have_step); -// } -// } - -//NUT SHELL void squash_unpackge(uint8_t *packge) { uint8_t have_step = 0; - printf("squash packge size sum %lx \n", sizeof(SquashArchIntRegState) + sizeof(SquashCSRState) + sizeof(SquashArchEvent) + sizeof(SquashTrapEvent) + sizeof(SquashCommitData) + sizeof(SquashInstrCommit) + 1); // PACKGE HEAD { - memcpy(&have_step, packge, sizeof(uint8_t)); - packge += 1; + SquashStep temp; + memcpy(&temp, packge, sizeof(SquashStep)); + packge += sizeof(SquashStep); + have_step = temp.io_step; + } +#if defined(CPU_XIANGSHAN) + for (size_t i = 0; i < CONFIG_DIFF_COMMIT_WIDTH; i++) { + SquashInstrCommit temp; + memcpy(&temp, packge, sizeof(SquashInstrCommit)); + packge += sizeof(SquashInstrCommit); + v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, temp.pc, + temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, temp.nFused, + temp.special, temp.coreid, temp.index); + } + { + SquashTrapEvent temp; + memcpy(&temp, packge, sizeof(SquashTrapEvent)); + packge += sizeof(SquashTrapEvent); + v_difftest_TrapEvent(temp.hasTrap, temp.cycleCnt, temp.instrCnt, temp.hasWFI, temp.code, temp.pc, temp.coreid); + } + { + SquashArchFpRegState temp; + memcpy(&temp, packge, sizeof(SquashArchFpRegState)); + packge += sizeof(SquashArchFpRegState); + v_difftest_ArchFpRegState( + temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], + temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], + temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], + temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], + temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], + temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); } { SquashArchIntRegState temp; memcpy(&temp, packge, sizeof(SquashArchIntRegState)); packge += sizeof(SquashArchIntRegState); - - if (temp.io_coreid + 1 > NUM_CORES) { - assert(0); - } + v_difftest_ArchIntRegState( + temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], + temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], + temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], + temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], + temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], + temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); + } + { + SquashArchEvent temp; + memcpy(&temp, packge, sizeof(SquashArchEvent)); + packge += sizeof(SquashArchEvent); + v_difftest_ArchEvent(temp.io_interrupt, temp.io_exception, temp.io_exceptionPC, temp.io_exceptionInst, + temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); + } + { + SquashCSRState temp; + memcpy(&temp, packge, sizeof(SquashCSRState)); + packge += sizeof(SquashCSRState); + v_difftest_CSRState(temp.io_privilegeMode, temp.io_mstatus, temp.io_sstatus, temp.io_mepc, temp.io_sepc, + temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, + temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, + temp.io_medeleg, temp.io_coreid); + } + { + SquashFpCSRState temp; + memcpy(&temp, packge, sizeof(SquashFpCSRState)); + packge += sizeof(SquashFpCSRState); + v_difftest_FpCSRState(temp.io_fcsr, temp.io_coreid); + } + { + SquashHCSRState temp; + memcpy(&temp, packge, sizeof(SquashHCSRState)); + packge += sizeof(SquashHCSRState); + v_difftest_HCSRState(temp.io_virtMode, temp.io_mtval2, temp.io_mtinst, temp.io_hstatus, temp.io_hideleg, + temp.io_hedele, temp.io_hcounteren, temp.io_htval, temp.io_htinst, temp.io_hgatp, + temp.io_vsstatus, temp.io_vstvec, temp.io_vsepc, temp.io_vscause, temp.io_vstval, + temp.io_vsatp, temp.io_vsscratch, temp.io_coreid); + } + { + SquashLrScEvent temp; + memcpy(&temp, packge, sizeof(SquashLrScEvent)); + packge += sizeof(SquashLrScEvent); + v_difftest_LrScEvent(temp.io_success, temp.io_coreid); + } + for (size_t i = 0; i < CONFIG_DIFF_COMMIT_DATA_WIDTH; i++) { + SquashCommitData temp; + memcpy(&temp, packge, sizeof(SquashCommitData)); + packge += sizeof(SquashCommitData); + v_difftest_CommitData(temp.io_data, temp.io_coreid, temp.io_index); + } +#elif defined(CPU_NUTSHELL) + { + SquashArchIntRegState temp; + memcpy(&temp, packge, sizeof(SquashArchIntRegState)); + packge += sizeof(SquashArchIntRegState); if (temp.io_valid) { v_difftest_ArchIntRegState( temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], @@ -265,9 +255,6 @@ void squash_unpackge(uint8_t *packge) { temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); - for (size_t i = 0; i < 32; i++) { - printf("get Int-Reg[%d]:%lx\n", i, temp.io_value[i]); - } } } { @@ -279,7 +266,7 @@ void squash_unpackge(uint8_t *packge) { temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, temp.io_medeleg, temp.io_coreid); - printf("get SquashCSRState CORE_ID:%x\n", temp.io_coreid); + //printf("get SquashCSRState CORE_ID:%x\n", temp.io_coreid); } } { @@ -289,7 +276,7 @@ void squash_unpackge(uint8_t *packge) { if (temp.io_valid) { v_difftest_ArchEvent(temp.io_interrupt, temp.io_exception, temp.io_exceptionPC, temp.io_exceptionInst, temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); - printf("get SquashArchEvent CORE_ID:%x\n", temp.io_coreid); + //printf("get SquashArchEvent io_exceptionPC:%x\n", temp.io_exceptionPC); } } { @@ -298,7 +285,7 @@ void squash_unpackge(uint8_t *packge) { packge += sizeof(SquashTrapEvent); if (temp.io_valid) { v_difftest_TrapEvent(temp.hasTrap, temp.cycleCnt, temp.instrCnt, temp.hasWFI, temp.code, temp.pc, temp.coreid); - printf("get SquashTrapEvent PC = %lx, cyclecnt = %lx\n",temp.pc, temp.cycleCnt); + printf("get SquashTrapEvent PC = %lx, instrCnt %lx\n", temp.pc, temp.instrCnt); } } for (size_t i = 0; i < CONFIG_DIFF_COMMIT_WIDTH; i++) { @@ -309,7 +296,7 @@ void squash_unpackge(uint8_t *packge) { v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, temp.pc, temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, temp.nFused, temp.special, temp.coreid, temp.index); - printf("get SquashInstrCommit\n"); + //printf("get SquashInstrCommit\n"); } } for (size_t i = 0; i < CONFIG_DIFF_COMMIT_DATA_WIDTH; i++) { @@ -321,11 +308,11 @@ void squash_unpackge(uint8_t *packge) { printf("get SquashCommitData\n"); } } +#endif // PACKGE END if (have_step != 0) { - printf("run step\n"); + printf("step %d\n", have_step); simv_nstep(have_step); } - sleep(1); - printf("end squash\n"); -} \ No newline at end of file + usleep(50000); +} From 1208b381e7760db40bf0186c04dbd0e100add494 Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 6 Jan 2025 17:08:27 +0800 Subject: [PATCH 31/41] fpga: The running parameter max-instrs is modified from hexadecimal to base 10 --- src/test/csrc/fpga/fpga_main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 82e7587d1..3e8939a55 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -143,7 +143,7 @@ void args_parsingniton(int argc, char *argv[]) { i++; memcpy(work_load, argv[i], strlen(argv[i])); } else if (strcmp(argv[i], "--max-instrs") == 0) { - max_instrs = std::stoul(argv[++i], nullptr, 16); + max_instrs = std::stoul(argv[++i], nullptr, 10); } } } From 80efb16ecd393913c62c2f9da1af2cef35c72f24 Mon Sep 17 00:00:00 2001 From: Kami Date: Mon, 6 Jan 2025 18:58:03 +0800 Subject: [PATCH 32/41] fpga: fix fpga_mian init process --- src/test/csrc/fpga/fpga_main.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 3e8939a55..8f31fb0eb 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -17,6 +17,11 @@ #include "diffstate.h" #include "difftest-dpic.h" #include "difftest.h" +#include "flash.h" +#include "device.h" +#include "goldenmem.h" +#include "ram.h" +#include "refproxy.h" #include "mpool.h" #include "ram.h" #include "refproxy.h" @@ -71,7 +76,6 @@ int main(int argc, char *argv[]) { void set_diff_ref_so(char *s) { extern const char *difftest_ref_so; - printf("diff-test ref so:%s\n", s); char *buf = (char *)malloc(256); strcpy(buf, s); difftest_ref_so = buf; @@ -80,10 +84,17 @@ void set_diff_ref_so(char *s) { void simv_init() { xdma_device = new FpgaXdma(work_load); init_ram(work_load, DEFAULT_EMU_RAM_SIZE); + init_flash(NULL); + difftest_init(); + + init_device(); + init_goldenmem(); + init_nemuproxy(DEFAULT_EMU_RAM_SIZE); } void simv_nstep(uint8_t step) { + difftest_switch_zone(); for (int i = 0; i < step; i++) { simv_step(); } @@ -91,6 +102,7 @@ void simv_nstep(uint8_t step) { void simv_step() { if (difftest_step()) { + printf("SIMV_FAIL\n"); simv_result.store(SIMV_FAIL); simv_cv.notify_one(); } From 0019b48c28f0bad777efd54fb926b36a9b497aa2 Mon Sep 17 00:00:00 2001 From: Kami Date: Tue, 7 Jan 2025 18:45:44 +0800 Subject: [PATCH 33/41] fpga: It supports running the difftest in squash mode on fpga difftest to repair the processing of data packets and pass the fpga verification --- src/test/csrc/difftest/difftest.cpp | 3 +-- src/test/csrc/fpga/diff_unpack.cpp | 22 +++++----------------- src/test/csrc/fpga/fpga_main.cpp | 1 - src/test/csrc/fpga/xdma.cpp | 19 +++++++++++++++---- src/test/csrc/fpga/xdma.h | 7 ++----- 5 files changed, 23 insertions(+), 29 deletions(-) diff --git a/src/test/csrc/difftest/difftest.cpp b/src/test/csrc/difftest/difftest.cpp index 8a0b897d9..a5a0b396d 100644 --- a/src/test/csrc/difftest/difftest.cpp +++ b/src/test/csrc/difftest/difftest.cpp @@ -96,9 +96,7 @@ void difftest_set_dut() { } } int difftest_step() { -#ifndef WITH_FPGA difftest_set_dut(); -#endif for (int i = 0; i < NUM_CORES; i++) { int ret = difftest[i]->step(); if (ret) { @@ -589,6 +587,7 @@ int Difftest::do_instr_commit(int i) { } void Difftest::do_first_instr_commit() { + printf("dut->commit[0].pc %lx\n", dut->commit[0].pc); if (!has_commit && dut->commit[0].valid) { #ifndef BASIC_DIFFTEST_ONLY if (dut->commit[0].pc != FIRST_INST_ADDRESS) { diff --git a/src/test/csrc/fpga/diff_unpack.cpp b/src/test/csrc/fpga/diff_unpack.cpp index e515c8ed0..c188eb146 100644 --- a/src/test/csrc/fpga/diff_unpack.cpp +++ b/src/test/csrc/fpga/diff_unpack.cpp @@ -152,14 +152,10 @@ typedef struct { #pragma pack() void squash_unpackge(uint8_t *packge) { - uint8_t have_step = 0; +#ifdef USE_THREAD_MEMPOOL + packge += sizeof(uint8_t); +#endif // PACKGE HEAD - { - SquashStep temp; - memcpy(&temp, packge, sizeof(SquashStep)); - packge += sizeof(SquashStep); - have_step = temp.io_step; - } #if defined(CPU_XIANGSHAN) for (size_t i = 0; i < CONFIG_DIFF_COMMIT_WIDTH; i++) { SquashInstrCommit temp; @@ -266,7 +262,6 @@ void squash_unpackge(uint8_t *packge) { temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, temp.io_medeleg, temp.io_coreid); - //printf("get SquashCSRState CORE_ID:%x\n", temp.io_coreid); } } { @@ -276,7 +271,6 @@ void squash_unpackge(uint8_t *packge) { if (temp.io_valid) { v_difftest_ArchEvent(temp.io_interrupt, temp.io_exception, temp.io_exceptionPC, temp.io_exceptionInst, temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); - //printf("get SquashArchEvent io_exceptionPC:%x\n", temp.io_exceptionPC); } } { @@ -285,7 +279,6 @@ void squash_unpackge(uint8_t *packge) { packge += sizeof(SquashTrapEvent); if (temp.io_valid) { v_difftest_TrapEvent(temp.hasTrap, temp.cycleCnt, temp.instrCnt, temp.hasWFI, temp.code, temp.pc, temp.coreid); - printf("get SquashTrapEvent PC = %lx, instrCnt %lx\n", temp.pc, temp.instrCnt); } } for (size_t i = 0; i < CONFIG_DIFF_COMMIT_WIDTH; i++) { @@ -296,7 +289,6 @@ void squash_unpackge(uint8_t *packge) { v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, temp.pc, temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, temp.nFused, temp.special, temp.coreid, temp.index); - //printf("get SquashInstrCommit\n"); } } for (size_t i = 0; i < CONFIG_DIFF_COMMIT_DATA_WIDTH; i++) { @@ -305,14 +297,10 @@ void squash_unpackge(uint8_t *packge) { packge += sizeof(SquashCommitData); if (temp.io_valid) { v_difftest_CommitData(temp.io_data, temp.io_coreid, temp.io_index); - printf("get SquashCommitData\n"); } } #endif - // PACKGE END - if (have_step != 0) { - printf("step %d\n", have_step); - simv_nstep(have_step); - } +// PACKGE END + simv_nstep(1); usleep(50000); } diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 8f31fb0eb..567fb70ec 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -94,7 +94,6 @@ void simv_init() { } void simv_nstep(uint8_t step) { - difftest_switch_zone(); for (int i = 0; i < step; i++) { simv_step(); } diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index bbf4e9192..830ce8f33 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -142,15 +142,26 @@ void FpgaXdma::stop_thansmit_thread() { } void FpgaXdma::read_xdma_thread(int channel) { - FpgaPackgeHead packge; + FpgaPackgeHead* packge = (FpgaPackgeHead*)malloc(sizeof(FpgaPackgeHead)); + // TODO: The first packet may be repeated twice, and if it is, drop the first packet + size_t size = read(xdma_c2h_fd[channel], packge->diff_packge, DMA_DIFF_PACKGE_LEN); while (running) { - size_t size = read(xdma_c2h_fd[channel], &packge, sizeof(FpgaPackgeHead)); - uint8_t idx = packge.packge_idx; + memset(packge, 0, sizeof(FpgaPackgeHead)); + size_t size = read(xdma_c2h_fd[channel], packge->diff_packge, DMA_DIFF_PACKGE_LEN); +#ifdef USE_THREAD_MEMPOOL if (xdma_mempool.write_free_chunk(idx, (char *)&packge) == false) { printf("It should not be the case that no available block can be found\n"); assert(0); } +#endif // USE_THREAD_MEMPOOL + +#ifdef CONFIG_DIFFTEST_BATCH + v_difftest_Batch(packge->diff_packge); +#elif defined(CONFIG_DIFFTEST_SQUASH) + squash_unpackge(packge->diff_packge); +#endif } + free(packge); } void FpgaXdma::write_difftest_thread() { @@ -162,7 +173,7 @@ void FpgaXdma::write_difftest_thread() { printf("Failed to read data from the XDMA memory pool\n"); assert(0); } - if (packge.packge_idx != recv_count) { + if (packge.diff_packge[0] != recv_count) { printf("read mempool idx failed\n"); assert(0); } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index f106084ea..d186feb5d 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -32,14 +32,11 @@ #ifdef CONFIG_DIFFTEST_BATCH #define DMA_DIFF_PACKGE_LEN CONFIG_DIFFTEST_BATCH_BYTELEN #elif defined(CONFIG_DIFFTEST_SQUASH) -#define DMA_DIFF_PACKGE_LEN 1216 +#define DMA_DIFF_PACKGE_LEN 1280 #endif + typedef struct FpgaPackgeHead { - uint8_t packge_idx; uint8_t diff_packge[DMA_DIFF_PACKGE_LEN]; -#ifdef CONFIG_DIFFTEST_BATCH - uint8_t zero[95]; -#endif // CONFIG_DIFFTEST_BATCH } FpgaPackgeHead; class FpgaXdma { From 63cc88818bb3c3163ab6863120f937deb7857e9c Mon Sep 17 00:00:00 2001 From: Kami Date: Wed, 8 Jan 2025 15:14:04 +0800 Subject: [PATCH 34/41] fpga: rename **unpack.cpp --- src/test/csrc/fpga/xdma.cpp | 2 +- src/test/csrc/fpga/{diff_unpack.cpp => xdma_unpack.cpp} | 2 +- src/test/csrc/fpga/{diff_unpack.h => xdma_unpack.h} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename src/test/csrc/fpga/{diff_unpack.cpp => xdma_unpack.cpp} (99%) rename src/test/csrc/fpga/{diff_unpack.h => xdma_unpack.h} (100%) diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 830ce8f33..a5505907e 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -26,7 +26,7 @@ #include #ifdef CONFIG_DIFFTEST_SQUASH -#include "diff_unpack.h" +#include "xdma_unpack.h" #endif // CONFIG_DIFFTEST_SQUASH #define XDMA_USER "/dev/xdma0_user" diff --git a/src/test/csrc/fpga/diff_unpack.cpp b/src/test/csrc/fpga/xdma_unpack.cpp similarity index 99% rename from src/test/csrc/fpga/diff_unpack.cpp rename to src/test/csrc/fpga/xdma_unpack.cpp index c188eb146..4aec79dd0 100644 --- a/src/test/csrc/fpga/diff_unpack.cpp +++ b/src/test/csrc/fpga/xdma_unpack.cpp @@ -13,7 +13,7 @@ * * See the Mulan PSL v2 for more details. ***************************************************************************************/ -#include "diff_unpack.h" +#include "squash_unpack.h" #include "diffstate.h" #include "difftest-dpic.h" #include diff --git a/src/test/csrc/fpga/diff_unpack.h b/src/test/csrc/fpga/xdma_unpack.h similarity index 100% rename from src/test/csrc/fpga/diff_unpack.h rename to src/test/csrc/fpga/xdma_unpack.h From e5065b3d45ebd25e5f04ad429497b9c53d3e899d Mon Sep 17 00:00:00 2001 From: Kami Date: Wed, 8 Jan 2025 16:54:46 +0800 Subject: [PATCH 35/41] fpga: fix Squash typedef --- src/test/csrc/fpga/xdma_unpack.cpp | 34 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/test/csrc/fpga/xdma_unpack.cpp b/src/test/csrc/fpga/xdma_unpack.cpp index 4aec79dd0..d7ad0e1c5 100644 --- a/src/test/csrc/fpga/xdma_unpack.cpp +++ b/src/test/csrc/fpga/xdma_unpack.cpp @@ -36,30 +36,30 @@ typedef struct { typedef struct { uint8_t io_valid; - uint8_t io_success; uint8_t io_bits_valid; + uint8_t io_success; uint8_t io_coreid; } SquashLrScEvent; typedef struct { uint8_t io_valid; - uint64_t io_vsscratch; - uint64_t io_vsatp; - uint64_t io_vstval; - uint64_t io_vscause; - uint64_t io_vsepc; - uint64_t io_vstvec; - uint64_t io_vsstatus; - uint64_t io_hgatp; - uint64_t io_htinst; - uint64_t io_htval; - uint64_t io_hcounteren; - uint64_t io_hedele; - uint64_t io_hideleg; - uint64_t io_hstatus; - uint64_t io_mtinst; - uint64_t io_mtval2; uint64_t io_virtMode; + uint64_t io_mtval2; + uint64_t io_mtinst; + uint64_t io_hstatus; + uint64_t io_hideleg; + uint64_t io_hedele; + uint64_t io_hcounteren; + uint64_t io_htval; + uint64_t io_htinst; + uint64_t io_hgatp; + uint64_t io_vsstatus; + uint64_t io_vstvec; + uint64_t io_vsepc; + uint64_t io_vscause; + uint64_t io_vstval; + uint64_t io_vsatp; + uint64_t io_vsscratch; uint8_t io_coreid; } SquashHCSRState; From 119990c96953777c6dd63c15082e851bf492ea2b Mon Sep 17 00:00:00 2001 From: Kami Date: Wed, 8 Jan 2025 16:58:54 +0800 Subject: [PATCH 36/41] CI: enable fpga-diff compile ci --- .github/workflows/main.yml | 77 ++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7279bbdaf..c68be864d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -308,43 +308,40 @@ jobs: make clean ./build/simv +workload=./ready-to-run/microbench.bin +b=0 +e=-1 +diff=./ready-to-run/riscv64-nemu-interpreter-so +max-instrs=5000 +warmup_instr=1000 - # test-difftest-fpga: - # runs-on: ubuntu-22.04 - - # needs: test-difftest-main - - # steps: - # - uses: actions/checkout@v4 - - # - name: Prepare environment - # run: | - # cd $GITHUB_WORKSPACE/.. - # git config --global url."https://github.com/".insteadOf git@github.com: - # git config --global url."https://".insteadOf git:// - # git clone https://github.com/OpenXiangShan/xs-env - # cd xs-env - # sudo -s ./setup-tools.sh - # source ./setup.sh - - # - name: Prepare NutShell - # run: | - # cd $GITHUB_WORKSPACE/../xs-env - # rm -r NutShell - # git clone -b dev-difftest --single-branch https://github.com/OSCPU/NutShell.git - # cd NutShell && git submodule update --init - # rm -r difftest - # cp -r $GITHUB_WORKSPACE . - - # - name: Enable -Werror for EMU Build - # run: | - # echo "CXX_NO_WARNING=1" >> $GITHUB_ENV - - # - name: FPGA-difftest Build - # run: | - # cd $GITHUB_WORKSPACE/../xs-env - # source ./env.sh - # cd $GITHUB_WORKSPACE/../xs-env/NutShell - # source ./env.sh - # make clean - # make sim-verilog MILL_ARGS="--difftest-config ENBF" -j2 - # make fpga-build DMA_CHANNELS=2 WITH_CHISELDB=0 WITH_CONSTANTIN=0 + test-difftest-fpga: + runs-on: ubuntu-22.04 + + needs: test-difftest-main + + steps: + - uses: actions/checkout@v4 + + - name: Prepare environment + run: | + cd $GITHUB_WORKSPACE/.. + git config --global url."https://github.com/".insteadOf git@github.com: + git config --global url."https://".insteadOf git:// + git clone https://github.com/OpenXiangShan/xs-env + cd xs-env + sudo -s ./setup-tools.sh + source ./setup.sh + + - name: Prepare NutShell + run: | + cd $GITHUB_WORKSPACE/../xs-env + rm -r NutShell + git clone -b dev-difftest --single-branch https://github.com/OSCPU/NutShell.git + cd NutShell && git submodule update --init + rm -r difftest + cp -r $GITHUB_WORKSPACE . + + - name: FPGA-difftest Build + run: | + cd $GITHUB_WORKSPACE/../xs-env + source ./env.sh + cd $GITHUB_WORKSPACE/../xs-env/NutShell + source ./env.sh + make clean + make sim-verilog MILL_ARGS="--difftest-config SF" -j2 + cd ./difftest + make fpga-build WITH_CHISELDB=0 WITH_CONSTANTIN=0 From f850fb29bc08e778ffbae4650a9b33388cbcc948 Mon Sep 17 00:00:00 2001 From: Kami Date: Wed, 8 Jan 2025 17:04:30 +0800 Subject: [PATCH 37/41] CI: fix CI compile --- .github/workflows/main.yml | 3 +-- src/main/scala/Gateway.scala | 22 ++++++++++------- src/test/csrc/difftest/difftest.cpp | 1 - src/test/csrc/fpga/fpga_main.cpp | 4 +--- src/test/csrc/fpga/xdma.cpp | 2 +- src/test/csrc/fpga/xdma_unpack.cpp | 37 +++++++++++++++-------------- 6 files changed, 36 insertions(+), 33 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c68be864d..5c451779b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -306,7 +306,6 @@ jobs: make simv VCS=verilator -j2 ./build/simv +workload=$WORKLOAD +b=0 +e=-1 +diff=$REF_SO +max-instrs=5000 +warmup_instr=1000 make clean - ./build/simv +workload=./ready-to-run/microbench.bin +b=0 +e=-1 +diff=./ready-to-run/riscv64-nemu-interpreter-so +max-instrs=5000 +warmup_instr=1000 test-difftest-fpga: runs-on: ubuntu-22.04 @@ -344,4 +343,4 @@ jobs: make clean make sim-verilog MILL_ARGS="--difftest-config SF" -j2 cd ./difftest - make fpga-build WITH_CHISELDB=0 WITH_CONSTANTIN=0 + make fpga-build FPGA=1 diff --git a/src/main/scala/Gateway.scala b/src/main/scala/Gateway.scala index 75c781a73..d88e98c66 100644 --- a/src/main/scala/Gateway.scala +++ b/src/main/scala/Gateway.scala @@ -282,23 +282,29 @@ object GatewaySink { } def collect(config: GatewayConfig): GatewayResult = { - val collected = MixedVecInit( - ports.toSeq.map { gen => Seq(gen.valid.asTypeOf(UInt(8.W)), gen.bits.getByteAlign) }.flatten.toSeq - ).asUInt - val out = Option.when(config.isFPGA && !config.isBatch) { - IO(new Bundle { + val collected = if (config.isFPGA && !config.isBatch && config.isSquash) { + MixedVecInit( + ports.toSeq.map { gen => Seq(gen.valid.asTypeOf(UInt(8.W)), gen.bits.getByteAlign) }.flatten.toSeq + ).asUInt + } else { + 0.U + } + val out = if (config.isFPGA && !config.isBatch && config.isSquash) { + Some(IO(new Bundle { val data = Output(UInt(collected.getWidth.W)) val enable = Output(Bool()) - }) + })) + } else { + None } - if (config.isFPGA && !config.isBatch) { + if (config.isFPGA && !config.isBatch && config.isSquash) { out.get.data := collected out.get.enable := VecInit(ports.toSeq.map(_.valid)).asUInt.orR dontTouch(out.get) } config.style match { case "dpic" => DPIC.collect() - case _ => DPIC.collect() // Default: DPI-C + case _ => DPIC.collect() // Default: DPI-C } } } diff --git a/src/test/csrc/difftest/difftest.cpp b/src/test/csrc/difftest/difftest.cpp index a5a0b396d..aef0b51b7 100644 --- a/src/test/csrc/difftest/difftest.cpp +++ b/src/test/csrc/difftest/difftest.cpp @@ -587,7 +587,6 @@ int Difftest::do_instr_commit(int i) { } void Difftest::do_first_instr_commit() { - printf("dut->commit[0].pc %lx\n", dut->commit[0].pc); if (!has_commit && dut->commit[0].valid) { #ifndef BASIC_DIFFTEST_ONLY if (dut->commit[0].pc != FIRST_INST_ADDRESS) { diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 567fb70ec..9b1d63277 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -14,14 +14,12 @@ * See the Mulan PSL v2 for more details. ***************************************************************************************/ +#include "device.h" #include "diffstate.h" #include "difftest-dpic.h" #include "difftest.h" #include "flash.h" -#include "device.h" #include "goldenmem.h" -#include "ram.h" -#include "refproxy.h" #include "mpool.h" #include "ram.h" #include "refproxy.h" diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index a5505907e..4d9758c34 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -142,7 +142,7 @@ void FpgaXdma::stop_thansmit_thread() { } void FpgaXdma::read_xdma_thread(int channel) { - FpgaPackgeHead* packge = (FpgaPackgeHead*)malloc(sizeof(FpgaPackgeHead)); + FpgaPackgeHead *packge = (FpgaPackgeHead *)malloc(sizeof(FpgaPackgeHead)); // TODO: The first packet may be repeated twice, and if it is, drop the first packet size_t size = read(xdma_c2h_fd[channel], packge->diff_packge, DMA_DIFF_PACKGE_LEN); while (running) { diff --git a/src/test/csrc/fpga/xdma_unpack.cpp b/src/test/csrc/fpga/xdma_unpack.cpp index d7ad0e1c5..860750b6b 100644 --- a/src/test/csrc/fpga/xdma_unpack.cpp +++ b/src/test/csrc/fpga/xdma_unpack.cpp @@ -13,13 +13,13 @@ * * See the Mulan PSL v2 for more details. ***************************************************************************************/ -#include "squash_unpack.h" +#include "xdma_unpack.h" #include "diffstate.h" #include "difftest-dpic.h" -#include +#include #include +#include #include -#include extern void simv_nstep(uint8_t step); #pragma pack(1) typedef struct { @@ -245,12 +245,13 @@ void squash_unpackge(uint8_t *packge) { packge += sizeof(SquashArchIntRegState); if (temp.io_valid) { v_difftest_ArchIntRegState( - temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], - temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], - temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], - temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], - temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], - temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); + temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], + temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], + temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], + temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], + temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], + temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], + temp.io_coreid); } } { @@ -259,10 +260,10 @@ void squash_unpackge(uint8_t *packge) { packge += sizeof(SquashCSRState); if (temp.io_valid) { v_difftest_CSRState(temp.io_privilegeMode, temp.io_mstatus, temp.io_sstatus, temp.io_mepc, temp.io_sepc, - temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, - temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, - temp.io_medeleg, temp.io_coreid); - } + temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, + temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, + temp.io_medeleg, temp.io_coreid); + } } { SquashArchEvent temp; @@ -270,7 +271,7 @@ void squash_unpackge(uint8_t *packge) { packge += sizeof(SquashArchEvent); if (temp.io_valid) { v_difftest_ArchEvent(temp.io_interrupt, temp.io_exception, temp.io_exceptionPC, temp.io_exceptionInst, - temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); + temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); } } { @@ -286,9 +287,9 @@ void squash_unpackge(uint8_t *packge) { memcpy(&temp, packge, sizeof(SquashInstrCommit)); packge += sizeof(SquashInstrCommit); if (temp.valid) { - v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, temp.pc, - temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, temp.nFused, - temp.special, temp.coreid, temp.index); + v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, + temp.pc, temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, + temp.nFused, temp.special, temp.coreid, temp.index); } } for (size_t i = 0; i < CONFIG_DIFF_COMMIT_DATA_WIDTH; i++) { @@ -300,7 +301,7 @@ void squash_unpackge(uint8_t *packge) { } } #endif -// PACKGE END + // PACKGE END simv_nstep(1); usleep(50000); } From 8b330814d04bb17815b9a2b2fe0723f9761fae60 Mon Sep 17 00:00:00 2001 From: Kami Date: Thu, 9 Jan 2025 10:34:52 +0800 Subject: [PATCH 38/41] CI: tidy up the file code format --- .github/workflows/main.yml | 23 +++------ Makefile | 14 +++--- fpga.mk | 13 ++--- src/test/csrc/difftest/difftest.cpp | 8 +-- src/test/csrc/fpga/fpga_main.cpp | 73 ++++++++++++++++------------ src/test/csrc/fpga/xdma.cpp | 6 +-- src/test/csrc/fpga/xdma.h | 4 +- src/test/csrc/fpga/xdma_unpack.cpp | 10 ++-- src/test/csrc/fpga/xdma_unpack.h | 5 +- src/test/vsrc/vcs/DifftestEndpoint.v | 4 -- 10 files changed, 77 insertions(+), 83 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5c451779b..4a92efd41 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -318,29 +318,20 @@ jobs: - name: Prepare environment run: | cd $GITHUB_WORKSPACE/.. - git config --global url."https://github.com/".insteadOf git@github.com: - git config --global url."https://".insteadOf git:// - git clone https://github.com/OpenXiangShan/xs-env - cd xs-env - sudo -s ./setup-tools.sh - source ./setup.sh + wget https://github.com/OpenXiangShan/xs-env/raw/refs/heads/master/install-verilator.sh + wget https://github.com/OpenXiangShan/xs-env/raw/refs/heads/master/setup-tools.sh + sudo bash setup-tools.sh - name: Prepare NutShell run: | - cd $GITHUB_WORKSPACE/../xs-env - rm -r NutShell + cd $GITHUB_WORKSPACE/.. git clone -b dev-difftest --single-branch https://github.com/OSCPU/NutShell.git - cd NutShell && git submodule update --init - rm -r difftest - cp -r $GITHUB_WORKSPACE . + cd NutShell && rm -rf difftest && cp -r $GITHUB_WORKSPACE . + echo "NOOP_HOME=$(pwd)" >> $GITHUB_ENV - name: FPGA-difftest Build run: | - cd $GITHUB_WORKSPACE/../xs-env - source ./env.sh - cd $GITHUB_WORKSPACE/../xs-env/NutShell - source ./env.sh - make clean + cd $NOOP_HOME make sim-verilog MILL_ARGS="--difftest-config SF" -j2 cd ./difftest make fpga-build FPGA=1 diff --git a/Makefile b/Makefile index 7e021f69d..3b98f3e38 100644 --- a/Makefile +++ b/Makefile @@ -79,7 +79,11 @@ SIM_VSRC = $(shell find $(VSRC_DIR) -name "*.v" -or -name "*.sv") # DiffTest support DIFFTEST_CSRC_DIR = $(abspath ./src/test/csrc/difftest) # FPGA-Difftest support -FPGA ?= 0 +ifeq ($(FPGA),1) +$(info FPGA is enabled. ChiselDB and ConstantIn are implicitly disabled.) +WITH_CHISELDB = 0 +WITH_CONSTANTIN = 0 +endif DIFFTEST_CXXFILES = $(shell find $(DIFFTEST_CSRC_DIR) -name "*.cpp") ifeq ($(NO_DIFF), 1) @@ -92,20 +96,16 @@ SIM_CXXFLAGS += -DCONFIG_DIFFTEST_PERFCNT endif endif -# ChiselDB -ifneq ($(FPGA),1) WITH_CHISELDB ?= 1 -endif +# ChiselDB ifeq ($(WITH_CHISELDB), 1) SIM_CXXFILES += $(BUILD_DIR)/chisel_db.cpp SIM_CXXFLAGS += -I$(BUILD_DIR) -DENABLE_CHISEL_DB SIM_LDFLAGS += -lsqlite3 endif -# ConstantIn -ifneq ($(FPGA),1) WITH_CONSTANTIN ?= 1 -endif +# ConstantIn ifeq ($(WITH_CONSTANTIN), 1) SIM_CXXFILES += $(BUILD_DIR)/constantin.cpp SIM_CXXFLAGS += -I$(BUILD_DIR) -DENABLE_CONSTANTIN diff --git a/fpga.mk b/fpga.mk index 0f0de4a73..40c29b6f5 100644 --- a/fpga.mk +++ b/fpga.mk @@ -1,22 +1,17 @@ - -FPGA = FPGA_HOST -FPGA_TARGET = $(abspath $(BUILD_DIR)/simv) -FPGA_BUILD_DIR = $(abspath $(BUILD_DIR)/simv-compile) -FPGA_RUN_DIR = $(abspath $(BUILD_DIR)/$(notdir $(RUN_BIN))) - FPGA_CSRC_DIR = $(abspath ./src/test/csrc/fpga) -FPGA_CONFIG_DIR = $(abspath ./config) +FPGA_CONFIG_DIR = $(abspath ./config) # Reserve storage for xdma configuration FPGA_CXXFILES = $(SIM_CXXFILES) $(shell find $(FPGA_CSRC_DIR) -name "*.cpp") -include cstring -FPGA_CXXFLAGS = $(subst \\\",\", $(SIM_CXXFLAGS)) -I$(FPGA_CSRC_DIR) -DNUM_CORES=$(NUM_CORES) -DWITH_FPGA -O2 +FPGA_CXXFLAGS = $(subst \\\",\", $(SIM_CXXFLAGS)) -I$(FPGA_CSRC_DIR) -DNUM_CORES=$(NUM_CORES) -DCONFIG_PLATFORM_FPGA -O2 FPGA_LDFLAGS = $(SIM_LDFLAGS) -lpthread -ldl -DMA_CHANNELS?=1 +DMA_CHANNELS ?= 1 FPGA_LDFLAGS += -DCONFIG_DMA_CHANNELS=$(DMA_CHANNELS) fpga-build: fpga-clean fpga-host fpga-host: $(CXX) $(FPGA_CXXFLAGS) $(FPGA_CXXFILES) $^ -o $@ $(FPGA_LDFLAGS) + fpga-clean: rm -f fpga-host diff --git a/src/test/csrc/difftest/difftest.cpp b/src/test/csrc/difftest/difftest.cpp index aef0b51b7..d8e696eec 100644 --- a/src/test/csrc/difftest/difftest.cpp +++ b/src/test/csrc/difftest/difftest.cpp @@ -22,9 +22,9 @@ #include "ram.h" #include "spikedasm.h" #ifdef CONFIG_DIFFTEST_SQUASH -#ifndef WITH_FPGA +#ifndef CONFIG_PLATFORM_FPGA #include "svdpi.h" -#endif // WITH_FPGA +#endif // CONFIG_PLATFORM_FPGA #endif // CONFIG_DIFFTEST_SQUASH #ifdef CONFIG_DIFFTEST_PERFCNT #include "perf.h" @@ -135,7 +135,7 @@ void difftest_finish() { } #ifdef CONFIG_DIFFTEST_SQUASH -#ifndef WITH_FPGA +#ifndef CONFIG_PLATFORM_FPGA svScope squashScope; void set_squash_scope() { squashScope = svGetScope(); @@ -150,7 +150,7 @@ void difftest_squash_enable(int enable) { svSetScope(squashScope); set_squash_enable(enable); } -#endif // WITH_FPGA +#endif // CONFIG_PLATFORM_FPGA #endif // CONFIG_DIFFTEST_SQUASH #ifdef CONFIG_DIFFTEST_REPLAY diff --git a/src/test/csrc/fpga/fpga_main.cpp b/src/test/csrc/fpga/fpga_main.cpp index 9b1d63277..7d4193ce0 100644 --- a/src/test/csrc/fpga/fpga_main.cpp +++ b/src/test/csrc/fpga/fpga_main.cpp @@ -1,6 +1,6 @@ /*************************************************************************************** -* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) -* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2025 Institute of Computing Technology, Chinese Academy of Sciences * * DiffTest is licensed under Mulan PSL v2. * You can use this software according to the terms and conditions of the Mulan PSL v2. @@ -25,16 +25,17 @@ #include "refproxy.h" #include "xdma.h" #include +#include #include enum { - SIMV_RUN, - SIMV_DONE, - SIMV_FAIL, + FPGA_RUN, + FPGA_DONE, + FPGA_FAIL, } simv_state; static char work_load[256] = "/dev/zero"; -static std::atomic simv_result{SIMV_RUN}; +static std::atomic simv_result{FPGA_RUN}; static std::mutex simv_mtx; static std::condition_variable simv_cv; static uint64_t max_instrs = 0; @@ -46,30 +47,30 @@ struct core_end_info_t { }; static core_end_info_t core_end_info; -void simv_init(); -void simv_step(); +void fpga_init(); +void fpga_step(); void cpu_endtime_check(); void set_diff_ref_so(char *s); -void args_parsingniton(int argc, char *argv[]); +void args_parsing(int argc, char *argv[]); FpgaXdma *xdma_device = NULL; int main(int argc, char *argv[]) { - args_parsingniton(argc, argv); + args_parsing(argc, argv); - simv_init(); + fpga_init(); printf("simv init\n"); { std::unique_lock lock(simv_mtx); xdma_device->start_transmit_thread(); - while (simv_result.load() == SIMV_RUN) { + while (simv_result.load() == FPGA_RUN) { simv_cv.wait(lock); } } xdma_device->running = false; free(xdma_device); printf("difftest releases the fpga device and exits\n"); - exit(0); + return 0; } void set_diff_ref_so(char *s) { @@ -79,7 +80,7 @@ void set_diff_ref_so(char *s) { difftest_ref_so = buf; } -void simv_init() { +void fpga_init() { xdma_device = new FpgaXdma(work_load); init_ram(work_load, DEFAULT_EMU_RAM_SIZE); init_flash(NULL); @@ -91,16 +92,16 @@ void simv_init() { init_nemuproxy(DEFAULT_EMU_RAM_SIZE); } -void simv_nstep(uint8_t step) { +void fpga_nstep(uint8_t step) { for (int i = 0; i < step; i++) { - simv_step(); + fpga_step(); } } -void simv_step() { +void fpga_step() { if (difftest_step()) { - printf("SIMV_FAIL\n"); - simv_result.store(SIMV_FAIL); + printf("FPGA_FAIL\n"); + simv_result.store(FPGA_FAIL); simv_cv.notify_one(); } if (difftest_state() != -1) { @@ -115,9 +116,9 @@ void simv_step() { difftest[i]->display_stats(); } if (trapCode == 0) - simv_result.store(SIMV_DONE); + simv_result.store(FPGA_DONE); else - simv_result.store(SIMV_FAIL); + simv_result.store(FPGA_FAIL); simv_cv.notify_one(); } cpu_endtime_check(); @@ -136,7 +137,7 @@ void cpu_endtime_check() { difftest[i]->display_stats(); core_end_info.core_cpi[i] = (double)trap->cycleCnt / (double)trap->instrCnt; if (core_end_info.core_trap_num == NUM_CORES) { - simv_result.store(SIMV_DONE); + simv_result.store(FPGA_DONE); simv_cv.notify_one(); } } @@ -144,15 +145,25 @@ void cpu_endtime_check() { } } -void args_parsingniton(int argc, char *argv[]) { - for (int i = 1; i < argc; ++i) { - if (strcmp(argv[i], "--diff") == 0) { - set_diff_ref_so(argv[++i]); - } else if (strcmp(argv[i], "-i") == 0) { - i++; - memcpy(work_load, argv[i], strlen(argv[i])); - } else if (strcmp(argv[i], "--max-instrs") == 0) { - max_instrs = std::stoul(argv[++i], nullptr, 10); +void args_parsing(int argc, char *argv[]) { + int opt; + int option_index = 0; + static struct option long_options[] = { + {"diff", required_argument, 0, 0}, {"max-instrs", required_argument, 0, 0}, {0, 0, 0, 0}}; + + while ((opt = getopt_long(argc, argv, "i:", long_options, &option_index)) != -1) { + switch (opt) { + case 0: + if (strcmp(long_options[option_index].name, "diff") == 0) { + set_diff_ref_so(optarg); + } else if (strcmp(long_options[option_index].name, "max-instrs") == 0) { + max_instrs = std::stoul(optarg, nullptr, 10); + } + break; + case 'i': strncpy(work_load, optarg, sizeof(work_load) - 1); break; + default: + std::cerr << "Usage: " << argv[0] << " [--diff ] [-i ] [--max-instrs ]" << std::endl; + exit(EXIT_FAILURE); } } } diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 4d9758c34..70ca74133 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -1,6 +1,6 @@ /*************************************************************************************** -* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) -* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2025 Institute of Computing Technology, Chinese Academy of Sciences * * DiffTest is licensed under Mulan PSL v2. * You can use this software according to the terms and conditions of the Mulan PSL v2. @@ -61,7 +61,7 @@ FpgaXdma::FpgaXdma(const char *workload) { } void FpgaXdma::handle_sigint(int sig) { - printf("Unlink sem success, exit success!\n"); + printf("handle sigint unlink pcie success, exit fpga-host!\n"); exit(1); } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index d186feb5d..bc1adc621 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -1,6 +1,6 @@ /*************************************************************************************** -* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) -* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2025 Institute of Computing Technology, Chinese Academy of Sciences * * DiffTest is licensed under Mulan PSL v2. * You can use this software according to the terms and conditions of the Mulan PSL v2. diff --git a/src/test/csrc/fpga/xdma_unpack.cpp b/src/test/csrc/fpga/xdma_unpack.cpp index 860750b6b..66dbe8be6 100644 --- a/src/test/csrc/fpga/xdma_unpack.cpp +++ b/src/test/csrc/fpga/xdma_unpack.cpp @@ -1,6 +1,6 @@ /*************************************************************************************** -* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) -* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2025 Institute of Computing Technology, Chinese Academy of Sciences * * DiffTest is licensed under Mulan PSL v2. * You can use this software according to the terms and conditions of the Mulan PSL v2. @@ -20,7 +20,8 @@ #include #include #include -extern void simv_nstep(uint8_t step); + +extern void fpga_nstep(uint8_t step); #pragma pack(1) typedef struct { uint8_t io_step; @@ -302,6 +303,5 @@ void squash_unpackge(uint8_t *packge) { } #endif // PACKGE END - simv_nstep(1); - usleep(50000); + fpga_nstep(1); } diff --git a/src/test/csrc/fpga/xdma_unpack.h b/src/test/csrc/fpga/xdma_unpack.h index 1f88fd5a6..c57a5bc42 100644 --- a/src/test/csrc/fpga/xdma_unpack.h +++ b/src/test/csrc/fpga/xdma_unpack.h @@ -1,6 +1,6 @@ /*************************************************************************************** -* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) -* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2025 Institute of Computing Technology, Chinese Academy of Sciences * * DiffTest is licensed under Mulan PSL v2. * You can use this software according to the terms and conditions of the Mulan PSL v2. @@ -17,6 +17,7 @@ #define __DIFF_UNPACK_H__ #include "diffstate.h" #include "difftest-dpic.h" + void squash_unpackge(uint8_t *packge); #endif diff --git a/src/test/vsrc/vcs/DifftestEndpoint.v b/src/test/vsrc/vcs/DifftestEndpoint.v index 26aea698f..ff58e99a6 100644 --- a/src/test/vsrc/vcs/DifftestEndpoint.v +++ b/src/test/vsrc/vcs/DifftestEndpoint.v @@ -237,16 +237,12 @@ assign difftest_perfCtrl_dump = 0; reg [63:0] n_cycles; reg [63:0] stuck_timer; -`define MACRO aa.bb.cc always @(posedge clock) begin if (reset) begin n_cycles <= 64'h0; stuck_timer <= 64'h0; end else begin - if (n_cycles == 64'h1) begin - $display("suf `MACRO suf"); - end n_cycles <= n_cycles + 64'h1; // max cycles From df0e48a97444a12be3ced5abe80bb2453b8e8c0e Mon Sep 17 00:00:00 2001 From: Kami Date: Thu, 9 Jan 2025 14:31:34 +0800 Subject: [PATCH 39/41] fpga: Clean up the code, remove the hardware changes, and keep the software interface --- .github/workflows/main.yml | 2 +- src/main/scala/DPIC.scala | 11 +- src/main/scala/Gateway.scala | 22 --- src/test/csrc/fpga/xdma.cpp | 8 +- src/test/csrc/fpga/xdma.h | 2 +- src/test/csrc/fpga/xdma_unpack.cpp | 307 ----------------------------- src/test/csrc/fpga/xdma_unpack.h | 23 --- 7 files changed, 7 insertions(+), 368 deletions(-) delete mode 100644 src/test/csrc/fpga/xdma_unpack.cpp delete mode 100644 src/test/csrc/fpga/xdma_unpack.h diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4a92efd41..6c7933013 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -332,6 +332,6 @@ jobs: - name: FPGA-difftest Build run: | cd $NOOP_HOME - make sim-verilog MILL_ARGS="--difftest-config SF" -j2 + make sim-verilog MILL_ARGS="--difftest-config BF" -j2 cd ./difftest make fpga-build FPGA=1 diff --git a/src/main/scala/DPIC.scala b/src/main/scala/DPIC.scala index d631707a1..93e8f4faa 100644 --- a/src/main/scala/DPIC.scala +++ b/src/main/scala/DPIC.scala @@ -296,7 +296,6 @@ private class DummyDPICBatchWrapper( object DPIC { val interfaces = ListBuffer.empty[(String, String, String)] - var defMacros = new StringBuilder() def apply(control: GatewaySinkControl, io: Valid[DifftestBundle], config: GatewayConfig): Unit = { val module = Module(new DummyDPICWrapper(chiselTypeOf(io), config)) @@ -314,12 +313,6 @@ object DPIC { module.control := control module.io := io val dpic = module.dpic - if (!config.isFPGA) - defMacros ++= - s""" - |#ifdef CONFIG_DIFFTEST_BATCH - |#include "svdpi.h" - |#endif // CONFIG_DIFFTEST_BATCH""".stripMargin interfaces += ((dpic.dpicFuncName, dpic.dpicFuncProto, dpic.dpicFunc)) } @@ -334,10 +327,12 @@ object DPIC { interfaceCpp += "" interfaceCpp += "#include " interfaceCpp += "#include \"diffstate.h\"" + interfaceCpp += "#ifdef CONFIG_DIFFTEST_BATCH" + interfaceCpp += "#include \"svdpi.h\"" + interfaceCpp += "#endif // CONFIG_DIFFTEST_BATCH" interfaceCpp += "#ifdef CONFIG_DIFFTEST_PERFCNT" interfaceCpp += "#include \"perf.h\"" interfaceCpp += "#endif // CONFIG_DIFFTEST_PERFCNT" - interfaceCpp += defMacros.toString() interfaceCpp += "" interfaceCpp += """ diff --git a/src/main/scala/Gateway.scala b/src/main/scala/Gateway.scala index d88e98c66..481099e5d 100644 --- a/src/main/scala/Gateway.scala +++ b/src/main/scala/Gateway.scala @@ -254,13 +254,11 @@ class GatewayEndpoint(instanceWithDelay: Seq[(DifftestBundle, Int)], config: Gat } object GatewaySink { - private val ports = ListBuffer.empty[Valid[DifftestBundle]] def apply(control: GatewaySinkControl, io: Valid[DifftestBundle], config: GatewayConfig): Unit = { config.style match { case "dpic" => DPIC(control, io, config) case _ => DPIC(control, io, config) // Default: DPI-C } - ports += io } def batch(template: Seq[DifftestBundle], control: GatewaySinkControl, io: BatchIO, config: GatewayConfig): Unit = { @@ -282,26 +280,6 @@ object GatewaySink { } def collect(config: GatewayConfig): GatewayResult = { - val collected = if (config.isFPGA && !config.isBatch && config.isSquash) { - MixedVecInit( - ports.toSeq.map { gen => Seq(gen.valid.asTypeOf(UInt(8.W)), gen.bits.getByteAlign) }.flatten.toSeq - ).asUInt - } else { - 0.U - } - val out = if (config.isFPGA && !config.isBatch && config.isSquash) { - Some(IO(new Bundle { - val data = Output(UInt(collected.getWidth.W)) - val enable = Output(Bool()) - })) - } else { - None - } - if (config.isFPGA && !config.isBatch && config.isSquash) { - out.get.data := collected - out.get.enable := VecInit(ports.toSeq.map(_.valid)).asUInt.orR - dontTouch(out.get) - } config.style match { case "dpic" => DPIC.collect() case _ => DPIC.collect() // Default: DPI-C diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 70ca74133..934fbe1ed 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -25,10 +25,6 @@ #include #include -#ifdef CONFIG_DIFFTEST_SQUASH -#include "xdma_unpack.h" -#endif // CONFIG_DIFFTEST_SQUASH - #define XDMA_USER "/dev/xdma0_user" #define XDMA_BYPASS "/dev/xdma0_bypass" #define XDMA_C2H_DEVICE "/dev/xdma0_c2h_" @@ -158,7 +154,7 @@ void FpgaXdma::read_xdma_thread(int channel) { #ifdef CONFIG_DIFFTEST_BATCH v_difftest_Batch(packge->diff_packge); #elif defined(CONFIG_DIFFTEST_SQUASH) - squash_unpackge(packge->diff_packge); + //TODO: need automatically generates squash data parsing implementations #endif } free(packge); @@ -182,7 +178,7 @@ void FpgaXdma::write_difftest_thread() { #ifdef CONFIG_DIFFTEST_BATCH v_difftest_Batch(packge.diff_packge); #elif defined(CONFIG_DIFFTEST_SQUASH) - squash_unpackge(packge.diff_packge); + //TODO: need automatically generates squash data parsing implementations #endif } } diff --git a/src/test/csrc/fpga/xdma.h b/src/test/csrc/fpga/xdma.h index bc1adc621..c17dc7b32 100644 --- a/src/test/csrc/fpga/xdma.h +++ b/src/test/csrc/fpga/xdma.h @@ -32,7 +32,7 @@ #ifdef CONFIG_DIFFTEST_BATCH #define DMA_DIFF_PACKGE_LEN CONFIG_DIFFTEST_BATCH_BYTELEN #elif defined(CONFIG_DIFFTEST_SQUASH) -#define DMA_DIFF_PACKGE_LEN 1280 +#define DMA_DIFF_PACKGE_LEN 1280 // XDMA Min size #endif typedef struct FpgaPackgeHead { diff --git a/src/test/csrc/fpga/xdma_unpack.cpp b/src/test/csrc/fpga/xdma_unpack.cpp deleted file mode 100644 index 66dbe8be6..000000000 --- a/src/test/csrc/fpga/xdma_unpack.cpp +++ /dev/null @@ -1,307 +0,0 @@ -/*************************************************************************************** -* Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) -* Copyright (c) 2020-2025 Institute of Computing Technology, Chinese Academy of Sciences -* -* DiffTest is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, -* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, -* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. -***************************************************************************************/ -#include "xdma_unpack.h" -#include "diffstate.h" -#include "difftest-dpic.h" -#include -#include -#include -#include - -extern void fpga_nstep(uint8_t step); -#pragma pack(1) -typedef struct { - uint8_t io_step; -} SquashStep; - -typedef struct { - uint8_t io_valid; - uint8_t io_bits_valid; - uint64_t io_data; - uint8_t io_coreid; - uint8_t io_index; -} SquashCommitData; - -typedef struct { - uint8_t io_valid; - uint8_t io_bits_valid; - uint8_t io_success; - uint8_t io_coreid; -} SquashLrScEvent; - -typedef struct { - uint8_t io_valid; - uint64_t io_virtMode; - uint64_t io_mtval2; - uint64_t io_mtinst; - uint64_t io_hstatus; - uint64_t io_hideleg; - uint64_t io_hedele; - uint64_t io_hcounteren; - uint64_t io_htval; - uint64_t io_htinst; - uint64_t io_hgatp; - uint64_t io_vsstatus; - uint64_t io_vstvec; - uint64_t io_vsepc; - uint64_t io_vscause; - uint64_t io_vstval; - uint64_t io_vsatp; - uint64_t io_vsscratch; - uint8_t io_coreid; -} SquashHCSRState; - -typedef struct { - uint8_t io_valid; - uint64_t io_fcsr; - uint8_t io_coreid; -} SquashFpCSRState; - -typedef struct { - uint8_t io_valid; - uint64_t io_value[32]; - uint8_t io_coreid; -} SquashArchFpRegState; - -typedef struct { - uint8_t io_valid; - uint64_t io_value[32]; - uint8_t io_coreid; -} SquashArchIntRegState; - -typedef struct { - uint8_t io_valid; - uint64_t io_privilegeMode; - uint64_t io_mstatus; - uint64_t io_sstatus; - uint64_t io_mepc; - uint64_t io_sepc; - uint64_t io_mtval; - uint64_t io_stval; - uint64_t io_mtvec; - uint64_t io_stvec; - uint64_t io_mcause; - uint64_t io_scause; - uint64_t io_satp; - uint64_t io_mip; - uint64_t io_mie; - uint64_t io_mscratch; - uint64_t io_sscratch; - uint64_t io_mideleg; - uint64_t io_medeleg; - uint8_t io_coreid; -} SquashCSRState; - -typedef struct { - uint8_t io_valid; - uint8_t io_bits_valid; - uint32_t io_interrupt; - uint32_t io_exception; - uint64_t io_exceptionPC; - uint32_t io_exceptionInst; - uint8_t io_hasNMI; - uint8_t io_virtualInterruptIsHvictlInject; - uint8_t io_coreid; -} SquashArchEvent; - -typedef struct { - uint8_t io_valid; - uint8_t hasTrap; - uint64_t cycleCnt; - uint64_t instrCnt; - uint8_t hasWFI; - uint64_t code; - uint64_t pc; - uint8_t coreid; -} SquashTrapEvent; - -typedef struct { - uint8_t valid; - uint8_t bits_valid; - uint8_t skip; - uint8_t isRVC; - uint8_t rfwen; - uint8_t fpwen; - uint8_t vecwen; - uint8_t wpdest; - uint8_t wdest; - uint64_t pc; - uint32_t instr; - uint16_t robIdx; - uint8_t lqIdx; - uint8_t sqIdx; - uint8_t isLoad; - uint8_t isStore; - uint8_t nFused; - uint8_t special; - uint8_t coreid; - uint8_t index; -} SquashInstrCommit; -#pragma pack() - -void squash_unpackge(uint8_t *packge) { -#ifdef USE_THREAD_MEMPOOL - packge += sizeof(uint8_t); -#endif - // PACKGE HEAD -#if defined(CPU_XIANGSHAN) - for (size_t i = 0; i < CONFIG_DIFF_COMMIT_WIDTH; i++) { - SquashInstrCommit temp; - memcpy(&temp, packge, sizeof(SquashInstrCommit)); - packge += sizeof(SquashInstrCommit); - v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, temp.pc, - temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, temp.nFused, - temp.special, temp.coreid, temp.index); - } - { - SquashTrapEvent temp; - memcpy(&temp, packge, sizeof(SquashTrapEvent)); - packge += sizeof(SquashTrapEvent); - v_difftest_TrapEvent(temp.hasTrap, temp.cycleCnt, temp.instrCnt, temp.hasWFI, temp.code, temp.pc, temp.coreid); - } - { - SquashArchFpRegState temp; - memcpy(&temp, packge, sizeof(SquashArchFpRegState)); - packge += sizeof(SquashArchFpRegState); - v_difftest_ArchFpRegState( - temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], - temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], - temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], - temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], - temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], - temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); - } - { - SquashArchIntRegState temp; - memcpy(&temp, packge, sizeof(SquashArchIntRegState)); - packge += sizeof(SquashArchIntRegState); - v_difftest_ArchIntRegState( - temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], - temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], - temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], - temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], - temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], - temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], temp.io_coreid); - } - { - SquashArchEvent temp; - memcpy(&temp, packge, sizeof(SquashArchEvent)); - packge += sizeof(SquashArchEvent); - v_difftest_ArchEvent(temp.io_interrupt, temp.io_exception, temp.io_exceptionPC, temp.io_exceptionInst, - temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); - } - { - SquashCSRState temp; - memcpy(&temp, packge, sizeof(SquashCSRState)); - packge += sizeof(SquashCSRState); - v_difftest_CSRState(temp.io_privilegeMode, temp.io_mstatus, temp.io_sstatus, temp.io_mepc, temp.io_sepc, - temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, - temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, - temp.io_medeleg, temp.io_coreid); - } - { - SquashFpCSRState temp; - memcpy(&temp, packge, sizeof(SquashFpCSRState)); - packge += sizeof(SquashFpCSRState); - v_difftest_FpCSRState(temp.io_fcsr, temp.io_coreid); - } - { - SquashHCSRState temp; - memcpy(&temp, packge, sizeof(SquashHCSRState)); - packge += sizeof(SquashHCSRState); - v_difftest_HCSRState(temp.io_virtMode, temp.io_mtval2, temp.io_mtinst, temp.io_hstatus, temp.io_hideleg, - temp.io_hedele, temp.io_hcounteren, temp.io_htval, temp.io_htinst, temp.io_hgatp, - temp.io_vsstatus, temp.io_vstvec, temp.io_vsepc, temp.io_vscause, temp.io_vstval, - temp.io_vsatp, temp.io_vsscratch, temp.io_coreid); - } - { - SquashLrScEvent temp; - memcpy(&temp, packge, sizeof(SquashLrScEvent)); - packge += sizeof(SquashLrScEvent); - v_difftest_LrScEvent(temp.io_success, temp.io_coreid); - } - for (size_t i = 0; i < CONFIG_DIFF_COMMIT_DATA_WIDTH; i++) { - SquashCommitData temp; - memcpy(&temp, packge, sizeof(SquashCommitData)); - packge += sizeof(SquashCommitData); - v_difftest_CommitData(temp.io_data, temp.io_coreid, temp.io_index); - } -#elif defined(CPU_NUTSHELL) - { - SquashArchIntRegState temp; - memcpy(&temp, packge, sizeof(SquashArchIntRegState)); - packge += sizeof(SquashArchIntRegState); - if (temp.io_valid) { - v_difftest_ArchIntRegState( - temp.io_value[0], temp.io_value[1], temp.io_value[2], temp.io_value[3], temp.io_value[4], temp.io_value[5], - temp.io_value[6], temp.io_value[7], temp.io_value[8], temp.io_value[9], temp.io_value[10], temp.io_value[11], - temp.io_value[12], temp.io_value[13], temp.io_value[14], temp.io_value[15], temp.io_value[16], - temp.io_value[17], temp.io_value[18], temp.io_value[19], temp.io_value[20], temp.io_value[21], - temp.io_value[22], temp.io_value[23], temp.io_value[24], temp.io_value[25], temp.io_value[26], - temp.io_value[27], temp.io_value[28], temp.io_value[29], temp.io_value[30], temp.io_value[31], - temp.io_coreid); - } - } - { - SquashCSRState temp; - memcpy(&temp, packge, sizeof(SquashCSRState)); - packge += sizeof(SquashCSRState); - if (temp.io_valid) { - v_difftest_CSRState(temp.io_privilegeMode, temp.io_mstatus, temp.io_sstatus, temp.io_mepc, temp.io_sepc, - temp.io_mtval, temp.io_stval, temp.io_mtvec, temp.io_stvec, temp.io_mcause, temp.io_scause, - temp.io_satp, temp.io_mip, temp.io_mie, temp.io_mscratch, temp.io_sscratch, temp.io_mideleg, - temp.io_medeleg, temp.io_coreid); - } - } - { - SquashArchEvent temp; - memcpy(&temp, packge, sizeof(SquashArchEvent)); - packge += sizeof(SquashArchEvent); - if (temp.io_valid) { - v_difftest_ArchEvent(temp.io_interrupt, temp.io_exception, temp.io_exceptionPC, temp.io_exceptionInst, - temp.io_hasNMI, temp.io_virtualInterruptIsHvictlInject, temp.io_coreid); - } - } - { - SquashTrapEvent temp; - memcpy(&temp, packge, sizeof(SquashTrapEvent)); - packge += sizeof(SquashTrapEvent); - if (temp.io_valid) { - v_difftest_TrapEvent(temp.hasTrap, temp.cycleCnt, temp.instrCnt, temp.hasWFI, temp.code, temp.pc, temp.coreid); - } - } - for (size_t i = 0; i < CONFIG_DIFF_COMMIT_WIDTH; i++) { - SquashInstrCommit temp; - memcpy(&temp, packge, sizeof(SquashInstrCommit)); - packge += sizeof(SquashInstrCommit); - if (temp.valid) { - v_difftest_InstrCommit(temp.skip, temp.isRVC, temp.rfwen, temp.fpwen, temp.vecwen, temp.wpdest, temp.wdest, - temp.pc, temp.instr, temp.robIdx, temp.lqIdx, temp.sqIdx, temp.isLoad, temp.isStore, - temp.nFused, temp.special, temp.coreid, temp.index); - } - } - for (size_t i = 0; i < CONFIG_DIFF_COMMIT_DATA_WIDTH; i++) { - SquashCommitData temp; - memcpy(&temp, packge, sizeof(SquashCommitData)); - packge += sizeof(SquashCommitData); - if (temp.io_valid) { - v_difftest_CommitData(temp.io_data, temp.io_coreid, temp.io_index); - } - } -#endif - // PACKGE END - fpga_nstep(1); -} diff --git a/src/test/csrc/fpga/xdma_unpack.h b/src/test/csrc/fpga/xdma_unpack.h deleted file mode 100644 index c57a5bc42..000000000 --- a/src/test/csrc/fpga/xdma_unpack.h +++ /dev/null @@ -1,23 +0,0 @@ -/*************************************************************************************** -* Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) -* Copyright (c) 2020-2025 Institute of Computing Technology, Chinese Academy of Sciences -* -* DiffTest is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, -* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, -* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. -***************************************************************************************/ -#ifndef __DIFF_UNPACK_H__ -#define __DIFF_UNPACK_H__ -#include "diffstate.h" -#include "difftest-dpic.h" - -void squash_unpackge(uint8_t *packge); - -#endif From 282835a56a99be39484230662bcc74c667f40747 Mon Sep 17 00:00:00 2001 From: Kami Date: Thu, 9 Jan 2025 15:19:04 +0800 Subject: [PATCH 40/41] fpga: edit makefile out path --- .github/workflows/main.yml | 2 +- Makefile | 6 +++--- fpga.mk | 14 ++++++++++---- src/test/csrc/common/mpool.cpp | 4 ++-- src/test/csrc/common/mpool.h | 4 ++-- 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6c7933013..4a92efd41 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -332,6 +332,6 @@ jobs: - name: FPGA-difftest Build run: | cd $NOOP_HOME - make sim-verilog MILL_ARGS="--difftest-config BF" -j2 + make sim-verilog MILL_ARGS="--difftest-config SF" -j2 cd ./difftest make fpga-build FPGA=1 diff --git a/Makefile b/Makefile index 3b98f3e38..1635e6b16 100644 --- a/Makefile +++ b/Makefile @@ -96,16 +96,16 @@ SIM_CXXFLAGS += -DCONFIG_DIFFTEST_PERFCNT endif endif -WITH_CHISELDB ?= 1 # ChiselDB +WITH_CHISELDB ?= 1 ifeq ($(WITH_CHISELDB), 1) SIM_CXXFILES += $(BUILD_DIR)/chisel_db.cpp SIM_CXXFLAGS += -I$(BUILD_DIR) -DENABLE_CHISEL_DB SIM_LDFLAGS += -lsqlite3 endif -WITH_CONSTANTIN ?= 1 # ConstantIn +WITH_CONSTANTIN ?= 1 ifeq ($(WITH_CONSTANTIN), 1) SIM_CXXFILES += $(BUILD_DIR)/constantin.cpp SIM_CXXFLAGS += -I$(BUILD_DIR) -DENABLE_CONSTANTIN @@ -241,7 +241,7 @@ include palladium.mk include libso.mk include fpga.mk -clean: vcs-clean pldm-clean +clean: vcs-clean pldm-clean fpga-clean rm -rf $(BUILD_DIR) format: scala-format clang-format diff --git a/fpga.mk b/fpga.mk index 40c29b6f5..b01e6ac09 100644 --- a/fpga.mk +++ b/fpga.mk @@ -1,7 +1,8 @@ +FPGA_TARGET = $(BUILD_DIR)/fpga-host FPGA_CSRC_DIR = $(abspath ./src/test/csrc/fpga) FPGA_CONFIG_DIR = $(abspath ./config) # Reserve storage for xdma configuration -FPGA_CXXFILES = $(SIM_CXXFILES) $(shell find $(FPGA_CSRC_DIR) -name "*.cpp") -include cstring +FPGA_CXXFILES = $(SIM_CXXFILES) $(shell find $(FPGA_CSRC_DIR) -name "*.cpp") FPGA_CXXFLAGS = $(subst \\\",\", $(SIM_CXXFLAGS)) -I$(FPGA_CSRC_DIR) -DNUM_CORES=$(NUM_CORES) -DCONFIG_PLATFORM_FPGA -O2 FPGA_LDFLAGS = $(SIM_LDFLAGS) -lpthread -ldl @@ -10,8 +11,13 @@ FPGA_LDFLAGS += -DCONFIG_DMA_CHANNELS=$(DMA_CHANNELS) fpga-build: fpga-clean fpga-host -fpga-host: - $(CXX) $(FPGA_CXXFLAGS) $(FPGA_CXXFILES) $^ -o $@ $(FPGA_LDFLAGS) +$(FPGA_TARGET): $(FPGA_CXXFILES) | $(FPGA_TARGET_DIR) + $(CXX) $(FPGA_CXXFLAGS) $(FPGA_CXXFILES) -o $@ $(FPGA_LDFLAGS) + +$(FPGA_TARGET_DIR): + mkdir -p $(FPGA_TARGET_DIR) + +fpga-host: $(FPGA_TARGET) fpga-clean: - rm -f fpga-host + rm -f $(FPGA_TARGET) diff --git a/src/test/csrc/common/mpool.cpp b/src/test/csrc/common/mpool.cpp index 5dcbd30f6..57db210cb 100644 --- a/src/test/csrc/common/mpool.cpp +++ b/src/test/csrc/common/mpool.cpp @@ -1,6 +1,6 @@ /*************************************************************************************** -* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) -* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2025 Institute of Computing Technology, Chinese Academy of Sciences * * DiffTest is licensed under Mulan PSL v2. * You can use this software according to the terms and conditions of the Mulan PSL v2. diff --git a/src/test/csrc/common/mpool.h b/src/test/csrc/common/mpool.h index 25fe02c8b..280702a83 100644 --- a/src/test/csrc/common/mpool.h +++ b/src/test/csrc/common/mpool.h @@ -1,6 +1,6 @@ /*************************************************************************************** -* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC) -* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences +* Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) +* Copyright (c) 2020-2025 Institute of Computing Technology, Chinese Academy of Sciences * * DiffTest is licensed under Mulan PSL v2. * You can use this software according to the terms and conditions of the Mulan PSL v2. From be80b041bb026dee1b46ad48fdefc534ffb7a46b Mon Sep 17 00:00:00 2001 From: Kami Date: Thu, 9 Jan 2025 16:32:36 +0800 Subject: [PATCH 41/41] fpga: merge macro variables and get_load_img_size move get_img_size --- fpga.mk | 5 +---- src/test/csrc/common/ram.h | 3 --- src/test/csrc/difftest/difftest.cpp | 12 ++++-------- src/test/csrc/fpga/xdma.cpp | 4 ++-- 4 files changed, 7 insertions(+), 17 deletions(-) diff --git a/fpga.mk b/fpga.mk index b01e6ac09..bb5a494ae 100644 --- a/fpga.mk +++ b/fpga.mk @@ -11,12 +11,9 @@ FPGA_LDFLAGS += -DCONFIG_DMA_CHANNELS=$(DMA_CHANNELS) fpga-build: fpga-clean fpga-host -$(FPGA_TARGET): $(FPGA_CXXFILES) | $(FPGA_TARGET_DIR) +$(FPGA_TARGET): $(FPGA_CXXFILES) $(CXX) $(FPGA_CXXFLAGS) $(FPGA_CXXFILES) -o $@ $(FPGA_LDFLAGS) -$(FPGA_TARGET_DIR): - mkdir -p $(FPGA_TARGET_DIR) - fpga-host: $(FPGA_TARGET) fpga-clean: diff --git a/src/test/csrc/common/ram.h b/src/test/csrc/common/ram.h index 2c74e48c3..f4cbce014 100644 --- a/src/test/csrc/common/ram.h +++ b/src/test/csrc/common/ram.h @@ -112,9 +112,6 @@ class SimMemory { uint64_t get_size() { return memory_size; } - uint64_t get_load_img_size() { - return get_img_size(); - } bool in_range_u8(uint64_t address) { return address < memory_size; } diff --git a/src/test/csrc/difftest/difftest.cpp b/src/test/csrc/difftest/difftest.cpp index d8e696eec..6892f77f9 100644 --- a/src/test/csrc/difftest/difftest.cpp +++ b/src/test/csrc/difftest/difftest.cpp @@ -21,11 +21,9 @@ #include "goldenmem.h" #include "ram.h" #include "spikedasm.h" -#ifdef CONFIG_DIFFTEST_SQUASH -#ifndef CONFIG_PLATFORM_FPGA +#if defined(CONFIG_DIFFTEST_SQUASH) && !defined(CONFIG_PLATFORM_FPGA) #include "svdpi.h" -#endif // CONFIG_PLATFORM_FPGA -#endif // CONFIG_DIFFTEST_SQUASH +#endif // CONFIG_DIFFTEST_SQUASH && !CONFIG_PLATFORM_FPGA #ifdef CONFIG_DIFFTEST_PERFCNT #include "perf.h" #endif // CONFIG_DIFFTEST_PERFCNT @@ -134,8 +132,7 @@ void difftest_finish() { difftest = NULL; } -#ifdef CONFIG_DIFFTEST_SQUASH -#ifndef CONFIG_PLATFORM_FPGA +#if defined(CONFIG_DIFFTEST_SQUASH) && !defined(CONFIG_PLATFORM_FPGA) svScope squashScope; void set_squash_scope() { squashScope = svGetScope(); @@ -150,8 +147,7 @@ void difftest_squash_enable(int enable) { svSetScope(squashScope); set_squash_enable(enable); } -#endif // CONFIG_PLATFORM_FPGA -#endif // CONFIG_DIFFTEST_SQUASH +#endif // CONFIG_DIFFTEST_SQUASH && !CONFIG_PLATFORM_FPGA #ifdef CONFIG_DIFFTEST_REPLAY svScope replayScope; diff --git a/src/test/csrc/fpga/xdma.cpp b/src/test/csrc/fpga/xdma.cpp index 934fbe1ed..936b49c68 100644 --- a/src/test/csrc/fpga/xdma.cpp +++ b/src/test/csrc/fpga/xdma.cpp @@ -92,12 +92,12 @@ void FpgaXdma::device_write(bool is_bypass, const char *workload, uint64_t addr, } if (is_bypass) { - if (simMemory->get_load_img_size() > aligned_size) { + if (simMemory->get_img_size() > aligned_size) { printf("The loaded workload size exceeds the xdma bypass size"); exit(-1); } memcpy(static_cast(m_ptr) + offset, static_cast(simMemory->as_ptr()), - simMemory->get_load_img_size()); + simMemory->get_img_size()); } else { ((volatile uint32_t *)m_ptr)[offset >> 2] = value; }