From f92265b6a1ee8954a96699f9dd32dc072cbb71a5 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sat, 28 Dec 2024 16:24:52 +0100 Subject: [PATCH 01/11] [package] adjust DM entry points --- rtl/core/neorv32_package.vhd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd index d2ff396b2..0910d5cc6 100644 --- a/rtl/core/neorv32_package.vhd +++ b/rtl/core/neorv32_package.vhd @@ -94,8 +94,8 @@ package neorv32_package is constant base_io_ocd_c : std_ulogic_vector(31 downto 0) := x"ffff0000"; -- On-Chip Debugger - Debug Module Entry Points (Code ROM) -- - constant dm_exc_entry_c : std_ulogic_vector(31 downto 0) := x"ffffff00"; -- = base_io_ocd_c + code_rom + 0 - constant dm_park_entry_c : std_ulogic_vector(31 downto 0) := x"ffffff08"; -- = base_io_ocd_c + code_rom + 8 + constant dm_exc_entry_c : std_ulogic_vector(31 downto 0) := x"fffffe00"; -- = base_io_ocd_c + code_rom_base + 0 + constant dm_park_entry_c : std_ulogic_vector(31 downto 0) := x"fffffe10"; -- = base_io_ocd_c + code_rom_base + 16 -- ********************************************************************************************************** -- SoC Definitions From d0e04fcd299beafb30e2cbf5719a83504cf08211 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sat, 28 Dec 2024 16:25:12 +0100 Subject: [PATCH 02/11] [top] update DM-CPU ports --- rtl/core/neorv32_top.vhd | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/rtl/core/neorv32_top.vhd b/rtl/core/neorv32_top.vhd index f26ed825a..5b279e5b8 100644 --- a/rtl/core/neorv32_top.vhd +++ b/rtl/core/neorv32_top.vhd @@ -1685,14 +1685,14 @@ begin AUTHENTICATOR => OCD_AUTHENTICATION ) port map ( - clk_i => clk_i, - rstn_i => rstn_ext, - dmi_req_i => dmi_req, - dmi_rsp_o => dmi_rsp, - bus_req_i => iodev_req(IODEV_OCD), - bus_rsp_o => iodev_rsp(IODEV_OCD), - cpu_ndmrstn_o => dci_ndmrstn, - cpu_halt_req_o => dci_haltreq + clk_i => clk_i, + rstn_i => rstn_ext, + dmi_req_i => 
dmi_req, + dmi_rsp_o => dmi_rsp, + bus_req_i => iodev_req(IODEV_OCD), + bus_rsp_o => iodev_rsp(IODEV_OCD), + ndmrstn_o => dci_ndmrstn, + halt_req_o(0) => dci_haltreq ); end generate; From 2737b3825d2a96e4aa97e72030152eda0bf9c3b2 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sat, 28 Dec 2024 16:25:55 +0100 Subject: [PATCH 03/11] [ocd-firmware] update park loop code --- sw/ocd-firmware/debug_rom.ld | 9 ++-- sw/ocd-firmware/park_loop.S | 93 ++++++++++++++++++++++-------------- 2 files changed, 61 insertions(+), 41 deletions(-) diff --git a/sw/ocd-firmware/debug_rom.ld b/sw/ocd-firmware/debug_rom.ld index 26c4e6b56..cfb4e5691 100644 --- a/sw/ocd-firmware/debug_rom.ld +++ b/sw/ocd-firmware/debug_rom.ld @@ -1,8 +1,7 @@ /* ================================================================================ */ /* NEORV32 CPU - RISC-V GCC Linker Script */ /* -------------------------------------------------------------------------------- */ -/* For the execution-based on-chip debugger (OCD) code memory ROM: "park loop" code */ -/* (build-in firmware) */ +/* "Park loop" code for execution-based on-chip debugger's (OCD) code ROM. */ /* -------------------------------------------------------------------------------- */ /* The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 */ /* Copyright (c) NEORV32 contributors. 
*/ @@ -13,11 +12,11 @@ OUTPUT_FORMAT("elf32-littleriscv") OUTPUT_ARCH(riscv) -ENTRY(__start) +ENTRY(_ocd_start) MEMORY { - debug_mem (rx) : ORIGIN = 0xFFFFFF00, LENGTH = 128 + debug_rom (rx) : ORIGIN = 0xFFFFFE00, LENGTH = 128 } SECTIONS @@ -25,5 +24,5 @@ SECTIONS .text : { KEEP(*(.text.ocd)); - } > debug_mem + } > debug_rom } diff --git a/sw/ocd-firmware/park_loop.S b/sw/ocd-firmware/park_loop.S index c5b0d3209..42ded7126 100644 --- a/sw/ocd-firmware/park_loop.S +++ b/sw/ocd-firmware/park_loop.S @@ -1,6 +1,8 @@ // ================================================================================ // // NEORV32 CPU - park_loop.S - Execution-Based On-Chip Debugger (OCD) Firmware // // -------------------------------------------------------------------------------- // +// WARNING! This code only supports up to 4 harts! // +// -------------------------------------------------------------------------------- // // The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 // // Copyright (c) NEORV32 contributors. // // Copyright (c) 2020 - 2024 Stephan Nolting. All rights reserved. // @@ -8,60 +10,79 @@ // SPDX-License-Identifier: BSD-3-Clause // // ================================================================================ // -// debug module (DM) address map -.equ DM_CODE_BASE, 0xffffff00 // base address of debug_module's code ROM (park loop) -.equ DM_PBUF_BASE, 0xffffff40 // base address of debug_module's program buffer (PBUF) -.equ DM_DATA_BASE, 0xffffff80 // base address of debug_module's abstract data buffer (DATA) -.equ DM_SREG_BASE, 0xffffffC0 // base address of debug_module's status register - -// status register (SREG) byte(!) 
offsets -.equ SREG_HLT_ACK, ( 0 / 8) // -/w: CPU has halted in debug mode and is waiting in park loop -.equ SREG_RES_REQ, ( 8 / 8) // r/-: DM requests to resume -.equ SREG_RES_ACK, ( 8 / 8) // -/w: CPU starts to resume -.equ SREG_EXE_REQ, (16 / 8) // r/-: DM requests to execute program buffer -.equ SREG_EXE_ACK, (16 / 8) // -/w: CPU starts to execute program buffer -.equ SREG_EXC_ACK, (24 / 8) // -/w: CPU has detected an exception while in debug-mode - .file "park_loop.S" .section .text.ocd .balign 4 .option norvc -.global __start +.global _ocd_start .global entry_exception .global entry_normal -__start: +// debug module (DM) address map +.equ DM_CODE_BASE, 0xFFFFFE00 // base address of code ROM (park loop) +.equ DM_PBUF_BASE, 0xFFFFFE80 // base address of program buffer +.equ DM_DATA_BASE, 0xFFFFFF00 // base address of abstract data buffer +.equ DM_SREG_BASE, 0xFFFFFF80 // base address of status register(s) -// BASE + 0: exception entry - signal EXCEPTION condition to DM and restart parking loop +// Request register (DM_SREG_BASE read-access) byte-field bits +.equ REQ_RES, 0 // r/-: DM requests to resume +.equ REQ_EXE, 1 // r/-: DM requests to execute program buffer + +// Acknowledge register (DM_SREG_BASE write-access) address offsets +.equ ACK_HLT, 0x0 // -/w: CPU has halted in debug mode and is waiting in park loop +.equ ACK_RES, 0x4 // -/w: CPU starts to resume +.equ ACK_EXE, 0x8 // -/w: CPU starts to execute program buffer +.equ ACK_EXC, 0xC // -/w: CPU has detected an exception while in debug-mode + +_ocd_start: + +// BASE + 0: exception entry - exception during program buffer execution entry_exception: - sb zero, (DM_SREG_BASE+SREG_EXC_ACK)(zero) // trigger exception-acknowledge to inform DM - ebreak // re-enter debug mode (at "entry_normal" entry point) + sw zero, (DM_SREG_BASE+ACK_EXC)(zero) // send exception-acknowledge (no need for a hart ID) + csrr x8, dscratch0 // restore x8 from dscratch0 (might be changed during PBUF execution) + ebreak // re-enter 
debug mode (at "entry_normal" entry point) + nop -// BASE + 8: normal entry - ebreak in debug-mode, halt request or return from single-stepped instruction +// BASE + 16: normal entry - ebreak in debug-mode, halt request or return from single-stepped instruction entry_normal: - csrw dscratch0, x8 // backup x8 to dscratch0 so we have a GPR available + csrw dscratch0, x8 // backup x8 to dscratch0 so we have a GPR available + csrr x8, mhartid // get hart ID (0..3) + sw x8, (DM_SREG_BASE+ACK_HLT)(zero) // send halt-acknowledge // polling loop - waiting for requests park_loop: - sb zero, (DM_SREG_BASE+SREG_HLT_ACK)(zero) // ACK that CPU is halted - lbu x8, (DM_SREG_BASE+SREG_EXE_REQ)(zero) // request to execute program buffer? - bnez x8, execute - lbu x8, (DM_SREG_BASE+SREG_RES_REQ)(zero) // request to resume? - beqz x8, park_loop + csrr x8, mhartid // get hart ID (0..3) + lbu x8, DM_SREG_BASE(x8) // read hart-specific byte from request register + andi x8, x8, 1 << REQ_EXE // execute-request bit set? + bnez x8, execute + + csrr x8, mhartid // get hart ID (0..3) + lbu x8, DM_SREG_BASE(x8) // read hart-specific byte from request register + andi x8, x8, 1 << REQ_RES // resume-request bit set? 
+ beqz x8, park_loop // resume normal operation resume: - sb zero, (DM_SREG_BASE+SREG_RES_ACK)(zero) // ACK that CPU is about to resume - csrr x8, dscratch0 // restore x8 from dscratch0 - dret // exit debug mode + csrr x8, mhartid // get hart ID (0..3) + sw x8, (DM_SREG_BASE+ACK_RES)(zero) // send resume-acknowledge + csrr x8, dscratch0 // restore x8 from dscratch0 + dret // exit debug mode -// execute program buffer +// execute program buffer (implicit ebreak at the end of the buffer will bring us back to 'entry_normal') execute: - sb zero, (DM_SREG_BASE+SREG_EXE_ACK)(zero) // ACK that execution is about to start - csrr x8, dscratch0 // restore x8 from dscratch0 - fence.i // synchronize instruction fetch with memory (PBUF) - jalr zero, zero, %lo(DM_PBUF_BASE) // jump to beginning of program buffer (PBUF) + csrr x8, mhartid // get hart ID (0..3) + sw x8, (DM_SREG_BASE+ACK_EXE)(zero) // send execute-acknowledge + csrr x8, dscratch0 // restore x8 from dscratch0 + fence.i // synchronize instruction fetch with memory (PBUF) + jalr zero, zero, %lo(DM_PBUF_BASE) // jump to beginning of program buffer (PBUF) // fill remaining ROM space with instructions that cause a debug-mode-internal exception -unused: - ecall // should never be reached +unused: // should never be reached + ecall + ecall + ecall + ecall + ecall + ecall + ecall + ecall From 40cc86b0924ffe4293ab4b810be2afbf7c4eae6d Mon Sep 17 00:00:00 2001 From: stnolting Date: Sat, 28 Dec 2024 16:26:34 +0100 Subject: [PATCH 04/11] [dm] add multi-hart support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ⚠️ this is highly experimental! 
--- rtl/core/neorv32_debug_dm.vhd | 396 +++++++++++++++++++--------------- 1 file changed, 225 insertions(+), 171 deletions(-) diff --git a/rtl/core/neorv32_debug_dm.vhd b/rtl/core/neorv32_debug_dm.vhd index af2d70b84..237e58a3d 100644 --- a/rtl/core/neorv32_debug_dm.vhd +++ b/rtl/core/neorv32_debug_dm.vhd @@ -1,7 +1,8 @@ -- ================================================================================ -- --- NEORV32 SoC - RISC-V-Compatible Debug Module (DM) -- +-- NEORV32 OCD - RISC-V-Compatible Debug Module (DM) -- -- -------------------------------------------------------------------------------- -- -- Execution-based debugger compatible to the "Minimal RISC-V Debug Specification". -- +-- The DM can support up to 4 harts in parallel. -- -- -------------------------------------------------------------------------------- -- -- The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 -- -- Copyright (c) NEORV32 contributors. -- @@ -19,31 +20,32 @@ use neorv32.neorv32_package.all; entity neorv32_debug_dm is generic ( - AUTHENTICATOR : boolean -- implement authentication module when true + NUM_HARTS : natural range 1 to 4 := 1; -- number of physical CPU cores + AUTHENTICATOR : boolean := false -- implement authentication module when true ); port ( -- global control -- - clk_i : in std_ulogic; -- global clock line - rstn_i : in std_ulogic; -- global reset line, low-active + clk_i : in std_ulogic; -- global clock line + rstn_i : in std_ulogic; -- global reset line, low-active -- debug module interface (DMI) -- - dmi_req_i : in dmi_req_t; -- request - dmi_rsp_o : out dmi_rsp_t; -- response + dmi_req_i : in dmi_req_t; -- request + dmi_rsp_o : out dmi_rsp_t; -- response -- CPU bus access -- - bus_req_i : in bus_req_t; -- bus request - bus_rsp_o : out bus_rsp_t; -- bus response + bus_req_i : in bus_req_t; -- bus request + bus_rsp_o : out bus_rsp_t; -- bus response -- CPU control -- - cpu_ndmrstn_o : out std_ulogic; -- soc reset - cpu_halt_req_o : out 
std_ulogic -- request hart to halt (enter debug mode) + ndmrstn_o : out std_ulogic; -- soc reset + halt_req_o : out std_ulogic_vector(NUM_HARTS-1 downto 0) -- request hart to halt (enter debug mode) ); end neorv32_debug_dm; architecture neorv32_debug_dm_rtl of neorv32_debug_dm is - -- memory map; replicated throughout the entire device address space -- - constant dm_code_base_c : std_ulogic_vector(31 downto 0) := x"ffffff00"; -- code ROM (park loop) - constant dm_pbuf_base_c : std_ulogic_vector(31 downto 0) := x"ffffff40"; -- program buffer (PBUF) - constant dm_data_base_c : std_ulogic_vector(31 downto 0) := x"ffffff80"; -- abstract data buffer (DATA) - constant dm_sreg_base_c : std_ulogic_vector(31 downto 0) := x"ffffffC0"; -- status register (SREG) + -- memory map, 128 bytes per device; replicated throughout the entire device address space -- + constant dm_code_base_c : std_ulogic_vector(31 downto 0) := x"fffffe00"; -- code ROM (park loop) + constant dm_pbuf_base_c : std_ulogic_vector(31 downto 0) := x"fffffe80"; -- program buffer (PBUF) + constant dm_data_base_c : std_ulogic_vector(31 downto 0) := x"ffffff00"; -- abstract data buffer (DATA) + constant dm_sreg_base_c : std_ulogic_vector(31 downto 0) := x"ffffff80"; -- status register(s) (SREG) -- rv32i instruction prototypes -- constant instr_nop_c : std_ulogic_vector(31 downto 0) := x"00000013"; -- nop @@ -84,8 +86,11 @@ architecture neorv32_debug_dm_rtl of neorv32_debug_dm is command : std_ulogic_vector(31 downto 0); -- halt_req : std_ulogic; - resume_req : std_ulogic; + req_res : std_ulogic; reset_ack : std_ulogic; + hartsel : std_ulogic_vector(1+1 downto 0); -- plus one bit to detect "unavailable hart" + hartsel_dec : std_ulogic_vector(NUM_HARTS-1 downto 0); + hartsel_inv : std_ulogic; -- invalid/unavailable hart selection wr_acc_err : std_ulogic; rd_acc_err : std_ulogic; clr_acc_err : std_ulogic; @@ -118,10 +123,10 @@ architecture neorv32_debug_dm_rtl of neorv32_debug_dm is illegal_cmd : std_ulogic; cmderr : 
std_ulogic_vector(2 downto 0); -- hart status -- - hart_halted : std_ulogic; - hart_resume_req : std_ulogic; - hart_resume_ack : std_ulogic; - hart_reset : std_ulogic; + hart_halted : std_ulogic_vector(NUM_HARTS-1 downto 0); + hart_resume_req : std_ulogic_vector(NUM_HARTS-1 downto 0); + hart_resume_ack : std_ulogic_vector(NUM_HARTS-1 downto 0); + hart_reset : std_ulogic_vector(NUM_HARTS-1 downto 0); end record; signal dm_ctrl : dm_ctrl_t; @@ -139,50 +144,60 @@ architecture neorv32_debug_dm_rtl of neorv32_debug_dm is -- CPU Bus and Debug Interfaces -- ---------------------------------------------------------- - -- status and control register - bits -- - -- for write access we only care about the actual BYTE WRITE ACCESSES! -- - constant sreg_halt_ack_c : natural := 0; -- -/w: CPU is halted in debug mode and waits in park loop - constant sreg_resume_req_c : natural := 8; -- r/-: DM requests CPU to resume - constant sreg_resume_ack_c : natural := 8; -- -/w: CPU starts resuming - constant sreg_execute_req_c : natural := 16; -- r/-: DM requests to execute program buffer - constant sreg_execute_ack_c : natural := 16; -- -/w: CPU starts to execute program buffer - constant sreg_exception_ack_c : natural := 24; -- -/w: CPU has detected an exception - -- code ROM containing "park loop" -- - -- copied manually from 'sw/ocd-firmware/neorv32_debug_mem_code.vhd' -- - type code_rom_t is array (0 to 15) of std_ulogic_vector(31 downto 0); - constant code_rom : code_rom_t := ( - 00 => x"fc0001a3", - 01 => x"00100073", - 02 => x"7b241073", - 03 => x"fc000023", - 04 => x"fc204403", - 05 => x"00041c63", - 06 => x"fc104403", - 07 => x"fe0408e3", - 08 => x"fc0000a3", - 09 => x"7b202473", - 10 => x"7b200073", - 11 => x"fc000123", - 12 => x"7b202473", - 13 => x"0000100f", - 14 => x"f4000067", - 15 => x"00000073" + -- copied manually from 'sw/ocd-firmware/neorv32_application_image.vhd' -- + type code_rom_t is array (0 to 31) of std_ulogic_vector(31 downto 0); + constant code_rom_c : 
code_rom_t := ( + 00 => x"f8002623", + 01 => x"7b202473", + 02 => x"00100073", + 03 => x"00000013", + 04 => x"7b241073", + 05 => x"f1402473", + 06 => x"f8802023", + 07 => x"f1402473", + 08 => x"f8044403", + 09 => x"00247413", + 10 => x"02041263", + 11 => x"f1402473", + 12 => x"f8044403", + 13 => x"00147413", + 14 => x"fe0402e3", + 15 => x"f1402473", + 16 => x"f8802223", + 17 => x"7b202473", + 18 => x"7b200073", + 19 => x"f1402473", + 20 => x"f8802423", + 21 => x"7b202473", + 22 => x"0000100f", + 23 => x"e8000067", + 24 => x"00000073", + 25 => x"00000073", + 26 => x"00000073", + 27 => x"00000073", + 28 => x"00000073", + 29 => x"00000073", + 30 => x"00000073", + 31 => x"00000073" ); -- CPU access helpers -- signal accen, rden, wren : std_ulogic; + -- CPU response (hart ID) decoder -- + signal cpu_rsp_dec : std_ulogic_vector(NUM_HARTS-1 downto 0); + -- Debug Core Interface -- type dci_t is record - halt_ack : std_ulogic; -- CPU (re-)entered HALT state (single-shot) - resume_req : std_ulogic; -- DM wants the CPU to resume when set - resume_ack : std_ulogic; -- CPU starts resuming when set (single-shot) - execute_req : std_ulogic; -- DM wants CPU to execute program buffer when set - execute_ack : std_ulogic; -- CPU starts executing program buffer when set (single-shot) - exception_ack : std_ulogic; -- CPU has detected an exception (single-shot) - data_we : std_ulogic; -- write abstract data - data_reg : std_ulogic_vector(31 downto 0); -- memory-mapped data exchange register + ack_hlt : std_ulogic_vector(NUM_HARTS-1 downto 0); -- CPU (re-)entered HALT state (single-shot) + req_res : std_ulogic_vector(NUM_HARTS-1 downto 0); -- DM wants the CPU to resume when set + ack_res : std_ulogic_vector(NUM_HARTS-1 downto 0); -- CPU starts resuming when set (single-shot) + req_exe : std_ulogic_vector(NUM_HARTS-1 downto 0); -- DM wants CPU to execute program buffer when set + ack_exe : std_ulogic_vector(NUM_HARTS-1 downto 0); -- CPU starts executing program buffer when set 
(single-shot) + ack_exc : std_ulogic; -- CPU has detected an exception (single-shot) + data_reg_we : std_ulogic; -- write abstract data + data_reg : std_ulogic_vector(31 downto 0); -- memory-mapped data exchange register end record; signal dci : dci_t; @@ -205,7 +220,7 @@ begin if (rstn_i = '0') then dm_ctrl.state <= CMD_IDLE; dm_ctrl.ldsw_progbuf <= (others => '0'); - dci.execute_req <= '0'; + dci.req_exe <= (others => '0'); dm_ctrl.pbuf_en <= '0'; dm_ctrl.illegal_cmd <= '0'; dm_ctrl.illegal_state <= '0'; @@ -214,7 +229,7 @@ begin if (dm_reg.dmcontrol_dmactive = '0') then -- DM reset / DM disabled dm_ctrl.state <= CMD_IDLE; dm_ctrl.ldsw_progbuf <= instr_sw_c; - dci.execute_req <= '0'; + dci.req_exe <= (others => '0'); dm_ctrl.pbuf_en <= '0'; dm_ctrl.illegal_cmd <= '0'; dm_ctrl.illegal_state <= '0'; @@ -222,7 +237,7 @@ begin else -- DM active -- defaults -- - dci.execute_req <= '0'; + dci.req_exe <= (others => '0'); dm_ctrl.illegal_cmd <= '0'; dm_ctrl.illegal_state <= '0'; @@ -248,7 +263,7 @@ begin (dm_reg.command(22 downto 20) = "010") and -- aarsize: has to be 32-bit (dm_reg.command(19) = '0') and -- aarpostincrement: not supported ((dm_reg.command(17) = '0') or (dm_reg.command(15 downto 5) = "00010000000")) then -- regno: only GPRs are supported: 0x1000..0x101f if transfer is set - if (dm_ctrl.hart_halted = '1') then -- CPU is halted + if (or_reduce_f(dm_ctrl.hart_halted and dm_reg.hartsel_dec) = '1') then -- selected CPU is halted dm_ctrl.state <= CMD_PREPARE; else -- error! 
CPU is still running dm_ctrl.illegal_state <= '1'; @@ -280,14 +295,14 @@ begin when CMD_TRIGGER => -- request CPU to execute command -- ------------------------------------------------------------ - dci.execute_req <= '1'; -- request execution - if (dci.execute_ack = '1') then -- CPU starts execution + dci.req_exe <= dm_reg.hartsel_dec; -- request execution + if (or_reduce_f(dci.ack_exe and dm_reg.hartsel_dec) = '1') then -- selected CPU starts execution dm_ctrl.state <= CMD_BUSY; end if; when CMD_BUSY => -- wait for CPU to finish -- ------------------------------------------------------------ - if (dci.halt_ack = '1') then -- CPU is parked (halted) again -> execution done + if (or_reduce_f(dci.ack_hlt and dm_reg.hartsel_dec) = '1') then -- selected CPU is parked (halted) again -> execution done dm_ctrl.state <= CMD_IDLE; end if; @@ -302,11 +317,10 @@ begin end case; -- error code -- - -- ------------------------------------------------------------ if (dm_ctrl.cmderr = "000") then -- ready to set new error if (dm_ctrl.illegal_state = '1') then -- cannot execute since hart is not in expected state dm_ctrl.cmderr <= "100"; - elsif (dci.exception_ack = '1') then -- exception during execution + elsif (dci.ack_exc = '1') then -- exception during execution (can only be caused by the currently selected hart) dm_ctrl.cmderr <= "011"; elsif (dm_ctrl.illegal_cmd = '1') then -- unsupported command dm_ctrl.cmderr <= "010"; @@ -330,41 +344,48 @@ begin hart_status: process(rstn_i, clk_i) begin if (rstn_i = '0') then - dm_ctrl.hart_halted <= '0'; - dm_ctrl.hart_resume_req <= '0'; - dm_ctrl.hart_resume_ack <= '0'; - dm_ctrl.hart_reset <= '0'; + dm_ctrl.hart_halted <= (others => '0'); + dm_ctrl.hart_resume_req <= (others => '0'); + dm_ctrl.hart_resume_ack <= (others => '0'); + dm_ctrl.hart_reset <= (others => '0'); elsif rising_edge(clk_i) then - -- halted ACK -- - if (dm_reg.dmcontrol_ndmreset = '1') then - dm_ctrl.hart_halted <= '0'; - elsif (dci.halt_ack = '1') then - 
dm_ctrl.hart_halted <= '1'; - elsif (dci.resume_ack = '1') then - dm_ctrl.hart_halted <= '0'; - end if; - -- resume REQ -- - if (dm_reg.dmcontrol_ndmreset = '1') then - dm_ctrl.hart_resume_req <= '0'; - elsif (dm_reg.resume_req = '1') then - dm_ctrl.hart_resume_req <= '1'; - elsif (dci.resume_ack = '1') then - dm_ctrl.hart_resume_req <= '0'; - end if; - -- resume ACK -- - if (dm_reg.dmcontrol_ndmreset = '1') then - dm_ctrl.hart_resume_ack <= '0'; - elsif (dci.resume_ack = '1') then - dm_ctrl.hart_resume_ack <= '1'; - elsif (dm_reg.resume_req = '1') then - dm_ctrl.hart_resume_ack <= '0'; - end if; - -- reset ACK -- - if (dm_reg.dmcontrol_ndmreset = '1') then -- explicit RESET triggered by DM - dm_ctrl.hart_reset <= '1'; - elsif (dm_reg.reset_ack = '1') then - dm_ctrl.hart_reset <= '0'; - end if; + for i in 0 to NUM_HARTS-1 loop + + -- halted ACK -- + if (dm_reg.dmcontrol_ndmreset = '1') then -- DM reset + dm_ctrl.hart_halted(i) <= '0'; + elsif (dci.ack_hlt(i) = '1') then + dm_ctrl.hart_halted(i) <= '1'; + elsif (dci.ack_res(i) = '1') then + dm_ctrl.hart_halted(i) <= '0'; + end if; + + -- resume REQ -- + if (dm_reg.dmcontrol_ndmreset = '1') then -- DM reset + dm_ctrl.hart_resume_req(i) <= '0'; + elsif (dm_reg.req_res = '1') and (dm_reg.hartsel_dec(i) = '1') then + dm_ctrl.hart_resume_req(i) <= '1'; + elsif (dci.ack_res(i) = '1') then + dm_ctrl.hart_resume_req(i) <= '0'; + end if; + + -- resume ACK -- + if (dm_reg.dmcontrol_ndmreset = '1') then -- DM reset + dm_ctrl.hart_resume_ack(i) <= '0'; + elsif (dci.ack_res(i) = '1') then + dm_ctrl.hart_resume_ack(i) <= '1'; + elsif (dm_reg.req_res = '1') and (dm_reg.hartsel_dec(i) = '1') then + dm_ctrl.hart_resume_ack(i) <= '0'; + end if; + + -- reset ACK -- + if (dm_reg.dmcontrol_ndmreset = '1') then -- DM reset + dm_ctrl.hart_reset(i) <= '1'; + elsif (dm_reg.reset_ack = '1') and (dm_reg.hartsel_dec(i) = '1') then + dm_ctrl.hart_reset(i) <= '0'; + end if; + + end loop; end if; end process hart_status; @@ -384,15 +405,16 @@ 
begin dm_reg.progbuf <= (others => instr_nop_c); -- dm_reg.halt_req <= '0'; - dm_reg.resume_req <= '0'; + dm_reg.req_res <= '0'; dm_reg.reset_ack <= '0'; + dm_reg.hartsel <= (others => '0'); dm_reg.wr_acc_err <= '0'; dm_reg.clr_acc_err <= '0'; dm_reg.autoexec_wr <= '0'; elsif rising_edge(clk_i) then -- default -- - dm_reg.resume_req <= '0'; + dm_reg.req_res <= '0'; dm_reg.reset_ack <= '0'; dm_reg.wr_acc_err <= '0'; dm_reg.clr_acc_err <= '0'; @@ -402,8 +424,9 @@ begin if (dmi_req_i.addr = addr_dmcontrol_c) then if (dmi_wren_auth = '1') then -- valid and authenticated DM write access dm_reg.halt_req <= dmi_req_i.data(31); -- haltreq (-/w): write 1 to request halt; has to be cleared again by debugger - dm_reg.resume_req <= dmi_req_i.data(30); -- resumereq (-/w1): write 1 to request resume; auto-clears + dm_reg.req_res <= dmi_req_i.data(30); -- resumereq (-/w1): write 1 to request resume; auto-clears dm_reg.reset_ack <= dmi_req_i.data(28); -- ackhavereset (-/w1): write 1 to ACK reset; auto-clears + dm_reg.hartsel <= dmi_req_i.data(18 downto 16); -- hartsello (r/w): up to 4 harts are supported (plus 1 bit to detect unavailable) dm_reg.dmcontrol_ndmreset <= dmi_req_i.data(1); -- ndmreset (r/w): SoC reset when high end if; if (dmi_wren = '1') then -- valid DM write access (may be unauthenticated) @@ -458,18 +481,28 @@ begin end if; end process dmi_write_access; + -- hart select decoder (one-hot) -- + hartsel_decode: + for i in 0 to NUM_HARTS-1 generate + dm_reg.hartsel_dec(i) <= '1' when (dm_reg.hartsel(2) = '0') and (dm_reg.hartsel(1 downto 0) = std_ulogic_vector(to_unsigned(i, 2))) else '0'; + end generate; + dm_reg.hartsel_inv <= '0' when (unsigned(dm_reg.hartsel) < NUM_HARTS) else '1'; -- invalid/unavailable hart selection + -- Direct Control ------------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- -- write to abstract data register -- - dci.data_we <= '1' 
when (dmi_wren_auth = '1') and (dmi_req_i.addr = addr_data0_c) and (dm_ctrl.busy = '0') else '0'; + dci.data_reg_we <= '1' when (dmi_wren_auth = '1') and (dmi_req_i.addr = addr_data0_c) and (dm_ctrl.busy = '0') else '0'; - -- CPU halt/resume request -- - cpu_halt_req_o <= dm_reg.halt_req and dm_reg.dmcontrol_dmactive when ((not AUTHENTICATOR) or (auth.valid = '1')) else '0'; - dci.resume_req <= dm_ctrl.hart_resume_req; -- active until explicitly cleared + -- CPU halt/resume requests -- + request_gen: + for i in 0 to NUM_HARTS-1 generate + halt_req_o(i) <= dm_reg.halt_req and dm_reg.hartsel_dec(i) and dm_reg.dmcontrol_dmactive when ((not AUTHENTICATOR) or (auth.valid = '1')) else '0'; + dci.req_res(i) <= dm_ctrl.hart_resume_req(i); -- active until explicitly cleared + end generate; -- SoC reset -- - cpu_ndmrstn_o <= '0' when (dm_reg.dmcontrol_ndmreset = '1') and (dm_reg.dmcontrol_dmactive = '1') and ((not AUTHENTICATOR) or (auth.valid = '1')) else '1'; + ndmrstn_o <= '0' when (dm_reg.dmcontrol_ndmreset = '1') and (dm_reg.dmcontrol_dmactive = '1') and ((not AUTHENTICATOR) or (auth.valid = '1')) else '1'; -- construct program buffer array for CPU access -- cpu_progbuf(0) <= dm_ctrl.ldsw_progbuf; -- pseudo program buffer for GPR<->DM.data0 transfer @@ -495,23 +528,23 @@ begin -- debug module status register -- when addr_dmstatus_c => if (not AUTHENTICATOR) or (auth.valid = '1') then -- authenticated? 
- dmi_rsp_o.data(31 downto 23) <= (others => '0'); -- reserved (r/-) - dmi_rsp_o.data(22) <= '1'; -- impebreak (r/-): there is an implicit ebreak instruction after the visible program buffer - dmi_rsp_o.data(21 downto 20) <= (others => '0'); -- reserved (r/-) - dmi_rsp_o.data(19) <= dm_ctrl.hart_reset; -- allhavereset (r/-): there is only one hart that can be reset - dmi_rsp_o.data(18) <= dm_ctrl.hart_reset; -- anyhavereset (r/-): there is only one hart that can be reset - dmi_rsp_o.data(17) <= dm_ctrl.hart_resume_ack; -- allresumeack (r/-): there is only one hart that can acknowledge resume request - dmi_rsp_o.data(16) <= dm_ctrl.hart_resume_ack; -- anyresumeack (r/-): there is only one hart that can acknowledge resume request - dmi_rsp_o.data(15) <= '0'; -- allnonexistent (r/-): there is only one hart that is always existent - dmi_rsp_o.data(14) <= '0'; -- anynonexistent (r/-): there is only one hart that is always existent - dmi_rsp_o.data(13) <= dm_reg.dmcontrol_ndmreset; -- allunavail (r/-): there is only one hart that is unavailable during reset - dmi_rsp_o.data(12) <= dm_reg.dmcontrol_ndmreset; -- anyunavail (r/-): there is only one hart that is unavailable during reset - dmi_rsp_o.data(11) <= not dm_ctrl.hart_halted; -- allrunning (r/-): there is only one hart that can be RUNNING or HALTED - dmi_rsp_o.data(10) <= not dm_ctrl.hart_halted; -- anyrunning (r/-): there is only one hart that can be RUNNING or HALTED - dmi_rsp_o.data(9) <= dm_ctrl.hart_halted; -- allhalted (r/-): there is only one hart that can be RUNNING or HALTED - dmi_rsp_o.data(8) <= dm_ctrl.hart_halted; -- anyhalted (r/-): there is only one hart that can be RUNNING or HALTED - dmi_rsp_o.data(5) <= '0'; -- hasresethaltreq (r/-): halt-on-reset not implemented - dmi_rsp_o.data(4) <= '0'; -- confstrptrvalid (r/-): no configuration string available + dmi_rsp_o.data(31 downto 23) <= (others => '0'); -- reserved (r/-) + dmi_rsp_o.data(22) <= '1'; -- impebreak (r/-): there is an implicit ebreak 
instruction after the visible program buffer + dmi_rsp_o.data(21 downto 20) <= (others => '0'); -- reserved (r/-) + dmi_rsp_o.data(19) <= or_reduce_f(dm_ctrl.hart_reset and dm_reg.hartsel_dec); -- allhavereset (r/-): selected hart in reset + dmi_rsp_o.data(18) <= or_reduce_f(dm_ctrl.hart_reset and dm_reg.hartsel_dec); -- anyhavereset (r/-): selected hart in reset + dmi_rsp_o.data(17) <= or_reduce_f(dm_ctrl.hart_resume_ack and dm_reg.hartsel_dec); -- allresumeack (r/-): selected hart is resuming + dmi_rsp_o.data(16) <= or_reduce_f(dm_ctrl.hart_resume_ack and dm_reg.hartsel_dec); -- anyresumeack (r/-): selected hart is resuming + dmi_rsp_o.data(15) <= dm_reg.hartsel_inv; -- allnonexistent (r/-): invalid hart selection + dmi_rsp_o.data(14) <= dm_reg.hartsel_inv; -- anynonexistent (r/-): invalid hart selection + dmi_rsp_o.data(13) <= dm_reg.dmcontrol_ndmreset; -- allunavail (r/-): DM in reset + dmi_rsp_o.data(12) <= dm_reg.dmcontrol_ndmreset; -- anyunavail (r/-): DM in reset + dmi_rsp_o.data(11) <= not or_reduce_f(dm_ctrl.hart_halted and dm_reg.hartsel_dec); -- allrunning (r/-): selected hart not halted + dmi_rsp_o.data(10) <= not or_reduce_f(dm_ctrl.hart_halted and dm_reg.hartsel_dec); -- anyrunning (r/-): selected hart not halted + dmi_rsp_o.data(9) <= or_reduce_f(dm_ctrl.hart_halted and dm_reg.hartsel_dec); -- allhalted (r/-): selected hart halted + dmi_rsp_o.data(8) <= or_reduce_f(dm_ctrl.hart_halted and dm_reg.hartsel_dec); -- anyhalted (r/-): selected hart halted + dmi_rsp_o.data(5) <= '0'; -- hasresethaltreq (r/-): halt-on-reset not implemented + dmi_rsp_o.data(4) <= '0'; -- confstrptrvalid (r/-): no configuration string available end if; dmi_rsp_o.data(7) <= auth.valid; -- authenticated (r/-): authentication successful when set dmi_rsp_o.data(6) <= auth.busy; -- authbusy (r/-): wait for authenticator operation when set @@ -520,18 +553,18 @@ begin -- debug module control -- when addr_dmcontrol_c => if (not AUTHENTICATOR) or (auth.valid = '1') then -- 
authenticated? - dmi_rsp_o.data(31) <= '0'; -- haltreq (-/w): write-only - dmi_rsp_o.data(30) <= '0'; -- resumereq (-/w1): write-only - dmi_rsp_o.data(29) <= '0'; -- hartreset (r/w): not supported - dmi_rsp_o.data(28) <= '0'; -- ackhavereset (-/w1): write-only - dmi_rsp_o.data(27) <= '0'; -- reserved (r/-) - dmi_rsp_o.data(26) <= '0'; -- hasel (r/-) - only a single hart can be selected at once - dmi_rsp_o.data(25 downto 16) <= (others => '0'); -- hartsello (r/-) - there is only one hart - dmi_rsp_o.data(15 downto 6) <= (others => '0'); -- hartselhi (r/-) - there is only one hart - dmi_rsp_o.data(5 downto 4) <= (others => '0'); -- reserved (r/-) - dmi_rsp_o.data(3) <= '0'; -- setresethaltreq (-/w1): halt-on-reset request - halt-on-reset not implemented - dmi_rsp_o.data(2) <= '0'; -- clrresethaltreq (-/w1): halt-on-reset ack - halt-on-reset not implemented - dmi_rsp_o.data(1) <= dm_reg.dmcontrol_ndmreset; -- ndmreset (r/w): soc reset + dmi_rsp_o.data(31) <= '0'; -- haltreq (-/w): write-only + dmi_rsp_o.data(30) <= '0'; -- resumereq (-/w1): write-only + dmi_rsp_o.data(29) <= '0'; -- hartreset (r/w): not supported + dmi_rsp_o.data(28) <= '0'; -- ackhavereset (-/w1): write-only + dmi_rsp_o.data(27) <= '0'; -- reserved (r/-) + dmi_rsp_o.data(26) <= '0'; -- hasel (r/-) - only a single hart can be selected at once + dmi_rsp_o.data(25 downto 16) <= "0000000" & dm_reg.hartsel; -- hartsello (r/w) - only up to 4 harts are supported (plus 1 bit to detect unavailable) + dmi_rsp_o.data(15 downto 6) <= (others => '0'); -- hartselhi (r/-) - hardwired to zero; hartsello is sufficient + dmi_rsp_o.data(5 downto 4) <= (others => '0'); -- reserved (r/-) + dmi_rsp_o.data(3) <= '0'; -- setresethaltreq (-/w1): halt-on-reset request - halt-on-reset not implemented + dmi_rsp_o.data(2) <= '0'; -- clrresethaltreq (-/w1): halt-on-reset ack - halt-on-reset not implemented + dmi_rsp_o.data(1) <= dm_reg.dmcontrol_ndmreset; -- ndmreset (r/w): soc reset end if; dmi_rsp_o.data(0) <= 
dm_reg.dmcontrol_dmactive; -- dmactive (r/w): DM reset @@ -576,11 +609,11 @@ begin when addr_authdata_c => dmi_rsp_o.data <= auth.rdata; --- -- halt summary 0 (not required for DM spec. v1.0 if there is only a single hart) -- --- when "1000000" => -- haltsum0 --- if (not AUTHENTICATOR) or (auth.valid = '1') then -- authenticated? --- dmi_rsp_o.data(0) <= dm_ctrl.hart_halted; -- hart 0 is halted --- end if; + -- halt summary 0 -- + when addr_haltsum0_c => -- haltsum0 + if (not AUTHENTICATOR) or (auth.valid = '1') then -- authenticated? + dmi_rsp_o.data(NUM_HARTS-1 downto 0) <= dm_ctrl.hart_halted(NUM_HARTS-1 downto 0); -- hart i is halted + end if; -- not implemented or read-only-zero -- when others => -- addr_sbcs_c, addr_progbuf0_c, addr_progbuf1_c, addr_nextdm_c, addr_command_c @@ -617,58 +650,79 @@ begin bus_access: process(rstn_i, clk_i) begin if (rstn_i = '0') then - bus_rsp_o <= rsp_terminate_c; - dci.data_reg <= (others => '0'); - dci.halt_ack <= '0'; - dci.resume_ack <= '0'; - dci.execute_ack <= '0'; - dci.exception_ack <= '0'; + bus_rsp_o <= rsp_terminate_c; + dci.data_reg <= (others => '0'); + dci.ack_hlt <= (others => '0'); + dci.ack_res <= (others => '0'); + dci.ack_exe <= (others => '0'); + dci.ack_exc <= '0'; elsif rising_edge(clk_i) then -- bus handshake -- - bus_rsp_o.ack <= accen; - bus_rsp_o.err <= '0'; - bus_rsp_o.data <= (others => '0'); + bus_rsp_o.ack <= accen; + bus_rsp_o.err <= '0'; - -- data buffer -- - if (dci.data_we = '1') then -- DM write access + -- data buffer access -- + if (dci.data_reg_we = '1') then -- DM write access dci.data_reg <= dmi_req_i.data; - elsif (bus_req_i.addr(7 downto 6) = dm_data_base_c(7 downto 6)) and (wren = '1') then -- CPU write access + elsif (wren = '1') and (bus_req_i.addr(8 downto 7) = dm_data_base_c(8 downto 7)) then -- CPU write access dci.data_reg <= bus_req_i.data; end if; - -- control and status register CPU write access -- - dci.halt_ack <= '0'; -- all writable flags auto-clear - dci.resume_ack <= 
'0'; - dci.execute_ack <= '0'; - dci.exception_ack <= '0'; - if (bus_req_i.addr(7 downto 6) = dm_sreg_base_c(7 downto 6)) and (wren = '1') then - dci.halt_ack <= bus_req_i.ben(sreg_halt_ack_c/8); -- [NOTE] use individual BYTE ENABLES and not the actual write data - dci.resume_ack <= bus_req_i.ben(sreg_resume_ack_c/8); - dci.execute_ack <= bus_req_i.ben(sreg_execute_ack_c/8); - dci.exception_ack <= bus_req_i.ben(sreg_exception_ack_c/8); + -- CPU status register write access -- + dci.ack_hlt <= (others => '0'); -- all writable flags auto-clear + dci.ack_res <= (others => '0'); + dci.ack_exe <= (others => '0'); + dci.ack_exc <= '0'; + if (wren = '1') and (bus_req_i.addr(8 downto 7) = dm_sreg_base_c(8 downto 7)) then + for i in 0 to NUM_HARTS-1 loop + case bus_req_i.addr(3 downto 2) is + when "00" => dci.ack_hlt(i) <= cpu_rsp_dec(i); -- CPU is HALTED in debug mode and waits in park loop + when "01" => dci.ack_res(i) <= cpu_rsp_dec(i); -- CPU starts RESUMING + when "10" => dci.ack_exe(i) <= cpu_rsp_dec(i); -- CPU starts to EXECUTE program buffer + when others => dci.ack_exc <= '1'; -- CPU has detected an EXCEPTION (can only be caused by the currently selected hart) + end case; + end loop; end if; - -- control and status register CPU read access -- + -- CPU read access -- + bus_rsp_o.data <= (others => '0'); -- default if (rden = '1') then -- output enable - case bus_req_i.addr(7 downto 6) is -- module select + case bus_req_i.addr(8 downto 7) is -- module select when "00" => -- dm_code_base_c: code ROM - bus_rsp_o.data <= code_rom(to_integer(unsigned(bus_req_i.addr(5 downto 2)))); + bus_rsp_o.data <= code_rom_c(to_integer(unsigned(bus_req_i.addr(6 downto 2)))); when "01" => -- dm_pbuf_base_c: program buffer bus_rsp_o.data <= cpu_progbuf(to_integer(unsigned(bus_req_i.addr(3 downto 2)))); when "10" => -- dm_data_base_c: data buffer bus_rsp_o.data <= dci.data_reg; - when others => -- dm_sreg_base_c: control and status register - bus_rsp_o.data(sreg_resume_req_c) <= 
dci.resume_req; - bus_rsp_o.data(sreg_execute_req_c) <= dci.execute_req; + when others => -- dm_sreg_base_c: request register + for i in 0 to NUM_HARTS-1 loop + bus_rsp_o.data(i*8+0) <= dci.req_res(i); -- DM requests CPU to resume + bus_rsp_o.data(i*8+1) <= dci.req_exe(i); -- DM requests CPU to execute program buffer + end loop; end case; end if; end if; end process bus_access; -- access helpers -- - accen <= bus_req_i.debug and bus_req_i.stb; -- allow access only when in debug-mode + accen <= bus_req_i.debug and bus_req_i.stb; -- access only when in debug-mode rden <= accen and (not bus_req_i.rw); - wren <= accen and ( bus_req_i.rw); + wren <= accen and ( bus_req_i.rw) and and_reduce_f(bus_req_i.ben); + + -- CPU response (hart ID) decoder for a single hart -- + hart_id_decode_single: + if NUM_HARTS = 1 generate + cpu_rsp_dec <= (others => '1'); + end generate; + + -- CPU response (hart ID) decoder for multiple harts (max 4) -- + hart_id_decode_multiple: + if NUM_HARTS > 1 generate + hart_id_decode_gen: + for i in 0 to NUM_HARTS-1 generate + cpu_rsp_dec(i) <= '1' when (bus_req_i.data(1 downto 0) = std_ulogic_vector(to_unsigned(i, 2))) else '0'; + end generate; + end generate; -- Authentication Module ------------------------------------------------------------------ From 510053e1b74ece73fdf251a38797a9cf1178e8e8 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sat, 28 Dec 2024 17:59:12 +0100 Subject: [PATCH 05/11] [docs] update section "on-chip debugger" --- docs/datasheet/on_chip_debugger.adoc | 267 ++++++++++++++++----------- 1 file changed, 159 insertions(+), 108 deletions(-) diff --git a/docs/datasheet/on_chip_debugger.adoc b/docs/datasheet/on_chip_debugger.adoc index db634115c..50fb0dc0e 100644 --- a/docs/datasheet/on_chip_debugger.adoc +++ b/docs/datasheet/on_chip_debugger.adoc @@ -2,15 +2,18 @@ :sectnums: == On-Chip Debugger (OCD) -The NEORV32 Processor features an _on-chip debugger_ (OCD) implementing the **execution-based debugging** scheme -compatible to the 
**Minimal RISC-V Debug Specification**. A copy of the specification is available in `docs/references`. -The on-chip debugger is implemented via the <<_processor_top_entity_generics, `OCD_EN`>> processor top generic. +The NEORV32 Processor features an _on-chip debugger_ (OCD) compatible to the **Minimal RISC-V Debug Specification** +implementing the **execution-based debugging** scheme. A copy of the specification is available in `docs/references`. +The on-chip debugger is implemented if the <<_processor_top_entity_generics, `OCD_EN`>> processor top generic is set +to `true`. **Key Features** * standard 4-wire JTAG access port +* debugging of up to 4 CPU cores ("harts") * full control of the CPU: halting, single-stepping and resuming * indirect access to all core registers and the entire processor address space (via program buffer) +* execution of arbitrary programs via the program buffer * compatible with upstream OpenOCD and GDB * optional trigger module for hardware breakpoints * optional authentication for increased security @@ -21,46 +24,38 @@ A simple example on how to use NEORV32 on-chip debugger in combination with Open section https://stnolting.github.io/neorv32/ug/#_debugging_using_the_on_chip_debugger[Debugging using the On-Chip Debugger] of the User Guide. -**Section Structure** - -* <<_debug_transport_module_dtm>> -* <<_debug_module_dm>> -* <<_debug_authentication>> -* <<_cpu_debug_mode>> -* <<_trigger_module>> - -The NEORV32 on-chip debugger is based on five hardware modules: +**Overview** .NEORV32 on-chip debugger complex image::neorv32_ocd_complex.png[align=center] +The NEORV32 on-chip debugger is based on five hardware modules: + [start=1] -. <<_debug_transport_module_dtm>>: JTAG access tap to allow an external adapter to interface with the _debug module (DM)_. -. <<_debug_module_dm>>: RISC-V debug module that is configured by the DTM. 
From the CPU's perspective this module behaves as -another memory-mapped peripheral that can be accessed via the processor-internal bus. The memory-mapped registers provide an -internal _data buffer_ for data transfer from/to the DM, a _code ROM_ containing the "park loop" code, a _program buffer_ to -allow the debugger to execute small programs defined by the DM and a _status register_ that is used to communicate _exception_, -_halt_, _resume_ and _execute_ requests/acknowledges from/to the DM. -. <<_debug_authentication>>: Authenticator module to secure on-chip debugger access. This module implements a very simple -authentication mechanism as example. Users can modify/replace this default logic to implement arbitrary authentication mechanism. -. <<_cpu_debug_mode>> ISA extension: This ISA extension provides the "debug execution mode" as another operation mode that is -used to execute the park loop code from the DM. This mode also provides additional CSRs and instructions. -. CPU <<_trigger_module>>: This module provides a single _hardware_ breakpoint. +. <<_debug_transport_module_dtm>>: JTAG access tap to allow an external adapter to interface with the _debug module (DM)_. +. <<_debug_module_dm>>: The RISC-V debug module is the main bridge between the external debugger and the processor being +debugged. It provides a _data buffer_ for data transfer from/to the DM, a _code ROM_ containing the "park loop" code, a +_program buffer_ to allow the debugger to execute small programs defined by the DM and a _status register_ that is used +to communicate _exception_, _halt_, _resume_ and _execute_ requests/acknowledges between the debugger and the CPU. +. <<_debug_authentication>>: Authenticator module to secure on-chip debugger access. By default this module implements a +very simple authentication mechanism as example. Users can modify/replace this default logic to implement arbitrary +authentication mechanism. +. 
<<_cpu_debug_mode>> ISA extension: This ISA extension provides the "debug execution mode" as another CPU operation mode +that is used to execute the park loop code from the DM. This mode also provides additional CSRs and instructions. +. CPU <<_trigger_module>>: This module provides a single _hardware breakpoint_. **Theory of Operation** -When debugging the system using the OCD, the debugger (like GDB) issues a halt request to the CPU to make the it enter -_debug mode_. In this mode the application-defined architectural state of the system/CPU is "frozen" so the debugger -can monitor it without interfering with the actual application. However, the OCD can also modify the entire architectural -state at any time. While in debug mode, the debugger has full control over the entire CPU and processor operating at -highest-privileged mode. +When debugging the system using the OCD, the external debugger (e.g. GDB) issues a halt request to the CPU to make it +enter so-called _debug mode_. In this mode the application-defined architectural state of the system/CPU is "frozen" so +the debugger can monitor it without interfering with the actual application. However, the OCD can also modify the entire +architectural state at any time. While in debug mode, the debugger has full control over the entire CPU core. -While in debug mode, the CPU executes the "park loop" code from the code ROM of the debug module (DM). -This park loop implements an endless loop, where the CPU polls a memory-mapped <<_status_register>> that is -controlled by the DM. The flags in this register are used to communicate requests from the DM and to acknowledge -them by the CPU: trigger execution of the program buffer or resume the halted application. Furthermore, the CPU -uses this register to signal that the CPU has halted after a halt request or to signal that an exception has been -raised while being in debug mode. 
+After halting, the CPU executes the "park loop" code from the code ROM of the debug module (DM). This park loop implements +an endless loop that is used to poll a memory-mapped <<_status_register>> of the DM. The flags in this register are used to +communicate requests from the DM and to acknowledge their processing by the CPU: trigger execution of the program buffer +or resume the halted application. Furthermore, the CPU uses this register to signal that the CPU has halted after a halt +request or to signal that an exception has been raised while being in debug mode. <<< @@ -68,10 +63,10 @@ raised while being in debug mode. :sectnums: === Debug Transport Module (DTM) -The debug transport module "DTM" (VHDL module: `rtl/core/neorv32_debug_dtm.vhd`) provides a standard 4-wire JTAG test -access port ("tap") via the following top-level ports: +The debug transport module "DTM" (VHDL module: `rtl/core/neorv32_debug_dtm.vhd`) provides a bridge between a standard 4-wire +JTAG test access port ("tap") and the internal debug module interface. -.JTAG top level signals +.JTAG Top Level Signals of the DTM [cols="^2,^2,^2,<8"] [options="header",grid="rows"] |======================= @@ -84,35 +79,46 @@ access port ("tap") via the following top-level ports: .Maximum JTAG Clock [IMPORTANT] -All JTAG signals are synchronized to the processor's clock domain. Hence, no additional clock domain is required for the DTM. -However, this constraints the maximal JTAG clock frequency (`jtag_tck_i`) to be less than or equal to **1/5** of the processor -clock frequency (`clk_i`). +All JTAG signals are synchronized to the processor's clock domain. Hence, no additional clock domain is required +for the DTM. However, this constrains the maximal JTAG clock frequency (`jtag_tck_i`) to be less than or equal +to **1/5** of the processor clock frequency (`clk_i`). .JTAG TAP Reset [NOTE] -The NEORV32 JTAG TAP does not provide a dedicated reset signal ("TRST").
However, the missing TRST is not a problem, -since JTAG-level resets can be triggered using with TMS signaling. +The NEORV32 JTAG TAP does not provide a dedicated reset signal ("TRST"). +However, JTAG-level resets can be triggered using TMS signaling. -.Maintaining JTAG Chain +.Maintaining the JTAG Chain [NOTE] If the on-chip debugger is disabled the JTAG serial input `jtag_tdi_i` is directly connected to the JTAG serial output `jtag_tdo_o` to maintain the JTAG chain. -JTAG accesses are based on a single 5-bit _instruction register_ `IR` and several _data registers_ `DR` -with different sizes. The individual data registers are accessed by writing the according address to the instruction -register. The following table shows the available data registers and their addresses: +The DTM implements a single 5-bit _instruction register_ `IR` and several _data registers_ `DR` with different sizes. The +individual data registers are accessed by writing the according address to the instruction register.
The following table +shows all available data registers and their addresses: .JTAG TAP registers [cols="^2,^2,^2,<8"] [options="header",grid="rows"] |======================= | Address (via `IR`) | Name | Size (bits) | Description -| `00001` | `IDCODE` | 32 | identifier, version and part ID fields are hardwired to zero, manufacturer ID is assigned via the <<_processor_top_entity_generics, `JEDEC_ID`>> generic +| `00001` | `IDCODE` | 32 | identification code (see below) | `10000` | `DTMCS` | 32 | debug transport module control and status register (see below) -| `10001` | `DMI` | 41 | debug module interface: 7-bit address, 32-bit read/write data, 2-bit operation (`00` = NOP; `10` = write; `01` = read) +| `10001` | `DMI` | 41 | debug module interface (see below) | others | `BYPASS` | 1 | default JTAG bypass register |======================= +.`IDCODE` - DTM Identification Code Register +[cols="^2,^3,^1,<8"] +[options="header",grid="rows"] +|======================= +| Bit(s) | Name | R/W | Description +| 31:28 | `version` | r/- | version ID, hardwired to zero +| 27:12 | `partid` | r/- | part ID, hardwired to zero +| 11:1 | `manid` | r/- | JEDEC manufacturer ID, assigned via the <<_processor_top_entity_generics, `JEDEC_ID`>> generic +| 0 | - | r/- | hardwired to `1` +|======================= + .`DTMCS` - DTM Control and Status Register [cols="^2,^3,^1,<8"] [options="header",grid="rows"] @@ -128,6 +134,16 @@ register. The following table shows the available data registers and their addre | 3:0 | `version` | r/- | `0001` = DTM is compatible to RISC-V debug spec.
versions v0.13 and v1.0 |======================= +.`DMI` - DTM Debug Module Interface Register +[cols="^2,^3,^1,<8"] +[options="header",grid="rows"] +|======================= +| Bit(s) | Name | R/W | Description +| 40:34 | `address` | r/w | 7-bit address, see <<_dm_registers>> +| 33:2 | `data` | r/w | 32-bit to write/read to/from the addresses DM register +| 1:0 | `command` | r/w | 2-bit operation (`00` = NOP; `10` = write; `01` = read) +|======================= + <<< // #################################################################################################################### @@ -143,14 +159,14 @@ It supports the following features: * Provides abstract read and write access to the halted hart's general purpose registers. * Provides access to a reset signal that allows debugging from the very first instruction after reset. * Provides a _program buffer_ to force the hart to execute arbitrary instructions. -* Allows memory access from a hart's point of view. +* Allows memory accesses (to the entire address space) from a hart's point of view. * Optionally implements an authentication mechanism to secure on-chip debugger access. The NEORV32 DM follows the "Minimal RISC-V External Debug Specification" to provide full debugging capabilities while -keeping resource/area requirements at a minimum. It implements the **execution based debugging scheme** for a -single hart and provides the following architectural core features: +keeping resource/area requirements at a minimum. 
It implements the **execution based debugging scheme** for up to +four individual CPU cores ("harts") and provides the following architectural core features: -* program buffer with 2 entries and an implicit `ebreak` instruction +* program buffer with 2 entries and an implicit `ebreak` instruction at the end * indirect bus access via the CPU using the program buffer * abstract commands: "access register" plus auto-execution * halt-on-reset capability @@ -162,7 +178,7 @@ The NEORV32 DM complies to the RISC-V DM spec version 1.0. From the DTM's point of view, the DM implements a set of <<_dm_registers>> that are used to control and monitor the debugging session. From the CPU's point of view, the DM implements several memory-mapped registers that are used for -communicating debugging control and status (<<_dm_cpu_access>>). +communicating data, instructions, debugging control and status (<<_dm_cpu_access>>). :sectnums: @@ -172,15 +188,15 @@ The DM is controlled via a set of registers that are accessed via the DTM. The f .Unimplemented Registers [NOTE] -Write accesses to registers that are not implemented are simply ignored and read accesses -to these registers will always return zero. +Write accesses to registers that are not implemented are simply ignored and read accesses to these +registers will always return zero. In both cases no error condition is signaled to the DTM. .Available DM registers [cols="^2,^3,<7"] [options="header",grid="rows"] |======================= | Address | Name | Description -| 0x04 | <<_data0>> | Abstract data 0, used for data transfer between debugger and processor +| 0x04 | <<_data0>> | Abstract data register 0 | 0x10 | <<_dmcontrol>> | Debug module control | 0x11 | <<_dmstatus>> | Debug module status | 0x12 | <<_hartinfo>> | Hart information @@ -192,6 +208,7 @@ to these registers will always return zero. 
| 0x21 | <<_progbuf, `progbuf1`>> | Program buffer 1 | 0x30 | <<_authdata>> | Data to/from the authentication module | 0x38 | `sbcs` | System bus access control and status; reads as zero to indicate there is **no** system bus access +| 0x40 | <<_haltsum0>> | Hart halt summary |======================= @@ -223,12 +240,19 @@ are configured as "zero" and are read-only. Writing '1' to these bits/fields wil [cols="^1,^2,^1,<8"] [options="header",grid="rows"] |======================= -| Bit | Name [RISC-V] | R/W | Description -| 31 | `haltreq` | -/w | set/clear hart halt request -| 30 | `resumereq` | -/w | request hart to resume -| 28 | `ackhavereset` | -/w | write `1` to clear `*havereset` flags -| 1 | `ndmreset` | r/w | put whole system (except OCD) into reset state when `1` -| 0 | `dmactive` | r/w | DM enable; writing `0`-`1` will reset the DM +| Bit | Name [RISC-V] | R/W | Description +| 31 | `haltreq` | -/w | set/clear hart halt request +| 30 | `resumereq` | -/w | request hart to resume +| 28 | `ackhavereset` | -/w | write `1` to clear `*havereset` flags +| 27 | - | r/- | reserved, hardwired to zero +| 26 | `hasel` | r/- | `0`: only a single hart can be selected at once +| 25:16 | `hartsello` | r/w | hart select; only the lowest 3 bits are implemented +| 15:6 | `hartselhi` | r/- | hardwired to zero +| 5:4 | - | r/- | reserved, hardwired to zero +| 3 | `setresethaltreq` | r/- | `0`: halt-on-reset not implemented +| 2 | `clrresethaltreq` | r/- | `0`: halt-on-reset not implemented +| 1 | `ndmreset` | r/w | put whole system (except OCD) into reset state when `1` +| 0 | `dmactive` | r/w | DM enable; writing `0`-`1` will reset the DM |======================= @@ -251,17 +275,17 @@ are configured as "zero" and are read-only. 
Writing '1' to these bits/fields wil | 31:23 | _reserved_ | reserved; zero | 22 | `impebreak` | `1`: indicates an implicit `ebreak` instruction after the last program buffer entry | 21:20 | _reserved_ | reserved; zero -| 19 | `allhavereset` .2+| `1` when the hart is in reset +| 19 | `allhavereset` .2+| `1` when the selected hart is in reset state | 18 | `anyhavereset` -| 17 | `allresumeack` .2+| `1` when the hart has acknowledged a resume request +| 17 | `allresumeack` .2+| `1` when the selected hart has acknowledged a resume request | 16 | `anyresumeack` -| 15 | `allnonexistent` .2+| zero to indicate the hart is always existent +| 15 | `allnonexistent` .2+| `1` when the selected hart is not available | 14 | `anynonexistent` -| 13 | `allunavail` .2+| `1` when the DM is disabled to indicate the hart is unavailable +| 13 | `allunavail` .2+| `1` when the DM is disabled to indicate the selected hart is unavailable | 12 | `anyunavail` -| 11 | `allrunning` .2+| `1` when the hart is running +| 11 | `allrunning` .2+| `1` when the selected hart is running | 10 | `anyrunning` -| 9 | `allhalted` .2+| `1` when the hart is halted +| 9 | `allhalted` .2+| `1` when the selected hart is halted | 8 | `anyhalted` | 7 | `authenticated` | set if authentication passed; see <<_debug_authentication>> | 6 | `authbusy` | set if authentication is busy, see <<_debug_authentication>> @@ -410,58 +434,72 @@ hart's GPRs x0 - x15/31 (abstract command register index `0x1000` - `0x101f`). |====== +:sectnums!: +===== **`haltsum0`** + +[cols="4,27,>7"] +[frame="topbot",grid="none"] +|====== +| 0x40 | **Halt summary 0** | `haltsum0` +3+| Reset value: `0x00000000` +3+| Each bit corresponds to a hart being halted. Only the lowest four bits are implemented. +|====== + + :sectnums: ==== DM CPU Access -From the CPU's perspective the DM acts like another memory-mapped peripheral. It occupies 256 bytes of the CPU's address -space starting at address `base_io_dm_c`.
This address space is divided into four sections of 64 bytes each to provide -access to the _park loop code ROM_, the _program buffer_, the _data buffer_ and the _status register_. The program buffer, -the data buffer and the status register do not fully occupy the 64-byte-wide sections and are mirrored several times to fill -the entire section. +From the CPU's perspective the DM acts like another memory-mapped peripheral. It occupies 512 bytes of the CPU's +address space starting at address `base_io_dm_c` (`0xffff0000`). This address space is divided into four sections +of 128 bytes each to provide access to the _park loop code ROM_, the _program buffer_, the _data buffer_ and the +_status register_. The program buffer, the data buffer and the status register do not fully occupy the 128-byte-wide +sections and are mirrored several times across the entire section. .DM CPU Access - Address Map [cols="^2,^2,<4"] [options="header",grid="rows"] |======================= | Base address | Physical size | Description -| `0xffffff00` | 64 bytes | ROM for the "park loop" code -| `0xffffff40` | 16 bytes | Program buffer (<<_progbuf>>) -| `0xffffff80` | 4 bytes | Data buffer (<<_data0>>) -| `0xffffffc0` | 4 bytes | Control and <<_status_register>> +| `0xfffffe00` | 128 bytes | ROM for the "park loop" code (<<_code_rom>>) +| `0xfffffe80` | 16 bytes | Program buffer (<<_progbuf>>) +| `0xffffff00` | 4 bytes | Data buffer (<<_data0>>) +| `0xffffff80` | 16 bytes | Control and <<_status_register>> |======================= .DM Register Access [IMPORTANT] -All memory-mapped registers of the DM can only be accessed by the CPU if it is in debug mode. Hence, the DM registers are not -visible nor accessible for normal CPU operations. Any CPU access outside of debug mode will raise a bus access fault exception. +All memory-mapped registers of the DM can only be accessed by the CPU when in debug mode. Hence, the DM registers are +not accessible for normal CPU operations.
Any CPU access outside of debug mode will raise a bus access fault exception. + + +:sectnums: +===== Code ROM + +The code ROM contains the minimal OCD firmware that implements the debugger's park loop. .Park Loop Code Sources ("OCD Firmware") [NOTE] The assembly sources of the park loop code are available in `sw/ocd-firmware/park_loop.S`. - -:sectnums: -===== Code ROM Entry Points - -The park loop code provides two entry points where code execution can start. These are used to enter the park loop either when -an explicit debug-entry/halt request has been issued (for example a halt request) or when an exception has occurred while executing -code in debug mode. +The park loop code provides two entry points where code execution can start. These are used to enter the park loop +either when an explicit debug-entry/halt request has been issued (for example a halt request) or when an exception +has occurred while executing code in debug mode (from the program buffer). .Park Loop Entry Points [cols="^6,<4"] [options="header",grid="rows"] |======================= -| Address | Description -| `dm_exc_entry_c` (`base_io_dm_c` + 0) | Exception entry address -| `dm_park_entry_c` (`base_io_dm_c` + 8) | Normal entry address (halt request) +| Address | Description +| `dm_exc_entry_c` (`base_io_dm_c` + 0) | Exception entry address +| `dm_park_entry_c` (`base_io_dm_c` + 16) | Normal entry address (halt request) |======================= -When the CPU enters (via an explicit halt request from the dubber) or re-enters debug mode (for example via an `ebreak` in the -DM's program buffer), it jumps to the _normal entry point_ that is configured via the <<_cpu_top_entity_generics, `CPU_DEBUG_PARK_ADDR`>> -CPU generic. By default, this address is set to `dm_park_entry_c`, which is defined in the main -package file.
If an exception is encountered during debug mode, the CPU jumps to the address of the _exception entry point_ -configured via the <<_cpu_top_entity_generics, `CPU_DEBUG_EXC_ADDR`>> CPU generic. By default, this address -is set to `dm_exc_entry_c`, which is also defined in the main package file. +When the CPU enters (via an explicit halt request from the debugger) or re-enters debug mode (for example via an +`ebreak` in the DM's program buffer), it jumps to the **normal entry point** that is configured via the +<<_cpu_top_entity_generics, `CPU_DEBUG_PARK_ADDR`>> CPU generic. By default, this address is set to `dm_park_entry_c`, +which is defined in the main package file. If an exception is encountered during debug mode, the CPU jumps to the +address of the **exception entry point** configured via the <<_cpu_top_entity_generics, `CPU_DEBUG_EXC_ADDR`>> CPU +generic. By default, this address is set to `dm_exc_entry_c`, which is also defined in the main package file. :sectnums: @@ -469,24 +507,37 @@ is set to `dm_exc_entry_c`, which is also defined in the main package file. The status register provides a direct communication channel between the CPU's debug-mode executing the park loop and the debugger-controlled DM. This register is used to communicate requests, which are issued by the -DM, and the according acknowledges, which are generated by the CPU. +DM, and the according acknowledges, which are generated by the CPU. The status register is sub-divided into four +consecutive memory-mapped registers. + +The functionality of the first register (offset 0) depends on whether the CPU accesses the register in read or write +mode. In read mode, the register provides the resume and execute requests for four individual harts. The according +flags are placed in individual bytes so the CPU can use load-byte instructions with the hart ID as byte-offset to load +the hart-specific request flags.
-There are only 4 bits in this register that are used to implement requests/acknowledges. Each bit is left-aligned -in one sub-byte of the entire 32-bit register. Thus, the CPU can access each bit individually using store-byte (`sb`) and -load-byte (`lb`) instructions. This eliminates the need to perform bit-masking in the park loop code resulting in less code -size and faster execution. +All four status registers provide a write mode. Writing the hart ID to the first register (offset 0) acknowledges the +**HALT** request for that specific hart. Writing the hart ID to the second register (offset 4) acknowledges the +**RESUME** request for that specific hart. Writing the hart ID to the third register (offset 8) acknowledges the +**EXECUTE** request for that specific hart. Writing any data to the fourth register (offset 12) acknowledges an +**EXCEPTION** encountered during execution of the program buffer. .DM Status Register - CPU Access -[cols="^1,^3,^3,<8"] +[cols="^1,^1,^1,<10"] [options="header",grid="rows"] |======================= -| Bit | Name | CPU/DM access <| Description -| 0 | `sreg_halt_ack` | CPU write, DM read <| Set by the CPU when halting. -.2+| 8 | `sreg_resume_req` | DM write, CPU read <| Set by the DM to request the CPU to resume normal operation. - | `sreg_resume_ack` | CPU write, DM read <| Set by the CPU before it starts resuming. -.2+| 16 | `sreg_execute_req` | DM write, CPU read <| Set by the DM to request execution of the program buffer. - | `sreg_execute_ack` | CPU write, DM read <| Set by the CPU before it starts executing the program buffer. -| 24 | `sreg_execute_ack` | CPU write, DM read <| Set by the CPU if an exception occurs while being in debug mode.
+| Offset | R/W | Bits | Description +.9+| 0 .8+| r/- | 0 | Hart 0: RESUME request + | 1 | Hart 0: EXECUTE request + | 8 | Hart 1: RESUME request + | 9 | Hart 1: EXECUTE request + | 16 | Hart 2: RESUME request + | 17 | Hart 2: EXECUTE request + | 24 | Hart 3: RESUME request + | 25 | Hart 3: EXECUTE request + | -/w | 1:0 | Write hart ID (0..3) to acknowledge HALT +| 4 | -/w | 1:0 | Write hart ID (0..3) to acknowledge RESUME +| 8 | -/w | 1:0 | Write hart ID (0..3) to acknowledge EXECUTE +| 12 | -/w | - | Write any value to acknowledge EXCEPTION |======================= From a0205ec17285d8168f422d68b08249f96c7858c1 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sat, 28 Dec 2024 19:54:43 +0100 Subject: [PATCH 06/11] [docs] cleanups --- docs/datasheet/on_chip_debugger.adoc | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/datasheet/on_chip_debugger.adoc b/docs/datasheet/on_chip_debugger.adoc index 50fb0dc0e..0ec7c5ed4 100644 --- a/docs/datasheet/on_chip_debugger.adoc +++ b/docs/datasheet/on_chip_debugger.adoc @@ -526,18 +526,18 @@ All four status register provide a write mode. 
Writing the hart ID to the first [options="header",grid="rows"] |======================= | Offset | R/W | Bits | Description -.9+| 0 .8+| r/- | 0 | Hart 0: RESUME request - | 1 | Hart 0: EXECUTE request - | 8 | Hart 1: RESUME request - | 9 | Hart 1: EXECUTE request - | 16 | Hart 2: RESUME request - | 17 | Hart 2: EXECUTE request - | 24 | Hart 3: RESUME request - | 25 | Hart 3: EXECUTE request - | -/w | 1:0 | Write hart ID (0..3) to acknowledge HALT -| 4 | -/w | 1:0 | Write hart ID (0..3) to acknowledge RESUME -| 8 | -/w | 1:0 | Write hart ID (0..3) to acknowledge EXECUTE -| 12 | -/w | - | Write any value to acknowledge EXCEPTION +.9+| 0 .8+| r/- | 0 <| Hart 0: RESUME request + | 1 <| Hart 0: EXECUTE request + | 8 <| Hart 1: RESUME request + | 9 <| Hart 1: EXECUTE request + | 16 <| Hart 2: RESUME request + | 17 <| Hart 2: EXECUTE request + | 24 <| Hart 3: RESUME request + | 25 <| Hart 3: EXECUTE request + | -/w | 1:0 <| Write hart ID (0..3) to acknowledge HALT +| 4 | -/w | 1:0 <| Write hart ID (0..3) to acknowledge RESUME +| 8 | -/w | 1:0 <| Write hart ID (0..3) to acknowledge EXECUTE +| 12 | -/w | - <| Write any value to acknowledge EXCEPTION |======================= From 79bf7fd58f8a9a098e014904dff2a3f59e8b6878 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sat, 28 Dec 2024 19:54:55 +0100 Subject: [PATCH 07/11] [ocd-firmware] minor edits --- sw/ocd-firmware/park_loop.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sw/ocd-firmware/park_loop.S b/sw/ocd-firmware/park_loop.S index 42ded7126..a44876d76 100644 --- a/sw/ocd-firmware/park_loop.S +++ b/sw/ocd-firmware/park_loop.S @@ -16,7 +16,7 @@ .option norvc .global _ocd_start .global entry_exception -.global entry_normal +.global entry_park // debug module (DM) address map .equ DM_CODE_BASE, 0xFFFFFE00 // base address of code ROM (park loop) @@ -40,11 +40,11 @@ _ocd_start: entry_exception: sw zero, (DM_SREG_BASE+ACK_EXC)(zero) // send exception-acknowledge (no need for a hart ID) csrr 
x8, dscratch0 // restore x8 from dscratch0 (might be changed during PBUF execution) - ebreak // re-enter debug mode (at "entry_normal" entry point) - nop + ebreak // re-enter debug mode (at "entry_park" entry point) + nop // dummy to align the address of "entry_park" -// BASE + 16: normal entry - ebreak in debug-mode, halt request or return from single-stepped instruction -entry_normal: +// BASE + 16: normal entry - halt CPU: ebreak in debug-mode, halt request or return from single-stepped instruction +entry_park: csrw dscratch0, x8 // backup x8 to dscratch0 so we have a GPR available csrr x8, mhartid // get hart ID (0..3) sw x8, (DM_SREG_BASE+ACK_HLT)(zero) // send halt-acknowledge @@ -68,7 +68,7 @@ resume: csrr x8, dscratch0 // restore x8 from dscratch0 dret // exit debug mode -// execute program buffer (implicit ebreak at the end of the buffer will bring us back to 'entry_normal') +// execute program buffer (implicit ebreak at the end of the buffer will bring us back to "entry_park") execute: csrr x8, mhartid // get hart ID (0..3) sw x8, (DM_SREG_BASE+ACK_EXE)(zero) // send execute-acknowledge From ca1b500c85be58cd3bc463659550e23035a2cb69 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sat, 28 Dec 2024 19:55:29 +0100 Subject: [PATCH 08/11] [top] cpu-halt as vector --- rtl/core/neorv32_top.vhd | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/rtl/core/neorv32_top.vhd b/rtl/core/neorv32_top.vhd index 5b279e5b8..0f9638941 100644 --- a/rtl/core/neorv32_top.vhd +++ b/rtl/core/neorv32_top.vhd @@ -303,7 +303,8 @@ architecture neorv32_top_rtl of neorv32_top is signal dmi_rsp : dmi_rsp_t; -- debug core interface (DCI) -- - signal dci_ndmrstn, dci_haltreq : std_ulogic; + signal dci_ndmrstn : std_ulogic; + signal dci_haltreq : std_ulogic_vector(0 downto 0); -- bus: core complex (CPU + caches) and DMA -- signal cpu_i_req, cpu_d_req, icache_req, dcache_req, core_req, main_req, main2_req, dma_req : bus_req_t; @@ -538,7 +539,7 @@ begin 
mei_i => mext_irq_i, mti_i => mtime_irq, firq_i => cpu_firq, - dbi_i => dci_haltreq, + dbi_i => dci_haltreq(0), -- instruction bus interface -- ibus_req_o => cpu_i_req, ibus_rsp_i => cpu_i_rsp, @@ -1682,17 +1683,18 @@ begin -- ------------------------------------------------------------------------------------------- neorv32_debug_dm_inst: entity neorv32.neorv32_debug_dm generic map ( + NUM_HARTS => 1, AUTHENTICATOR => OCD_AUTHENTICATION ) port map ( - clk_i => clk_i, - rstn_i => rstn_ext, - dmi_req_i => dmi_req, - dmi_rsp_o => dmi_rsp, - bus_req_i => iodev_req(IODEV_OCD), - bus_rsp_o => iodev_rsp(IODEV_OCD), - ndmrstn_o => dci_ndmrstn, - halt_req_o(0) => dci_haltreq + clk_i => clk_i, + rstn_i => rstn_ext, + dmi_req_i => dmi_req, + dmi_rsp_o => dmi_rsp, + bus_req_i => iodev_req(IODEV_OCD), + bus_rsp_o => iodev_rsp(IODEV_OCD), + ndmrstn_o => dci_ndmrstn, + halt_req_o => dci_haltreq ); end generate; @@ -1702,7 +1704,7 @@ begin iodev_rsp(IODEV_OCD) <= rsp_terminate_c; jtag_tdo_o <= jtag_tdi_i; -- JTAG pass-through dci_ndmrstn <= '1'; - dci_haltreq <= '0'; + dci_haltreq <= (others => '0'); end generate; From 16e4237ac5fc6da3d9373d1411f6645321e02467 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sun, 29 Dec 2024 14:40:17 +0100 Subject: [PATCH 09/11] [docs] cleanups --- docs/datasheet/on_chip_debugger.adoc | 61 ++++++++++++++++------------ 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/docs/datasheet/on_chip_debugger.adoc b/docs/datasheet/on_chip_debugger.adoc index 0ec7c5ed4..49b8c1e40 100644 --- a/docs/datasheet/on_chip_debugger.adoc +++ b/docs/datasheet/on_chip_debugger.adoc @@ -510,34 +510,43 @@ and the debugger-controlled DM. This register is used to communicate requests, w DM, and the according acknowledges, which are generated by the CPU. The status register is sub-divided into four consecutive memory-mapped registers. -The functionality of the first register (offset 0) depends on whether the CPU accesses the register in read or write -mode. 
In read mode, the register provides the resume and execute requests for four individual harts. The according -flags are placed in individual byes so the CPU can use load-byte instructions with the hart ID as byte-offset to load -the hart-specific request flags. - -All four status register provide a write mode. Writing the hart ID to the first register (offset 0) acknowledges the -**HALT** request for that specific hart. Writing the hart ID to the second register (offset 4) acknowledges the -**RESUME** request for that specific hart. Writing the hart ID to the third register (offset 8) acknowledges the -**EXECUTE** request for that specific hart. Writing any data to the fourth register (offset 12) acknowledged an -**EXCEPTION** encountered during execution of the program buffer. - -.DM Status Register - CPU Access -[cols="^1,^1,^1,<10"] +Starting at `0xffffff80` the status register provides a set of memory-mapped interface register whose functionality +depends on whether the CPU accesses the register in read or write mode. **Read** accesses return the **requests** +for each individual hart generated by the DM. **Write** accesses are used to **acknowledge** these requests by the +individual harts back to the DM. + +For read accesses, the hart ID is used as byte offset to read the hart-specific request flags. The flags for hart 0 +are located at `0xffffff80 + 0`, the flags for hart 1 are located at `0xffffff80 + 1` and so on. Hence, each hart +can use load-unsigned-byte instructions to isolate the hart specific flags. 
+.DM Status Register - Read Access (byte-wise access) +[cols="^2,^1,^1,^1,<6"] +[options="header",grid="rows"] +|======================= +| Address | Hart | R/W | Bits | Description +.2+| `0xffffff80` .2+| 0 .2+| r/- ^| 0 <| Resume request + ^| 1 <| Execute request +.2+| `0xffffff81` .2+| 1 .2+| r/- ^| 0 <| Resume request + ^| 1 <| Execute request +.2+| `0xffffff82` .2+| 2 .2+| r/- ^| 0 <| Resume request + ^| 1 <| Execute request +.2+| `0xffffff83` .2+| 3 .2+| r/- ^| 0 <| Resume request + ^| 1 <| Execute request +|======================= + +For write accesses, four consecutive memory-mapped registers are implemented. Each individual register is used +to acknowledge a specific condition: halt, resume, execute and exception. Each hart can acknowledge the corresponding +condition by writing its hart ID to the corresponding register. + +.DM Status Register - Write Access (word-wise access) +[cols="^2,^1,^1,<6"] [options="header",grid="rows"] |======================= -| Offset | R/W | Bits | Description -.9+| 0 .8+| r/- | 0 <| Hart 0: RESUME request - | 1 <| Hart 0: EXECUTE request - | 8 <| Hart 1: RESUME request - | 9 <| Hart 1: EXECUTE request - | 16 <| Hart 2: RESUME request - | 17 <| Hart 2: EXECUTE request - | 24 <| Hart 3: RESUME request - | 25 <| Hart 3: EXECUTE request - | -/w | 1:0 <| Write hart ID (0..3) to acknowledge HALT -| 4 | -/w | 1:0 <| Write hart ID (0..3) to acknowledge RESUME -| 8 | -/w | 1:0 <| Write hart ID (0..3) to acknowledge EXECUTE -| 12 | -/w | - <| Write any value to acknowledge EXCEPTION +| Address | R/W | Bits | Description +| `0xffffff80` | -/w | 1:0 | write hart ID to send hart's HALT acknowledge +| `0xffffff84` | -/w | 1:0 | write hart ID to send hart's RESUME acknowledge +| `0xffffff88` | -/w | 1:0 | write hart ID to send hart's EXECUTE acknowledge +| `0xffffff8c` | -/w | - | write any value to send hart's EXCEPTION acknowledge |======================= From 1f1413624bbdeebb0fd202f281a3cc2150526f71 Mon Sep 17 00:00:00 2001 From: stnolting 
Date: Sun, 29 Dec 2024 22:25:50 +0100 Subject: [PATCH 10/11] [package] update version ID --- rtl/core/neorv32_package.vhd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd index 79e790f17..50d0f5d46 100644 --- a/rtl/core/neorv32_package.vhd +++ b/rtl/core/neorv32_package.vhd @@ -29,7 +29,7 @@ package neorv32_package is -- Architecture Constants ----------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100804"; -- hardware version + constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100805"; -- hardware version constant archid_c : natural := 19; -- official RISC-V architecture ID constant XLEN : natural := 32; -- native data path width From 4aac093a49d43e6b57604a55ce11bef1b634d950 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sun, 29 Dec 2024 22:27:06 +0100 Subject: [PATCH 11/11] [changelog] add v1.10.8.5 --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6c883b86..3d1829be5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,7 +29,8 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12 | Date | Version | Comment | Ticket | |:----:|:-------:|:--------|:------:| -| 29.12.2024 | 1.10.8.4 | :warning: rename `SYSINFO.MEM -> SYSINFO.MISC`; add new `SYSINFO-MISC` entry for number of CPU cores (hardwired to one) | [#1134](https://github.com/stnolting/neorv32/pull/1134) | +| 29.12.2024 | 1.10.8.5 | :test_tube: add multi-hart support to debug module | [#1132](https://github.com/stnolting/neorv32/pull/1132) | +| 29.12.2024 | 1.10.8.4 | :warning: rename `SYSINFO.MEM -> SYSINFO.MISC`; add new `SYSINFO.MISC` entry for number of CPU cores (hardwired to one) | [#1134](https://github.com/stnolting/neorv32/pull/1134) | | 29.12.2024 | 1.10.8.3 | :bug: fix 
incorrect HPM counter sizes if `HPM_CNT_WIDTH = 64` | [#1128](https://github.com/stnolting/neorv32/pull/1128) | | 27.12.2024 | 1.10.8.2 | add out-of-band signals to internal request bus | [#1131](https://github.com/stnolting/neorv32/pull/1131) | | 27.12.2024 | 1.10.8.1 | :warning: replace MTIME by CLINT; :warning: remove `HART_ID` generic | [#1130](https://github.com/stnolting/neorv32/pull/1130) |