From 8dd8f535d8c2a0e9d91da9bd979f96ca48393984 Mon Sep 17 00:00:00 2001 From: Isaac David Date: Sun, 10 Nov 2024 23:21:58 +0000 Subject: [PATCH] refactor vector uog gen & add narrowing mode --- arches/isa_json/gen_uarch_rv64v_json.py | 290 +++++++++++------------ arches/isa_json/olympia_uarch_rv64v.json | 280 +++++++++++----------- core/InstArchInfo.cpp | 11 +- core/InstArchInfo.hpp | 11 +- core/VectorUopGenerator.cpp | 184 ++++++++------ core/VectorUopGenerator.hpp | 4 +- 6 files changed, 412 insertions(+), 368 deletions(-) diff --git a/arches/isa_json/gen_uarch_rv64v_json.py b/arches/isa_json/gen_uarch_rv64v_json.py index 7fabe099..c05bc784 100755 --- a/arches/isa_json/gen_uarch_rv64v_json.py +++ b/arches/isa_json/gen_uarch_rv64v_json.py @@ -20,31 +20,31 @@ # TODO: Vector Loads and Stores: Vector Load/Store Whole Register Instructions # Vector Integer Arithmetic Instructions: Vector Single-Width Integer Add and Subtract - "vadd.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vadd.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vadd.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vsub.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vsub.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vrsub.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vrsub.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vadd.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vadd.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vadd.vi" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsub.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsub.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vrsub.vi" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vrsub.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, # Vector Integer Arithmetic Instructions: Vector 
Widening Integer Add/Subtract - "vwaddu.vv" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwaddu.vx" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwsubu.vv" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwsubu.vx" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwadd.vv" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwadd.vx" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwsub.vv" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwsub.vx" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwaddu.wv" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwaddu.wx" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwsubu.wv" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwsubu.wx" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwadd.wv" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwadd.wx" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwsub.wv" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, - "vwsub.wx" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, + "vwaddu.vv" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwaddu.vx" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwsubu.vv" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwsubu.vx" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwadd.vv" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwadd.vx" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwsub.vv" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwsub.vx" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwaddu.wv" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwaddu.wx" : 
{"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwsubu.wv" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwsubu.wx" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwadd.wv" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwadd.wx" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwsub.wv" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, + "vwsub.wx" : {"pipe" : "vint", "uop_gen" : "WIDENING", "latency" : 1}, # TODO: Vector Integer Arithmetic Instructions: Vector Integer Extension # FIXME: Requires Mavis fix to support correctly @@ -57,161 +57,161 @@ # Vector Integer Arithmetic Instructions: Vector Integer Add-with-Carry/Subtract-with-Borrow Instructions # FIXME: Requires Mavis fix to include vector mask - "vadc.vvm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vadc.vxm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vadc.vim" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmadc.vvm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmadc.vxm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmadc.vim" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmadc.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmadc.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmadc.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vsbc.vvm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vsbc.vxm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmsbc.vvm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmsbc.vxm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmsbc.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmsbc.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vadc.vvm" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vadc.vxm" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" 
: 1}, + "vadc.vim" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmadc.vvm" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmadc.vxm" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmadc.vim" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmadc.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmadc.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmadc.vi" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsbc.vvm" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsbc.vxm" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmsbc.vvm" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmsbc.vxm" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmsbc.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmsbc.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, # Vector Integer Arithmetic Instructions: Vector Bitwise Logical Instructions - "vand.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vand.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vand.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vor.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vor.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vor.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vxor.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vxor.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vxor.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vand.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vand.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vand.vi" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vor.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + 
"vor.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vor.vi" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vxor.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vxor.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vxor.vi" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, # Vector Integer Arithmetic Instructions: Vector Single-Width Shift Instructions - "vsll.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vsll.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vsll.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vsrl.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vsrl.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vsrl.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vsra.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vsra.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vsra.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vsll.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsll.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsll.vi" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsrl.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsrl.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsrl.vi" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsra.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsra.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsra.vi" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, # Vector Integer Arithmetic Instructions: Vector Narrowing Integer Right Shift Instructions - "vnsrl.wv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vnsrl.wx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vnsrl.wi" : 
{"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vnsra.wv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vnsra.wx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vnsra.wi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vnsrl.wv" : {"pipe" : "vint", "uop_gen" : "NARROWING", "latency" : 1}, + "vnsrl.wx" : {"pipe" : "vint", "uop_gen" : "NARROWING", "latency" : 1}, + "vnsrl.wi" : {"pipe" : "vint", "uop_gen" : "NARROWING", "latency" : 1}, + "vnsra.wv" : {"pipe" : "vint", "uop_gen" : "NARROWING", "latency" : 1}, + "vnsra.wx" : {"pipe" : "vint", "uop_gen" : "NARROWING", "latency" : 1}, + "vnsra.wi" : {"pipe" : "vint", "uop_gen" : "NARROWING", "latency" : 1}, # Vector Integer Arithmetic Instructions: Vector Integer Compare Instructions - "vmseq.vv" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmseq.vx" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmseq.vi" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsne.vv" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsne.vx" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsne.vi" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsltu.vv" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsltu.vx" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmslt.vv" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmslt.vx" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsleu.vv" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsleu.vx" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsleu.vi" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsle.vv" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsle.vx" : {"pipe" : "vint", "uop_gen" : 
"ARITH_SINGLE_DEST", "latency" : 1}, - "vmsle.vi" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsgtu.vx" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsgtu.vi" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsgt.vx" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, - "vmsgt.vi" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, + "vmseq.vv" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmseq.vx" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmseq.vi" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsne.vv" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsne.vx" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsne.vi" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsltu.vv" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsltu.vx" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmslt.vv" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmslt.vx" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsleu.vv" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsleu.vx" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsleu.vi" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsle.vv" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsle.vx" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsle.vi" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsgtu.vx" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsgtu.vi" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsgt.vx" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, + "vmsgt.vi" : {"pipe" : "vint", "uop_gen" : "SINGLE_DEST", "latency" : 1}, # Vector Integer 
Arithmetic Instructions: Vector Integer Min/Max Instructions - "vminu.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vminu.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmin.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmin.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmaxu.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmaxu.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmax.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmax.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vminu.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vminu.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmin.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmin.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmaxu.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmaxu.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmax.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmax.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, # Vector Integer Arithmetic Instructions: Vector Single-Width Integer Multiply Instructions - "vmul.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH", "latency" : 3}, - "vmul.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH", "latency" : 3}, - "vmulhu.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH", "latency" : 3}, - "vmulhu.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH", "latency" : 3}, - "vmulh.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH", "latency" : 3}, - "vmulh.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH", "latency" : 3}, - "vmulhsu.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH", "latency" : 3}, - "vmulhsu.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH", "latency" : 3}, + "vmul.vv" : {"pipe" : "vmul", "uop_gen" : "ELEMENTWISE", "latency" : 3}, + "vmul.vx" : {"pipe" : "vmul", "uop_gen" : 
"ELEMENTWISE", "latency" : 3}, + "vmulhu.vx" : {"pipe" : "vmul", "uop_gen" : "ELEMENTWISE", "latency" : 3}, + "vmulhu.vv" : {"pipe" : "vmul", "uop_gen" : "ELEMENTWISE", "latency" : 3}, + "vmulh.vv" : {"pipe" : "vmul", "uop_gen" : "ELEMENTWISE", "latency" : 3}, + "vmulh.vx" : {"pipe" : "vmul", "uop_gen" : "ELEMENTWISE", "latency" : 3}, + "vmulhsu.vv" : {"pipe" : "vmul", "uop_gen" : "ELEMENTWISE", "latency" : 3}, + "vmulhsu.vx" : {"pipe" : "vmul", "uop_gen" : "ELEMENTWISE", "latency" : 3}, # Vector Integer Arithmetic Instructions: Vector Integer Divide Instructions - "vdiv.vv" : {"pipe" : "vdiv", "uop_gen" : "ARITH", "latency" : 23}, - "vdiv.vx" : {"pipe" : "vdiv", "uop_gen" : "ARITH", "latency" : 23}, - "vdivu.vv" : {"pipe" : "vdiv", "uop_gen" : "ARITH", "latency" : 23}, - "vdivu.vx" : {"pipe" : "vdiv", "uop_gen" : "ARITH", "latency" : 23}, - "vremu.vv" : {"pipe" : "vdiv", "uop_gen" : "ARITH", "latency" : 23}, - "vremu.vx" : {"pipe" : "vdiv", "uop_gen" : "ARITH", "latency" : 23}, - "vrem.vv" : {"pipe" : "vdiv", "uop_gen" : "ARITH", "latency" : 23}, - "vrem.vx" : {"pipe" : "vdiv", "uop_gen" : "ARITH", "latency" : 23}, + "vdiv.vv" : {"pipe" : "vdiv", "uop_gen" : "ELEMENTWISE", "latency" : 23}, + "vdiv.vx" : {"pipe" : "vdiv", "uop_gen" : "ELEMENTWISE", "latency" : 23}, + "vdivu.vv" : {"pipe" : "vdiv", "uop_gen" : "ELEMENTWISE", "latency" : 23}, + "vdivu.vx" : {"pipe" : "vdiv", "uop_gen" : "ELEMENTWISE", "latency" : 23}, + "vremu.vv" : {"pipe" : "vdiv", "uop_gen" : "ELEMENTWISE", "latency" : 23}, + "vremu.vx" : {"pipe" : "vdiv", "uop_gen" : "ELEMENTWISE", "latency" : 23}, + "vrem.vv" : {"pipe" : "vdiv", "uop_gen" : "ELEMENTWISE", "latency" : 23}, + "vrem.vx" : {"pipe" : "vdiv", "uop_gen" : "ELEMENTWISE", "latency" : 23}, # Vector Integer Arithmetic Instructions: Vector Widening Integer Multiply Instructions - "vwmul.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 3}, - "vwmul.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 3}, - 
"vwmulu.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 3}, - "vwmulu.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 3}, - "vwmulsu.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 3}, - "vwmulsu.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 3}, + "vwmul.vv" : {"pipe" : "vmul", "uop_gen" : "WIDENING", "latency" : 3}, + "vwmul.vx" : {"pipe" : "vmul", "uop_gen" : "WIDENING", "latency" : 3}, + "vwmulu.vv" : {"pipe" : "vmul", "uop_gen" : "WIDENING", "latency" : 3}, + "vwmulu.vx" : {"pipe" : "vmul", "uop_gen" : "WIDENING", "latency" : 3}, + "vwmulsu.vv" : {"pipe" : "vmul", "uop_gen" : "WIDENING", "latency" : 3}, + "vwmulsu.vx" : {"pipe" : "vmul", "uop_gen" : "WIDENING", "latency" : 3}, # Vector Integer Arithmetic Instructions: Vector Single-Width Integer Multiply-Add Instructions - "vmacc.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, - "vmacc.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, - "vnmsac.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, - "vnmsac.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, - "vmadd.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, - "vmadd.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, - "vnmsub.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, - "vnmsub.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, + "vmacc.vv" : {"pipe" : "vmul", "uop_gen" : "MAC", "latency" : 3}, + "vmacc.vx" : {"pipe" : "vmul", "uop_gen" : "MAC", "latency" : 3}, + "vnmsac.vv" : {"pipe" : "vmul", "uop_gen" : "MAC", "latency" : 3}, + "vnmsac.vx" : {"pipe" : "vmul", "uop_gen" : "MAC", "latency" : 3}, + "vmadd.vv" : {"pipe" : "vmul", "uop_gen" : "MAC", "latency" : 3}, + "vmadd.vx" : {"pipe" : "vmul", "uop_gen" : "MAC", "latency" : 3}, + "vnmsub.vv" : {"pipe" : "vmul", "uop_gen" : "MAC", "latency" : 3}, + "vnmsub.vx" : {"pipe" : "vmul", "uop_gen" : "MAC", 
"latency" : 3}, # Vector Integer Arithmetic Instructions: Vector Widening Integer Multiply-Add Instructions - "vwmaccu.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, - "vwmaccu.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, - "vwmacc.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, - "vwmacc.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, - "vwmaccsu.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, - "vwmaccsu.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, - "vwmaccus.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, + "vwmaccu.vv" : {"pipe" : "vmul", "uop_gen" : "MAC_WIDE", "latency" : 3}, + "vwmaccu.vx" : {"pipe" : "vmul", "uop_gen" : "MAC_WIDE", "latency" : 3}, + "vwmacc.vv" : {"pipe" : "vmul", "uop_gen" : "MAC_WIDE", "latency" : 3}, + "vwmacc.vx" : {"pipe" : "vmul", "uop_gen" : "MAC_WIDE", "latency" : 3}, + "vwmaccsu.vv" : {"pipe" : "vmul", "uop_gen" : "MAC_WIDE", "latency" : 3}, + "vwmaccsu.vx" : {"pipe" : "vmul", "uop_gen" : "MAC_WIDE", "latency" : 3}, + "vwmaccus.vx" : {"pipe" : "vmul", "uop_gen" : "MAC_WIDE", "latency" : 3}, # Vector Integer Arithmetic Instructions: Vector Integer Merge Instructions # FIXME: Requires Mavis fix to include vector mask - "vmerge.vvm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmerge.vxm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmerge.vim" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmerge.vvm" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmerge.vxm" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmerge.vim" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, # Vector Integer Arithmetic Instructions: Vector Integer Move Instructions - "vmv.v.v" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, - "vmv.v.x" : {"pipe" : "vint", "uop_gen" 
: "ARITH", "latency" : 1}, - "vmv.v.i" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmv.v.v" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmv.v.x" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vmv.v.i" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, # Vector Fixed-Point Arithmetic Instructions: Vector Single-Width Saturating Add and Subtract - "vsaddu.vv" : {"pipe" : "vfixed", "uop_gen" : "ARITH", "latency" : 1}, - "vsaddu.vx" : {"pipe" : "vfixed", "uop_gen" : "ARITH", "latency" : 1}, - "vsaddu.vi" : {"pipe" : "vfixed", "uop_gen" : "ARITH", "latency" : 1}, + "vsaddu.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsaddu.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsaddu.vi" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, - "vsadd.vv" : {"pipe" : "vfixed", "uop_gen" : "ARITH", "latency" : 1}, - "vsadd.vx" : {"pipe" : "vfixed", "uop_gen" : "ARITH", "latency" : 1}, - "vsadd.vi" : {"pipe" : "vfixed", "uop_gen" : "ARITH", "latency" : 1}, + "vsadd.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsadd.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vsadd.vi" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, - "vssubu.vv" : {"pipe" : "vfixed", "uop_gen" : "ARITH", "latency" : 1}, - "vssubu.vx" : {"pipe" : "vfixed", "uop_gen" : "ARITH", "latency" : 1}, + "vssubu.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vssubu.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, - "vssub.vv" : {"pipe" : "vfixed", "uop_gen" : "ARITH", "latency" : 1}, - "vssub.vx" : {"pipe" : "vfixed", "uop_gen" : "ARITH", "latency" : 1}, + "vssub.vv" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, + "vssub.vx" : {"pipe" : "vint", "uop_gen" : "ELEMENTWISE", "latency" : 1}, # TODO: Vector Fixed-Point Arithmetic Instructions: Vector Single-Width Averaging Add and 
Subtract # Vector Fixed-Point Arithmetic Instructions: Vector Single-Width Fractional Multiply with Rounding and Saturation - "vsmul.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH", "latency" : 3}, - "vsmul.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH", "latency" : 3}, + "vsmul.vx" : {"pipe" : "vmul", "uop_gen" : "ELEMENTWISE", "latency" : 3}, + "vsmul.vv" : {"pipe" : "vmul", "uop_gen" : "ELEMENTWISE", "latency" : 3}, # TODO: Vector Fixed-Point Arithmetic Instructions: Vector Single-Width Scaling Shift Instructions # TODO: Vector Fixed-Point Arithmetic Instructions: Vector Narrowing Fixed-Point Clip Instructions diff --git a/arches/isa_json/olympia_uarch_rv64v.json b/arches/isa_json/olympia_uarch_rv64v.json index c46ba06c..fc525669 100644 --- a/arches/isa_json/olympia_uarch_rv64v.json +++ b/arches/isa_json/olympia_uarch_rv64v.json @@ -26,55 +26,55 @@ { "mnemonic": "vadc.vim", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vadc.vvm", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vadc.vxm", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vadd.vi", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vadd.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vadd.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vand.vi", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vand.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vand.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { @@ -110,25 +110,25 @@ { "mnemonic": "vdiv.vv", "pipe": "vdiv", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 23 }, { "mnemonic": "vdiv.vx", "pipe": "vdiv", - 
"uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 23 }, { "mnemonic": "vdivu.vv", "pipe": "vdiv", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 23 }, { "mnemonic": "vdivu.vx", "pipe": "vdiv", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 23 }, { @@ -926,43 +926,43 @@ { "mnemonic": "vmacc.vv", "pipe": "vmul", - "uop_gen": "ARITH_MAC", + "uop_gen": "MAC", "latency": 3 }, { "mnemonic": "vmacc.vx", "pipe": "vmul", - "uop_gen": "ARITH_MAC", + "uop_gen": "MAC", "latency": 3 }, { "mnemonic": "vmadc.vim", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmadc.vvm", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmadc.vxm", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmadd.vv", "pipe": "vmul", - "uop_gen": "ARITH_MAC", + "uop_gen": "MAC", "latency": 3 }, { "mnemonic": "vmadd.vx", "pipe": "vmul", - "uop_gen": "ARITH_MAC", + "uop_gen": "MAC", "latency": 3 }, { @@ -980,43 +980,43 @@ { "mnemonic": "vmax.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmax.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmaxu.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmaxu.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmerge.vim", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmerge.vvm", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmerge.vxm", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { @@ -1082,25 +1082,25 @@ { "mnemonic": "vmin.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmin.vx", "pipe": 
"vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vminu.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vminu.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { @@ -1130,13 +1130,13 @@ { "mnemonic": "vmsbc.vvm", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmsbc.vxm", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { @@ -1148,43 +1148,43 @@ { "mnemonic": "vmseq.vi", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmseq.vv", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmseq.vx", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsgt.vi", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsgt.vx", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsgtu.vi", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsgtu.vx", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { @@ -1196,79 +1196,79 @@ { "mnemonic": "vmsle.vi", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsle.vv", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsle.vx", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsleu.vi", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsleu.vv", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": 
"SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsleu.vx", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmslt.vv", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmslt.vx", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsltu.vv", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsltu.vx", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsne.vi", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsne.vv", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { "mnemonic": "vmsne.vx", "pipe": "vint", - "uop_gen": "ARITH_SINGLE_DEST", + "uop_gen": "SINGLE_DEST", "latency": 1 }, { @@ -1280,49 +1280,49 @@ { "mnemonic": "vmul.vv", "pipe": "vmul", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 3 }, { "mnemonic": "vmul.vx", "pipe": "vmul", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 3 }, { "mnemonic": "vmulh.vv", "pipe": "vmul", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 3 }, { "mnemonic": "vmulh.vx", "pipe": "vmul", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 3 }, { "mnemonic": "vmulhsu.vv", "pipe": "vmul", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 3 }, { "mnemonic": "vmulhsu.vx", "pipe": "vmul", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 3 }, { "mnemonic": "vmulhu.vv", "pipe": "vmul", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 3 }, { "mnemonic": "vmulhu.vx", "pipe": "vmul", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 3 }, { @@ -1334,19 +1334,19 @@ { "mnemonic": "vmv.v.i", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": 
"ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmv.v.v", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vmv.v.x", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { @@ -1430,79 +1430,79 @@ { "mnemonic": "vnmsac.vv", "pipe": "vmul", - "uop_gen": "ARITH_MAC", + "uop_gen": "MAC", "latency": 3 }, { "mnemonic": "vnmsac.vx", "pipe": "vmul", - "uop_gen": "ARITH_MAC", + "uop_gen": "MAC", "latency": 3 }, { "mnemonic": "vnmsub.vv", "pipe": "vmul", - "uop_gen": "ARITH_MAC", + "uop_gen": "MAC", "latency": 3 }, { "mnemonic": "vnmsub.vx", "pipe": "vmul", - "uop_gen": "ARITH_MAC", + "uop_gen": "MAC", "latency": 3 }, { "mnemonic": "vnsra.wi", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "NARROWING", "latency": 1 }, { "mnemonic": "vnsra.wv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "NARROWING", "latency": 1 }, { "mnemonic": "vnsra.wx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "NARROWING", "latency": 1 }, { "mnemonic": "vnsrl.wi", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "NARROWING", "latency": 1 }, { "mnemonic": "vnsrl.wv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "NARROWING", "latency": 1 }, { "mnemonic": "vnsrl.wx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "NARROWING", "latency": 1 }, { "mnemonic": "vor.vi", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vor.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vor.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { @@ -1562,25 +1562,25 @@ { "mnemonic": "vrem.vv", "pipe": "vdiv", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 23 }, { "mnemonic": "vrem.vx", "pipe": "vdiv", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 23 }, { "mnemonic": "vremu.vv", "pipe": "vdiv", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 23 }, { 
"mnemonic": "vremu.vx", "pipe": "vdiv", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 23 }, { @@ -1610,13 +1610,13 @@ { "mnemonic": "vrsub.vi", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vrsub.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { @@ -1646,49 +1646,49 @@ { "mnemonic": "vsadd.vi", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsadd.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsadd.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsaddu.vi", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsaddu.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsaddu.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsbc.vvm", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsbc.vxm", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { @@ -1790,19 +1790,19 @@ { "mnemonic": "vsll.vi", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsll.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsll.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { @@ -1814,13 +1814,13 @@ { "mnemonic": "vsmul.vv", "pipe": "vmul", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 3 }, { "mnemonic": "vsmul.vx", "pipe": "vmul", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 3 }, { @@ -1850,37 +1850,37 @@ { "mnemonic": "vsra.vi", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsra.vv", "pipe": 
"vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsra.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsrl.vi", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsrl.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsrl.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { @@ -1946,37 +1946,37 @@ { "mnemonic": "vssub.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vssub.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vssubu.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vssubu.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsub.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vsub.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { @@ -2006,127 +2006,127 @@ { "mnemonic": "vwadd.vv", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwadd.vx", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwadd.wv", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwadd.wx", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwaddu.vv", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwaddu.vx", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwaddu.wv", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", 
"latency": 1 }, { "mnemonic": "vwaddu.wx", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwmacc.vv", "pipe": "vmul", - "uop_gen": "ARITH_MAC_WIDE_DEST", + "uop_gen": "MAC_WIDE", "latency": 3 }, { "mnemonic": "vwmacc.vx", "pipe": "vmul", - "uop_gen": "ARITH_MAC_WIDE_DEST", + "uop_gen": "MAC_WIDE", "latency": 3 }, { "mnemonic": "vwmaccsu.vv", "pipe": "vmul", - "uop_gen": "ARITH_MAC_WIDE_DEST", + "uop_gen": "MAC_WIDE", "latency": 3 }, { "mnemonic": "vwmaccsu.vx", "pipe": "vmul", - "uop_gen": "ARITH_MAC_WIDE_DEST", + "uop_gen": "MAC_WIDE", "latency": 3 }, { "mnemonic": "vwmaccu.vv", "pipe": "vmul", - "uop_gen": "ARITH_MAC_WIDE_DEST", + "uop_gen": "MAC_WIDE", "latency": 3 }, { "mnemonic": "vwmaccu.vx", "pipe": "vmul", - "uop_gen": "ARITH_MAC_WIDE_DEST", + "uop_gen": "MAC_WIDE", "latency": 3 }, { "mnemonic": "vwmaccus.vx", "pipe": "vmul", - "uop_gen": "ARITH_MAC_WIDE_DEST", + "uop_gen": "MAC_WIDE", "latency": 3 }, { "mnemonic": "vwmul.vv", "pipe": "vmul", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 3 }, { "mnemonic": "vwmul.vx", "pipe": "vmul", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 3 }, { "mnemonic": "vwmulsu.vv", "pipe": "vmul", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 3 }, { "mnemonic": "vwmulsu.vx", "pipe": "vmul", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 3 }, { "mnemonic": "vwmulu.vv", "pipe": "vmul", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 3 }, { "mnemonic": "vwmulu.vx", "pipe": "vmul", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 3 }, { @@ -2144,67 +2144,67 @@ { "mnemonic": "vwsub.vv", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwsub.vx", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwsub.wv", "pipe": "vint", - "uop_gen": 
"ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwsub.wx", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwsubu.vv", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwsubu.vx", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwsubu.wv", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vwsubu.wx", "pipe": "vint", - "uop_gen": "ARITH_WIDE_DEST", + "uop_gen": "WIDENING", "latency": 1 }, { "mnemonic": "vxor.vi", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vxor.vv", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { "mnemonic": "vxor.vx", "pipe": "vint", - "uop_gen": "ARITH", + "uop_gen": "ELEMENTWISE", "latency": 1 }, { diff --git a/core/InstArchInfo.cpp b/core/InstArchInfo.cpp index 7e863c4f..2063ca52 100644 --- a/core/InstArchInfo.cpp +++ b/core/InstArchInfo.cpp @@ -50,11 +50,12 @@ namespace olympia }; const InstArchInfo::UopGenMap InstArchInfo::uop_gen_type_map = { - {"ARITH", InstArchInfo::UopGenType::ARITH}, - {"ARITH_SINGLE_DEST", InstArchInfo::UopGenType::ARITH_SINGLE_DEST}, - {"ARITH_WIDE_DEST", InstArchInfo::UopGenType::ARITH_WIDE_DEST}, - {"ARITH_MAC", InstArchInfo::UopGenType::ARITH_MAC}, - {"ARITH_MAC_WIDE_DEST", InstArchInfo::UopGenType::ARITH_MAC_WIDE_DEST}, + {"ELEMENTWISE", InstArchInfo::UopGenType::ELEMENTWISE}, + {"SINGLE_DEST", InstArchInfo::UopGenType::SINGLE_DEST}, + {"WIDENING", InstArchInfo::UopGenType::WIDENING}, + {"NARROWING", InstArchInfo::UopGenType::NARROWING}, + {"MAC", InstArchInfo::UopGenType::MAC}, + {"MAC_WIDE", InstArchInfo::UopGenType::MAC_WIDE}, {"NONE", InstArchInfo::UopGenType::NONE} }; diff --git a/core/InstArchInfo.hpp b/core/InstArchInfo.hpp index 98378e57..cfdbee6e 100644 --- 
a/core/InstArchInfo.hpp +++ b/core/InstArchInfo.hpp @@ -68,11 +68,12 @@ namespace olympia enum class UopGenType { - ARITH, - ARITH_SINGLE_DEST, - ARITH_WIDE_DEST, - ARITH_MAC, - ARITH_MAC_WIDE_DEST, + ELEMENTWISE, + SINGLE_DEST, + WIDENING, + NARROWING, + MAC, + MAC_WIDE, NONE, UNKNOWN }; diff --git a/core/VectorUopGenerator.cpp b/core/VectorUopGenerator.cpp index a67458ef..1da01546 100644 --- a/core/VectorUopGenerator.cpp +++ b/core/VectorUopGenerator.cpp @@ -6,10 +6,11 @@ namespace olympia { constexpr char VectorUopGenerator::name[]; - VectorUopGenerator::VectorUopGenerator(sparta::TreeNode* node, const VectorUopGeneratorParameterSet* p) : + VectorUopGenerator::VectorUopGenerator(sparta::TreeNode* node, + const VectorUopGeneratorParameterSet* p) : sparta::Unit(node) { - // Vector arithmetic uop generator, increment all src and dest register numbers + // Vector uop generator, increment all src and dest register numbers // For a "vadd.vv v12, v4,v8" with an LMUL of 4: // Uop 1: vadd.vv v12, v4, v8 // Uop 2: vadd.vv v13, v5, v9 @@ -19,26 +20,32 @@ namespace olympia constexpr bool SINGLE_DEST = false; constexpr bool WIDE_DEST = false; constexpr bool ADD_DEST_AS_SRC = false; - uop_gen_function_map_.emplace(InstArchInfo::UopGenType::ARITH, - &VectorUopGenerator::generateArithUop); + constexpr bool NARROW_DEST = false; + uop_gen_function_map_.emplace( + InstArchInfo::UopGenType::ELEMENTWISE, + &VectorUopGenerator::generateUops); } - // Vector arithmetic single dest uop generator, only increment all src register numbers - // For a "vmseq.vv v12, v4,v8" with an LMUL of 4: - // Uop 1: vadd.vv v12, v4, v8 - // Uop 2: vadd.vv v12, v5, v9 - // Uop 3: vadd.vv v12, v6, v10 - // Uop 4: vadd.vv v12, v7, v11 + // Vector single dest uop generator, only increment all src register numbers + // For a "vmseq.vv v12, v4, v8" with an LMUL of 4: + // Uop 1: vmseq.vv v12, v4, v8 + // Uop 2: vmseq.vv v12, v5, v9 + // Uop 3: vmseq.vv v12, v6, v10 + // Uop 4: vmseq.vv v12, v7, v11 { constexpr 
bool SINGLE_DEST = true; constexpr bool WIDE_DEST = false; constexpr bool ADD_DEST_AS_SRC = false; - uop_gen_function_map_.emplace(InstArchInfo::UopGenType::ARITH_SINGLE_DEST, - &VectorUopGenerator::generateArithUop); + constexpr bool NARROW_DEST = false; + uop_gen_function_map_.emplace( + InstArchInfo::UopGenType::SINGLE_DEST, + &VectorUopGenerator::generateUops); } - // Vector arithmetic wide dest uop generator, only increment src register numbers for even uops - // For a "vwmul.vv v12, v4, v8" with an LMUL of 4: + // Vector wide dest uop generator, only increment src register numbers for even uops + // For a "vwmul.vv v12, v4, v8" with an LMUL of 4: // Uop 1: vwmul.vv v12, v4, v8 // Uop 2: vwmul.vv v13, v4, v8 // Uop 3: vwmul.vv v14, v6, v10 @@ -49,61 +56,89 @@ namespace olympia // Uop 8: vwmul.vv v19, v10, v14 { constexpr bool SINGLE_DEST = false; - constexpr bool WIDE_DEST = true; + constexpr bool WIDENING = true; constexpr bool ADD_DEST_AS_SRC = false; - uop_gen_function_map_.emplace(InstArchInfo::UopGenType::ARITH_WIDE_DEST, - &VectorUopGenerator::generateArithUop); + constexpr bool NARROWING = false; + uop_gen_function_map_.emplace( + InstArchInfo::UopGenType::WIDENING, + &VectorUopGenerator::generateUops); } - // Vector arithmetic multiplay-add wide dest uop generator, add dest as source - // For a "vmacc.vv v12, v4, v8" with an LMUL of 4: - // Uop 1: vwmacc.vv v12, v4, v8, v12 - // Uop 2: vwmacc.vv v13, v4, v8, v13 - // Uop 3: vwmacc.vv v14, v5, v9, v14 - // Uop 4: vwmacc.vv v15, v5, v9, v15 - // Uop 5: vwmacc.vv v16, v6, v10, v16 - // Uop 6: vwmacc.vv v17, v6, v10, v17 - // Uop 7: vwmacc.vv v18, v7, v11, v18 - // Uop 8: vwmacc.vv v19, v7, v11, v19 + // Vector narrowing uop generator, only increment dest register numbers for even uops + // For a "vnsrl.wv v12, v4, v8" with an LMUL of 4: + // Uop 1: vnsrl.wv v12, v4, v8 + // Uop 2: vnsrl.wv v12, v5, v9 + // Uop 3: vnsrl.wv v13, v6, v10 + // Uop 4: vnsrl.wv v13, v7, v11 + // Uop 5: vnsrl.wv v14, v8, v12 + //
Uop 6: vnsrl.wv v14, v9, v13 + // Uop 7: vnsrl.wv v15, v10, v14 + // Uop 8: vnsrl.wv v15, v11, v15 { constexpr bool SINGLE_DEST = false; - constexpr bool WIDE_DEST = false; - constexpr bool ADD_DEST_AS_SRC = true; - uop_gen_function_map_.emplace(InstArchInfo::UopGenType::ARITH_MAC, - &VectorUopGenerator::generateArithUop); + constexpr bool WIDENING = false; + constexpr bool ADD_DEST_AS_SRC = false; + constexpr bool NARROWING = true; + uop_gen_function_map_.emplace( + InstArchInfo::UopGenType::NARROWING, + &VectorUopGenerator::generateUops); } - // Vector arithmetic multiplay-add uop generator, add dest as source + // Vector arithmetic multiply-add uop generator, add dest as source // For a "vmacc.vv v12, v4, v8" with an LMUL of 4: // Uop 1: vmacc.vv v12, v4, v8, v12 // Uop 2: vmacc.vv v13, v5, v9, v13 // Uop 3: vmacc.vv v14, v6, v10, v14 // Uop 4: vmacc.vv v15, v7, v11, v15 + { + constexpr bool SINGLE_DEST = false; + constexpr bool WIDE_DEST = false; + constexpr bool ADD_DEST_AS_SRC = true; + constexpr bool NARROW_DEST = false; + uop_gen_function_map_.emplace( + InstArchInfo::UopGenType::MAC, + &VectorUopGenerator::generateUops); + } + + // Vector multiply-add wide dest uop generator, add dest as source + // For a "vwmacc.vv v12, v4, v8" with an LMUL of 4: + // Uop 1: vwmacc.vv v12, v4, v8, v12 + // Uop 2: vwmacc.vv v13, v4, v8, v13 + // Uop 3: vwmacc.vv v14, v5, v9, v14 + // Uop 4: vwmacc.vv v15, v5, v9, v15 + // Uop 5: vwmacc.vv v16, v6, v10, v16 + // Uop 6: vwmacc.vv v17, v6, v10, v17 + // Uop 7: vwmacc.vv v18, v7, v11, v18 + // Uop 8: vwmacc.vv v19, v7, v11, v19 { constexpr bool SINGLE_DEST = false; constexpr bool WIDE_DEST = true; constexpr bool ADD_DEST_AS_SRC = true; - uop_gen_function_map_.emplace(InstArchInfo::UopGenType::ARITH_MAC_WIDE_DEST, - &VectorUopGenerator::generateArithUop); + constexpr bool NARROW_DEST = false; + uop_gen_function_map_.emplace( + InstArchInfo::UopGenType::MAC_WIDE, + &VectorUopGenerator::generateUops); } } - void
VectorUopGenerator::onBindTreeLate_() - { - mavis_facade_ = getMavis(getContainer()); - } + void VectorUopGenerator::onBindTreeLate_() { mavis_facade_ = getMavis(getContainer()); } void VectorUopGenerator::setInst(const InstPtr & inst) { sparta_assert(current_inst_ == nullptr, - "Cannot start generating uops for a new vector instruction, " - "current instruction has not finished: " << current_inst_); + "Cannot start generating uops for a new vector instruction, " + "current instruction has not finished: " + << current_inst_); const auto uop_gen_type = inst->getUopGenType(); sparta_assert(uop_gen_type != InstArchInfo::UopGenType::UNKNOWN, - "Inst: " << current_inst_ << " uop gen type is unknown"); + "Inst: " << inst << " uop gen type is unknown"); sparta_assert(uop_gen_type != InstArchInfo::UopGenType::NONE, - "Inst: " << current_inst_ << " uop gen type is none"); + "Inst: " << inst << " uop gen type is none"); // Number of vector elements processed by each uop const VectorConfigPtr & vector_config = inst->getVectorConfig(); @@ -111,23 +146,23 @@ namespace olympia // TODO: For now, generate uops for all elements even if there is a tail num_uops_to_generate_ = std::ceil(vector_config->getVLMAX() / num_elems_per_uop); - if((uop_gen_type == InstArchInfo::UopGenType::ARITH_WIDE_DEST) || - (uop_gen_type == InstArchInfo::UopGenType::ARITH_MAC_WIDE_DEST)) + if ((uop_gen_type == InstArchInfo::UopGenType::WIDENING) + || (uop_gen_type == InstArchInfo::UopGenType::MAC_WIDE)) { // TODO: Add parameter to support dual dests num_uops_to_generate_ *= 2; } current_inst_ = inst; - ILOG("Inst: " << current_inst_ << - " is being split into " << num_uops_to_generate_ << " UOPs"); + ILOG("Inst: " << current_inst_ << " is being split into " << num_uops_to_generate_ + << " UOPs"); } const InstPtr VectorUopGenerator::generateUop() { const auto uop_gen_type = current_inst_->getUopGenType(); sparta_assert(uop_gen_type <= InstArchInfo::UopGenType::NONE, - "Inst: " <<
current_inst_ << " uop gen type is unknown"); + "Inst: " << current_inst_ << " uop gen type is unknown"); // Generate uop auto uop_gen_func = uop_gen_function_map_.at(uop_gen_type); @@ -151,7 +186,7 @@ namespace olympia uop->setTail((num_elems_per_uop * num_uops_generated_) > vector_config->getVL()); // Handle last uop - if(num_uops_generated_ == num_uops_to_generate_) + if (num_uops_generated_ == num_uops_to_generate_) { reset_(); } @@ -161,8 +196,8 @@ namespace olympia return uop; } - template - const InstPtr VectorUopGenerator::generateArithUop() + template + const InstPtr VectorUopGenerator::generateUops() { // Increment source and destination register values auto srcs = current_inst_->getSourceOpInfoList(); @@ -177,8 +212,7 @@ namespace olympia if constexpr (WIDE_DEST == true) { // Only increment source values for even uops - src.field_value += (num_uops_generated_ % 2) ? num_uops_generated_ - 1 - : num_uops_generated_; + src.field_value += num_uops_generated_ / 2; } else { @@ -188,23 +222,34 @@ namespace olympia // Add a destination to the list of sources auto add_dest_as_src = [](auto & srcs, auto & dest) - { - // OperandFieldID is an enum with RS1 = 0, RS2 = 1, etc. with a max RS of RS4 - using OperandFieldID = mavis::InstMetaData::OperandFieldID; - const OperandFieldID field_id = static_cast(srcs.size()); - sparta_assert(field_id <= OperandFieldID::RS_MAX, - "Mavis does not support instructions with more than " << std::dec << - static_cast>(OperandFieldID::RS_MAX) << - " sources"); - srcs.emplace_back(field_id, dest.operand_type, dest.field_value); - }; + { + // OperandFieldID is an enum with RS1 = 0, RS2 = 1, etc. 
with a max RS of RS4 + using OperandFieldID = mavis::InstMetaData::OperandFieldID; + const OperandFieldID field_id = static_cast(srcs.size()); + sparta_assert( + field_id <= OperandFieldID::RS_MAX, + "Mavis does not support instructions with more than " + << std::dec + << static_cast>(OperandFieldID::RS_MAX) + << " sources"); + srcs.emplace_back(field_id, dest.operand_type, dest.field_value); + }; auto dests = current_inst_->getDestOpInfoList(); if constexpr (SINGLE_DEST == false) { for (auto & dest : dests) { - dest.field_value += num_uops_generated_; + + if constexpr (NARROW_DEST == true) + { + // Only increment destination values for even uops + dest.field_value += num_uops_generated_ / 2; + } + else + { + dest.field_value += num_uops_generated_; + } if constexpr (ADD_DEST_AS_SRC == true) { @@ -219,7 +264,8 @@ namespace olympia { const VectorConfigPtr & vector_config = current_inst_->getVectorConfig(); const uint32_t num_elems_per_uop = vector_config->getVLMAX() / vector_config->getSEW(); - const bool uop_contains_tail_elems = (num_elems_per_uop * num_uops_generated_) > vector_config->getVL(); + const bool uop_contains_tail_elems = + (num_elems_per_uop * num_uops_generated_) > vector_config->getVL(); if (uop_contains_tail_elems && (vector_config->getVTA() == false)) { @@ -234,17 +280,13 @@ namespace olympia InstPtr uop; if (current_inst_->hasImmediate()) { - mavis::ExtractorDirectOpInfoList ex_info(current_inst_->getMnemonic(), - srcs, - dests, + mavis::ExtractorDirectOpInfoList ex_info(current_inst_->getMnemonic(), srcs, dests, current_inst_->getImmediate()); uop = mavis_facade_->makeInstDirectly(ex_info, getClock()); } else { - mavis::ExtractorDirectOpInfoList ex_info(current_inst_->getMnemonic(), - srcs, - dests); + mavis::ExtractorDirectOpInfoList ex_info(current_inst_->getMnemonic(), srcs, dests); uop = mavis_facade_->makeInstDirectly(ex_info, getClock()); } @@ -253,7 +295,7 @@ namespace olympia void VectorUopGenerator::handleFlush(const 
FlushManager::FlushingCriteria & flush_criteria) { - if(current_inst_ && flush_criteria.includedInFlush(current_inst_)) + if (current_inst_ && flush_criteria.includedInFlush(current_inst_)) { reset_(); } diff --git a/core/VectorUopGenerator.hpp b/core/VectorUopGenerator.hpp index d819ada5..1165a324 100644 --- a/core/VectorUopGenerator.hpp +++ b/core/VectorUopGenerator.hpp @@ -46,8 +46,8 @@ namespace olympia const InstPtr generateUop(); - template - const InstPtr generateArithUop(); + template + const InstPtr generateUops(); uint64_t getNumUopsRemaining() const { return num_uops_to_generate_; }