From 10e5cf5dd8b9a5ce95e45ae37a396d352f4b4a95 Mon Sep 17 00:00:00 2001 From: John Brandwood Date: Mon, 16 Dec 2024 10:40:22 -0500 Subject: [PATCH 1/4] Remove all the extraneous preserving of the X register in HuCC's library code now that it is no longer needed. --- include/hucc/hucc-baselib.asm | 24 ++---- include/hucc/hucc-gfx.asm | 132 +++++++++---------------------- include/hucc/hucc-old-line.asm | 18 +---- include/hucc/hucc-old-map.asm | 33 +++----- include/hucc/hucc-old-spr.asm | 37 ++++----- include/hucc/hucc-scroll.asm | 24 +----- include/hucc/hucc-string.asm | 11 +-- include/hucc/hucc-systemcard.asm | 41 ---------- include/hucc/hucc.asm | 4 +- 9 files changed, 76 insertions(+), 248 deletions(-) diff --git a/include/hucc/hucc-baselib.asm b/include/hucc/hucc-baselib.asm index 9317f6b4..48ff8055 100644 --- a/include/hucc/hucc-baselib.asm +++ b/include/hucc/hucc-baselib.asm @@ -271,11 +271,9 @@ _vsync.1 .macro _joy.1 .macro .if SUPPORT_6BUTTON - tay - lda joy6now, y - pha - lda joynow, y - ply + tax + lda joynow, x ; A = lo-byte of result (joynow), same as the code being removed. + ldy joy6now, x ; Y = hi-byte of result (joy6now), same as the code being removed. .else tay lda joynow, y @@ -292,11 +290,9 @@ _joy.1 .macro _joytrg.1 .macro .if SUPPORT_6BUTTON - tay - lda joy6trg, y - pha - lda joytrg, y - ply + tax + lda joytrg, x ; A = lo-byte of result (joytrg), same as the code being removed. + ldy joy6trg, x ; Y = hi-byte of result (joy6trg), same as the code being removed. .else tay lda joytrg, y @@ -531,18 +527,14 @@ _set_xres.1 .macro ; void __fastcall __macro sgx_put_vram( unsigned int address<_di>, unsigned int data ); .macro _get_vram.1 - phx jsr vdc_di_to_marr - plx lda VDC_DL ldy VDC_DH .endm .macro _put_vram.2 pha - phx jsr vdc_di_to_mawr - plx pla sta VDC_DL sty VDC_DH @@ -550,18 +542,14 @@ _set_xres.1 .macro .if SUPPORT_SGX .macro _sgx_get_vram.1 - phx jsr sgx_di_to_marr - plx lda SGX_DL ldy SGX_DH .endm .macro _sgx_put_vram.2 pha - phx jsr sgx_di_to_mawr - plx pla sta SGX_DL sty SGX_DH diff --git a/include/hucc/hucc-gfx.asm b/include/hucc/hucc-gfx.asm index f3f433e1..9979c5a4 100644 --- a/include/hucc/hucc-gfx.asm +++ b/include/hucc/hucc-gfx.asm @@ -51,8 +51,6 @@ _set_256x224 .proc .CHR_0x20 = .BAT_SIZE / 16
; 1st tile # after the BAT. .SAT_ADDR = $7F00 ; SAT takes 16 tiles of VRAM. - phx ; Preserve X (aka __sp). - php ; Disable interrupts. sei @@ -100,8 +98,6 @@ _set_256x224 .proc call wait_vsync ; Wait for the next VBLANK. - plx ; Restore X (aka __sp). - leave ; All done, phew! ; A standard 256x224 screen with overscan. @@ -164,8 +160,8 @@ huc_screen_size .procgroup .if SUPPORT_SGX screen_size_sgx .proc - ldy #SGX_VDC_OFFSET ; Offset to SGX VDC. - db $F0 ; Turn "cly" into a "beq". + ldx #SGX_VDC_OFFSET ; Offset to SGX VDC. + db $F0 ; Turn "clx" into a "beq". .ref screen_size_vdc .endp @@ -173,10 +169,7 @@ screen_size_sgx .proc screen_size_vdc .proc - cly ; Offset to PCE VDC. - - phx ; Preserve X (aka __sp). - sxy ; Put VDC offset in X. + clx ; Offset to PCE VDC. lda <_al ; Get screen size value. and #7 ; Sanitize screen size value. @@ -221,8 +214,6 @@ screen_size_vdc .proc sta ); -_spr_x.1: phy +_spr_x.1: sxy clc adc #32 ldy #2 sta [spr_ptr], y - pla + txa adc #0 iny sta [spr_ptr], y @@ -355,12 +350,12 @@ _spr_get_x: sec ldy #2 lda [spr_ptr], y sbc #32 - pha + tax iny lda [spr_ptr], y sbc #0 tay - pla + txa rts @@ -373,12 +368,12 @@ _spr_get_x: sec _spr_get_y: sec lda [spr_ptr] sbc #64 - pha + tax ldy #1 lda [spr_ptr], y sbc #0 tay - pla + txa rts @@ -440,12 +435,12 @@ _sgx_spr_show: ldy #1 ; ; void __fastcall sgx_spr_x( unsigned int value ); -_sgx_spr_x.1: phy +_sgx_spr_x.1: sxy clc adc #32 ldy #2 sta [sgx_spr_ptr], y - pla + txa adc #0 iny sta [sgx_spr_ptr], y @@ -550,12 +545,12 @@ _sgx_spr_get_x: sec ldy #2 lda [sgx_spr_ptr], y sbc #32 - pha + tax iny lda [sgx_spr_ptr], y sbc #0 tay - pla + txa rts @@ -568,12 +563,12 @@ _sgx_spr_get_x: sec _sgx_spr_get_y: sec lda [sgx_spr_ptr] sbc #64 - pha + tax ldy #1 lda [sgx_spr_ptr], y sbc #0 tay - pla + txa rts .endif SUPPORT_SGX diff --git a/include/hucc/hucc-scroll.asm b/include/hucc/hucc-scroll.asm index 731c9007..4b6da4ef 100644 --- a/include/hucc/hucc-scroll.asm +++ b/include/hucc/hucc-scroll.asm @@ -65,8 +65,6 @@ 
HUCC_SCR_HEIGHT = 224 .proc _scroll_split.5 - phx ; Preserve X (aka __sp). - php ; Disable interrupts while sei ; updating this structure. @@ -108,8 +106,6 @@ HUCC_SCR_HEIGHT = 224 sta vdc_region_sel, x !done: plp ; Restore interrupts. - - plx ; Restore X (aka __sp). leave ; All done! .regionA: lda <_ah ; Scanline (i.e. top). @@ -142,8 +138,6 @@ HUCC_SCR_HEIGHT = 224 stz vdc_region_sel, x !done: plp ; Restore interrupts. - - plx ; Restore X (aka __sp). leave ; All done! .endp @@ -152,8 +146,6 @@ HUCC_SCR_HEIGHT = 224 .proc _sgx_scroll_split.5 - phx ; Preserve X (aka __sp). - php ; Disable interrupts while sei ; updating this structure. @@ -196,8 +188,6 @@ HUCC_SCR_HEIGHT = 224 sta sgx_region_sel, x !done: plp ; Restore interrupts. - - plx ; Restore X (aka __sp). leave ; All done! .regionA: lda <_ah ; Scanline (i.e. top). @@ -231,8 +221,6 @@ HUCC_SCR_HEIGHT = 224 stz sgx_region_sel, x !done: plp ; Restore interrupts. - - plx ; Restore X (aka __sp). leave ; All done! .endp @@ -250,8 +238,6 @@ HUCC_SCR_HEIGHT = 224 ; disable screen scrolling for a scroll region _disable_split.1: - phx ; Preserve X (aka __sp). - php ; Disable interrupts while sei ; updating this structure. @@ -270,8 +256,6 @@ _disable_split.1: sta vdc_region_sel, x plp ; Restore interrupts. - - plx ; Restore X (aka __sp). rts .regionA: stz vdc_regionA_crl, x ; Region disabled if $00. @@ -281,15 +265,11 @@ _disable_split.1: stz vdc_region_sel, x plp ; Restore interrupts. - - plx ; Restore X (aka __sp). rts .if SUPPORT_SGX _sgx_disable_split.1: - phx ; Preserve X (aka __sp). - php ; Disable interrupts while sei ; updating this structure. @@ -307,7 +287,7 @@ _sgx_disable_split.1: sta sgx_region_new, x ; selected region. sta sgx_region_sel, x ; Update last so there is no - plx ; need to disable irqs. + plp ; Restore interrupts. rts .regionA: stz sgx_regionA_crl, x ; Region disabled if $00. @@ -317,8 +297,6 @@ _sgx_disable_split.1: stz sgx_region_sel, x plp ; Restore interrupts. 
- - plx ; Restore X (aka __sp). rts .endif SUPPORT_SGX diff --git a/include/hucc/hucc-string.asm b/include/hucc/hucc-string.asm index 6fbc6421..13730985 100644 --- a/include/hucc/hucc-string.asm +++ b/include/hucc/hucc-string.asm @@ -64,8 +64,7 @@ _strcat: cla ; Max string length == 256! ldy.h #256 -_strlcat: phx ; Preserve X (aka __sp). - tax ; X = buffer length (1..256). +_strlcat: tax ; X = buffer length (1..256). .ifdef _DEBUG bne !+ ; Sanity check buffer length. @@ -109,8 +108,7 @@ _strlcat: phx ; Preserve X (aka __sp). _strcpy: cla ; Max string length == 256! ldy.h #256 -_strlcpy: phx ; Preserve X (aka __sp). - tax ; X = buffer length (1..256). +_strlcpy: tax ; X = buffer length (1..256). .ifdef _DEBUG bne !+ ; Sanity check buffer length. @@ -148,9 +146,7 @@ str_overflow: bne str_exit ; -_strlen: phx ; Preserve X (aka __sp). - - tma3 +_strlen: tma3 pha tma4 pha @@ -178,7 +174,6 @@ str_exit: tax ; X:Y = string or buffer length. txa ; A:Y = string or buffer length. say ; Y:A = string or buffer length. - plx ; Restore X (aka __sp). rts .alias _strlen.1 = _strlen diff --git a/include/hucc/hucc-systemcard.asm b/include/hucc/hucc-systemcard.asm index 2b252476..3ad483a6 100644 --- a/include/hucc/hucc-systemcard.asm +++ b/include/hucc/hucc-systemcard.asm @@ -72,11 +72,9 @@ __bm_error .ds 1 ; unsigned int __fastcall __macro cd_getver( void ); _cd_getver .macro - phx ; Preserve X (aka __sp). system ex_getver txa ; Put version in A:Y say ; Put version in Y:A - plx ; Restore X (aka __sp). .endm @@ -98,9 +96,7 @@ _cd_boot .macro ; void __fastcall __macro cd_reset( void ); _cd_reset .macro - phx ; Preserve X (aka __sp). system cd_reset - plx ; Restore X (aka __sp). .endm @@ -111,10 +107,8 @@ _cd_reset .macro ; unsigned char __fastcall __macro cd_pause( void ); _cd_pause .macro - phx ; Preserve X (aka __sp). system cd_pause cly - plx ; Restore X (aka __sp). 
.endm @@ -131,9 +125,7 @@ _cd_pause .macro ; $0E -> ADPCM fadeout 2.5 seconds _cd_fade.1 .macro - phx ; Preserve X (aka __sp). system cd_fade - plx ; Restore X (aka __sp). .endm @@ -405,8 +397,6 @@ _cd_loaddata.4 .proc ; unsigned char __fastcall cd_loadbank( unsigned char ovl_index<_cl>, unsigned int sect_offset<_si>, unsigned char bank<_bl>, unsigned int sectors<_al> ); _cd_loadbank.4 .macro - phx ; Preserve X (aka __sp). - ldx <_cl ; Get file address and length. jsr get_file_info @@ -432,7 +422,6 @@ _cd_loadbank.4 .macro txa ; value on a CD error. cly - plx ; Restore X (aka __sp). .endm @@ -443,10 +432,8 @@ _cd_loadbank.4 .macro ; unsigned char __fastcall __macro cd_status( unsigned char mode ); _cd_status.1 .macro - phx ; Preserve X (aka __sp). system cd_stat cly - plx ; Restore X (aka __sp). .endm @@ -457,9 +444,7 @@ _cd_status.1 .macro ; void __fastcall __macro ad_reset( void ); _ad_reset .macro - phx ; Preserve X (aka __sp). system ad_reset - plx ; Restore X (aka __sp). .endm @@ -469,7 +454,6 @@ _ad_reset .macro ; unsigned char __fastcall __macro ad_trans( unsigned char ovl_index<_cl>, unsigned int sect_offset<_si>, unsigned char nb_sectors<_dh>, unsigned int ad_addr<_bx> ); _ad_trans.4 .macro - phx ; Preserve X (aka __sp). ldx <_cl ; Get file address and length. jsr get_file_info @@ -484,7 +468,6 @@ _ad_trans.4 .macro !: system ad_trans cly - plx ; Restore X (aka __sp). .endm ; *************************************************************************** @@ -493,9 +476,7 @@ _ad_trans.4 .macro ; void __fastcall __macro ad_read( unsigned int ad_addr<_cx>, unsigned char mode<_dh>, unsigned int buf<_bx>, unsigned int bytes<_ax> ); _ad_read.4 .macro - phx ; Preserve X (aka __sp). system ad_read - plx ; Restore X (aka __sp). .endm @@ -506,9 +487,7 @@ _ad_read.4 .macro ; void __fastcall __macro ad_write( unsigned int ad_addr<_cx>, unsigned char mode<_dh>, unsigned int buf<_bx>, unsigned int bytes<_ax> ); _ad_write.4 .macro - phx ; Preserve X (aka __sp). 
system ad_write - plx ; Restore X (aka __sp). .endm @@ -518,10 +497,8 @@ _ad_write.4 .macro ; unsigned char __fastcall __macro ad_play( unsigned int ad_addr<_bx>, unsigned int bytes<_ax>, unsigned char freq<_dh>, unsigned char mode<_dl> ); _ad_play.4 .macro - phx ; Preserve X (aka __sp). system ad_play cly - plx ; Restore X (aka __sp). .endm @@ -532,7 +509,6 @@ _ad_play.4 .macro ; unsigned char __fastcall __macro ad_cplay( unsigned char ovl_index<_cl>, unsigned int sect_offset<_si>, unsigned int nb_sectors<_bx>, unsigned char freq<_dh> ); _ad_cplay.4 .macro - phx ; Preserve X (aka __sp). ldx <_cl ; Get file address and length. jsr get_file_info @@ -551,7 +527,6 @@ _ad_cplay.4 .macro system ad_cplay cly - plx ; Restore X (aka __sp). .endm @@ -562,9 +537,7 @@ _ad_cplay.4 .macro ; void __fastcall __macro ad_stop( void ); _ad_stop .macro - phx ; Preserve X (aka __sp). system ad_stop - plx ; Restore X (aka __sp). .endm @@ -575,10 +548,8 @@ _ad_stop .macro ; unsigned char __fastcall __macro ad_stat( void ); _ad_stat .macro - phx ; Preserve X (aka __sp). system ad_stat cly - plx ; Restore X (aka __sp). .endm @@ -708,7 +679,6 @@ _bm_format .proc ; After return __bm_error = BM_OK or BM_NOT_FORMATTED. _bm_free .macro - phx ; Preserve X (aka __sp). system bm_free sta __bm_error tay @@ -717,7 +687,6 @@ _bm_free .macro stz.h <_cx !ok: lda.l <_cx ldy.h <_cx - plx ; Restore X (aka __sp). .endm @@ -738,11 +707,9 @@ _bm_free .macro ; After return __bm_error = BM_OK or BM_NOT_FORMATTED. _bm_read.4 .macro - phx ; Preserve X (aka __sp). system bm_read sta __bm_error cly - plx ; Restore X (aka __sp). .endm @@ -763,11 +730,9 @@ _bm_read.4 .macro ; After return __bm_error = BM_OK, BM_NOT_FOUND (i.e. not enough memory) or BM_NOT_FORMATTED. _bm_write.4 .macro - phx ; Preserve X (aka __sp). system bm_write sta __bm_error cly - plx ; Restore X (aka __sp). .endm @@ -788,11 +753,9 @@ _bm_write.4 .macro ; After return __bm_error = BM_OK, BM_NOT_FOUND or BM_NOT_FORMATTED. 
_bm_delete.1 .macro - phx ; Preserve X (aka __sp). system bm_delete sta __bm_error cly - plx ; Restore X (aka __sp). .endm @@ -819,7 +782,6 @@ _bm_exist.1 .macro stz.h <_cx stz.l <_dx ; Offset from start. stz.h <_dx - phx ; Preserve X (aka __sp). system bm_read sta __bm_error tay ; $00 if file OK, else NZ. @@ -827,7 +789,6 @@ _bm_exist.1 .macro lda #$FF !done: inc a ; $01 if file OK, else $00. cly - plx ; Restore X (aka __sp). .endm @@ -855,9 +816,7 @@ _bm_create.2 .macro sta.h <_bx stz.l <_dx ; Offset from start. stz.h <_dx - phx ; Preserve X (aka __sp). system bm_write sta __bm_error cly - plx ; Restore X (aka __sp). .endm diff --git a/include/hucc/hucc.asm b/include/hucc/hucc.asm index de561b6b..dd70e911 100644 --- a/include/hucc/hucc.asm +++ b/include/hucc/hucc.asm @@ -297,8 +297,8 @@ core_main: tma7 ; Get the CORE_BANK. call init_random ; Initialize random seed. .endif - ldx #HUCC_STACK_SZ ; Initialize the HuCC stack - stx <__sp ; and put the __sp in X. + ldx #HUCC_STACK_SZ ; Initialize the HuCC stack. + stx <__sp call _main ; Execute the HuCC program. From c588e96018215fddc5e135852b6daa213ce8c787 Mon Sep 17 00:00:00 2001 From: John Brandwood Date: Wed, 18 Dec 2024 12:17:42 -0500 Subject: [PATCH 2/4] Add optimized macros to hucc-codegen.asm for comparing a signed int with an unsigned char variable in memory or on the stack. --- include/hucc/hucc-codegen.asm | 292 ++++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) diff --git a/include/hucc/hucc-codegen.asm b/include/hucc/hucc-codegen.asm index e93165cb..3e3cb1cd 100644 --- a/include/hucc/hucc-codegen.asm +++ b/include/hucc/hucc-codegen.asm @@ -1117,6 +1117,147 @@ __uge_w.wm .macro sbc.h \1 ; CS if Y:A >= memory. .endm +; ************** +; optimized boolean test +; C is true (1) if Y:A == memory-value, else false (0) +; this MUST set the C flag for the subsequent branches! 
+ +__equ_w.um .macro + cmp \1 + bne !false+ + tya + beq !+ +!false: clc +!: + .endm + +; ************** +; optimized boolean test +; C is true (1) if Y:A != memory-value, else false (0) +; this MUST set the C flag for the subsequent branches! + +__neq_w.um .macro + sec + eor \1 + bne !+ + tya + bne !+ + clc +!: + .endm + +; ************** +; optimized boolean test (signed word) +; C is true (1) if Y:A < memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__slt_w.um .macro + cmp \1 ; Subtract memory from Y:A. + tya + sbc #0 + bvc !+ + eor #$80 ; -ve if Y:A < memory (signed). +!: asl a + .endm + +; ************** +; optimized boolean test (signed word) +; C is true (1) if Y:A <= memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__sle_w.um .macro + clc ; Subtract memory+1 from Y:A. + sbc \1 + tya + sbc #0 + bvc !+ + eor #$80 ; -ve if Y:A <= memory (signed). +!: asl a + .endm + +; ************** +; optimized boolean test (signed word) +; C is true (1) if Y:A > memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__sgt_w.um .macro + clc ; Subtract memory+1 from Y:A. + sbc \1 + tya + sbc #0 + bvc !+ + eor #$80 ; +ve if Y:A > memory (signed). +!: eor #$80 + asl a + .endm + +; ************** +; optimized boolean test (signed word) +; C is true (1) if Y:A >= memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__sge_w.um .macro + cmp \1 ; Subtract memory from Y:A. + tya + sbc #0 + bvc !+ + eor #$80 ; +ve if Y:A >= memory (signed). +!: eor #$80 + asl a + .endm + +; ************** +; optimized boolean test (unsigned word) +; C is true (1) if Y:A < memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__ult_w.um .macro + cmp \1 ; Subtract memory from Y:A. + tya + sbc #0 ; CC if Y:A < memory. 
+ ror a + eor #$80 + rol a + .endm + +; ************** +; optimized boolean test (unsigned word) +; C is true (1) if Y:A <= memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__ule_w.um .macro + clc ; Subtract memory+1 from Y:A. + sbc \1 + tya + sbc #0 ; CC if Y:A <= memory. + ror a + eor #$80 + rol a + .endm + +; ************** +; optimized boolean test (unsigned word) +; C is true (1) if Y:A > memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__ugt_w.um .macro + clc ; Subtract memory+1 from Y:A. + sbc \1 + tya + sbc #0 ; CS if Y:A > memory. + .endm + +; ************** +; optimized boolean test (unsigned word) +; C is true (1) if Y:A >= memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__uge_w.um .macro + cmp \1 ; Subtract memory from Y:A. + tya + sbc #0 ; CS if Y:A >= memory. + .endm + ; ************** ; optimized boolean test ; C is true (1) if A == memory-value, else false (0) @@ -1394,6 +1535,157 @@ __uge_w.ws .macro sbc.h <__stack + \1, x; CS if Y:A >= memory. .endm +; ************** +; optimized boolean test +; C is true (1) if Y:A == memory-value, else false (0) +; this MUST set the C flag for the subsequent branches! + +__equ_w.us .macro + ldx.l <__sp + cmp <__stack + \1, x + bne !false+ + tya + beq !+ +!false: clc +!: + .endm + +; ************** +; optimized boolean test +; C is true (1) if Y:A != memory-value, else false (0) +; this MUST set the C flag for the subsequent branches! + +__neq_w.us .macro + ldx.l <__sp + sec + eor <__stack + \1, x + bne !+ + tya + bne !+ + clc +!: + .endm + +; ************** +; optimized boolean test (signed word) +; C is true (1) if Y:A < memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__slt_w.us .macro + ldx.l <__sp + cmp <__stack + \1, x; Subtract memory from Y:A. + tya + sbc #0 + bvc !+ + eor #$80 ; -ve if Y:A < memory (signed). 
+!: asl a + .endm + +; ************** +; optimized boolean test (signed word) +; C is true (1) if Y:A <= memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__sle_w.us .macro + ldx.l <__sp + clc ; Subtract memory+1 from Y:A. + sbc <__stack + \1, x + tya + sbc #0 + bvc !+ + eor #$80 ; -ve if Y:A <= memory (signed). +!: asl a + .endm + +; ************** +; optimized boolean test (signed word) +; C is true (1) if Y:A > memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__sgt_w.us .macro + ldx.l <__sp + clc ; Subtract memory+1 from Y:A. + sbc <__stack + \1, x + tya + sbc #0 + bvc !+ + eor #$80 ; +ve if Y:A > memory (signed). +!: eor #$80 + asl a + .endm + +; ************** +; optimized boolean test (signed word) +; C is true (1) if Y:A >= memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__sge_w.us .macro + ldx.l <__sp + cmp <__stack + \1, x; Subtract memory from Y:A. + tya + sbc #0 + bvc !+ + eor #$80 ; +ve if Y:A >= memory (signed). +!: eor #$80 + asl a + .endm + +; ************** +; optimized boolean test (unsigned word) +; C is true (1) if Y:A < memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__ult_w.us .macro + ldx.l <__sp + cmp <__stack + \1, x; Subtract memory from Y:A. + tya + sbc #0 ; CC if Y:A < memory. + ror a + eor #$80 + rol a + .endm + +; ************** +; optimized boolean test (unsigned word) +; C is true (1) if Y:A <= memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__ule_w.us .macro + ldx.l <__sp + clc ; Subtract memory+1 from Y:A. + sbc <__stack + \1, x + tya + sbc #0 ; CC if Y:A <= memory. + ror a + eor #$80 + rol a + .endm + +; ************** +; optimized boolean test (unsigned word) +; C is true (1) if Y:A > memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__ugt_w.us .macro + ldx.l <__sp + clc ; Subtract memory+1 from Y:A. 
+ sbc <__stack + \1, x + tya + sbc #0 ; CS if Y:A > memory. + .endm + +; ************** +; optimized boolean test (unsigned word) +; C is true (1) if Y:A >= memory-value, else false (0) +; this MUST set the C flag for the susequent branches! + +__uge_w.us .macro + ldx.l <__sp + cmp <__stack + \1, x; Subtract memory from Y:A. + tya + sbc #0 ; CS if Y:A >= memory. + .endm + ; ************** ; optimized boolean test ; C is true (1) if A == memory-value, else false (0) From 87de700e991880455b9e8acd0c85bc77cc4d75cf Mon Sep 17 00:00:00 2001 From: John Brandwood Date: Wed, 18 Dec 2024 12:44:13 -0500 Subject: [PATCH 3/4] Allow HuCC's code-generator to perform some slightly higher-level optimizations by letting it look inside the peephole optimizer's queue of instructions. The first goal is to allow it to flip the lval and rval of expression operators when doing so would avoid the need to push the primary register onto the stack. This new optimization is currently only implemented for comparison operators. 
--- src/hucc/code.c | 36 ++++- src/hucc/defs.h | 10 +- src/hucc/expr.c | 366 +++++++++++++++++++++++++++++++++++++++----- src/hucc/gen.c | 136 ---------------- src/hucc/gen.h | 13 -- src/hucc/optimize.c | 62 +++++--- 6 files changed, 411 insertions(+), 212 deletions(-) diff --git a/src/hucc/code.c b/src/hucc/code.c index ac5c0c84..cdbdcda8 100644 --- a/src/hucc/code.c +++ b/src/hucc/code.c @@ -200,6 +200,9 @@ void out_ins_cmp (int code, int type) void gen_ins (INS *tmp) { + static unsigned sequence = 0; + tmp->sequence = sequence++; + if (optimize) push_ins(tmp); else { @@ -274,10 +277,14 @@ static void out_addr (int type, intptr_t data) void dump_ins (INS *tmp) { + INS copy = *tmp; FILE *save = output; + if (copy.ins_code == I_DEBUG) + copy.ins_data = 0; + output = stdout; - gen_code(tmp); + gen_code(&copy); output = save; } @@ -322,9 +329,12 @@ void gen_code (INS *tmp) outstr("\", "); outdec((int)imm_data); outstr("; "); - outstr(source); + if (source) { + outstr(source); + free((void *)data); + tmp->ins_data = 0; + } nl(); - free((void *)data); } break; case T_CLEAR_LINE: @@ -611,6 +621,14 @@ void gen_code (INS *tmp) nl(); break; + case X_CMP_UM: + ot("__"); + outstr(compare2str[tmp->cmp_type]); + outstr("_w.um\t"); + out_type(type, data); + nl(); + break; + case X_CMP_WS: ot("__"); outstr(compare2str[tmp->cmp_type]); @@ -620,6 +638,15 @@ void gen_code (INS *tmp) nl(); break; + case X_CMP_US: + ot("__"); + outstr(compare2str[tmp->cmp_type]); + outstr("_w.us\t"); + outdec((int)data); + outlocal(tmp->sym); + nl(); + break; + case X_CMP_UIQ: ot("__"); outstr(compare2str[tmp->cmp_type]); @@ -1809,6 +1836,9 @@ void gen_code (INS *tmp) gen_asm(tmp); break; } + + /* mark the instruction as invalid */ + tmp->ins_code = I_RETIRED; } /* ---- diff --git a/src/hucc/defs.h b/src/hucc/defs.h index 81b4ebf2..028fc629 100644 --- a/src/hucc/defs.h +++ b/src/hucc/defs.h @@ -4,6 +4,7 @@ #define INCLUDE_DEFS_H #define ULI_NORECURSE 1 +// #define DEBUG_OPTIMIZER /* * i-code pseudo
instructions @@ -12,9 +13,13 @@ * as the table of i-code flag information in optimize.c */ enum ICODE { + /* i-code to mark an instruction as retired */ + + I_RETIRED = 0, + /* i-code for debug information */ - I_DEBUG = 1, + I_DEBUG, /* i-code that retires the primary register contents */ @@ -75,7 +80,9 @@ enum ICODE { I_CMP_WT, X_CMP_WI, X_CMP_WM, + X_CMP_UM, X_CMP_WS, + X_CMP_US, X_CMP_UIQ, X_CMP_UMQ, @@ -578,6 +585,7 @@ struct macro { /* pseudo instruction structure */ typedef struct { + unsigned sequence; enum ICODE ins_code; enum ICOMPARE cmp_type; int ins_type; diff --git a/src/hucc/expr.c b/src/hucc/expr.c index 536a2e8f..e5d48811 100644 --- a/src/hucc/expr.c +++ b/src/hucc/expr.c @@ -18,6 +18,23 @@ #include "lex.h" #include "primary.h" #include "sym.h" +#include "optimize.h" + +#define INFORM_POTENTIAL_REORDER 0 + +/* map each comparison to the equivalent one when its operands are swapped */ +int compare2swap [] = { + CMP_EQU, // CMP_EQU + CMP_NEQ, // CMP_NEQ + CMP_SGT, // CMP_SLT + CMP_SGE, // CMP_SLE + CMP_SLT, // CMP_SGT + CMP_SLE, // CMP_SGE + CMP_UGT, // CMP_ULT + CMP_UGE, // CMP_ULE + CMP_ULT, // CMP_UGT + CMP_ULE // CMP_UGE +}; /* * lval->symbol - symbol table address, else 0 for constant @@ -118,6 +135,8 @@ int heir1 (LVALUE *lval, int comma) int k; LVALUE lval2[1] = {{0}}; char fc; + INS variable; + variable.ins_code = 0; k = heir1a(lval, comma); if (match("=")) { @@ -125,13 +144,33 @@ int heir1 (LVALUE *lval, int comma) needlval(); return (0); } - if (lval->indirect) + if (lval->indirect) { +#if 0 + /* peek at the output to see the variable's address type */ + if (q_nb) { + if + ((q_ins[q_wr].ins_code == I_LEA_S) || + (q_ins[q_wr].ins_code == I_LD_WI) || + (q_ins[q_wr].ins_code == I_ADD_WI && + q_ins[q_wr].ins_type == T_SYMBOL && + is_small_array((SYMBOL *)q_ins[q_wr].ins_data)) + ) { + variable = q_ins[q_wr]; +// if ((--q_wr) < 0) +// q_wr += Q_SIZE; + printf("\nassignment: "); + dump_ins(&variable); + } + } +#endif gpush(); + } if (heir1(lval2, comma)) rvalue(lval2); if (lval2->val_type
== CVOID) void_value_error(lval2); - store(lval); + if (variable.ins_code == 0) + store(lval); return (0); } else { @@ -150,8 +189,27 @@ int heir1 (LVALUE *lval, int comma) needlval(); return (0); } - if (lval->indirect) + if (lval->indirect) { +#if 0 + /* peek at the output to see the variable's address type */ + if (q_nb) { + if + ((q_ins[q_wr].ins_code == I_LEA_S) || + (q_ins[q_wr].ins_code == I_LD_WI) || + (q_ins[q_wr].ins_code == I_ADD_WI && + q_ins[q_wr].ins_type == T_SYMBOL && + is_small_array((SYMBOL *)q_ins[q_wr].ins_data)) + ) { + variable = q_ins[q_wr]; +// if ((--q_wr) < 0) +// q_wr += Q_SIZE; + printf("\nassignment: "); + dump_ins(&variable); + } + } +#endif gpush(); + } rvalue(lval); gpush(); if (heir1(lval2, comma)) @@ -176,11 +234,12 @@ int heir1 (LVALUE *lval, int comma) case '%': gmod(is_unsigned(lval) || is_unsigned(lval2)); break; case '>': gasr(is_unsigned(lval)); break; case '<': gasl(); break; - case '&': gand(); break; - case '^': gxor(); break; - case '|': gor(); break; + case '&': out_ins(I_AND_WT, 0, 0); break; + case '^': out_ins(I_EOR_WT, 0, 0); break; + case '|': out_ins(I_OR_WT, 0, 0); break; } - store(lval); + if (variable.ins_code == 0) + store(lval); return (0); } else @@ -189,7 +248,7 @@ int heir1 (LVALUE *lval, int comma) } /* - * processes ? : expression + * processes "? 
:" expression * @param lval * @return 0 or 1, fetch or no fetch */ @@ -231,7 +290,7 @@ int heir1a (LVALUE *lval, int comma) } /* - * processes logical or || + * processes logical "or" * @param lval * @return 0 or 1, fetch or no fetch */ @@ -265,7 +324,7 @@ int heir1b (LVALUE *lval, int comma) } /* - * processes logical and && + * processes logical "and" * @param lval * @return 0 or 1, fetch or no fetch */ @@ -299,7 +358,7 @@ int heir1c (LVALUE *lval, int comma) } /* - * processes bitwise or | + * processes bitwise "or" * @param lval * @return 0 or 1, fetch or no fetch */ @@ -307,6 +366,8 @@ int heir2 (LVALUE *lval, int comma) { int k; LVALUE lval2[1] = {{0}}; + int linst; + unsigned lseqn; k = heir3(lval, comma); blanks(); @@ -318,6 +379,10 @@ int heir2 (LVALUE *lval, int comma) if (lval->val_type == CVOID) void_value_error(lval); FOREVER { + /* remember the lval position in the peephole instruction queue */ + linst = q_wr; + lseqn = q_ins[q_wr].sequence; + if ((ch() == '|') && (nch() != '|') && (nch() != '=')) { inbyte(); gpush(); @@ -325,16 +390,29 @@ int heir2 (LVALUE *lval, int comma) rvalue(lval2); if (lval2->val_type == CVOID) void_value_error(lval2); - gor(); + out_ins(I_OR_WT, 0, 0); blanks(); } else return (0); + + /* is this a candidate for reordering */ + if (optimize >= 2 && q_nb && q_ins[q_wr].ins_code == I_OR_WT) { + /* is the lval still in the peephole instruction queue? */ + if (q_ins[linst].ins_code != I_RETIRED && q_ins[linst].sequence == lseqn) { +#if INFORM_POTENTIAL_REORDER + printf("stacked operator \"|\" with: "); + dump_ins(&q_ins[linst]); + printf("File \"%s\", Line %d\n", (inclsp) ? 
inclstk_name[inclsp - 1] : fname_copy, line_number); + printf("%s\n\n", line); +#endif + } + } } } /* - * processes bitwise exclusive or + * processes bitwise "exclusive or" * @param lval * @return 0 or 1, fetch or no fetch */ @@ -342,6 +420,8 @@ int heir3 (LVALUE *lval, int comma) { int k; LVALUE lval2[1] = {{0}}; + int linst; + unsigned lseqn; k = heir4(lval, comma); blanks(); @@ -353,6 +433,10 @@ int heir3 (LVALUE *lval, int comma) if (lval->val_type == CVOID) void_value_error(lval); FOREVER { + /* remember the lval position in the peephole instruction queue */ + linst = q_wr; + lseqn = q_ins[q_wr].sequence; + if ((ch() == '^') && (nch() != '=')) { inbyte(); gpush(); @@ -360,16 +444,29 @@ int heir3 (LVALUE *lval, int comma) rvalue(lval2); if (lval2->val_type == CVOID) void_value_error(lval2); - gxor(); + out_ins(I_EOR_WT, 0, 0); blanks(); } else return (0); + + /* is this a candidate for reordering */ + if (optimize >= 2 && q_nb && q_ins[q_wr].ins_code == I_EOR_WT) { + /* is the lval still in the peephole instruction queue? */ + if (q_ins[linst].ins_code != I_RETIRED && q_ins[linst].sequence == lseqn) { +#if INFORM_POTENTIAL_REORDER + printf("stacked operator \"^\" with: "); + dump_ins(&q_ins[linst]); + printf("File \"%s\", Line %d\n", (inclsp) ? 
inclstk_name[inclsp - 1] : fname_copy, line_number); + printf("%s\n\n", line); +#endif + } + } } } /* - * processes bitwise and & + * processes bitwise "and" * @param lval * @return 0 or 1, fetch or no fetch */ @@ -377,6 +474,8 @@ int heir4 (LVALUE *lval, int comma) { int k; LVALUE lval2[1] = {{0}}; + int linst; + unsigned lseqn; k = heir5(lval, comma); blanks(); @@ -388,6 +487,10 @@ int heir4 (LVALUE *lval, int comma) if (lval->val_type == CVOID) void_value_error(lval); FOREVER { + /* remember the lval position in the peephole instruction queue */ + linst = q_wr; + lseqn = q_ins[q_wr].sequence; + if ((ch() == '&') && (nch() != '&') && (nch() != '=')) { inbyte(); gpush(); @@ -395,16 +498,29 @@ int heir4 (LVALUE *lval, int comma) rvalue(lval2); if (lval2->val_type == CVOID) void_value_error(lval2); - gand(); + out_ins(I_AND_WT, 0, 0); blanks(); } else return (0); + + /* is this a candidate for reordering */ + if (optimize >= 2 && q_nb && q_ins[q_wr].ins_code == I_AND_WT) { + /* is the lval still in the peephole instruction queue? */ + if (q_ins[linst].ins_code != I_RETIRED && q_ins[linst].sequence == lseqn) { +#if INFORM_POTENTIAL_REORDER + printf("stacked operator \"&\" with: "); + dump_ins(&q_ins[linst]); + printf("File \"%s\", Line %d\n", (inclsp) ? 
inclstk_name[inclsp - 1] : fname_copy, line_number); + printf("%s\n\n", line); +#endif + } + } } } /* - * processes equal and not equal operators + * processes "equal" and "not equal" operators * @param lval * @return 0 or 1, fetch or no fetch */ @@ -412,6 +528,8 @@ int heir5 (LVALUE *lval, int comma) { int k; LVALUE lval2[1] = {{0}}; + int linst; + unsigned lseqn; k = heir6(lval, comma); blanks(); @@ -424,13 +542,17 @@ int heir5 (LVALUE *lval, int comma) if (lval->val_type == CVOID) void_value_error(lval); FOREVER { + /* remember the lval position in the peephole instruction queue */ + linst = q_wr; + lseqn = q_ins[q_wr].sequence; + if (match("==")) { gpush(); if (heir6(lval2, comma)) rvalue(lval2); if (lval2->val_type == CVOID) void_value_error(lval2); - geq(); + out_ins_cmp(I_CMP_WT, CMP_EQU); } else if (match("!=")) { gpush(); @@ -438,10 +560,68 @@ int heir5 (LVALUE *lval, int comma) rvalue(lval2); if (lval2->val_type == CVOID) void_value_error(lval2); - gne(); + out_ins_cmp(I_CMP_WT, CMP_NEQ); } else return (0); + + /* is this a candidate for reordering */ + if (optimize >= 2 && q_nb && q_ins[q_wr].ins_code == I_CMP_WT) { + /* is the lval still in the peephole instruction queue? */ + if (q_ins[linst].ins_code != I_RETIRED && q_ins[linst].sequence == lseqn) { +#if INFORM_POTENTIAL_REORDER + printf("stacked comparison with: "); + dump_ins(&q_ins[linst]); + printf("File \"%s\", Line %d\n", (inclsp) ? 
inclstk_name[inclsp - 1] : fname_copy, line_number); + printf("%s\n\n", line); +#endif + if + ((q_ins[linst].ins_code == I_LD_WI) || + (q_ins[linst].ins_code == I_LD_WM) || + (q_ins[linst].ins_code == I_LD_UM) || + (q_ins[linst].ins_code == X_LD_WS) || + (q_ins[linst].ins_code == X_LD_US) + ) { + /* preserve the lval instructions */ + INS parked[1]; + int copy, from; + int compare = compare2swap[q_ins[q_wr].cmp_type]; + parked[0] = q_ins[linst]; + /* remove both the lval and rval instructions */ + copy = q_wr - linst; + if (copy++ < 0) + copy += Q_SIZE; + q_nb -= copy; + q_wr = linst - 1; + if (q_wr < 0) + q_wr += Q_SIZE; + /* re-insert the rval instructions */ + from = linst + 2; /* skip I_LD_WM, I_PUSH_WR */ + copy = copy - 3; /* skip I_LD_WM, I_PUSH_WR, I_CMP_WT */ + for (; copy > 0; copy--) { + if (from >= Q_SIZE) + from -= Q_SIZE; +#ifdef DEBUG_OPTIMIZER + printf("\nReinserting after reordering ..."); +#endif + push_ins(&q_ins[from++]); + } + /* re-insert the lval instructions */ + gpush(); + push_ins(&parked[0]); + out_ins_cmp(I_CMP_WT, compare); +#if INFORM_POTENTIAL_REORDER + printf("reorder comparison with: "); + dump_ins(&q_ins[q_wr]); + printf("\n\n"); +#endif + } + } + } + + /* convert the C flag into a boolean (usually removed by the optimizer) */ + out_ins(I_BOOLEAN, 0, 0); + blanks(); } } @@ -454,6 +634,8 @@ int heir6 (LVALUE *lval, int comma) { int k; LVALUE lval2[1] = {{0}}; + int linst; + unsigned lseqn; k = heir7(lval, comma); blanks(); @@ -471,6 +653,10 @@ int heir6 (LVALUE *lval, int comma) if (lval->val_type == CVOID) void_value_error(lval); FOREVER { + /* remember the lval position in the peephole instruction queue */ + linst = q_wr; + lseqn = q_ins[q_wr].sequence; + if (match("<=")) { gpush(); if (heir7(lval2, comma)) @@ -480,11 +666,10 @@ int heir6 (LVALUE *lval, int comma) if (lval->ptr_type || lval2->ptr_type || is_unsigned(lval) || is_unsigned(lval2) - ) { - gule(); - continue; - } - gle(); + ) + out_ins_cmp(I_CMP_WT, CMP_ULE); + else + 
out_ins_cmp(I_CMP_WT, CMP_SLE); } else if (match(">=")) { gpush(); @@ -495,11 +680,10 @@ int heir6 (LVALUE *lval, int comma) if (lval->ptr_type || lval2->ptr_type || is_unsigned(lval) || is_unsigned(lval2) - ) { - guge(); - continue; - } - gge(); + ) + out_ins_cmp(I_CMP_WT, CMP_UGE); + else + out_ins_cmp(I_CMP_WT, CMP_SGE); } else if ((sstreq("<")) && !sstreq("<<")) { @@ -512,11 +696,10 @@ int heir6 (LVALUE *lval, int comma) if (lval->ptr_type || lval2->ptr_type || is_unsigned(lval) || is_unsigned(lval2) - ) { - gult(); - continue; - } - glt(); + ) + out_ins_cmp(I_CMP_WT, CMP_ULT); + else + out_ins_cmp(I_CMP_WT, CMP_SLT); } else if ((sstreq(">")) && !sstreq(">>")) { @@ -529,15 +712,70 @@ int heir6 (LVALUE *lval, int comma) if (lval->ptr_type || lval2->ptr_type || is_unsigned(lval) || is_unsigned(lval2) - ) { - gugt(); - continue; - } - ggt(); + ) + out_ins_cmp(I_CMP_WT, CMP_UGT); + else + out_ins_cmp(I_CMP_WT, CMP_SGT); } else return (0); + /* is this a candidate for reordering */ + if (optimize >= 2 && q_nb && q_ins[q_wr].ins_code == I_CMP_WT) { + /* is the lval still in the peephole instruction queue? */ + if (q_ins[linst].ins_code != I_RETIRED && q_ins[linst].sequence == lseqn) { +#if INFORM_POTENTIAL_REORDER + printf("stacked comparison with: "); + dump_ins(&q_ins[linst]); + printf("File \"%s\", Line %d\n", (inclsp) ? 
inclstk_name[inclsp - 1] : fname_copy, line_number); + printf("%s\n\n", line); +#endif + if + ((q_ins[linst].ins_code == I_LD_WI) || + (q_ins[linst].ins_code == I_LD_WM) || + (q_ins[linst].ins_code == I_LD_UM) || + (q_ins[linst].ins_code == X_LD_WS) || + (q_ins[linst].ins_code == X_LD_US) + ) { + /* preserve the lval instructions */ + INS parked[1]; + int copy, from; + int compare = compare2swap[q_ins[q_wr].cmp_type]; + parked[0] = q_ins[linst]; + /* remove both the lval and rval instructions */ + copy = q_wr - linst; + if (copy++ < 0) + copy += Q_SIZE; + q_nb -= copy; + q_wr = linst - 1; + if (q_wr < 0) + q_wr += Q_SIZE; + /* re-insert the rval instructions */ + from = linst + 2; /* skip I_LD_WM, I_PUSH_WR */ + copy = copy - 3; /* skip I_LD_WM, I_PUSH_WR, I_CMP_WT */ + for (; copy > 0; copy--) { + if (from >= Q_SIZE) + from -= Q_SIZE; +#ifdef DEBUG_OPTIMIZER + printf("\nReinserting after reordering ..."); +#endif + push_ins(&q_ins[from++]); + } + /* re-insert the lval instructions */ + gpush(); + push_ins(&parked[0]); + out_ins_cmp(I_CMP_WT, compare); +#if INFORM_POTENTIAL_REORDER + printf("reorder comparison with: "); + dump_ins(&q_ins[q_wr]); + printf("\n\n"); +#endif + } + } + } + + /* convert the C flag into a boolean (usually removed by the optimizer) */ + out_ins(I_BOOLEAN, 0, 0); blanks(); } } @@ -597,6 +835,8 @@ int heir8 (LVALUE *lval, int comma) { int k; LVALUE lval2[1] = {{0}}; + int linst; + unsigned lseqn; k = heir9(lval, comma); blanks(); @@ -608,6 +848,10 @@ int heir8 (LVALUE *lval, int comma) if (lval->val_type == CVOID) void_value_error(lval); FOREVER { + /* remember the lval position in the peephole instruction queue */ + linst = q_wr; + lseqn = q_ins[q_wr].sequence; + if (match("+")) { gpush(); if (heir9(lval2, comma)) @@ -616,8 +860,21 @@ int heir8 (LVALUE *lval, int comma) void_value_error(lval2); /* if left is pointer and right is int, scale right */ gen_scale_right(lval, lval2); - /* will scale left if right int pointer and left int */ + /* 
will scale left if right is pointer and left int */ gadd(lval, lval2); + + /* is this a candidate for reordering */ + if (optimize >= 2 && q_nb && q_ins[q_wr].ins_code == I_ADD_WT) { + /* is the lval still in the peephole instruction queue? */ + if (q_ins[linst].ins_code != I_RETIRED && q_ins[linst].sequence == lseqn) { +#if INFORM_POTENTIAL_REORDER + printf("stacked operation \"+\" with: "); + dump_ins(&q_ins[linst]); + printf("File \"%s\", Line %d\n", (inclsp) ? inclstk_name[inclsp - 1] : fname_copy, line_number); + printf("%s\n\n", line); +#endif + } + } result(lval, lval2); } else if (match("-")) { @@ -632,6 +889,19 @@ int heir8 (LVALUE *lval, int comma) in second, result is scaled down. */ gen_scale_right(lval, lval2); gsub(); + + /* is this a candidate for reordering */ + if (optimize >= 2 && q_nb && q_ins[q_wr].ins_code == I_SUB_WT) { + /* is the lval still in the peephole instruction queue? */ + if (q_ins[linst].ins_code != I_RETIRED && q_ins[linst].sequence == lseqn) { +#if INFORM_POTENTIAL_REORDER + printf("stacked operation \"-\" with: "); + dump_ins(&q_ins[linst]); + printf("File \"%s\", Line %d\n", (inclsp) ? 
inclstk_name[inclsp - 1] : fname_copy, line_number); + printf("%s\n\n", line); +#endif + } + } /* if both pointers, scale result */ if ((lval->ptr_type == CINT || lval->ptr_type == CUINT || is_ptrptr(lval)) && (lval2->ptr_type == CINT || lval2->ptr_type == CUINT || is_ptrptr(lval2))) @@ -659,6 +929,8 @@ int heir9 (LVALUE *lval, int comma) { int k; LVALUE lval2[1] = {{0}}; + int linst; + unsigned lseqn; k = heir10(lval, comma); blanks(); @@ -671,6 +943,8 @@ int heir9 (LVALUE *lval, int comma) if (lval->val_type == CVOID) void_value_error(lval); FOREVER { + linst = q_wr; + lseqn = q_ins[q_wr].sequence; if (match("*")) { gpush(); if (heir10(lval2, comma)) @@ -678,6 +952,18 @@ int heir9 (LVALUE *lval, int comma) if (lval2->val_type == CVOID) void_value_error(lval2); gmult(is_unsigned(lval) || is_unsigned(lval2)); + /* is this a candidate for reordering */ + if (optimize >= 2 && q_nb && q_ins[q_wr].ins_code == I_MUL_WT) { + /* is the lval still in the peephole instruction queue? */ + if (q_ins[linst].ins_code != I_RETIRED && q_ins[linst].sequence == lseqn) { +#if INFORM_POTENTIAL_REORDER + printf("stacked operation \"*\" with: "); + dump_ins(&q_ins[linst]); + printf("File \"%s\", Line %d\n", (inclsp) ? 
inclstk_name[inclsp - 1] : fname_copy, line_number); + printf("%s\n\n", line); +#endif + } + } } else if (match("/")) { gpush(); diff --git a/src/hucc/gen.c b/src/hucc/gen.c index 7fa1ce6f..f28c9ee6 100644 --- a/src/hucc/gen.c +++ b/src/hucc/gen.c @@ -372,33 +372,6 @@ void gmod (int is_unsigned) out_ins(I_SMOD_WT, 0, 0); } -/* - * inclusive 'or' the primary and secondary registers - * - */ -void gor (void) -{ - out_ins(I_OR_WT, 0, 0); -} - -/* - * exclusive 'or' the primary and secondary registers - * - */ -void gxor (void) -{ - out_ins(I_EOR_WT, 0, 0); -} - -/* - * 'and' the primary and secondary registers - * - */ -void gand (void) -{ - out_ins(I_AND_WT, 0, 0); -} - /* * arithmetic shift right the secondary register the number of * times in the primary register @@ -502,115 +475,6 @@ void gdec (LVALUE *lval) out_ins(I_SUB_WI, T_VALUE, 1); } -/* - * following are the conditional operators. - * they compare the secondary register against the primary register - * and put a literl 1 in the primary if the condition is true, - * otherwise they clear the primary register - * - */ - -/* - * equal - * - */ - -void geq (void) -{ - out_ins_cmp(I_CMP_WT, CMP_EQU); - out_ins(I_BOOLEAN, 0, 0); -} - -/* - * not equal - * - */ -void gne (void) -{ - out_ins_cmp(I_CMP_WT, CMP_NEQ); - out_ins(I_BOOLEAN, 0, 0); -} - -/* - * less than (signed) - * - */ -void glt (void) -{ - out_ins_cmp(I_CMP_WT, CMP_SLT); - out_ins(I_BOOLEAN, 0, 0); -} - -/* - * less than or equal (signed) - * - */ -void gle (void) -{ - out_ins_cmp(I_CMP_WT, CMP_SLE); - out_ins(I_BOOLEAN, 0, 0); -} - -/* - * greater than (signed) - * - */ -void ggt (void) -{ - out_ins_cmp(I_CMP_WT, CMP_SGT); - out_ins(I_BOOLEAN, 0, 0); -} - -/* - * greater than or equal (signed) - * - */ -void gge (void) -{ - out_ins_cmp(I_CMP_WT, CMP_SGE); - out_ins(I_BOOLEAN, 0, 0); -} - -/* - * less than (unsigned) - * - */ -void gult (void) -{ - out_ins_cmp(I_CMP_WT, CMP_ULT); - out_ins(I_BOOLEAN, 0, 0); -} - -/* - * less than or equal (unsigned) 
- * - */ -void gule (void) -{ - out_ins_cmp(I_CMP_WT, CMP_ULE); - out_ins(I_BOOLEAN, 0, 0); -} - -/* - * greater than (unsigned) - * - */ -void gugt (void) -{ - out_ins_cmp(I_CMP_WT, CMP_UGT); - out_ins(I_BOOLEAN, 0, 0); -} - -/* - * greater than or equal (unsigned) - * - */ -void guge (void) -{ - out_ins_cmp(I_CMP_WT, CMP_UGE); - out_ins(I_BOOLEAN, 0, 0); -} - void scale_const (int type, int otag, int *size) { switch (type) { diff --git a/src/hucc/gen.h b/src/hucc/gen.h index 7b1a1ac9..61352e25 100644 --- a/src/hucc/gen.h +++ b/src/hucc/gen.h @@ -29,9 +29,6 @@ void gmult_imm (int value); void gdiv (int is_unsigned); void gdiv_imm (int value); void gmod (int is_unsigned); -void gor (void); -void gxor (void); -void gand (void); void gasr (int is_unsigned); void gasl (void); void gneg (void); @@ -41,16 +38,6 @@ void gtest (void); void gnot (void); void ginc (LVALUE *lval); void gdec (LVALUE *lval); -void geq (void); -void gne (void); -void glt (void); -void gle (void); -void ggt (void); -void gge (void); -void gult (void); -void gule (void); -void gugt (void); -void guge (void); void gcast (int type); void gsei (void); void gcli (void); diff --git a/src/hucc/optimize.c b/src/hucc/optimize.c index e9d65918..b755d28d 100644 --- a/src/hucc/optimize.c +++ b/src/hucc/optimize.c @@ -3,17 +3,6 @@ * */ -// #define DEBUG_OPTIMIZER - -#define OPT_ARRAY_RD 1 -#define OPT_ARRAY_WR 1 - -#ifdef DEBUG_OPTIMIZER -#define ODEBUG(...) printf( __VA_ARGS__ ) -#else -#define ODEBUG(...) -#endif - #include #include #include @@ -39,13 +28,24 @@ #pragma GCC diagnostic ignored "-Wstringop-overread" #endif +#define OPT_ARRAY_RD 1 +#define OPT_ARRAY_WR 1 + +#ifdef DEBUG_OPTIMIZER +#define ODEBUG(...) printf( __VA_ARGS__ ) +#else +#define ODEBUG(...) +#endif + /* flag information for each of the i-code instructions */ /* * N.B. 
this table MUST be kept updated and in the same order as the i-code * enum list in defs.h */ unsigned char icode_flags[] = { - 0, + // i-code to mark an instrucion as retired */ + + /* I_RETIRED */ 0, // i-code for debug information @@ -110,7 +110,9 @@ unsigned char icode_flags[] = { /* I_CMP_WT */ IS_USEPR, /* X_CMP_WI */ IS_USEPR, /* X_CMP_WM */ IS_USEPR, + /* X_CMP_UM */ IS_USEPR, /* X_CMP_WS */ IS_USEPR + IS_SPREL, + /* X_CMP_US */ IS_USEPR + IS_SPREL, /* X_CMP_UIQ */ IS_USEPR + IS_UBYTE, /* X_CMP_UMQ */ IS_USEPR + IS_UBYTE, @@ -1385,23 +1387,33 @@ void push_ins (INS *ins) } /* - * __push.wr --> __cmp.wi type, i + * __push.wr --> __cmp_w.wi type, i * __ld.wi i - * __cmp.wt type + * __cmp_w.wt type * - * __push.wr --> __cmp.wm type, symbol + * __push.wr --> __cmp_w.wm type, symbol * __ld.wm symbol - * __cmp.wt type + * __cmp_w.wt type * - * __push.wr --> __cmp.ws type, n + * __push.wr --> __cmp_w.um type, symbol + * __ld.um symbol + * __cmp_w.wt type + * + * __push.wr --> __cmp_w.ws type, n * __ld.ws n - * __cmp.wt type + * __cmp_w.wt type + * + * __push.wr --> __cmp_w.us type, n + * __ld.us n + * __cmp_w.wt type */ else if ((p[0]->ins_code == I_CMP_WT) && (p[1]->ins_code == I_LD_WI || p[1]->ins_code == I_LD_WM || - p[1]->ins_code == X_LD_WS) && + p[1]->ins_code == I_LD_UM || + p[1]->ins_code == X_LD_WS || + p[1]->ins_code == X_LD_US) && (p[2]->ins_code == I_PUSH_WR) ) { /* replace code */ @@ -1409,7 +1421,9 @@ void push_ins (INS *ins) switch (p[1]->ins_code) { case I_LD_WI: p[2]->ins_code = X_CMP_WI; break; case I_LD_WM: p[2]->ins_code = X_CMP_WM; break; + case I_LD_UM: p[2]->ins_code = X_CMP_UM; break; case X_LD_WS: p[2]->ins_code = X_CMP_WS; break; + case X_LD_US: p[2]->ins_code = X_CMP_US; break; default: break; } p[2]->cmp_type = p[0]->cmp_type; @@ -1429,10 +1443,18 @@ void push_ins (INS *ins) * __bool * __not.wr * + * __cmp.um --> __cmp.um + * __bool + * __not.wr + * * __cmp.ws --> __cmp.ws * __bool * __not.wr * + * __cmp.us --> __cmp.us + * __bool + * 
__not.wr + * * __cmp.uiq --> __cmp.uiq * __bool * __not.wr @@ -1454,7 +1476,9 @@ void push_ins (INS *ins) (p[2]->ins_code == I_CMP_WT || p[2]->ins_code == X_CMP_WI || p[2]->ins_code == X_CMP_WM || + p[2]->ins_code == X_CMP_UM || p[2]->ins_code == X_CMP_WS || + p[2]->ins_code == X_CMP_US || p[2]->ins_code == X_CMP_UIQ || p[2]->ins_code == X_CMP_UMQ || p[2]->ins_code == X_CMP_USQ) From 64fec7a6b80a9a9968113de6117b93a2eaefe07e Mon Sep 17 00:00:00 2001 From: John Brandwood Date: Thu, 19 Dec 2024 11:01:58 -0500 Subject: [PATCH 4/4] Fix stupid typos/errors made in the recent updates to hucc-baselib and hucc-scroll. --- include/hucc/hucc-baselib.asm | 8 ++++---- include/hucc/hucc-scroll.asm | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/hucc/hucc-baselib.asm b/include/hucc/hucc-baselib.asm index 48ff8055..df212bdb 100644 --- a/include/hucc/hucc-baselib.asm +++ b/include/hucc/hucc-baselib.asm @@ -272,8 +272,8 @@ _vsync.1 .macro _joy.1 .macro .if SUPPORT_6BUTTON tax - lda joy6now, x - ldy joynow, x + lda joynow, x + ldy joy6now, x .else tay lda joynow, y @@ -291,8 +291,8 @@ _joy.1 .macro _joytrg.1 .macro .if SUPPORT_6BUTTON tax - lda joy6trg, x - ldy joytrg, x + lda joytrg, x + ldy joy6trg, x .else tay lda joytrg, y diff --git a/include/hucc/hucc-scroll.asm b/include/hucc/hucc-scroll.asm index 4b6da4ef..c9b89c63 100644 --- a/include/hucc/hucc-scroll.asm +++ b/include/hucc/hucc-scroll.asm @@ -184,7 +184,6 @@ HUCC_SCR_HEIGHT = 224 lda #1 ; Mark that we've changed the sta sgx_region_new, x ; selected region. - sta sgx_region_sel, x !done: plp ; Restore interrupts. @@ -217,7 +216,6 @@ HUCC_SCR_HEIGHT = 224 lda #1 ; Mark that we've changed the sta sgx_region_new, x ; selected region. - stz sgx_region_sel, x !done: plp ; Restore interrupts. @@ -447,6 +445,8 @@ vbl_init_scroll .proc !next_region: dex ; All regions updated? bmi !save_first+ + stz sgx_region_new, x ; Clear region modified flag. 
+ ldy sgx_region_sel, x ; 0=regionA or 1=regionB. beq !use_regionA+ @@ -540,8 +540,8 @@ USING_RCR_MACROS = 1 ; Tell IRQ1 to use the macros. bra !clr_next_rcr+ ; 4 as if the branch were taken. !set_next_rcr: lda vdc_regionA_rcr, y ; 5 Set next RCR 1 line before - adc #64-1 ; 2 the region begins. -!clr_next_rcr: sta VDC_DL ; 6 + adc #64-1 ; 2 the region begins, or 0 to +!clr_next_rcr: sta VDC_DL ; 6 disable. cla ; 2 rol a ; 2 sta VDC_DH ; 6 @@ -587,8 +587,8 @@ USING_RCR_MACROS = 1 ; Tell IRQ1 to use the macros. bra !clr_next_rcr+ ; 4 as if the branch were taken. !set_next_rcr: lda sgx_regionA_rcr, y ; 5 Set next RCR 1 line before - adc #64-1 ; 2 the region begins. -!clr_next_rcr: sta SGX_DL ; 6 + adc #64-1 ; 2 the region begins, or 0 to +!clr_next_rcr: sta SGX_DL ; 6 disable. cla ; 2 rol a ; 2 sta SGX_DH ; 6 @@ -667,8 +667,8 @@ USING_RCR_MACROS = 1 ; Tell IRQ1 to use the macros. bra !clr_next_rcr+ ; 4 as if the branch were taken. !set_next_rcr: lda vdc_regionA_rcr, y ; 5 Set next RCR 1 line before - adc #64-1 ; 2 the region begins. -!clr_next_rcr: sta VDC_DL ; 6 + adc #64-1 ; 2 the region begins, or 0 to +!clr_next_rcr: sta VDC_DL ; 6 disable. cla ; 2 rol a ; 2 sta VDC_DH ; 6