Skip to content

Commit

Permalink
[Feature](mluOpExecFFT): fix dftmtx bugs
Browse files: browse the repository at this point in the history
  • Loading branch information
squidruge committed Jun 26, 2024
1 parent 828f7d4 commit bf9ba51
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions kernels/fft/fft_optm_device/fft_c2c_stockham_gdram.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,11 @@ __mlu_func__ void computeMutiStageOnchip(DT *input, DT *output, int *factors,
int last_radix = dft_table[entry].radix;
int last_offset = dft_table[entry].offset;

- // last_radix * last_radix < last_radix * 64
- sram_dftmtx_size = sizeof(DT) * 2 * (last_radix * 64 + last_offset);
- __memcpy_async(sram_dftmtx, dft_matrix, sram_dftmtx_size, GDRAM2SRAM);
+ const int K_num = 64 / sizeof(DT);
+ int align_K = K_num * ((last_radix + K_num - 1) / K_num);
+ __memcpy_async(sram_dftmtx, dft_matrix,
+ sizeof(DT) * 2 * (last_radix * align_K + last_offset),
+ GDRAM2SRAM);
break;
}
}
Expand Down Expand Up @@ -375,7 +377,7 @@ __mlu_func__ void computeMutiStageOnchipColumn(DT *input, DT *output,
int last_radix = dft_table[entry].radix;
int last_offset = dft_table[entry].offset;
const int K_num = 64 / sizeof(DT);
- int align_K = K_num * ((radix + K_num - 1) / K_num);
+ int align_K = K_num * ((last_radix + K_num - 1) / K_num);
__memcpy_async(sram_dftmtx, dft_matrix,
sizeof(DT) * 2 * (last_radix * align_K + last_offset),
GDRAM2SRAM);
Expand Down

0 comments on commit bf9ba51

Please sign in to comment.