Skip to content

Commit

Permalink
Fix page-attn block table out-of-bounds read
Browse files: browse the repository at this point in the history
  • Loading branch information
poyenc committed Nov 11, 2024
1 parent 23cb26b commit fa40153
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion csrc/composable_kernel
Submodule composable_kernel updated 62 files
+4 −4 CMakeLists.txt
+3 −3 Jenkinsfile
+3 −2 example/44_elementwise_permute/elementwise_scale_permute_amax_2D_fp16_fp8.cpp
+4 −4 example/CMakeLists.txt
+10 −10 example/ck_tile/01_fmha/script/smoke_test_fwd.sh
+57 −48 example/ck_tile/02_layernorm2d/generate.py
+8 −0 example/ck_tile/13_moe_sorting/CMakeLists.txt
+27 −0 example/ck_tile/13_moe_sorting/README.md
+223 −0 example/ck_tile/13_moe_sorting/moe_sorting.cpp
+73 −0 example/ck_tile/13_moe_sorting/moe_sorting_api.cpp
+20 −0 example/ck_tile/13_moe_sorting/moe_sorting_api.hpp
+19 −0 example/ck_tile/13_moe_sorting/script/smoke_test.sh
+1 −0 example/ck_tile/CMakeLists.txt
+5 −3 include/ck/ck.hpp
+6 −6 include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp
+12 −12 include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp
+6 −6 include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp
+12 −12 include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp
+4 −4 include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp
+3 −2 include/ck/utility/amd_wmma.hpp
+5 −3 include/ck_tile/core/config.hpp
+1 −0 include/ck_tile/host.hpp
+78 −0 include/ck_tile/host/reference/reference_moe_sorting.hpp
+6 −6 include/ck_tile/ops/common/generic_2d_block_shape.hpp
+9 −1 include/ck_tile/ops/fmha/block/page_block_navigator.hpp
+232 −0 include/ck_tile/ops/fused_moe/kernel/moe_sorting_kernel.hpp
+39 −0 include/ck_tile/ops/fused_moe/pipeline/moe_sorting_pipeline.hpp
+15 −0 include/ck_tile/ops/fused_moe/pipeline/moe_sorting_policy.hpp
+23 −0 include/ck_tile/ops/fused_moe/pipeline/moe_sorting_problem.hpp
+8 −4 include/ck_tile/ops/layernorm2d/pipeline/layernorm2d_fwd_pipeline_default_policy.hpp
+10 −1 include/ck_tile/ops/layernorm2d/pipeline/layernorm2d_fwd_pipeline_one_pass.hpp
+2 −0 include/ck_tile/ops/layernorm2d/pipeline/layernorm2d_fwd_traits.hpp
+11 −0 include/ck_tile/ops/moe_sorting.hpp
+24 −10 include/ck_tile/ops/welford/block/block_welford.hpp
+5 −4 include/ck_tile/ops/welford/block/block_welford_problem.hpp
+32 −11 include/ck_tile/ops/welford/thread/thread_welford.hpp
+74 −2 ...or_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_two_stage_xdl_instance.hpp
+16 −0 library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp
+100 −0 library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight_xdl.inc
+4 −4 library/include/ck/library/utility/check_err.hpp
+5 −5 library/src/tensor_operation_instance/gpu/CMakeLists.txt
+4 −0 library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt
+41 −0 ...2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkyxc_ngkhw_bf16_pipev1_instance.cpp
+41 −0 ...v2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_two_stage_xdl_ngchw_gkyxc_ngkhw_f16_pipev1_instance.cpp
+41 −0 ...2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev1_instance.cpp
+1 −1 ...2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev2_instance.cpp
+1 −1 ...2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev5_instance.cpp
+41 −0 ...v2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev1_instance.cpp
+1 −1 ...v2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev2_instance.cpp
+1 −1 ...v2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev5_instance.cpp
+4 −0 library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt
+41 −0 ...bwd_weight/xdl/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_instance.cpp
+1 −1 ...bwd_weight/xdl/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev2_instance.cpp
+1 −1 ...bwd_weight/xdl/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev5_instance.cpp
+41 −0 ..._bwd_weight/xdl/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_instance.cpp
+1 −1 ..._bwd_weight/xdl/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev2_instance.cpp
+1 −1 ..._bwd_weight/xdl/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev5_instance.cpp
+41 −0 ...bwd_weight/xdl/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkzyxc_ngkdhw_bf16_pipev1_instance.cpp
+41 −0 ..._bwd_weight/xdl/device_grouped_conv3d_bwd_weight_two_stage_xdl_ngcdhw_gkzyxc_ngkdhw_f16_pipev1_instance.cpp
+16 −2 profiler/include/profiler/profile_pool3d_fwd_impl.hpp
+1 −1 profiler/src/profile_layernorm_fwd.cpp
+6 −6 test/CMakeLists.txt

0 comments on commit fa40153

Please sign in to comment.