GridwiseBatchedGemmGemm_wmma_cshuffle_v3< ADataType, B0DataType, Acc0DataType, B1DataType, Acc1DataType, CShuffleDataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc, B0GridDesc, B1GridDesc, CGridDesc_M_N, MPerBlock, LPerBlock, KPerBlock, AK1Value, BK1Value, NPerBlock, LTilePerBlock, L1Value, MPerWmma, LPerWmma, NPerWmma, MRepeat, LRepeat, NRepeat, BlockSize, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0ThreadTransferSrcResetCoordinateAfterRun, B0BlockLdsExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1ThreadTransferSrcResetCoordinateAfterRun, B1BlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, PadN, BlkGemmPipeSched, BlkGemmPipelineVer > Struct Template Reference
Classes |
Public Types |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck::GridwiseBatchedGemmGemm_wmma_cshuffle_v3< ADataType, B0DataType, Acc0DataType, B1DataType, Acc1DataType, CShuffleDataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc, B0GridDesc, B1GridDesc, CGridDesc_M_N, MPerBlock, LPerBlock, KPerBlock, AK1Value, BK1Value, NPerBlock, LTilePerBlock, L1Value, MPerWmma, LPerWmma, NPerWmma, MRepeat, LRepeat, NRepeat, BlockSize, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0ThreadTransferSrcResetCoordinateAfterRun, B0BlockLdsExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1ThreadTransferSrcResetCoordinateAfterRun, B1BlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, PadN, BlkGemmPipeSched, BlkGemmPipelineVer > Struct Template Reference
#include <gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp>
Classes | |
| struct | SharedMemTrait |
Public Types | |
| using | ThisThreadBlock = ThisThreadBlock<BlockSize> |
| using | BlockwiseGemmPipe |
| using | CGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock |
| using | DefaultBlock2CTileMap |
Static Public Member Functions | |
| __host__ static __device__ constexpr auto | MakeABlockDescriptor () |
| __host__ static __device__ constexpr auto | MakeB0BlockDescriptor () |
| __host__ static __device__ constexpr auto | MakeB1BlockDescriptor () |
| __host__ static __device__ constexpr auto | MakeABlockSliceCopyStep () |
| __host__ static __device__ constexpr auto | MakeB0BlockSliceCopyStep () |
| __host__ static __device__ constexpr auto | MakeB1BlockSliceCopyStep () |
| template<typename ABlockDesc_> | |
| __host__ static __device__ constexpr auto | MakeAWaveDescriptor (const ABlockDesc_ &) |
| template<typename B0BlockDesc_> | |
| __host__ static __device__ constexpr auto | MakeB0WaveDescriptor (const B0BlockDesc_ &) |
| template<typename A1BlockDesc_AL0_M_AL1> | |
| __host__ static __device__ constexpr auto | MakeA1WaveDescriptor_L0_M0_M1_M2_L1 (const A1BlockDesc_AL0_M_AL1 &) |
| template<typename B1BlockDesc_> | |
| __host__ static __device__ constexpr auto | MakeB1WaveDescriptor (const B1BlockDesc_ &) |
| __host__ static __device__ constexpr auto | GetCShuffleBlockDescriptor_MShRepeat_MPerShRepeat_NShRepeat_NPerShRepeat () |
| __host__ static __device__ constexpr index_t | GetSharedMemoryNumberOfByte () |
| template<typename Block2CTileMap> | |
| __host__ static __device__ constexpr bool | CheckValidity (const AGridDesc &a_grid_desc, const B0GridDesc &b0_grid_desc, const B1GridDesc &b1_grid_desc, const CGridDesc_M_N &c_grid_desc_m_n, const Block2CTileMap &block_2_ctile_map) |
| __host__ static __device__ constexpr bool | CalculateHasMainKBlockLoop (index_t K) |
| __host__ static __device__ constexpr TailNumber | CalculateKBlockLoopTailNum (index_t K) |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock (const CGridDesc_M_N &c_grid_desc_m_n) |
| __host__ static __device__ constexpr auto | MakeDefaultBlock2CTileMap (const CGridDesc_M_N &c_grid_desc_m_n, index_t, index_t) |
| template<bool HasMainKBlockLoop, TailNumber TailNum, typename Block2CTileMap = DefaultBlock2CTileMap> | |
| static __device__ void | Run (const ADataType *__restrict__ p_a_grid, const B0DataType *__restrict__ p_b0_grid, const B1DataType *__restrict__ p_b1_grid, CDataType *__restrict__ p_c_grid, void *__restrict__ p_shared, const AGridDesc &a_grid_desc, const B0GridDesc &b0_grid_desc, const B1GridDesc &b1_grid_desc, const CGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock &c_grid_desc_mblock_mperblock_nblock_nperblock, const AElementwiseOperation &a_element_op, const B0ElementwiseOperation &b0_element_op, const AccElementwiseOperation &acc_element_op, const B1ElementwiseOperation &b1_element_op, const CElementwiseOperation &c_element_op, const Block2CTileMap &block_2_ctile_map) |
Static Public Attributes | |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr auto | I2 = Number<2>{} |
| static constexpr auto | I3 = Number<3>{} |
| static constexpr auto | I4 = Number<4>{} |
| static constexpr auto | I5 = Number<5>{} |
| static constexpr auto | I6 = Number<6>{} |
| static constexpr auto | AK0 = Number<KPerBlock / AK1Value>{} |
| static constexpr auto | AK1 = Number<AK1Value>{} |
| static constexpr auto | BK0 = Number<KPerBlock / BK1Value>{} |
| static constexpr auto | BK1 = Number<BK1Value>{} |
| static constexpr auto | L0PerBlock = LTilePerBlock / L1Value |
| static constexpr auto | AL0 = Number<L0PerBlock / 2>{} |
| static constexpr auto | AL1 = Number<L1Value>{} |
| static constexpr auto | BL0 = Number<L0PerBlock>{} |
| static constexpr auto | BL1 = Number<L1Value>{} |
| static constexpr auto | MWaves = MPerBlock / (MRepeat * MPerWmma) |
| static constexpr auto | LWaves = LPerBlock / (LRepeat * LPerWmma) |
| static constexpr auto | NWaves = NPerBlock / (NRepeat * NPerWmma) |
| static constexpr auto | KPack |
Member Typedef Documentation
◆ BlockwiseGemmPipe
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| using ck::GridwiseBatchedGemmGemm_wmma_cshuffle_v3< ADataType, B0DataType, Acc0DataType, B1DataType, Acc1DataType, CShuffleDataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc, B0GridDesc, B1GridDesc, CGridDesc_M_N, MPerBlock, LPerBlock, KPerBlock, AK1Value, BK1Value, NPerBlock, LTilePerBlock, L1Value, MPerWmma, LPerWmma, NPerWmma, MRepeat, LRepeat, NRepeat, BlockSize, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0ThreadTransferSrcResetCoordinateAfterRun, B0BlockLdsExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1ThreadTransferSrcResetCoordinateAfterRun, B1BlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, PadN, BlkGemmPipeSched, BlkGemmPipelineVer >::BlockwiseGemmPipe |
Initial value:
BlkGemmPipelineVer,
BlkGemmPipeSched,
BlockSize,
ADataType,
B0DataType,
ADataType,
B0DataType,
Acc0DataType,
ABlockTransferSrcScalarPerVector,
B0BlockTransferSrcScalarPerVector,
MPerBlock,
LPerBlock,
KPerBlock,
MPerWmma,
LPerWmma,
MRepeat,
LRepeat,
true>())>
constexpr auto BlockGemmPipeline_Selector()
Definition blockwise_gemm_pipeline_wmma_selector.hpp:32
__host__ static __device__ constexpr auto MakeB0BlockDescriptor()
Definition gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp:141
static constexpr auto KPack
Definition gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp:113
__host__ static __device__ constexpr auto MakeAWaveDescriptor(const ABlockDesc_ &)
Definition gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp:205
__host__ static __device__ constexpr auto MakeB0WaveDescriptor(const B0BlockDesc_ &)
Definition gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp:230
__host__ static __device__ constexpr auto MakeABlockDescriptor()
Definition gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp:118
◆ CGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| using ck::GridwiseBatchedGemmGemm_wmma_cshuffle_v3< ADataType, B0DataType, Acc0DataType, B1DataType, Acc1DataType, CShuffleDataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc, B0GridDesc, B1GridDesc, CGridDesc_M_N, MPerBlock, LPerBlock, KPerBlock, AK1Value, BK1Value, NPerBlock, LTilePerBlock, L1Value, MPerWmma, LPerWmma, NPerWmma, MRepeat, LRepeat, NRepeat, BlockSize, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0ThreadTransferSrcResetCoordinateAfterRun, B0BlockLdsExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1ThreadTransferSrcResetCoordinateAfterRun, B1BlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, PadN, BlkGemmPipeSched, BlkGemmPipelineVer >::CGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock |
Initial value:
CGridDesc_M_N{}))>
__host__ static __device__ constexpr auto MakeCGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(const CGridDesc_M_N &c_grid_desc_m_n)
Definition gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp:469
◆ DefaultBlock2CTileMap
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| using ck::GridwiseBatchedGemmGemm_wmma_cshuffle_v3< ADataType, B0DataType, Acc0DataType, B1DataType, Acc1DataType, CShuffleDataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc, B0GridDesc, B1GridDesc, CGridDesc_M_N, MPerBlock, LPerBlock, KPerBlock, AK1Value, BK1Value, NPerBlock, LTilePerBlock, L1Value, MPerWmma, LPerWmma, NPerWmma, MRepeat, LRepeat, NRepeat, BlockSize, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0ThreadTransferSrcResetCoordinateAfterRun, B0BlockLdsExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1ThreadTransferSrcResetCoordinateAfterRun, B1BlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, PadN, BlkGemmPipeSched, BlkGemmPipelineVer >::DefaultBlock2CTileMap |
Initial value:
__host__ static __device__ constexpr auto MakeDefaultBlock2CTileMap(const CGridDesc_M_N &c_grid_desc_m_n, index_t, index_t)
Definition gridwise_batched_gemm_gemm_wmma_cshuffle_v3.hpp:488
◆ ThisThreadBlock
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
| using ck::GridwiseBatchedGemmGemm_wmma_cshuffle_v3< ADataType, B0DataType, Acc0DataType, B1DataType, Acc1DataType, CShuffleDataType, CDataType, AElementwiseOperation, B0ElementwiseOperation, AccElementwiseOperation, B1ElementwiseOperation, CElementwiseOperation, CGlobalMemoryDataOperation, AGridDesc, B0GridDesc, B1GridDesc, CGridDesc_M_N, MPerBlock, LPerBlock, KPerBlock, AK1Value, BK1Value, NPerBlock, LTilePerBlock, L1Value, MPerWmma, LPerWmma, NPerWmma, MRepeat, LRepeat, NRepeat, BlockSize, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, B0BlockTransferThreadClusterLengths_K0_L_K1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_K1, B0ThreadTransferSrcResetCoordinateAfterRun, B0BlockLdsExtraL, B1BlockTransferThreadClusterLengths_L0_N_L1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_L1, B1ThreadTransferSrcResetCoordinateAfterRun, B1BlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CShuffleBlockTransferScalarPerVector_NPerBlock, PadN, BlkGemmPipeSched, BlkGemmPipelineVer >::ThisThreadBlock = ThisThreadBlock<BlockSize> |
Member Function Documentation
◆ CalculateHasMainKBlockLoop()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline static constexpr |
◆ CalculateKBlockLoopTailNum()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline static constexpr |
◆ CheckValidity()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
template<typename Block2CTileMap>
|
inline static constexpr |
◆ GetCShuffleBlockDescriptor_MShRepeat_MPerShRepeat_NShRepeat_NPerShRepeat()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline static constexpr |
◆ GetSharedMemoryNumberOfByte()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline static constexpr |
◆ MakeA1WaveDescriptor_L0_M0_M1_M2_L1()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
template<typename A1BlockDesc_AL0_M_AL1>
|
inline static constexpr |
◆ MakeABlockDescriptor()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline static constexpr |
◆ MakeABlockSliceCopyStep()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline static constexpr |
◆ MakeAWaveDescriptor()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
template<typename ABlockDesc_>
|
inline static constexpr |
◆ MakeB0BlockDescriptor()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline static constexpr |
◆ MakeB0BlockSliceCopyStep()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline static constexpr |
◆ MakeB0WaveDescriptor()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
template<typename B0BlockDesc_>
|
inline static constexpr |
◆ MakeB1BlockDescriptor()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline static constexpr |
◆ MakeB1BlockSliceCopyStep()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline static constexpr |
◆ MakeB1WaveDescriptor()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
template<typename B1BlockDesc_>
|
inline static constexpr |
◆ MakeCGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline static constexpr |
◆ MakeDefaultBlock2CTileMap()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
inline static constexpr |
◆ Run()
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
template<bool HasMainKBlockLoop, TailNumber TailNum, typename Block2CTileMap = DefaultBlock2CTileMap>
|
inline static |
Member Data Documentation
◆ AK0
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ AK1
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ AL0
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ AL1
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ BK0
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ BK1
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ BL0
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ BL1
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ I0
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ I1
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ I2
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ I3
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ I4
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ I5
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ I6
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ KPack
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
Initial value:
= math::integer_least_multiple(math::integer_least_multiple(AK1Value, BK1Value), 16)
__host__ __device__ constexpr auto integer_least_multiple(X x, Y y)
Definition utility/math.hpp:78
◆ L0PerBlock
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ LWaves
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ MWaves
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
◆ NWaves
template<typename ADataType, typename B0DataType, typename Acc0DataType, typename B1DataType, typename Acc1DataType, typename CShuffleDataType, typename CDataType, typename AElementwiseOperation, typename B0ElementwiseOperation, typename AccElementwiseOperation, typename B1ElementwiseOperation, typename CElementwiseOperation, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc, typename B0GridDesc, typename B1GridDesc, typename CGridDesc_M_N, index_t MPerBlock, index_t LPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t NPerBlock, index_t LTilePerBlock, index_t L1Value, index_t MPerWmma, index_t LPerWmma, index_t NPerWmma, index_t MRepeat, index_t LRepeat, index_t NRepeat, index_t BlockSize, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_K0_L_K1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_K1, bool B0ThreadTransferSrcResetCoordinateAfterRun, bool B0BlockLdsExtraL, typename B1BlockTransferThreadClusterLengths_L0_N_L1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_L1, bool B1ThreadTransferSrcResetCoordinateAfterRun, bool B1BlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CShuffleBlockTransferScalarPerVector_NPerBlock, bool PadN, 
BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v1>
|
static constexpr |
The documentation for this struct was generated from the following file: