gemm_pipeline_ag_bg_cr_comp_async.hpp Source File
gemm_pipeline_ag_bg_cr_comp_async.hpp
Go to the documentation of this file.
146 static constexpr index_t GetVectorSizeC() { return Policy::template GetVectorSizeC<Problem>(); }
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
__device__ uint32_t amd_wave_read_first_lane(uint16_t v)
Definition tile/core/arch/amd_buffer_addressing.hpp:35
TransposeTileDistributionTraits< TileDistributionEncoding_, DataType_, Policy, true > InputTileDistributionTraits
Definition load_tile_transpose.hpp:343
constant< b > bool_constant
Definition tile/core/numeric/integral_constant.hpp:43
typename detail::detector< nonesuch, void, Op, Args... >::value_t is_detected
Definition type_traits.hpp:67
CK_TILE_DEVICE void block_sync_lds_direct_load()
Definition arch.hpp:288
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
CK_TILE_DEVICE constexpr auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition null_tile_window.hpp:75
CK_TILE_HOST_DEVICE constexpr auto generate_tuple(F &&f, number< N >)
Definition tile/core/container/tuple.hpp:429
constexpr bool is_tile_window_linear_v
Helper variable template to check if a type is a linear tile window.
Definition tile_window_linear.hpp:1119
CK_TILE_DEVICE void clear_tile(DstrTensors &dstr_tensor)
Definition tile_elementwise.hpp:177
CK_TILE_HOST_DEVICE constexpr auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition tile_distribution.hpp:480
GemmPipelineScheduler
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:14
@ Intrawave
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:16
CK_TILE_HOST_DEVICE constexpr details::return_type< D, Ts... > make_array(Ts &&... ts)
Definition tile/core/container/array.hpp:242
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:16
static constexpr index_t GlobalBufferNum
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:19
static CK_TILE_HOST_DEVICE auto TailHandler(const RunFunction &run_func, bool has_hot_loop, TailNumber tail_number)
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:44
static CK_TILE_HOST constexpr bool BlockHasHotloop(index_t num_loop)
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:21
static CK_TILE_HOST constexpr TailNumber GetBlockLoopTailNum(index_t num_loop)
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:26
static constexpr index_t PrefillStages
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:18
static constexpr index_t PrefetchStages
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:17
PipelineImplBase Base
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:185
static CK_TILE_DEVICE constexpr auto HotLoopScheduler()
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:187
CK_TILE_DEVICE auto operator()(const AsDramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BsDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *__restrict__ p_smem_0, void *__restrict__ p_smem_1) const
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:232
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:179
Compute-optimized asynchronous pipeline version, which is based on V4.
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:96
static constexpr auto TailNum
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:161
static constexpr bool kPadM
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:154
remove_cvref_t< std::tuple_element_t< 0, BsDataType > > BDataType
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:116
static constexpr bool kPadK
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:156
remove_cvref_t< typename Problem::BsLayoutTuple > BsLayout
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:106
GemmPipelineAgBgCrImplBase< Problem, Policy > PipelineImplBase
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:98
static constexpr index_t MPerBlock
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:132
remove_cvref_t< typename Problem::BlockGemmShape > BlockGemmShape
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:103
static constexpr index_t NumWaveGroups
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:151
remove_cvref_t< std::tuple_element_t< 0, BsLayout > > BLayout
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:113
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem_0, void *p_smem_1) const
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:526
remove_cvref_t< std::tuple_element_t< 0, AsLayout > > ALayout
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:112
remove_cvref_t< typename Problem::AElementWise > AElementWise
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:109
number< 0 > I0
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:126
static constexpr index_t BlockSize
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:130
remove_cvref_t< typename Problem::CLayout > CLayout
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:107
static constexpr auto is_b_load_tr_v
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:165
static constexpr index_t BPackedSize
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:122
remove_cvref_t< typename Problem::AsDataTypeTuple > AsDataType
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:100
static constexpr index_t KPerBlock
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:134
remove_cvref_t< std::tuple_element_t< 0, AsDataType > > ADataType
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:115
static constexpr index_t GetVectorSizeB()
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:142
remove_cvref_t< typename Problem::BElementWise > BElementWise
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:110
static constexpr auto is_a_load_tr_v
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:164
static constexpr index_t Preshuffle
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:152
static constexpr bool DoubleSmemBuffer
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:158
static constexpr index_t NPerBlock
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:133
remove_cvref_t< decltype(Policy::template GetBlockGemm< Problem >())> BlockGemm
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:125
static constexpr bool kPadN
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:155
remove_cvref_t< typename Problem::BsDataTypeTuple > BsDataType
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:101
static constexpr index_t GetSmemPackA()
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:148
static CK_TILE_HOST_DEVICE constexpr auto IsTransposeC()
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:172
remove_cvref_t< typename Problem::AsLayoutTuple > AsLayout
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:105
static constexpr index_t GetSmemPackB()
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:149
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BDramBlockWindowTmp &b_dram_block_window_tmp, const index_t num_loop, void *__restrict__ p_smem_0, void *__restrict__ p_smem_1) const
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:546
static constexpr index_t APackedSize
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:120
static CK_TILE_HOST_DEVICE constexpr index_t GetSmemSize()
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:167
number< 2 > I2
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:128
BaseGemmPipelineAgBgCrCompAsync< Problem > Base
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:97
number< 1 > I1
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:127
static constexpr index_t GetVectorSizeA()
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:137
static constexpr auto Scheduler
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:162
static constexpr bool HasHotLoop
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:160
remove_cvref_t< typename Problem::CDataType > CDataType
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:102
static constexpr index_t GetVectorSizeC()
Definition gemm_pipeline_ag_bg_cr_comp_async.hpp:146
Definition gemm_pipeline_ag_bg_cr_base.hpp:13
remove_cvref_t< std::tuple_element_t< number< 0 >{}, BsDataType > > BDataType
Definition gemm_pipeline_ag_bg_cr_base.hpp:22
CK_TILE_DEVICE auto GetABLdsTensorViews(void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:83
static constexpr index_t NPerBlock
Definition gemm_pipeline_ag_bg_cr_base.hpp:26
CK_TILE_DEVICE void GlobalPrefetchAsync(DstBlockWindow &dst_block_window, SrcTileWindow &dram_tile_window, const DramTileWindowStep &dram_tile_window_step) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:48
CK_TILE_DEVICE void LocalPrefetch(DstBlockTile &dst_block_tile, const SrcTileWindow &lds_tile_window, bool_constant< LoadTranspose >={}) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:73
static constexpr index_t MPerBlock
Definition gemm_pipeline_ag_bg_cr_base.hpp:25
static constexpr index_t KPerBlock
Definition gemm_pipeline_ag_bg_cr_base.hpp:27
Definition tile/ops/elementwise/unary_element_wise_operation.hpp:437
Definition tile/core/numeric/integral_constant.hpp:30
Definition tile/core/numeric/numeric.hpp:81
Definition tile/core/utility/functional.hpp:43