device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp Source File#
device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp
Go to the documentation of this file.
15#include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_blockscale_b_preshuffle.hpp"
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition host_utility/kernel_launch.hpp:14
Definition convolution_backward_data_specialization.hpp:8
std::string getGemmSpecializationString(const GemmSpecialization &s)
Definition gemm_specialization.hpp:32
GemmSpecialization
Definition gemm_specialization.hpp:11
@ MKPadding
Definition gemm_specialization.hpp:18
@ KPadding
Definition gemm_specialization.hpp:16
@ MNKPadding
Definition gemm_specialization.hpp:20
@ NKPadding
Definition gemm_specialization.hpp:19
Definition convolution_backward_data_specialization.hpp:7
float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config, PreProcessFunc preprocess, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, GemmArgs &gemm_args, Args... args)
Definition flush_cache.hpp:398
Definition ck.hpp:268
__global__ void kernel_gemm_xdl_cshuffle_v3_multi_d_blockscale_b_preshuffle_2lds(typename GridwiseGemm::Argument karg)
Definition gridwise_gemm_xdl_cshuffle_v3_multi_d_blockscale_b_preshuffle.hpp:75
__global__ void kernel_gemm_xdl_cshuffle_v3_multi_d_blockscale_b_preshuffle(typename GridwiseGemm::Argument karg)
Definition gridwise_gemm_xdl_cshuffle_v3_multi_d_blockscale_b_preshuffle.hpp:39
bool is_bf16_atomic_supported()
Definition host_utility/device_prop.hpp:108
Definition ck/stream_config.hpp:10
Definition gridwise_gemm_xdl_cshuffle_v3_multi_d_blockscale_b_preshuffle.hpp:157
static __host__ constexpr bool CheckValidity(const Argument &karg)
Definition gridwise_gemm_xdl_cshuffle_v3_multi_d_blockscale_b_preshuffle.hpp:872
Definition device_base.hpp:197
BaseInvoker()=default
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:165
float RunImp(const typename GridwiseGemm::Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:167
INVOKER_RUN3_IMPL float Run(const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:336
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:94
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:484
typename GridwiseGemm64::Argument Argument
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:159
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:402
static auto MakeInvoker()
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:443
ck::tensor_operation::device::DeviceGemmMultiD_BlockScale_Xdl_CShuffle_V3_BPreshuffle::NXdlPerWave64
static GET_NXDL_PER_WAVE_IMPL constexpr auto NXdlPerWave64
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:97
static bool IsSupportedArgument(const Argument &arg)
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:349
GridwiseGemmBase< math::max(NXdlPerWave32, 1)> GridwiseGemm32
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:157
static constexpr index_t NumDTensor
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:95
int GetPreShuffleParameters() override
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:161
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_a, const void *p_b, std::array< const void *, NumDTensor > p_ds, void *p_c, const index_t M, const index_t N, const index_t K, const index_t StrideA, const index_t StrideB, const std::array< ck::index_t, NumDTensor > StrideDs, const index_t StrideC, const void *p_a_scale, const void *p_b_scale, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op) override
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:447
GridwiseGemmMultiD_blockscale_xdl_cshuffle_v3_b_preshuffle< ALayout, BLayout, DsLayout, CLayout, ADataType, BDataType, GemmAccDataType, CShuffleDataType, DsDataType, CDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, ScaleBlockM, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave_, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, false, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, false, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, math::min(CShuffleNXdlPerWavePerShuffle, NXdlPerWave_), CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, LDSTypeA, LDSTypeB > GridwiseGemmBase
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:102
static constexpr bool IsValidCompilationParameter()
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:343
static auto MakeArgument(const void *p_a, const void *p_b, std::array< const void *, NumDTensor > p_ds, void *p_c, const index_t M, const index_t N, const index_t K, const index_t StrideA, const index_t StrideB, const std::array< index_t, NumDTensor > StrideDs, const index_t StrideC, const void *p_a_scale, const void *p_b_scale, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op)
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:407
ck::tensor_operation::device::DeviceGemmMultiD_BlockScale_Xdl_CShuffle_V3_BPreshuffle::GetTypeString
std::string GetTypeString() const override
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:490
ck::tensor_operation::device::DeviceGemmMultiD_BlockScale_Xdl_CShuffle_V3_BPreshuffle::NXdlPerWave32
static constexpr auto NXdlPerWave32
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:98
GridwiseGemmBase< math::max(NXdlPerWave64, 1)> GridwiseGemm64
Definition device_gemm_multiple_d_xdl_cshuffle_v3_blockscale_bpreshuffle.hpp:156
Definition device_gemm_multiple_d_ab_scale.hpp:82
Definition flush_cache.hpp:299