F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ > Struct Template Reference
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ > Struct Template Reference
#include <mixed_prec_flatmm_kernel.hpp>
Inheritance diagram for ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >:
Public Types | |
| using | Underlying = FlatmmKernel<TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_> |
| using | TilePartitioner = remove_cvref_t<TilePartitioner_> |
| using | FlatmmPipeline = remove_cvref_t<FlatmmPipeline_> |
| using | BlockGemmShape |
| using | EpiloguePipeline = remove_cvref_t<EpiloguePipeline_> |
| using | ALayout = remove_cvref_t<typename FlatmmPipeline::ALayout> |
| using | BLayout = remove_cvref_t<typename FlatmmPipeline::BLayout> |
| using | ELayout = remove_cvref_t<typename FlatmmPipeline::CLayout> |
| using | DsLayout = remove_cvref_t<typename EpiloguePipeline::DsLayout> |
| using | DsDataType = remove_cvref_t<typename EpiloguePipeline::DsDataType> |
| using | ADataType = remove_cvref_t<typename FlatmmPipeline::ADataType> |
| using | BDataType = remove_cvref_t<typename FlatmmPipeline::BDataType> |
| using | EDataType = remove_cvref_t<typename EpiloguePipeline::ODataType> |
| using | SplitKBatchOffset = typename Underlying::SplitKBatchOffset |
| Public Types inherited from ck_tile::FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ > | |
| using | TilePartitioner = remove_cvref_t<TilePartitioner_> |
| using | FlatmmPipeline = remove_cvref_t<FlatmmPipeline_> |
| using | BlockGemmShape |
| using | EpiloguePipeline = remove_cvref_t<EpiloguePipeline_> |
| using | ALayout = remove_cvref_t<typename FlatmmPipeline::ALayout> |
| using | BLayout = remove_cvref_t<typename FlatmmPipeline::BLayout> |
| using | ELayout = remove_cvref_t<typename FlatmmPipeline::CLayout> |
| using | DsLayout = remove_cvref_t<typename EpiloguePipeline::DsLayout> |
| using | DsDataType = remove_cvref_t<typename EpiloguePipeline::DsDataType> |
| using | ADataType = remove_cvref_t<typename FlatmmPipeline::ADataType> |
| using | BDataType = remove_cvref_t<typename FlatmmPipeline::BDataType> |
| using | EDataType = remove_cvref_t<typename EpiloguePipeline::ODataType> |
Public Member Functions | |
| template<class ScaleM, class ScaleN> | |
| CK_TILE_DEVICE void | operator() (FlatmmKernelArgs< ScaleM, ScaleN, DsDataType::size()> kargs, int partition_idx=blockIdx.x) const |
| Public Member Functions inherited from ck_tile::FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ > | |
| template<class ScaleM, class ScaleN> | |
| CK_TILE_DEVICE void | operator() (FlatmmKernelArgs< ScaleM, ScaleN, DsDataType::size()> kargs, int partition_idx=blockIdx.x) const |
Static Public Member Functions | |
| static CK_TILE_HOST const std::string | GetName () |
| template<class ScaleM, class ScaleN> | |
| static CK_TILE_HOST constexpr auto | GridSize (const FlatmmKernelArgs< ScaleM, ScaleN, DsDataType::size()> &kargs) |
| template<memory_operation_enum DstInMemOp = memory_operation_enum::set, class KernelArgs> | |
| static CK_TILE_DEVICE auto | MakeGemmTensorViews (const ADataType *a_ptr, const BDataType *b_flat_ptr, const std::array< const void *, NumDTensor > &ds_ptr, EDataType *e_ptr, const KernelArgs &kargs, const SplitKBatchOffset &splitk_batch_offset) |
| template<typename TensorView> | |
| static CK_TILE_DEVICE auto | MakeGemmPadViews (const TensorView &views) |
| template<typename PadView> | |
| static CK_TILE_DEVICE auto | MakeGemmTileWindows (const PadView &views, const index_t i_m, const index_t i_n) |
| template<class ScaleM, class ScaleN, bool UseDefaultScheduler = true> | |
| static CK_TILE_DEVICE void | RunFlatmm (const ADataType *a_ptr, const BDataType *b_flat_ptr, const std::array< const void *, NumDTensor > &ds_ptr, EDataType *e_ptr, void *smem_ptr_ping, void *smem_ptr_pong, const FlatmmKernelArgs< ScaleM, ScaleN, DsDataType::size()> &kargs, const SplitKBatchOffset &splitk_batch_offset, const index_t block_idx_m, const index_t block_idx_n) |
| Static Public Member Functions inherited from ck_tile::FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ > | |
| static CK_TILE_HOST const std::string | GetName () |
| static CK_TILE_HOST constexpr auto | GridSize (index_t M, index_t N, index_t KBatch) |
| template<class ScaleM, class ScaleN> | |
| static CK_TILE_HOST constexpr auto | GridSize (const FlatmmKernelArgs< ScaleM, ScaleN, DsDataType::size()> &kargs) |
| static CK_TILE_HOST constexpr auto | BlockSize () |
| template<class ScaleM, class ScaleN> | |
| static CK_TILE_HOST constexpr FlatmmKernelArgs< ScaleM, ScaleN, DsDataType::size()> | MakeKernelArgs (const ScaleFlatmmHostArgs< ScaleM, ScaleN, DsDataType::size()> &hostArgs) |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemPingSize () |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemPongSize () |
| template<class KernelArgs> | |
| static CK_TILE_HOST bool | IsSupportedArgument (const KernelArgs &kargs) |
| template<memory_operation_enum DstInMemOp = memory_operation_enum::set, class KernelArgs> | |
| static CK_TILE_DEVICE auto | MakeGemmTensorViews (const ADataType *a_ptr, const BDataType *b_flat_ptr, const std::array< const void *, NumDTensor > &ds_ptr, EDataType *e_ptr, const KernelArgs &kargs, const SplitKBatchOffset &splitk_batch_offset) |
| template<typename TensorView> | |
| static CK_TILE_DEVICE auto | MakeGemmPadViews (const TensorView &views) |
| template<typename PadView> | |
| static CK_TILE_DEVICE auto | MakeGemmTileWindows (const PadView &views, const index_t i_m, const index_t i_n) |
| template<class ScaleM, class ScaleN, bool UseDefaultScheduler = true> | |
| static CK_TILE_DEVICE void | RunFlatmm (const ADataType *a_ptr, const BDataType *b_flat_ptr, const std::array< const void *, NumDTensor > &ds_ptr, EDataType *e_ptr, void *smem_ptr_ping, void *smem_ptr_pong, const FlatmmKernelArgs< ScaleM, ScaleN, DsDataType::size()> &kargs, const SplitKBatchOffset &splitk_batch_offset, const index_t block_idx_m, const index_t block_idx_n) |
Static Public Attributes | |
| static constexpr index_t | KernelBlockSize = FlatmmPipeline::BlockSize |
| static constexpr bool | UsePersistentKernel = FlatmmPipeline::UsePersistentKernel |
| static constexpr int | QuantPackedSize = numeric_traits<BDataType>::PackedSize |
| static constexpr int | N_Pack = 2 |
| static constexpr index_t | NumDTensor = DsDataType::size() |
| static constexpr auto | I0 = number<0>() |
| static constexpr auto | I1 = number<1>() |
| static constexpr auto | I2 = number<2>() |
| static constexpr auto | I3 = number<3>() |
| static constexpr auto | I4 = number<4>() |
| Static Public Attributes inherited from ck_tile::FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ > | |
| static constexpr index_t | kBlockSize = FlatmmPipeline::BlockSize |
| static constexpr bool | UsePersistentKernel = FlatmmPipeline::UsePersistentKernel |
| static constexpr index_t | NumDTensor = DsDataType::size() |
| static constexpr auto | I0 = number<0>() |
| static constexpr auto | I1 = number<1>() |
| static constexpr auto | I2 = number<2>() |
| static constexpr auto | I3 = number<3>() |
Member Typedef Documentation
◆ ADataType
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::ADataType = remove_cvref_t<typename FlatmmPipeline::ADataType> |
◆ ALayout
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::ALayout = remove_cvref_t<typename FlatmmPipeline::ALayout> |
◆ BDataType
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::BDataType = remove_cvref_t<typename FlatmmPipeline::BDataType> |
◆ BLayout
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::BLayout = remove_cvref_t<typename FlatmmPipeline::BLayout> |
◆ BlockGemmShape
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::BlockGemmShape |
Initial value:
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
◆ DsDataType
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::DsDataType = remove_cvref_t<typename EpiloguePipeline::DsDataType> |
◆ DsLayout
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::DsLayout = remove_cvref_t<typename EpiloguePipeline::DsLayout> |
◆ EDataType
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::EDataType = remove_cvref_t<typename EpiloguePipeline::ODataType> |
◆ ELayout
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::ELayout = remove_cvref_t<typename FlatmmPipeline::CLayout> |
◆ EpiloguePipeline
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::EpiloguePipeline = remove_cvref_t<EpiloguePipeline_> |
◆ FlatmmPipeline
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::FlatmmPipeline = remove_cvref_t<FlatmmPipeline_> |
◆ SplitKBatchOffset
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::SplitKBatchOffset = typename Underlying::SplitKBatchOffset |
◆ TilePartitioner
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::TilePartitioner = remove_cvref_t<TilePartitioner_> |
◆ Underlying
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
| using ck_tile::F16xMXF4FlatmmKernel< TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_ >::Underlying = FlatmmKernel<TilePartitioner_, FlatmmPipeline_, EpiloguePipeline_> |
Member Function Documentation
◆ GetName()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
|
inline static nodiscard |
◆ GridSize()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
template<class ScaleM, class ScaleN>
|
inline static constexpr |
◆ MakeGemmPadViews()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
template<typename TensorView>
|
inline static |
◆ MakeGemmTensorViews()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
template<memory_operation_enum DstInMemOp = memory_operation_enum::set, class KernelArgs>
|
inline static |
◆ MakeGemmTileWindows()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
template<typename PadView>
|
inline static |
◆ operator()()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
template<class ScaleM, class ScaleN>
|
inline |
◆ RunFlatmm()
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
template<class ScaleM, class ScaleN, bool UseDefaultScheduler = true>
|
inline static |
Member Data Documentation
◆ I0
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
|
static constexpr |
◆ I1
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
|
static constexpr |
◆ I2
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
|
static constexpr |
◆ I3
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
|
static constexpr |
◆ I4
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
|
static constexpr |
◆ KernelBlockSize
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
|
static constexpr |
◆ N_Pack
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
|
static constexpr |
◆ NumDTensor
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
|
static constexpr |
◆ QuantPackedSize
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
|
static constexpr |
◆ UsePersistentKernel
template<typename TilePartitioner_, typename FlatmmPipeline_, typename EpiloguePipeline_>
|
static constexpr |
The documentation for this struct was generated from the following file: