cshuffle_epilogue.hpp Source File
cshuffle_epilogue.hpp
Go to the documentation of this file.
Line 445: // TODO: Check if there would be nicer ways to overload rather than with EmptyScale or nullptr_t
CK_TILE_HOST_DEVICE constexpr auto make_embed_tile_distribution_encoding(OuterDstr, InnerDstr)
Definition tile_distribution_encoding.hpp:457
Definition tile/core/algorithm/cluster_descriptor.hpp:13
typename impl::WarpGemmDispatcher< AType, BType, AccType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess >::Type WarpGemmDispatcher
Definition warp_gemm_dispatcher.hpp:182
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
constexpr tuple< Args &... > tie(Args &... args) noexcept
Definition tile/core/container/tuple.hpp:376
CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType *__restrict__ p, const tensor_descriptor< Ts... > &desc)
Definition tensor_view.hpp:452
CK_TILE_HOST_DEVICE constexpr auto make_naive_tensor_descriptor(const tuple< Lengths... > &lengths, const tuple< Strides... > &strides, number< GuaranteedLastDimensionVectorLength >=number<-1 >{}, number< GuaranteedLastDimensionVectorStride >=number<-1 >{})
Definition tile/core/tensor/tensor_descriptor.hpp:274
typename detail::detector< nonesuch, void, Op, Args... >::value_t is_detected
Definition type_traits.hpp:67
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition tile_elementwise.hpp:23
CK_TILE_HOST_DEVICE constexpr auto concat_tuple_of_reference(const tuple< X &... > &tx, const tuple< Y &... > &ty)
Definition tile/core/container/tuple.hpp:443
auto concat(const Ts &... xs) -> std::enable_if_t<!AllConvertibleToStringView< Ts... >, std::string >
Definition concat.hpp:43
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
CK_TILE_DEVICE auto tile_elementwise_inout_unpack(const InElementFunc &in_element_func, const Tuple &t, std::index_sequence< I... >)
Template function that "unpacks" a tuple and applies an element-wise operation.
Definition tile_elementwise.hpp:71
CK_TILE_HOST_DEVICE constexpr auto merge_sequences(Seqs...)
Definition tile/core/container/sequence.hpp:826
thread_raked
Thread raked pattern.
Definition static_encoding_pattern.hpp:94
CK_TILE_DEVICE constexpr auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition null_tile_window.hpp:75
CK_TILE_DEVICE auto cast_tile(const SrcTensor &src_tensor)
Definition tile_elementwise.hpp:327
CK_TILE_HOST_DEVICE constexpr auto generate_tuple(F &&f, number< N >)
Definition tile/core/container/tuple.hpp:429
CK_TILE_HOST_DEVICE constexpr auto generate_tie(F &&f, number< N >)
Definition tile/core/container/tuple.hpp:435
CK_TILE_HOST_DEVICE constexpr auto to_sequence(tuple< number< Is >... >)
Definition tile/core/container/sequence.hpp:1055
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition null_tile_window.hpp:95
CK_TILE_DEVICE void update_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition update_tile.hpp:22
typename uniform_sequence_gen< NSize, I >::type uniform_sequence_gen_t
Definition tile/core/container/sequence.hpp:1026
CK_TILE_DEVICE void store_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition store_tile.hpp:23
CK_TILE_HOST_DEVICE constexpr auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition tile_distribution.hpp:480
CK_TILE_HOST_DEVICE constexpr Y type_convert(X x)
Definition tile/core/numeric/type_convert.hpp:29
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:22
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
STL namespace.
Definition cshuffle_epilogue.hpp:447
typename T::DataType DataType
Definition cshuffle_epilogue.hpp:459
Definition cshuffle_epilogue.hpp:452
float DataType
Definition cshuffle_epilogue.hpp:453
static constexpr index_t kBlockSize
Definition cshuffle_epilogue.hpp:103
CK_TILE_DEVICE void scale_tile(LdsTile &lds_tile, ScaleM &scale_m_window, ScaleN &scale_n_window)
Definition cshuffle_epilogue.hpp:329
static constexpr index_t NRepeat
Definition cshuffle_epilogue.hpp:120
CK_TILE_DEVICE void slice_acc_tile(const OAccTile &o_acc_tile, LdsTile &lds_tile)
Definition cshuffle_epilogue.hpp:370
static constexpr index_t MRepeat
Definition cshuffle_epilogue.hpp:119
typename WG::CWarpTensor CWarpTensor
Definition cshuffle_epilogue.hpp:260
typename WG::CWarpDstrEncoding CWarpDstrEncoding
Definition cshuffle_epilogue.hpp:261
remove_cvref_t< typename Problem::AsDataType > AsDataType
Definition cshuffle_epilogue.hpp:74
remove_cvref_t< std::tuple_element_t< number< 0 >{}, AsDataTypeTuple > > ADataType
Definition cshuffle_epilogue.hpp:92
remove_cvref_t< Problem_ > Problem
Definition cshuffle_epilogue.hpp:73
static constexpr index_t MPerXdl
Definition cshuffle_epilogue.hpp:108
static constexpr bool FixedVectorSize
Definition cshuffle_epilogue.hpp:112
remove_cvref_t< typename Problem::ODataType > ODataType
Definition cshuffle_epilogue.hpp:77
CK_TILE_DEVICE void store_to_dram(OutDramWindow &out_dram_window, const COutTensor &c_out_tensor)
Definition cshuffle_epilogue.hpp:411
static CK_TILE_HOST_DEVICE constexpr index_t GetVectorSizeC()
Get the vector store size for C tensor.
Definition cshuffle_epilogue.hpp:151
static constexpr bool ADataTypeIsTuple
Definition cshuffle_epilogue.hpp:81
static constexpr index_t kNPerBlock
Definition cshuffle_epilogue.hpp:105
static constexpr index_t BlockedXDLN_PerWarp
Definition cshuffle_epilogue.hpp:114
remove_cvref_t< typename Problem::ELayout > ELayout
Definition cshuffle_epilogue.hpp:100
static constexpr memory_operation_enum MemoryOperation
Definition cshuffle_epilogue.hpp:102
static constexpr bool TiledMMAPermuteN
Definition cshuffle_epilogue.hpp:113
static constexpr bool BDataTypeIsTuple
Definition cshuffle_epilogue.hpp:82
remove_cvref_t< typename Problem::DsLayout > DsLayout
Definition cshuffle_epilogue.hpp:79
static constexpr index_t MPerIteration
Definition cshuffle_epilogue.hpp:116
static constexpr auto MNPerIterationShuffle
Definition cshuffle_epilogue.hpp:240
static CK_TILE_HOST_DEVICE constexpr index_t GetSmemSize()
Definition cshuffle_epilogue.hpp:322
static constexpr index_t isCTransposed
Definition cshuffle_epilogue.hpp:111
CK_TILE_DEVICE void apply_d_tensors(DramWindows &d_dram_windows, COutTensor &c_out_tensor)
Definition cshuffle_epilogue.hpp:397
static constexpr index_t VectorSizeC
Definition cshuffle_epilogue.hpp:115
remove_cvref_t< typename Problem::DsDataType > DsDataType
Definition cshuffle_epilogue.hpp:78
CK_TILE_DEVICE void move_windows(OutDramWindow &out_dram_window, DDramWindows &d_dram_windows)
Move both the output and D tensors windows for the next access.
Definition cshuffle_epilogue.hpp:428
static CK_TILE_HOST_DEVICE constexpr auto MakeLdsBlockDescriptor()
Definition cshuffle_epilogue.hpp:267
remove_cvref_t< typename Problem::CDElementwise > CDElementwise
Definition cshuffle_epilogue.hpp:101
static CK_TILE_HOST const std::string GetName()
Definition cshuffle_epilogue.hpp:129
std::conditional_t< std::is_same_v< ADataType, pk_int4_t >, BDataType, ADataType > ATypeToUse
Definition cshuffle_epilogue.hpp:95
static CK_TILE_DEVICE constexpr auto MakeLdsDistributionEncode()
Definition cshuffle_epilogue.hpp:289
static constexpr index_t NPerIterationShuffle
Definition cshuffle_epilogue.hpp:249
remove_cvref_t< typename Problem::AccDataType > AccDataType
Definition cshuffle_epilogue.hpp:76
CK_TILE_DEVICE auto operator()(ODramWindow &out_dram_window, const OAccTile &o_acc_tile, const DsDramWindows &ds_dram_windows, void *, const ScaleM &scale_m={}, const ScaleN &scale_n={})
Definition cshuffle_epilogue.hpp:469
static constexpr index_t NumDTensor
Definition cshuffle_epilogue.hpp:118
static constexpr index_t KPerXdl
Definition cshuffle_epilogue.hpp:110
CK_TILE_DEVICE auto operator()(ODramWindow &out_dram_window, const OAccTile &o_acc_tile, const DsDramWindows &ds_dram_windows, void *p_smem, const ScaleM &scale_m={}, const ScaleN &scale_n={})
Definition cshuffle_epilogue.hpp:610
static constexpr index_t NumMXdlPerWavePerShuffle
Definition cshuffle_epilogue.hpp:236
static constexpr index_t NumNXdlPerWavePerShuffle
Definition cshuffle_epilogue.hpp:237
static CK_TILE_HOST_DEVICE constexpr index_t GetVectorSizeD(number< I > index)
Get the vector store size for Di tensor.
Definition cshuffle_epilogue.hpp:180
remove_cvref_t< typename Problem::BsDataType > BsDataType
Definition cshuffle_epilogue.hpp:75
space_filling_curve< sequence< kMPerBlock, kNPerBlock >, sequence< 0, 1 >, sequence< MPerIterationShuffle, NPerIterationShuffle > > SFC
Definition cshuffle_epilogue.hpp:262
std::conditional_t< ADataTypeIsTuple, remove_cvref_t< AsDataType >, remove_cvref_t< tuple< AsDataType > > > AsDataTypeTuple
Definition cshuffle_epilogue.hpp:84
static constexpr index_t MPerIterationShuffle
Definition cshuffle_epilogue.hpp:248
CK_TILE_DEVICE void cast_lds_tile(LdsTile &lds_tile, InLdsWindow &in_lds_window)
Definition cshuffle_epilogue.hpp:389
CK_TILE_DEVICE CShuffleEpilogue(CDElementwise elfunc=CDElementwise{})
Definition cshuffle_epilogue.hpp:124
static constexpr auto shuffle_tile_tuple
Shuffle tile configuration parameters.
Definition cshuffle_epilogue.hpp:209
WarpGemmDispatcher< ATypeToUse, BTypeToUse, AccDataType, MPerXdl, NPerXdl, KPerXdl, isCTransposed > WG
Definition cshuffle_epilogue.hpp:251
remove_cvref_t< std::tuple_element_t< number< 0 >{}, BsDataTypeTuple > > BDataType
Definition cshuffle_epilogue.hpp:93
std::conditional_t< BDataTypeIsTuple, remove_cvref_t< BsDataType >, remove_cvref_t< tuple< BsDataType > > > BsDataTypeTuple
Definition cshuffle_epilogue.hpp:88
std::conditional_t< std::is_same_v< BDataType, pk_int4_t >, ADataType, BDataType > BTypeToUse
Definition cshuffle_epilogue.hpp:98
static constexpr index_t kMPerBlock
Definition cshuffle_epilogue.hpp:104
static constexpr index_t NPerIteration
Definition cshuffle_epilogue.hpp:117
static constexpr index_t NPerXdl
Definition cshuffle_epilogue.hpp:109
typename WG::CWarpDstr CWarpDstr
Definition cshuffle_epilogue.hpp:259
Definition cshuffle_epilogue.hpp:40
static constexpr index_t kNPerBlock
Definition cshuffle_epilogue.hpp:51
remove_cvref_t< AccDataType_ > AccDataType
Definition cshuffle_epilogue.hpp:43
static constexpr index_t NumDTensor
Definition cshuffle_epilogue.hpp:64
static constexpr index_t MPerXdl
Definition cshuffle_epilogue.hpp:54
static constexpr index_t NPerXdl
Definition cshuffle_epilogue.hpp:55
remove_cvref_t< CDElementwise_ > CDElementwise
Definition cshuffle_epilogue.hpp:48
static constexpr index_t isCTransposed
Definition cshuffle_epilogue.hpp:57
static constexpr bool TiledMMAPermuteN
Definition cshuffle_epilogue.hpp:62
static constexpr index_t kBlockSize
Definition cshuffle_epilogue.hpp:49
static constexpr index_t VectorSizeC
Definition cshuffle_epilogue.hpp:60
static constexpr index_t kNumWaveGroups
Definition cshuffle_epilogue.hpp:63
static constexpr index_t NWave
Definition cshuffle_epilogue.hpp:53
remove_cvref_t< ODataType_ > ODataType
Definition cshuffle_epilogue.hpp:44
static constexpr index_t BlockedXDLN_PerWarp
Definition cshuffle_epilogue.hpp:61
remove_cvref_t< DsDataType_ > DsDataType
Definition cshuffle_epilogue.hpp:45
static constexpr memory_operation_enum MemoryOperation
Definition cshuffle_epilogue.hpp:58
static constexpr index_t KPerXdl
Definition cshuffle_epilogue.hpp:56
remove_cvref_t< BsDataType_ > BsDataType
Definition cshuffle_epilogue.hpp:42
static constexpr index_t kMPerBlock
Definition cshuffle_epilogue.hpp:50
static constexpr bool FixedVectorSize
Definition cshuffle_epilogue.hpp:59
remove_cvref_t< ELayout_ > ELayout
Definition cshuffle_epilogue.hpp:47
static constexpr index_t MWave
Definition cshuffle_epilogue.hpp:52
remove_cvref_t< DsLayout_ > DsLayout
Definition cshuffle_epilogue.hpp:46
remove_cvref_t< AsDataType_ > AsDataType
Definition cshuffle_epilogue.hpp:41
static constexpr value_type value
Definition tile/core/numeric/integral_constant.hpp:16
Definition tile/ops/elementwise/unary_element_wise_operation.hpp:491
Definition tile/core/container/sequence.hpp:49
Definition space_filling_curve.hpp:20
static CK_TILE_HOST_DEVICE constexpr auto get_index(number< AccessIdx1d >)
Definition space_filling_curve.hpp:158
static CK_TILE_HOST_DEVICE constexpr index_t get_num_of_access()
Definition space_filling_curve.hpp:46
static CK_TILE_HOST_DEVICE constexpr auto get_forward_step(number< AccessIdx1d >)
Definition space_filling_curve.hpp:70
Definition tile/core/utility/functional.hpp:43
Definition tile_distribution_encoding.hpp:26
Definition tile/core/container/tuple.hpp:192