load_tile.hpp Source File

load_tile.hpp Source File#

Composable Kernel: load_tile.hpp Source File
load_tile.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
18
19namespace ck_tile {
20
21template <typename TileWindow_, index_t i_access = -1, bool oob_conditional_check = true>
22CK_TILE_DEVICE auto load_tile(const TileWindow_& tile_window,
25{
26 return tile_window.load(number<i_access>{}, bool_constant<oob_conditional_check>{});
27}
28
37template <typename TileWindow_,
38 typename ElementWise_,
39 index_t i_access = -1,
40 bool oob_conditional_check = true>
41CK_TILE_DEVICE auto load_tile_with_elementwise(const TileWindow_& tile_window,
42 ElementWise_ elementwise,
45{
46 // TODO: Tile windows should works with unknow number of params
47 // Load element_wise API works only when the input typle is a tuple-tyupe
48 return tile_window[number<0>{}].load(
49 tile_window, elementwise, number<i_access>{}, bool_constant<oob_conditional_check>{});
50}
51
52template <typename DistributedTensor_,
53 typename TileWindow_,
54 index_t i_access = -1,
55 bool oob_conditional_check = true>
56CK_TILE_DEVICE auto load_tile(DistributedTensor_& dst_tile,
57 const TileWindow_& tile_window,
60{
61 return tile_window.load(dst_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
62}
63
// load_tile_raw overload for tile_window_with_static_distribution windows.
// This page's symbol index describes it as "Loads a tile of data using inline assembly."
// and records the full signature as:
//   CK_TILE_DEVICE auto load_tile_raw(
//       T& tile,
//       const tile_window_with_static_distribution<BottomTensorView_, WindowLengths_,
//                                                  TileDistribution_, NumCoord>& tile_window,
//       number<i_access> = {},
//       bool_constant<oob_conditional_check> = {},
//       bool_constant<pre_nop> = {})
// The visible body is a single call to tile_window.load_raw(...) with no return statement.
// NOTE(review): this listing is missing source lines 81, 86-88 and 91 (the function-name
// line, the three defaulted tag parameters, and the argument list of the load_raw call).
// The call arguments are not visible here — restore them from the original header.
73template <typename T,
74 typename BottomTensorView_,
75 typename WindowLengths_,
76 typename TileDistribution_,
77 index_t NumCoord,
78 index_t i_access = -1,
79 bool oob_conditional_check = true,
80 bool pre_nop = false>
// (source line 81, the function-name line, is missing from the listing)
82 const tile_window_with_static_distribution<BottomTensorView_,
83 WindowLengths_,
84 TileDistribution_,
85 NumCoord>& tile_window,
// (source lines 86-88, the trailing tag parameters, are missing from the listing)
89{
90 tile_window.load_raw(
// (source line 91, the call's argument list, is missing from the listing)
92}
93
// Overload taking a tile_window_linear<BottomTensorView_, WindowLengths_, TileDistribution_,
// LinearBottomDims_> window; the visible body is a single call to tile_window.load_raw(...).
// NOTE(review): presumably this is the tile_window_linear counterpart of load_tile_raw, since
// the template parameters and the load_raw call mirror the overload above — confirm against
// the original header. The listing is missing source lines 102, 108-109 and 112 (the
// function-name line, the parameters after number<i_access>, and the argument list of the
// load_raw call); none of them can be recovered from this page.
94template <typename T,
95 typename BottomTensorView_,
96 typename WindowLengths_,
97 typename TileDistribution_,
98 typename LinearBottomDims_,
99 index_t i_access = -1,
100 bool oob_conditional_check = true,
101 bool pre_nop = false>
// (source line 102, the function-name line, is missing from the listing)
103 const tile_window_linear<BottomTensorView_,
104 WindowLengths_,
105 TileDistribution_,
106 LinearBottomDims_>& tile_window,
107 number<i_access> = {},
// (source lines 108-109, the remaining parameters, are missing from the listing)
110{
111 tile_window.load_raw(
// (source line 112, the call's argument list, is missing from the listing)
113}
114
// async_load_tile: forwards to tile_window.async_load(...) and returns its result.
// lds_tile is taken as a forwarding reference (LdsTileWindow_&&); tile_window by const reference.
// Full signature as recorded in this page's symbol index:
//   CK_TILE_DEVICE auto async_load_tile(LdsTileWindow_&& lds_tile,
//                                       const TileWindow_& tile_window,
//                                       number<i_access> = {},
//                                       bool_constant<oob_conditional_check> = {})
// NOTE(review): this listing is missing source lines 122 and 125 (the last parameter and the
// argument list of the async_load call). The call arguments are not visible here — restore
// them from the original header.
115template <typename LdsTileWindow_,
116 typename TileWindow_,
117 index_t i_access = -1,
118 bool oob_conditional_check = true>
119CK_TILE_DEVICE auto async_load_tile(LdsTileWindow_&& lds_tile,
120 const TileWindow_& tile_window,
121 number<i_access> = {},
// (source line 122, the last parameter, is missing from the listing)
123{
124 return tile_window.async_load(
// (source line 125, the call's argument list, is missing from the listing)
126}
127
// async_load_tile_raw: forwards to tile_window.async_load_raw(lds_tile, ...) and returns
// its result. Compared with async_load_tile it has an extra pre_nop template flag
// (default false).
// Full signature as recorded in this page's symbol index:
//   CK_TILE_DEVICE auto async_load_tile_raw(LdsTileWindow_&& lds_tile,
//                                           const TileWindow_& tile_window,
//                                           number<i_access> = {},
//                                           bool_constant<oob_conditional_check> = {},
//                                           bool_constant<pre_nop> = {})
// NOTE(review): this listing is missing source lines 136-137 and 140-142 (the last two
// parameters and every call argument after lds_tile). The remaining call arguments are not
// visible here — restore them from the original header.
128template <typename LdsTileWindow_,
129 typename TileWindow_,
130 index_t i_access = -1,
131 bool oob_conditional_check = true,
132 bool pre_nop = false>
133CK_TILE_DEVICE auto async_load_tile_raw(LdsTileWindow_&& lds_tile,
134 const TileWindow_& tile_window,
135 number<i_access> = {},
// (source lines 136-137, the last two parameters, are missing from the listing)
138{
139 return tile_window.async_load_raw(lds_tile,
// (source lines 140-142, the rest of the call, are missing from the listing)
143}
144
146{
147 asm volatile("s_waitcnt vmcnt(%0)" : : "n"(cnt) : "memory");
148}
149
// Two trailing function templates whose declarations did not survive in this listing.
// NOTE(review): source lines 151-154 (the whole first function after its template header)
// and 157 (the second function's declaration line) are missing. The page's symbol index
// references null_tensor.hpp and null_tile_window.hpp, so presumably these are the
// null-tile-window overloads — confirm against the original header.
150template <typename WindowLengths>
// (source lines 151-154 are missing from the listing)
155
156template <typename T, typename WindowLengths>
// (source line 157, the declaration line, is missing from the listing)
// The second function's visible body is empty.
158{
159}
160
161} // namespace ck_tile
#define CK_TILE_DEVICE
Definition config.hpp:41
Definition tile/core/algorithm/cluster_descriptor.hpp:13
CK_TILE_DEVICE auto async_load_tile(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:119
CK_TILE_DEVICE auto async_load_fence(index_t cnt=0)
Definition load_tile.hpp:145
CK_TILE_DEVICE auto load_tile_with_elementwise(const TileWindow_ &tile_window, ElementWise_ elementwise, number< i_access >={}, bool_constant< oob_conditional_check >={})
Load tile with elementwise function.
Definition load_tile.hpp:41
constant< b > bool_constant
Definition tile/core/numeric/integral_constant.hpp:43
constant< v > number
Definition tile/core/numeric/integral_constant.hpp:37
CK_TILE_DEVICE auto load_tile_raw(T &tile, const tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, TileDistribution_, NumCoord > &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Loads a tile of data using inline assembly.
Definition load_tile.hpp:81
CK_TILE_DEVICE auto async_load_tile_raw(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Definition load_tile.hpp:133
int32_t index_t
Definition integer.hpp:9
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:22
Definition null_tensor.hpp:9
Definition null_tile_window.hpp:19
Definition tile_window_linear.hpp:55
This class provides tile (windowed) view and access to the device memory.
Definition tile_window.hpp:46