generic_memory_space_atomic.hpp Source File

generic_memory_space_atomic.hpp Source File#

Composable Kernel: generic_memory_space_atomic.hpp Source File
utility/generic_memory_space_atomic.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5#include "data_type.hpp"
6#include "dtype_fp64.hpp"
7
8namespace ck {
9
10// Caution: DO NOT REMOVE
11// intentionally have only declaration but no definition to cause compilation failure when trying to
12// instantiate this template. The purpose is to make the implementation of atomic_add explicit for
13// each datatype.
14template <typename X>
15__device__ X atomic_add(X* p_dst, const X& x);
16
17template <>
18__device__ int32_t atomic_add<int32_t>(int32_t* p_dst, const int32_t& x)
19{
20 return atomicAdd(p_dst, x);
21}
22
23template <>
24__device__ uint32_t atomic_add<uint32_t>(uint32_t* p_dst, const uint32_t& x)
25{
26 return atomicAdd(p_dst, x);
27}
28
29template <>
30__device__ float atomic_add<float>(float* p_dst, const float& x)
31{
32 return atomicAdd(p_dst, x);
33}
34
35template <>
36__device__ unsigned short atomic_add<unsigned short>(unsigned short* p_dst, const unsigned short& x)
37{
38 // Use atomicAdd with unsigned int
39 return static_cast<unsigned short>(
40 atomicAdd(reinterpret_cast<unsigned int*>(p_dst), static_cast<unsigned int>(x)));
41}
42
43template <>
44__device__ _Float16 atomic_add<_Float16>(_Float16* p_dst, const _Float16& x)
45{
46 // Use atomicAdd with unsigned int
47 return static_cast<_Float16>(
48 atomicAdd(reinterpret_cast<unsigned int*>(p_dst), static_cast<unsigned int>(x)));
49}
50
51template <>
52__device__ double atomic_add<double>(double* p_dst, const double& x)
53{
54 return atomicAdd(p_dst, x);
55}
56
57template <>
58__device__ float2_t atomic_add<float2_t>(float2_t* p_dst, const float2_t& x)
59{
60 constexpr auto I0 = Number<0>{};
61 constexpr auto I1 = Number<1>{};
62
63 const vector_type<float, 2> vx{x};
65
66 vy.template AsType<float>()(I0) =
67 atomicAdd(c_style_pointer_cast<float*>(p_dst), vx.template AsType<float>()[I0]);
68 vy.template AsType<float>()(I1) =
69 atomicAdd(c_style_pointer_cast<float*>(p_dst) + 1, vx.template AsType<float>()[I1]);
70
71 return vy.template AsType<float2_t>()[I0];
72}
73
74template <>
76{
77 constexpr auto I0 = Number<0>{};
78 constexpr auto I1 = Number<1>{};
79
80 const vector_type<double, 2> vx{x};
82
83 vy.template AsType<double>()(I0) =
84 atomicAdd(c_style_pointer_cast<double*>(p_dst), vx.template AsType<double>()[I0]);
85 vy.template AsType<double>()(I1) =
86 atomicAdd(c_style_pointer_cast<double*>(p_dst) + 1, vx.template AsType<double>()[I1]);
87
88 return vy.template AsType<double2_t>()[I0];
89}
90
91// Caution: DO NOT REMOVE
92// intentionally have only declaration but no definition to cause compilation failure when trying to
93// instantiate this template. The purpose is to make the implementation of atomic_max explicit for
94// each datatype.
95
96template <typename X>
97__device__ X atomic_max(X* p_dst, const X& x);
98
99template <>
100__device__ int32_t atomic_max<int32_t>(int32_t* p_dst, const int32_t& x)
101{
102 return atomicMax(p_dst, x);
103}
104
105template <>
106__device__ uint32_t atomic_max<uint32_t>(uint32_t* p_dst, const uint32_t& x)
107{
108 return atomicMax(p_dst, x);
109}
110
111template <>
112__device__ float atomic_max<float>(float* p_dst, const float& x)
113{
114 return atomicMax(p_dst, x);
115}
116
117template <>
118__device__ double atomic_max<double>(double* p_dst, const double& x)
119{
120 return atomicMax(p_dst, x);
121}
122
123template <>
124__device__ float2_t atomic_max<float2_t>(float2_t* p_dst, const float2_t& x)
125{
126 constexpr auto I0 = Number<0>{};
127 constexpr auto I1 = Number<1>{};
128
129 const vector_type<float, 2> vx{x};
131
132 vy.template AsType<float>()(I0) =
133 atomicMax(c_style_pointer_cast<float*>(p_dst), vx.template AsType<float>()[I0]);
134 vy.template AsType<float>()(I1) =
135 atomicMax(c_style_pointer_cast<float*>(p_dst) + 1, vx.template AsType<float>()[I1]);
136
137 return vy.template AsType<float2_t>()[I0];
138}
139
140} // namespace ck
Definition ck.hpp:268
__device__ _Float16 atomic_add< _Float16 >(_Float16 *p_dst, const _Float16 &x)
Definition utility/generic_memory_space_atomic.hpp:44
__device__ uint32_t atomic_add< uint32_t >(uint32_t *p_dst, const uint32_t &x)
Definition utility/generic_memory_space_atomic.hpp:24
__device__ float atomic_add< float >(float *p_dst, const float &x)
Definition utility/generic_memory_space_atomic.hpp:30
__device__ uint32_t atomic_max< uint32_t >(uint32_t *p_dst, const uint32_t &x)
Definition utility/generic_memory_space_atomic.hpp:106
integral_constant< index_t, N > Number
Definition number.hpp:12
__device__ float2_t atomic_add< float2_t >(float2_t *p_dst, const float2_t &x)
Definition utility/generic_memory_space_atomic.hpp:58
__host__ __device__ PY c_style_pointer_cast(PX p_x)
Definition c_style_pointer_cast.hpp:15
typename vector_type< float, 2 >::type float2_t
Definition dtype_vector.hpp:2145
__device__ X atomic_max(X *p_dst, const X &x)
__device__ int32_t atomic_max< int32_t >(int32_t *p_dst, const int32_t &x)
Definition utility/generic_memory_space_atomic.hpp:100
__device__ double atomic_add< double >(double *p_dst, const double &x)
Definition utility/generic_memory_space_atomic.hpp:52
__device__ unsigned short atomic_add< unsigned short >(unsigned short *p_dst, const unsigned short &x)
Definition utility/generic_memory_space_atomic.hpp:36
__device__ int32_t atomic_add< int32_t >(int32_t *p_dst, const int32_t &x)
Definition utility/generic_memory_space_atomic.hpp:18
__device__ float atomic_max< float >(float *p_dst, const float &x)
Definition utility/generic_memory_space_atomic.hpp:112
__device__ float2_t atomic_max< float2_t >(float2_t *p_dst, const float2_t &x)
Definition utility/generic_memory_space_atomic.hpp:124
typename vector_type< double, 2 >::type double2_t
Definition dtype_fp64.hpp:5
__device__ double2_t atomic_add< double2_t >(double2_t *p_dst, const double2_t &x)
Definition utility/generic_memory_space_atomic.hpp:75
__device__ double atomic_max< double >(double *p_dst, const double &x)
Definition utility/generic_memory_space_atomic.hpp:118
__device__ X atomic_add(X *p_dst, const X &x)
unsigned int uint32_t
Definition stdint.h:126
signed int int32_t
Definition stdint.h:123
Definition dtype_vector.hpp:10