#include <naive_attention.hpp>
◆ OAccType
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::OAccType = float |
◆ p_vec_type
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::p_vec_type = ext_vector_t<PType, 16 / sizeof(PType)> |
◆ PType
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::PType = VType |
◆ QCompute
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::QCompute = KType |
◆ QuantComputeType
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::QuantComputeType = float |
◆ SoftmaxType
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| using ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::SoftmaxType = float |
◆ naive_attention_fwd_kernel()
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| __host__ __device__ ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::naive_attention_fwd_kernel |
( |
| ) |
|
|
inline |
◆ cross_wave_reduce()
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
template<typename T, typename F>
| __device__ constexpr T ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::cross_wave_reduce |
( |
T | local, |
|
|
F | reduce_f, |
|
|
T * | smem ) |
|
inlineconstexpr |
◆ get_block_size()
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| __device__ static __host__ constexpr int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::get_block_size |
( |
| ) |
|
|
inlinestaticconstexpr |
◆ get_grid_size()
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| __host__ dim3 ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::get_grid_size |
( |
naive_attention_fwd_args | args | ) |
|
|
inlinestatic |
◆ operator()()
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| __device__ void ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::operator() |
( |
naive_attention_fwd_args | args | ) |
|
|
inline |
◆ wave_reduce()
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
template<typename T, typename F>
| __device__ constexpr T ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::wave_reduce |
( |
T | local, |
|
|
F | reduce_f ) |
|
inlineconstexpr |
◆ is_kvcache_fp8
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| bool ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::is_kvcache_fp8 |
|
staticconstexpr |
Initial value:=
std::is_same_v<KType, fp8_t> && std::is_same_v<VType, fp8_t>
◆ is_kvcache_i8
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| bool ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::is_kvcache_i8 |
|
staticconstexpr |
Initial value:=
std::is_same_v<KType, int8_t> && std::is_same_v<VType, int8_t>
◆ kBlockSize
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::kBlockSize = 256 |
|
staticconstexpr |
◆ p_vec_elem
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::p_vec_elem = vector_traits<p_vec_type>::vector_size |
|
staticconstexpr |
◆ v_per_token_quant_group_size
template<typename QType, typename KType, typename VType, typename OType, typename AccType, typename KVScaleType,
naive_attention_layout_enum QLayout,
naive_attention_layout_enum KLayout,
naive_attention_layout_enum VLayout,
naive_attention_layout_enum OLayout,
naive_attention_layout_enum KScaleLayout,
naive_attention_layout_enum VScaleLayout, typename Traits>
| int ck_tile::naive_attention_fwd_kernel< QType, KType, VType, OType, AccType, KVScaleType, QLayout, KLayout, VLayout, OLayout, KScaleLayout, VScaleLayout, Traits >::v_per_token_quant_group_size = 64 |
|
staticconstexpr |
The documentation for this struct was generated from the following file: