gridwise_sparse_embeddings_forward_layernorm.hpp Source File
gridwise_sparse_embeddings_forward_layernorm.hpp
Go to the documentation of this file.
Definition ck.hpp:268
__device__ int32x4_t make_wave_buffer_resource_with_default_range(T *p_wave)
Definition utility/amd_buffer_addressing.hpp:38
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
__device__ void amd_buffer_store_impl(const typename vector_type< T, N >::type src_thread_data, int32x4_t dst_wave_buffer_resource, index_t dst_thread_addr_offset, index_t dst_wave_addr_offset)
Definition utility/amd_buffer_addressing.hpp:544
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
__global__ void kernel_sparse_embeddings_forward_layernorm(OutType *p_out, const ck::Array< EmbType *, NumEmbeddings > p_embs, const ck::Array< IndexType *, NumEmbeddings > p_indexes, const GammaDataType *p_gamma, const BetaDataType *p_beta, const OutGridDesc out_grid_desc, const AccDataType epsilon, const EmbElementwiseOperation emb_elementwise_op)
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:26
__host__ __device__ constexpr Y type_convert(X x)
Definition utility/type_convert.hpp:98
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__device__ vector_type< T, N >::type amd_buffer_load_impl(int32x4_t src_wave_buffer_resource, index_t src_thread_addr_offset, index_t src_wave_addr_offset)
Definition utility/amd_buffer_addressing.hpp:419
__host__ __device__ constexpr auto unpack2(F &&f, X &&x, Y &&y)
Definition functional4.hpp:55
__host__ __device__ constexpr auto generate_tie(F &&f, Number< N >)
Definition tuple_helper.hpp:34
typename vector_type_maker< T, N >::type vector_type_maker_t
Definition dtype_vector.hpp:54
Definition utility/array.hpp:14
static __device__ void Run(T &mean_value, T &var_value, CountDataType &count)
Definition blockwise_welford.hpp:51
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:57
static constexpr auto I0
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:58
static constexpr auto RowPerSubBlock
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:76
static constexpr auto RowSubBlocks
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:72
static constexpr auto DimSubBlocks
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:71
BlockwiseWelford< AccDataType, BlockSize, ThreadClusterLength, Sequence< 0, 1 > > BlockwiseWelford
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:89
static constexpr auto DimPerSubBlock
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:75
decltype(make_naive_tensor_descriptor_packed(make_tuple(Number< DimSubBlocks *DimThreadSize >{}, Number< RowSubBlocks *RowVectorSize >{}))) ThreadwiseWolfordDesc2D
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:78
static constexpr auto I1
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:59
decltype(make_naive_tensor_descriptor_packed(make_tuple(Number< DimSubBlocks *DimThreadSize >{}))) ThreadwiseWolfordDescReduce
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:81
static constexpr auto I2
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:60
static __device__ void Run(OutType *p_out, const ck::Array< EmbType *, NumEmbeddings > p_embs, const ck::Array< IndexType *, NumEmbeddings > p_indexes, const GammaDataType *p_gamma, const BetaDataType *p_beta, const OutGridDesc, const AccDataType epsilon, const EmbElementwiseOperation emb_elementwise_op)
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:92
Sequence< DimClusterSize, RowClusterSize > ThreadClusterLength
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:87
static constexpr index_t WaveSize
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:62
ThreadwiseWelford< AccDataType, ThreadwiseWolfordDesc2D, ThreadwiseWolfordDescReduce > ThreadwiseWelford
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:84
static constexpr auto I3
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:61
Definition utility/sequence.hpp:43
Definition static_buffer.hpp:16
Definition functional2.hpp:33