GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings > Struct Template Reference

GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings > Struct Template Reference

Composable Kernel: ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings > Struct Template Reference
ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings > Struct Template Reference

#include <gridwise_sparse_embeddings_forward_layernorm.hpp>

Public Types

using ThreadwiseWolfordDesc2D
using ThreadwiseWolfordDescReduce
using ThreadwiseWelford
using ThreadClusterLength = Sequence<DimClusterSize, RowClusterSize>
using BlockwiseWelford
using ThreadwiseWolfordDesc2D
using ThreadwiseWolfordDescReduce
using ThreadwiseWelford
using ThreadClusterLength = Sequence<DimClusterSize, RowClusterSize>
using BlockwiseWelford

Static Public Member Functions

static __device__ void Run (OutType *p_out, const ck::Array< EmbType *, NumEmbeddings > p_embs, const ck::Array< IndexType *, NumEmbeddings > p_indexes, const GammaDataType *p_gamma, const BetaDataType *p_beta, const OutGridDesc, const AccDataType epsilon, const EmbElementwiseOperation emb_elementwise_op)
static __device__ void Run (OutType *p_out, const ck::Array< EmbType *, NumEmbeddings > p_embs, const ck::Array< IndexType *, NumEmbeddings > p_indexes, const GammaDataType *p_gamma, const BetaDataType *p_beta, const OutGridDesc, const AccDataType epsilon, const EmbElementwiseOperation emb_elementwise_op)

Static Public Attributes

static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr auto I2 = Number<2>{}
static constexpr auto I3 = Number<3>{}
static constexpr index_t WaveSize = 64
static constexpr auto DimSubBlocks = DimPerBlock / (DimClusterSize * DimThreadSize)
static constexpr auto RowSubBlocks = RowPerBlock / (RowClusterSize * RowVectorSize)
static constexpr auto DimPerSubBlock = DimPerBlock / DimSubBlocks
static constexpr auto RowPerSubBlock = RowPerBlock / RowSubBlocks

Member Typedef Documentation

◆ BlockwiseWelford [1/2]

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::BlockwiseWelford
Initial value:
BlockwiseWelford< AccDataType, BlockSize, ThreadClusterLength, Sequence< 0, 1 > > BlockwiseWelford
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:89

◆ BlockwiseWelford [2/2]

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::BlockwiseWelford

◆ ThreadClusterLength [1/2]

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadClusterLength = Sequence<DimClusterSize, RowClusterSize>

◆ ThreadClusterLength [2/2]

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadClusterLength = Sequence<DimClusterSize, RowClusterSize>

◆ ThreadwiseWelford [1/2]

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadwiseWelford
Initial value:
ThreadwiseWelford< AccDataType, ThreadwiseWolfordDesc2D, ThreadwiseWolfordDescReduce > ThreadwiseWelford
Definition gridwise_sparse_embeddings_forward_layernorm.hpp:84

◆ ThreadwiseWelford [2/2]

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadwiseWelford

◆ ThreadwiseWolfordDesc2D [1/2]

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadwiseWolfordDesc2D
Initial value:
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211

◆ ThreadwiseWolfordDesc2D [2/2]

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadwiseWolfordDesc2D

◆ ThreadwiseWolfordDescReduce [1/2]

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadwiseWolfordDescReduce

◆ ThreadwiseWolfordDescReduce [2/2]

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
using ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::ThreadwiseWolfordDescReduce

Member Function Documentation

◆ Run() [1/2]

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
__device__ void ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::Run ( OutType * p_out,
const ck::Array< EmbType *, NumEmbeddings > p_embs,
const ck::Array< IndexType *, NumEmbeddings > p_indexes,
const GammaDataType * p_gamma,
const BetaDataType * p_beta,
const OutGridDesc ,
const AccDataType epsilon,
const EmbElementwiseOperation emb_elementwise_op )
inline static

◆ Run() [2/2]

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
__device__ void ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::Run ( OutType * p_out,
const ck::Array< EmbType *, NumEmbeddings > p_embs,
const ck::Array< IndexType *, NumEmbeddings > p_indexes,
const GammaDataType * p_gamma,
const BetaDataType * p_beta,
const OutGridDesc ,
const AccDataType epsilon,
const EmbElementwiseOperation emb_elementwise_op )
inline static

Member Data Documentation

◆ DimPerSubBlock

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::DimPerSubBlock = DimPerBlock / DimSubBlocks
static constexpr

◆ DimSubBlocks

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::DimSubBlocks = DimPerBlock / (DimClusterSize * DimThreadSize)
static constexpr

◆ I0

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::I0 = Number<0>{}
static constexpr

◆ I1

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::I1 = Number<1>{}
static constexpr

◆ I2

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::I2 = Number<2>{}
static constexpr

◆ I3

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::I3 = Number<3>{}
static constexpr

◆ RowPerSubBlock

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::RowPerSubBlock = RowPerBlock / RowSubBlocks
static constexpr

◆ RowSubBlocks

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr auto ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::RowSubBlocks = RowPerBlock / (RowClusterSize * RowVectorSize)
static constexpr

◆ WaveSize

template<typename EmbType, typename IndexType, typename GammaDataType, typename BetaDataType, typename AccDataType, typename OutType, typename OutGridDesc, typename EmbElementwiseOperation, ck::index_t BlockSize, ck::index_t DimClusterSize, ck::index_t RowClusterSize, ck::index_t DimPerBlock, ck::index_t RowPerBlock, ck::index_t DimThreadSize, ck::index_t RowVectorSize, ck::index_t NumEmbeddings>
constexpr index_t ck::GridwiseSparseEmbeddingsForwardLayernorm< EmbType, IndexType, GammaDataType, BetaDataType, AccDataType, OutType, OutGridDesc, EmbElementwiseOperation, BlockSize, DimClusterSize, RowClusterSize, DimPerBlock, RowPerBlock, DimThreadSize, RowVectorSize, NumEmbeddings >::WaveSize = 64
static constexpr

The documentation for this struct was generated from the following files: