GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq > Struct Template Reference

GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq > Struct Template Reference

Composable Kernel: ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq > Struct Template Reference
ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq > Struct Template Reference

#include <gridwise_2d_multiple_reduction_multiblock.hpp>

Public Types

using ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize>
using ThreadBufferDimAccessOrder
using ThreadClusterArrangeOrder
using ThreadReduceSrcDesc_M_K
using ThreadReduceDstDesc_M
using BlockwiseReduce
using ThreadwiseReduce
using PassThroughOp = tensor_operation::element_wise::PassThrough
using Accumulation = detail::AccumulateWithNanCheck<PropagateNan, ReduceOperation, AccDataType>

Static Public Member Functions

static __device__ void Run (const InGridDesc_M_K &in_grid_desc_m_k, const OutGridDesc_M_Tuple &out_grid_desc_m_tuple, const InElementwiseOperationTuple &in_elementwise_op_tuple, const AccElementwiseOperationTuple &acc_elementwise_op_tuple, index_t block_group_size, index_t num_k_block_tile_iteration, Array< AccDataType, NumReduction > alpha_values, const InDataType *const __restrict__ p_in_value_global, Array< AccDataType, NumReduction > beta_values, OutDataTypePointerTuple p_out_value_global_tuple)

Static Public Attributes

static constexpr bool reorder_thread_cluster = (InSrcVectorDim == 0)
static constexpr auto thread_cluster_desc
static constexpr auto I0 = Number<0>{}
static constexpr auto I1 = Number<1>{}
static constexpr index_t M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr index_t K_BlockTileSize = KThreadClusterSize * KThreadSliceSize

Member Typedef Documentation

◆ Accumulation

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
using ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::Accumulation = detail::AccumulateWithNanCheck<PropagateNan, ReduceOperation, AccDataType>

◆ BlockwiseReduce

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
using ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::BlockwiseReduce
Initial value:
PartitionedBlockwiseReduction<AccDataType,
BlockSize,
ThreadClusterLengths_M_K,
ThreadClusterArrangeOrder,
ReduceOperation,
PropagateNan>
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp:110
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp:115
Definition reduction_functions_blockwise.hpp:28

◆ PassThroughOp

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
using ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::PassThroughOp = tensor_operation::element_wise::PassThrough

◆ ThreadBufferDimAccessOrder

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
using ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::ThreadBufferDimAccessOrder
Initial value:
typename conditional<reorder_thread_cluster, Sequence<1, 0>, Sequence<0, 1>>::type
Definition utility/sequence.hpp:43
Definition utility/functional.hpp:100

◆ ThreadClusterArrangeOrder

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
using ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::ThreadClusterArrangeOrder

◆ ThreadClusterLengths_M_K

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
using ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize>

◆ ThreadReduceDstDesc_M

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
using ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::ThreadReduceDstDesc_M
Initial value:
decltype(make_naive_tensor_descriptor_packed(make_tuple(Number<MThreadSliceSize>{})))
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211

◆ ThreadReduceSrcDesc_M_K

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
using ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::ThreadReduceSrcDesc_M_K

◆ ThreadwiseReduce

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
using ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::ThreadwiseReduce
Initial value:
ThreadwiseReduction<AccDataType,
ThreadReduceSrcDesc_M_K,
ThreadReduceDstDesc_M,
ReduceOperation,
PropagateNan>
decltype(make_naive_tensor_descriptor_packed(make_tuple(Number< MThreadSliceSize >{}))) ThreadReduceDstDesc_M
Definition gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp:123
decltype(make_naive_tensor_descriptor_packed( make_tuple(Number< MThreadSliceSize >{}, Number< KThreadSliceSize >{}))) ThreadReduceSrcDesc_M_K
Definition gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp:116
Definition reduction_functions_threadwise.hpp:23

Member Function Documentation

◆ Run()

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
__device__ void ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::Run ( const InGridDesc_M_K & in_grid_desc_m_k,
const OutGridDesc_M_Tuple & out_grid_desc_m_tuple,
const InElementwiseOperationTuple & in_elementwise_op_tuple,
const AccElementwiseOperationTuple & acc_elementwise_op_tuple,
index_t block_group_size,
index_t num_k_block_tile_iteration,
Array< AccDataType, NumReduction > alpha_values,
const InDataType *const __restrict__ p_in_value_global,
Array< AccDataType, NumReduction > beta_values,
OutDataTypePointerTuple p_out_value_global_tuple )
inline static

Member Data Documentation

◆ I0

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
auto ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::I0 = Number<0>{}
static constexpr

◆ I1

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
auto ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::I1 = Number<1>{}
static constexpr

◆ K_BlockTileSize

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
index_t ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::K_BlockTileSize = KThreadClusterSize * KThreadSliceSize
static constexpr

◆ M_BlockTileSize

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
index_t ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr

◆ reorder_thread_cluster

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
bool ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::reorder_thread_cluster = (InSrcVectorDim == 0)
static constexpr

◆ thread_cluster_desc

template<index_t NumReduction, typename InDataType, typename OutDataTypePointerTuple, typename AccDataType, typename InGridDesc_M_K, typename OutGridDesc_M_Tuple, typename ReduceOperation, typename InElementwiseOperationTuple, typename AccElementwiseOperationTuple, InMemoryDataOperationEnum OutMemoryDataOperation, bool PropagateNan, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, typename OutDstVectorSizeSeq>
auto ck::GridwiseMultipleReduction_mk_to_m_multiblock< NumReduction, InDataType, OutDataTypePointerTuple, AccDataType, InGridDesc_M_K, OutGridDesc_M_Tuple, ReduceOperation, InElementwiseOperationTuple, AccElementwiseOperationTuple, OutMemoryDataOperation, PropagateNan, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSizeSeq >::thread_cluster_desc
static constexpr
Initial value:
= make_cluster_descriptor(ThreadClusterLengths_M_K{}, ThreadClusterArrangeOrder{})
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_2d_multiple_reduction_multiblock.hpp:83
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_2d_multiple_reduction_multiblock.hpp:88

The documentation for this struct was generated from the following file: