BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ > Struct Template Reference#
Public Member Functions |
Static Public Member Functions |
Public Attributes |
Static Public Attributes |
List of all members
ck::BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ > Struct Template Reference
#include <block_to_ctile_map.hpp>
Public Member Functions | |
| __host__ __device__ | BlockToCTileMap_GemmStreamK_v2 (uint32_t m, uint32_t n, uint32_t k, uint32_t grid_size=1, uint32_t streamk_sel=1, StreamKReductionStrategy reduction_strategy_=StreamKReductionStrategy::Atomic) |
| __host__ __device__ uint32_t | get_sk_total_iters () const |
| __host__ __device__ uint32_t | get_sk_tiles () const |
| __host__ __device__ index_t | get_grid_dims () const |
| __device__ uint32_t | get_block_idx () const |
| __device__ void | get_block_itr (uint32_t block_idx, uint32_t &iter_start, uint32_t &iter_end) const |
| __device__ uint32_t | get_current_iter_length (uint32_t iter_start, uint32_t iter_end, uint32_t total_iter_length) const |
| __device__ uint32_t | get_tile_idx (uint32_t iter) const |
| __device__ void | get_tile_idx_with_offset (uint32_t iter, uint32_t &tile_idx, uint32_t &iter_offset) const |
| __device__ auto | tile_to_spatial (uint32_t tile_idx, uint32_t m, uint32_t n) const |
| __host__ __device__ uint32_t | get_workspace_size_for_acc (uint32_t acc_element_bytes) const |
| __host__ __device__ uint32_t | get_workspace_size_for_semaphore () const |
| __host__ __device__ uint32_t | get_workspace_size (uint32_t acc_element_bytes) const |
| __host__ __device__ uint32_t | get_tile_intersections (uint32_t tiles_, const MDiv &equiv_tiles_) const |
| __host__ __device__ uint32_t | get_tiles_cover_sk_block (uint32_t num_sk_blocks_, uint32_t iters_per_sk_block_) const |
| __host__ __device__ uint32_t | get_total_acc_buffers () const |
| __device__ uint32_t | get_acc_buffer_offset_from_tile (uint32_t tile_idx_) const |
| __device__ uint32_t | get_acc_buffer_offset_from_block (uint32_t block_idx_) const |
Static Public Member Functions | |
| __host__ static __device__ constexpr index_t | CalculateGridSize (index_t M, index_t N) |
Static Public Attributes | |
| static constexpr uint32_t | min_k_iters_per_sk_block = 2 |
| static constexpr uint32_t | MPerBlock = MPerBlock_ |
| static constexpr uint32_t | NPerBlock = NPerBlock_ |
| static constexpr uint32_t | KPerBlock = KPerBlock_ |
| static constexpr uint32_t | tile_swizzle_sub_m = TileSwizzleSubM_ |
Constructor & Destructor Documentation
◆ BlockToCTileMap_GemmStreamK_v2()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
Member Function Documentation
◆ CalculateGridSize()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inlinestaticconstexpr |
◆ get_acc_buffer_offset_from_block()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_acc_buffer_offset_from_tile()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_block_idx()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_block_itr()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_current_iter_length()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_grid_dims()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_sk_tiles()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_sk_total_iters()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_tile_idx()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_tile_idx_with_offset()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_tile_intersections()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_tiles_cover_sk_block()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_total_acc_buffers()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_workspace_size()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_workspace_size_for_acc()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ get_workspace_size_for_semaphore()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
◆ tile_to_spatial()
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
inline |
Member Data Documentation
◆ dp_start_block_idx
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
| uint32_t ck::BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ >::dp_start_block_idx |
◆ equiv_tiles_big
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
| MDiv ck::BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ >::equiv_tiles_big |
◆ equiv_tiles_little
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
| MDiv ck::BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ >::equiv_tiles_little |
◆ k_iters_per_big_block
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
| uint32_t ck::BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ >::k_iters_per_big_block |
◆ k_iters_per_tile
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
| MDiv ck::BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ >::k_iters_per_tile |
◆ KPerBlock
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
staticconstexpr |
◆ min_k_iters_per_sk_block
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
staticconstexpr |
◆ MPerBlock
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
staticconstexpr |
◆ n_tiles
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
| MDiv2 ck::BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ >::n_tiles |
◆ NPerBlock
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
staticconstexpr |
◆ reduction_start_block_idx
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
| uint32_t ck::BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ >::reduction_start_block_idx |
◆ reduction_strategy
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
| StreamKReductionStrategy ck::BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ >::reduction_strategy |
◆ sk_num_big_blocks
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
| uint32_t ck::BlockToCTileMap_GemmStreamK_v2< MPerBlock_, NPerBlock_, KPerBlock_, ReductionStrategy_, TileSwizzleSubM_, GroupNum, M01_ >::sk_num_big_blocks |
◆ sk_num_blocks
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
mutable |
◆ tile_swizzle_sub_m
template<uint32_t MPerBlock_, uint32_t NPerBlock_, uint32_t KPerBlock_, StreamKReductionStrategy ReductionStrategy_ = StreamKReductionStrategy::Atomic, uint32_t TileSwizzleSubM_ = 8, index_t GroupNum = 8, index_t M01_ = 4>
|
staticconstexpr |
The documentation for this struct was generated from the following file: