flatmm_sn_32x128x512_1x4x1_16x16x32.hpp File Reference
flatmm_sn_32x128x512_1x4x1_16x16x32.hpp File Reference
#include "ck_tile/core.hpp"
#include "ck_tile/ops/gemm/warp/warp_gemm.hpp"
#include "ck_tile/ops/flatmm/block/flatmm_uk_config.hpp"
#include "uk/flatmm_sn_uk_gfx9_32x128x512_1x4x1_16x16x16.inc"
Go to the source code of this file.
Classes | |
| struct | ck_tile::FlatmmSn_32x128x512_1x4x1_16x16x32_Base |
| struct | ck_tile::FlatmmSn_32x128x512_1x4x1_16x16x32_BF16 |
| struct | ck_tile::FlatmmSn_32x128x512_1x4x1_16x16x32_FP16 |
Namespaces | |
| namespace | ck_tile |
Macros | |
| #define | CK_TILE_FLATMM_UK_MFMA CK_TILE_FLATMM_UK_MFMA_BF16 |
| #define | _UK_MFMA_ "v_mfma_f32_16x16x16_bf16" |
| #define | _UK_PK_CVT_(x0_, x1_, y_) |
| #define | _UK_ATOMIC_ADD_ "global_atomic_pk_add_bf16" |
| #define | CK_TILE_FLATMM_UK_MFMA CK_TILE_FLATMM_UK_MFMA_FP16 |
| #define | _UK_MFMA_ "v_mfma_f32_16x16x16_bf16" |
| #define | _UK_PK_CVT_(x0_, x1_, y_) |
| #define | _UK_ATOMIC_ADD_ "global_atomic_pk_add_bf16" |
Macro Definition Documentation
◆ _UK_ATOMIC_ADD_ [1/2]
| #define _UK_ATOMIC_ADD_ "global_atomic_pk_add_bf16" |
◆ _UK_ATOMIC_ADD_ [2/2]
| #define _UK_ATOMIC_ADD_ "global_atomic_pk_add_bf16" |
◆ _UK_MFMA_ [1/2]
| #define _UK_MFMA_ "v_mfma_f32_16x16x16_bf16" |
◆ _UK_MFMA_ [2/2]
| #define _UK_MFMA_ "v_mfma_f32_16x16x16_bf16" |
◆ _UK_PK_CVT_ [1/2]
| #define _UK_PK_CVT_(x0_, x1_, y_) |
Value:
" v_cmp_u_f32 s[36:37], " x0_ ", " x0_ " \n" \
" v_add3_u32 v50, " x0_ ", %[v_nan_lo], 1 \n" \
" v_cndmask_b32 v54, v50, %[v_nan_hi], s[36:37] \n" \
" v_cmp_u_f32 s[36:37], " x1_ ", " x1_ " \n" \
" v_add3_u32 v50, " x1_ ", %[v_nan_lo], 1 \n" \
" v_cndmask_b32 v55, v50, %[v_nan_hi], s[36:37] \n" \
" v_perm_b32 " y_ ", v55, v54, s52 \n"
◆ _UK_PK_CVT_ [2/2]
| #define _UK_PK_CVT_(x0_, x1_, y_) |
Value:
" v_cmp_u_f32 s[36:37], " x0_ ", " x0_ " \n" \
" v_add3_u32 v50, " x0_ ", %[v_nan_lo], 1 \n" \
" v_cndmask_b32 v54, v50, %[v_nan_hi], s[36:37] \n" \
" v_cmp_u_f32 s[36:37], " x1_ ", " x1_ " \n" \
" v_add3_u32 v50, " x1_ ", %[v_nan_lo], 1 \n" \
" v_cndmask_b32 v55, v50, %[v_nan_hi], s[36:37] \n" \
" v_perm_b32 " y_ ", v55, v54, s52 \n"
◆ CK_TILE_FLATMM_UK_MFMA [1/2]
| #define CK_TILE_FLATMM_UK_MFMA CK_TILE_FLATMM_UK_MFMA_FP16 |
◆ CK_TILE_FLATMM_UK_MFMA [2/2]
| #define CK_TILE_FLATMM_UK_MFMA CK_TILE_FLATMM_UK_MFMA_BF16 |