flatmm_32x512x128_1x4x1_16x16x32.hpp File Reference

flatmm_32x512x128_1x4x1_16x16x32.hpp File Reference

Composable Kernel: flatmm_32x512x128_1x4x1_16x16x32.hpp File Reference
flatmm_32x512x128_1x4x1_16x16x32.hpp File Reference

Go to the source code of this file.

Classes

struct  ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_Base
struct  ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_BF16
struct  ck_tile::Flatmm_32x512x128_1x4x1_16x16x32_FP16

Namespaces

namespace  ck_tile

Macros

#define _EXPAND_ASM_ARGS_OUT_ONE_ACC
#define _EXPAND_ASM_ARGS_OUT_TWO_ACC
#define _EXPAND_ASM_ARGS_IN
#define _EXPAND_ASM_ARGS_CLOBBER
#define CK_TILE_FLATMM_UK_MFMA   CK_TILE_FLATMM_UK_MFMA_BF16
#define CK_TILE_FLATMM_UK_2B   1
#define _UK_MFMA_   "v_mfma_f32_16x16x16_bf16"
#define _UK_GLD_A0
#define _UK_GLD_A1
#define _UK_GLD_A2
#define _UK_GLD_A3
#define _UK_GLD_A4
#define _UK_GLD_A5
#define _UK_GLD_A6
#define _UK_GLD_A7_AND_L1
#define _UK_GLD_A7_AND_L0
#define _UK_NONE   ""
#define _UK_SLD_A0_X   "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n"
#define _UK_SLD_A1_X   "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n"
#define _UK_SLD_A2_X   "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n"
#define _UK_SLD_A3_X   "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n"
#define _UK_SLD_A4_X   "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n"
#define _UK_SLD_A5_X   "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n"
#define _UK_SLD_A6_X   "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n"
#define _UK_SLD_A7_X   "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n"
#define _UK_SLD_A0_Y   "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n"
#define _UK_SLD_A1_Y   "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n"
#define _UK_SLD_A2_Y   "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n"
#define _UK_SLD_A3_Y   "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n"
#define _UK_SLD_A4_Y   "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n"
#define _UK_SLD_A5_Y   "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n"
#define _UK_SLD_A6_Y   "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n"
#define _UK_SLD_A7_Y   "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n"
#define _UK_PIPELINE_0(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_)
#define _UK_PIPELINE_1(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_)
#define CK_TILE_FLATMM_UK_MFMA   CK_TILE_FLATMM_UK_MFMA_BF16
#define _UK_MFMA_   "v_mfma_f32_16x16x16_bf16"
#define CK_TILE_FLATMM_UK_2B   0
#define _UK_GLD_A0
#define _UK_GLD_A1
#define _UK_GLD_A2
#define _UK_GLD_A3
#define _UK_GLD_A4
#define _UK_GLD_A5
#define _UK_GLD_A6
#define _UK_GLD_A7_AND_L1
#define _UK_GLD_A7_AND_L0
#define _UK_NONE   ""
#define _UK_SLD_A0_X   "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n"
#define _UK_SLD_A1_X   "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n"
#define _UK_SLD_A2_X   "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n"
#define _UK_SLD_A3_X   "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n"
#define _UK_SLD_A4_X   "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n"
#define _UK_SLD_A5_X   "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n"
#define _UK_SLD_A6_X   "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n"
#define _UK_SLD_A7_X   "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n"
#define _UK_SLD_A0_Y   "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n"
#define _UK_SLD_A1_Y   "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n"
#define _UK_SLD_A2_Y   "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n"
#define _UK_SLD_A3_Y   "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n"
#define _UK_SLD_A4_Y   "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n"
#define _UK_SLD_A5_Y   "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n"
#define _UK_SLD_A6_Y   "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n"
#define _UK_SLD_A7_Y   "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n"
#define _UK_PIPELINE_0(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_)
#define _UK_PIPELINE_1(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_)
#define CK_TILE_FLATMM_UK_MFMA   CK_TILE_FLATMM_UK_MFMA_FP16
#define CK_TILE_FLATMM_UK_2B   1
#define _UK_MFMA_   "v_mfma_f32_16x16x16_bf16"
#define _UK_GLD_A0
#define _UK_GLD_A1
#define _UK_GLD_A2
#define _UK_GLD_A3
#define _UK_GLD_A4
#define _UK_GLD_A5
#define _UK_GLD_A6
#define _UK_GLD_A7_AND_L1
#define _UK_GLD_A7_AND_L0
#define _UK_NONE   ""
#define _UK_SLD_A0_X   "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n"
#define _UK_SLD_A1_X   "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n"
#define _UK_SLD_A2_X   "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n"
#define _UK_SLD_A3_X   "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n"
#define _UK_SLD_A4_X   "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n"
#define _UK_SLD_A5_X   "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n"
#define _UK_SLD_A6_X   "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n"
#define _UK_SLD_A7_X   "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n"
#define _UK_SLD_A0_Y   "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n"
#define _UK_SLD_A1_Y   "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n"
#define _UK_SLD_A2_Y   "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n"
#define _UK_SLD_A3_Y   "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n"
#define _UK_SLD_A4_Y   "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n"
#define _UK_SLD_A5_Y   "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n"
#define _UK_SLD_A6_Y   "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n"
#define _UK_SLD_A7_Y   "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n"
#define _UK_PIPELINE_0(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_)
#define _UK_PIPELINE_1(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_)
#define CK_TILE_FLATMM_UK_MFMA   CK_TILE_FLATMM_UK_MFMA_FP16
#define _UK_MFMA_   "v_mfma_f32_16x16x16_bf16"
#define CK_TILE_FLATMM_UK_2B   0
#define _UK_GLD_A0
#define _UK_GLD_A1
#define _UK_GLD_A2
#define _UK_GLD_A3
#define _UK_GLD_A4
#define _UK_GLD_A5
#define _UK_GLD_A6
#define _UK_GLD_A7_AND_L1
#define _UK_GLD_A7_AND_L0
#define _UK_NONE   ""
#define _UK_SLD_A0_X   "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n"
#define _UK_SLD_A1_X   "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n"
#define _UK_SLD_A2_X   "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n"
#define _UK_SLD_A3_X   "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n"
#define _UK_SLD_A4_X   "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n"
#define _UK_SLD_A5_X   "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n"
#define _UK_SLD_A6_X   "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n"
#define _UK_SLD_A7_X   "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n"
#define _UK_SLD_A0_Y   "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n"
#define _UK_SLD_A1_Y   "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n"
#define _UK_SLD_A2_Y   "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n"
#define _UK_SLD_A3_Y   "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n"
#define _UK_SLD_A4_Y   "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n"
#define _UK_SLD_A5_Y   "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n"
#define _UK_SLD_A6_Y   "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n"
#define _UK_SLD_A7_Y   "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n"
#define _UK_PIPELINE_0(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_)
#define _UK_PIPELINE_1(gld_a0_, gld_a1_, gld_a2_, gld_a3_, gld_a4_, gld_a5_, gld_a6_, gld_a7_, sld_a0_, sld_a1_, sld_a2_, sld_a3_, sld_a4_, sld_a5_, sld_a6_, sld_a7_, src_a0_, src_a1_, src_a2_, src_a3_, src_a4_, src_a5_, src_a6_, src_a7_, src_a8_, src_a9_, src_a10_, src_a11_, src_a12_, src_a13_, src_a14_, src_a15_, fence_0_, fence_1_, fence_2_, fence_3_, acc_0_, acc_1_, acc_2_, acc_3_, acc_4_, acc_5_, acc_6_, acc_7_, acc_8_, acc_9_, acc_10_, acc_11_, acc_12_, acc_13_, acc_14_, acc_15_, s_base_b_)

Macro Definition Documentation

◆ _EXPAND_ASM_ARGS_CLOBBER

#define _EXPAND_ASM_ARGS_CLOBBER

◆ _EXPAND_ASM_ARGS_IN

#define _EXPAND_ASM_ARGS_IN

◆ _EXPAND_ASM_ARGS_OUT_ONE_ACC

#define _EXPAND_ASM_ARGS_OUT_ONE_ACC
Value:
[s_loop_cnt]"+s"(loop_cnt), \
[v_acc_0]"+v"(v_acc[0]), \
[v_acc_1]"+v"(v_acc[1]), \
[v_acc_2]"+v"(v_acc[2]), \
[v_acc_3]"+v"(v_acc[3]), \
[v_acc_4]"+v"(v_acc[4]), \
[v_acc_5]"+v"(v_acc[5]), \
[v_acc_6]"+v"(v_acc[6]), \
[v_acc_7]"+v"(v_acc[7]), \
[v_acc_8]"+v"(v_acc[8]), \
[v_acc_9]"+v"(v_acc[9]), \
[v_acc_10]"+v"(v_acc[10]), \
[v_acc_11]"+v"(v_acc[11]), \
[v_acc_12]"+v"(v_acc[12]), \
[v_acc_13]"+v"(v_acc[13]), \
[v_acc_14]"+v"(v_acc[14]), \
[v_acc_15]"+v"(v_acc[15]), \
[s_mem_]"+r"(smem)

◆ _EXPAND_ASM_ARGS_OUT_TWO_ACC

#define _EXPAND_ASM_ARGS_OUT_TWO_ACC

◆ _UK_GLD_A0 [1/4]

#define _UK_GLD_A0
Value:
"buffer_load_dword %[v_os_a0], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A0 [2/4]

#define _UK_GLD_A0
Value:
"buffer_load_dword %[v_os_a0], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A0 [3/4]

#define _UK_GLD_A0
Value:
"buffer_load_dword %[v_os_a0], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A0 [4/4]

#define _UK_GLD_A0
Value:
"buffer_load_dword %[v_os_a0], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A1 [1/4]

#define _UK_GLD_A1
Value:
"buffer_load_dword %[v_os_a1], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A1 [2/4]

#define _UK_GLD_A1
Value:
"buffer_load_dword %[v_os_a1], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A1 [3/4]

#define _UK_GLD_A1
Value:
"buffer_load_dword %[v_os_a1], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A1 [4/4]

#define _UK_GLD_A1
Value:
"buffer_load_dword %[v_os_a1], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A2 [1/4]

#define _UK_GLD_A2
Value:
"buffer_load_dword %[v_os_a2], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A2 [2/4]

#define _UK_GLD_A2
Value:
"buffer_load_dword %[v_os_a2], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A2 [3/4]

#define _UK_GLD_A2
Value:
"buffer_load_dword %[v_os_a2], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A2 [4/4]

#define _UK_GLD_A2
Value:
"buffer_load_dword %[v_os_a2], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A3 [1/4]

#define _UK_GLD_A3
Value:
"buffer_load_dword %[v_os_a3], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A3 [2/4]

#define _UK_GLD_A3
Value:
"buffer_load_dword %[v_os_a3], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A3 [3/4]

#define _UK_GLD_A3
Value:
"buffer_load_dword %[v_os_a3], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A3 [4/4]

#define _UK_GLD_A3
Value:
"buffer_load_dword %[v_os_a3], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A4 [1/4]

#define _UK_GLD_A4
Value:
"buffer_load_dword %[v_os_a4], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A4 [2/4]

#define _UK_GLD_A4
Value:
"buffer_load_dword %[v_os_a4], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A4 [3/4]

#define _UK_GLD_A4
Value:
"buffer_load_dword %[v_os_a4], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A4 [4/4]

#define _UK_GLD_A4
Value:
"buffer_load_dword %[v_os_a4], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A5 [1/4]

#define _UK_GLD_A5
Value:
"buffer_load_dword %[v_os_a5], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A5 [2/4]

#define _UK_GLD_A5
Value:
"buffer_load_dword %[v_os_a5], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A5 [3/4]

#define _UK_GLD_A5
Value:
"buffer_load_dword %[v_os_a5], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A5 [4/4]

#define _UK_GLD_A5
Value:
"buffer_load_dword %[v_os_a5], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A6 [1/4]

#define _UK_GLD_A6
Value:
"buffer_load_dword %[v_os_a6], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A6 [2/4]

#define _UK_GLD_A6
Value:
"buffer_load_dword %[v_os_a6], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A6 [3/4]

#define _UK_GLD_A6
Value:
"buffer_load_dword %[v_os_a6], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A6 [4/4]

#define _UK_GLD_A6
Value:
"buffer_load_dword %[v_os_a6], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[s_size_per_issue], m0\n"

◆ _UK_GLD_A7_AND_L0 [1/4]

#define _UK_GLD_A7_AND_L0
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, 0, %[s_m0_init]\n"

◆ _UK_GLD_A7_AND_L0 [2/4]

#define _UK_GLD_A7_AND_L0
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, 0, %[s_m0_init]\n"

◆ _UK_GLD_A7_AND_L0 [3/4]

#define _UK_GLD_A7_AND_L0
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, 0, %[s_m0_init]\n"

◆ _UK_GLD_A7_AND_L0 [4/4]

#define _UK_GLD_A7_AND_L0
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, 0, %[s_m0_init]\n"

◆ _UK_GLD_A7_AND_L1 [1/4]

#define _UK_GLD_A7_AND_L1
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[smem_sz], %[s_m0_init]\n"

◆ _UK_GLD_A7_AND_L1 [2/4]

#define _UK_GLD_A7_AND_L1
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[smem_sz], %[s_m0_init]\n"

◆ _UK_GLD_A7_AND_L1 [3/4]

#define _UK_GLD_A7_AND_L1
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[smem_sz], %[s_m0_init]\n"

◆ _UK_GLD_A7_AND_L1 [4/4]

#define _UK_GLD_A7_AND_L1
Value:
"buffer_load_dword %[v_os_a7], s[16:19], 0 offen lds\n" \
"s_add_u32 m0, %[smem_sz], %[s_m0_init]\n"

◆ _UK_MFMA_ [1/4]

#define _UK_MFMA_   "v_mfma_f32_16x16x16_bf16"

◆ _UK_MFMA_ [2/4]

#define _UK_MFMA_   "v_mfma_f32_16x16x16_bf16"

◆ _UK_MFMA_ [3/4]

#define _UK_MFMA_   "v_mfma_f32_16x16x16_bf16"

◆ _UK_MFMA_ [4/4]

#define _UK_MFMA_   "v_mfma_f32_16x16x16_bf16"

◆ _UK_NONE [1/4]

#define _UK_NONE   ""

◆ _UK_NONE [2/4]

#define _UK_NONE   ""

◆ _UK_NONE [3/4]

#define _UK_NONE   ""

◆ _UK_NONE [4/4]

#define _UK_NONE   ""

◆ _UK_PIPELINE_0 [1/4]

#define _UK_PIPELINE_0 ( gld_a0_,
gld_a1_,
gld_a2_,
gld_a3_,
gld_a4_,
gld_a5_,
gld_a6_,
gld_a7_,
sld_a0_,
sld_a1_,
sld_a2_,
sld_a3_,
sld_a4_,
sld_a5_,
sld_a6_,
sld_a7_,
src_a0_,
src_a1_,
src_a2_,
src_a3_,
src_a4_,
src_a5_,
src_a6_,
src_a7_,
src_a8_,
src_a9_,
src_a10_,
src_a11_,
src_a12_,
src_a13_,
src_a14_,
src_a15_,
fence_0_,
fence_1_,
fence_2_,
fence_3_,
acc_0_,
acc_1_,
acc_2_,
acc_3_,
acc_4_,
acc_5_,
acc_6_,
acc_7_,
acc_8_,
acc_9_,
acc_10_,
acc_11_,
acc_12_,
acc_13_,
acc_14_,
acc_15_,
s_base_b_ )

◆ _UK_PIPELINE_0 [2/4]

#define _UK_PIPELINE_0 ( gld_a0_,
gld_a1_,
gld_a2_,
gld_a3_,
gld_a4_,
gld_a5_,
gld_a6_,
gld_a7_,
sld_a0_,
sld_a1_,
sld_a2_,
sld_a3_,
sld_a4_,
sld_a5_,
sld_a6_,
sld_a7_,
src_a0_,
src_a1_,
src_a2_,
src_a3_,
src_a4_,
src_a5_,
src_a6_,
src_a7_,
src_a8_,
src_a9_,
src_a10_,
src_a11_,
src_a12_,
src_a13_,
src_a14_,
src_a15_,
fence_0_,
fence_1_,
fence_2_,
fence_3_,
acc_0_,
acc_1_,
acc_2_,
acc_3_,
acc_4_,
acc_5_,
acc_6_,
acc_7_,
acc_8_,
acc_9_,
acc_10_,
acc_11_,
acc_12_,
acc_13_,
acc_14_,
acc_15_,
s_base_b_ )

◆ _UK_PIPELINE_0 [3/4]

#define _UK_PIPELINE_0 ( gld_a0_,
gld_a1_,
gld_a2_,
gld_a3_,
gld_a4_,
gld_a5_,
gld_a6_,
gld_a7_,
sld_a0_,
sld_a1_,
sld_a2_,
sld_a3_,
sld_a4_,
sld_a5_,
sld_a6_,
sld_a7_,
src_a0_,
src_a1_,
src_a2_,
src_a3_,
src_a4_,
src_a5_,
src_a6_,
src_a7_,
src_a8_,
src_a9_,
src_a10_,
src_a11_,
src_a12_,
src_a13_,
src_a14_,
src_a15_,
fence_0_,
fence_1_,
fence_2_,
fence_3_,
acc_0_,
acc_1_,
acc_2_,
acc_3_,
acc_4_,
acc_5_,
acc_6_,
acc_7_,
acc_8_,
acc_9_,
acc_10_,
acc_11_,
acc_12_,
acc_13_,
acc_14_,
acc_15_,
s_base_b_ )

◆ _UK_PIPELINE_0 [4/4]

#define _UK_PIPELINE_0 ( gld_a0_,
gld_a1_,
gld_a2_,
gld_a3_,
gld_a4_,
gld_a5_,
gld_a6_,
gld_a7_,
sld_a0_,
sld_a1_,
sld_a2_,
sld_a3_,
sld_a4_,
sld_a5_,
sld_a6_,
sld_a7_,
src_a0_,
src_a1_,
src_a2_,
src_a3_,
src_a4_,
src_a5_,
src_a6_,
src_a7_,
src_a8_,
src_a9_,
src_a10_,
src_a11_,
src_a12_,
src_a13_,
src_a14_,
src_a15_,
fence_0_,
fence_1_,
fence_2_,
fence_3_,
acc_0_,
acc_1_,
acc_2_,
acc_3_,
acc_4_,
acc_5_,
acc_6_,
acc_7_,
acc_8_,
acc_9_,
acc_10_,
acc_11_,
acc_12_,
acc_13_,
acc_14_,
acc_15_,
s_base_b_ )

◆ _UK_PIPELINE_1 [1/4]

#define _UK_PIPELINE_1 ( gld_a0_,
gld_a1_,
gld_a2_,
gld_a3_,
gld_a4_,
gld_a5_,
gld_a6_,
gld_a7_,
sld_a0_,
sld_a1_,
sld_a2_,
sld_a3_,
sld_a4_,
sld_a5_,
sld_a6_,
sld_a7_,
src_a0_,
src_a1_,
src_a2_,
src_a3_,
src_a4_,
src_a5_,
src_a6_,
src_a7_,
src_a8_,
src_a9_,
src_a10_,
src_a11_,
src_a12_,
src_a13_,
src_a14_,
src_a15_,
fence_0_,
fence_1_,
fence_2_,
fence_3_,
acc_0_,
acc_1_,
acc_2_,
acc_3_,
acc_4_,
acc_5_,
acc_6_,
acc_7_,
acc_8_,
acc_9_,
acc_10_,
acc_11_,
acc_12_,
acc_13_,
acc_14_,
acc_15_,
s_base_b_ )

◆ _UK_PIPELINE_1 [2/4]

#define _UK_PIPELINE_1 ( gld_a0_,
gld_a1_,
gld_a2_,
gld_a3_,
gld_a4_,
gld_a5_,
gld_a6_,
gld_a7_,
sld_a0_,
sld_a1_,
sld_a2_,
sld_a3_,
sld_a4_,
sld_a5_,
sld_a6_,
sld_a7_,
src_a0_,
src_a1_,
src_a2_,
src_a3_,
src_a4_,
src_a5_,
src_a6_,
src_a7_,
src_a8_,
src_a9_,
src_a10_,
src_a11_,
src_a12_,
src_a13_,
src_a14_,
src_a15_,
fence_0_,
fence_1_,
fence_2_,
fence_3_,
acc_0_,
acc_1_,
acc_2_,
acc_3_,
acc_4_,
acc_5_,
acc_6_,
acc_7_,
acc_8_,
acc_9_,
acc_10_,
acc_11_,
acc_12_,
acc_13_,
acc_14_,
acc_15_,
s_base_b_ )

◆ _UK_PIPELINE_1 [3/4]

#define _UK_PIPELINE_1 ( gld_a0_,
gld_a1_,
gld_a2_,
gld_a3_,
gld_a4_,
gld_a5_,
gld_a6_,
gld_a7_,
sld_a0_,
sld_a1_,
sld_a2_,
sld_a3_,
sld_a4_,
sld_a5_,
sld_a6_,
sld_a7_,
src_a0_,
src_a1_,
src_a2_,
src_a3_,
src_a4_,
src_a5_,
src_a6_,
src_a7_,
src_a8_,
src_a9_,
src_a10_,
src_a11_,
src_a12_,
src_a13_,
src_a14_,
src_a15_,
fence_0_,
fence_1_,
fence_2_,
fence_3_,
acc_0_,
acc_1_,
acc_2_,
acc_3_,
acc_4_,
acc_5_,
acc_6_,
acc_7_,
acc_8_,
acc_9_,
acc_10_,
acc_11_,
acc_12_,
acc_13_,
acc_14_,
acc_15_,
s_base_b_ )

◆ _UK_PIPELINE_1 [4/4]

#define _UK_PIPELINE_1 ( gld_a0_,
gld_a1_,
gld_a2_,
gld_a3_,
gld_a4_,
gld_a5_,
gld_a6_,
gld_a7_,
sld_a0_,
sld_a1_,
sld_a2_,
sld_a3_,
sld_a4_,
sld_a5_,
sld_a6_,
sld_a7_,
src_a0_,
src_a1_,
src_a2_,
src_a3_,
src_a4_,
src_a5_,
src_a6_,
src_a7_,
src_a8_,
src_a9_,
src_a10_,
src_a11_,
src_a12_,
src_a13_,
src_a14_,
src_a15_,
fence_0_,
fence_1_,
fence_2_,
fence_3_,
acc_0_,
acc_1_,
acc_2_,
acc_3_,
acc_4_,
acc_5_,
acc_6_,
acc_7_,
acc_8_,
acc_9_,
acc_10_,
acc_11_,
acc_12_,
acc_13_,
acc_14_,
acc_15_,
s_base_b_ )

◆ _UK_SLD_A0_X [1/4]

#define _UK_SLD_A0_X   "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n"

◆ _UK_SLD_A0_X [2/4]

#define _UK_SLD_A0_X   "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n"

◆ _UK_SLD_A0_X [3/4]

#define _UK_SLD_A0_X   "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n"

◆ _UK_SLD_A0_X [4/4]

#define _UK_SLD_A0_X   "ds_read_b128 v[64:67], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_0]\n"

◆ _UK_SLD_A0_Y [1/4]

#define _UK_SLD_A0_Y   "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n"

◆ _UK_SLD_A0_Y [2/4]

#define _UK_SLD_A0_Y   "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n"

◆ _UK_SLD_A0_Y [3/4]

#define _UK_SLD_A0_Y   "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n"

◆ _UK_SLD_A0_Y [4/4]

#define _UK_SLD_A0_Y   "ds_read_b128 v[96 : 99], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_0]\n"

◆ _UK_SLD_A1_X [1/4]

#define _UK_SLD_A1_X   "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n"

◆ _UK_SLD_A1_X [2/4]

#define _UK_SLD_A1_X   "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n"

◆ _UK_SLD_A1_X [3/4]

#define _UK_SLD_A1_X   "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n"

◆ _UK_SLD_A1_X [4/4]

#define _UK_SLD_A1_X   "ds_read_b128 v[68:71], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_1]\n"

◆ _UK_SLD_A1_Y [1/4]

#define _UK_SLD_A1_Y   "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n"

◆ _UK_SLD_A1_Y [2/4]

#define _UK_SLD_A1_Y   "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n"

◆ _UK_SLD_A1_Y [3/4]

#define _UK_SLD_A1_Y   "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n"

◆ _UK_SLD_A1_Y [4/4]

#define _UK_SLD_A1_Y   "ds_read_b128 v[100:103], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_1]\n"

◆ _UK_SLD_A2_X [1/4]

#define _UK_SLD_A2_X   "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n"

◆ _UK_SLD_A2_X [2/4]

#define _UK_SLD_A2_X   "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n"

◆ _UK_SLD_A2_X [3/4]

#define _UK_SLD_A2_X   "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n"

◆ _UK_SLD_A2_X [4/4]

#define _UK_SLD_A2_X   "ds_read_b128 v[72:75], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_2]\n"

◆ _UK_SLD_A2_Y [1/4]

#define _UK_SLD_A2_Y   "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n"

◆ _UK_SLD_A2_Y [2/4]

#define _UK_SLD_A2_Y   "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n"

◆ _UK_SLD_A2_Y [3/4]

#define _UK_SLD_A2_Y   "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n"

◆ _UK_SLD_A2_Y [4/4]

#define _UK_SLD_A2_Y   "ds_read_b128 v[104:107], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_2]\n"

◆ _UK_SLD_A3_X [1/4]

#define _UK_SLD_A3_X   "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n"

◆ _UK_SLD_A3_X [2/4]

#define _UK_SLD_A3_X   "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n"

◆ _UK_SLD_A3_X [3/4]

#define _UK_SLD_A3_X   "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n"

◆ _UK_SLD_A3_X [4/4]

#define _UK_SLD_A3_X   "ds_read_b128 v[76:79], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_3]\n"

◆ _UK_SLD_A3_Y [1/4]

#define _UK_SLD_A3_Y   "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n"

◆ _UK_SLD_A3_Y [2/4]

#define _UK_SLD_A3_Y   "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n"

◆ _UK_SLD_A3_Y [3/4]

#define _UK_SLD_A3_Y   "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n"

◆ _UK_SLD_A3_Y [4/4]

#define _UK_SLD_A3_Y   "ds_read_b128 v[108:111], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_3]\n"

◆ _UK_SLD_A4_X [1/4]

#define _UK_SLD_A4_X   "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n"

◆ _UK_SLD_A4_X [2/4]

#define _UK_SLD_A4_X   "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n"

◆ _UK_SLD_A4_X [3/4]

#define _UK_SLD_A4_X   "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n"

◆ _UK_SLD_A4_X [4/4]

#define _UK_SLD_A4_X   "ds_read_b128 v[80:83], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_4]\n"

◆ _UK_SLD_A4_Y [1/4]

#define _UK_SLD_A4_Y   "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n"

◆ _UK_SLD_A4_Y [2/4]

#define _UK_SLD_A4_Y   "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n"

◆ _UK_SLD_A4_Y [3/4]

#define _UK_SLD_A4_Y   "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n"

◆ _UK_SLD_A4_Y [4/4]

#define _UK_SLD_A4_Y   "ds_read_b128 v[112:115], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_4]\n"

◆ _UK_SLD_A5_X [1/4]

#define _UK_SLD_A5_X   "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n"

◆ _UK_SLD_A5_X [2/4]

#define _UK_SLD_A5_X   "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n"

◆ _UK_SLD_A5_X [3/4]

#define _UK_SLD_A5_X   "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n"

◆ _UK_SLD_A5_X [4/4]

#define _UK_SLD_A5_X   "ds_read_b128 v[84:87], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_5]\n"

◆ _UK_SLD_A5_Y [1/4]

#define _UK_SLD_A5_Y   "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n"

◆ _UK_SLD_A5_Y [2/4]

#define _UK_SLD_A5_Y   "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n"

◆ _UK_SLD_A5_Y [3/4]

#define _UK_SLD_A5_Y   "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n"

◆ _UK_SLD_A5_Y [4/4]

#define _UK_SLD_A5_Y   "ds_read_b128 v[116:119], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_5]\n"

◆ _UK_SLD_A6_X [1/4]

#define _UK_SLD_A6_X   "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n"

◆ _UK_SLD_A6_X [2/4]

#define _UK_SLD_A6_X   "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n"

◆ _UK_SLD_A6_X [3/4]

#define _UK_SLD_A6_X   "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n"

◆ _UK_SLD_A6_X [4/4]

#define _UK_SLD_A6_X   "ds_read_b128 v[88:91], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_6]\n"

◆ _UK_SLD_A6_Y [1/4]

#define _UK_SLD_A6_Y   "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n"

◆ _UK_SLD_A6_Y [2/4]

#define _UK_SLD_A6_Y   "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n"

◆ _UK_SLD_A6_Y [3/4]

#define _UK_SLD_A6_Y   "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n"

◆ _UK_SLD_A6_Y [4/4]

#define _UK_SLD_A6_Y   "ds_read_b128 v[120:123], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_6]\n"

◆ _UK_SLD_A7_X [1/4]

#define _UK_SLD_A7_X   "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n"

◆ _UK_SLD_A7_X [2/4]

#define _UK_SLD_A7_X   "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n"

◆ _UK_SLD_A7_X [3/4]

#define _UK_SLD_A7_X   "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n"

◆ _UK_SLD_A7_X [4/4]

#define _UK_SLD_A7_X   "ds_read_b128 v[92:95], %[v_os_slda] offset:0*%[smem_sz] + %[sld_os_7]\n"

◆ _UK_SLD_A7_Y [1/4]

#define _UK_SLD_A7_Y   "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n"

◆ _UK_SLD_A7_Y [2/4]

#define _UK_SLD_A7_Y   "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n"

◆ _UK_SLD_A7_Y [3/4]

#define _UK_SLD_A7_Y   "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n"

◆ _UK_SLD_A7_Y [4/4]

#define _UK_SLD_A7_Y   "ds_read_b128 v[124:127], %[v_os_slda], offset:1*%[smem_sz] + %[sld_os_7]\n"

◆ CK_TILE_FLATMM_UK_2B [1/4]

#define CK_TILE_FLATMM_UK_2B   1

◆ CK_TILE_FLATMM_UK_2B [2/4]

#define CK_TILE_FLATMM_UK_2B   1

◆ CK_TILE_FLATMM_UK_2B [3/4]

#define CK_TILE_FLATMM_UK_2B   0

◆ CK_TILE_FLATMM_UK_2B [4/4]

#define CK_TILE_FLATMM_UK_2B   0

◆ CK_TILE_FLATMM_UK_MFMA [1/4]

#define CK_TILE_FLATMM_UK_MFMA   CK_TILE_FLATMM_UK_MFMA_FP16

◆ CK_TILE_FLATMM_UK_MFMA [2/4]

#define CK_TILE_FLATMM_UK_MFMA   CK_TILE_FLATMM_UK_MFMA_FP16

◆ CK_TILE_FLATMM_UK_MFMA [3/4]

#define CK_TILE_FLATMM_UK_MFMA   CK_TILE_FLATMM_UK_MFMA_BF16

◆ CK_TILE_FLATMM_UK_MFMA [4/4]

#define CK_TILE_FLATMM_UK_MFMA   CK_TILE_FLATMM_UK_MFMA_BF16