device_gemm_wmma_cshuffle_v3.hpp Source File
device_gemm_wmma_cshuffle_v3.hpp
Go to the documentation of this file.
Definition convolution_backward_data_specialization.hpp:8
std::string getGemmSpecializationString(const GemmSpecialization &s)
Definition gemm_specialization.hpp:32
GemmSpecialization
Definition gemm_specialization.hpp:11
Definition convolution_backward_data_specialization.hpp:7
Definition ck.hpp:268
static constexpr index_t KPack
Definition gridwise_gemm_wmma_cshuffle_v3_common.hpp:154
"Universal" GEMM kernel with SplitK support.
Definition gridwise_gemm_wmma_cshuffle_v3.hpp:233
Definition utility/sequence.hpp:43
Definition utility/tuple.hpp:186
Definition utility/tuple.hpp:117
Definition device_base.hpp:197
Definition device_gemm_wmma_cshuffle_v3_common.hpp:43
static bool IsSupportedArgument(const Argument &arg)
Definition device_gemm_wmma_cshuffle_v3_common.hpp:268
"Universal" GEMM operation with SplitK support.
Definition device_gemm_wmma_cshuffle_v3.hpp:179
static auto MakeArgument(const ADataType *p_a, const BDataType *p_b, CDataType *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t KBatch, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation cde_element_op)
Definition device_gemm_wmma_cshuffle_v3.hpp:272
std::string GetTypeString() const override
Definition device_gemm_wmma_cshuffle_v3.hpp:344
typename DeviceGemmCommon::Invoker Invoker
Definition device_gemm_wmma_cshuffle_v3.hpp:254
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_a, const void *p_b, void *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, index_t KBatch, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CElementwiseOperation c_element_op) override
Definition device_gemm_wmma_cshuffle_v3.hpp:306
GridwiseGemm_wmma_cshuffle_v3< ALayout, BLayout, Tuple<>, CLayout, Tuple< ADataType >, Tuple< BDataType >, AccDataType, CShuffleDataType, Tuple<>, CDataType, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, false, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, false, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, Sequence< CShuffleBlockTransferScalarPerVector_NPerBlock >, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB > GridwiseGemm
Definition device_gemm_wmma_cshuffle_v3.hpp:180
static auto MakeInvoker()
Definition device_gemm_wmma_cshuffle_v3.hpp:303
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition device_gemm_wmma_cshuffle_v3.hpp:262
bool GetPermuteA() override
Definition device_gemm_wmma_cshuffle_v3.hpp:269
DeviceGemm_Wmma_CShuffleV3_Common< GridwiseGemm, Tuple< ADataType >, Tuple< BDataType >, Tuple<>, CDataType, MPerBlock, NPerBlock, KPerBlock, BlockSize, AK1, BK1, GemmSpec, Sequence< CShuffleBlockTransferScalarPerVector_NPerBlock >, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB > DeviceGemmCommon
Definition device_gemm_wmma_cshuffle_v3.hpp:234
typename GridwiseGemm::Argument Argument
Definition device_gemm_wmma_cshuffle_v3.hpp:232
bool GetPermuteB() override
Definition device_gemm_wmma_cshuffle_v3.hpp:270
index_t GetKPerBlock() override
Definition device_gemm_wmma_cshuffle_v3.hpp:267
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition device_gemm_wmma_cshuffle_v3.hpp:338
static bool IsSupportedArgument(const Argument &arg)
Definition device_gemm_wmma_cshuffle_v3.hpp:256
Definition device_gemm_v2.hpp:22