DeviceGemmDpp< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerDpp, NPerDpp, MDppPerWave, NDppPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, NumPrefetch, PipelineVer > Struct Template Reference
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
List of all members
ck::tensor_operation::device::DeviceGemmDpp< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerDpp, NPerDpp, MDppPerWave, NDppPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, NumPrefetch, PipelineVer > Struct Template Reference
#include <device_gemm_dpp.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceGemmDpp< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerDpp, NPerDpp, MDppPerWave, NDppPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, NumPrefetch, PipelineVer >:
Classes | |
| struct | Invoker |
Public Types | |
| using | GridwiseGemm |
| using | Argument = typename GridwiseGemm::Argument |
Public Member Functions | |
| bool | IsSupportedArgument (const BaseArgument *p_arg) override |
| std::unique_ptr< BaseArgument > | MakeArgumentPointer (const void *p_a, const void *p_b, void *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation) override |
| std::unique_ptr< BaseInvoker > | MakeInvokerPointer () override |
| std::string | GetTypeString () const override |
| Public Member Functions inherited from ck::tensor_operation::device::BaseOperator | |
| BaseOperator ()=default | |
| BaseOperator (const BaseOperator &)=default | |
| BaseOperator & | operator= (const BaseOperator &)=default |
| virtual std::string | GetInstanceString () const |
| virtual std::string | GetTypeIdName () const |
| virtual std::optional< std::string > | GetObjectName () const |
| virtual std::optional< std::string > | GetTemplateInfo () const |
| virtual std::string | GetTypeIdHashCode () const |
| virtual size_t | GetWorkSpaceSize (const BaseArgument *) const |
| virtual void | SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const |
| virtual | ~BaseOperator () |
Static Public Member Functions | |
| static constexpr bool | IsValidCompilationParameter () |
| static bool | IsSupportedArgument (const Argument &karg) |
| static auto | MakeArgument (const ADataType *p_a, const BDataType *p_b, CDataType *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation) |
| static auto | MakeInvoker () |
Member Typedef Documentation
◆ Argument
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerDpp, ck::index_t NPerDpp, ck::index_t MDppPerWave, ck::index_t NDppPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector, ck::index_t NumPrefetch = 1, ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
| using ck::tensor_operation::device::DeviceGemmDpp< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerDpp, NPerDpp, MDppPerWave, NDppPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, NumPrefetch, PipelineVer >::Argument = typename GridwiseGemm::Argument |
◆ GridwiseGemm
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerDpp, ck::index_t NPerDpp, ck::index_t MDppPerWave, ck::index_t NDppPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector, ck::index_t NumPrefetch = 1, ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
| using ck::tensor_operation::device::DeviceGemmDpp< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerDpp, NPerDpp, MDppPerWave, NDppPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BBlockLdsAddExtraN, CThreadTransferSrcDstVectorDim, CThreadTransferDstScalarPerVector, NumPrefetch, PipelineVer >::GridwiseGemm |
Member Function Documentation
◆ GetTypeString()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerDpp, ck::index_t NPerDpp, ck::index_t MDppPerWave, ck::index_t NDppPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector, ck::index_t NumPrefetch = 1, ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
| std::string GetTypeString () const
inline override virtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsSupportedArgument() [1/2]
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerDpp, ck::index_t NPerDpp, ck::index_t MDppPerWave, ck::index_t NDppPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector, ck::index_t NumPrefetch = 1, ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
| static bool IsSupportedArgument (const Argument &karg)
inline static |
◆ IsSupportedArgument() [2/2]
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerDpp, ck::index_t NPerDpp, ck::index_t MDppPerWave, ck::index_t NDppPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector, ck::index_t NumPrefetch = 1, ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
| bool IsSupportedArgument (const BaseArgument *p_arg)
inline override virtual |
Reimplemented from ck::tensor_operation::device::BaseOperator.
◆ IsValidCompilationParameter()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerDpp, ck::index_t NPerDpp, ck::index_t MDppPerWave, ck::index_t NDppPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector, ck::index_t NumPrefetch = 1, ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
| static constexpr bool IsValidCompilationParameter ()
inline static constexpr |
◆ MakeArgument()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerDpp, ck::index_t NPerDpp, ck::index_t MDppPerWave, ck::index_t NDppPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector, ck::index_t NumPrefetch = 1, ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
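| static auto MakeArgument (const ADataType *p_a, const BDataType *p_b, CDataType *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation)
inline static |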
◆ MakeArgumentPointer()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerDpp, ck::index_t NPerDpp, ck::index_t MDppPerWave, ck::index_t NDppPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector, ck::index_t NumPrefetch = 1, ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
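| std::unique_ptr< BaseArgument > MakeArgumentPointer (const void *p_a, const void *p_b, void *p_c, index_t M, index_t N, index_t K, index_t StrideA, index_t StrideB, index_t StrideC, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation)
inline override virtual |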
◆ MakeInvoker()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerDpp, ck::index_t NPerDpp, ck::index_t MDppPerWave, ck::index_t NDppPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector, ck::index_t NumPrefetch = 1, ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
| static auto MakeInvoker ()
inline static |
◆ MakeInvokerPointer()
template<typename ADataType, typename BDataType, typename CDataType, typename AccDataType, typename ALayout, typename BLayout, typename CLayout, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerDpp, ck::index_t NPerDpp, ck::index_t MDppPerWave, ck::index_t NDppPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, ck::index_t ABlockTransferSrcVectorDim, ck::index_t ABlockTransferSrcScalarPerVector, ck::index_t ABlockTransferDstScalarPerVector_K1, bool ABlockLdsAddExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, ck::index_t BBlockTransferSrcVectorDim, ck::index_t BBlockTransferSrcScalarPerVector, ck::index_t BBlockTransferDstScalarPerVector_K1, bool BBlockLdsAddExtraN, ck::index_t CThreadTransferSrcDstVectorDim, ck::index_t CThreadTransferDstScalarPerVector, ck::index_t NumPrefetch = 1, ck::PipelineVersion PipelineVer = ck::PipelineVersion::v1>
| std::unique_ptr< BaseInvoker > MakeInvokerPointer ()
inline override virtual |
The documentation for this struct was generated from the following file: