#ifndef KOKKOS_ROCMSPACE_HPP
#define KOKKOS_ROCMSPACE_HPP
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_ENABLE_ROCM )
#include <iosfwd>
#include <typeinfo>
#include <string>
#include <Kokkos_HostSpace.hpp>
namespace Kokkos {
namespace Experimental {
/// \brief Memory space type whose execution space is Kokkos::Experimental::ROCm.
///
/// Only the interface is declared in this header section: the default
/// constructor, allocate(), deallocate() and both access_error() overloads
/// are defined elsewhere.
class ROCmSpace {
public:
  // Self-referential alias (presumably the tag Kokkos traits look for when
  // recognising a memory space -- confirm against the traits code).
  using memory_space    = ROCmSpace ;
  using execution_space = Kokkos::Experimental::ROCm ;
  using device_type     = Kokkos::Device< execution_space , memory_space > ;
  using size_type       = unsigned int ;

  // Default constructor is user-provided (defined outside this section);
  // copy/move construction, copy/move assignment and destruction are
  // compiler-generated.
  ROCmSpace();
  ROCmSpace( const ROCmSpace & rhs ) = default ;
  ROCmSpace( ROCmSpace && rhs ) = default ;
  ROCmSpace & operator = ( const ROCmSpace & rhs ) = default ;
  ROCmSpace & operator = ( ROCmSpace && rhs ) = default ;
  ~ROCmSpace() = default ;

  /// Returns a pointer for an allocation request of \p arg_alloc_size.
  void * allocate( const size_t arg_alloc_size ) const ;

  /// Releases \p arg_alloc_ptr; the size of the allocation is passed
  /// alongside the pointer.
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;

  /// Compile-time name of this space: the string "ROCm".
  static constexpr const char * name() { return m_name ; }

  // Error hooks; called from the HostSpace -> ROCmSpace
  // VerifyExecutionCanAccessMemorySpace specialization in this header.
  static void access_error();
  static void access_error( const void * const );

private:
  // NOTE(review): presumably a device identifier; how it is set is not
  // visible in this header -- confirm in the implementation file.
  int m_device ;

  static constexpr const char * m_name = "ROCm" ;

  // The ROCmSpace allocation record is granted access to private members.
  friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void > ;
};
}
namespace Impl {
// Low-level ROCm helpers; only declared here, defined elsewhere.
// NOTE(review): the `int` parameter of the two allocate functions is
// presumably a byte count -- confirm, and note that `int` limits the range.
void * rocm_device_allocate(int);
void * rocm_hostpinned_allocate(int);
// Counterpart taking a raw pointer (presumably one obtained from
// rocm_device_allocate -- confirm against the implementation).
void rocm_device_free(void * );
// Lock-array support. Each *_ptr accessor returns an int pointer and takes a
// `deallocate` flag that defaults to false; the flag's exact effect is
// defined in the implementation, not in this header.
void init_lock_arrays_rocm_space();
int* atomic_lock_array_rocm_space_ptr(bool deallocate = false);
int* scratch_lock_array_rocm_space_ptr(bool deallocate = false);
int* threadid_lock_array_rocm_space_ptr(bool deallocate = false);
}
}
namespace Kokkos {
namespace Experimental {
/// \brief Memory space type that shares HostSpace's execution space.
///
/// The class has no non-static data members. Its default constructor,
/// allocate() and deallocate() are declared here and defined elsewhere.
class ROCmHostPinnedSpace {
public:
  // Executes where HostSpace executes; the memory_space alias refers back to
  // this class.
  using execution_space = HostSpace::execution_space ;
  using memory_space    = ROCmHostPinnedSpace ;
  using device_type     = Kokkos::Device< execution_space , memory_space > ;
  using size_type       = unsigned int ;

  // User-provided default constructor; everything else is defaulted.
  ROCmHostPinnedSpace();
  ROCmHostPinnedSpace( const ROCmHostPinnedSpace & rhs ) = default ;
  ROCmHostPinnedSpace( ROCmHostPinnedSpace && rhs ) = default ;
  ROCmHostPinnedSpace & operator = ( const ROCmHostPinnedSpace & rhs ) = default ;
  ROCmHostPinnedSpace & operator = ( ROCmHostPinnedSpace && rhs ) = default ;
  ~ROCmHostPinnedSpace() = default ;

  /// Returns a pointer for an allocation request of \p arg_alloc_size.
  void * allocate( const size_t arg_alloc_size ) const ;

  /// Releases \p arg_alloc_ptr; the allocation size is passed alongside.
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;

  /// Compile-time name of this space: the string "ROCmHostPinned".
  static constexpr const char * name() { return m_name ; }

private:
  static constexpr const char * m_name = "ROCmHostPinned" ;
};
} }
namespace Kokkos {
namespace Impl {
// Compile-time check that MemorySpaceAccess reports ROCmSpace as assignable
// to itself. No specialization for this pair appears in this header, so the
// value comes from a definition located elsewhere.
static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmSpace >::assignable , "" );
// Access traits for the ( HostSpace , ROCmSpace ) pair.
// NOTE(review): the primary template is not visible here; presumably the
// first argument is the accessing/destination space and the second the
// source -- confirm against the MemorySpaceAccess primary template.
template<>
struct MemorySpaceAccess< Kokkos::HostSpace
                        , Kokkos::Experimental::ROCmSpace >
{
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy   = true  };
};
// Access traits for the ( HostSpace , ROCmHostPinnedSpace ) pair: all three
// flags are true. ROCmHostPinnedSpace declares HostSpace::execution_space as
// its execution space.
template<>
struct MemorySpaceAccess< Kokkos::HostSpace
                        , Kokkos::Experimental::ROCmHostPinnedSpace >
{
  enum { assignable = true };
  enum { accessible = true };
  enum { deepcopy   = true };
};
// Access traits for the ( ROCmSpace , HostSpace ) pair: neither assignable
// nor accessible, but deepcopy is flagged as available.
template<>
struct MemorySpaceAccess< Kokkos::Experimental::ROCmSpace
                        , Kokkos::HostSpace >
{
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy   = true  };
};
// Access traits for the ( ROCmSpace , ROCmHostPinnedSpace ) pair: not
// assignable, but flagged accessible and deep-copyable.
template<>
struct MemorySpaceAccess< Kokkos::Experimental::ROCmSpace
                        , Kokkos::Experimental::ROCmHostPinnedSpace >
{
  enum { assignable = false };
  enum { accessible = true  };
  enum { deepcopy   = true  };
};
// Access traits for the ( ROCmHostPinnedSpace , HostSpace ) pair: not
// assignable, but flagged accessible and deep-copyable.
template<>
struct MemorySpaceAccess< Kokkos::Experimental::ROCmHostPinnedSpace
                        , Kokkos::HostSpace >
{
  enum { assignable = false };
  enum { accessible = true  };
  enum { deepcopy   = true  };
};
// Access traits for the ( ROCmHostPinnedSpace , ROCmSpace ) pair: neither
// assignable nor accessible; deepcopy is flagged as available.
template<>
struct MemorySpaceAccess< Kokkos::Experimental::ROCmHostPinnedSpace
                        , Kokkos::Experimental::ROCmSpace >
{
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy   = true  };
};
};
}
namespace Kokkos {
namespace Impl {
// Copy entry point that returns an hc::completion_future; the DeepCopy
// specializations in this header call wait() on the returned future.
// Declared only -- the definition lives elsewhere.
// NOTE(review): `n` is presumably a byte count -- confirm in the implementation.
hc::completion_future DeepCopyAsyncROCm( void * dst , const void * src , size_t n);
// DeepCopy for ( ROCmSpace , ROCmSpace ) on the ROCm execution space.
// Both constructors are declared only; their definitions are not part of
// this header section.
template<>
struct DeepCopy< Kokkos::Experimental::ROCmSpace
               , Kokkos::Experimental::ROCmSpace
               , Kokkos::Experimental::ROCm >
{
  // Overload without an execution-space instance.
  DeepCopy( void * dst , const void * src , size_t );

  // Overload taking a ROCm execution-space instance as first argument.
  DeepCopy( const Kokkos::Experimental::ROCm &
          , void * dst , const void * src , size_t );
};
// DeepCopy for ( ROCmSpace , HostSpace ) on the ROCm execution space.
// Both constructors are declared only; their definitions are not part of
// this header section.
template<>
struct DeepCopy< Kokkos::Experimental::ROCmSpace
               , HostSpace
               , Kokkos::Experimental::ROCm >
{
  // Overload without an execution-space instance.
  DeepCopy( void * dst , const void * src , size_t );

  // Overload taking a ROCm execution-space instance as first argument.
  DeepCopy( const Kokkos::Experimental::ROCm &
          , void * dst , const void * src , size_t );
};
// DeepCopy for ( HostSpace , ROCmSpace ) on the ROCm execution space.
// Both constructors are declared only; their definitions are not part of
// this header section.
template<>
struct DeepCopy< HostSpace
               , Kokkos::Experimental::ROCmSpace
               , Kokkos::Experimental::ROCm >
{
  // Overload without an execution-space instance.
  DeepCopy( void * dst , const void * src , size_t );

  // Overload taking a ROCm execution-space instance as first argument.
  DeepCopy( const Kokkos::Experimental::ROCm &
          , void * dst , const void * src , size_t );
};
template<class ExecutionSpace> struct DeepCopy< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
hc::completion_future fut = DeepCopyAsyncROCm (dst,src,n);
fut.wait();
}
};
// DeepCopy for ( ROCmSpace , HostSpace ) with an arbitrary ExecutionSpace.
template< class ExecutionSpace >
struct DeepCopy< Kokkos::Experimental::ROCmSpace
               , HostSpace
               , ExecutionSpace >
{
  // Forwards to the Kokkos::Experimental::ROCm specialization by constructing
  // (and immediately discarding) a temporary of it with the same arguments.
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  {
    typedef DeepCopy< Kokkos::Experimental::ROCmSpace
                    , HostSpace
                    , Kokkos::Experimental::ROCm > rocm_deep_copy ;
    (void) rocm_deep_copy( dst , src , n );
  }

  // Fences `exec`, then performs the copy through this struct's own
  // three-argument constructor (a discarded temporary).
  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    (void) DeepCopy( dst , src , n );
  }
};
// DeepCopy for ( HostSpace , ROCmSpace ) with an arbitrary ExecutionSpace.
template< class ExecutionSpace >
struct DeepCopy< HostSpace
               , Kokkos::Experimental::ROCmSpace
               , ExecutionSpace >
{
  // Forwards to the Kokkos::Experimental::ROCm specialization by constructing
  // (and immediately discarding) a temporary of it with the same arguments.
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  {
    typedef DeepCopy< HostSpace
                    , Kokkos::Experimental::ROCmSpace
                    , Kokkos::Experimental::ROCm > rocm_deep_copy ;
    (void) rocm_deep_copy( dst , src , n );
  }

  // Fences `exec`, then performs the copy through this struct's own
  // three-argument constructor (a discarded temporary).
  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    (void) DeepCopy( dst , src , n );
  }
};
// DeepCopy for ( ROCmHostPinnedSpace , ROCmHostPinnedSpace ) on the ROCm
// execution space. Constructors are declared only; definitions are not part
// of this header section.
template<>
struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace
               , Kokkos::Experimental::ROCmHostPinnedSpace
               , Kokkos::Experimental::ROCm >
{
  // Overload without an execution-space instance.
  DeepCopy( void * dst , const void * src , size_t );

  // Overload taking a ROCm execution-space instance as first argument.
  DeepCopy( const Kokkos::Experimental::ROCm &
          , void * dst , const void * src , size_t );
};
// DeepCopy for ( ROCmHostPinnedSpace , HostSpace ) on the ROCm execution
// space. Constructors are declared only; definitions are not part of this
// header section.
template<>
struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace
               , HostSpace
               , Kokkos::Experimental::ROCm >
{
  // Overload without an execution-space instance.
  DeepCopy( void * dst , const void * src , size_t );

  // Overload taking a ROCm execution-space instance as first argument.
  DeepCopy( const Kokkos::Experimental::ROCm &
          , void * dst , const void * src , size_t );
};
// DeepCopy for ( HostSpace , ROCmHostPinnedSpace ) on the ROCm execution
// space. Constructors are declared only; definitions are not part of this
// header section.
template<>
struct DeepCopy< HostSpace
               , Kokkos::Experimental::ROCmHostPinnedSpace
               , Kokkos::Experimental::ROCm >
{
  // Overload without an execution-space instance.
  DeepCopy( void * dst , const void * src , size_t );

  // Overload taking a ROCm execution-space instance as first argument.
  DeepCopy( const Kokkos::Experimental::ROCm &
          , void * dst , const void * src , size_t );
};
template<class ExecutionSpace>
struct DeepCopy< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmHostPinnedSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< Kokkos::Experimental::ROCmSpace , HostSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
hc::completion_future fut = DeepCopyAsyncROCm (dst,src,n);
fut.wait();
}
};
template<class ExecutionSpace> struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCmSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
hc::completion_future fut = DeepCopyAsyncROCm (dst,src,n);
fut.wait();
}
};
// DeepCopy for ( ROCmHostPinnedSpace , ROCmHostPinnedSpace ) with an
// arbitrary ExecutionSpace.
template< class ExecutionSpace >
struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace
               , Kokkos::Experimental::ROCmHostPinnedSpace
               , ExecutionSpace >
{
  // Forwards to the Kokkos::Experimental::ROCm specialization by constructing
  // (and immediately discarding) a temporary of it with the same arguments.
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  {
    typedef DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace
                    , Kokkos::Experimental::ROCmHostPinnedSpace
                    , Kokkos::Experimental::ROCm > rocm_deep_copy ;
    (void) rocm_deep_copy( dst , src , n );
  }

  // Fences `exec`, then performs the copy through this struct's own
  // three-argument constructor (a discarded temporary).
  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    (void) DeepCopy( dst , src , n );
  }
};
// DeepCopy for ( ROCmHostPinnedSpace , HostSpace ) with an arbitrary
// ExecutionSpace.
template< class ExecutionSpace >
struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace
               , HostSpace
               , ExecutionSpace >
{
  // Forwards to the Kokkos::Experimental::ROCm specialization by constructing
  // (and immediately discarding) a temporary of it with the same arguments.
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  {
    typedef DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace
                    , HostSpace
                    , Kokkos::Experimental::ROCm > rocm_deep_copy ;
    (void) rocm_deep_copy( dst , src , n );
  }

  // Fences `exec`, then performs the copy through this struct's own
  // three-argument constructor (a discarded temporary).
  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    (void) DeepCopy( dst , src , n );
  }
};
// DeepCopy for ( HostSpace , ROCmHostPinnedSpace ) with an arbitrary
// ExecutionSpace.
template< class ExecutionSpace >
struct DeepCopy< HostSpace
               , Kokkos::Experimental::ROCmHostPinnedSpace
               , ExecutionSpace >
{
  // Forwards to the Kokkos::Experimental::ROCm specialization by constructing
  // (and immediately discarding) a temporary of it with the same arguments.
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  {
    typedef DeepCopy< HostSpace
                    , Kokkos::Experimental::ROCmHostPinnedSpace
                    , Kokkos::Experimental::ROCm > rocm_deep_copy ;
    (void) rocm_deep_copy( dst , src , n );
  }

  // Fences `exec`, then performs the copy through this struct's own
  // three-argument constructor (a discarded temporary).
  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    (void) DeepCopy( dst , src , n );
  }
};
} }
namespace Kokkos {
namespace Impl {
// ( ROCmSpace , HostSpace ): access is reported as not allowed (value is
// false) and both verify() overloads abort with a fixed message.
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::ROCmSpace
                                          , Kokkos::HostSpace >
{
  enum { value = false };

  KOKKOS_INLINE_FUNCTION static
  void verify( void )
  {
    Kokkos::abort("ROCm code attempted to access HostSpace memory");
  }

  // The pointer argument is ignored; the message is the same as above.
  KOKKOS_INLINE_FUNCTION static
  void verify( const void * )
  {
    Kokkos::abort("ROCm code attempted to access HostSpace memory");
  }
};
// ( ROCmSpace , ROCmHostPinnedSpace ): access is reported as allowed (value
// is true), so both verify() overloads are intentionally empty.
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::ROCmSpace
                                          , Kokkos::Experimental::ROCmHostPinnedSpace >
{
  enum { value = true };

  KOKKOS_INLINE_FUNCTION static
  void verify( void )
  {
  }

  KOKKOS_INLINE_FUNCTION static
  void verify( const void * )
  {
  }
};
// Catch-all for ( ROCmSpace , OtherSpace ) where OtherSpace is any type other
// than ROCmSpace itself (the enable_if removes the same-type case). Access is
// reported as not allowed and both verify() overloads abort.
template< class OtherSpace >
struct VerifyExecutionCanAccessMemorySpace<
  typename enable_if<
    ! is_same< Kokkos::Experimental::ROCmSpace , OtherSpace >::value
    , Kokkos::Experimental::ROCmSpace
  >::type ,
  OtherSpace >
{
  enum { value = false };

  KOKKOS_INLINE_FUNCTION static
  void verify( void )
  {
    Kokkos::abort("ROCm code attempted to access unknown Space memory");
  }

  // The pointer argument is ignored; the message is the same as above.
  KOKKOS_INLINE_FUNCTION static
  void verify( const void * )
  {
    Kokkos::abort("ROCm code attempted to access unknown Space memory");
  }
};
// ( HostSpace , ROCmSpace ): access is reported as not allowed (value is
// false). The verify() overloads are plain inline functions that forward to
// the matching ROCmSpace::access_error() overload.
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace
                                          , Kokkos::Experimental::ROCmSpace >
{
  enum { value = false };

  inline static
  void verify( void )
  {
    Kokkos::Experimental::ROCmSpace::access_error();
  }

  // Passes the offending pointer through to access_error().
  inline static
  void verify( const void * p )
  {
    Kokkos::Experimental::ROCmSpace::access_error(p);
  }
};
// ( HostSpace , ROCmHostPinnedSpace ): access is reported as allowed (value
// is true), so both verify() overloads are intentionally empty.
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace
                                          , Kokkos::Experimental::ROCmHostPinnedSpace >
{
  enum { value = true };

  KOKKOS_INLINE_FUNCTION static
  void verify( void )
  {
  }

  KOKKOS_INLINE_FUNCTION static
  void verify( const void * )
  {
  }
};
} }
namespace Kokkos {
namespace Impl {
// Allocation record specialization for ROCmSpace, layered on the untyped
// SharedAllocationRecord< void , void > base. Everything except the deleted
// copy operations is declared here and defined elsewhere.
template<>
class SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  using RecordBase = SharedAllocationRecord< void , void > ;

  // Records are non-copyable.
  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  // Default value of the constructor's `arg_dealloc` parameter.
  static void deallocate( RecordBase * );

#ifdef KOKKOS_DEBUG
  // Only present in KOKKOS_DEBUG builds.
  static RecordBase s_root_record ;
#endif

  // Copy of the space object this record was created with.
  const Kokkos::Experimental::ROCmSpace m_space ;

protected:

  ~SharedAllocationRecord();

  SharedAllocationRecord( const Kokkos::Experimental::ROCmSpace & arg_space
                        , const std::string & arg_label
                        , const size_t arg_alloc_size
                        , const RecordBase::function_type arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;

  // Factory returning a pointer to a record for the given space, label and
  // size.
  static SharedAllocationRecord *
  allocate( const Kokkos::Experimental::ROCmSpace & arg_space
          , const std::string & arg_label
          , const size_t arg_alloc_size );

  // Raw-pointer interface: allocate, reallocate and deallocate by pointer.
  static void *
  allocate_tracked( const Kokkos::Experimental::ROCmSpace & arg_space
                  , const std::string & arg_label
                  , const size_t arg_alloc_size );

  static void *
  reallocate_tracked( void * const arg_alloc_ptr
                    , const size_t arg_alloc_size );

  static void
  deallocate_tracked( void * const arg_alloc_ptr );

  // Returns the record pointer associated with an allocation pointer.
  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  // Writes record information to the stream; `detail` defaults to false.
  static void print_records( std::ostream &
                           , const Kokkos::Experimental::ROCmSpace &
                           , bool detail = false );
};
// Allocation record specialization for ROCmHostPinnedSpace, layered on the
// untyped SharedAllocationRecord< void , void > base. Apart from the deleted
// copy operations and the inline default constructor, members are declared
// here and defined elsewhere.
template<>
class SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  using RecordBase = SharedAllocationRecord< void , void > ;

  // Records are non-copyable.
  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  // Default value of the constructor's `arg_dealloc` parameter.
  static void deallocate( RecordBase * );

#ifdef KOKKOS_DEBUG
  // Only present in KOKKOS_DEBUG builds.
  static RecordBase s_root_record ;
#endif

  // Copy of the space object this record was created with.
  const Kokkos::Experimental::ROCmHostPinnedSpace m_space ;

protected:

  ~SharedAllocationRecord();

  // Default-constructs the base record and the stored space.
  SharedAllocationRecord() : RecordBase(), m_space() {}

  SharedAllocationRecord( const Kokkos::Experimental::ROCmHostPinnedSpace & arg_space
                        , const std::string & arg_label
                        , const size_t arg_alloc_size
                        , const RecordBase::function_type arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;

  // Factory returning a pointer to a record for the given space, label and
  // size.
  static SharedAllocationRecord *
  allocate( const Kokkos::Experimental::ROCmHostPinnedSpace & arg_space
          , const std::string & arg_label
          , const size_t arg_alloc_size
          );

  // Raw-pointer interface: allocate, reallocate and deallocate by pointer.
  static void *
  allocate_tracked( const Kokkos::Experimental::ROCmHostPinnedSpace & arg_space
                  , const std::string & arg_label
                  , const size_t arg_alloc_size );

  static void *
  reallocate_tracked( void * const arg_alloc_ptr
                    , const size_t arg_alloc_size );

  static void
  deallocate_tracked( void * const arg_alloc_ptr );

  // Returns the record pointer associated with an allocation pointer.
  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  // Writes record information to the stream; `detail` defaults to false.
  static void print_records( std::ostream &
                           , const Kokkos::Experimental::ROCmHostPinnedSpace &
                           , bool detail = false );
};
} }
#endif
#endif