#ifndef KOKKOS_CUDASPACE_HPP
#define KOKKOS_CUDASPACE_HPP
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_ENABLE_CUDA )
#include <Kokkos_Core_fwd.hpp>
#include <iosfwd>
#include <typeinfo>
#include <string>
#include <Kokkos_HostSpace.hpp>
#include <Cuda/Kokkos_Cuda_abort.hpp>
namespace Kokkos {
/* Memory space tag for allocations used by the Cuda execution space.
 * Allocation / deallocation are declared here and defined out of line. */
class CudaSpace {
public:
// Canonical space aliases expected of every Kokkos memory space.
typedef CudaSpace memory_space ;
typedef Kokkos::Cuda execution_space ;
typedef Kokkos::Device<execution_space,memory_space> device_type;
typedef unsigned int size_type ;
CudaSpace();
// Copy/move are trivially defaulted: the only state is the device id.
CudaSpace( CudaSpace && rhs ) = default ;
CudaSpace( const CudaSpace & rhs ) = default ;
CudaSpace & operator = ( CudaSpace && rhs ) = default ;
CudaSpace & operator = ( const CudaSpace & rhs ) = default ;
~CudaSpace() = default ;
// Allocate / deallocate raw memory in this space (defined in the .cpp).
void * allocate( const size_t arg_alloc_size ) const ;
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
// Human-readable space name, "Cuda".
static constexpr const char* name() { return m_name; }
// Report an illegal access to this space; used by
// VerifyExecutionCanAccessMemorySpace< HostSpace , CudaSpace > below.
static void access_error();
static void access_error( const void * const );
private:
int m_device ; // device id -- presumably set by the out-of-line constructor; confirm in .cpp
static constexpr const char* m_name = "Cuda";
// Allocation-record specialization needs access to private internals.
friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ;
};
namespace Impl {
// One-time initialization of the lock arrays returned by the accessors below.
void init_lock_arrays_cuda_space();
// Accessors for global lock arrays (atomic / scratch / threadid).
// Passing deallocate=true releases the array instead of returning it;
// implementations live in the corresponding .cpp.
int* atomic_lock_array_cuda_space_ptr(bool deallocate = false);
int* scratch_lock_array_cuda_space_ptr(bool deallocate = false);
int* threadid_lock_array_cuda_space_ptr(bool deallocate = false);
}
}
namespace Kokkos {
/* Memory space tag for Cuda unified-virtual-memory allocations.
 * Per the MemorySpaceAccess specializations below, this space is
 * accessible from both HostSpace execution and the Cuda execution space. */
class CudaUVMSpace {
public:
// Canonical space aliases expected of every Kokkos memory space.
typedef CudaUVMSpace memory_space ;
typedef Cuda execution_space ;
typedef Kokkos::Device<execution_space,memory_space> device_type;
typedef unsigned int size_type ;
// Whether UVM allocations are available on this system (defined out of line).
static bool available();
// Count of current UVM allocations (defined out of line).
static int number_of_allocations();
CudaUVMSpace();
// Copy/move are trivially defaulted: the only state is the device id.
CudaUVMSpace( CudaUVMSpace && rhs ) = default ;
CudaUVMSpace( const CudaUVMSpace & rhs ) = default ;
CudaUVMSpace & operator = ( CudaUVMSpace && rhs ) = default ;
CudaUVMSpace & operator = ( const CudaUVMSpace & rhs ) = default ;
~CudaUVMSpace() = default ;
// Allocate / deallocate raw memory in this space (defined in the .cpp).
void * allocate( const size_t arg_alloc_size ) const ;
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
// Human-readable space name, "CudaUVM".
static constexpr const char* name() { return m_name; }
private:
int m_device ; // device id -- presumably set by the out-of-line constructor; confirm in .cpp
static constexpr const char* m_name = "CudaUVM";
};
}
namespace Kokkos {
/* Memory space tag for host-resident, Cuda-pinned allocations.
 * Note the execution space is the HOST execution space, not Cuda. */
class CudaHostPinnedSpace {
public:
// Canonical space aliases; executes on the host side.
typedef HostSpace::execution_space execution_space ;
typedef CudaHostPinnedSpace memory_space ;
typedef Kokkos::Device<execution_space,memory_space> device_type;
typedef unsigned int size_type ;
CudaHostPinnedSpace();
// Copy/move are trivially defaulted: this space carries no per-instance state.
CudaHostPinnedSpace( CudaHostPinnedSpace && rhs ) = default ;
CudaHostPinnedSpace( const CudaHostPinnedSpace & rhs ) = default ;
CudaHostPinnedSpace & operator = ( CudaHostPinnedSpace && rhs ) = default ;
CudaHostPinnedSpace & operator = ( const CudaHostPinnedSpace & rhs ) = default ;
~CudaHostPinnedSpace() = default ;
// Allocate / deallocate raw memory in this space (defined in the .cpp).
void * allocate( const size_t arg_alloc_size ) const ;
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
// Human-readable space name, "CudaHostPinned".
static constexpr const char* name() { return m_name; }
private:
static constexpr const char* m_name = "CudaHostPinned";
};
}
namespace Kokkos {
namespace Impl {

// Sanity checks: every memory space must be assignable to itself.
static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaSpace >::assignable , "" );
static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaUVMSpace >::assignable , "" );
static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" );

//----------------------------------------
// MemorySpaceAccess< AccessSpace , MemorySpace > relationships.
// Each specialization states whether views in MemorySpace may be assigned
// into AccessSpace (assignable), dereferenced from AccessSpace (accessible),
// and whether a deep copy between the two is available (deepcopy).

// HostSpace accessing the Cuda spaces:

template<>
struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace > {
  // Host cannot touch device memory; only deep copy is possible.
  enum { assignable = false , accessible = false , deepcopy = true };
};

template<>
struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace > {
  // Host may dereference UVM but a HostSpace view cannot alias it.
  enum { assignable = false , accessible = true , deepcopy = true };
};

template<>
struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace > {
  // Pinned memory is host memory: fully assignable and accessible.
  enum { assignable = true , accessible = true , deepcopy = true };
};

// CudaSpace accessing the other spaces:

template<>
struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace > {
  // Device cannot touch plain host memory; only deep copy is possible.
  enum { assignable = false , accessible = false , deepcopy = true };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaUVMSpace > {
  // Device may both alias and dereference UVM allocations.
  enum { assignable = true , accessible = true , deepcopy = true };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace > {
  // Device may dereference pinned host memory but not alias it.
  enum { assignable = false , accessible = true , deepcopy = true };
};

// CudaUVMSpace accessing the other spaces:

template<>
struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace > {
  // UVM's execution space is Cuda, which cannot touch plain host memory.
  enum { assignable = false , accessible = false , deepcopy = true };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace > {
  // Cuda execution can dereference device memory, but not alias it as UVM.
  enum { assignable = false , accessible = true , deepcopy = true };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace > {
  // Cuda execution can dereference pinned host memory, but not alias it.
  enum { assignable = false , accessible = true , deepcopy = true };
};

// CudaHostPinnedSpace (host execution) accessing the other spaces:

template<>
struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace > {
  // Host execution may dereference host memory, but the spaces do not alias.
  enum { assignable = false , accessible = true , deepcopy = true };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace > {
  // Host execution cannot touch device memory; only deep copy is possible.
  enum { assignable = false , accessible = false , deepcopy = true };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace > {
  // Host execution may dereference UVM, but the spaces do not alias.
  enum { assignable = false , accessible = true , deepcopy = true };
};

}}
namespace Kokkos {
namespace Impl {
// Asynchronous raw copy of n bytes between Cuda-related spaces (defined out of line).
void DeepCopyAsyncCuda( void * dst , const void * src , size_t n);
//----------------------------------------
// DeepCopy< Dst , Src , Cuda > specializations: defined out of line.
// The one-argument-set constructor is a blocking copy; the constructor
// taking a Cuda instance copies with respect to that instance.
template<> struct DeepCopy< CudaSpace , CudaSpace , Cuda>
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
template<> struct DeepCopy< CudaSpace , HostSpace , Cuda >
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
template<> struct DeepCopy< HostSpace , CudaSpace , Cuda >
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
//----------------------------------------
// DeepCopy specializations for an arbitrary ExecutionSpace.
// Pattern for every specialization below:
//  * the (dst,src,n) constructor delegates to the corresponding
//    DeepCopy< ... , Cuda > specialization above;
//  * the (exec,dst,src,n) constructor first fences the given execution
//    space, then issues an asynchronous copy.
// UVM allocations are routed through the CudaSpace copies and pinned
// allocations through the HostSpace copies.
template<class ExecutionSpace> struct DeepCopy< CudaSpace , CudaSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaSpace , HostSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , HostSpace , Cuda>( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< HostSpace , CudaSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
// UVM source: treated as device-resident for the copy.
template<class ExecutionSpace>
struct DeepCopy< CudaSpace , CudaUVMSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
// Pinned source: treated as host-resident for the copy.
template<class ExecutionSpace>
struct DeepCopy< CudaSpace , CudaHostPinnedSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaUVMSpace , CudaSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaUVMSpace , CudaUVMSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaUVMSpace , HostSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , HostSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< HostSpace , CudaUVMSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< HostSpace , CudaHostPinnedSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
} }
namespace Kokkos {
namespace Impl {
//----------------------------------------
// VerifyExecutionCanAccessMemorySpace< ExecSpace , MemSpace >:
// 'value' states whether the access is legal; 'verify' is a runtime
// check that aborts / reports when the access is illegal.

// Cuda execution touching HostSpace memory: illegal, abort on the device.
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::HostSpace >
{
enum { value = false };
KOKKOS_INLINE_FUNCTION static void verify( void )
{ Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
KOKKOS_INLINE_FUNCTION static void verify( const void * )
{ Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
};
// Cuda execution touching UVM memory: legal, verify is a no-op.
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >
{
enum { value = true };
KOKKOS_INLINE_FUNCTION static void verify( void ) { }
KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};
// Cuda execution touching pinned host memory: legal, verify is a no-op.
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >
{
enum { value = true };
KOKKOS_INLINE_FUNCTION static void verify( void ) { }
KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};
// Catch-all: Cuda execution touching any other (non-CudaSpace) memory
// space is illegal; the enable_if excludes the CudaSpace/CudaSpace case.
template< class OtherSpace >
struct VerifyExecutionCanAccessMemorySpace<
typename enable_if< ! is_same<Kokkos::CudaSpace,OtherSpace>::value , Kokkos::CudaSpace >::type ,
OtherSpace >
{
enum { value = false };
KOKKOS_INLINE_FUNCTION static void verify( void )
{ Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
KOKKOS_INLINE_FUNCTION static void verify( const void * )
{ Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
};
// Host execution touching CudaSpace memory: illegal; report through the
// host-side CudaSpace::access_error (plain 'inline' -- host-only path).
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaSpace >
{
enum { value = false };
inline static void verify( void ) { CudaSpace::access_error(); }
inline static void verify( const void * p ) { CudaSpace::access_error(p); }
};
// Host execution touching UVM memory: legal, verify is a no-op.
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaUVMSpace >
{
enum { value = true };
inline static void verify( void ) { }
inline static void verify( const void * ) { }
};
// Host execution touching pinned host memory: legal, verify is a no-op.
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >
{
enum { value = true };
KOKKOS_INLINE_FUNCTION static void verify( void ) {}
KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
};
} }
namespace Kokkos {
namespace Impl {
/* Allocation record for CudaSpace allocations. Tracks label, size, and a
 * lazily created texture object bound to the allocation. Out-of-line
 * members are defined in the corresponding .cpp. */
template<>
class SharedAllocationRecord< Kokkos::CudaSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
// UVM record reuses this record's static attach_texture_object helper.
friend class SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;
typedef SharedAllocationRecord< void , void > RecordBase ;
// Records are non-copyable.
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
// RecordBase-compatible deallocation callback (see arg_dealloc default below).
static void deallocate( RecordBase * );
// Create a texture object covering [alloc_ptr, alloc_ptr + alloc_size)
// viewed as elements of the given alias size (defined out of line).
static ::cudaTextureObject_t
attach_texture_object( const unsigned sizeof_alias
, void * const alloc_ptr
, const size_t alloc_size );
#ifdef KOKKOS_DEBUG
// Root of the debug-only record list for this space.
static RecordBase s_root_record ;
#endif
::cudaTextureObject_t m_tex_obj ; // 0 until attach_texture_object<>() is first called
const Kokkos::CudaSpace m_space ;
protected:
~SharedAllocationRecord();
SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
std::string get_label() const ;
// Allocate a new labeled record in the given space.
static SharedAllocationRecord * allocate( const Kokkos::CudaSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size );
// Allocate / reallocate / deallocate with reference-counted tracking.
static
void * allocate_tracked( const Kokkos::CudaSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size );
static
void * reallocate_tracked( void * const arg_alloc_ptr
, const size_t arg_alloc_size );
static
void deallocate_tracked( void * const arg_alloc_ptr );
// Recover the record that owns a tracked allocation pointer.
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
// Lazily create (first call only) and return the texture object for this
// allocation, viewed with the given alias type.
template< typename AliasType >
inline
::cudaTextureObject_t attach_texture_object()
{
static_assert( ( std::is_same< AliasType , int >::value ||
std::is_same< AliasType , ::int2 >::value ||
std::is_same< AliasType , ::int4 >::value )
, "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
if ( m_tex_obj == 0 ) {
m_tex_obj = attach_texture_object( sizeof(AliasType)
, (void*) RecordBase::m_alloc_ptr
, RecordBase::m_alloc_size );
}
return m_tex_obj ;
}
// Offset of ptr from the allocation base, in units of AliasType.
template< typename AliasType >
inline
int attach_texture_object_offset( const AliasType * const ptr )
{
return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
}
// Debugging: dump all records for this space to the stream.
static void print_records( std::ostream & , const Kokkos::CudaSpace & , bool detail = false );
};
/* Allocation record for CudaUVMSpace allocations. Mirrors the CudaSpace
 * record, delegating texture-object creation to that record's static helper. */
template<>
class SharedAllocationRecord< Kokkos::CudaUVMSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
typedef SharedAllocationRecord< void , void > RecordBase ;
// Records are non-copyable.
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
// RecordBase-compatible deallocation callback (see arg_dealloc default below).
static void deallocate( RecordBase * );
// Root of the record list for this space (unconditional, unlike CudaSpace).
static RecordBase s_root_record ;
::cudaTextureObject_t m_tex_obj ; // 0 until attach_texture_object<>() is first called
const Kokkos::CudaUVMSpace m_space ;
protected:
~SharedAllocationRecord();
SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
std::string get_label() const ;
// Allocate a new labeled record in the given space.
static SharedAllocationRecord * allocate( const Kokkos::CudaUVMSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
);
// Allocate / reallocate / deallocate with reference-counted tracking.
static
void * allocate_tracked( const Kokkos::CudaUVMSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size );
static
void * reallocate_tracked( void * const arg_alloc_ptr
, const size_t arg_alloc_size );
static
void deallocate_tracked( void * const arg_alloc_ptr );
// Recover the record that owns a tracked allocation pointer.
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
// Lazily create (first call only) and return the texture object for this
// allocation; delegates to the CudaSpace record's static helper.
template< typename AliasType >
inline
::cudaTextureObject_t attach_texture_object()
{
static_assert( ( std::is_same< AliasType , int >::value ||
std::is_same< AliasType , ::int2 >::value ||
std::is_same< AliasType , ::int4 >::value )
, "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
if ( m_tex_obj == 0 ) {
m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >::
attach_texture_object( sizeof(AliasType)
, (void*) RecordBase::m_alloc_ptr
, RecordBase::m_alloc_size );
}
return m_tex_obj ;
}
// Offset of ptr from the allocation base, in units of AliasType.
template< typename AliasType >
inline
int attach_texture_object_offset( const AliasType * const ptr )
{
return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
}
// Debugging: dump all records for this space to the stream.
static void print_records( std::ostream & , const Kokkos::CudaUVMSpace & , bool detail = false );
};
/* Allocation record for CudaHostPinnedSpace allocations. Same tracking
 * interface as the device records but with no texture-object support. */
template<>
class SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
typedef SharedAllocationRecord< void , void > RecordBase ;
// Records are non-copyable.
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
// RecordBase-compatible deallocation callback (see arg_dealloc default below).
static void deallocate( RecordBase * );
// Root of the record list for this space.
static RecordBase s_root_record ;
const Kokkos::CudaHostPinnedSpace m_space ;
protected:
~SharedAllocationRecord();
SharedAllocationRecord() : RecordBase(), m_space() {}
SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
std::string get_label() const ;
// Allocate a new labeled record in the given space.
static SharedAllocationRecord * allocate( const Kokkos::CudaHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
);
// Allocate / reallocate / deallocate with reference-counted tracking.
static
void * allocate_tracked( const Kokkos::CudaHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size );
static
void * reallocate_tracked( void * const arg_alloc_ptr
, const size_t arg_alloc_size );
static
void deallocate_tracked( void * const arg_alloc_ptr );
// Recover the record that owns a tracked allocation pointer.
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
// Debugging: dump all records for this space to the stream.
static void print_records( std::ostream & , const Kokkos::CudaHostPinnedSpace & , bool detail = false );
};
} }
#endif
#endif