#ifndef KOKKOS_PARALLEL_HPP
#define KOKKOS_PARALLEL_HPP
#include <cstddef>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_View.hpp>
#include <Kokkos_ExecPolicy.hpp>
#if defined(KOKKOS_ENABLE_PROFILING)
#include <impl/Kokkos_Profiling_Interface.hpp>
#include <typeinfo>
#endif
#include <impl/Kokkos_Tags.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_FunctorAnalysis.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#ifdef KOKKOS_DEBUG
#include<iostream>
#endif
namespace Kokkos {
namespace Impl {
/** \brief  Select the execution space for a (Functor, Policy) pair.
 *
 *  Primary (fallback) template: reports Kokkos::DefaultExecutionSpace.
 *  The two trailing "Enable" parameters exist so that partial
 *  specializations can be chosen based on which nested types the functor
 *  and the policy provide.
 *
 *  NOTE(review): no default template arguments are given here, yet this
 *  template is instantiated elsewhere with only two arguments, so the
 *  defaults are presumably supplied by a forward declaration in another
 *  header -- confirm.
 */
template <class Functor, class Policy, class EnableFunctor, class EnablePolicy>
struct FunctorPolicyExecutionSpace {
  typedef Kokkos::DefaultExecutionSpace execution_space;
};
/** \brief  Specialization keyed on Functor::device_type and
 *          Policy::execution_space both naming types.
 *
 *  The policy's execution space is the one reported; the functor's
 *  device_type is only inspected, never used.
 *
 *  NOTE(review): assumes enable_if_type<T>::type (declared outside this
 *  file) yields the same type as the default Enable arguments whenever T is
 *  well-formed -- confirm.
 */
template <class Functor, class Policy>
struct FunctorPolicyExecutionSpace<
    Functor, Policy,
    typename enable_if_type<typename Functor::device_type>::type,
    typename enable_if_type<typename Policy::execution_space>::type> {
  typedef typename Policy::execution_space execution_space;
};
/** \brief  Specialization keyed on Functor::execution_space and
 *          Policy::execution_space both naming types.
 *
 *  The policy's execution space is the one reported; the functor's
 *  execution_space is only inspected, never used.
 *
 *  NOTE(review): assumes enable_if_type<T>::type (declared outside this
 *  file) yields the same type as the default Enable arguments whenever T is
 *  well-formed -- confirm.
 */
template <class Functor, class Policy>
struct FunctorPolicyExecutionSpace<
    Functor, Policy,
    typename enable_if_type<typename Functor::execution_space>::type,
    typename enable_if_type<typename Policy::execution_space>::type> {
  typedef typename Policy::execution_space execution_space;
};
/** \brief  Specialization keyed only on Policy::execution_space naming a
 *          type; the functor-side Enable parameter is left unconstrained.
 *
 *  Reports the policy's execution space regardless of what the functor
 *  declares.
 */
template <class Functor, class Policy, class EnableFunctor>
struct FunctorPolicyExecutionSpace<
    Functor, Policy, EnableFunctor,
    typename enable_if_type<typename Policy::execution_space>::type> {
  typedef typename Policy::execution_space execution_space;
};
/** \brief  Specialization keyed only on Functor::device_type naming a type;
 *          the policy-side Enable parameter is left unconstrained.
 *
 *  Reports Functor::device_type under the name execution_space.
 */
template <class Functor, class Policy, class EnablePolicy>
struct FunctorPolicyExecutionSpace<
    Functor, Policy,
    typename enable_if_type<typename Functor::device_type>::type,
    EnablePolicy> {
  typedef typename Functor::device_type execution_space;
};
/** \brief  Specialization keyed only on Functor::execution_space naming a
 *          type; the policy-side Enable parameter is left unconstrained.
 *
 *  Reports the functor's own execution_space.
 */
template <class Functor, class Policy, class EnablePolicy>
struct FunctorPolicyExecutionSpace<
    Functor, Policy,
    typename enable_if_type<typename Functor::execution_space>::type,
    EnablePolicy> {
  typedef typename Functor::execution_space execution_space;
};
} }
namespace Kokkos {
template< class ExecPolicy , class FunctorType >
inline
void parallel_for( const ExecPolicy & policy
, const FunctorType & functor
, const std::string& str = ""
, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type * = 0
)
{
#if defined(KOKKOS_ENABLE_PROFILING)
uint64_t kpID = 0;
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Impl::ParallelConstructName<FunctorType, typename ExecPolicy::work_tag> name(str);
Kokkos::Profiling::beginParallelFor(name.get(), 0, &kpID);
}
#endif
Kokkos::Impl::shared_allocation_tracking_disable();
Impl::ParallelFor< FunctorType , ExecPolicy > closure( functor , policy );
Kokkos::Impl::shared_allocation_tracking_enable();
closure.execute();
#if defined(KOKKOS_ENABLE_PROFILING)
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::endParallelFor(kpID);
}
#endif
}
template< class FunctorType >
inline
void parallel_for( const size_t work_count
, const FunctorType & functor
, const std::string& str = ""
)
{
typedef typename
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef RangePolicy< execution_space > policy ;
#if defined(KOKKOS_ENABLE_PROFILING)
uint64_t kpID = 0;
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Impl::ParallelConstructName<FunctorType, void> name(str);
Kokkos::Profiling::beginParallelFor(name.get(), 0, &kpID);
}
#endif
Kokkos::Impl::shared_allocation_tracking_disable();
Impl::ParallelFor< FunctorType , policy > closure( functor , policy(0,work_count) );
Kokkos::Impl::shared_allocation_tracking_enable();
closure.execute();
#if defined(KOKKOS_ENABLE_PROFILING)
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::endParallelFor(kpID);
}
#endif
}
/** \brief  Label-first convenience overload of parallel_for.
 *
 *  Forwards to ::Kokkos::parallel_for(policy, functor, str).  When
 *  KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to non-zero, the call is
 *  additionally bracketed by Kokkos::fence() and a start/end message
 *  written to std::cout.
 *
 *  \param str      Kernel label (forwarded, and printed in debug mode).
 *  \param policy   Execution policy forwarded unchanged.
 *  \param functor  Functor forwarded unchanged.
 */
template <class ExecPolicy, class FunctorType>
inline void parallel_for(const std::string& str,
                         const ExecPolicy& policy,
                         const FunctorType& functor)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
#endif

  ::Kokkos::parallel_for(policy, functor, str);

#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl;
#endif

  // Explicitly mark the label as used.
  (void) str;
}
}
#include <Kokkos_Parallel_Reduce.hpp>
namespace Kokkos {
template< class ExecutionPolicy , class FunctorType >
inline
void parallel_scan( const ExecutionPolicy & policy
, const FunctorType & functor
, const std::string& str = ""
, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecutionPolicy >::value >::type * = 0
)
{
#if defined(KOKKOS_ENABLE_PROFILING)
uint64_t kpID = 0;
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Impl::ParallelConstructName<FunctorType, typename ExecutionPolicy::work_tag> name(str);
Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID);
}
#endif
Kokkos::Impl::shared_allocation_tracking_disable();
Impl::ParallelScan< FunctorType , ExecutionPolicy > closure( functor , policy );
Kokkos::Impl::shared_allocation_tracking_enable();
closure.execute();
#if defined(KOKKOS_ENABLE_PROFILING)
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::endParallelScan(kpID);
}
#endif
}
template< class FunctorType >
inline
void parallel_scan( const size_t work_count
, const FunctorType & functor
, const std::string& str = "" )
{
typedef typename
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef Kokkos::RangePolicy< execution_space > policy ;
#if defined(KOKKOS_ENABLE_PROFILING)
uint64_t kpID = 0;
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Impl::ParallelConstructName<FunctorType, void> name(str);
Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID);
}
#endif
Kokkos::Impl::shared_allocation_tracking_disable();
Impl::ParallelScan< FunctorType , policy > closure( functor , policy(0,work_count) );
Kokkos::Impl::shared_allocation_tracking_enable();
closure.execute();
#if defined(KOKKOS_ENABLE_PROFILING)
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::endParallelScan(kpID);
}
#endif
}
/** \brief  Label-first convenience overload of parallel_scan.
 *
 *  Forwards to ::Kokkos::parallel_scan(policy, functor, str).  When
 *  KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to non-zero, the call is
 *  additionally bracketed by Kokkos::fence() and a start/end message
 *  written to std::cout.
 *
 *  \param str      Kernel label (forwarded, and printed in debug mode).
 *  \param policy   Execution policy forwarded unchanged.
 *  \param functor  Functor forwarded unchanged.
 */
template <class ExecutionPolicy, class FunctorType>
inline void parallel_scan(const std::string& str,
                          const ExecutionPolicy& policy,
                          const FunctorType& functor)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
#endif

  ::Kokkos::parallel_scan(policy, functor, str);

#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
#endif

  // Explicitly mark the label as used.
  (void) str;
}
template< class ExecutionPolicy , class FunctorType, class ReturnType >
inline
void parallel_scan( const ExecutionPolicy & policy
, const FunctorType & functor
, ReturnType & return_value
, const std::string& str = ""
, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecutionPolicy >::value >::type * = 0
)
{
#if defined(KOKKOS_ENABLE_PROFILING)
uint64_t kpID = 0;
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Impl::ParallelConstructName<FunctorType, typename ExecutionPolicy::work_tag> name(str);
Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID);
}
#endif
Kokkos::Impl::shared_allocation_tracking_disable();
Impl::ParallelScanWithTotal< FunctorType , ExecutionPolicy, ReturnType > closure( functor, policy, return_value );
Kokkos::Impl::shared_allocation_tracking_enable();
closure.execute();
#if defined(KOKKOS_ENABLE_PROFILING)
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::endParallelScan(kpID);
}
#endif
Kokkos::fence();
}
template< class FunctorType, class ReturnType >
inline
void parallel_scan( const size_t work_count
, const FunctorType & functor
, ReturnType & return_value
, const std::string & str = "" )
{
typedef typename
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef Kokkos::RangePolicy< execution_space > policy ;
#if defined(KOKKOS_ENABLE_PROFILING)
uint64_t kpID = 0;
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Impl::ParallelConstructName<FunctorType, void> name(str);
Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID);
}
#endif
Kokkos::Impl::shared_allocation_tracking_disable();
Impl::ParallelScanWithTotal< FunctorType, policy, ReturnType > closure( functor, policy(0,work_count), return_value );
Kokkos::Impl::shared_allocation_tracking_enable();
closure.execute();
#if defined(KOKKOS_ENABLE_PROFILING)
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::endParallelScan(kpID);
}
#endif
Kokkos::fence();
}
/** \brief  Label-first convenience overload of the parallel_scan variant
 *          that takes a \c return_value.
 *
 *  Forwards to ::Kokkos::parallel_scan(policy, functor, return_value, str).
 *  When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to non-zero, the
 *  call is additionally bracketed by Kokkos::fence() and a start/end
 *  message written to std::cout.
 *
 *  \param str           Kernel label (forwarded, and printed in debug mode).
 *  \param policy        Execution policy forwarded unchanged.
 *  \param functor       Functor forwarded unchanged.
 *  \param return_value  Reference forwarded unchanged.
 */
template <class ExecutionPolicy, class FunctorType, class ReturnType>
inline void parallel_scan(const std::string& str,
                          const ExecutionPolicy& policy,
                          const FunctorType& functor,
                          ReturnType& return_value)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
#endif

  ::Kokkos::parallel_scan(policy, functor, return_value, str);

#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
#endif

  // Explicitly mark the label as used.
  (void) str;
}
}
namespace Kokkos {
namespace Impl {
/** \brief  Query the team shared-memory size requested by a functor.
 *
 *  Primary (fallback) template: value() ignores both arguments and
 *  returns 0.  Partial specializations on the Enable parameter can
 *  override this for functors that expose a size-query member.
 */
template <class FunctorType, class Enable = void>
struct FunctorTeamShmemSize {
  KOKKOS_INLINE_FUNCTION static size_t value(const FunctorType&, int)
  {
    return 0;
  }
};
/** \brief  Specialization for functors that have a \c team_shmem_size
 *          member.
 *
 *  The Enable argument takes the address of FunctorType::team_shmem_size
 *  inside sizeof, so this specialization is only viable when that
 *  expression is well-formed.  value() returns
 *  functor.team_shmem_size(team_size).
 */
template <class FunctorType>
struct FunctorTeamShmemSize<
    FunctorType,
    typename Impl::enable_if<
        0 < sizeof(&FunctorType::team_shmem_size)>::type> {
  static inline size_t value(const FunctorType& functor, int team_size)
  {
    return functor.team_shmem_size(team_size);
  }
};
/** \brief  Specialization for functors that have a \c shmem_size member.
 *
 *  The Enable argument takes the address of FunctorType::shmem_size inside
 *  sizeof, so this specialization is only viable when that expression is
 *  well-formed.  value() returns functor.shmem_size(team_size).
 */
template <class FunctorType>
struct FunctorTeamShmemSize<
    FunctorType,
    typename Impl::enable_if<0 < sizeof(&FunctorType::shmem_size)>::type> {
  static inline size_t value(const FunctorType& functor, int team_size)
  {
    return functor.shmem_size(team_size);
  }
};
} }
#endif