ulib 0.3.3

Universal data storage library for CPU/GPU heterogeneous applications
Documentation
//! # ulib: General library for universal computing.
//!
//! This library implements traits and structs for holding vectors on the host and on different kinds of devices.
//! It is intended to be used with the `ucc` builder, which generates wrapper bindings using this library.
//!
//! CUDA support must be manually enabled using the
//! feature `cuda`.

#[allow(unused_imports)]
use lazy_static::lazy_static;

// For our derive macros to refer to cust even when cust is
// not listed as a dependency in our dependent crates.
#[cfg(feature = "cuda")]
pub extern crate cust;

pub use ulib_derive::UniversalCopy;

#[cfg(feature = "cuda")]
use cust::memory::{ DeviceCopy, DeviceSlice };

/// Upper bound on the number of CUDA devices this crate keeps track of.
///
/// CUDA devices detected beyond this limit are dropped (with a warning)
/// when the global device list is initialized.
#[cfg(feature = "cuda")]
pub const MAX_NUM_CUDA_DEVICES: usize = 4;
/// Total number of device slots in a CUDA-enabled build: one for the CPU
/// plus [`MAX_NUM_CUDA_DEVICES`] for the CUDA devices.
#[cfg(feature = "cuda")]
pub const MAX_DEVICES: usize = MAX_NUM_CUDA_DEVICES + 1;

/// Total number of device slots in a CPU-only build: just the CPU.
#[cfg(not(feature = "cuda"))]
pub const MAX_DEVICES: usize = 1;

/// The kinds of devices that data can live on.
///
/// `CPU` is always available; `CUDA` only exists when the crate is
/// built with the `cuda` feature.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Device {
    /// The host processor.
    CPU,
    /// A CUDA GPU, identified by its zero-based device id.
    #[cfg(feature = "cuda")]
    CUDA(u8),
}

/// An opaque handle to the execution context of a [`Device`].
///
/// In a CPU-only build this carries no state. With the `cuda` feature
/// enabled it may additionally own a CUDA context, which is dropped
/// together with this value.
pub struct DeviceContext {
    // Never read directly: it is stored only so that the context is
    // owned by (and lives as long as) this struct.
    #[allow(dead_code)]
    #[cfg(feature = "cuda")]
    cuda_context: Option<cust::context::Context>,
}

impl Device {
    #[inline]
    fn to_id(self) -> usize {
        use Device::*;
        match self {
            CPU => 0,
            #[cfg(feature = "cuda")]
            CUDA(c) => {
                assert!((c as usize) < MAX_NUM_CUDA_DEVICES,
                        "invalid cuda device id");
                c as usize + 1
            }
        }
    }
    
    #[inline]
    fn from_id(id: usize) -> Device {
        use Device::*;
        match id {
            0 => CPU,
            #[cfg(feature = "cuda")]
            c @ 1..=MAX_NUM_CUDA_DEVICES => CUDA(c as u8 - 1),
            id @ _ => panic!("device id {} is invalid.", id)
        }
    }

    #[inline]
    pub fn get_context(self) -> DeviceContext {
        use Device::*;
        match self {
            CPU => DeviceContext {
                #[cfg(feature = "cuda")]
                cuda_context: None
            },
            #[cfg(feature = "cuda")]
            CUDA(c) => DeviceContext {
                cuda_context: Some(cust::context::Context::new(
                    CUDA_DEVICES[c as usize].0).unwrap())
            }
        }
    }

    #[inline]
    pub fn synchronize(self) {
        use Device::*;
        match self {
            CPU => {},
            #[cfg(feature = "cuda")]
            CUDA(c) => {
                let _context = cust::context::Context::new(
                    CUDA_DEVICES[c as usize].0).unwrap();
                cust::context::CurrentContext::synchronize().unwrap();
            }
        }
    }
}

/// Marker trait for element types that can be bit-copied universally.
///
/// In a cuda build it is implemented automatically for every type that
/// is both `Copy` and `cust::DeviceCopy`.
/// The derive macro of the same name can be used like this:
///
/// ```
/// use ulib::UniversalCopy;
///
/// #[derive(UniversalCopy, Clone)]
/// struct Test {
///     a: i32,
///     b: usize
/// }
/// ```
#[cfg(feature = "cuda")]
pub trait UniversalCopy: Copy + DeviceCopy {}
#[cfg(feature = "cuda")]
impl<T> UniversalCopy for T where T: Copy + DeviceCopy {}

/// Marker trait for element types that can be bit-copied universally.
///
/// In a cpu-only build it is implemented automatically for every
/// `Copy` type.
/// The derive macro of the same name can be used like this:
///
/// ```
/// use ulib::UniversalCopy;
///
/// #[derive(UniversalCopy, Clone)]
/// struct Test {
///     a: i32,
///     b: usize
/// }
/// ```
#[cfg(not(feature = "cuda"))]
pub trait UniversalCopy: Copy {}
#[cfg(not(feature = "cuda"))]
impl<T> UniversalCopy for T where T: Copy {}

#[cfg(feature = "cuda")]
lazy_static! {
    /// vector of all usable devices, each paired with a context created on it.
    ///
    /// at most `MAX_NUM_CUDA_DEVICES` devices are kept; any further
    /// devices are ignored after emitting a warning.
    ///
    /// the contexts follow the CUDA Driver API, not the runtime API.
    /// all contexts are kept here so they are never deallocated.
    static ref CUDA_DEVICES: Vec<(cust::device::Device, cust::context::Context)> = {
        // initialize the CUDA driver here and only here.
        cust::init(cust::CudaFlags::empty()).unwrap();

        // Enumerate the device handles first and truncate them, so that
        // contexts are only ever created for the devices we actually keep.
        let mut devices = cust::device::Device::devices().unwrap()
            .map(|d| d.unwrap())
            .collect::<Vec<_>>();
        if devices.len() > MAX_NUM_CUDA_DEVICES {
            clilog::warn!(ULIB_CUDA_TRUNC,
                          "the number of available cuda gpus {} \
                           exceed max supported {}, truncated.",
                          devices.len(), MAX_NUM_CUDA_DEVICES);
            devices.truncate(MAX_NUM_CUDA_DEVICES);
        }

        devices.into_iter()
            .map(|d| {
                let context = cust::context::Context::new(d).unwrap();
                (d, context)
            })
            .collect::<Vec<_>>()
    };

    /// the number of CUDA devices.
    pub static ref NUM_CUDA_DEVICES: usize = CUDA_DEVICES.len();
}

/// A trait for objects that can be borrowed as an immutable CUDA slice.
///
/// Only available when the crate is built with the `cuda` feature.
#[cfg(feature = "cuda")]
pub trait AsCUDASlice<T: UniversalCopy> {
    /// Get an immutable CUDA slice on a specific GPU.
    ///
    /// `cuda_device` selects the GPU; presumably it is expected to be a
    /// `Device::CUDA` value -- confirm against the implementors.
    ///
    /// There is no borrow checker taking place, so nothing
    /// prevents one from using it mutably. Just don't do it.
    /// It would not only lead to data races, but also safety
    /// issues due to the possible Vec-like reallocation.
    ///
    /// If one needs to update the content, use [`AsCUDASliceMut`]
    /// instead as it tracks the dirty flags correctly.
    fn as_cuda_slice(&self, cuda_device: Device) -> DeviceSlice<T>;
}

/// A trait for objects that can be borrowed as a mutable CUDA slice.
///
/// Only available when the crate is built with the `cuda` feature.
#[cfg(feature = "cuda")]
pub trait AsCUDASliceMut<T: UniversalCopy> {
    /// Get a mutable CUDA slice on a specific GPU.
    ///
    /// `cuda_device` selects the GPU; presumably it is expected to be a
    /// `Device::CUDA` value -- confirm against the implementors.
    fn as_cuda_slice_mut(&mut self, cuda_device: Device) ->
        DeviceSlice<T>;
}

/// A trait to get an immutable raw pointer for any device.
pub trait AsUPtr<T: UniversalCopy> {
    /// Get an immutable raw pointer to the data for the given `device`.
    ///
    /// NOTE(review): for non-CPU devices the pointer presumably refers to
    /// device memory and must not be dereferenced on the host -- confirm
    /// against the implementors.
    fn as_uptr(&self, device: Device) -> *const T;
}

/// A trait to get a mutable raw pointer for any device.
pub trait AsUPtrMut<T: UniversalCopy> {
    /// Get a mutable raw pointer to the data for the given `device`.
    ///
    /// NOTE(review): for non-CPU devices the pointer presumably refers to
    /// device memory and must not be dereferenced on the host -- confirm
    /// against the implementors.
    fn as_mut_uptr(&mut self, device: Device) -> *mut T;
}

mod uvec;
pub use uvec::UVec;