1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
//! # ulib: General library for universal computing.
//!
//! This library basically implements traits and structs for holding vectors on hosts and different kinds of devices.
//! It is intended to be used with ucc builder which generates wrapper bindings using this library.
//!
//! CUDA support must be manually enabled using the
//! feature `cuda`.
#[allow(unused_imports)]
use lazy_static::lazy_static;
// For our derive macros to refer to cust even when cust is
// not listed as a dependency in our dependent crates.
#[cfg(feature = "cuda")]
pub extern crate cust;
pub use ulib_derive::UniversalCopy;
#[cfg(feature = "cuda")]
use cust::memory::{ DeviceCopy, DeviceSlice };
/// Upper bound on the number of CUDA devices this crate keeps track of.
///
/// CUDA device ids must stay below this value, and the list of detected
/// devices is cut down to this length.
#[cfg(feature = "cuda")]
pub const MAX_NUM_CUDA_DEVICES: usize = 4;
/// Total number of device slots in a `cuda` build: one slot for the CPU
/// (index 0) followed by one slot per supported CUDA device.
#[cfg(feature = "cuda")]
pub const MAX_DEVICES: usize = MAX_NUM_CUDA_DEVICES + 1;
/// Total number of device slots in a build without the `cuda` feature,
/// where the CPU is the only device.
#[cfg(not(feature = "cuda"))]
pub const MAX_DEVICES: usize = 1;
/// All supported device types.
///
/// The `CUDA` variant only exists when the `cuda` feature is enabled.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Device {
/// The CPU. It maps to index 0 in this crate's device-id mapping.
CPU,
/// A CUDA device selected by its device id.
///
/// Ids at or above `MAX_NUM_CUDA_DEVICES` are rejected by an assertion
/// when the device is converted to an index.
#[cfg(feature = "cuda")]
CUDA(u8 /* device id */)
}
/// A generic device context.
///
/// Values are produced by [`Device::get_context`]. Without the `cuda`
/// feature the struct has no fields.
pub struct DeviceContext {
/// The CUDA context held by this value: `None` when built for
/// `Device::CPU`, `Some` when built for a `Device::CUDA` device.
// NOTE(review): the field is never read in this file (hence the
// `allow(dead_code)`); presumably it exists only to keep the context
// alive for as long as this value lives -- confirm.
#[cfg(feature = "cuda")]
#[allow(dead_code)]
cuda_context: Option<cust::context::Context>,
}
impl Device {
#[inline]
fn to_id(self) -> usize {
use Device::*;
match self {
CPU => 0,
#[cfg(feature = "cuda")]
CUDA(c) => {
assert!((c as usize) < MAX_NUM_CUDA_DEVICES,
"invalid cuda device id");
c as usize + 1
}
}
}
#[inline]
fn from_id(id: usize) -> Device {
use Device::*;
match id {
0 => CPU,
#[cfg(feature = "cuda")]
c @ 1..=MAX_NUM_CUDA_DEVICES => CUDA(c as u8 - 1),
id @ _ => panic!("device id {} is invalid.", id)
}
}
#[inline]
pub fn get_context(self) -> DeviceContext {
use Device::*;
match self {
CPU => DeviceContext {
#[cfg(feature = "cuda")]
cuda_context: None
},
#[cfg(feature = "cuda")]
CUDA(c) => DeviceContext {
cuda_context: Some(cust::context::Context::new(
CUDA_DEVICES[c as usize].0).unwrap())
}
}
}
#[inline]
pub fn synchronize(self) {
use Device::*;
match self {
CPU => {},
#[cfg(feature = "cuda")]
CUDA(c) => {
let _context = cust::context::Context::new(
CUDA_DEVICES[c as usize].0).unwrap();
cust::context::CurrentContext::synchronize().unwrap();
}
}
}
}
/// Marker trait for elements that can be bit-copied on every supported
/// device.
///
/// In a `cuda` build it bundles `Copy` with cust's `DeviceCopy`, and it
/// is implemented automatically for every type that satisfies both.
/// The derive macro of the same name can be used like this:
///
/// ```
/// use ulib::UniversalCopy;
///
/// #[derive(UniversalCopy, Clone)]
/// struct Test {
///     a: i32,
///     b: usize
/// }
/// ```
#[cfg(feature = "cuda")]
pub trait UniversalCopy: Copy + DeviceCopy {}

// Blanket implementation: nothing ever has to implement the trait by hand.
#[cfg(feature = "cuda")]
impl<T> UniversalCopy for T
where
    T: Copy + DeviceCopy,
{
}
/// Marker trait for elements that can be bit-copied on every supported
/// device.
///
/// In a cpu-only build it is just another name for `Copy`, and it is
/// implemented automatically for every `Copy` type.
/// The derive macro of the same name can be used like this:
///
/// ```
/// use ulib::UniversalCopy;
///
/// #[derive(UniversalCopy, Clone)]
/// struct Test {
///     a: i32,
///     b: usize
/// }
/// ```
#[cfg(not(feature = "cuda"))]
pub trait UniversalCopy: Copy {}

// Blanket implementation: nothing ever has to implement the trait by hand.
#[cfg(not(feature = "cuda"))]
impl<T> UniversalCopy for T
where
    T: Copy,
{
}
#[cfg(feature = "cuda")]
lazy_static! {
/// Vector of all devices and their primary contexts.
///
/// The contexts follow the CUDA Driver API, not the runtime API.
/// All contexts are kept here so they are never deallocated.
///
/// At most `MAX_NUM_CUDA_DEVICES` entries are kept: when more devices
/// are found, a warning is logged and the surplus entries are removed.
/// Any error reported while initializing the driver, enumerating the
/// devices or creating a context panics on first access.
static ref CUDA_DEVICES: Vec<(cust::device::Device, cust::context::Context)> = {
// initialize the CUDA driver here and only here.
cust::init(cust::CudaFlags::empty()).unwrap();
// Pair every enumerated device with a context created for it.
let mut ret = cust::device::Device::devices().unwrap()
.map(|d| {
let d = d.unwrap();
(d, cust::context::Context::new(d).unwrap())
})
.collect::<Vec<_>>();
// The device-id mapping cannot address more than
// MAX_NUM_CUDA_DEVICES GPUs, so anything beyond that is dropped.
if ret.len() > MAX_NUM_CUDA_DEVICES as usize {
clilog::warn!(ULIB_CUDA_TRUNC,
"the number of available cuda gpus {} \
exceed max supported {}, truncated.",
ret.len(), MAX_NUM_CUDA_DEVICES);
ret.truncate(MAX_NUM_CUDA_DEVICES as usize);
}
ret
};
/// The number of CUDA devices, i.e. the length of the (possibly
/// truncated) device list above.
pub static ref NUM_CUDA_DEVICES: usize = CUDA_DEVICES.len();
}
/// A trait for objects that can be borrowed as an immutable CUDA slice.
///
/// Only available when the `cuda` feature is enabled.
#[cfg(feature = "cuda")]
pub trait AsCUDASlice<T: UniversalCopy> {
/// Get an immutable CUDA slice on a specific GPU.
///
/// `cuda_device` selects the GPU the slice should live on.
// NOTE(review): presumably only `Device::CUDA` values are meaningful
// here; confirm how implementors treat `Device::CPU`.
///
/// There is no borrow checker taking place, so nothing
/// prevents one from using it mutably. Just don't do it.
/// It would not only lead to data races, but also safety
/// issues due to the possible Vec-like reallocation.
///
/// If one needs to update the content, use [`AsCUDASliceMut`]
/// instead as it tracks the dirty flags correctly.
fn as_cuda_slice(&self, cuda_device: Device) -> DeviceSlice<T>;
}
/// A trait for objects that can be borrowed as a mutable CUDA slice.
///
/// Only available when the `cuda` feature is enabled.
#[cfg(feature = "cuda")]
pub trait AsCUDASliceMut<T: UniversalCopy> {
/// Get a mutable CUDA slice on a specific GPU.
///
/// `cuda_device` selects the GPU the slice should live on. The
/// receiver is borrowed mutably for the duration of the call.
fn as_cuda_slice_mut(&mut self, cuda_device: Device) ->
DeviceSlice<T>;
}
/// A trait to get raw pointer for any device.
///
/// Unlike the CUDA slice traits, this trait is available in every build.
pub trait AsUPtr<T: UniversalCopy> {
/// Get an immutable raw pointer for the given `device`.
///
/// The result is a plain `*const T`; the signature places no lifetime
/// or validity guarantee on it, so callers are responsible for using
/// it correctly.
fn as_uptr(&self, device: Device) -> *const T;
}
/// A trait to get mutable raw pointer for any device.
///
/// Unlike the CUDA slice traits, this trait is available in every build.
pub trait AsUPtrMut<T: UniversalCopy> {
/// Get a mutable raw pointer for the given `device`.
///
/// The result is a plain `*mut T`; the signature places no lifetime
/// or validity guarantee on it, so callers are responsible for using
/// it correctly.
fn as_mut_uptr(&mut self, device: Device) -> *mut T;
}
mod uvec;
pub use uvec::UVec;