1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
//! # ulib: General library for universal computing.
//!
//! This library basically implements traits and structs for holding vectors on hosts and different kinds of devices.
//! It is intended to be used with ucc builder which generates wrapper bindings using this library.

#[allow(unused_imports)]
use lazy_static::lazy_static;

#[cfg(feature = "cuda")]
use cust::memory::{ DeviceCopy, DeviceSlice };

/// Upper bound on the number of CUDA devices this library will address.
///
/// `Device::to_id` asserts that a CUDA device id is strictly below this
/// value, and the lazily-built CUDA device list is truncated to this length.
#[cfg(feature = "cuda")]
pub const MAX_NUM_CUDA_DEVICES: usize = 4;
/// Total number of addressable devices when CUDA support is compiled in:
/// every possible CUDA device plus one extra slot for the CPU (which takes
/// index 0 in `Device::to_id`).
#[cfg(feature = "cuda")]
pub const MAX_DEVICES: usize = MAX_NUM_CUDA_DEVICES + 1;

/// Total number of addressable devices when CUDA support is compiled out:
/// only the CPU.
#[cfg(not(feature = "cuda"))]
pub const MAX_DEVICES: usize = 1;

/// Identifies where a piece of data lives: the host CPU or, when the
/// `cuda` feature is enabled, one particular CUDA GPU.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Device {
    /// The host processor.
    CPU,
    /// A CUDA GPU; the payload is the zero-based CUDA device id.
    #[cfg(feature = "cuda")]
    CUDA(u8),
}

impl Device {
    /// Converts this device into a dense index: the CPU is `0` and CUDA
    /// device `i` is `i + 1`.
    ///
    /// # Panics
    ///
    /// Panics with "invalid cuda device id" if a CUDA device id is not
    /// strictly below `MAX_NUM_CUDA_DEVICES`.
    fn to_id(self) -> usize {
        match self {
            Self::CPU => 0,
            #[cfg(feature = "cuda")]
            Self::CUDA(raw) => {
                let gpu_index = raw as usize;
                assert!(gpu_index < MAX_NUM_CUDA_DEVICES,
                        "invalid cuda device id");
                // Slot 0 is reserved for the CPU, so GPUs start at 1.
                gpu_index + 1
            }
        }
    }

    /// Inverse of [`Device::to_id`]: turns a dense index back into a device.
    ///
    /// # Panics
    ///
    /// Panics if `id` does not correspond to the CPU or to a CUDA device
    /// within the supported range.
    fn from_id(id: usize) -> Device {
        match id {
            0 => Self::CPU,
            #[cfg(feature = "cuda")]
            slot @ 1..=MAX_NUM_CUDA_DEVICES => Self::CUDA(slot as u8 - 1),
            unknown => panic!("device id {} is invalid.", unknown),
        }
    }
}

/// Marker trait for element types that can be bit-copied on every
/// supported device.
///
/// With the `cuda` feature enabled this requires both [`Copy`] and
/// `DeviceCopy`, and is implemented automatically for every such type.
#[cfg(feature = "cuda")]
pub trait UniversalCopy
where
    Self: Copy + DeviceCopy,
{
}
#[cfg(feature = "cuda")]
impl<T> UniversalCopy for T where T: Copy + DeviceCopy {}

/// Marker trait for element types that can be bit-copied on every
/// supported device.
///
/// Without the `cuda` feature only [`Copy`] is required, and the trait is
/// implemented automatically for every `Copy` type.
#[cfg(not(feature = "cuda"))]
pub trait UniversalCopy
where
    Self: Copy,
{
}
#[cfg(not(feature = "cuda"))]
impl<T> UniversalCopy for T where T: Copy {}

#[cfg(feature = "cuda")]
lazy_static! {
    /// Every usable CUDA device paired with the context created for it.
    ///
    /// The contexts follow the CUDA Driver API, not the runtime API.
    /// They are stored in this static so that they are never deallocated.
    /// At most `MAX_NUM_CUDA_DEVICES` entries are kept; any additional
    /// devices are dropped from the list after logging a warning.
    static ref CUDA_DEVICES: Vec<(cust::device::Device, cust::context::Context)> = {
        // The CUDA driver is initialized here and only here, before any
        // device is enumerated.
        cust::init(cust::CudaFlags::empty()).unwrap();
        let mut devices: Vec<_> = cust::device::Device::devices().unwrap()
            .map(|dev| {
                let dev = dev.unwrap();
                let ctx = cust::context::Context::new(dev).unwrap();
                (dev, ctx)
            })
            .collect();
        let num_found = devices.len();
        if num_found > MAX_NUM_CUDA_DEVICES {
            clilog::warn!(ULIB_CUDA_TRUNC,
                          "the number of available cuda gpus {} \
                           exceed max supported {}, truncated.",
                          num_found, MAX_NUM_CUDA_DEVICES);
            devices.truncate(MAX_NUM_CUDA_DEVICES);
        }
        devices
    };

    /// The number of CUDA devices kept in the device list, i.e. after any
    /// truncation to `MAX_NUM_CUDA_DEVICES`.
    pub static ref NUM_CUDA_DEVICES: usize = CUDA_DEVICES.len();
}

/// A trait for objects that can be borrowed as an immutable CUDA slice.
#[cfg(feature = "cuda")]
pub trait AsCUDASlice<T: UniversalCopy> {
    /// Get an immutable CUDA slice on a specific GPU.
    ///
    /// `cuda_device` selects the GPU whose memory the slice refers to.
    /// NOTE(review): presumably this must be a `Device::CUDA(_)` value;
    /// what happens for `Device::CPU` is up to the implementor — confirm.
    ///
    /// No borrow checking takes place on the returned slice, so nothing
    /// prevents one from using it mutably. Just don't do it.
    /// It would not only lead to data races, but also to safety
    /// issues due to possible Vec-like reallocation.
    ///
    /// If one needs to update the content, use [`AsCUDASliceMut`]
    /// instead, as it tracks the dirty flags correctly.
    fn as_cuda_slice(&self, cuda_device: Device) -> DeviceSlice<T>;
}

/// A trait for objects that can be borrowed as a mutable CUDA slice.
#[cfg(feature = "cuda")]
pub trait AsCUDASliceMut<T: UniversalCopy> {
    /// Get a mutable CUDA slice on a specific GPU.
    ///
    /// `cuda_device` selects the GPU whose memory the slice refers to.
    /// Taking `&mut self` gives the caller exclusive access for the
    /// duration of the call, unlike the shared borrow taken by
    /// `AsCUDASlice::as_cuda_slice`.
    /// NOTE(review): presumably `cuda_device` must be a `Device::CUDA(_)`
    /// value — confirm against the implementors.
    fn as_cuda_slice_mut(&mut self, cuda_device: Device) ->
        DeviceSlice<T>;
}

/// A trait to get a read-only raw pointer to the data for any device.
pub trait AsUPtr<T: UniversalCopy> {
    /// Get an immutable raw pointer for the given `device`.
    ///
    /// NOTE(review): the pointer is presumably only meaningful in the
    /// address space of `device` (host memory for `Device::CPU`, GPU
    /// memory otherwise) and only while `self` is alive and unmodified —
    /// confirm against the implementors.
    fn as_uptr(&self, device: Device) -> *const T;
}

/// A trait to get a mutable raw pointer to the data for any device.
pub trait AsUPtrMut<T: UniversalCopy> {
    /// Get a mutable raw pointer for the given `device`.
    ///
    /// NOTE(review): the pointer is presumably only meaningful in the
    /// address space of `device` (host memory for `Device::CPU`, GPU
    /// memory otherwise) and only while `self` is alive — confirm against
    /// the implementors.
    fn as_mut_uptr(&mut self, device: Device) -> *mut T;
}

mod uvec;
pub use uvec::UVec;