solana_perf/cuda_runtime.rs

// Module for CUDA-related helper functions and wrappers.
//
// cudaHostRegister/cudaHostUnregister -
//    APIs for page-pinning host memory. The CUDA driver/hardware cannot
//    overlap copies from host memory to GPU memory unless the host memory is
//    page-pinned so it cannot be paged out to disk. The CUDA driver provides
//    these interfaces to pin and unpin memory.

use {
    crate::{
        perf_libs,
        recycler::{RecyclerX, Reset},
    },
    log::{debug, trace},
    rand::{seq::SliceRandom, Rng},
    rayon::prelude::*,
    std::{
        ops::{Index, IndexMut},
        os::raw::c_int,
        slice::{Iter, IterMut, SliceIndex},
        sync::Weak,
    },
};

const CUDA_SUCCESS: c_int = 0;

fn pin<T>(_mem: &mut Vec<T>) {
    if let Some(api) = perf_libs::api() {
        use std::{ffi::c_void, mem::size_of};

        let ptr = _mem.as_mut_ptr();
        let size = _mem.capacity().saturating_mul(size_of::<T>());
        let err = unsafe {
            (api.cuda_host_register)(ptr as *mut c_void, size, /*flags=*/ 0)
        };
        assert!(
            err == CUDA_SUCCESS,
            "cudaHostRegister error: {} ptr: {:?} bytes: {}",
            err,
            ptr,
            size
        );
    }
}

fn unpin<T>(_mem: *mut T) {
    if let Some(api) = perf_libs::api() {
        use std::ffi::c_void;

        let err = unsafe { (api.cuda_host_unregister)(_mem as *mut c_void) };
        assert!(
            err == CUDA_SUCCESS,
            "cudaHostUnregister returned: {} ptr: {:?}",
            err,
            _mem
        );
    }
}

// A vector wrapper where the underlying memory can be
// page-pinned. Controlled by flags in case the user only wants
// to pin in certain circumstances.
#[derive(Debug, Default)]
pub struct PinnedVec<T: Default + Clone + Sized> {
    x: Vec<T>,
    pinned: bool,
    pinnable: bool,
    recycler: Weak<RecyclerX<PinnedVec<T>>>,
}
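
// Typical usage (a minimal sketch; the names below are illustrative only):
//
//     let mut buf = PinnedVec::<u8>::with_capacity(0);
//     buf.reserve_and_pin(4096); // page-pins the allocation when the CUDA
//                                // perf libs are loaded (no-op otherwise)
//     buf.resize(4096, 0u8);     // grows the vector; check_ptr() re-pins
//                                // automatically if the buffer reallocates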

impl<T: Default + Clone + Sized> Reset for PinnedVec<T> {
    fn reset(&mut self) {
        self.resize(0, T::default());
    }
    fn warm(&mut self, size_hint: usize) {
        self.set_pinnable();
        self.resize(size_hint, T::default());
    }
    fn set_recycler(&mut self, recycler: Weak<RecyclerX<Self>>) {
        self.recycler = recycler;
    }
}

impl<T: Clone + Default + Sized> From<PinnedVec<T>> for Vec<T> {
    fn from(mut pinned_vec: PinnedVec<T>) -> Self {
        if pinned_vec.pinned {
            // If the vector is pinned and has a recycler, just return a clone
            // so that the next allocation of a PinnedVec will recycle an
            // already pinned one.
            if pinned_vec.recycler.strong_count() != 0 {
                return pinned_vec.x.clone();
            }
            unpin(pinned_vec.x.as_mut_ptr());
            pinned_vec.pinned = false;
        }
        pinned_vec.pinnable = false;
        pinned_vec.recycler = Weak::default();
        std::mem::take(&mut pinned_vec.x)
    }
}

impl<'a, T: Clone + Default + Sized> IntoIterator for &'a PinnedVec<T> {
    type Item = &'a T;
    type IntoIter = Iter<'a, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.x.iter()
    }
}

impl<T: Clone + Default + Sized, I: SliceIndex<[T]>> Index<I> for PinnedVec<T> {
    type Output = I::Output;

    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.x[index]
    }
}

impl<T: Clone + Default + Sized, I: SliceIndex<[T]>> IndexMut<I> for PinnedVec<T> {
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.x[index]
    }
}

impl<T: Clone + Default + Sized> PinnedVec<T> {
    pub fn iter(&self) -> Iter<'_, T> {
        self.x.iter()
    }

    pub fn iter_mut(&mut self) -> IterMut<'_, T> {
        self.x.iter_mut()
    }

    pub fn capacity(&self) -> usize {
        self.x.capacity()
    }
}

impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a PinnedVec<T> {
    type Iter = rayon::slice::Iter<'a, T>;
    type Item = &'a T;
    fn into_par_iter(self) -> Self::Iter {
        self.x.par_iter()
    }
}

impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a mut PinnedVec<T> {
    type Iter = rayon::slice::IterMut<'a, T>;
    type Item = &'a mut T;
    fn into_par_iter(self) -> Self::Iter {
        self.x.par_iter_mut()
    }
}

impl<T: Clone + Default + Sized> PinnedVec<T> {
    pub fn reserve(&mut self, size: usize) {
        self.x.reserve(size);
    }

    pub fn reserve_and_pin(&mut self, size: usize) {
        if self.x.capacity() < size {
            if self.pinned {
                unpin(self.x.as_mut_ptr());
                self.pinned = false;
            }
            self.x.reserve(size);
        }
        self.set_pinnable();
        if !self.pinned {
            pin(&mut self.x);
            self.pinned = true;
        }
    }

    pub fn set_pinnable(&mut self) {
        self.pinnable = true;
    }

    pub fn copy_from_slice(&mut self, data: &[T])
    where
        T: Copy,
    {
        self.x.copy_from_slice(data);
    }

    pub fn from_vec(source: Vec<T>) -> Self {
        Self {
            x: source,
            pinned: false,
            pinnable: false,
            recycler: Weak::default(),
        }
    }

    pub fn with_capacity(capacity: usize) -> Self {
        Self::from_vec(Vec::with_capacity(capacity))
    }

    pub fn is_empty(&self) -> bool {
        self.x.is_empty()
    }

    pub fn len(&self) -> usize {
        self.x.len()
    }

    pub fn as_ptr(&self) -> *const T {
        self.x.as_ptr()
    }

    pub fn as_mut_ptr(&mut self) -> *mut T {
        self.x.as_mut_ptr()
    }

    fn prepare_realloc(&mut self, new_size: usize) -> (*mut T, usize) {
        let old_ptr = self.x.as_mut_ptr();
        let old_capacity = self.x.capacity();
        // If growing to new_size will force a reallocation, unpin the old
        // buffer now; check_ptr() re-pins the new allocation afterwards.
        if self.pinned && self.x.capacity() < new_size {
            unpin(old_ptr);
            self.pinned = false;
        }
        (old_ptr, old_capacity)
    }

    pub fn push(&mut self, x: T) {
        let (old_ptr, old_capacity) = self.prepare_realloc(self.x.len().saturating_add(1));
        self.x.push(x);
        self.check_ptr(old_ptr, old_capacity, "push");
    }

    pub fn truncate(&mut self, size: usize) {
        self.x.truncate(size);
    }

    pub fn resize(&mut self, size: usize, elem: T) {
        let (old_ptr, old_capacity) = self.prepare_realloc(size);
        self.x.resize(size, elem);
        self.check_ptr(old_ptr, old_capacity, "resize");
    }

    pub fn append(&mut self, other: &mut Vec<T>) {
        let (old_ptr, old_capacity) =
            self.prepare_realloc(self.x.len().saturating_add(other.len()));
        self.x.append(other);
        self.check_ptr(old_ptr, old_capacity, "append");
    }

    pub fn append_pinned(&mut self, other: &mut Self) {
        let (old_ptr, old_capacity) =
            self.prepare_realloc(self.x.len().saturating_add(other.len()));
        self.x.append(&mut other.x);
        self.check_ptr(old_ptr, old_capacity, "append_pinned");
    }

    /// Forces the length of the vector to `new_len`.
    ///
    /// This is a low-level operation that maintains none of the normal
    /// invariants of the type. Normally changing the length of a vector
    /// is done using one of the safe operations instead, such as
    /// [`truncate`], [`resize`], [`extend`], or [`clear`].
    ///
    /// [`truncate`]: Vec::truncate
    /// [`resize`]: Vec::resize
    /// [`extend`]: Extend::extend
    /// [`clear`]: Vec::clear
    ///
    /// # Safety
    ///
    /// - `new_len` must be less than or equal to [`capacity()`].
    /// - The elements at `old_len..new_len` must be initialized.
    ///
    /// [`capacity()`]: Vec::capacity
    ///
    pub unsafe fn set_len(&mut self, new_len: usize) {
        self.x.set_len(new_len);
    }

    pub fn shuffle<R: Rng>(&mut self, rng: &mut R) {
        self.x.shuffle(rng)
    }

    // If the backing buffer was reallocated (pointer or capacity changed),
    // unpin the old buffer if needed and pin the new allocation.
    fn check_ptr(&mut self, _old_ptr: *mut T, _old_capacity: usize, _from: &'static str) {
        let api = perf_libs::api();
        if api.is_some()
            && self.pinnable
            && (self.x.as_ptr() != _old_ptr || self.x.capacity() != _old_capacity)
        {
            if self.pinned {
                unpin(_old_ptr);
            }

            trace!(
                "pinning from check_ptr old: {} size: {} from: {}",
                _old_capacity,
                self.x.capacity(),
                _from
            );
            pin(&mut self.x);
            self.pinned = true;
        }
    }
}

impl<T: Clone + Default + Sized> Clone for PinnedVec<T> {
    fn clone(&self) -> Self {
        let mut x = self.x.clone();
        let pinned = if self.pinned {
            pin(&mut x);
            true
        } else {
            false
        };
        debug!(
            "clone PinnedVec: size: {} pinned?: {} pinnable?: {}",
            self.x.capacity(),
            self.pinned,
            self.pinnable
        );
        Self {
            x,
            pinned,
            pinnable: self.pinnable,
            recycler: self.recycler.clone(),
        }
    }
}

impl<T: Sized + Default + Clone> Drop for PinnedVec<T> {
    fn drop(&mut self) {
        if let Some(recycler) = self.recycler.upgrade() {
            recycler.recycle(std::mem::take(self));
        } else if self.pinned {
            unpin(self.x.as_mut_ptr());
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_pinned_vec() {
        let mut mem = PinnedVec::with_capacity(10);
        mem.set_pinnable();
        mem.push(50);
        mem.resize(2, 10);
        assert_eq!(mem[0], 50);
        assert_eq!(mem[1], 10);
        assert_eq!(mem.len(), 2);
        assert!(!mem.is_empty());
        let mut iter = mem.iter();
        assert_eq!(*iter.next().unwrap(), 50);
        assert_eq!(*iter.next().unwrap(), 10);
        assert_eq!(iter.next(), None);
    }
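
    // A minimal sketch (names here are illustrative) of append_pinned() and
    // the From<PinnedVec<T>> for Vec<T> conversion: elements are moved out of
    // `other`, and converting back to a plain Vec hands the (unpinned) buffer
    // to the caller.
    #[test]
    fn test_pinned_vec_append_and_into_vec() {
        let mut a = PinnedVec::from_vec(vec![1u8, 2, 3]);
        let mut b = PinnedVec::from_vec(vec![4u8, 5]);
        a.append_pinned(&mut b);
        assert_eq!(a.len(), 5);
        assert!(b.is_empty());
        let v: Vec<u8> = a.into();
        assert_eq!(v, vec![1, 2, 3, 4, 5]);
    }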
}