fil_rustacuda/memory/
locked.rs

1use super::DeviceCopy;
2use crate::error::*;
3use crate::memory::malloc::{cuda_free_locked, cuda_malloc_locked};
4use std::mem;
5use std::ops;
6use std::ptr;
7use std::slice;
8
9/// Fixed-size host-side buffer in page-locked memory.
10///
11/// See the [`module-level documentation`](../memory/index.html) for more details on page-locked
12/// memory.
13#[derive(Debug)]
14pub struct LockedBuffer<T: DeviceCopy> {
15    buf: *mut T,
16    capacity: usize,
17}
18impl<T: DeviceCopy + Clone> LockedBuffer<T> {
19    /// Allocate a new page-locked buffer large enough to hold `size` `T`'s and initialized with
20    /// clones of `value`.
21    ///
22    /// # Errors
23    ///
24    /// If the allocation fails, returns the error from CUDA. If `size` is large enough that
25    /// `size * mem::sizeof::<T>()` overflows usize, then returns InvalidMemoryAllocation.
26    ///
27    /// # Examples
28    ///
29    /// ```
30    /// # let _context = rustacuda::quick_init().unwrap();
31    /// use rustacuda::memory::*;
32    /// let mut buffer = LockedBuffer::new(&0u64, 5).unwrap();
33    /// buffer[0] = 1;
34    /// ```
35    pub fn new(value: &T, size: usize) -> CudaResult<Self> {
36        unsafe {
37            let mut uninit = LockedBuffer::uninitialized(size)?;
38            for x in 0..size {
39                *uninit.get_unchecked_mut(x) = value.clone();
40            }
41            Ok(uninit)
42        }
43    }
44
45    /// Allocate a new page-locked buffer of the same size as `slice`, initialized with a clone of
46    /// the data in `slice`.
47    ///
48    /// # Errors
49    ///
50    /// If the allocation fails, returns the error from CUDA.
51    ///
52    /// # Examples
53    ///
54    /// ```
55    /// # let _context = rustacuda::quick_init().unwrap();
56    /// use rustacuda::memory::*;
57    /// let values = [0u64; 5];
58    /// let mut buffer = LockedBuffer::from_slice(&values).unwrap();
59    /// buffer[0] = 1;
60    /// ```
61    pub fn from_slice(slice: &[T]) -> CudaResult<Self> {
62        unsafe {
63            let mut uninit = LockedBuffer::uninitialized(slice.len())?;
64            for (i, x) in slice.iter().enumerate() {
65                *uninit.get_unchecked_mut(i) = x.clone();
66            }
67            Ok(uninit)
68        }
69    }
70}
71impl<T: DeviceCopy> LockedBuffer<T> {
72    /// Allocate a new page-locked buffer large enough to hold `size` `T`'s, but without
73    /// initializing the contents.
74    ///
75    /// # Errors
76    ///
77    /// If the allocation fails, returns the error from CUDA. If `size` is large enough that
78    /// `size * mem::sizeof::<T>()` overflows usize, then returns InvalidMemoryAllocation.
79    ///
80    /// # Safety
81    ///
82    /// The caller must ensure that the contents of the buffer are initialized before reading from
83    /// the buffer.
84    ///
85    /// # Examples
86    ///
87    /// ```
88    /// # let _context = rustacuda::quick_init().unwrap();
89    /// use rustacuda::memory::*;
90    /// let mut buffer = unsafe { LockedBuffer::uninitialized(5).unwrap() };
91    /// for i in buffer.iter_mut() {
92    ///     *i = 0u64;
93    /// }
94    /// ```
95    pub unsafe fn uninitialized(size: usize) -> CudaResult<Self> {
96        let ptr: *mut T = if size > 0 && mem::size_of::<T>() > 0 {
97            cuda_malloc_locked(size)?
98        } else {
99            ptr::NonNull::dangling().as_ptr()
100        };
101        Ok(LockedBuffer {
102            buf: ptr as *mut T,
103            capacity: size,
104        })
105    }
106
107    /// Extracts a slice containing the entire buffer.
108    ///
109    /// Equivalent to `&s[..]`.
110    ///
111    /// # Examples
112    ///
113    /// ```
114    /// # let _context = rustacuda::quick_init().unwrap();
115    /// use rustacuda::memory::*;
116    /// let buffer = LockedBuffer::new(&0u64, 5).unwrap();
117    /// let sum : u64 = buffer.as_slice().iter().sum();
118    /// ```
119    pub fn as_slice(&self) -> &[T] {
120        self
121    }
122
123    /// Extracts a mutable slice of the entire buffer.
124    ///
125    /// Equivalent to `&mut s[..]`.
126    ///
127    /// # Examples
128    ///
129    /// ```
130    /// # let _context = rustacuda::quick_init().unwrap();
131    /// use rustacuda::memory::*;
132    /// let mut buffer = LockedBuffer::new(&0u64, 5).unwrap();
133    /// for i in buffer.as_mut_slice() {
134    ///     *i = 12u64;
135    /// }
136    /// ```
137    pub fn as_mut_slice(&mut self) -> &mut [T] {
138        self
139    }
140
141    /// Creates a `LockedBuffer<T>` directly from the raw components of another locked buffer.
142    ///
143    /// # Safety
144    ///
145    /// This is highly unsafe, due to the number of invariants that aren't
146    /// checked:
147    ///
148    /// * `ptr` needs to have been previously allocated via `LockedBuffer` or
149    /// [`cuda_malloc_locked`](fn.cuda_malloc_locked.html).
150    /// * `ptr`'s `T` needs to have the same size and alignment as it was allocated with.
151    /// * `capacity` needs to be the capacity that the pointer was allocated with.
152    ///
153    /// Violating these may cause problems like corrupting the CUDA driver's
154    /// internal data structures.
155    ///
156    /// The ownership of `ptr` is effectively transferred to the
157    /// `LockedBuffer<T>` which may then deallocate, reallocate or change the
158    /// contents of memory pointed to by the pointer at will. Ensure
159    /// that nothing else uses the pointer after calling this
160    /// function.
161    ///
162    /// # Examples
163    ///
164    /// ```
165    /// # let _context = rustacuda::quick_init().unwrap();
166    /// use std::mem;
167    /// use rustacuda::memory::*;
168    ///
169    /// let mut buffer = LockedBuffer::new(&0u64, 5).unwrap();
170    /// let ptr = buffer.as_mut_ptr();
171    /// let size = buffer.len();
172    ///
173    /// mem::forget(buffer);
174    ///
175    /// let buffer = unsafe { LockedBuffer::from_raw_parts(ptr, size) };
176    /// ```
177    pub unsafe fn from_raw_parts(ptr: *mut T, size: usize) -> LockedBuffer<T> {
178        LockedBuffer {
179            buf: ptr,
180            capacity: size,
181        }
182    }
183
184    /// Destroy a `LockedBuffer`, returning an error.
185    ///
186    /// Deallocating page-locked memory can return errors from previous asynchronous work. This function
187    /// destroys the given buffer and returns the error and the un-destroyed buffer on failure.
188    ///
189    /// # Example
190    ///
191    /// ```
192    /// # let _context = rustacuda::quick_init().unwrap();
193    /// use rustacuda::memory::*;
194    /// let x = LockedBuffer::new(&0u64, 5).unwrap();
195    /// match LockedBuffer::drop(x) {
196    ///     Ok(()) => println!("Successfully destroyed"),
197    ///     Err((e, buf)) => {
198    ///         println!("Failed to destroy buffer: {:?}", e);
199    ///         // Do something with buf
200    ///     },
201    /// }
202    /// ```
203    pub fn drop(mut buf: LockedBuffer<T>) -> DropResult<LockedBuffer<T>> {
204        if buf.buf.is_null() {
205            return Ok(());
206        }
207
208        if buf.capacity > 0 && mem::size_of::<T>() > 0 {
209            let capacity = buf.capacity;
210            let ptr = mem::replace(&mut buf.buf, ptr::null_mut());
211            unsafe {
212                match cuda_free_locked(ptr) {
213                    Ok(()) => {
214                        mem::forget(buf);
215                        Ok(())
216                    }
217                    Err(e) => Err((e, LockedBuffer::from_raw_parts(ptr, capacity))),
218                }
219            }
220        } else {
221            Ok(())
222        }
223    }
224}
225
226impl<T: DeviceCopy> AsRef<[T]> for LockedBuffer<T> {
227    fn as_ref(&self) -> &[T] {
228        self
229    }
230}
231impl<T: DeviceCopy> AsMut<[T]> for LockedBuffer<T> {
232    fn as_mut(&mut self) -> &mut [T] {
233        self
234    }
235}
236impl<T: DeviceCopy> ops::Deref for LockedBuffer<T> {
237    type Target = [T];
238
239    fn deref(&self) -> &[T] {
240        unsafe {
241            let p = self.buf;
242            slice::from_raw_parts(p, self.capacity)
243        }
244    }
245}
246impl<T: DeviceCopy> ops::DerefMut for LockedBuffer<T> {
247    fn deref_mut(&mut self) -> &mut [T] {
248        unsafe {
249            let ptr = self.buf;
250            slice::from_raw_parts_mut(ptr, self.capacity)
251        }
252    }
253}
254impl<T: DeviceCopy> Drop for LockedBuffer<T> {
255    fn drop(&mut self) {
256        if self.buf.is_null() {
257            return;
258        }
259
260        if self.capacity > 0 && mem::size_of::<T>() > 0 {
261            // No choice but to panic if this fails.
262            unsafe {
263                cuda_free_locked(self.buf).expect("Failed to deallocate CUDA page-locked memory.");
264            }
265        }
266        self.capacity = 0;
267    }
268}
269
#[cfg(test)]
mod test {
    use super::*;
    use std::mem;

    // Element type with no size, used to exercise the path that skips allocation.
    #[derive(Clone, Debug)]
    struct ZeroSizedType;
    unsafe impl DeviceCopy for ZeroSizedType {}

    // A freshly created buffer can be written through indexing.
    #[test]
    fn test_new() {
        let _context = crate::quick_init().unwrap();
        let initial = 0u64;
        let mut locked = LockedBuffer::new(&initial, 5).unwrap();
        locked[0] = 1;
    }

    // A buffer cloned from a slice can be mutated through a sub-slice iterator.
    #[test]
    fn test_from_slice() {
        let _context = crate::quick_init().unwrap();
        let source = [0u64; 10];
        let mut locked = LockedBuffer::from_slice(&source).unwrap();
        locked[0..3].iter_mut().for_each(|slot| *slot = 10);
    }

    // Taking a buffer apart and rebuilding it from its raw parts keeps the contents.
    #[test]
    fn from_raw_parts() {
        let _context = crate::quick_init().unwrap();
        let mut original = LockedBuffer::new(&0u64, 5).unwrap();
        original[2] = 1;
        let (ptr, len) = (original.as_mut_ptr(), original.len());
        // Give up ownership without freeing so the rebuilt buffer is the sole owner.
        mem::forget(original);

        let rebuilt = unsafe { LockedBuffer::from_raw_parts(ptr, len) };
        assert_eq!(&[0u64, 0, 1, 0, 0], rebuilt.as_slice());
        drop(rebuilt);
    }

    // Creating and dropping an empty buffer must succeed.
    #[test]
    fn zero_length_buffer() {
        let _context = crate::quick_init().unwrap();
        let empty = LockedBuffer::new(&0u64, 0).unwrap();
        drop(empty);
    }

    // Creating and dropping a buffer of zero-sized elements must succeed.
    #[test]
    fn zero_size_type() {
        let _context = crate::quick_init().unwrap();
        let zst_buffer = LockedBuffer::new(&ZeroSizedType, 10).unwrap();
        drop(zst_buffer);
    }

    // A byte size that overflows `usize` is reported as an invalid allocation.
    #[test]
    fn overflows_usize() {
        let _context = crate::quick_init().unwrap();
        let too_many = ::std::usize::MAX - 1;
        let err = LockedBuffer::new(&0u64, too_many).unwrap_err();
        assert_eq!(CudaError::InvalidMemoryAllocation, err);
    }

    // Placeholder: the size should eventually be derived from the available system memory.
    // For now this only checks that allocating a single element succeeds.
    #[test]
    fn test_allocate_correct_size() {
        let _context = crate::quick_init().unwrap();

        let allocation_size = 1;
        unsafe {
            let _buffer = LockedBuffer::<u64>::uninitialized(allocation_size).unwrap();
        }
    }
}