fil_rustacuda/memory/locked.rs
use super::DeviceCopy;
use crate::error::*;
use crate::memory::malloc::{cuda_free_locked, cuda_malloc_locked};
use std::mem;
use std::ops;
use std::ptr;
use std::slice;

/// Fixed-size host-side buffer in page-locked memory.
///
/// See the [`module-level documentation`](../memory/index.html) for more details on page-locked
/// memory.
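///
/// # Examples
///
/// A minimal host-side usage sketch, relying only on the constructors and slice views defined
/// in this module (copying to the device is omitted here):
///
/// ```
/// # let _context = rustacuda::quick_init().unwrap();
/// use rustacuda::memory::*;
///
/// // Allocate a page-locked buffer of five zeroes and fill it on the host.
/// let mut buffer = LockedBuffer::new(&0u64, 5).unwrap();
/// for (i, x) in buffer.iter_mut().enumerate() {
///     *x = i as u64;
/// }
/// assert_eq!(buffer.as_slice(), &[0u64, 1, 2, 3, 4]);
/// ```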
#[derive(Debug)]
pub struct LockedBuffer<T: DeviceCopy> {
    buf: *mut T,
    capacity: usize,
}

impl<T: DeviceCopy + Clone> LockedBuffer<T> {
    /// Allocate a new page-locked buffer large enough to hold `size` `T`'s and initialized with
    /// clones of `value`.
    ///
    /// # Errors
    ///
    /// If the allocation fails, returns the error from CUDA. If `size` is large enough that
    /// `size * mem::size_of::<T>()` overflows `usize`, then returns `InvalidMemoryAllocation`.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let mut buffer = LockedBuffer::new(&0u64, 5).unwrap();
    /// buffer[0] = 1;
    /// ```
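    ///
    /// A short sketch of the overflow error path described above (it mirrors the
    /// `overflows_usize` test in this module):
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::error::CudaError;
    /// use rustacuda::memory::*;
    ///
    /// // `usize::MAX - 1` elements of `u64` overflow the byte-size computation.
    /// let err = LockedBuffer::new(&0u64, ::std::usize::MAX - 1).unwrap_err();
    /// assert_eq!(CudaError::InvalidMemoryAllocation, err);
    /// ```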
    pub fn new(value: &T, size: usize) -> CudaResult<Self> {
        unsafe {
            let mut uninit = LockedBuffer::uninitialized(size)?;
            for x in 0..size {
                *uninit.get_unchecked_mut(x) = value.clone();
            }
            Ok(uninit)
        }
    }

    /// Allocate a new page-locked buffer of the same size as `slice`, initialized with a clone of
    /// the data in `slice`.
    ///
    /// # Errors
    ///
    /// If the allocation fails, returns the error from CUDA.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let values = [0u64; 5];
    /// let mut buffer = LockedBuffer::from_slice(&values).unwrap();
    /// buffer[0] = 1;
    /// ```
    pub fn from_slice(slice: &[T]) -> CudaResult<Self> {
        unsafe {
            let mut uninit = LockedBuffer::uninitialized(slice.len())?;
            for (i, x) in slice.iter().enumerate() {
                *uninit.get_unchecked_mut(i) = x.clone();
            }
            Ok(uninit)
        }
    }
}

impl<T: DeviceCopy> LockedBuffer<T> {
    /// Allocate a new page-locked buffer large enough to hold `size` `T`'s, but without
    /// initializing the contents.
    ///
    /// # Errors
    ///
    /// If the allocation fails, returns the error from CUDA. If `size` is large enough that
    /// `size * mem::size_of::<T>()` overflows `usize`, then returns `InvalidMemoryAllocation`.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the contents of the buffer are initialized before reading from
    /// the buffer.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let mut buffer = unsafe { LockedBuffer::uninitialized(5).unwrap() };
    /// for i in buffer.iter_mut() {
    ///     *i = 0u64;
    /// }
    /// ```
    pub unsafe fn uninitialized(size: usize) -> CudaResult<Self> {
        let ptr: *mut T = if size > 0 && mem::size_of::<T>() > 0 {
            cuda_malloc_locked(size)?
        } else {
            ptr::NonNull::dangling().as_ptr()
        };
        Ok(LockedBuffer {
            buf: ptr,
            capacity: size,
        })
    }

    /// Extracts a slice containing the entire buffer.
    ///
    /// Equivalent to `&s[..]`.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let buffer = LockedBuffer::new(&0u64, 5).unwrap();
    /// let sum: u64 = buffer.as_slice().iter().sum();
    /// ```
    pub fn as_slice(&self) -> &[T] {
        self
    }

    /// Extracts a mutable slice of the entire buffer.
    ///
    /// Equivalent to `&mut s[..]`.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let mut buffer = LockedBuffer::new(&0u64, 5).unwrap();
    /// for i in buffer.as_mut_slice() {
    ///     *i = 12u64;
    /// }
    /// ```
    pub fn as_mut_slice(&mut self) -> &mut [T] {
        self
    }

    /// Creates a `LockedBuffer<T>` directly from the raw components of another locked buffer.
    ///
    /// # Safety
    ///
    /// This is highly unsafe, due to the number of invariants that aren't
    /// checked:
    ///
    /// * `ptr` needs to have been previously allocated via `LockedBuffer` or
    ///   [`cuda_malloc_locked`](fn.cuda_malloc_locked.html).
    /// * `ptr`'s `T` needs to have the same size and alignment as it was allocated with.
    /// * `size` needs to be the capacity (in elements) that the pointer was allocated with.
    ///
    /// Violating these may cause problems like corrupting the CUDA driver's
    /// internal data structures.
    ///
    /// The ownership of `ptr` is effectively transferred to the
    /// `LockedBuffer<T>` which may then deallocate, reallocate or change the
    /// contents of memory pointed to by the pointer at will. Ensure
    /// that nothing else uses the pointer after calling this
    /// function.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use std::mem;
    /// use rustacuda::memory::*;
    ///
    /// let mut buffer = LockedBuffer::new(&0u64, 5).unwrap();
    /// let ptr = buffer.as_mut_ptr();
    /// let size = buffer.len();
    ///
    /// mem::forget(buffer);
    ///
    /// let buffer = unsafe { LockedBuffer::from_raw_parts(ptr, size) };
    /// ```
    pub unsafe fn from_raw_parts(ptr: *mut T, size: usize) -> LockedBuffer<T> {
        LockedBuffer {
            buf: ptr,
            capacity: size,
        }
    }

    /// Destroy a `LockedBuffer`, returning an error if deallocation fails.
    ///
    /// Deallocating page-locked memory can return errors from previous asynchronous work. This
    /// function destroys the given buffer and, on failure, returns the error along with the
    /// un-destroyed buffer.
    ///
    /// # Example
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let x = LockedBuffer::new(&0u64, 5).unwrap();
    /// match LockedBuffer::drop(x) {
    ///     Ok(()) => println!("Successfully destroyed"),
    ///     Err((e, buf)) => {
    ///         println!("Failed to destroy buffer: {:?}", e);
    ///         // Do something with buf
    ///     },
    /// }
    /// ```
    pub fn drop(mut buf: LockedBuffer<T>) -> DropResult<LockedBuffer<T>> {
        if buf.buf.is_null() {
            return Ok(());
        }

        if buf.capacity > 0 && mem::size_of::<T>() > 0 {
            let capacity = buf.capacity;
            let ptr = mem::replace(&mut buf.buf, ptr::null_mut());
            unsafe {
                match cuda_free_locked(ptr) {
                    Ok(()) => {
                        mem::forget(buf);
                        Ok(())
                    }
                    Err(e) => Err((e, LockedBuffer::from_raw_parts(ptr, capacity))),
                }
            }
        } else {
            Ok(())
        }
    }
}

impl<T: DeviceCopy> AsRef<[T]> for LockedBuffer<T> {
    fn as_ref(&self) -> &[T] {
        self
    }
}

impl<T: DeviceCopy> AsMut<[T]> for LockedBuffer<T> {
    fn as_mut(&mut self) -> &mut [T] {
        self
    }
}

impl<T: DeviceCopy> ops::Deref for LockedBuffer<T> {
    type Target = [T];

    fn deref(&self) -> &[T] {
        unsafe { slice::from_raw_parts(self.buf, self.capacity) }
    }
}

impl<T: DeviceCopy> ops::DerefMut for LockedBuffer<T> {
    fn deref_mut(&mut self) -> &mut [T] {
        unsafe { slice::from_raw_parts_mut(self.buf, self.capacity) }
    }
}

impl<T: DeviceCopy> Drop for LockedBuffer<T> {
    fn drop(&mut self) {
        if self.buf.is_null() {
            return;
        }

        if self.capacity > 0 && mem::size_of::<T>() > 0 {
            // No choice but to panic if this fails.
            unsafe {
                cuda_free_locked(self.buf).expect("Failed to deallocate CUDA page-locked memory.");
            }
        }
        self.capacity = 0;
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use std::mem;

    #[derive(Clone, Debug)]
    struct ZeroSizedType;
    unsafe impl DeviceCopy for ZeroSizedType {}

    #[test]
    fn test_new() {
        let _context = crate::quick_init().unwrap();
        let val = 0u64;
        let mut buffer = LockedBuffer::new(&val, 5).unwrap();
        buffer[0] = 1;
    }

    #[test]
    fn test_from_slice() {
        let _context = crate::quick_init().unwrap();
        let values = [0u64; 10];
        let mut buffer = LockedBuffer::from_slice(&values).unwrap();
        for i in buffer[0..3].iter_mut() {
            *i = 10;
        }
    }

    #[test]
    fn from_raw_parts() {
        let _context = crate::quick_init().unwrap();
        let mut buffer = LockedBuffer::new(&0u64, 5).unwrap();
        buffer[2] = 1;
        let ptr = buffer.as_mut_ptr();
        let len = buffer.len();
        mem::forget(buffer);

        let buffer = unsafe { LockedBuffer::from_raw_parts(ptr, len) };
        assert_eq!(&[0u64, 0, 1, 0, 0], buffer.as_slice());
        drop(buffer);
    }
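
    // Added sketch of a test for the slice views (`as_slice`, `as_mut_slice`, `AsRef`, `AsMut`);
    // like the other tests here, it assumes a working CUDA context.
    #[test]
    fn test_slice_views() {
        let _context = crate::quick_init().unwrap();
        let mut buffer = LockedBuffer::new(&0u64, 4).unwrap();
        for (i, x) in buffer.as_mut_slice().iter_mut().enumerate() {
            *x = i as u64;
        }
        // The immutable views all expose the same contents.
        assert_eq!(buffer.as_slice(), &[0u64, 1, 2, 3]);
        let as_ref: &[u64] = buffer.as_ref();
        assert_eq!(as_ref.len(), buffer.len());
    }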

    #[test]
    fn zero_length_buffer() {
        let _context = crate::quick_init().unwrap();
        let buffer = LockedBuffer::new(&0u64, 0).unwrap();
        drop(buffer);
    }

    #[test]
    fn zero_size_type() {
        let _context = crate::quick_init().unwrap();
        let buffer = LockedBuffer::new(&ZeroSizedType, 10).unwrap();
        drop(buffer);
    }

    #[test]
    fn overflows_usize() {
        let _context = crate::quick_init().unwrap();
        let err = LockedBuffer::new(&0u64, ::std::usize::MAX - 1).unwrap_err();
        assert_eq!(CudaError::InvalidMemoryAllocation, err);
    }

    #[test]
    fn test_allocate_correct_size() {
        let _context = crate::quick_init().unwrap();

        // Placeholder - read out available system memory here to choose a realistic size.
        let allocation_size = 1;
        unsafe {
            // For now this only checks that allocating `allocation_size` elements succeeds;
            // with a real size from the placeholder above, it would also exercise the
            // out-of-memory path.
            let _buffer = LockedBuffer::<u64>::uninitialized(allocation_size).unwrap();
        }
    }
}