cust/memory/pointer.rs
use crate::memory::DeviceCopy;
use cust_raw::CUdeviceptr;

use core::{
    fmt::{self, Debug, Pointer},
    hash::Hash,
    ptr,
};
use std::ffi::c_void;
use std::marker::PhantomData;
use std::mem::size_of;

/// A pointer to device memory.
///
/// `DevicePointer` cannot be dereferenced by the CPU, as it is a pointer to a memory allocation in
/// the device. It can be safely copied to the device (e.g. as part of a kernel launch) and either
/// unwrapped or transmuted to an appropriate pointer.
///
/// `DevicePointer` is guaranteed to have an equivalent internal representation to a raw pointer.
/// Thus, it can be safely reinterpreted or transmuted to `*mut T`. It is safe to pass a
/// `DevicePointer` through an FFI boundary to C code expecting a `*mut T`, so long as the code on
/// the other side of that boundary does not attempt to dereference the pointer on the CPU. It is
/// thus possible to pass a `DevicePointer` to a CUDA kernel written in C.
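///
/// # Examples
///
/// A minimal sketch of the FFI guarantee, assuming an allocation from `cuda_malloc`;
/// the pointer is only reinterpreted, never dereferenced on the CPU:
///
/// ```
/// # let _context = cust::quick_init().unwrap();
/// use cust::memory::*;
/// unsafe {
///     let dev_ptr = cuda_malloc::<u64>(1).unwrap();
///     // `#[repr(transparent)]` makes this reinterpretation valid for FFI.
///     let raw: *mut u64 = dev_ptr.as_mut_ptr();
///     assert!(!raw.is_null());
///     cuda_free(dev_ptr);
/// }
/// ```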
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct DevicePointer<T: ?Sized + DeviceCopy> {
    ptr: CUdeviceptr,
    marker: PhantomData<*mut T>,
}

unsafe impl<T: ?Sized + DeviceCopy> DeviceCopy for DevicePointer<T> {}

impl<T: DeviceCopy> Pointer for DevicePointer<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let ptr = self.ptr as *const c_void;
        fmt::Pointer::fmt(&ptr, f)
    }
}

impl<T: ?Sized + DeviceCopy> DevicePointer<T> {
    /// Returns a Rust [`pointer`] created from this pointer, meant for FFI purposes.
    /// **The pointer is not dereferenceable from the CPU!**
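    ///
    /// # Examples
    ///
    /// A minimal sketch, assuming an allocation from `cuda_malloc`:
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let dev_ptr = cuda_malloc::<u64>(1).unwrap();
    ///     let ptr: *const u64 = dev_ptr.as_ptr();
    ///     assert!(!ptr.is_null());
    ///     cuda_free(dev_ptr);
    /// }
    /// ```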
    pub fn as_ptr(&self) -> *const T
    where
        T: Sized,
    {
        self.ptr as *const T
    }

    /// Returns a Rust [`pointer`] created from this pointer, meant for FFI purposes.
    /// **The pointer is not dereferenceable from the CPU!**
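    ///
    /// # Examples
    ///
    /// A minimal sketch, assuming an allocation from `cuda_malloc`; the raw pointer is
    /// suitable for FFI (e.g. as a kernel parameter) but must not be dereferenced on
    /// the CPU:
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let dev_ptr = cuda_malloc::<u64>(1).unwrap();
    ///     let ptr: *mut u64 = dev_ptr.as_mut_ptr();
    ///     assert!(!ptr.is_null());
    ///     cuda_free(dev_ptr);
    /// }
    /// ```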
    pub fn as_mut_ptr(&self) -> *mut T
    where
        T: Sized,
    {
        self.ptr as *mut T
    }

    /// Returns the contained `CUdeviceptr`.
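    ///
    /// # Examples
    ///
    /// A minimal sketch, assuming an allocation from `cuda_malloc`; the raw
    /// `CUdeviceptr` is the representation the CUDA driver API expects:
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let dev_ptr = cuda_malloc::<u64>(1).unwrap();
    ///     let raw = dev_ptr.as_raw();
    ///     assert_ne!(raw, 0);
    ///     cuda_free(dev_ptr);
    /// }
    /// ```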
    pub fn as_raw(&self) -> CUdeviceptr {
        self.ptr
    }

    /// Creates a `DevicePointer` from a raw CUDA device pointer.
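    ///
    /// # Examples
    ///
    /// A minimal sketch: round-tripping a pointer through its raw representation
    /// (assumes an allocation from `cuda_malloc`):
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let dev_ptr = cuda_malloc::<u64>(1).unwrap();
    ///     let roundtrip = DevicePointer::<u64>::from_raw(dev_ptr.as_raw());
    ///     assert_eq!(roundtrip.as_raw(), dev_ptr.as_raw());
    ///     cuda_free(dev_ptr);
    /// }
    /// ```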
    pub fn from_raw(ptr: CUdeviceptr) -> Self {
        Self {
            ptr,
            marker: PhantomData,
        }
    }

    /// Returns true if the pointer is null.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// let ptr: DevicePointer<u64> = DevicePointer::null();
    /// assert!(ptr.is_null());
    /// ```
    pub fn is_null(self) -> bool {
        self.ptr == 0
    }

    /// Returns a null device pointer.
    ///
    // TODO (AL): do we even want this?
    pub fn null() -> Self
    where
        T: Sized,
    {
        Self {
            ptr: 0,
            marker: PhantomData,
        }
    }

    /// Calculates the offset from a device pointer.
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer offset of
    /// `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is Undefined
    /// Behavior:
    ///
    /// * Both the starting and resulting pointer must be either in bounds or one
    ///   byte past the end of *the same* allocated object.
    ///
    /// * The computed offset, **in bytes**, cannot overflow an `isize`.
    ///
    /// * The offset being in bounds cannot rely on "wrapping around" the address
    ///   space. That is, the infinite-precision sum, **in bytes**, must fit in a `usize`.
    ///
    /// Consider using `wrapping_offset` instead if these constraints are
    /// difficult to satisfy. The only advantage of this method is that it
    /// enables more aggressive compiler optimizations.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(5).unwrap();
    ///     let offset = dev_ptr.offset(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free(dev_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub unsafe fn offset(self, count: isize) -> Self
    where
        T: Sized,
    {
        // Compute the byte offset with wrapping arithmetic so a negative `count`
        // cannot overflow-panic in debug builds; the safety contract above already
        // requires the result to be in bounds.
        let ptr = self
            .ptr
            .wrapping_add((count as i64).wrapping_mul(size_of::<T>() as i64) as u64);
        Self {
            ptr,
            marker: PhantomData,
        }
    }

    /// Calculates the offset from a device pointer using wrapping arithmetic.
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer offset of
    /// `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// The resulting pointer does not need to be in bounds, but it is
    /// potentially hazardous to dereference (which requires `unsafe`).
    /// In particular, the resulting pointer may *not* be used to access a
    /// different allocated object than the one `self` points to. In other
    /// words, `x.wrapping_offset(y.wrapping_offset_from(x))` is
    /// *not* the same as `y`, and dereferencing it is undefined behavior
    /// unless `x` and `y` point into the same allocated object.
    ///
    /// Always use `.offset(count)` instead when possible, because `offset`
    /// allows the compiler to optimize better. If you need to cross object
    /// boundaries, cast the pointer to an integer and do the arithmetic there.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(5).unwrap();
    ///     let offset = dev_ptr.wrapping_offset(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free(dev_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub fn wrapping_offset(self, count: isize) -> Self
    where
        T: Sized,
    {
        // Wrapping arithmetic throughout: the byte offset is computed in `i64` so a
        // negative `count` is sign-extended correctly before the wrapping add.
        let ptr = self
            .ptr
            .wrapping_add((count as i64).wrapping_mul(size_of::<T>() as i64) as u64);
        Self {
            ptr,
            marker: PhantomData,
        }
    }

    /// Calculates the offset from a pointer (convenience for `.offset(count as isize)`).
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is Undefined
    /// Behavior:
    ///
    /// * Both the starting and resulting pointer must be either in bounds or one
    ///   byte past the end of an allocated object.
    ///
    /// * The computed offset, **in bytes**, cannot overflow an `isize`.
    ///
    /// * The offset being in bounds cannot rely on "wrapping around" the address
    ///   space. That is, the infinite-precision sum must fit in a `usize`.
    ///
    /// Consider using `wrapping_offset` instead if these constraints are
    /// difficult to satisfy. The only advantage of this method is that it
    /// enables more aggressive compiler optimizations.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(5).unwrap();
    ///     let offset = dev_ptr.add(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free(dev_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    #[allow(clippy::should_implement_trait)]
    pub unsafe fn add(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.offset(count as isize)
    }

    /// Calculates the offset from a pointer (convenience for
    /// `.offset((count as isize).wrapping_neg())`).
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is Undefined
    /// Behavior:
    ///
    /// * Both the starting and resulting pointer must be either in bounds or one
    ///   byte past the end of an allocated object.
    ///
    /// * The computed offset, **in bytes**, cannot overflow an `isize`.
    ///
    /// * The offset being in bounds cannot rely on "wrapping around" the address
    ///   space. That is, the infinite-precision sum must fit in a `usize`.
    ///
    /// Consider using `wrapping_offset` instead if these constraints are
    /// difficult to satisfy. The only advantage of this method is that it
    /// enables more aggressive compiler optimizations.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(5).unwrap();
    ///     let offset = dev_ptr.add(4).sub(3); // Points to the 2nd u64 in the buffer
    ///     cuda_free(dev_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    #[allow(clippy::should_implement_trait)]
    pub unsafe fn sub(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.offset((count as isize).wrapping_neg())
    }

    /// Calculates the offset from a pointer using wrapping arithmetic.
    /// (convenience for `.wrapping_offset(count as isize)`)
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// The resulting pointer does not need to be in bounds, but it is
    /// potentially hazardous to dereference.
    ///
    /// Always use `.add(count)` instead when possible, because `add`
    /// allows the compiler to optimize better.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(5).unwrap();
    ///     let offset = dev_ptr.wrapping_add(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free(dev_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub fn wrapping_add(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.wrapping_offset(count as isize)
    }

    /// Calculates the offset from a pointer using wrapping arithmetic.
    /// (convenience for `.wrapping_offset((count as isize).wrapping_neg())`)
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// The resulting pointer does not need to be in bounds, but it is
    /// potentially hazardous to dereference (which requires `unsafe`).
    ///
    /// Always use `.sub(count)` instead when possible, because `sub`
    /// allows the compiler to optimize better.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(5).unwrap();
    ///     let offset = dev_ptr.wrapping_add(4).wrapping_sub(3); // Points to the 2nd u64 in the buffer
    ///     cuda_free(dev_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub fn wrapping_sub(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.wrapping_offset((count as isize).wrapping_neg())
    }

    /// Casts this device pointer to another type.
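    ///
    /// # Examples
    ///
    /// A minimal sketch: viewing a `u64` allocation as bytes (assumes an allocation
    /// from `cuda_malloc`; only the pointee type changes, not the address):
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let dev_ptr = cuda_malloc::<u64>(1).unwrap();
    ///     let byte_ptr: DevicePointer<u8> = dev_ptr.cast();
    ///     assert_eq!(byte_ptr.as_raw(), dev_ptr.as_raw());
    ///     cuda_free(dev_ptr);
    /// }
    /// ```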
    pub fn cast<U: DeviceCopy>(self) -> DevicePointer<U> {
        DevicePointer::from_raw(self.ptr)
    }
}

/// A pointer to unified memory.
///
/// `UnifiedPointer` can be safely dereferenced by the CPU, as the memory allocation it points to is
/// shared between the CPU and the GPU. It can also be safely copied to the device (e.g. as part of
/// a kernel launch).
///
/// `UnifiedPointer` is guaranteed to have an equivalent internal representation to a raw pointer.
/// Thus, it can be safely reinterpreted or transmuted to `*mut T`. It is also safe to pass a
/// `UnifiedPointer` through an FFI boundary to C code expecting a `*mut T`. It is
/// thus possible to pass a `UnifiedPointer` to a CUDA kernel written in C.
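///
/// # Examples
///
/// A minimal sketch, assuming an allocation from
/// [`cuda_malloc_unified`](fn.cuda_malloc_unified.html); unlike device memory, the CPU
/// may dereference it directly:
///
/// ```
/// # let _context = cust::quick_init().unwrap();
/// use cust::memory::*;
/// unsafe {
///     let mut unified_ptr = cuda_malloc_unified::<u64>(1).unwrap();
///     // Unified memory is visible to both CPU and GPU, so a CPU write is valid here.
///     *unified_ptr.as_raw_mut() = 42;
///     assert_eq!(*unified_ptr.as_raw(), 42);
///     cuda_free_unified(unified_ptr);
/// }
/// ```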
#[repr(transparent)]
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct UnifiedPointer<T: ?Sized + DeviceCopy>(*mut T);

unsafe impl<T: ?Sized + DeviceCopy> DeviceCopy for UnifiedPointer<T> {}

impl<T: DeviceCopy> Pointer for UnifiedPointer<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Pointer::fmt(&self.0, f)
    }
}

impl<T: ?Sized + DeviceCopy> UnifiedPointer<T> {
    /// Wraps the given raw pointer in a `UnifiedPointer`. The given pointer is assumed to be a
    /// valid, unified-memory pointer or null.
    ///
    /// # Safety
    ///
    /// The given pointer must have been allocated with
    /// [`cuda_malloc_unified`](fn.cuda_malloc_unified.html) or be null.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// use std::ptr;
    /// unsafe {
    ///     let null: *mut u64 = ptr::null_mut();
    ///     assert!(UnifiedPointer::wrap(null).is_null());
    /// }
    /// ```
    pub unsafe fn wrap(ptr: *mut T) -> Self {
        UnifiedPointer(ptr)
    }

    /// Returns the contained pointer as a raw pointer.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let unified_ptr = cuda_malloc_unified::<u64>(1).unwrap();
    ///     let ptr: *const u64 = unified_ptr.as_raw();
    ///     cuda_free_unified(unified_ptr);
    /// }
    /// ```
    pub fn as_raw(self) -> *const T {
        self.0
    }

    /// Returns the contained pointer as a mutable raw pointer.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(1).unwrap();
    ///     let ptr: *mut u64 = unified_ptr.as_raw_mut();
    ///     *ptr = 5u64;
    ///     cuda_free_unified(unified_ptr);
    /// }
    /// ```
    pub fn as_raw_mut(&mut self) -> *mut T {
        self.0
    }

    /// Returns true if the pointer is null.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// use std::ptr;
    /// unsafe {
    ///     let null: *mut u64 = ptr::null_mut();
    ///     assert!(UnifiedPointer::wrap(null).is_null());
    /// }
    /// ```
    pub fn is_null(self) -> bool {
        self.0.is_null()
    }

    /// Returns a null unified pointer.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// let ptr: UnifiedPointer<u64> = UnifiedPointer::null();
    /// assert!(ptr.is_null());
    /// ```
    pub fn null() -> Self
    where
        T: Sized,
    {
        unsafe { Self::wrap(ptr::null_mut()) }
    }

    /// Calculates the offset from a unified pointer.
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer offset of
    /// `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is Undefined
    /// Behavior:
    ///
    /// * Both the starting and resulting pointer must be either in bounds or one
    ///   byte past the end of *the same* allocated object.
    ///
    /// * The computed offset, **in bytes**, cannot overflow an `isize`.
    ///
    /// * The offset being in bounds cannot rely on "wrapping around" the address
    ///   space. That is, the infinite-precision sum, **in bytes**, must fit in a `usize`.
    ///
    /// Consider using `wrapping_offset` instead if these constraints are
    /// difficult to satisfy. The only advantage of this method is that it
    /// enables more aggressive compiler optimizations.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(5).unwrap();
    ///     let offset = unified_ptr.offset(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free_unified(unified_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub unsafe fn offset(self, count: isize) -> Self
    where
        T: Sized,
    {
        Self::wrap(self.0.offset(count))
    }

    /// Calculates the offset from a unified pointer using wrapping arithmetic.
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer offset of
    /// `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// The resulting pointer does not need to be in bounds, but it is
    /// potentially hazardous to dereference (which requires `unsafe`).
    /// In particular, the resulting pointer may *not* be used to access a
    /// different allocated object than the one `self` points to. In other
    /// words, `x.wrapping_offset(y.wrapping_offset_from(x))` is
    /// *not* the same as `y`, and dereferencing it is undefined behavior
    /// unless `x` and `y` point into the same allocated object.
    ///
    /// Always use `.offset(count)` instead when possible, because `offset`
    /// allows the compiler to optimize better. If you need to cross object
    /// boundaries, cast the pointer to an integer and do the arithmetic there.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(5).unwrap();
    ///     let offset = unified_ptr.wrapping_offset(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free_unified(unified_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub fn wrapping_offset(self, count: isize) -> Self
    where
        T: Sized,
    {
        unsafe { Self::wrap(self.0.wrapping_offset(count)) }
    }

    /// Calculates the offset from a pointer (convenience for `.offset(count as isize)`).
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is Undefined
    /// Behavior:
    ///
    /// * Both the starting and resulting pointer must be either in bounds or one
    ///   byte past the end of an allocated object.
    ///
    /// * The computed offset, **in bytes**, cannot overflow an `isize`.
    ///
    /// * The offset being in bounds cannot rely on "wrapping around" the address
    ///   space. That is, the infinite-precision sum must fit in a `usize`.
    ///
    /// Consider using `wrapping_offset` instead if these constraints are
    /// difficult to satisfy. The only advantage of this method is that it
    /// enables more aggressive compiler optimizations.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(5).unwrap();
    ///     let offset = unified_ptr.add(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free_unified(unified_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    #[allow(clippy::should_implement_trait)]
    pub unsafe fn add(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.offset(count as isize)
    }

    /// Calculates the offset from a pointer (convenience for
    /// `.offset((count as isize).wrapping_neg())`).
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is Undefined
    /// Behavior:
    ///
    /// * Both the starting and resulting pointer must be either in bounds or one
    ///   byte past the end of an allocated object.
    ///
    /// * The computed offset, **in bytes**, cannot overflow an `isize`.
    ///
    /// * The offset being in bounds cannot rely on "wrapping around" the address
    ///   space. That is, the infinite-precision sum must fit in a `usize`.
    ///
    /// Consider using `wrapping_offset` instead if these constraints are
    /// difficult to satisfy. The only advantage of this method is that it
    /// enables more aggressive compiler optimizations.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(5).unwrap();
    ///     let offset = unified_ptr.add(4).sub(3); // Points to the 2nd u64 in the buffer
    ///     cuda_free_unified(unified_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    #[allow(clippy::should_implement_trait)]
    pub unsafe fn sub(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.offset((count as isize).wrapping_neg())
    }

    /// Calculates the offset from a pointer using wrapping arithmetic.
    /// (convenience for `.wrapping_offset(count as isize)`)
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// The resulting pointer does not need to be in bounds, but it is
    /// potentially hazardous to dereference.
    ///
    /// Always use `.add(count)` instead when possible, because `add`
    /// allows the compiler to optimize better.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(5).unwrap();
    ///     let offset = unified_ptr.wrapping_add(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free_unified(unified_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub fn wrapping_add(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.wrapping_offset(count as isize)
    }

    /// Calculates the offset from a pointer using wrapping arithmetic.
    /// (convenience for `.wrapping_offset((count as isize).wrapping_neg())`)
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// The resulting pointer does not need to be in bounds, but it is
    /// potentially hazardous to dereference (which requires `unsafe`).
    ///
    /// Always use `.sub(count)` instead when possible, because `sub`
    /// allows the compiler to optimize better.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = cust::quick_init().unwrap();
    /// use cust::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(5).unwrap();
    ///     let offset = unified_ptr.wrapping_add(4).wrapping_sub(3); // Points to the 2nd u64 in the buffer
    ///     cuda_free_unified(unified_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub fn wrapping_sub(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.wrapping_offset((count as isize).wrapping_neg())
    }
}