rustacuda_core/memory/pointer.rs

use crate::memory::DeviceCopy;

use core::{
    cmp::Ordering,
    fmt::{self, Debug, Pointer},
    hash::{Hash, Hasher},
    ptr,
};

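// These trait impls are written out via a macro rather than with `#[derive(...)]` so that they
// apply for any `T: ?Sized` without requiring `T` itself to implement the corresponding trait
// (a derived impl would add bounds such as `T: Debug` or `T: Clone`).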
macro_rules! derive_traits {
    ( $( $Ptr:ty )* ) => ($(
        impl<T: ?Sized> Debug for $Ptr {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                Debug::fmt(&self.0, f)
            }
        }
        impl<T: ?Sized> Pointer for $Ptr {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                Pointer::fmt(&self.0, f)
            }
        }

        impl<T: ?Sized> Hash for $Ptr {
            fn hash<H: Hasher>(&self, h: &mut H) {
                Hash::hash(&self.0, h);
            }
        }

        impl<T: ?Sized> PartialEq for $Ptr {
            fn eq(&self, other: &$Ptr) -> bool {
                PartialEq::eq(&self.0, &other.0)
            }
        }

        impl<T: ?Sized> Eq for $Ptr {}

        impl<T: ?Sized> PartialOrd for $Ptr {
            fn partial_cmp(&self, other: &$Ptr) -> Option<Ordering> {
                PartialOrd::partial_cmp(&self.0, &other.0)
            }
        }

        impl<T: ?Sized> Ord for $Ptr {
            fn cmp(&self, other: &$Ptr) -> Ordering {
                Ord::cmp(&self.0, &other.0)
            }
        }

        impl<T: ?Sized> Clone for $Ptr {
            fn clone(&self) -> Self {
                Self(self.0)
            }
        }
        impl<T: ?Sized> Copy for $Ptr {}
    )*)
}
derive_traits!(DevicePointer<T> UnifiedPointer<T>);

/// A pointer to device memory.
///
/// `DevicePointer` cannot be dereferenced by the CPU, as it points to a memory allocation on
/// the device. It can be safely copied to the device (e.g. as part of a kernel launch) and either
/// unwrapped or transmuted to an appropriate pointer.
///
/// `DevicePointer` is guaranteed to have an equivalent internal representation to a raw pointer.
/// Thus, it can be safely reinterpreted or transmuted to `*mut T`. It is safe to pass a
/// `DevicePointer` through an FFI boundary to C code expecting a `*mut T`, so long as the code on
/// the other side of that boundary does not attempt to dereference the pointer on the CPU. It is
/// thus possible to pass a `DevicePointer` to a CUDA kernel written in C.
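///
/// # Examples
///
/// A minimal sketch of the layout guarantee described above (illustrative only):
///
/// ```
/// use rustacuda::memory::DevicePointer;
/// use std::mem;
///
/// // `#[repr(transparent)]` guarantees the wrapper has the same layout as a raw pointer.
/// assert_eq!(
///     mem::size_of::<DevicePointer<u64>>(),
///     mem::size_of::<*mut u64>()
/// );
/// ```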
#[repr(transparent)]
pub struct DevicePointer<T: ?Sized>(*mut T);

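// A `DevicePointer` is only an address in device memory; copying the pointer value itself to the
// device is always safe, so no `DeviceCopy` bound on `T` is needed here.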
unsafe impl<T: ?Sized> DeviceCopy for DevicePointer<T> {}

impl<T: ?Sized> DevicePointer<T> {
    /// Wrap the given raw pointer in a DevicePointer. The given pointer is assumed to be a valid
    /// device pointer or null.
    ///
    /// # Safety
    ///
    /// The given pointer must have been allocated with [`cuda_malloc`](fn.cuda_malloc.html) or
    /// be null.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// use std::ptr;
    /// unsafe {
    ///     let null: *mut u64 = ptr::null_mut();
    ///     assert!(DevicePointer::wrap(null).is_null());
    /// }
    /// ```
    pub unsafe fn wrap(ptr: *mut T) -> Self {
        DevicePointer(ptr)
    }

    /// Returns the contained pointer as a raw pointer. The returned pointer is not valid on the CPU
    /// and must not be dereferenced.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let dev_ptr = cuda_malloc::<u64>(1).unwrap();
    ///     let ptr: *const u64 = dev_ptr.as_raw();
    ///     cuda_free(dev_ptr);
    /// }
    /// ```
    pub fn as_raw(self) -> *const T {
        self.0
    }

    /// Returns the contained pointer as a mutable raw pointer. The returned pointer is not valid on the CPU
    /// and must not be dereferenced.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(1).unwrap();
    ///     let ptr: *mut u64 = dev_ptr.as_raw_mut();
    ///     cuda_free(dev_ptr);
    /// }
    /// ```
    pub fn as_raw_mut(&mut self) -> *mut T {
        self.0
    }

    /// Returns true if the pointer is null.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// use std::ptr;
    /// unsafe {
    ///     let null: *mut u64 = ptr::null_mut();
    ///     assert!(DevicePointer::wrap(null).is_null());
    /// }
    /// ```
    pub fn is_null(self) -> bool {
        self.0.is_null()
    }

    /// Returns a null device pointer.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let ptr: DevicePointer<u64> = DevicePointer::null();
    /// assert!(ptr.is_null());
    /// ```
    pub fn null() -> Self
    where
        T: Sized,
    {
        unsafe { Self::wrap(ptr::null_mut()) }
    }

    /// Calculates the offset from a device pointer.
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer offset of
    /// `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is Undefined
    /// Behavior:
    ///
    /// * Both the starting and resulting pointer must be either in bounds or one
    ///   byte past the end of *the same* allocated object.
    ///
    /// * The computed offset, **in bytes**, cannot overflow an `isize`.
    ///
    /// * The offset being in bounds cannot rely on "wrapping around" the address
    ///   space. That is, the infinite-precision sum, **in bytes**, must fit in a `usize`.
    ///
    /// Consider using `wrapping_offset` instead if these constraints are
    /// difficult to satisfy. The only advantage of this method is that it
    /// enables more aggressive compiler optimizations.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(5).unwrap();
    ///     let offset = dev_ptr.offset(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free(dev_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub unsafe fn offset(self, count: isize) -> Self
    where
        T: Sized,
    {
        Self::wrap(self.0.offset(count))
    }

    /// Calculates the offset from a device pointer using wrapping arithmetic.
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer offset of
    /// `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// The resulting pointer does not need to be in bounds, but it is
    /// potentially hazardous to dereference (which requires `unsafe`).
    /// In particular, the resulting pointer may *not* be used to access a
    /// different allocated object than the one `self` points to. In other
    /// words, `x.wrapping_offset(y.wrapping_offset_from(x))` is
    /// *not* the same as `y`, and dereferencing it is undefined behavior
    /// unless `x` and `y` point into the same allocated object.
    ///
    /// Always use `.offset(count)` instead when possible, because `offset`
    /// allows the compiler to optimize better. If you need to cross object
    /// boundaries, cast the pointer to an integer and do the arithmetic there.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(5).unwrap();
    ///     let offset = dev_ptr.wrapping_offset(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free(dev_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub fn wrapping_offset(self, count: isize) -> Self
    where
        T: Sized,
    {
        unsafe { Self::wrap(self.0.wrapping_offset(count)) }
    }

    /// Calculates the offset from a pointer (convenience for `.offset(count as isize)`).
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is Undefined
    /// Behavior:
    ///
    /// * Both the starting and resulting pointer must be either in bounds or one
    ///   byte past the end of an allocated object.
    ///
    /// * The computed offset, **in bytes**, cannot overflow an `isize`.
    ///
    /// * The offset being in bounds cannot rely on "wrapping around" the address
    ///   space. That is, the infinite-precision sum must fit in a `usize`.
    ///
    /// Consider using `wrapping_offset` instead if these constraints are
    /// difficult to satisfy. The only advantage of this method is that it
    /// enables more aggressive compiler optimizations.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(5).unwrap();
    ///     let offset = dev_ptr.add(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free(dev_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    #[allow(clippy::should_implement_trait)]
    pub unsafe fn add(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.offset(count as isize)
    }

    /// Calculates the offset from a pointer (convenience for
    /// `.offset((count as isize).wrapping_neg())`).
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is Undefined
    /// Behavior:
    ///
    /// * Both the starting and resulting pointer must be either in bounds or one
    ///   byte past the end of an allocated object.
    ///
    /// * The computed offset, **in bytes**, cannot overflow an `isize`.
    ///
    /// * The offset being in bounds cannot rely on "wrapping around" the address
    ///   space. That is, the infinite-precision sum must fit in a `usize`.
    ///
    /// Consider using `wrapping_offset` instead if these constraints are
    /// difficult to satisfy. The only advantage of this method is that it
    /// enables more aggressive compiler optimizations.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(5).unwrap();
    ///     let offset = dev_ptr.add(4).sub(3); // Points to the 2nd u64 in the buffer
    ///     cuda_free(dev_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    #[allow(clippy::should_implement_trait)]
    pub unsafe fn sub(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.offset((count as isize).wrapping_neg())
    }

    /// Calculates the offset from a pointer using wrapping arithmetic.
    /// (convenience for `.wrapping_offset(count as isize)`)
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// The resulting pointer does not need to be in bounds, but it is
    /// potentially hazardous to dereference.
    ///
    /// Always use `.add(count)` instead when possible, because `add`
    /// allows the compiler to optimize better.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(5).unwrap();
    ///     let offset = dev_ptr.wrapping_add(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free(dev_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub fn wrapping_add(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.wrapping_offset(count as isize)
    }

    /// Calculates the offset from a pointer using wrapping arithmetic.
    /// (convenience for `.wrapping_offset((count as isize).wrapping_neg())`)
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// The resulting pointer does not need to be in bounds, but it is
    /// potentially hazardous to dereference (which requires `unsafe`).
    ///
    /// Always use `.sub(count)` instead when possible, because `sub`
    /// allows the compiler to optimize better.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut dev_ptr = cuda_malloc::<u64>(5).unwrap();
    ///     let offset = dev_ptr.wrapping_add(4).wrapping_sub(3); // Points to the 2nd u64 in the buffer
    ///     cuda_free(dev_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub fn wrapping_sub(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.wrapping_offset((count as isize).wrapping_neg())
    }
}

/// A pointer to unified memory.
///
/// `UnifiedPointer` can be safely dereferenced by the CPU, as the memory allocation it points to is
/// shared between the CPU and the GPU. It can also be safely copied to the device (e.g. as part of
/// a kernel launch).
///
/// `UnifiedPointer` is guaranteed to have an equivalent internal representation to a raw pointer.
/// Thus, it can be safely reinterpreted or transmuted to `*mut T`. It is also safe to pass a
/// `UnifiedPointer` through an FFI boundary to C code expecting a `*mut T`. It is
/// thus possible to pass a `UnifiedPointer` to a CUDA kernel written in C.
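///
/// # Examples
///
/// A minimal sketch of CPU-side access through unified memory (illustrative only):
///
/// ```
/// # let _context = rustacuda::quick_init().unwrap();
/// use rustacuda::memory::*;
/// unsafe {
///     let mut unified_ptr = cuda_malloc_unified::<u64>(1).unwrap();
///     // Unified memory is visible to the host, so the CPU can write through the raw pointer.
///     *unified_ptr.as_raw_mut() = 42;
///     assert_eq!(*unified_ptr.as_raw(), 42);
///     cuda_free_unified(unified_ptr);
/// }
/// ```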
#[repr(transparent)]
pub struct UnifiedPointer<T: ?Sized>(*mut T);

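// Note that, unlike `DevicePointer`, `UnifiedPointer<T>` is only `DeviceCopy` when the pointee
// type `T` is itself `DeviceCopy`.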
unsafe impl<T: ?Sized + DeviceCopy> DeviceCopy for UnifiedPointer<T> {}

impl<T: ?Sized> UnifiedPointer<T> {
    /// Wrap the given raw pointer in a UnifiedPointer. The given pointer is assumed to be a valid
    /// unified-memory pointer or null.
    ///
    /// # Safety
    ///
    /// The given pointer must have been allocated with
    /// [`cuda_malloc_unified`](fn.cuda_malloc_unified.html) or be null.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// use std::ptr;
    /// unsafe {
    ///     let null: *mut u64 = ptr::null_mut();
    ///     assert!(UnifiedPointer::wrap(null).is_null());
    /// }
    /// ```
    pub unsafe fn wrap(ptr: *mut T) -> Self {
        UnifiedPointer(ptr)
    }

    /// Returns the contained pointer as a raw pointer.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let unified_ptr = cuda_malloc_unified::<u64>(1).unwrap();
    ///     let ptr: *const u64 = unified_ptr.as_raw();
    ///     cuda_free_unified(unified_ptr);
    /// }
    /// ```
    pub fn as_raw(self) -> *const T {
        self.0
    }

    /// Returns the contained pointer as a mutable raw pointer.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(1).unwrap();
    ///     let ptr: *mut u64 = unified_ptr.as_raw_mut();
    ///     *ptr = 5u64;
    ///     cuda_free_unified(unified_ptr);
    /// }
    /// ```
    pub fn as_raw_mut(&mut self) -> *mut T {
        self.0
    }

    /// Returns true if the pointer is null.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// use std::ptr;
    /// unsafe {
    ///     let null: *mut u64 = ptr::null_mut();
    ///     assert!(UnifiedPointer::wrap(null).is_null());
    /// }
    /// ```
    pub fn is_null(self) -> bool {
        self.0.is_null()
    }

    /// Returns a null unified pointer.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// let ptr: UnifiedPointer<u64> = UnifiedPointer::null();
    /// assert!(ptr.is_null());
    /// ```
    pub fn null() -> Self
    where
        T: Sized,
    {
        unsafe { Self::wrap(ptr::null_mut()) }
    }

    /// Calculates the offset from a unified pointer.
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer offset of
    /// `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is Undefined
    /// Behavior:
    ///
    /// * Both the starting and resulting pointer must be either in bounds or one
    ///   byte past the end of *the same* allocated object.
    ///
    /// * The computed offset, **in bytes**, cannot overflow an `isize`.
    ///
    /// * The offset being in bounds cannot rely on "wrapping around" the address
    ///   space. That is, the infinite-precision sum, **in bytes**, must fit in a `usize`.
    ///
    /// Consider using `wrapping_offset` instead if these constraints are
    /// difficult to satisfy. The only advantage of this method is that it
    /// enables more aggressive compiler optimizations.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(5).unwrap();
    ///     let offset = unified_ptr.offset(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free_unified(unified_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub unsafe fn offset(self, count: isize) -> Self
    where
        T: Sized,
    {
        Self::wrap(self.0.offset(count))
    }

    /// Calculates the offset from a unified pointer using wrapping arithmetic.
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer offset of
    /// `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// The resulting pointer does not need to be in bounds, but it is
    /// potentially hazardous to dereference (which requires `unsafe`).
    /// In particular, the resulting pointer may *not* be used to access a
    /// different allocated object than the one `self` points to. In other
    /// words, `x.wrapping_offset(y.wrapping_offset_from(x))` is
    /// *not* the same as `y`, and dereferencing it is undefined behavior
    /// unless `x` and `y` point into the same allocated object.
    ///
    /// Always use `.offset(count)` instead when possible, because `offset`
    /// allows the compiler to optimize better. If you need to cross object
    /// boundaries, cast the pointer to an integer and do the arithmetic there.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(5).unwrap();
    ///     let offset = unified_ptr.wrapping_offset(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free_unified(unified_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub fn wrapping_offset(self, count: isize) -> Self
    where
        T: Sized,
    {
        unsafe { Self::wrap(self.0.wrapping_offset(count)) }
    }

    /// Calculates the offset from a pointer (convenience for `.offset(count as isize)`).
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is Undefined
    /// Behavior:
    ///
    /// * Both the starting and resulting pointer must be either in bounds or one
    ///   byte past the end of an allocated object.
    ///
    /// * The computed offset, **in bytes**, cannot overflow an `isize`.
    ///
    /// * The offset being in bounds cannot rely on "wrapping around" the address
    ///   space. That is, the infinite-precision sum must fit in a `usize`.
    ///
    /// Consider using `wrapping_offset` instead if these constraints are
    /// difficult to satisfy. The only advantage of this method is that it
    /// enables more aggressive compiler optimizations.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(5).unwrap();
    ///     let offset = unified_ptr.add(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free_unified(unified_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    #[allow(clippy::should_implement_trait)]
    pub unsafe fn add(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.offset(count as isize)
    }

    /// Calculates the offset from a pointer (convenience for
    /// `.offset((count as isize).wrapping_neg())`).
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// If any of the following conditions are violated, the result is Undefined
    /// Behavior:
    ///
    /// * Both the starting and resulting pointer must be either in bounds or one
    ///   byte past the end of an allocated object.
    ///
    /// * The computed offset, **in bytes**, cannot overflow an `isize`.
    ///
    /// * The offset being in bounds cannot rely on "wrapping around" the address
    ///   space. That is, the infinite-precision sum must fit in a `usize`.
    ///
    /// Consider using `wrapping_offset` instead if these constraints are
    /// difficult to satisfy. The only advantage of this method is that it
    /// enables more aggressive compiler optimizations.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(5).unwrap();
    ///     let offset = unified_ptr.add(4).sub(3); // Points to the 2nd u64 in the buffer
    ///     cuda_free_unified(unified_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    #[allow(clippy::should_implement_trait)]
    pub unsafe fn sub(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.offset((count as isize).wrapping_neg())
    }

    /// Calculates the offset from a pointer using wrapping arithmetic.
    /// (convenience for `.wrapping_offset(count as isize)`)
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// The resulting pointer does not need to be in bounds, but it is
    /// potentially hazardous to dereference.
    ///
    /// Always use `.add(count)` instead when possible, because `add`
    /// allows the compiler to optimize better.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(5).unwrap();
    ///     let offset = unified_ptr.wrapping_add(1); // Points to the 2nd u64 in the buffer
    ///     cuda_free_unified(unified_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub fn wrapping_add(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.wrapping_offset(count as isize)
    }

    /// Calculates the offset from a pointer using wrapping arithmetic.
    /// (convenience for `.wrapping_offset((count as isize).wrapping_neg())`)
    ///
    /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
    /// offset of `3 * size_of::<T>()` bytes.
    ///
    /// # Safety
    ///
    /// The resulting pointer does not need to be in bounds, but it is
    /// potentially hazardous to dereference (which requires `unsafe`).
    ///
    /// Always use `.sub(count)` instead when possible, because `sub`
    /// allows the compiler to optimize better.
    ///
    /// # Examples
    ///
    /// ```
    /// # let _context = rustacuda::quick_init().unwrap();
    /// use rustacuda::memory::*;
    /// unsafe {
    ///     let mut unified_ptr = cuda_malloc_unified::<u64>(5).unwrap();
    ///     let offset = unified_ptr.wrapping_add(4).wrapping_sub(3); // Points to the 2nd u64 in the buffer
    ///     cuda_free_unified(unified_ptr); // Must free the buffer using the original pointer
    /// }
    /// ```
    pub fn wrapping_sub(self, count: usize) -> Self
    where
        T: Sized,
    {
        self.wrapping_offset((count as isize).wrapping_neg())
    }
}