cuda_std/ptr.rs
1//! CUDA-specific pointer handling logic.
2
3use crate::gpu_only;
4
/// Special areas of GPU memory where a pointer could reside.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum AddressSpace {
    /// Device-wide memory that any thread may read and write.
    Global,
    /// Read/write memory shared by every thread within a single block.
    Shared,
    /// Device-wide memory that is read-only from the GPU side.
    Constant,
    /// Per-thread read/write memory, private to an individual thread.
    Local,
}
17
18/// Determines whether a pointer is in a specific address space.
19///
20/// # Safety
21///
22/// The pointer must be valid for an instance of `T`, otherwise Undefined Behavior is exhibited.
23// TODO(RDambrosio016): Investigate subpar codegen for this function. It seems nvcc implements this not using
24// inline asm, but instead with some sort of compiler intrinsic, because its able to optimize away the function
25// a lot of the time.
26#[gpu_only]
27pub unsafe fn is_in_address_space<T>(ptr: *const T, address_space: AddressSpace) -> bool {
28 let ret: u32;
29 // create a predicate register to store the result of the isspacep into.
30 asm!(".reg .pred p;");
31
32 // perform the actual isspacep operation, and store the result in the predicate register we made.
33 match address_space {
34 AddressSpace::Global => asm!("isspacep.global p, {}", in(reg64) ptr),
35 AddressSpace::Shared => asm!("isspacep.shared p, {}", in(reg64) ptr),
36 AddressSpace::Constant => asm!("isspacep.const p, {}", in(reg64) ptr),
37 AddressSpace::Local => asm!("isspacep.local p, {}", in(reg64) ptr),
38 }
39
40 // finally, use the predicate register to write out a value.
41 asm!("selp.u32 {}, 1, 0, p;", out(reg32) ret);
42
43 ret != 0
44}
45
46/// Converts a pointer from a generic address space, to a specific address space.
47/// This maps directly to the [`cvta`](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta) PTX instruction.
48///
49/// # Safety
50///
51/// The pointer must be valid for an instance of `T`, and the pointer must fall in the specific address space in memory,
52/// otherwise Undefined Behavior is exhibited.
53#[gpu_only]
54pub unsafe fn convert_generic_to_specific_address_space<T>(
55 ptr: *const T,
56 address_space: AddressSpace,
57) -> *const T {
58 let ret: *const T;
59
60 match address_space {
61 AddressSpace::Global => asm!(
62 "cvta.to.global.u64 {}, {}",
63 out(reg64) ret,
64 in(reg64) ptr
65 ),
66 AddressSpace::Shared => asm!(
67 "cvta.to.shared.u64 {}, {}",
68 out(reg64) ret,
69 in(reg64) ptr
70 ),
71 AddressSpace::Constant => asm!(
72 "cvta.to.const.u64 {}, {}",
73 out(reg64) ret,
74 in(reg64) ptr
75 ),
76 AddressSpace::Local => asm!(
77 "cvta.to.local.u64 {}, {}",
78 out(reg64) ret,
79 in(reg64) ptr
80 ),
81 }
82
83 ret
84}
85
86/// Converts a pointer in a specific address space, to a generic address space.
87/// This maps directly to the [`cvta`](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta) PTX instruction.
88///
89/// # Safety
90///
91/// The pointer must be valid for an instance of `T`, and the pointer must fall in the specific address space in memory,
92/// otherwise Undefined Behavior is exhibited.
93#[gpu_only]
94pub unsafe fn convert_specific_address_space_to_generic<T>(
95 ptr: *const T,
96 address_space: AddressSpace,
97) -> *const T {
98 let ret: *const T;
99
100 match address_space {
101 AddressSpace::Global => asm!(
102 "cvta.global.u64 {}, {}",
103 out(reg64) ret,
104 in(reg64) ptr
105 ),
106 AddressSpace::Shared => asm!(
107 "cvta.shared.u64 {}, {}",
108 out(reg64) ret,
109 in(reg64) ptr
110 ),
111 AddressSpace::Constant => asm!(
112 "cvta.const.u64 {}, {}",
113 out(reg64) ret,
114 in(reg64) ptr
115 ),
116 AddressSpace::Local => asm!(
117 "cvta.local.u64 {}, {}",
118 out(reg64) ret,
119 in(reg64) ptr
120 ),
121 }
122
123 ret
124}