cuda_std/
ptr.rs

1//! CUDA-specific pointer handling logic.
2
3use crate::gpu_only;
4
/// The distinct regions of GPU memory that a pointer may refer to.
///
/// Each variant names one memory region; the functions in this module use it to
/// select which region a pointer is tested against or converted to/from.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum AddressSpace {
    /// Device-wide memory that can be both read and written.
    Global,
    /// Read/write memory local to a block, visible to every thread in that block.
    Shared,
    /// Device-wide memory that can only be read.
    Constant,
    /// Read/write memory private to a single thread.
    Local,
}
17
18/// Determines whether a pointer is in a specific address space.
19///
20/// # Safety
21///
22/// The pointer must be valid for an instance of `T`, otherwise Undefined Behavior is exhibited.
23// TODO(RDambrosio016): Investigate subpar codegen for this function. It seems nvcc implements this not using
24// inline asm, but instead with some sort of compiler intrinsic, because its able to optimize away the function
25// a lot of the time.
26#[gpu_only]
27pub unsafe fn is_in_address_space<T>(ptr: *const T, address_space: AddressSpace) -> bool {
28    let ret: u32;
29    // create a predicate register to store the result of the isspacep into.
30    asm!(".reg .pred p;");
31
32    // perform the actual isspacep operation, and store the result in the predicate register we made.
33    match address_space {
34        AddressSpace::Global => asm!("isspacep.global p, {}", in(reg64) ptr),
35        AddressSpace::Shared => asm!("isspacep.shared p, {}", in(reg64) ptr),
36        AddressSpace::Constant => asm!("isspacep.const p, {}", in(reg64) ptr),
37        AddressSpace::Local => asm!("isspacep.local p, {}", in(reg64) ptr),
38    }
39
40    // finally, use the predicate register to write out a value.
41    asm!("selp.u32 {}, 1, 0, p;", out(reg32) ret);
42
43    ret != 0
44}
45
46/// Converts a pointer from a generic address space, to a specific address space.
47/// This maps directly to the [`cvta`](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta) PTX instruction.
48///
49/// # Safety
50///
51/// The pointer must be valid for an instance of `T`, and the pointer must fall in the specific address space in memory,
52/// otherwise Undefined Behavior is exhibited.
53#[gpu_only]
54pub unsafe fn convert_generic_to_specific_address_space<T>(
55    ptr: *const T,
56    address_space: AddressSpace,
57) -> *const T {
58    let ret: *const T;
59
60    match address_space {
61        AddressSpace::Global => asm!(
62            "cvta.to.global.u64 {}, {}",
63            out(reg64) ret,
64            in(reg64) ptr
65        ),
66        AddressSpace::Shared => asm!(
67            "cvta.to.shared.u64 {}, {}",
68            out(reg64) ret,
69            in(reg64) ptr
70        ),
71        AddressSpace::Constant => asm!(
72            "cvta.to.const.u64 {}, {}",
73            out(reg64) ret,
74            in(reg64) ptr
75        ),
76        AddressSpace::Local => asm!(
77            "cvta.to.local.u64 {}, {}",
78            out(reg64) ret,
79            in(reg64) ptr
80        ),
81    }
82
83    ret
84}
85
86/// Converts a pointer in a specific address space, to a generic address space.
87/// This maps directly to the [`cvta`](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta) PTX instruction.
88///
89/// # Safety
90///
91/// The pointer must be valid for an instance of `T`, and the pointer must fall in the specific address space in memory,
92/// otherwise Undefined Behavior is exhibited.
93#[gpu_only]
94pub unsafe fn convert_specific_address_space_to_generic<T>(
95    ptr: *const T,
96    address_space: AddressSpace,
97) -> *const T {
98    let ret: *const T;
99
100    match address_space {
101        AddressSpace::Global => asm!(
102            "cvta.global.u64 {}, {}",
103            out(reg64) ret,
104            in(reg64) ptr
105        ),
106        AddressSpace::Shared => asm!(
107            "cvta.shared.u64 {}, {}",
108            out(reg64) ret,
109            in(reg64) ptr
110        ),
111        AddressSpace::Constant => asm!(
112            "cvta.const.u64 {}, {}",
113            out(reg64) ret,
114            in(reg64) ptr
115        ),
116        AddressSpace::Local => asm!(
117            "cvta.local.u64 {}, {}",
118            out(reg64) ret,
119            in(reg64) ptr
120        ),
121    }
122
123    ret
124}