ringkernel_cuda_codegen/dsl.rs
1//! Rust DSL functions for writing CUDA kernels.
2//!
3//! This module provides Rust functions that map to CUDA intrinsics during transpilation.
4//! These functions have CPU fallback implementations for testing but are transpiled
5//! to the corresponding CUDA operations when used in kernel code.
6//!
7//! # Thread/Block Index Access
8//!
9//! ```ignore
10//! use ringkernel_cuda_codegen::dsl::*;
11//!
12//! fn my_kernel(...) {
13//! let tx = thread_idx_x(); // -> threadIdx.x
14//! let bx = block_idx_x(); // -> blockIdx.x
15//! let idx = bx * block_dim_x() + tx; // Global thread index
16//! }
17//! ```
18//!
19//! # Thread Synchronization
20//!
21//! ```ignore
22//! sync_threads(); // -> __syncthreads()
23//! ```
24
25/// Get the thread index within a block (x dimension).
26/// Transpiles to: `threadIdx.x`
27#[inline]
28pub fn thread_idx_x() -> i32 {
29 // CPU fallback: single-threaded execution uses index 0
30 0
31}
32
33/// Get the thread index within a block (y dimension).
34/// Transpiles to: `threadIdx.y`
35#[inline]
36pub fn thread_idx_y() -> i32 {
37 0
38}
39
40/// Get the thread index within a block (z dimension).
41/// Transpiles to: `threadIdx.z`
42#[inline]
43pub fn thread_idx_z() -> i32 {
44 0
45}
46
47/// Get the block index within a grid (x dimension).
48/// Transpiles to: `blockIdx.x`
49#[inline]
50pub fn block_idx_x() -> i32 {
51 0
52}
53
54/// Get the block index within a grid (y dimension).
55/// Transpiles to: `blockIdx.y`
56#[inline]
57pub fn block_idx_y() -> i32 {
58 0
59}
60
61/// Get the block index within a grid (z dimension).
62/// Transpiles to: `blockIdx.z`
63#[inline]
64pub fn block_idx_z() -> i32 {
65 0
66}
67
68/// Get the block dimension (x dimension).
69/// Transpiles to: `blockDim.x`
70#[inline]
71pub fn block_dim_x() -> i32 {
72 1
73}
74
75/// Get the block dimension (y dimension).
76/// Transpiles to: `blockDim.y`
77#[inline]
78pub fn block_dim_y() -> i32 {
79 1
80}
81
82/// Get the block dimension (z dimension).
83/// Transpiles to: `blockDim.z`
84#[inline]
85pub fn block_dim_z() -> i32 {
86 1
87}
88
89/// Get the grid dimension (x dimension).
90/// Transpiles to: `gridDim.x`
91#[inline]
92pub fn grid_dim_x() -> i32 {
93 1
94}
95
96/// Get the grid dimension (y dimension).
97/// Transpiles to: `gridDim.y`
98#[inline]
99pub fn grid_dim_y() -> i32 {
100 1
101}
102
103/// Get the grid dimension (z dimension).
104/// Transpiles to: `gridDim.z`
105#[inline]
106pub fn grid_dim_z() -> i32 {
107 1
108}
109
110/// Synchronize all threads in a block.
111/// Transpiles to: `__syncthreads()`
112#[inline]
113pub fn sync_threads() {
114 // CPU fallback: no-op (single-threaded)
115}
116
117/// Thread memory fence.
118/// Transpiles to: `__threadfence()`
119#[inline]
120pub fn thread_fence() {
121 std::sync::atomic::fence(std::sync::atomic::Ordering::SeqCst);
122}
123
124/// Block-level memory fence.
125/// Transpiles to: `__threadfence_block()`
126#[inline]
127pub fn thread_fence_block() {
128 std::sync::atomic::fence(std::sync::atomic::Ordering::Release);
129}
130
131#[cfg(test)]
132mod tests {
133 use super::*;
134
135 #[test]
136 fn test_thread_indices_default() {
137 assert_eq!(thread_idx_x(), 0);
138 assert_eq!(thread_idx_y(), 0);
139 assert_eq!(thread_idx_z(), 0);
140 }
141
142 #[test]
143 fn test_block_indices_default() {
144 assert_eq!(block_idx_x(), 0);
145 assert_eq!(block_idx_y(), 0);
146 assert_eq!(block_idx_z(), 0);
147 }
148
149 #[test]
150 fn test_dimensions_default() {
151 assert_eq!(block_dim_x(), 1);
152 assert_eq!(block_dim_y(), 1);
153 assert_eq!(grid_dim_x(), 1);
154 }
155}