ringkernel_cuda_codegen/
dsl.rs

1//! Rust DSL functions for writing CUDA kernels.
2//!
3//! This module provides Rust functions that map to CUDA intrinsics during transpilation.
4//! These functions have CPU fallback implementations for testing but are transpiled
5//! to the corresponding CUDA operations when used in kernel code.
6//!
7//! # Thread/Block Index Access
8//!
9//! ```ignore
10//! use ringkernel_cuda_codegen::dsl::*;
11//!
12//! fn my_kernel(...) {
13//!     let tx = thread_idx_x();  // -> threadIdx.x
14//!     let bx = block_idx_x();   // -> blockIdx.x
15//!     let idx = bx * block_dim_x() + tx;  // Global thread index
16//! }
17//! ```
18//!
19//! # Thread Synchronization
20//!
21//! ```ignore
22//! sync_threads();  // -> __syncthreads()
23//! ```
24
25/// Get the thread index within a block (x dimension).
26/// Transpiles to: `threadIdx.x`
27#[inline]
28pub fn thread_idx_x() -> i32 {
29    // CPU fallback: single-threaded execution uses index 0
30    0
31}
32
33/// Get the thread index within a block (y dimension).
34/// Transpiles to: `threadIdx.y`
35#[inline]
36pub fn thread_idx_y() -> i32 {
37    0
38}
39
40/// Get the thread index within a block (z dimension).
41/// Transpiles to: `threadIdx.z`
42#[inline]
43pub fn thread_idx_z() -> i32 {
44    0
45}
46
47/// Get the block index within a grid (x dimension).
48/// Transpiles to: `blockIdx.x`
49#[inline]
50pub fn block_idx_x() -> i32 {
51    0
52}
53
54/// Get the block index within a grid (y dimension).
55/// Transpiles to: `blockIdx.y`
56#[inline]
57pub fn block_idx_y() -> i32 {
58    0
59}
60
61/// Get the block index within a grid (z dimension).
62/// Transpiles to: `blockIdx.z`
63#[inline]
64pub fn block_idx_z() -> i32 {
65    0
66}
67
68/// Get the block dimension (x dimension).
69/// Transpiles to: `blockDim.x`
70#[inline]
71pub fn block_dim_x() -> i32 {
72    1
73}
74
75/// Get the block dimension (y dimension).
76/// Transpiles to: `blockDim.y`
77#[inline]
78pub fn block_dim_y() -> i32 {
79    1
80}
81
82/// Get the block dimension (z dimension).
83/// Transpiles to: `blockDim.z`
84#[inline]
85pub fn block_dim_z() -> i32 {
86    1
87}
88
89/// Get the grid dimension (x dimension).
90/// Transpiles to: `gridDim.x`
91#[inline]
92pub fn grid_dim_x() -> i32 {
93    1
94}
95
96/// Get the grid dimension (y dimension).
97/// Transpiles to: `gridDim.y`
98#[inline]
99pub fn grid_dim_y() -> i32 {
100    1
101}
102
103/// Get the grid dimension (z dimension).
104/// Transpiles to: `gridDim.z`
105#[inline]
106pub fn grid_dim_z() -> i32 {
107    1
108}
109
110/// Synchronize all threads in a block.
111/// Transpiles to: `__syncthreads()`
112#[inline]
113pub fn sync_threads() {
114    // CPU fallback: no-op (single-threaded)
115}
116
117/// Thread memory fence.
118/// Transpiles to: `__threadfence()`
119#[inline]
120pub fn thread_fence() {
121    std::sync::atomic::fence(std::sync::atomic::Ordering::SeqCst);
122}
123
124/// Block-level memory fence.
125/// Transpiles to: `__threadfence_block()`
126#[inline]
127pub fn thread_fence_block() {
128    std::sync::atomic::fence(std::sync::atomic::Ordering::Release);
129}
130
131#[cfg(test)]
132mod tests {
133    use super::*;
134
135    #[test]
136    fn test_thread_indices_default() {
137        assert_eq!(thread_idx_x(), 0);
138        assert_eq!(thread_idx_y(), 0);
139        assert_eq!(thread_idx_z(), 0);
140    }
141
142    #[test]
143    fn test_block_indices_default() {
144        assert_eq!(block_idx_x(), 0);
145        assert_eq!(block_idx_y(), 0);
146        assert_eq!(block_idx_z(), 0);
147    }
148
149    #[test]
150    fn test_dimensions_default() {
151        assert_eq!(block_dim_x(), 1);
152        assert_eq!(block_dim_y(), 1);
153        assert_eq!(grid_dim_x(), 1);
154    }
155}