ringkernel_procint/cuda/
types.rs

1//! GPU-compatible type definitions for CUDA kernels.
2
3#![allow(missing_docs)]
4
5use crate::models::{
6    ConformanceResult, GpuDFGEdge, GpuDFGGraph, GpuDFGNode, GpuObjectEvent, GpuPartialOrderTrace,
7    GpuPatternMatch,
8};
9
/// Typed handle describing a GPU buffer of `T` elements.
///
/// The element type is carried only by a zero-sized marker; the handle
/// itself stores no `T` values.
#[derive(Debug)]
pub struct GpuBuffer<T> {
    /// Number of elements.
    pub len: usize,
    /// Zero-sized marker tying the handle to its element type.
    _marker: std::marker::PhantomData<T>,
    /// Raw pointer (null on CPU, valid on GPU).
    ///
    /// The field only exists when the `cuda` feature is enabled, and every
    /// constructor in this block initialises it to null.
    #[cfg(feature = "cuda")]
    pub ptr: *mut T,
}

impl<T> Default for GpuBuffer<T> {
    /// Returns a zero-length buffer (with a null pointer under `cuda`).
    fn default() -> Self {
        Self {
            #[cfg(feature = "cuda")]
            ptr: std::ptr::null_mut(),
            _marker: std::marker::PhantomData,
            len: 0,
        }
    }
}

impl<T> GpuBuffer<T> {
    /// Create a new empty buffer; equivalent to [`Default::default`].
    pub fn new() -> Self {
        Default::default()
    }

    /// Create a buffer handle sized for `capacity` elements.
    ///
    /// NOTE(review): the requested capacity is recorded directly in `len`
    /// and nothing is allocated here (the pointer stays null under `cuda`)
    /// — confirm that callers allocate separately.
    pub fn with_capacity(capacity: usize) -> Self {
        // Only `len` differs from the default handle.
        Self {
            len: capacity,
            ..Self::default()
        }
    }

    /// Returns `true` when the buffer holds zero elements.
    pub fn is_empty(&self) -> bool {
        matches!(self.len, 0)
    }
}
54
/// Type aliases for common GPU buffers.
///
/// Buffer of [`GpuObjectEvent`] elements.
pub type EventBuffer = GpuBuffer<GpuObjectEvent>;
/// Buffer of [`GpuDFGNode`] elements.
pub type DFGNodeBuffer = GpuBuffer<GpuDFGNode>;
/// Buffer of [`GpuDFGEdge`] elements.
pub type DFGEdgeBuffer = GpuBuffer<GpuDFGEdge>;
/// Buffer of [`GpuDFGGraph`] elements.
pub type DFGGraphBuffer = GpuBuffer<GpuDFGGraph>;
/// Buffer of [`GpuPatternMatch`] elements.
pub type PatternBuffer = GpuBuffer<GpuPatternMatch>;
/// Buffer of [`ConformanceResult`] elements.
pub type ConformanceBuffer = GpuBuffer<ConformanceResult>;
/// Buffer of [`GpuPartialOrderTrace`] elements.
pub type PartialOrderBuffer = GpuBuffer<GpuPartialOrderTrace>;
63
/// Grid/block geometry and shared-memory size for a kernel launch.
#[derive(Debug, Clone, Copy)]
pub struct LaunchConfig {
    /// Grid dimensions as an `(x, y, z)` tuple.
    pub grid_dim: (u32, u32, u32),
    /// Block dimensions as an `(x, y, z)` tuple.
    pub block_dim: (u32, u32, u32),
    /// Shared memory size in bytes.
    pub shared_mem_bytes: u32,
}

impl Default for LaunchConfig {
    /// A single block of 256 threads along x, with no shared memory.
    fn default() -> Self {
        Self {
            shared_mem_bytes: 0,
            block_dim: (256, 1, 1),
            grid_dim: (1, 1, 1),
        }
    }
}

impl LaunchConfig {
    /// Create 1D launch configuration.
    ///
    /// The grid's x extent is `num_elements / block_size` rounded up, so
    /// `num_elements == 0` yields a grid extent of 0. Shared memory is 0.
    ///
    /// # Panics
    ///
    /// Panics if `block_size` is zero (division by zero in `div_ceil`).
    pub fn linear(num_elements: u32, block_size: u32) -> Self {
        let blocks = num_elements.div_ceil(block_size);
        Self {
            grid_dim: (blocks, 1, 1),
            block_dim: (block_size, 1, 1),
            ..Self::default()
        }
    }

    /// Create 2D launch configuration.
    ///
    /// Blocks are `tile_size` x `tile_size`; the grid holds enough of them,
    /// rounding up, to span `width` along x and `height` along y. Shared
    /// memory is 0.
    ///
    /// # Panics
    ///
    /// Panics if `tile_size` is zero (division by zero in `div_ceil`).
    pub fn grid_2d(width: u32, height: u32, tile_size: u32) -> Self {
        let tiles_x = width.div_ceil(tile_size);
        let tiles_y = height.div_ceil(tile_size);
        Self {
            grid_dim: (tiles_x, tiles_y, 1),
            block_dim: (tile_size, tile_size, 1),
            ..Self::default()
        }
    }

    /// Set shared memory size.
    ///
    /// Consumes the configuration and returns it with `shared_mem_bytes`
    /// replaced by `bytes`; the grid and block dimensions are untouched.
    pub fn with_shared_mem(self, bytes: u32) -> Self {
        Self {
            shared_mem_bytes: bytes,
            ..self
        }
    }
}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// 10000 elements in blocks of 256 need ceil(10000 / 256) = 40 blocks.
    #[test]
    fn test_launch_config_linear() {
        let LaunchConfig {
            grid_dim,
            block_dim,
            ..
        } = LaunchConfig::linear(10000, 256);
        assert_eq!(grid_dim.0, 40);
        assert_eq!(block_dim.0, 256);
    }

    /// A 1024 x 768 domain in 16 x 16 tiles needs a 64 x 48 grid.
    #[test]
    fn test_launch_config_2d() {
        let (grid_x, grid_y, _) = LaunchConfig::grid_2d(1024, 768, 16).grid_dim;
        assert_eq!(grid_x, 64);
        assert_eq!(grid_y, 48);
    }
}
ringkernel_procint/cuda/types.rs

ringkernel_procint/cuda/
types.rs