ringkernel_cuda_codegen/
intrinsics.rs

1//! GPU intrinsic mapping for CUDA code generation.
2//!
3//! This module provides mappings from high-level Rust operations to
4//! CUDA intrinsics and built-in functions.
5
6use std::collections::HashMap;
7
/// GPU intrinsic operations.
///
/// Each variant corresponds to a CUDA built-in function, intrinsic, or
/// special register; [`GpuIntrinsic::to_cuda_string`] gives the mapping.
///
/// All variants are fieldless, so the enum derives `Copy`, `Eq`, and `Hash`
/// (consistent with `RingKernelIntrinsic`) and can be used as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GpuIntrinsic {
    // === Synchronization ===
    /// Block-wide barrier (`__syncthreads`).
    SyncThreads,
    /// Device-scope memory fence (`__threadfence`).
    ThreadFence,
    /// Block-scope memory fence (`__threadfence_block`).
    ThreadFenceBlock,
    /// System-scope memory fence (`__threadfence_system`).
    ThreadFenceSystem,

    // === Atomic operations ===
    /// Atomic add (`atomicAdd`).
    AtomicAdd,
    /// Atomic subtract (`atomicSub`).
    AtomicSub,
    /// Atomic minimum (`atomicMin`).
    AtomicMin,
    /// Atomic maximum (`atomicMax`).
    AtomicMax,
    /// Atomic exchange (`atomicExch`).
    AtomicExch,
    /// Atomic compare-and-swap (`atomicCAS`).
    AtomicCas,

    // === Math functions (single-precision variants) ===
    /// Square root (`sqrtf`).
    Sqrt,
    /// Reciprocal square root (`rsqrtf`).
    Rsqrt,
    /// Absolute value (`abs`).
    Abs,
    /// Float absolute value (`fabsf`).
    Fabs,
    /// Floor (`floorf`).
    Floor,
    /// Ceiling (`ceilf`).
    Ceil,
    /// Round to nearest (`roundf`).
    Round,
    /// Sine (`sinf`).
    Sin,
    /// Cosine (`cosf`).
    Cos,
    /// Tangent (`tanf`).
    Tan,
    /// Exponential (`expf`).
    Exp,
    /// Natural logarithm (`logf`).
    Log,
    /// Power (`powf`).
    Pow,
    /// Fused multiply-add (`fmaf`).
    Fma,
    /// Minimum (`fminf`).
    Min,
    /// Maximum (`fmaxf`).
    Max,

    // === Warp-level operations ===
    /// Warp shuffle (`__shfl_sync`).
    WarpShfl,
    /// Warp shuffle up (`__shfl_up_sync`).
    WarpShflUp,
    /// Warp shuffle down (`__shfl_down_sync`).
    WarpShflDown,
    /// Warp shuffle XOR (`__shfl_xor_sync`).
    WarpShflXor,
    /// Active-lane mask (`__activemask`).
    WarpActiveMask,
    /// Warp ballot vote (`__ballot_sync`).
    WarpBallot,
    /// Warp all-lanes vote (`__all_sync`).
    WarpAll,
    /// Warp any-lane vote (`__any_sync`).
    WarpAny,

    // === CUDA thread/block indices ===
    /// `threadIdx.x`
    ThreadIdxX,
    /// `threadIdx.y`
    ThreadIdxY,
    /// `threadIdx.z`
    ThreadIdxZ,
    /// `blockIdx.x`
    BlockIdxX,
    /// `blockIdx.y`
    BlockIdxY,
    /// `blockIdx.z`
    BlockIdxZ,
    /// `blockDim.x`
    BlockDimX,
    /// `blockDim.y`
    BlockDimY,
    /// `blockDim.z`
    BlockDimZ,
    /// `gridDim.x`
    GridDimX,
    /// `gridDim.y`
    GridDimY,
    /// `gridDim.z`
    GridDimZ,
}
68
69impl GpuIntrinsic {
70    /// Convert to CUDA function/intrinsic name.
71    pub fn to_cuda_string(&self) -> &'static str {
72        match self {
73            GpuIntrinsic::SyncThreads => "__syncthreads()",
74            GpuIntrinsic::ThreadFence => "__threadfence()",
75            GpuIntrinsic::ThreadFenceBlock => "__threadfence_block()",
76            GpuIntrinsic::ThreadFenceSystem => "__threadfence_system()",
77            GpuIntrinsic::AtomicAdd => "atomicAdd",
78            GpuIntrinsic::AtomicSub => "atomicSub",
79            GpuIntrinsic::AtomicMin => "atomicMin",
80            GpuIntrinsic::AtomicMax => "atomicMax",
81            GpuIntrinsic::AtomicExch => "atomicExch",
82            GpuIntrinsic::AtomicCas => "atomicCAS",
83            GpuIntrinsic::Sqrt => "sqrtf",
84            GpuIntrinsic::Rsqrt => "rsqrtf",
85            GpuIntrinsic::Abs => "abs",
86            GpuIntrinsic::Fabs => "fabsf",
87            GpuIntrinsic::Floor => "floorf",
88            GpuIntrinsic::Ceil => "ceilf",
89            GpuIntrinsic::Round => "roundf",
90            GpuIntrinsic::Sin => "sinf",
91            GpuIntrinsic::Cos => "cosf",
92            GpuIntrinsic::Tan => "tanf",
93            GpuIntrinsic::Exp => "expf",
94            GpuIntrinsic::Log => "logf",
95            GpuIntrinsic::Pow => "powf",
96            GpuIntrinsic::Fma => "fmaf",
97            GpuIntrinsic::Min => "fminf",
98            GpuIntrinsic::Max => "fmaxf",
99            GpuIntrinsic::WarpShfl => "__shfl_sync",
100            GpuIntrinsic::WarpShflUp => "__shfl_up_sync",
101            GpuIntrinsic::WarpShflDown => "__shfl_down_sync",
102            GpuIntrinsic::WarpShflXor => "__shfl_xor_sync",
103            GpuIntrinsic::WarpActiveMask => "__activemask()",
104            GpuIntrinsic::WarpBallot => "__ballot_sync",
105            GpuIntrinsic::WarpAll => "__all_sync",
106            GpuIntrinsic::WarpAny => "__any_sync",
107            GpuIntrinsic::ThreadIdxX => "threadIdx.x",
108            GpuIntrinsic::ThreadIdxY => "threadIdx.y",
109            GpuIntrinsic::ThreadIdxZ => "threadIdx.z",
110            GpuIntrinsic::BlockIdxX => "blockIdx.x",
111            GpuIntrinsic::BlockIdxY => "blockIdx.y",
112            GpuIntrinsic::BlockIdxZ => "blockIdx.z",
113            GpuIntrinsic::BlockDimX => "blockDim.x",
114            GpuIntrinsic::BlockDimY => "blockDim.y",
115            GpuIntrinsic::BlockDimZ => "blockDim.z",
116            GpuIntrinsic::GridDimX => "gridDim.x",
117            GpuIntrinsic::GridDimY => "gridDim.y",
118            GpuIntrinsic::GridDimZ => "gridDim.z",
119        }
120    }
121}
122
/// Registry for mapping Rust function names to GPU intrinsics.
#[derive(Debug)]
pub struct IntrinsicRegistry {
    // Rust-side function name (e.g. "sqrt") -> GPU intrinsic it lowers to.
    mappings: HashMap<String, GpuIntrinsic>,
}
128
129impl Default for IntrinsicRegistry {
130    fn default() -> Self {
131        Self::new()
132    }
133}
134
135impl IntrinsicRegistry {
136    /// Create a new registry with default mappings.
137    pub fn new() -> Self {
138        let mut mappings = HashMap::new();
139
140        // Synchronization
141        mappings.insert("sync_threads".to_string(), GpuIntrinsic::SyncThreads);
142        mappings.insert("thread_fence".to_string(), GpuIntrinsic::ThreadFence);
143        mappings.insert(
144            "thread_fence_block".to_string(),
145            GpuIntrinsic::ThreadFenceBlock,
146        );
147        mappings.insert(
148            "thread_fence_system".to_string(),
149            GpuIntrinsic::ThreadFenceSystem,
150        );
151
152        // Atomics (common naming)
153        mappings.insert("atomic_add".to_string(), GpuIntrinsic::AtomicAdd);
154        mappings.insert("atomic_sub".to_string(), GpuIntrinsic::AtomicSub);
155        mappings.insert("atomic_min".to_string(), GpuIntrinsic::AtomicMin);
156        mappings.insert("atomic_max".to_string(), GpuIntrinsic::AtomicMax);
157        mappings.insert("atomic_exchange".to_string(), GpuIntrinsic::AtomicExch);
158        mappings.insert("atomic_cas".to_string(), GpuIntrinsic::AtomicCas);
159
160        // Math functions (Rust std naming)
161        mappings.insert("sqrt".to_string(), GpuIntrinsic::Sqrt);
162        mappings.insert("abs".to_string(), GpuIntrinsic::Fabs);
163        mappings.insert("floor".to_string(), GpuIntrinsic::Floor);
164        mappings.insert("ceil".to_string(), GpuIntrinsic::Ceil);
165        mappings.insert("round".to_string(), GpuIntrinsic::Round);
166        mappings.insert("sin".to_string(), GpuIntrinsic::Sin);
167        mappings.insert("cos".to_string(), GpuIntrinsic::Cos);
168        mappings.insert("tan".to_string(), GpuIntrinsic::Tan);
169        mappings.insert("exp".to_string(), GpuIntrinsic::Exp);
170        mappings.insert("ln".to_string(), GpuIntrinsic::Log);
171        mappings.insert("log".to_string(), GpuIntrinsic::Log);
172        mappings.insert("powf".to_string(), GpuIntrinsic::Pow);
173        mappings.insert("powi".to_string(), GpuIntrinsic::Pow);
174        mappings.insert("mul_add".to_string(), GpuIntrinsic::Fma);
175        mappings.insert("min".to_string(), GpuIntrinsic::Min);
176        mappings.insert("max".to_string(), GpuIntrinsic::Max);
177
178        // CUDA thread/block indices (function-style access in Rust DSL)
179        mappings.insert("thread_idx_x".to_string(), GpuIntrinsic::ThreadIdxX);
180        mappings.insert("thread_idx_y".to_string(), GpuIntrinsic::ThreadIdxY);
181        mappings.insert("thread_idx_z".to_string(), GpuIntrinsic::ThreadIdxZ);
182        mappings.insert("block_idx_x".to_string(), GpuIntrinsic::BlockIdxX);
183        mappings.insert("block_idx_y".to_string(), GpuIntrinsic::BlockIdxY);
184        mappings.insert("block_idx_z".to_string(), GpuIntrinsic::BlockIdxZ);
185        mappings.insert("block_dim_x".to_string(), GpuIntrinsic::BlockDimX);
186        mappings.insert("block_dim_y".to_string(), GpuIntrinsic::BlockDimY);
187        mappings.insert("block_dim_z".to_string(), GpuIntrinsic::BlockDimZ);
188        mappings.insert("grid_dim_x".to_string(), GpuIntrinsic::GridDimX);
189        mappings.insert("grid_dim_y".to_string(), GpuIntrinsic::GridDimY);
190        mappings.insert("grid_dim_z".to_string(), GpuIntrinsic::GridDimZ);
191
192        Self { mappings }
193    }
194
195    /// Look up an intrinsic by Rust function name.
196    pub fn lookup(&self, name: &str) -> Option<&GpuIntrinsic> {
197        self.mappings.get(name)
198    }
199
200    /// Register a custom intrinsic mapping.
201    pub fn register(&mut self, rust_name: &str, intrinsic: GpuIntrinsic) {
202        self.mappings.insert(rust_name.to_string(), intrinsic);
203    }
204
205    /// Check if a name is a known intrinsic.
206    pub fn is_intrinsic(&self, name: &str) -> bool {
207        self.mappings.contains_key(name)
208    }
209}
210
/// Stencil-specific intrinsics for neighbor access.
///
/// These are special intrinsics that the transpiler handles
/// differently based on stencil configuration.
///
/// All variants are fieldless, so the enum derives `Copy`, `Eq`, and `Hash`
/// (consistent with `RingKernelIntrinsic`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum StencilIntrinsic {
    /// Get current cell index: `pos.idx()`
    Index,
    /// Access north neighbor: `pos.north(buf)`
    North,
    /// Access south neighbor: `pos.south(buf)`
    South,
    /// Access east neighbor: `pos.east(buf)`
    East,
    /// Access west neighbor: `pos.west(buf)`
    West,
    /// Access neighbor at offset: `pos.at(buf, dx, dy)`
    At,
    /// 3D: Access neighbor above: `pos.up(buf)`
    Up,
    /// 3D: Access neighbor below: `pos.down(buf)`
    Down,
}
234
235impl StencilIntrinsic {
236    /// Parse a method name to stencil intrinsic.
237    pub fn from_method_name(name: &str) -> Option<Self> {
238        match name {
239            "idx" => Some(StencilIntrinsic::Index),
240            "north" => Some(StencilIntrinsic::North),
241            "south" => Some(StencilIntrinsic::South),
242            "east" => Some(StencilIntrinsic::East),
243            "west" => Some(StencilIntrinsic::West),
244            "at" => Some(StencilIntrinsic::At),
245            "up" => Some(StencilIntrinsic::Up),
246            "down" => Some(StencilIntrinsic::Down),
247            _ => None,
248        }
249    }
250}
251
/// Ring kernel intrinsics for persistent actor kernels.
///
/// These intrinsics provide access to control block state, queue operations,
/// and HLC (Hybrid Logical Clock) functionality within ring kernel handlers.
///
/// Lowering to CUDA source is performed by [`RingKernelIntrinsic::to_cuda`];
/// the `requires_*` predicates report which kernel resources (control block,
/// HLC state, K2K channels) a given intrinsic depends on.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RingKernelIntrinsic {
    // === Control Block Access ===
    /// Check if kernel is active: `is_active()`
    IsActive,
    /// Check if termination requested: `should_terminate()`
    ShouldTerminate,
    /// Mark kernel as terminated: `mark_terminated()`
    MarkTerminated,
    /// Get messages processed count: `messages_processed()`
    GetMessagesProcessed,

    // === Queue Operations ===
    /// Get input queue size: `input_queue_size()`
    InputQueueSize,
    /// Get output queue size: `output_queue_size()`
    OutputQueueSize,
    /// Check if input queue empty: `input_queue_empty()`
    InputQueueEmpty,
    /// Check if output queue empty: `output_queue_empty()`
    OutputQueueEmpty,
    /// Enqueue a response: `enqueue_response(&response)`
    EnqueueResponse,

    // === HLC Operations ===
    /// Increment HLC logical counter: `hlc_tick()`
    HlcTick,
    /// Update HLC with received timestamp: `hlc_update(received_ts)`
    HlcUpdate,
    /// Get current HLC timestamp: `hlc_now()`
    HlcNow,

    // === K2K Operations ===
    /// Send message to another kernel: `k2k_send(target_id, &msg)`
    K2kSend,
    /// Try to receive K2K message: `k2k_try_recv()`
    K2kTryRecv,
    /// Check for K2K messages: `k2k_has_message()`
    K2kHasMessage,
    /// Peek at next K2K message without consuming: `k2k_peek()`
    K2kPeek,
    /// Get number of pending K2K messages: `k2k_pending_count()`
    K2kPendingCount,

    // === Timing ===
    /// Sleep for nanoseconds: `nanosleep(ns)`
    Nanosleep,
}
304
impl RingKernelIntrinsic {
    /// Get the CUDA code for this intrinsic.
    ///
    /// `args` holds pre-rendered CUDA expressions for the call's arguments.
    /// Variants whose required arguments are missing expand to a C comment
    /// placeholder rather than invalid code.
    ///
    /// The emitted snippets reference names (`control`, `output_buffer`,
    /// `RESP_SIZE`, `hlc_physical`, `hlc_logical`, `k2k_routes`,
    /// `k2k_inbox`) that the surrounding kernel template is expected to
    /// declare.
    pub fn to_cuda(&self, args: &[String]) -> String {
        match self {
            // `atomicAdd(ptr, 0)` is used as an atomic read: it returns the
            // value at `ptr` while adding zero (i.e. without modifying it).
            Self::IsActive => "atomicAdd(&control->is_active, 0) != 0".to_string(),
            Self::ShouldTerminate => "atomicAdd(&control->should_terminate, 0) != 0".to_string(),
            Self::MarkTerminated => "atomicExch(&control->has_terminated, 1)".to_string(),
            Self::GetMessagesProcessed => "atomicAdd(&control->messages_processed, 0)".to_string(),

            // Queue sizes/emptiness derive from head/tail counters read via
            // the same atomic-read idiom.
            Self::InputQueueSize => {
                "(atomicAdd(&control->input_head, 0) - atomicAdd(&control->input_tail, 0))"
                    .to_string()
            }
            Self::OutputQueueSize => {
                "(atomicAdd(&control->output_head, 0) - atomicAdd(&control->output_tail, 0))"
                    .to_string()
            }
            Self::InputQueueEmpty => {
                "(atomicAdd(&control->input_head, 0) == atomicAdd(&control->input_tail, 0))"
                    .to_string()
            }
            Self::OutputQueueEmpty => {
                "(atomicAdd(&control->output_head, 0) == atomicAdd(&control->output_tail, 0))"
                    .to_string()
            }
            Self::EnqueueResponse => {
                if !args.is_empty() {
                    // Claims a slot by bumping output_head, then copies the
                    // response into the output ring. Masking with
                    // `output_mask` implies the queue capacity is a power of
                    // two — NOTE(review): confirm against the control-block
                    // layout.
                    format!(
                        "{{ unsigned long long _out_idx = atomicAdd(&control->output_head, 1) & control->output_mask; \
                         memcpy(&output_buffer[_out_idx * RESP_SIZE], {}, RESP_SIZE); }}",
                        args[0]
                    )
                } else {
                    "/* enqueue_response requires response pointer */".to_string()
                }
            }

            Self::HlcTick => "hlc_logical++".to_string(),
            Self::HlcUpdate => {
                // Adopt the received physical timestamp if it is ahead of
                // ours (resetting the logical counter); otherwise just tick.
                // With no argument, degrades to a plain tick.
                if !args.is_empty() {
                    format!(
                        "{{ if ({} > hlc_physical) {{ hlc_physical = {}; hlc_logical = 0; }} else {{ hlc_logical++; }} }}",
                        args[0], args[0]
                    )
                } else {
                    "hlc_logical++".to_string()
                }
            }
            // Packs physical time into the high 32 bits and the logical
            // counter into the low 32 bits.
            Self::HlcNow => "(hlc_physical << 32) | (hlc_logical & 0xFFFFFFFF)".to_string(),

            Self::K2kSend => {
                if args.len() >= 2 {
                    // k2k_send(target_id, msg_ptr) -> k2k_send(k2k_routes, target_id, msg_ptr, sizeof(*msg_ptr))
                    format!(
                        "k2k_send(k2k_routes, {}, {}, sizeof(*{}))",
                        args[0], args[1], args[1]
                    )
                } else {
                    "/* k2k_send requires target_id and msg_ptr */".to_string()
                }
            }
            Self::K2kTryRecv => "k2k_try_recv(k2k_inbox)".to_string(),
            Self::K2kHasMessage => "k2k_has_message(k2k_inbox)".to_string(),
            Self::K2kPeek => "k2k_peek(k2k_inbox)".to_string(),
            Self::K2kPendingCount => "k2k_pending_count(k2k_inbox)".to_string(),

            Self::Nanosleep => {
                // Default duration is 1000 ns when no argument is supplied.
                if !args.is_empty() {
                    format!("__nanosleep({})", args[0])
                } else {
                    "__nanosleep(1000)".to_string()
                }
            }
        }
    }

    /// Parse a function name to get the intrinsic.
    ///
    /// Several intrinsics accept aliases (e.g. `enqueue` for
    /// `enqueue_response`, `k2k_pending` for `k2k_pending_count`).
    /// Returns `None` for unknown names.
    pub fn from_name(name: &str) -> Option<Self> {
        match name {
            "is_active" | "is_kernel_active" => Some(Self::IsActive),
            "should_terminate" => Some(Self::ShouldTerminate),
            "mark_terminated" => Some(Self::MarkTerminated),
            "messages_processed" | "get_messages_processed" => Some(Self::GetMessagesProcessed),

            "input_queue_size" => Some(Self::InputQueueSize),
            "output_queue_size" => Some(Self::OutputQueueSize),
            "input_queue_empty" => Some(Self::InputQueueEmpty),
            "output_queue_empty" => Some(Self::OutputQueueEmpty),
            "enqueue_response" | "enqueue" => Some(Self::EnqueueResponse),

            "hlc_tick" => Some(Self::HlcTick),
            "hlc_update" => Some(Self::HlcUpdate),
            "hlc_now" => Some(Self::HlcNow),

            "k2k_send" => Some(Self::K2kSend),
            "k2k_try_recv" => Some(Self::K2kTryRecv),
            "k2k_has_message" => Some(Self::K2kHasMessage),
            "k2k_peek" => Some(Self::K2kPeek),
            "k2k_pending_count" | "k2k_pending" => Some(Self::K2kPendingCount),

            "nanosleep" => Some(Self::Nanosleep),

            _ => None,
        }
    }

    /// Check if this intrinsic requires the control block.
    pub fn requires_control_block(&self) -> bool {
        matches!(
            self,
            Self::IsActive
                | Self::ShouldTerminate
                | Self::MarkTerminated
                | Self::GetMessagesProcessed
                | Self::InputQueueSize
                | Self::OutputQueueSize
                | Self::InputQueueEmpty
                | Self::OutputQueueEmpty
                | Self::EnqueueResponse
        )
    }

    /// Check if this intrinsic requires HLC state.
    pub fn requires_hlc(&self) -> bool {
        matches!(self, Self::HlcTick | Self::HlcUpdate | Self::HlcNow)
    }

    /// Check if this intrinsic requires K2K support.
    pub fn requires_k2k(&self) -> bool {
        matches!(
            self,
            Self::K2kSend
                | Self::K2kTryRecv
                | Self::K2kHasMessage
                | Self::K2kPeek
                | Self::K2kPendingCount
        )
    }
}
444
impl StencilIntrinsic {
    /// Get the index offset for 2D stencil (relative to buffer_width).
    ///
    /// Returns (row_offset, col_offset) where final offset is:
    /// `row_offset * buffer_width + col_offset`
    ///
    /// Returns `None` for `At` (offset only known at runtime) and for the
    /// 3D-only accessors `Up`/`Down`.
    pub fn get_offset_2d(&self) -> Option<(i32, i32)> {
        match self {
            StencilIntrinsic::Index => Some((0, 0)),
            StencilIntrinsic::North => Some((-1, 0)),
            StencilIntrinsic::South => Some((1, 0)),
            StencilIntrinsic::East => Some((0, 1)),
            StencilIntrinsic::West => Some((0, -1)),
            StencilIntrinsic::At => None, // Requires runtime offset
            StencilIntrinsic::Up | StencilIntrinsic::Down => None, // 3D only
        }
    }

    /// Generate CUDA index expression for 2D stencil.
    ///
    /// # Arguments
    /// * `buffer_name` - Name of the buffer variable
    /// * `buffer_width` - Width expression (e.g., "18" for tile_size + 2*halo)
    /// * `idx_var` - Name of the current index variable
    ///
    /// NOTE(review): `buffer_width` is interpolated without parentheses, so a
    /// compound expression such as `"w + 2"` would render as `idx - w + 2`
    /// and mis-associate in C. Callers must pass an atomic expression
    /// (identifier or literal) — or this should wrap the width in
    /// parentheses. (Not changed here: the existing unit test pins the
    /// unparenthesized output.)
    pub fn to_cuda_index_2d(&self, buffer_name: &str, buffer_width: &str, idx_var: &str) -> String {
        match self {
            StencilIntrinsic::Index => format!("{}[{}]", buffer_name, idx_var),
            StencilIntrinsic::North => {
                format!("{}[{} - {}]", buffer_name, idx_var, buffer_width)
            }
            StencilIntrinsic::South => {
                format!("{}[{} + {}]", buffer_name, idx_var, buffer_width)
            }
            StencilIntrinsic::East => format!("{}[{} + 1]", buffer_name, idx_var),
            StencilIntrinsic::West => format!("{}[{} - 1]", buffer_name, idx_var),
            StencilIntrinsic::At => {
                // This should be handled specially with provided offsets
                format!("{}[{}]", buffer_name, idx_var)
            }
            // Up/Down (3D-only) fall back to the current cell in 2D codegen.
            _ => format!("{}[{}]", buffer_name, idx_var),
        }
    }
}
487
#[cfg(test)]
mod tests {
    use super::*;

    // Registry resolves known Rust names and rejects unknown ones.
    #[test]
    fn test_intrinsic_lookup() {
        let registry = IntrinsicRegistry::new();

        assert_eq!(
            registry.lookup("sync_threads"),
            Some(&GpuIntrinsic::SyncThreads)
        );
        assert_eq!(registry.lookup("sqrt"), Some(&GpuIntrinsic::Sqrt));
        assert_eq!(registry.lookup("unknown_func"), None);
    }

    // Spot-check the enum -> CUDA-name mapping.
    #[test]
    fn test_intrinsic_cuda_output() {
        assert_eq!(
            GpuIntrinsic::SyncThreads.to_cuda_string(),
            "__syncthreads()"
        );
        assert_eq!(GpuIntrinsic::AtomicAdd.to_cuda_string(), "atomicAdd");
        assert_eq!(GpuIntrinsic::Sqrt.to_cuda_string(), "sqrtf");
    }

    // Stencil accessor method names parse to their intrinsics.
    #[test]
    fn test_stencil_intrinsic_parsing() {
        assert_eq!(
            StencilIntrinsic::from_method_name("north"),
            Some(StencilIntrinsic::North)
        );
        assert_eq!(
            StencilIntrinsic::from_method_name("idx"),
            Some(StencilIntrinsic::Index)
        );
        assert_eq!(StencilIntrinsic::from_method_name("unknown"), None);
    }

    // Generated 2D index expressions for row (north) and column (east) moves.
    #[test]
    fn test_stencil_cuda_index() {
        let north = StencilIntrinsic::North;
        assert_eq!(
            north.to_cuda_index_2d("p", "buffer_width", "idx"),
            "p[idx - buffer_width]"
        );

        let east = StencilIntrinsic::East;
        assert_eq!(east.to_cuda_index_2d("p", "18", "idx"), "p[idx + 1]");
    }

    // (row, col) offsets for the compass directions and the center cell.
    #[test]
    fn test_stencil_offset() {
        assert_eq!(StencilIntrinsic::North.get_offset_2d(), Some((-1, 0)));
        assert_eq!(StencilIntrinsic::East.get_offset_2d(), Some((0, 1)));
        assert_eq!(StencilIntrinsic::Index.get_offset_2d(), Some((0, 0)));
    }

    // Ring-kernel intrinsic names resolve; unknown names return None.
    #[test]
    fn test_ring_kernel_intrinsic_lookup() {
        assert_eq!(
            RingKernelIntrinsic::from_name("is_active"),
            Some(RingKernelIntrinsic::IsActive)
        );
        assert_eq!(
            RingKernelIntrinsic::from_name("should_terminate"),
            Some(RingKernelIntrinsic::ShouldTerminate)
        );
        assert_eq!(
            RingKernelIntrinsic::from_name("hlc_tick"),
            Some(RingKernelIntrinsic::HlcTick)
        );
        assert_eq!(
            RingKernelIntrinsic::from_name("enqueue_response"),
            Some(RingKernelIntrinsic::EnqueueResponse)
        );
        assert_eq!(RingKernelIntrinsic::from_name("unknown"), None);
    }

    // Generated CUDA references the expected control-block/HLC fields.
    #[test]
    fn test_ring_kernel_intrinsic_cuda_output() {
        assert!(RingKernelIntrinsic::IsActive
            .to_cuda(&[])
            .contains("is_active"));
        assert!(RingKernelIntrinsic::ShouldTerminate
            .to_cuda(&[])
            .contains("should_terminate"));
        assert!(RingKernelIntrinsic::HlcTick
            .to_cuda(&[])
            .contains("hlc_logical"));
        assert!(RingKernelIntrinsic::InputQueueEmpty
            .to_cuda(&[])
            .contains("input_head"));
    }

    // enqueue_response emits a head bump plus a memcpy into the output ring.
    #[test]
    fn test_ring_kernel_queue_intrinsics() {
        let enqueue = RingKernelIntrinsic::EnqueueResponse;
        let cuda = enqueue.to_cuda(&["&response".to_string()]);
        assert!(cuda.contains("output_head"));
        assert!(cuda.contains("memcpy"));
    }

    // K2K intrinsics expand to the runtime helper calls on routes/inbox.
    #[test]
    fn test_k2k_intrinsics() {
        // Test k2k_send
        let send = RingKernelIntrinsic::K2kSend;
        let cuda = send.to_cuda(&["target_id".to_string(), "&msg".to_string()]);
        assert!(cuda.contains("k2k_send"));
        assert!(cuda.contains("k2k_routes"));
        assert!(cuda.contains("target_id"));

        // Test k2k_try_recv
        assert_eq!(
            RingKernelIntrinsic::K2kTryRecv.to_cuda(&[]),
            "k2k_try_recv(k2k_inbox)"
        );

        // Test k2k_has_message
        assert_eq!(
            RingKernelIntrinsic::K2kHasMessage.to_cuda(&[]),
            "k2k_has_message(k2k_inbox)"
        );

        // Test k2k_peek
        assert_eq!(
            RingKernelIntrinsic::K2kPeek.to_cuda(&[]),
            "k2k_peek(k2k_inbox)"
        );

        // Test k2k_pending_count
        assert_eq!(
            RingKernelIntrinsic::K2kPendingCount.to_cuda(&[]),
            "k2k_pending_count(k2k_inbox)"
        );
    }

    // K2K names (no aliases exercised here) resolve to their intrinsics.
    #[test]
    fn test_k2k_intrinsic_lookup() {
        assert_eq!(
            RingKernelIntrinsic::from_name("k2k_send"),
            Some(RingKernelIntrinsic::K2kSend)
        );
        assert_eq!(
            RingKernelIntrinsic::from_name("k2k_try_recv"),
            Some(RingKernelIntrinsic::K2kTryRecv)
        );
        assert_eq!(
            RingKernelIntrinsic::from_name("k2k_has_message"),
            Some(RingKernelIntrinsic::K2kHasMessage)
        );
        assert_eq!(
            RingKernelIntrinsic::from_name("k2k_peek"),
            Some(RingKernelIntrinsic::K2kPeek)
        );
        assert_eq!(
            RingKernelIntrinsic::from_name("k2k_pending_count"),
            Some(RingKernelIntrinsic::K2kPendingCount)
        );
    }

    // requires_* feature flags partition the intrinsics correctly.
    #[test]
    fn test_intrinsic_requirements() {
        // K2K intrinsics require K2K
        assert!(RingKernelIntrinsic::K2kSend.requires_k2k());
        assert!(RingKernelIntrinsic::K2kTryRecv.requires_k2k());
        assert!(RingKernelIntrinsic::K2kPeek.requires_k2k());
        assert!(!RingKernelIntrinsic::HlcTick.requires_k2k());

        // HLC intrinsics require HLC
        assert!(RingKernelIntrinsic::HlcTick.requires_hlc());
        assert!(RingKernelIntrinsic::HlcNow.requires_hlc());
        assert!(!RingKernelIntrinsic::K2kSend.requires_hlc());

        // Control block intrinsics require control block
        assert!(RingKernelIntrinsic::IsActive.requires_control_block());
        assert!(RingKernelIntrinsic::EnqueueResponse.requires_control_block());
        assert!(!RingKernelIntrinsic::HlcTick.requires_control_block());
    }
}
667}