1#![warn(missing_docs)]
33
34#[cfg(feature = "cuda")]
35mod device;
36#[cfg(feature = "cuda")]
37mod kernel;
38#[cfg(feature = "cuda")]
39mod memory;
40#[cfg(feature = "cuda")]
41mod runtime;
42#[cfg(feature = "cuda")]
43mod stencil;
44
45#[cfg(feature = "cuda")]
46pub use device::CudaDevice;
47#[cfg(feature = "cuda")]
48pub use kernel::CudaKernel;
49#[cfg(feature = "cuda")]
50pub use memory::{CudaBuffer, CudaControlBlock, CudaMemoryPool, CudaMessageQueue};
51#[cfg(feature = "cuda")]
52pub use runtime::CudaRuntime;
53#[cfg(feature = "cuda")]
54pub use stencil::{CompiledStencilKernel, LaunchConfig, StencilKernelLoader};
55
/// CUDA memory types gathered under a dedicated module path.
///
/// The same four items are also re-exported from the crate root; this module
/// exposes them a second time as `memory_exports::*`. Only compiled when the
/// `cuda` feature is enabled.
#[cfg(feature = "cuda")]
pub mod memory_exports {
    pub use super::memory::CudaBuffer;
    pub use super::memory::CudaControlBlock;
    pub use super::memory::CudaMemoryPool;
    pub use super::memory::CudaMessageQueue;
}
61
62#[cfg(not(feature = "cuda"))]
64mod stub {
65 use async_trait::async_trait;
66 use ringkernel_core::error::{Result, RingKernelError};
67 use ringkernel_core::runtime::{
68 Backend, KernelHandle, KernelId, LaunchOptions, RingKernelRuntime, RuntimeMetrics,
69 };
70
71 pub struct CudaRuntime;
73
74 impl CudaRuntime {
75 pub async fn new() -> Result<Self> {
77 Err(RingKernelError::BackendUnavailable(
78 "CUDA feature not enabled".to_string(),
79 ))
80 }
81 }
82
83 #[async_trait]
84 impl RingKernelRuntime for CudaRuntime {
85 fn backend(&self) -> Backend {
86 Backend::Cuda
87 }
88
89 fn is_backend_available(&self, _backend: Backend) -> bool {
90 false
91 }
92
93 async fn launch(&self, _kernel_id: &str, _options: LaunchOptions) -> Result<KernelHandle> {
94 Err(RingKernelError::BackendUnavailable("CUDA".to_string()))
95 }
96
97 fn get_kernel(&self, _kernel_id: &KernelId) -> Option<KernelHandle> {
98 None
99 }
100
101 fn list_kernels(&self) -> Vec<KernelId> {
102 vec![]
103 }
104
105 fn metrics(&self) -> RuntimeMetrics {
106 RuntimeMetrics::default()
107 }
108
109 async fn shutdown(&self) -> Result<()> {
110 Ok(())
111 }
112 }
113}
114
115#[cfg(not(feature = "cuda"))]
116pub use stub::CudaRuntime;
117
/// Reports whether at least one CUDA device is usable from this build.
///
/// Without the `cuda` feature this always returns `false`. With the feature
/// enabled, the device count is queried through `cudarc`; the result is `true`
/// only when that query succeeds with a count greater than zero. A failed
/// query, or a panic raised while querying, yields `false`.
pub fn is_cuda_available() -> bool {
    #[cfg(feature = "cuda")]
    fn probe() -> bool {
        // The query runs inside `catch_unwind` so a panic from the driver
        // bindings is reported as "not available" instead of propagating.
        // NOTE(review): presumably this guards against the CUDA library
        // failing to load at runtime; confirm against cudarc's behaviour.
        let has_device = || matches!(cudarc::driver::CudaDevice::count(), Ok(n) if n > 0);
        std::panic::catch_unwind(has_device).unwrap_or(false)
    }

    #[cfg(not(feature = "cuda"))]
    fn probe() -> bool {
        false
    }

    probe()
}
142
/// Returns the number of CUDA devices visible to this build.
///
/// Without the `cuda` feature this always returns `0`. With the feature
/// enabled, the count is queried through `cudarc`. A failed query, a panic
/// raised while querying, or a non-positive count all yield `0`.
pub fn cuda_device_count() -> usize {
    #[cfg(feature = "cuda")]
    {
        // The query runs inside `catch_unwind` so a panic from the driver
        // bindings degrades to "no devices" instead of propagating.
        std::panic::catch_unwind(|| match cudarc::driver::CudaDevice::count() {
            // Only a strictly positive count is converted. The driver reports
            // a signed integer, and a bare `as usize` on a negative value
            // would wrap around to an enormous device count.
            Ok(count) if count > 0 => count as usize,
            _ => 0,
        })
        .unwrap_or(0)
    }
    #[cfg(not(feature = "cuda"))]
    {
        0
    }
}
158
/// PTX source text for a minimal `ring_kernel_main` entry point.
///
/// The module declares PTX ISA version 8.0, target `sm_89`, and 64-bit
/// addressing. The entry point takes four 64-bit parameters
/// (`control_block_ptr`, `input_queue_ptr`, `output_queue_ptr`,
/// `shared_state_ptr`), of which only `control_block_ptr` is read. The kernel
/// stores the 32-bit value `1` at byte offset 8 from that pointer and returns
/// immediately.
///
/// NOTE(review): the embedded PTX comment describes the store as marking the
/// kernel "terminated". The control block layout is not visible here, so
/// confirm that offset 8 is the intended field.
pub const RING_KERNEL_PTX_TEMPLATE: &str = r#"
.version 8.0
.target sm_89
.address_size 64

.visible .entry ring_kernel_main(
 .param .u64 control_block_ptr,
 .param .u64 input_queue_ptr,
 .param .u64 output_queue_ptr,
 .param .u64 shared_state_ptr
) {
 .reg .u64 %cb_ptr;
 .reg .u32 %one;

 // Load control block pointer
 ld.param.u64 %cb_ptr, [control_block_ptr];

 // Mark as terminated immediately (offset 8)
 mov.u32 %one, 1;
 st.global.u32 [%cb_ptr + 8], %one;

 ret;
}
"#;