1#![warn(missing_docs)]
34
35#[cfg(feature = "cooperative")]
36pub mod cooperative;
37#[cfg(feature = "cuda")]
38mod device;
39#[cfg(feature = "cuda")]
40pub mod driver_api;
41#[cfg(feature = "cuda")]
42pub mod k2k_gpu;
43#[cfg(feature = "cuda")]
44mod kernel;
45#[cfg(feature = "cuda")]
46mod memory;
47#[cfg(feature = "cuda")]
48pub mod persistent;
49#[cfg(feature = "cuda")]
50mod runtime;
51#[cfg(feature = "cuda")]
52mod stencil;
53
54#[cfg(feature = "cuda")]
55pub use device::CudaDevice;
56#[cfg(feature = "cuda")]
57pub use kernel::CudaKernel;
58#[cfg(feature = "cuda")]
59pub use memory::{CudaBuffer, CudaControlBlock, CudaMemoryPool, CudaMessageQueue};
60#[cfg(feature = "cuda")]
61pub use runtime::CudaRuntime;
62#[cfg(feature = "cuda")]
63pub use stencil::{CompiledStencilKernel, LaunchConfig, StencilKernelLoader};
64
/// Namespaced re-exports of the CUDA memory types.
///
/// Exposes the same four types that this file also re-exports at its top level
/// (`CudaBuffer`, `CudaControlBlock`, `CudaMemoryPool`, `CudaMessageQueue`), grouped
/// under a single module path. Only compiled when the `cuda` feature is enabled.
#[cfg(feature = "cuda")]
pub mod memory_exports {
    pub use super::memory::CudaBuffer;
    pub use super::memory::CudaControlBlock;
    pub use super::memory::CudaMemoryPool;
    pub use super::memory::CudaMessageQueue;
}
70
71#[cfg(not(feature = "cuda"))]
73mod stub {
74 use async_trait::async_trait;
75 use ringkernel_core::error::{Result, RingKernelError};
76 use ringkernel_core::runtime::{
77 Backend, KernelHandle, KernelId, LaunchOptions, RingKernelRuntime, RuntimeMetrics,
78 };
79
80 pub struct CudaRuntime;
82
83 impl CudaRuntime {
84 pub async fn new() -> Result<Self> {
86 Err(RingKernelError::BackendUnavailable(
87 "CUDA feature not enabled".to_string(),
88 ))
89 }
90 }
91
92 #[async_trait]
93 impl RingKernelRuntime for CudaRuntime {
94 fn backend(&self) -> Backend {
95 Backend::Cuda
96 }
97
98 fn is_backend_available(&self, _backend: Backend) -> bool {
99 false
100 }
101
102 async fn launch(&self, _kernel_id: &str, _options: LaunchOptions) -> Result<KernelHandle> {
103 Err(RingKernelError::BackendUnavailable("CUDA".to_string()))
104 }
105
106 fn get_kernel(&self, _kernel_id: &KernelId) -> Option<KernelHandle> {
107 None
108 }
109
110 fn list_kernels(&self) -> Vec<KernelId> {
111 vec![]
112 }
113
114 fn metrics(&self) -> RuntimeMetrics {
115 RuntimeMetrics::default()
116 }
117
118 async fn shutdown(&self) -> Result<()> {
119 Ok(())
120 }
121 }
122}
123
124#[cfg(not(feature = "cuda"))]
125pub use stub::CudaRuntime;
126
/// Reports whether at least one CUDA device can be used by this process.
///
/// Returns `false` when the crate was built without the `cuda` feature, when the
/// device-count query fails, when it reports zero devices, or when the query panics.
pub fn is_cuda_available() -> bool {
    #[cfg(not(feature = "cuda"))]
    {
        false
    }
    #[cfg(feature = "cuda")]
    {
        // The probe runs under `catch_unwind` so that a panic raised while querying the
        // driver is reported as "not available" instead of unwinding into the caller.
        let probe = || matches!(cudarc::driver::CudaContext::device_count(), Ok(n) if n > 0);
        std::panic::catch_unwind(probe).unwrap_or(false)
    }
}
151
/// Returns the number of CUDA devices visible to this process.
///
/// Returns `0` when the crate was built without the `cuda` feature, when the
/// device-count query fails or panics, or when the driver reports a count that cannot
/// be represented as `usize` (for example a negative value).
pub fn cuda_device_count() -> usize {
    #[cfg(feature = "cuda")]
    {
        // Run the query under `catch_unwind` so a panic while probing the driver is
        // reported as "no devices" rather than propagating to the caller.
        std::panic::catch_unwind(|| {
            cudarc::driver::CudaContext::device_count()
                .ok()
                // Checked conversion: a plain `as usize` cast would turn a negative
                // count into a huge bogus device count instead of zero.
                .and_then(|count| usize::try_from(count).ok())
                .unwrap_or(0)
        })
        .unwrap_or(0)
    }
    #[cfg(not(feature = "cuda"))]
    {
        0
    }
}
169
/// PTX source for the `ring_kernel_main` entry point.
///
/// The module declares PTX ISA version 8.0, targets `sm_89`, and uses 64-bit addresses.
/// The kernel takes four 64-bit pointer parameters (control block, input queue, output
/// queue, shared state) but only reads the control block pointer: it stores the 32-bit
/// value `1` at byte offset 8 from that pointer, which the embedded PTX comment
/// describes as marking the kernel terminated, and then returns.
///
/// NOTE(review): the hard-coded `.target sm_89` presumably prevents this module from
/// loading on devices with a lower compute capability — confirm against the GPUs this
/// crate is expected to support.
pub const RING_KERNEL_PTX_TEMPLATE: &str = r#"
.version 8.0
.target sm_89
.address_size 64

.visible .entry ring_kernel_main(
 .param .u64 control_block_ptr,
 .param .u64 input_queue_ptr,
 .param .u64 output_queue_ptr,
 .param .u64 shared_state_ptr
) {
 .reg .u64 %cb_ptr;
 .reg .u32 %one;

 // Load control block pointer
 ld.param.u64 %cb_ptr, [control_block_ptr];

 // Mark as terminated immediately (offset 8)
 mov.u32 %one, 1;
 st.global.u32 [%cb_ptr + 8], %one;

 ret;
}
"#;