ringkernel_core/
runtime.rs

//! Runtime traits and types for kernel management.
//!
//! This module defines the core runtime abstraction that backends implement
//! to provide kernel lifecycle management, message passing, and monitoring.
//!
//! # Overview
//!
//! The runtime module provides the central abstractions for managing GPU kernels:
//!
//! - [`RingKernelRuntime`] - The main trait implemented by backends (CPU, CUDA, Metal, WebGPU)
//! - [`KernelHandle`] - A handle for interacting with launched kernels
//! - [`LaunchOptions`] - Configuration options for kernel launches
//! - [`KernelState`] - Lifecycle states (Created → Launched → Active → Terminated)
//!
//! # Kernel Lifecycle
//!
//! ```text
//! ┌─────────┐     ┌──────────┐     ┌────────┐     ┌────────────┐
//! │ Created │ ──► │ Launched │ ──► │ Active │ ──► │ Terminated │
//! └─────────┘     └──────────┘     └────────┘     └────────────┘
//!                       │              ▲  │
//!                       │              │  ▼
//!                       │        ┌─────────────┐
//!                       └──────► │ Deactivated │
//!                                └─────────────┘
//! ```
//!
//! # Example
//!
//! ```ignore
//! use ringkernel_core::runtime::{RingKernelRuntime, LaunchOptions, KernelState};
//! use ringkernel_cpu::CpuRuntime;
//! use std::time::Duration;
//!
//! #[tokio::main]
//! async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
//!     // Create a runtime
//!     let runtime = CpuRuntime::new().await?;
//!
//!     // Launch a kernel with custom options
//!     let options = LaunchOptions::single_block(256)
//!         .with_queue_capacity(2048)
//!         .with_k2k(true);  // Enable kernel-to-kernel messaging
//!
//!     let kernel = runtime.launch("my_processor", options).await?;
//!
//!     // Kernel auto-activates by default
//!     assert!(kernel.is_active());
//!
//!     // Send messages to the kernel (`MyMessage` is a user-defined `RingMessage` type)
//!     kernel.send(MyMessage { value: 42 }).await?;
//!
//!     // Receive responses
//!     let response = kernel.receive_timeout(Duration::from_secs(1)).await?;
//!
//!     // Terminate when done
//!     kernel.terminate().await?;
//!
//!     Ok(())
//! }
//! ```
//!
//! # Backend Selection
//!
//! Use [`Backend::Auto`] to automatically select the best available backend,
//! or pin a specific backend for testing or deployment:
//!
//! ```ignore
//! use ringkernel_core::runtime::{RuntimeBuilder, Backend};
//!
//! // Auto-select: CUDA → Metal → WebGPU → CPU
//! let builder = RuntimeBuilder::new().backend(Backend::Auto);
//!
//! // Force CPU for testing
//! let builder = RuntimeBuilder::new().backend(Backend::Cpu);
//!
//! // Use CUDA with specific device
//! let builder = RuntimeBuilder::new()
//!     .backend(Backend::Cuda)
//!     .device(1)  // Second GPU
//!     .profiling(true);
//! ```

use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;

use async_trait::async_trait;

use crate::error::Result;
use crate::message::{MessageEnvelope, RingMessage};
use crate::telemetry::KernelMetrics;
use crate::types::KernelMode;

/// Unique kernel identifier.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct KernelId(pub String);

impl KernelId {
    /// Create a new kernel ID.
    pub fn new(id: impl Into<String>) -> Self {
        Self(id.into())
    }

    /// Get the ID as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl std::fmt::Display for KernelId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl From<&str> for KernelId {
    fn from(s: &str) -> Self {
        Self(s.to_string())
    }
}

impl From<String> for KernelId {
    fn from(s: String) -> Self {
        Self(s)
    }
}

/// Kernel lifecycle state.
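///
/// The helper predicates below mirror the transition diagram in the module docs:
///
/// ```ignore
/// use ringkernel_core::runtime::KernelState;
///
/// assert!(KernelState::Launched.can_activate());
/// assert!(KernelState::Deactivated.can_activate());
/// assert!(!KernelState::Terminated.can_terminate());
/// ```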
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum KernelState {
    /// Kernel is created but not launched.
    Created,
    /// Kernel is launched and initializing.
    Launched,
    /// Kernel is active and processing messages.
    Active,
    /// Kernel is deactivated (paused).
    Deactivated,
    /// Kernel is terminating.
    Terminating,
    /// Kernel has terminated.
    Terminated,
}

impl KernelState {
    /// Check if kernel can be activated.
    pub fn can_activate(&self) -> bool {
        matches!(self, Self::Launched | Self::Deactivated)
    }

    /// Check if kernel can be deactivated.
    pub fn can_deactivate(&self) -> bool {
        matches!(self, Self::Active)
    }

    /// Check if kernel can be terminated.
    pub fn can_terminate(&self) -> bool {
        matches!(self, Self::Active | Self::Deactivated | Self::Launched)
    }

    /// Check if kernel is running (can process messages).
    pub fn is_running(&self) -> bool {
        matches!(self, Self::Active)
    }

    /// Check if kernel is finished.
    pub fn is_finished(&self) -> bool {
        matches!(self, Self::Terminated)
    }
}

/// Kernel status including state and metrics.
#[derive(Debug, Clone)]
pub struct KernelStatus {
    /// Kernel identifier.
    pub id: KernelId,
    /// Current state.
    pub state: KernelState,
    /// Execution mode.
    pub mode: KernelMode,
    /// Messages in input queue.
    pub input_queue_depth: usize,
    /// Messages in output queue.
    pub output_queue_depth: usize,
    /// Total messages processed.
    pub messages_processed: u64,
    /// Uptime since launch.
    pub uptime: Duration,
}

/// GPU backend type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Backend {
    /// CPU backend (for testing).
    Cpu,
    /// NVIDIA CUDA backend.
    Cuda,
    /// Apple Metal backend.
    Metal,
    /// WebGPU cross-platform backend.
    Wgpu,
    /// Automatically select best available backend.
    #[default]
    Auto,
}

impl Backend {
    /// Get display name.
    pub fn name(&self) -> &'static str {
        match self {
            Backend::Cpu => "CPU",
            Backend::Cuda => "CUDA",
            Backend::Metal => "Metal",
            Backend::Wgpu => "WebGPU",
            Backend::Auto => "Auto",
        }
    }
}

impl std::fmt::Display for Backend {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.name())
    }
}

/// Options for launching a kernel.
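///
/// A minimal builder sketch (see the module-level example for a full launch):
///
/// ```ignore
/// use ringkernel_core::runtime::LaunchOptions;
///
/// // Four blocks of 128 threads with cooperative grid sync and 4 KiB of shared memory.
/// let opts = LaunchOptions::multi_block(4, 128)
///     .with_cooperative(true)
///     .with_shared_memory(4096);
/// assert_eq!(opts.grid_size, 4);
/// ```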
#[derive(Debug, Clone)]
pub struct LaunchOptions {
    /// Execution mode (persistent or event-driven).
    pub mode: KernelMode,
    /// Grid size (number of blocks).
    pub grid_size: u32,
    /// Block size (threads per block).
    pub block_size: u32,
    /// Input queue capacity.
    pub input_queue_capacity: usize,
    /// Output queue capacity.
    pub output_queue_capacity: usize,
    /// Shared memory size in bytes.
    pub shared_memory_size: usize,
    /// Whether to activate immediately after launch.
    pub auto_activate: bool,
    /// Enable cooperative groups for grid-wide synchronization.
    /// Requires GPU support for cooperative kernel launch.
    pub cooperative: bool,
    /// Enable K2K (kernel-to-kernel) messaging.
    /// Allocates routing table and inbox buffers on GPU.
    pub enable_k2k: bool,
}

impl Default for LaunchOptions {
    fn default() -> Self {
        Self {
            mode: KernelMode::Persistent,
            grid_size: 1,
            block_size: 256,
            input_queue_capacity: 1024,
            output_queue_capacity: 1024,
            shared_memory_size: 0,
            auto_activate: true,
            cooperative: false,
            enable_k2k: false,
        }
    }
}

impl LaunchOptions {
    /// Create options for a single-block kernel.
    pub fn single_block(block_size: u32) -> Self {
        Self {
            block_size,
            ..Default::default()
        }
    }

    /// Create options for a multi-block kernel.
    pub fn multi_block(grid_size: u32, block_size: u32) -> Self {
        Self {
            grid_size,
            block_size,
            ..Default::default()
        }
    }

    /// Set execution mode.
    pub fn with_mode(mut self, mode: KernelMode) -> Self {
        self.mode = mode;
        self
    }

    /// Set both input and output queue capacities.
    pub fn with_queue_capacity(mut self, capacity: usize) -> Self {
        self.input_queue_capacity = capacity;
        self.output_queue_capacity = capacity;
        self
    }

    /// Set shared memory size.
    pub fn with_shared_memory(mut self, size: usize) -> Self {
        self.shared_memory_size = size;
        self
    }

    /// Disable auto-activation.
    pub fn without_auto_activate(mut self) -> Self {
        self.auto_activate = false;
        self
    }

    /// Set the grid size (number of blocks).
    pub fn with_grid_size(mut self, grid_size: u32) -> Self {
        self.grid_size = grid_size;
        self
    }

    /// Set the block size (threads per block).
    pub fn with_block_size(mut self, block_size: u32) -> Self {
        self.block_size = block_size;
        self
    }

    /// Enable cooperative groups for grid-wide synchronization.
    ///
    /// When enabled, the kernel will be launched cooperatively, allowing
    /// all blocks to synchronize via `grid.sync()`. Requires GPU support
    /// and nvcc at build time.
    pub fn with_cooperative(mut self, cooperative: bool) -> Self {
        self.cooperative = cooperative;
        self
    }

    /// Enable K2K (kernel-to-kernel) messaging.
    ///
    /// When enabled, allocates routing table and inbox buffers on GPU
    /// for direct kernel-to-kernel communication without host intervention.
    pub fn with_k2k(mut self, enable: bool) -> Self {
        self.enable_k2k = enable;
        self
    }

    /// Set priority hint for kernel scheduling.
    ///
    /// Note: this is a hint for future use; backends currently ignore it.
    pub fn with_priority(self, _priority: u8) -> Self {
        // The priority hint is currently discarded; reserved for future scheduling use.
        self
    }

    /// Set input queue capacity only.
    pub fn with_input_queue_capacity(mut self, capacity: usize) -> Self {
        self.input_queue_capacity = capacity;
        self
    }

    /// Set output queue capacity only.
    pub fn with_output_queue_capacity(mut self, capacity: usize) -> Self {
        self.output_queue_capacity = capacity;
        self
    }
}

/// Type-erased future for async operations.
pub type BoxFuture<'a, T> = Pin<Box<dyn Future<Output = T> + Send + 'a>>;

/// Backend-agnostic runtime trait for kernel management.
///
/// This trait is implemented by each backend (CPU, CUDA, Metal, WebGPU)
/// to provide kernel lifecycle management and message passing.
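///
/// # Example
///
/// A minimal usage sketch against a trait object; any backend runtime
/// (such as the `CpuRuntime` from the module-level example) can be passed in:
///
/// ```ignore
/// use ringkernel_core::runtime::{LaunchOptions, RingKernelRuntime};
///
/// async fn run(runtime: &dyn RingKernelRuntime) -> Result<(), Box<dyn std::error::Error>> {
///     let kernel = runtime.launch("worker", LaunchOptions::default()).await?;
///     println!("backend: {}, kernels: {}", runtime.backend(), runtime.list_kernels().len());
///     kernel.terminate().await?;
///     runtime.shutdown().await?;
///     Ok(())
/// }
/// ```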
#[async_trait]
pub trait RingKernelRuntime: Send + Sync {
    /// Get the backend type.
    fn backend(&self) -> Backend;

    /// Check if a specific backend is available.
    fn is_backend_available(&self, backend: Backend) -> bool;

    /// Launch a kernel.
    async fn launch(&self, kernel_id: &str, options: LaunchOptions) -> Result<KernelHandle>;

    /// Get a handle to an existing kernel.
    fn get_kernel(&self, kernel_id: &KernelId) -> Option<KernelHandle>;

    /// List all kernel IDs.
    fn list_kernels(&self) -> Vec<KernelId>;

    /// Get runtime metrics.
    fn metrics(&self) -> RuntimeMetrics;

    /// Shut down the runtime and terminate all kernels.
    async fn shutdown(&self) -> Result<()>;
}

/// Handle to a launched kernel.
///
/// Provides an ergonomic API for interacting with a kernel.
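///
/// Cloning a handle is cheap; every clone refers to the same underlying kernel.
///
/// A minimal suspend/resume sketch, assuming the backend reports the kernel as
/// active again once `resume` completes:
///
/// ```ignore
/// // Pause the kernel (alias for `deactivate`) and later resume it.
/// kernel.suspend().await?;
/// kernel.resume().await?;
/// assert!(kernel.is_active());
/// ```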
#[derive(Clone)]
pub struct KernelHandle {
    /// Kernel identifier.
    id: KernelId,
    /// Inner implementation.
    inner: Arc<dyn KernelHandleInner>,
}

impl KernelHandle {
    /// Create a new kernel handle.
    pub fn new(id: KernelId, inner: Arc<dyn KernelHandleInner>) -> Self {
        Self { id, inner }
    }

    /// Get the kernel ID.
    pub fn id(&self) -> &KernelId {
        &self.id
    }

    /// Activate the kernel.
    pub async fn activate(&self) -> Result<()> {
        self.inner.activate().await
    }

    /// Deactivate the kernel.
    pub async fn deactivate(&self) -> Result<()> {
        self.inner.deactivate().await
    }

    /// Terminate the kernel.
    pub async fn terminate(&self) -> Result<()> {
        self.inner.terminate().await
    }

    /// Send a message to the kernel.
    pub async fn send<M: RingMessage>(&self, message: M) -> Result<()> {
        let envelope = MessageEnvelope::new(
            &message,
            0, // Host source
            self.inner.kernel_id_num(),
            self.inner.current_timestamp(),
        );
        self.inner.send_envelope(envelope).await
    }

    /// Send a raw envelope.
    pub async fn send_envelope(&self, envelope: MessageEnvelope) -> Result<()> {
        self.inner.send_envelope(envelope).await
    }

    /// Receive a message from the kernel.
    pub async fn receive(&self) -> Result<MessageEnvelope> {
        self.inner.receive().await
    }

    /// Receive a message with timeout.
    pub async fn receive_timeout(&self, timeout: Duration) -> Result<MessageEnvelope> {
        self.inner.receive_timeout(timeout).await
    }

    /// Try to receive a message (non-blocking).
    pub fn try_receive(&self) -> Result<MessageEnvelope> {
        self.inner.try_receive()
    }

    /// Send request and wait for response (call pattern).
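    ///
    /// A minimal sketch of the request/response flow. `PingMessage` is a
    /// hypothetical user type implementing [`RingMessage`]:
    ///
    /// ```ignore
    /// use std::time::Duration;
    ///
    /// // `call` generates a correlation ID, sends the request, and waits
    /// // (up to the timeout) for the response carrying that same ID.
    /// let reply = kernel.call(PingMessage { seq: 1 }, Duration::from_secs(1)).await?;
    /// ```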
    pub async fn call<M: RingMessage>(
        &self,
        message: M,
        timeout: Duration,
    ) -> Result<MessageEnvelope> {
        // Generate correlation ID
        let correlation = crate::message::CorrelationId::generate();

        // Create envelope with correlation
        let mut envelope = MessageEnvelope::new(
            &message,
            0,
            self.inner.kernel_id_num(),
            self.inner.current_timestamp(),
        );
        envelope.header.correlation_id = correlation;

        // Send and wait for correlated response
        self.inner.send_envelope(envelope).await?;
        self.inner.receive_correlated(correlation, timeout).await
    }

    /// Get kernel status.
    pub fn status(&self) -> KernelStatus {
        self.inner.status()
    }

    /// Get kernel metrics.
    pub fn metrics(&self) -> KernelMetrics {
        self.inner.metrics()
    }

    /// Wait for kernel to terminate.
    pub async fn wait(&self) -> Result<()> {
        self.inner.wait().await
    }

    /// Get the current kernel state.
    ///
    /// This is a convenience method that returns just the state from `status()`.
    pub fn state(&self) -> KernelState {
        self.status().state
    }

    /// Suspend (deactivate) the kernel.
    ///
    /// This is an alias for `deactivate()` for more intuitive API usage.
    pub async fn suspend(&self) -> Result<()> {
        self.deactivate().await
    }

    /// Resume (activate) the kernel.
    ///
    /// This is an alias for `activate()` for more intuitive API usage.
    pub async fn resume(&self) -> Result<()> {
        self.activate().await
    }

    /// Check if the kernel is currently active.
    pub fn is_active(&self) -> bool {
        self.state() == KernelState::Active
    }

    /// Check if the kernel has terminated.
    pub fn is_terminated(&self) -> bool {
        self.state() == KernelState::Terminated
    }
}

impl std::fmt::Debug for KernelHandle {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("KernelHandle")
            .field("id", &self.id)
            .finish()
    }
}

/// Inner trait for kernel handle implementation.
///
/// This is implemented by each backend to provide the actual functionality.
#[async_trait]
pub trait KernelHandleInner: Send + Sync {
    /// Get numeric kernel ID.
    fn kernel_id_num(&self) -> u64;

    /// Get current timestamp.
    fn current_timestamp(&self) -> crate::hlc::HlcTimestamp;

    /// Activate kernel.
    async fn activate(&self) -> Result<()>;

    /// Deactivate kernel.
    async fn deactivate(&self) -> Result<()>;

    /// Terminate kernel.
    async fn terminate(&self) -> Result<()>;

    /// Send message envelope.
    async fn send_envelope(&self, envelope: MessageEnvelope) -> Result<()>;

    /// Receive message.
    async fn receive(&self) -> Result<MessageEnvelope>;

    /// Receive with timeout.
    async fn receive_timeout(&self, timeout: Duration) -> Result<MessageEnvelope>;

    /// Try receive (non-blocking).
    fn try_receive(&self) -> Result<MessageEnvelope>;

    /// Receive correlated response.
    async fn receive_correlated(
        &self,
        correlation: crate::message::CorrelationId,
        timeout: Duration,
    ) -> Result<MessageEnvelope>;

    /// Get status.
    fn status(&self) -> KernelStatus;

    /// Get metrics.
    fn metrics(&self) -> KernelMetrics;

    /// Wait for termination.
    async fn wait(&self) -> Result<()>;
}

/// Runtime-level metrics.
#[derive(Debug, Clone, Default)]
pub struct RuntimeMetrics {
    /// Number of active kernels.
    pub active_kernels: usize,
    /// Total kernels launched.
    pub total_launched: u64,
    /// Total messages sent.
    pub messages_sent: u64,
    /// Total messages received.
    pub messages_received: u64,
    /// GPU memory used (bytes).
    pub gpu_memory_used: u64,
    /// Host memory used (bytes).
    pub host_memory_used: u64,
}

/// Builder for creating a runtime instance.
#[derive(Debug, Clone)]
pub struct RuntimeBuilder {
    /// Selected backend.
    pub backend: Backend,
    /// Device index (for multi-GPU).
    pub device_index: usize,
    /// Enable debug mode.
    pub debug: bool,
    /// Enable profiling.
    pub profiling: bool,
}

impl Default for RuntimeBuilder {
    fn default() -> Self {
        Self {
            backend: Backend::Auto,
            device_index: 0,
            debug: false,
            profiling: false,
        }
    }
}

impl RuntimeBuilder {
    /// Create a new builder.
    pub fn new() -> Self {
        Self::default()
    }

    /// Set the backend.
    pub fn backend(mut self, backend: Backend) -> Self {
        self.backend = backend;
        self
    }

    /// Set device index.
    pub fn device(mut self, index: usize) -> Self {
        self.device_index = index;
        self
    }

    /// Enable debug mode.
    pub fn debug(mut self, enable: bool) -> Self {
        self.debug = enable;
        self
    }

    /// Enable profiling.
    pub fn profiling(mut self, enable: bool) -> Self {
        self.profiling = enable;
        self
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_kernel_state_transitions() {
        assert!(KernelState::Launched.can_activate());
        assert!(KernelState::Deactivated.can_activate());
        assert!(!KernelState::Active.can_activate());
        assert!(!KernelState::Terminated.can_activate());

        assert!(KernelState::Active.can_deactivate());
        assert!(!KernelState::Launched.can_deactivate());

        assert!(KernelState::Active.can_terminate());
        assert!(KernelState::Deactivated.can_terminate());
        assert!(!KernelState::Terminated.can_terminate());
    }

    #[test]
    fn test_launch_options_builder() {
        let opts = LaunchOptions::multi_block(4, 128)
            .with_mode(KernelMode::EventDriven)
            .with_queue_capacity(2048)
            .with_shared_memory(4096)
            .without_auto_activate();

        assert_eq!(opts.grid_size, 4);
        assert_eq!(opts.block_size, 128);
        assert_eq!(opts.mode, KernelMode::EventDriven);
        assert_eq!(opts.input_queue_capacity, 2048);
        assert_eq!(opts.shared_memory_size, 4096);
        assert!(!opts.auto_activate);
    }
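
    // A small additional check, sketched from the field defaults and setters
    // defined above: K2K and cooperative launch are off by default and can be
    // toggled through the builder-style helpers.
    #[test]
    fn test_launch_options_k2k_and_cooperative() {
        let defaults = LaunchOptions::default();
        assert!(!defaults.enable_k2k);
        assert!(!defaults.cooperative);

        let opts = LaunchOptions::single_block(64)
            .with_k2k(true)
            .with_cooperative(true)
            .with_grid_size(2);

        assert_eq!(opts.grid_size, 2);
        assert_eq!(opts.block_size, 64);
        assert!(opts.enable_k2k);
        assert!(opts.cooperative);
    }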

    #[test]
    fn test_kernel_id() {
        let id1 = KernelId::new("test_kernel");
        let id2: KernelId = "test_kernel".into();
        assert_eq!(id1, id2);
        assert_eq!(id1.as_str(), "test_kernel");
    }

    #[test]
    fn test_backend_name() {
        assert_eq!(Backend::Cpu.name(), "CPU");
        assert_eq!(Backend::Cuda.name(), "CUDA");
        assert_eq!(Backend::Metal.name(), "Metal");
        assert_eq!(Backend::Wgpu.name(), "WebGPU");
    }
711}