ringkernel_core/
runtime.rs

1//! Runtime traits and types for kernel management.
2//!
3//! This module defines the core runtime abstraction that backends implement
4//! to provide kernel lifecycle management, message passing, and monitoring.
5
6use std::future::Future;
7use std::pin::Pin;
8use std::sync::Arc;
9use std::time::Duration;
10
11use async_trait::async_trait;
12
13use crate::error::Result;
14use crate::message::{MessageEnvelope, RingMessage};
15use crate::telemetry::KernelMetrics;
16use crate::types::KernelMode;
17
/// Unique kernel identifier.
///
/// A newtype over `String`. The wrapped string is a public field, so it can
/// also be read or replaced directly via `.0`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct KernelId(pub String);

impl KernelId {
    /// Create a new kernel ID from anything convertible into a `String`.
    pub fn new(id: impl Into<String>) -> Self {
        Self(id.into())
    }

    /// Get the ID as a string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl std::fmt::Display for KernelId {
    /// Write the raw identifier text.
    ///
    /// Goes through `Formatter::pad` rather than a nested `write!` so that
    /// width, fill, alignment and precision given by the caller
    /// (e.g. `format!("{:>12}", id)`) are applied instead of silently dropped.
    /// With a plain `{}` the output is exactly the wrapped string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.as_str())
    }
}

impl From<&str> for KernelId {
    /// Copy the borrowed string into a new owned identifier.
    fn from(s: &str) -> Self {
        Self(s.to_owned())
    }
}

impl From<String> for KernelId {
    /// Wrap an owned string without copying it.
    fn from(s: String) -> Self {
        Self(s)
    }
}
51
/// Kernel lifecycle state.
///
/// The predicate methods below only answer questions about a state value;
/// they do not perform any transition themselves.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum KernelState {
    /// Kernel is created but not launched.
    Created,
    /// Kernel is launched and initializing.
    Launched,
    /// Kernel is active and processing messages.
    Active,
    /// Kernel is deactivated (paused).
    Deactivated,
    /// Kernel is terminating.
    Terminating,
    /// Kernel has terminated.
    Terminated,
}

impl KernelState {
    /// `true` for `Launched` and `Deactivated`, `false` for every other state.
    pub fn can_activate(&self) -> bool {
        // Spelled out exhaustively so a newly added variant forces a decision here.
        match self {
            Self::Launched | Self::Deactivated => true,
            Self::Created | Self::Active | Self::Terminating | Self::Terminated => false,
        }
    }

    /// `true` only for `Active`.
    pub fn can_deactivate(&self) -> bool {
        *self == Self::Active
    }

    /// `true` for `Launched`, `Active` and `Deactivated`; `false` for
    /// `Created`, `Terminating` and `Terminated`.
    pub fn can_terminate(&self) -> bool {
        // Spelled out exhaustively so a newly added variant forces a decision here.
        match self {
            Self::Launched | Self::Active | Self::Deactivated => true,
            Self::Created | Self::Terminating | Self::Terminated => false,
        }
    }

    /// `true` only for `Active`, the state in which messages are processed.
    pub fn is_running(&self) -> bool {
        *self == Self::Active
    }

    /// `true` only for `Terminated`.
    pub fn is_finished(&self) -> bool {
        *self == Self::Terminated
    }
}
95
/// Kernel status including state and metrics.
///
/// A plain, cloneable value type. In this file it is the return type of
/// `KernelHandle::status` and `KernelHandleInner::status`; how and when the
/// individual fields are filled in is up to the backend implementing
/// `KernelHandleInner`.
#[derive(Debug, Clone)]
pub struct KernelStatus {
    /// Kernel identifier.
    pub id: KernelId,
    /// Current state.
    pub state: KernelState,
    /// Execution mode.
    pub mode: KernelMode,
    /// Messages in input queue.
    pub input_queue_depth: usize,
    /// Messages in output queue.
    pub output_queue_depth: usize,
    /// Total messages processed.
    pub messages_processed: u64,
    /// Uptime since launch.
    pub uptime: Duration,
}
114
/// GPU backend type.
///
/// `Backend::default()` is [`Backend::Auto`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Backend {
    /// CPU backend (for testing).
    Cpu,
    /// NVIDIA CUDA backend.
    Cuda,
    /// Apple Metal backend.
    Metal,
    /// WebGPU cross-platform backend.
    Wgpu,
    /// Automatically select best available backend.
    #[default]
    Auto,
}

impl Backend {
    /// Get the human-readable display name of this backend.
    ///
    /// The returned string is a static literal; the same text is used by the
    /// `Display` implementation.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::Cpu => "CPU",
            Self::Cuda => "CUDA",
            Self::Metal => "Metal",
            Self::Wgpu => "WebGPU",
            Self::Auto => "Auto",
        }
    }
}

impl std::fmt::Display for Backend {
    /// Write the display name returned by [`Backend::name`].
    ///
    /// Goes through `Formatter::pad` rather than a nested `write!` so that
    /// width, fill, alignment and precision given by the caller
    /// (e.g. `format!("{:<8}", backend)` in a table) are applied instead of
    /// silently dropped. With a plain `{}` the output is exactly `name()`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.name())
    }
}
149
150/// Options for launching a kernel.
151#[derive(Debug, Clone)]
152pub struct LaunchOptions {
153    /// Execution mode (persistent or event-driven).
154    pub mode: KernelMode,
155    /// Grid size (number of blocks).
156    pub grid_size: u32,
157    /// Block size (threads per block).
158    pub block_size: u32,
159    /// Input queue capacity.
160    pub input_queue_capacity: usize,
161    /// Output queue capacity.
162    pub output_queue_capacity: usize,
163    /// Shared memory size in bytes.
164    pub shared_memory_size: usize,
165    /// Whether to activate immediately after launch.
166    pub auto_activate: bool,
167}
168
169impl Default for LaunchOptions {
170    fn default() -> Self {
171        Self {
172            mode: KernelMode::Persistent,
173            grid_size: 1,
174            block_size: 256,
175            input_queue_capacity: 1024,
176            output_queue_capacity: 1024,
177            shared_memory_size: 0,
178            auto_activate: true,
179        }
180    }
181}
182
183impl LaunchOptions {
184    /// Create options for a single-block kernel.
185    pub fn single_block(block_size: u32) -> Self {
186        Self {
187            block_size,
188            ..Default::default()
189        }
190    }
191
192    /// Create options for a multi-block kernel.
193    pub fn multi_block(grid_size: u32, block_size: u32) -> Self {
194        Self {
195            grid_size,
196            block_size,
197            ..Default::default()
198        }
199    }
200
201    /// Set execution mode.
202    pub fn with_mode(mut self, mode: KernelMode) -> Self {
203        self.mode = mode;
204        self
205    }
206
207    /// Set queue capacities.
208    pub fn with_queue_capacity(mut self, capacity: usize) -> Self {
209        self.input_queue_capacity = capacity;
210        self.output_queue_capacity = capacity;
211        self
212    }
213
214    /// Set shared memory size.
215    pub fn with_shared_memory(mut self, size: usize) -> Self {
216        self.shared_memory_size = size;
217        self
218    }
219
220    /// Disable auto-activation.
221    pub fn without_auto_activate(mut self) -> Self {
222        self.auto_activate = false;
223        self
224    }
225
226    /// Set the grid size (number of blocks).
227    pub fn with_grid_size(mut self, grid_size: u32) -> Self {
228        self.grid_size = grid_size;
229        self
230    }
231
232    /// Set the block size (threads per block).
233    pub fn with_block_size(mut self, block_size: u32) -> Self {
234        self.block_size = block_size;
235        self
236    }
237
238    /// Set priority hint for kernel scheduling.
239    ///
240    /// Note: This is a hint for future use - currently ignored by backends.
241    pub fn with_priority(self, _priority: u8) -> Self {
242        // Priority hint stored for future scheduling use
243        self
244    }
245
246    /// Set input queue capacity only.
247    pub fn with_input_queue_capacity(mut self, capacity: usize) -> Self {
248        self.input_queue_capacity = capacity;
249        self
250    }
251
252    /// Set output queue capacity only.
253    pub fn with_output_queue_capacity(mut self, capacity: usize) -> Self {
254        self.output_queue_capacity = capacity;
255        self
256    }
257}
258
/// Type-erased future for async operations.
///
/// A pinned, heap-allocated `dyn Future` that is `Send`, resolves to `T`, and
/// may borrow data for the lifetime `'a`.
pub type BoxFuture<'a, T> = Pin<Box<dyn Future<Output = T> + Send + 'a>>;
261
/// Backend-agnostic runtime trait for kernel management.
///
/// This trait is implemented by each backend (CPU, CUDA, Metal, WebGPU)
/// to provide kernel lifecycle management and message passing.
///
/// Implementors must be `Send + Sync`, and every method takes `&self`, so any
/// internal state a backend mutates has to use interior mutability.
/// The error conditions of the fallible methods are defined by each backend
/// and are not visible from this trait.
#[async_trait]
pub trait RingKernelRuntime: Send + Sync {
    /// Get the backend type.
    fn backend(&self) -> Backend;

    /// Check if a specific backend is available.
    fn is_backend_available(&self, backend: Backend) -> bool;

    /// Launch a kernel.
    ///
    /// Takes the kernel identifier as a plain `&str` plus the launch
    /// configuration, and yields a [`KernelHandle`] on success.
    ///
    /// NOTE(review): the id is a `&str` here but a `&KernelId` in
    /// `get_kernel`; presumably both name the same identifier — confirm.
    async fn launch(&self, kernel_id: &str, options: LaunchOptions) -> Result<KernelHandle>;

    /// Get a handle to an existing kernel.
    ///
    /// Returns `None` when no handle is available for `kernel_id`
    /// (presumably because no such kernel is known to the runtime — the exact
    /// condition is backend-defined).
    fn get_kernel(&self, kernel_id: &KernelId) -> Option<KernelHandle>;

    /// List all kernel IDs.
    fn list_kernels(&self) -> Vec<KernelId>;

    /// Get runtime metrics.
    fn metrics(&self) -> RuntimeMetrics;

    /// Shutdown the runtime and terminate all kernels.
    async fn shutdown(&self) -> Result<()>;
}
289
/// Handle to a launched kernel.
///
/// Provides an ergonomic API for interacting with a kernel.
///
/// The handle is `Clone`: a clone copies the `id` and shares the same
/// `inner` implementation object through the `Arc`.
#[derive(Clone)]
pub struct KernelHandle {
    /// Kernel identifier.
    id: KernelId,
    /// Inner implementation.
    ///
    /// Backend-provided trait object that the handle's methods delegate to.
    inner: Arc<dyn KernelHandleInner>,
}
300
301impl KernelHandle {
302    /// Create a new kernel handle.
303    pub fn new(id: KernelId, inner: Arc<dyn KernelHandleInner>) -> Self {
304        Self { id, inner }
305    }
306
307    /// Get the kernel ID.
308    pub fn id(&self) -> &KernelId {
309        &self.id
310    }
311
312    /// Activate the kernel.
313    pub async fn activate(&self) -> Result<()> {
314        self.inner.activate().await
315    }
316
317    /// Deactivate the kernel.
318    pub async fn deactivate(&self) -> Result<()> {
319        self.inner.deactivate().await
320    }
321
322    /// Terminate the kernel.
323    pub async fn terminate(&self) -> Result<()> {
324        self.inner.terminate().await
325    }
326
327    /// Send a message to the kernel.
328    pub async fn send<M: RingMessage>(&self, message: M) -> Result<()> {
329        let envelope = MessageEnvelope::new(
330            &message,
331            0, // Host source
332            self.inner.kernel_id_num(),
333            self.inner.current_timestamp(),
334        );
335        self.inner.send_envelope(envelope).await
336    }
337
338    /// Send a raw envelope.
339    pub async fn send_envelope(&self, envelope: MessageEnvelope) -> Result<()> {
340        self.inner.send_envelope(envelope).await
341    }
342
343    /// Receive a message from the kernel.
344    pub async fn receive(&self) -> Result<MessageEnvelope> {
345        self.inner.receive().await
346    }
347
348    /// Receive a message with timeout.
349    pub async fn receive_timeout(&self, timeout: Duration) -> Result<MessageEnvelope> {
350        self.inner.receive_timeout(timeout).await
351    }
352
353    /// Try to receive a message (non-blocking).
354    pub fn try_receive(&self) -> Result<MessageEnvelope> {
355        self.inner.try_receive()
356    }
357
358    /// Send request and wait for response (call pattern).
359    pub async fn call<M: RingMessage>(
360        &self,
361        message: M,
362        timeout: Duration,
363    ) -> Result<MessageEnvelope> {
364        // Generate correlation ID
365        let correlation = crate::message::CorrelationId::generate();
366
367        // Create envelope with correlation
368        let mut envelope = MessageEnvelope::new(
369            &message,
370            0,
371            self.inner.kernel_id_num(),
372            self.inner.current_timestamp(),
373        );
374        envelope.header.correlation_id = correlation;
375
376        // Send and wait for correlated response
377        self.inner.send_envelope(envelope).await?;
378        self.inner.receive_correlated(correlation, timeout).await
379    }
380
381    /// Get kernel status.
382    pub fn status(&self) -> KernelStatus {
383        self.inner.status()
384    }
385
386    /// Get kernel metrics.
387    pub fn metrics(&self) -> KernelMetrics {
388        self.inner.metrics()
389    }
390
391    /// Wait for kernel to terminate.
392    pub async fn wait(&self) -> Result<()> {
393        self.inner.wait().await
394    }
395
396    /// Get the current kernel state.
397    ///
398    /// This is a convenience method that returns just the state from status().
399    pub fn state(&self) -> KernelState {
400        self.status().state
401    }
402
403    /// Suspend (deactivate) the kernel.
404    ///
405    /// This is an alias for `deactivate()` for more intuitive API usage.
406    pub async fn suspend(&self) -> Result<()> {
407        self.deactivate().await
408    }
409
410    /// Resume (activate) the kernel.
411    ///
412    /// This is an alias for `activate()` for more intuitive API usage.
413    pub async fn resume(&self) -> Result<()> {
414        self.activate().await
415    }
416
417    /// Check if the kernel is currently active.
418    pub fn is_active(&self) -> bool {
419        self.state() == KernelState::Active
420    }
421
422    /// Check if the kernel has terminated.
423    pub fn is_terminated(&self) -> bool {
424        self.state() == KernelState::Terminated
425    }
426}
427
428impl std::fmt::Debug for KernelHandle {
429    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
430        f.debug_struct("KernelHandle")
431            .field("id", &self.id)
432            .finish()
433    }
434}
435
/// Inner trait for kernel handle implementation.
///
/// This is implemented by each backend to provide the actual functionality.
///
/// [`KernelHandle`] stores an `Arc<dyn KernelHandleInner>` and forwards its
/// public methods to the ones declared here. Implementors must be
/// `Send + Sync`, and every method takes `&self`. Error conditions of the
/// fallible methods are backend-defined and not visible from this trait.
#[async_trait]
pub trait KernelHandleInner: Send + Sync {
    /// Get numeric kernel ID.
    ///
    /// `KernelHandle::send` and `KernelHandle::call` pass this value to
    /// `MessageEnvelope::new` right after the host source id
    /// (presumably as the destination — confirm against `MessageEnvelope`).
    fn kernel_id_num(&self) -> u64;

    /// Get current timestamp.
    ///
    /// Used by `KernelHandle` to stamp the envelopes it builds.
    fn current_timestamp(&self) -> crate::hlc::HlcTimestamp;

    /// Activate kernel.
    async fn activate(&self) -> Result<()>;

    /// Deactivate kernel.
    async fn deactivate(&self) -> Result<()>;

    /// Terminate kernel.
    async fn terminate(&self) -> Result<()>;

    /// Send message envelope.
    async fn send_envelope(&self, envelope: MessageEnvelope) -> Result<()>;

    /// Receive message.
    async fn receive(&self) -> Result<MessageEnvelope>;

    /// Receive with timeout.
    async fn receive_timeout(&self, timeout: Duration) -> Result<MessageEnvelope>;

    /// Try receive (non-blocking).
    ///
    /// Synchronous counterpart of `receive`. How "nothing available" is
    /// reported is up to the backend (presumably as an `Err` — confirm).
    fn try_receive(&self) -> Result<MessageEnvelope>;

    /// Receive correlated response.
    ///
    /// Called by `KernelHandle::call` with the correlation id it wrote into
    /// the request envelope's header, after the request has been sent.
    async fn receive_correlated(
        &self,
        correlation: crate::message::CorrelationId,
        timeout: Duration,
    ) -> Result<MessageEnvelope>;

    /// Get status.
    fn status(&self) -> KernelStatus;

    /// Get metrics.
    fn metrics(&self) -> KernelMetrics;

    /// Wait for termination.
    async fn wait(&self) -> Result<()>;
}
484
/// Runtime-level metrics.
///
/// Return type of `RingKernelRuntime::metrics`. The derived `Default` yields
/// all counters at zero.
#[derive(Debug, Clone, Default)]
pub struct RuntimeMetrics {
    /// Number of active kernels.
    pub active_kernels: usize,
    /// Total kernels launched.
    pub total_launched: u64,
    /// Total messages sent.
    pub messages_sent: u64,
    /// Total messages received.
    pub messages_received: u64,
    /// GPU memory used (bytes).
    pub gpu_memory_used: u64,
    /// Host memory used (bytes).
    pub host_memory_used: u64,
}
501
502/// Builder for creating a runtime instance.
503#[derive(Debug, Clone)]
504pub struct RuntimeBuilder {
505    /// Selected backend.
506    pub backend: Backend,
507    /// Device index (for multi-GPU).
508    pub device_index: usize,
509    /// Enable debug mode.
510    pub debug: bool,
511    /// Enable profiling.
512    pub profiling: bool,
513}
514
515impl Default for RuntimeBuilder {
516    fn default() -> Self {
517        Self {
518            backend: Backend::Auto,
519            device_index: 0,
520            debug: false,
521            profiling: false,
522        }
523    }
524}
525
526impl RuntimeBuilder {
527    /// Create a new builder.
528    pub fn new() -> Self {
529        Self::default()
530    }
531
532    /// Set the backend.
533    pub fn backend(mut self, backend: Backend) -> Self {
534        self.backend = backend;
535        self
536    }
537
538    /// Set device index.
539    pub fn device(mut self, index: usize) -> Self {
540        self.device_index = index;
541        self
542    }
543
544    /// Enable debug mode.
545    pub fn debug(mut self, enable: bool) -> Self {
546        self.debug = enable;
547        self
548    }
549
550    /// Enable profiling.
551    pub fn profiling(mut self, enable: bool) -> Self {
552        self.profiling = enable;
553        self
554    }
555}
556
#[cfg(test)]
mod tests {
    use super::*;

    /// Transition predicates, including the states that must be rejected.
    #[test]
    fn test_kernel_state_transitions() {
        assert!(KernelState::Launched.can_activate());
        assert!(KernelState::Deactivated.can_activate());
        assert!(!KernelState::Created.can_activate());
        assert!(!KernelState::Active.can_activate());
        assert!(!KernelState::Terminating.can_activate());
        assert!(!KernelState::Terminated.can_activate());

        assert!(KernelState::Active.can_deactivate());
        assert!(!KernelState::Launched.can_deactivate());
        assert!(!KernelState::Deactivated.can_deactivate());

        assert!(KernelState::Launched.can_terminate());
        assert!(KernelState::Active.can_terminate());
        assert!(KernelState::Deactivated.can_terminate());
        assert!(!KernelState::Created.can_terminate());
        assert!(!KernelState::Terminating.can_terminate());
        assert!(!KernelState::Terminated.can_terminate());
    }

    /// `is_running` / `is_finished` each single out exactly one state.
    #[test]
    fn test_kernel_state_queries() {
        assert!(KernelState::Active.is_running());
        assert!(!KernelState::Launched.is_running());
        assert!(!KernelState::Deactivated.is_running());

        assert!(KernelState::Terminated.is_finished());
        assert!(!KernelState::Terminating.is_finished());
        assert!(!KernelState::Active.is_finished());
    }

    /// Chained builders; `with_queue_capacity` must set both queues.
    #[test]
    fn test_launch_options_builder() {
        let opts = LaunchOptions::multi_block(4, 128)
            .with_mode(KernelMode::EventDriven)
            .with_queue_capacity(2048)
            .with_shared_memory(4096)
            .without_auto_activate();

        assert_eq!(opts.grid_size, 4);
        assert_eq!(opts.block_size, 128);
        assert_eq!(opts.mode, KernelMode::EventDriven);
        assert_eq!(opts.input_queue_capacity, 2048);
        assert_eq!(opts.output_queue_capacity, 2048);
        assert_eq!(opts.shared_memory_size, 4096);
        assert!(!opts.auto_activate);
    }

    /// Defaults, `single_block`, and the per-queue setters.
    #[test]
    fn test_launch_options_defaults_and_single_setters() {
        let single = LaunchOptions::single_block(64);
        assert_eq!(single.grid_size, 1);
        assert_eq!(single.block_size, 64);
        assert_eq!(single.mode, KernelMode::Persistent);
        assert_eq!(single.input_queue_capacity, 1024);
        assert_eq!(single.output_queue_capacity, 1024);
        assert_eq!(single.shared_memory_size, 0);
        assert!(single.auto_activate);

        let tuned = LaunchOptions::default()
            .with_grid_size(8)
            .with_block_size(32)
            .with_input_queue_capacity(16)
            .with_output_queue_capacity(48)
            .with_priority(3);
        assert_eq!(tuned.grid_size, 8);
        assert_eq!(tuned.block_size, 32);
        assert_eq!(tuned.input_queue_capacity, 16);
        assert_eq!(tuned.output_queue_capacity, 48);
    }

    /// All construction paths yield equal ids; `Display` prints the raw text.
    #[test]
    fn test_kernel_id() {
        let id1 = KernelId::new("test_kernel");
        let id2: KernelId = "test_kernel".into();
        let id3: KernelId = String::from("test_kernel").into();
        assert_eq!(id1, id2);
        assert_eq!(id1, id3);
        assert_eq!(id1.as_str(), "test_kernel");
        assert_eq!(id1.to_string(), "test_kernel");
    }

    /// Every variant has a name, `Display` matches it, and `Auto` is the default.
    #[test]
    fn test_backend_name() {
        assert_eq!(Backend::Cpu.name(), "CPU");
        assert_eq!(Backend::Cuda.name(), "CUDA");
        assert_eq!(Backend::Metal.name(), "Metal");
        assert_eq!(Backend::Wgpu.name(), "WebGPU");
        assert_eq!(Backend::Auto.name(), "Auto");

        assert_eq!(Backend::Wgpu.to_string(), "WebGPU");
        assert_eq!(Backend::default(), Backend::Auto);
    }

    /// Builder defaults and setters.
    #[test]
    fn test_runtime_builder() {
        let defaults = RuntimeBuilder::new();
        assert_eq!(defaults.backend, Backend::Auto);
        assert_eq!(defaults.device_index, 0);
        assert!(!defaults.debug);
        assert!(!defaults.profiling);

        let custom = RuntimeBuilder::new()
            .backend(Backend::Cuda)
            .device(1)
            .debug(true)
            .profiling(true);
        assert_eq!(custom.backend, Backend::Cuda);
        assert_eq!(custom.device_index, 1);
        assert!(custom.debug);
        assert!(custom.profiling);
    }
}