Skip to main content

rustkernel_core/
traits.rs

1//! Core kernel traits.
2//!
3//! This module defines the fundamental traits that all kernels implement:
4//! - `GpuKernel`: Base trait for all GPU kernels
5//! - `BatchKernel`: Trait for batch (CPU-orchestrated) kernels
6//! - `RingKernelHandler`: Trait for ring (persistent actor) kernels
7//! - `CheckpointableKernel`: Trait for kernels that support checkpoint/restore (0.3.1)
8//!
9//! ## Enterprise Features (0.3.1)
10//!
11//! - Health checking for liveness/readiness probes
12//! - Execution context with auth, tenant, and tracing
13//! - Secure message handling with authentication
14//! - Checkpoint/restore for recovery
15
16use crate::error::{KernelError, Result};
17use crate::kernel::KernelMetadata;
18use async_trait::async_trait;
19use ringkernel_core::{RingContext, RingMessage};
20use serde::{Deserialize, Serialize};
21use std::fmt::{self, Debug};
22use std::marker::PhantomData;
23use std::time::Duration;
24use uuid::Uuid;
25
26// ============================================================================
27// Health & Status Types
28// ============================================================================
29
30/// Health status for kernel health checks
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
32pub enum HealthStatus {
33    /// Kernel is healthy and operational
34    #[default]
35    Healthy,
36    /// Kernel is degraded but still operational
37    Degraded,
38    /// Kernel is unhealthy and should not receive traffic
39    Unhealthy,
40    /// Health status is unknown (check failed)
41    Unknown,
42}
43
44impl std::fmt::Display for HealthStatus {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        match self {
47            Self::Healthy => write!(f, "healthy"),
48            Self::Degraded => write!(f, "degraded"),
49            Self::Unhealthy => write!(f, "unhealthy"),
50            Self::Unknown => write!(f, "unknown"),
51        }
52    }
53}
54
55// ============================================================================
56// Execution Context Types
57// ============================================================================
58
59/// Execution context for kernel invocations.
60///
61/// Provides authentication, tenant isolation, and distributed tracing context
62/// for kernel execution.
63#[derive(Debug, Clone, Default)]
64pub struct ExecutionContext {
65    /// Request ID for tracing
66    pub request_id: Option<Uuid>,
67    /// Trace ID for distributed tracing
68    pub trace_id: Option<String>,
69    /// Span ID for distributed tracing
70    pub span_id: Option<String>,
71    /// Authenticated user ID (if any)
72    pub user_id: Option<String>,
73    /// Tenant ID for multi-tenancy
74    pub tenant_id: Option<String>,
75    /// Request timeout (if specified)
76    pub timeout: Option<Duration>,
77    /// Additional metadata
78    pub metadata: std::collections::HashMap<String, String>,
79}
80
81impl ExecutionContext {
82    /// Create a new execution context
83    pub fn new() -> Self {
84        Self {
85            request_id: Some(Uuid::new_v4()),
86            ..Default::default()
87        }
88    }
89
90    /// Create context with request ID
91    pub fn with_request_id(mut self, id: Uuid) -> Self {
92        self.request_id = Some(id);
93        self
94    }
95
96    /// Set trace context
97    pub fn with_trace(mut self, trace_id: impl Into<String>, span_id: impl Into<String>) -> Self {
98        self.trace_id = Some(trace_id.into());
99        self.span_id = Some(span_id.into());
100        self
101    }
102
103    /// Set authenticated user
104    pub fn with_user(mut self, user_id: impl Into<String>) -> Self {
105        self.user_id = Some(user_id.into());
106        self
107    }
108
109    /// Set tenant
110    pub fn with_tenant(mut self, tenant_id: impl Into<String>) -> Self {
111        self.tenant_id = Some(tenant_id.into());
112        self
113    }
114
115    /// Set timeout
116    pub fn with_timeout(mut self, timeout: Duration) -> Self {
117        self.timeout = Some(timeout);
118        self
119    }
120
121    /// Add metadata
122    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
123        self.metadata.insert(key.into(), value.into());
124        self
125    }
126}
127
128/// Secure ring context with authentication.
129///
130/// Wraps `RingContext` with security context for authenticated message handling.
131pub struct SecureRingContext<'ctx, 'ring> {
132    /// The underlying ring context
133    pub ring_ctx: &'ctx mut RingContext<'ring>,
134    /// Execution context with auth info
135    pub exec_ctx: &'ctx ExecutionContext,
136}
137
138impl<'ctx, 'ring> SecureRingContext<'ctx, 'ring> {
139    /// Create a new secure context
140    pub fn new(ring_ctx: &'ctx mut RingContext<'ring>, exec_ctx: &'ctx ExecutionContext) -> Self {
141        Self { ring_ctx, exec_ctx }
142    }
143
144    /// Get the authenticated user ID
145    pub fn user_id(&self) -> Option<&str> {
146        self.exec_ctx.user_id.as_deref()
147    }
148
149    /// Get the tenant ID
150    pub fn tenant_id(&self) -> Option<&str> {
151        self.exec_ctx.tenant_id.as_deref()
152    }
153
154    /// Check if request is authenticated
155    pub fn is_authenticated(&self) -> bool {
156        self.exec_ctx.user_id.is_some()
157    }
158}
159
160// ============================================================================
161// Kernel Configuration
162// ============================================================================
163
164/// Runtime configuration for a kernel instance.
165#[derive(Debug, Clone, Default, Serialize, Deserialize)]
166pub struct KernelConfig {
167    /// Maximum queue depth
168    pub max_queue_depth: Option<usize>,
169    /// Execution timeout
170    pub timeout: Option<Duration>,
171    /// Enable tracing
172    pub tracing_enabled: bool,
173    /// Enable metrics collection
174    pub metrics_enabled: bool,
175    /// Custom configuration values
176    pub custom: std::collections::HashMap<String, serde_json::Value>,
177}
178
179impl KernelConfig {
180    /// Create a new kernel config
181    pub fn new() -> Self {
182        Self::default()
183    }
184
185    /// Set queue depth
186    pub fn with_queue_depth(mut self, depth: usize) -> Self {
187        self.max_queue_depth = Some(depth);
188        self
189    }
190
191    /// Set timeout
192    pub fn with_timeout(mut self, timeout: Duration) -> Self {
193        self.timeout = Some(timeout);
194        self
195    }
196
197    /// Enable tracing
198    pub fn with_tracing(mut self, enabled: bool) -> Self {
199        self.tracing_enabled = enabled;
200        self
201    }
202
203    /// Enable metrics
204    pub fn with_metrics(mut self, enabled: bool) -> Self {
205        self.metrics_enabled = enabled;
206        self
207    }
208
209    /// Set custom value
210    pub fn with_custom(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
211        self.custom.insert(key.into(), value);
212        self
213    }
214}
215
216// ============================================================================
217// Core Kernel Traits
218// ============================================================================
219
220/// Base trait for all GPU kernels.
221///
222/// Provides access to kernel metadata, health checking, and lifecycle management.
223///
224/// ## Enterprise Features (0.3.1)
225///
226/// - `health_check()` - Report kernel health for liveness/readiness probes
227/// - `shutdown()` - Graceful shutdown with resource cleanup
228/// - `refresh_config()` - Hot configuration reload
229pub trait GpuKernel: Send + Sync + Debug {
230    /// Returns the kernel metadata.
231    fn metadata(&self) -> &KernelMetadata;
232
233    /// Validate kernel configuration.
234    ///
235    /// Called before kernel launch to ensure configuration is valid.
236    fn validate(&self) -> Result<()> {
237        Ok(())
238    }
239
240    /// Returns the kernel ID.
241    fn id(&self) -> &str {
242        &self.metadata().id
243    }
244
245    /// Returns true if this kernel requires GPU-native execution.
246    fn requires_gpu_native(&self) -> bool {
247        self.metadata().requires_gpu_native
248    }
249
250    // ========================================================================
251    // Enterprise Features (0.3.1)
252    // ========================================================================
253
254    /// Perform a health check on this kernel.
255    ///
256    /// Used by liveness and readiness probes. Override to implement
257    /// custom health checking logic (e.g., checking GPU memory, connections).
258    ///
259    /// # Returns
260    ///
261    /// The current health status of the kernel.
262    fn health_check(&self) -> HealthStatus {
263        HealthStatus::Healthy
264    }
265
266    /// Graceful shutdown of the kernel.
267    ///
268    /// Called during runtime shutdown to release resources. Override to
269    /// implement custom cleanup (e.g., flushing buffers, closing connections).
270    ///
271    /// Default implementation does nothing.
272    fn shutdown(&self) -> Result<()> {
273        Ok(())
274    }
275
276    /// Refresh kernel configuration at runtime.
277    ///
278    /// Called when configuration is hot-reloaded. Only safe-to-reload
279    /// configuration values should be applied.
280    ///
281    /// # Arguments
282    ///
283    /// * `config` - The new configuration to apply
284    ///
285    /// # Returns
286    ///
287    /// Ok if configuration was applied, Err if configuration is invalid.
288    fn refresh_config(&mut self, _config: &KernelConfig) -> Result<()> {
289        Ok(())
290    }
291}
292
293/// Trait for batch (CPU-orchestrated) kernels.
294///
295/// Batch kernels are launched on-demand with CPU orchestration.
296/// They have 10-50μs launch overhead and state resides in CPU memory.
297///
298/// ## Enterprise Features (0.3.1)
299///
300/// - `execute_with_context()` - Execute with auth, tenant, and tracing context
301/// - `execute_with_timeout()` - Execute with deadline enforcement
302///
303/// # Type Parameters
304///
305/// - `I`: Input type
306/// - `O`: Output type
307#[async_trait]
308pub trait BatchKernel<I, O>: GpuKernel
309where
310    I: Send + Sync,
311    O: Send + Sync,
312{
313    /// Execute the kernel with the given input.
314    ///
315    /// # Arguments
316    ///
317    /// * `input` - The input data for the kernel
318    ///
319    /// # Returns
320    ///
321    /// The kernel output or an error.
322    async fn execute(&self, input: I) -> Result<O>;
323
324    /// Validate the input before execution.
325    ///
326    /// Override to provide custom input validation.
327    fn validate_input(&self, _input: &I) -> Result<()> {
328        Ok(())
329    }
330
331    // ========================================================================
332    // Enterprise Features (0.3.1)
333    // ========================================================================
334
335    /// Execute the kernel with execution context.
336    ///
337    /// Provides authentication, tenant isolation, and distributed tracing
338    /// context for the kernel execution.
339    ///
340    /// # Arguments
341    ///
342    /// * `ctx` - The execution context with auth, tenant, and tracing info
343    /// * `input` - The input data for the kernel
344    ///
345    /// # Returns
346    ///
347    /// The kernel output or an error.
348    ///
349    /// # Default Implementation
350    ///
351    /// Delegates to `execute()` ignoring the context. Override to use context.
352    async fn execute_with_context(&self, ctx: &ExecutionContext, input: I) -> Result<O>
353    where
354        I: 'async_trait,
355    {
356        // Default: ignore context, just execute
357        let _ = ctx;
358        self.execute(input).await
359    }
360
361    /// Execute the kernel with a timeout.
362    ///
363    /// # Arguments
364    ///
365    /// * `input` - The input data for the kernel
366    /// * `timeout` - Maximum execution time
367    ///
368    /// # Returns
369    ///
370    /// The kernel output or a timeout error.
371    async fn execute_with_timeout(&self, input: I, timeout: Duration) -> Result<O>
372    where
373        I: 'async_trait,
374    {
375        match tokio::time::timeout(timeout, self.execute(input)).await {
376            Ok(result) => result,
377            Err(_elapsed) => Err(crate::error::KernelError::Timeout(timeout)),
378        }
379    }
380}
381
382/// Trait for ring (persistent actor) kernels.
383///
384/// Ring kernels are persistent GPU actors with 100-500ns message latency.
385/// State resides permanently in GPU memory.
386///
387/// ## Enterprise Features (0.3.1)
388///
389/// - `handle_secure()` - Handle messages with security context
390///
391/// # Type Parameters
392///
393/// - `M`: Request message type
394/// - `R`: Response message type
395#[async_trait]
396pub trait RingKernelHandler<M, R>: GpuKernel
397where
398    M: RingMessage + Send + Sync,
399    R: RingMessage + Send + Sync,
400{
401    /// Handle an incoming message.
402    ///
403    /// # Arguments
404    ///
405    /// * `ctx` - The ring kernel context with GPU intrinsics
406    /// * `msg` - The incoming message
407    ///
408    /// # Returns
409    ///
410    /// The response message or an error.
411    async fn handle(&self, ctx: &mut RingContext, msg: M) -> Result<R>;
412
413    /// Initialize the kernel state.
414    ///
415    /// Called once when the kernel is first activated.
416    async fn initialize(&self, _ctx: &mut RingContext) -> Result<()> {
417        Ok(())
418    }
419
420    /// Called when the kernel is being shut down.
421    ///
422    /// Use this to clean up resources.
423    async fn ring_shutdown(&self, _ctx: &mut RingContext) -> Result<()> {
424        Ok(())
425    }
426
427    // ========================================================================
428    // Enterprise Features (0.3.1)
429    // ========================================================================
430
431    /// Handle a message with security context.
432    ///
433    /// Provides authentication and tenant isolation for message handling.
434    /// Use this for operations that require authorization checks.
435    ///
436    /// # Arguments
437    ///
438    /// * `ctx` - Secure ring context with auth info
439    /// * `msg` - The incoming message
440    ///
441    /// # Returns
442    ///
443    /// The response message or an error.
444    ///
445    /// # Default Implementation
446    ///
447    /// Delegates to `handle()` ignoring security context. Override to
448    /// implement authorization checks.
449    async fn handle_secure(&self, ctx: &mut SecureRingContext<'_, '_>, msg: M) -> Result<R>
450    where
451        M: 'async_trait,
452        R: 'async_trait,
453    {
454        // Default: ignore security context, delegate to handle
455        self.handle(ctx.ring_ctx, msg).await
456    }
457}
458
459/// Trait for iterative (multi-pass) kernels.
460///
461/// Provides support for algorithms that require multiple iterations
462/// to converge (e.g., PageRank, K-Means).
463///
464/// # Type Parameters
465///
466/// - `S`: State type
467/// - `I`: Input type
468/// - `O`: Output type
469#[async_trait]
470pub trait IterativeKernel<S, I, O>: GpuKernel
471where
472    S: Send + Sync + 'static,
473    I: Send + Sync + 'static,
474    O: Send + Sync + 'static,
475{
476    /// Create the initial state.
477    fn initial_state(&self, input: &I) -> S;
478
479    /// Perform one iteration.
480    ///
481    /// # Arguments
482    ///
483    /// * `state` - The current state (mutable)
484    /// * `input` - The input data
485    ///
486    /// # Returns
487    ///
488    /// The iteration result.
489    async fn iterate(&self, state: &mut S, input: &I) -> Result<IterationResult<O>>;
490
491    /// Check if the algorithm has converged.
492    ///
493    /// # Arguments
494    ///
495    /// * `state` - The current state
496    /// * `threshold` - The convergence threshold
497    ///
498    /// # Returns
499    ///
500    /// `true` if converged, `false` otherwise.
501    fn converged(&self, state: &S, threshold: f64) -> bool;
502
503    /// Maximum number of iterations.
504    fn max_iterations(&self) -> usize {
505        100
506    }
507
508    /// Default convergence threshold.
509    fn default_threshold(&self) -> f64 {
510        1e-6
511    }
512
513    /// Run the iterative algorithm to convergence.
514    async fn run_to_convergence(&self, input: I) -> Result<O> {
515        self.run_to_convergence_with_threshold(input, self.default_threshold())
516            .await
517    }
518
519    /// Run the iterative algorithm with a custom threshold.
520    async fn run_to_convergence_with_threshold(&self, input: I, threshold: f64) -> Result<O> {
521        let mut state = self.initial_state(&input);
522        let max_iter = self.max_iterations();
523
524        for _ in 0..max_iter {
525            let result = self.iterate(&mut state, &input).await?;
526
527            if let IterationResult::Converged(output) = result {
528                return Ok(output);
529            }
530
531            if self.converged(&state, threshold) {
532                if let IterationResult::Continue(output) = result {
533                    return Ok(output);
534                }
535            }
536        }
537
538        // Return final state even if not converged
539        match self.iterate(&mut state, &input).await? {
540            IterationResult::Converged(output) | IterationResult::Continue(output) => Ok(output),
541        }
542    }
543}
544
/// Outcome of one pass of an iterative kernel.
///
/// Both variants carry an output value; they differ only in whether the
/// algorithm considers itself finished.
#[derive(Clone, Debug)]
pub enum IterationResult<O> {
    /// The algorithm has converged; the payload is the final output.
    Converged(O),
    /// The algorithm should keep going; the payload is the current
    /// intermediate output.
    Continue(O),
}

impl<O> IterationResult<O> {
    /// Returns `true` for [`IterationResult::Converged`], `false` otherwise.
    #[must_use]
    pub fn is_converged(&self) -> bool {
        match self {
            Self::Converged(_) => true,
            Self::Continue(_) => false,
        }
    }

    /// Consumes the result and returns the carried output, whichever
    /// variant it is.
    #[must_use]
    pub fn into_output(self) -> O {
        let (Self::Converged(output) | Self::Continue(output)) = self;
        output
    }
}
569
570/// Type-erased batch kernel for registry storage.
571#[async_trait]
572pub trait BatchKernelDyn: GpuKernel {
573    /// Execute with type-erased input/output.
574    async fn execute_dyn(&self, input: &[u8]) -> Result<Vec<u8>>;
575}
576
577/// Type-erased ring kernel for registry storage.
578#[async_trait]
579pub trait RingKernelDyn: GpuKernel {
580    /// Handle with type-erased messages.
581    async fn handle_dyn(&self, ctx: &mut RingContext, msg: &[u8]) -> Result<Vec<u8>>;
582}
583
584// ============================================================================
585// Type-Erased Kernel Adapters
586// ============================================================================
587
/// Adapter that exposes a typed batch kernel through dynamic dispatch.
///
/// Wraps any `BatchKernel<I, O>` implementation and provides the
/// `BatchKernelDyn` interface, converting inputs and outputs to and from
/// JSON bytes.
///
/// This lets batch kernels be stored in the registry and invoked via REST,
/// gRPC, and other service interfaces without compile-time knowledge of the
/// kernel's input/output types.
///
/// # Example
///
/// ```ignore
/// use rustkernel_core::traits::TypeErasedBatchKernel;
///
/// let kernel = TypeErasedBatchKernel::new(MyKernel::new());
/// let output = kernel.execute_dyn(b"{\"field\": 42}").await?;
/// ```
pub struct TypeErasedBatchKernel<K, I, O> {
    inner: K,
    // A function-pointer marker keeps the wrapper `Send + Sync` no matter
    // what `I` and `O` are, since no value of either type is ever stored.
    _phantom: PhantomData<fn(I) -> O>,
}

impl<K, I, O> Debug for TypeErasedBatchKernel<K, I, O>
where
    K: Debug,
{
    /// Formats the wrapper as a struct showing only the wrapped kernel.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("TypeErasedBatchKernel");
        builder.field("inner", &self.inner);
        builder.finish()
    }
}

impl<K, I, O> TypeErasedBatchKernel<K, I, O> {
    /// Wraps a typed batch kernel so it can be executed type-erased.
    pub fn new(kernel: K) -> Self {
        TypeErasedBatchKernel {
            inner: kernel,
            _phantom: PhantomData,
        }
    }

    /// Borrows the wrapped kernel.
    pub fn inner(&self) -> &K {
        let Self { inner, .. } = self;
        inner
    }
}
634
635impl<K, I, O> GpuKernel for TypeErasedBatchKernel<K, I, O>
636where
637    K: GpuKernel,
638    I: Send + Sync + 'static,
639    O: Send + Sync + 'static,
640{
641    fn metadata(&self) -> &KernelMetadata {
642        self.inner.metadata()
643    }
644
645    fn validate(&self) -> Result<()> {
646        self.inner.validate()
647    }
648
649    fn health_check(&self) -> HealthStatus {
650        self.inner.health_check()
651    }
652
653    fn shutdown(&self) -> Result<()> {
654        self.inner.shutdown()
655    }
656
657    fn refresh_config(&mut self, config: &KernelConfig) -> Result<()> {
658        self.inner.refresh_config(config)
659    }
660}
661
662#[async_trait]
663impl<K, I, O> BatchKernelDyn for TypeErasedBatchKernel<K, I, O>
664where
665    K: BatchKernel<I, O> + 'static,
666    I: serde::de::DeserializeOwned + Send + Sync + 'static,
667    O: serde::Serialize + Send + Sync + 'static,
668{
669    async fn execute_dyn(&self, input: &[u8]) -> Result<Vec<u8>> {
670        let typed_input: I = serde_json::from_slice(input)
671            .map_err(|e| KernelError::DeserializationError(e.to_string()))?;
672        let output = self.inner.execute(typed_input).await?;
673        serde_json::to_vec(&output).map_err(|e| KernelError::SerializationError(e.to_string()))
674    }
675}
676
/// Type-erased wrapper for ring kernels enabling dynamic dispatch.
///
/// Similar to [`TypeErasedBatchKernel`] but for ring kernels that handle
/// messages through the RingKernel persistent actor model.
pub struct TypeErasedRingKernel<K, M, R> {
    inner: K,
    // A function-pointer marker keeps the wrapper `Send + Sync` no matter
    // what `M` and `R` are, since no value of either type is ever stored.
    _phantom: PhantomData<fn(M) -> R>,
}

impl<K: Debug, M, R> Debug for TypeErasedRingKernel<K, M, R> {
    /// Formats the wrapper as a struct showing only the wrapped kernel.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("TypeErasedRingKernel")
            .field("inner", &self.inner)
            .finish()
    }
}

impl<K, M, R> TypeErasedRingKernel<K, M, R> {
    /// Wrap a typed ring kernel for type-erased message handling.
    pub fn new(kernel: K) -> Self {
        Self {
            inner: kernel,
            _phantom: PhantomData,
        }
    }

    /// Access the inner kernel.
    ///
    /// Mirrors `TypeErasedBatchKernel::inner` so both adapters offer the
    /// same accessor.
    pub fn inner(&self) -> &K {
        &self.inner
    }
}
703
704impl<K, M, R> GpuKernel for TypeErasedRingKernel<K, M, R>
705where
706    K: GpuKernel,
707    M: Send + Sync + 'static,
708    R: Send + Sync + 'static,
709{
710    fn metadata(&self) -> &KernelMetadata {
711        self.inner.metadata()
712    }
713
714    fn validate(&self) -> Result<()> {
715        self.inner.validate()
716    }
717
718    fn health_check(&self) -> HealthStatus {
719        self.inner.health_check()
720    }
721
722    fn shutdown(&self) -> Result<()> {
723        self.inner.shutdown()
724    }
725
726    fn refresh_config(&mut self, config: &KernelConfig) -> Result<()> {
727        self.inner.refresh_config(config)
728    }
729}
730
731#[async_trait]
732impl<K, M, R> RingKernelDyn for TypeErasedRingKernel<K, M, R>
733where
734    K: RingKernelHandler<M, R> + 'static,
735    M: RingMessage + serde::de::DeserializeOwned + Send + Sync + 'static,
736    R: RingMessage + serde::Serialize + Send + Sync + 'static,
737{
738    async fn handle_dyn(&self, ctx: &mut RingContext, msg: &[u8]) -> Result<Vec<u8>> {
739        let typed_msg: M = serde_json::from_slice(msg)
740            .map_err(|e| KernelError::DeserializationError(e.to_string()))?;
741        let response = self.inner.handle(ctx, typed_msg).await?;
742        serde_json::to_vec(&response).map_err(|e| KernelError::SerializationError(e.to_string()))
743    }
744}
745
746// ============================================================================
747// Enterprise Traits (0.3.1)
748// ============================================================================
749
750/// Trait for kernels that support checkpoint/restore.
751///
752/// Enables recovery from failures by saving and restoring kernel state.
753/// Useful for long-running or stateful kernels.
754///
755/// # Type Parameters
756///
757/// - `C`: Checkpoint type (must be serializable)
758#[async_trait]
759pub trait CheckpointableKernel: GpuKernel {
760    /// The checkpoint state type
761    type Checkpoint: Serialize + serde::de::DeserializeOwned + Send + Sync;
762
763    /// Create a checkpoint of current kernel state.
764    ///
765    /// # Returns
766    ///
767    /// A serializable checkpoint that can be used to restore state.
768    async fn checkpoint(&self) -> Result<Self::Checkpoint>;
769
770    /// Restore kernel state from a checkpoint.
771    ///
772    /// # Arguments
773    ///
774    /// * `checkpoint` - Previously saved checkpoint state
775    ///
776    /// # Returns
777    ///
778    /// Ok if state was restored, Err if checkpoint is invalid.
779    async fn restore(&mut self, checkpoint: Self::Checkpoint) -> Result<()>;
780
781    /// Check if checkpointing is currently safe.
782    ///
783    /// Returns false if the kernel is in the middle of an operation
784    /// that cannot be interrupted.
785    fn can_checkpoint(&self) -> bool {
786        true
787    }
788
789    /// Get the size of the checkpoint in bytes (estimate).
790    ///
791    /// Useful for monitoring and capacity planning.
792    fn checkpoint_size_estimate(&self) -> usize {
793        0
794    }
795}
796
797/// Trait for kernels that support graceful degradation.
798///
799/// When resources are constrained, these kernels can operate in
800/// a reduced-functionality mode rather than failing completely.
801pub trait DegradableKernel: GpuKernel {
802    /// Enter degraded mode.
803    ///
804    /// Called when resources are constrained. The kernel should
805    /// reduce functionality while remaining operational.
806    fn enter_degraded_mode(&mut self) -> Result<()>;
807
808    /// Exit degraded mode.
809    ///
810    /// Called when resources are restored. The kernel should
811    /// resume full functionality.
812    fn exit_degraded_mode(&mut self) -> Result<()>;
813
814    /// Check if kernel is in degraded mode.
815    fn is_degraded(&self) -> bool;
816
817    /// Get description of current degradation.
818    fn degradation_info(&self) -> Option<String> {
819        None
820    }
821}
822
#[cfg(test)]
mod tests {
    use super::*;

    /// `is_converged` and `into_output` behave as expected for both variants.
    #[test]
    fn test_iteration_result() {
        let finished = IterationResult::<i32>::Converged(42);
        assert!(finished.is_converged());
        assert_eq!(finished.into_output(), 42);

        let in_progress = IterationResult::<i32>::Continue(0);
        assert!(!in_progress.is_converged());
    }

    /// The default status is `Healthy`, and `Display` yields lowercase names.
    #[test]
    fn test_health_status() {
        let default_status = HealthStatus::default();
        assert_eq!(default_status, HealthStatus::Healthy);

        assert_eq!(HealthStatus::Healthy.to_string(), "healthy");
        assert_eq!(HealthStatus::Degraded.to_string(), "degraded");
    }

    /// Builder methods on `ExecutionContext` set the corresponding fields,
    /// and `new()` assigns a request ID.
    #[test]
    fn test_execution_context() {
        let thirty_seconds = Duration::from_secs(30);
        let context = ExecutionContext::new()
            .with_user("user123")
            .with_tenant("tenant456")
            .with_timeout(thirty_seconds);

        assert!(context.request_id.is_some());
        assert_eq!(context.user_id.as_deref(), Some("user123"));
        assert_eq!(context.tenant_id.as_deref(), Some("tenant456"));
        assert_eq!(context.timeout, Some(thirty_seconds));
    }

    /// Builder methods on `KernelConfig` set the corresponding fields.
    #[test]
    fn test_kernel_config() {
        let one_minute = Duration::from_secs(60);
        let settings = KernelConfig::new()
            .with_queue_depth(1000)
            .with_timeout(one_minute)
            .with_tracing(true)
            .with_metrics(true);

        assert_eq!(settings.max_queue_depth, Some(1000));
        assert_eq!(settings.timeout, Some(one_minute));
        assert!(settings.tracing_enabled);
        assert!(settings.metrics_enabled);
    }
}