Skip to main content

ringkernel_core/rules/
mod.rs

1//! Hot-swappable compiled rule artifacts.
2//!
3//! Per `docs/superpowers/specs/2026-04-17-v1.1-vyngraph-gaps.md` section 3.3,
4//! this module lets RingKernel accept opaque compiled rule artifacts (PTX +
5//! metadata) and hot-swap them atomically without runtime restart.
6//!
7//! ## Design philosophy
8//!
9//! RingKernel stays **rule-format-agnostic**. Callers such as VynGraph own
10//! OWL 2 RL / SHACL parsing and compile rules to PTX using our existing
11//! `ringkernel-cuda-codegen` pipeline. RingKernel receives the compiled
12//! artifact via [`CompiledRule`] and manages versioning, validation,
13//! rollback, and the atomic swap state machine.
14//!
15//! ## Artifact lifecycle
16//!
//! ```text
//! CompiledRule  ─register_rule()─►  RuleStatus::Registered
//!      │                                    │
//!      │         reload_rule()              │
//!      ▼                                    ▼
//! (new version) ─pre_stage/quiesce/swap─► RuleStatus::Active
//!                                              │
//!      prior version: Superseded(new_ver)      │
//!                                              │
//!                       rollback_rule() ◄──────┤
//!      current version: Rolledback             │
//!      prior version: Active                   │
//! ```
30//!
31//! ## Guarantees
32//!
33//! - Version monotonicity (downgrades rejected unless explicit rollback)
34//! - Bounded history (FIFO eviction beyond `max_history`)
35//! - Validation-before-swap (compute cap, dependencies, signature)
36//! - Pluggable swap backend (`NoopSwapBackend` for tests, CUDA in production)
37//!
38//! ## Example
39//!
40//! ```ignore
41//! use std::sync::Arc;
42//! use ringkernel_core::rules::{
43//!     ActorConfig, CompiledRule, NoopSwapBackend, RuleMetadata, RuleRegistry,
44//! };
45//!
46//! # async fn example() {
47//! let registry = RuleRegistry::new(5, Arc::new(NoopSwapBackend));
48//! let rule = CompiledRule {
49//!     rule_id: "gaap-consolidation".into(),
50//!     version: 1,
51//!     ptx: b".version 8.0\n.target sm_90\n".to_vec(),
52//!     compute_cap: "sm_90".into(),
53//!     depends_on: vec![],
54//!     signature: None,
55//!     actor_config: ActorConfig::default(),
56//!     metadata: RuleMetadata::default(),
57//! };
58//! let handle = registry.register_rule(rule, "sm_90").await.unwrap();
59//! assert_eq!(handle.version, 1);
60//! # }
61//! ```
62//!
63//! [`HotReloadManager::rule_registry()`] exposes the registry for use by
64//! existing multi-GPU hot-reload plumbing.
65//!
66//! [`HotReloadManager::rule_registry()`]: crate::multi_gpu::HotReloadManager::rule_registry
67
68use std::time::{Duration, SystemTime};
69
70pub mod registry;
71
72pub use registry::{NoopSwapBackend, RuleRegistry, RuleSwapBackend, SignatureVerifier};
73
74/// A compiled rule artifact ready for GPU hot-swap.
75///
76/// RingKernel does not inspect `ptx` beyond validating compute capability,
77/// dependencies and (optionally) signature. The caller owns semantic
78/// correctness of the compilation.
79#[derive(Debug, Clone)]
80pub struct CompiledRule {
81    /// Caller-scoped rule set identifier (e.g. `"gaap-consolidation"`).
82    pub rule_id: String,
83    /// Monotonically increasing version; later versions must be strictly
84    /// greater than the currently active version.
85    pub version: u64,
86    /// Compiled PTX bytes for the actor kernel.
87    pub ptx: Vec<u8>,
88    /// Required compute capability, e.g. `"sm_90"` for H100.
89    pub compute_cap: String,
90    /// Other `rule_id`s that must already be registered before this rule
91    /// can be installed. Used for inference-rule dependency graphs.
92    pub depends_on: Vec<String>,
93    /// Optional integrity signature (format is verifier-specific).
94    pub signature: Option<Vec<u8>>,
95    /// Actor launch configuration.
96    pub actor_config: ActorConfig,
97    /// Opaque metadata passed through for audit/logging. RingKernel does
98    /// not interpret any of these fields.
99    pub metadata: RuleMetadata,
100}
101
/// Describes how the actor kernel belonging to a rule is launched.
#[derive(Debug, Clone)]
pub struct ActorConfig {
    /// Block dimensions for the CUDA launch, as `(x, y, z)`.
    pub block_dim: (u32, u32, u32),
    /// Grid dimensions for the CUDA launch, as `(x, y, z)`.
    pub grid_dim: (u32, u32, u32),
    /// Number of bytes of dynamic shared memory allocated per block.
    pub shared_mem_bytes: u32,
    /// Upper bound on the messages this actor accepts in flight.
    pub max_in_flight: u32,
}

impl Default for ActorConfig {
    /// Returns a configuration with `(1, 1, 1)` block and grid dimensions,
    /// no dynamic shared memory, and an in-flight limit of 1024 messages.
    fn default() -> Self {
        // Block and grid share the same unit extent by default.
        const UNIT_DIM: (u32, u32, u32) = (1, 1, 1);

        ActorConfig {
            block_dim: UNIT_DIM,
            grid_dim: UNIT_DIM,
            shared_mem_bytes: 0,
            max_in_flight: 1024,
        }
    }
}
125
/// Pass-through metadata that travels with a compiled rule.
///
/// Every field is optional, and no field has any effect on the swap state
/// machine. The data is carried purely for audit trails, observability and
/// attribution; callers may populate as much or as little as they like, and
/// RingKernel hands it through unmodified.
#[derive(Debug, Clone, Default)]
pub struct RuleMetadata {
    /// Human-readable name of the language the rule was written in, such as
    /// `"OWL 2 RL"`, `"SHACL"` or `"custom DSL"`. RingKernel treats it as an
    /// opaque string.
    pub source_language: Option<String>,
    /// SHA-256 digest of the rule's source text, kept so audits can be
    /// reproduced.
    pub source_hash: Option<[u8; 32]>,
    /// Time at which the rule was compiled.
    pub compiled_at: Option<SystemTime>,
    /// Version string of the compiler that produced the artifact.
    pub compiler_version: Option<String>,
    /// Principal that authored or compiled the rule.
    pub author: Option<String>,
}
146
147/// Lightweight handle returned after a successful registry operation.
148#[derive(Debug, Clone)]
149pub struct RuleHandle {
150    /// Rule identifier.
151    pub rule_id: String,
152    /// Rule version.
153    pub version: u64,
154    /// Lifecycle status of this specific version.
155    pub status: RuleStatus,
156    /// When the version was registered with the registry.
157    pub registered_at: SystemTime,
158}
159
/// Where a single rule version currently is in its lifecycle.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleStatus {
    /// The version has been loaded and validated, but it is not the active
    /// version yet.
    Registered,
    /// The version is the one currently executing on the device.
    Active,
    /// The version is being drained in preparation for a swap.
    Quiescing,
    /// The version was replaced; the payload is the newer version that took
    /// its place.
    Superseded(u64),
    /// A formerly `Active` version that the user chose to revert via a
    /// rollback.
    Rolledback,
    /// Validation or the swap backend failed, so this version cannot be
    /// used.
    Failed,
}
176
/// Summary produced once a reload (or a rollback) has completed
/// successfully.
#[derive(Debug, Clone)]
pub struct ReloadReport {
    /// Identifier of the rule that was reloaded.
    pub rule_id: String,
    /// The version that was left behind. This is 0 when the operation was
    /// the rule's initial activation.
    pub from_version: u64,
    /// The version that is `Active` after the operation.
    pub to_version: u64,
    /// How long draining the old actor took.
    pub quiesce_duration: Duration,
    /// How long the atomic pointer swap took.
    pub swap_duration: Duration,
    /// Number of messages in flight during the swap window, as reported by
    /// the swap backend.
    pub messages_in_flight_during_swap: u64,
    /// `true` when the previous version is still kept in history and can
    /// therefore be targeted by a later rollback.
    pub rollback_available: bool,
}
197
198/// Errors produced by the rule registry.
199#[derive(Debug, thiserror::Error)]
200pub enum RuleError {
201    /// No such rule in the registry.
202    #[error("rule not found: {0}")]
203    NotFound(String),
204
205    /// Incoming version is not strictly newer than the current active version.
206    #[error("version downgrade rejected: current={current}, proposed={proposed}")]
207    VersionDowngrade {
208        /// Currently active version.
209        current: u64,
210        /// Version the caller tried to install.
211        proposed: u64,
212    },
213
214    /// Rule targets a compute capability the device does not meet.
215    #[error("compute capability mismatch: rule={required}, device={available}")]
216    ComputeCapMismatch {
217        /// Compute cap the rule requires.
218        required: String,
219        /// Compute cap the device actually has.
220        available: String,
221    },
222
223    /// Rule depends on another rule that is not registered.
224    #[error("dependency missing: {0}")]
225    MissingDependency(String),
226
227    /// Signature check did not succeed.
228    #[error("signature verification failed")]
229    InvalidSignature,
230
231    /// Caller asked to roll back to a version no longer in history.
232    #[error("rollback target not in history: version={0}")]
233    RollbackTargetMissing(u64),
234
235    /// No version is currently active — nothing to roll back from.
236    #[error("no active version to rollback")]
237    NoActiveVersion,
238
239    /// Quiesce window elapsed before the actor finished draining.
240    #[error("quiesce timeout after {0:?}")]
241    QuiesceTimeout(Duration),
242
243    /// Swap backend refused the operation (wraps backend-specific detail).
244    #[error("swap backend error: {0}")]
245    BackendError(String),
246
247    /// Version was already registered and we do not allow re-register of
248    /// the same `(rule_id, version)` tuple.
249    #[error("duplicate version: rule={rule_id}, version={version}")]
250    DuplicateVersion {
251        /// Rule identifier.
252        rule_id: String,
253        /// Version that was already present.
254        version: u64,
255    },
256}