ringkernel_core/rules/mod.rs
1//! Hot-swappable compiled rule artifacts.
2//!
3//! Per `docs/superpowers/specs/2026-04-17-v1.1-vyngraph-gaps.md` section 3.3,
4//! this module lets RingKernel accept opaque compiled rule artifacts (PTX +
5//! metadata) and hot-swap them atomically without runtime restart.
6//!
7//! ## Design philosophy
8//!
9//! RingKernel stays **rule-format-agnostic**. Callers such as VynGraph own
10//! OWL 2 RL / SHACL parsing and compile rules to PTX using our existing
11//! `ringkernel-cuda-codegen` pipeline. RingKernel receives the compiled
12//! artifact via [`CompiledRule`] and manages versioning, validation,
13//! rollback, and the atomic swap state machine.
14//!
15//! ## Artifact lifecycle
16//!
//! ```text
//! CompiledRule ─register_rule()─► RuleStatus::Registered
//!      │                                      │
//!      │ reload_rule()                        │
//!      ▼                                      ▼
//! (new version) ─pre_stage/quiesce/swap─► RuleStatus::Active
//!                                             │
//!      prior version: Superseded(new_ver)     │
//!                                             │
//!                      rollback_rule() ◄──────┤
//!      current version: Rolledback            │
//!      prior version: Active                  │
//! ```
30//!
31//! ## Guarantees
32//!
33//! - Version monotonicity (downgrades rejected unless explicit rollback)
34//! - Bounded history (FIFO eviction beyond `max_history`)
35//! - Validation-before-swap (compute cap, dependencies, signature)
36//! - Pluggable swap backend (`NoopSwapBackend` for tests, CUDA in production)
37//!
38//! ## Example
39//!
//! ```ignore
//! use std::sync::Arc;
//! use ringkernel_core::rules::{
//!     ActorConfig, CompiledRule, NoopSwapBackend, RuleMetadata, RuleRegistry,
//! };
//!
//! # async fn example() {
//! let registry = RuleRegistry::new(5, Arc::new(NoopSwapBackend));
//! let rule = CompiledRule {
//!     rule_id: "gaap-consolidation".into(),
//!     version: 1,
//!     ptx: b".version 8.0\n.target sm_90\n".to_vec(),
//!     compute_cap: "sm_90".into(),
//!     depends_on: vec![],
//!     signature: None,
//!     actor_config: ActorConfig::default(),
//!     metadata: RuleMetadata::default(),
//! };
//! let handle = registry.register_rule(rule, "sm_90").await.unwrap();
//! assert_eq!(handle.version, 1);
//! # }
//! ```
62//!
63//! [`HotReloadManager::rule_registry()`] exposes the registry for use by
64//! existing multi-GPU hot-reload plumbing.
65//!
66//! [`HotReloadManager::rule_registry()`]: crate::multi_gpu::HotReloadManager::rule_registry
67
68use std::time::{Duration, SystemTime};
69
70pub mod registry;
71
72pub use registry::{NoopSwapBackend, RuleRegistry, RuleSwapBackend, SignatureVerifier};
73
74/// A compiled rule artifact ready for GPU hot-swap.
75///
76/// RingKernel does not inspect `ptx` beyond validating compute capability,
77/// dependencies and (optionally) signature. The caller owns semantic
78/// correctness of the compilation.
79#[derive(Debug, Clone)]
80pub struct CompiledRule {
81 /// Caller-scoped rule set identifier (e.g. `"gaap-consolidation"`).
82 pub rule_id: String,
83 /// Monotonically increasing version; later versions must be strictly
84 /// greater than the currently active version.
85 pub version: u64,
86 /// Compiled PTX bytes for the actor kernel.
87 pub ptx: Vec<u8>,
88 /// Required compute capability, e.g. `"sm_90"` for H100.
89 pub compute_cap: String,
90 /// Other `rule_id`s that must already be registered before this rule
91 /// can be installed. Used for inference-rule dependency graphs.
92 pub depends_on: Vec<String>,
93 /// Optional integrity signature (format is verifier-specific).
94 pub signature: Option<Vec<u8>>,
95 /// Actor launch configuration.
96 pub actor_config: ActorConfig,
97 /// Opaque metadata passed through for audit/logging. RingKernel does
98 /// not interpret any of these fields.
99 pub metadata: RuleMetadata,
100}
101
/// Launch configuration for the rule's actor kernel.
///
/// This is a plain value type made only of integers, so it derives
/// `PartialEq`/`Eq`: two configurations can be compared directly (for
/// example to detect whether a reload changes the launch shape, or in
/// `assert_eq!` within tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ActorConfig {
    /// CUDA block dimensions `(x, y, z)`.
    pub block_dim: (u32, u32, u32),
    /// CUDA grid dimensions `(x, y, z)`.
    pub grid_dim: (u32, u32, u32),
    /// Dynamic shared-memory bytes to allocate per block.
    pub shared_mem_bytes: u32,
    /// Maximum number of in-flight messages this actor accepts.
    pub max_in_flight: u32,
}
114
115impl Default for ActorConfig {
116 fn default() -> Self {
117 Self {
118 block_dim: (1, 1, 1),
119 grid_dim: (1, 1, 1),
120 shared_mem_bytes: 0,
121 max_in_flight: 1024,
122 }
123 }
124}
125
/// Pass-through metadata carried alongside a compiled rule.
///
/// Every field is optional, and none of them affects the swap state
/// machine; they serve audit trails, observability and attribution only.
/// Callers may populate or skip them freely — RingKernel forwards the
/// values unchanged.
#[derive(Clone, Debug, Default)]
pub struct RuleMetadata {
    /// Human-readable name of the source language, such as `"OWL 2 RL"`,
    /// `"SHACL"` or `"custom DSL"`. RingKernel treats it as opaque.
    pub source_language: Option<String>,
    /// SHA-256 digest of the rule source text, kept for audit
    /// reproducibility.
    pub source_hash: Option<[u8; 32]>,
    /// Time at which the rule was compiled.
    pub compiled_at: Option<SystemTime>,
    /// Version string of the compiler that produced the artifact.
    pub compiler_version: Option<String>,
    /// Principal that authored / compiled the rule.
    pub author: Option<String>,
}
146
147/// Lightweight handle returned after a successful registry operation.
148#[derive(Debug, Clone)]
149pub struct RuleHandle {
150 /// Rule identifier.
151 pub rule_id: String,
152 /// Rule version.
153 pub version: u64,
154 /// Lifecycle status of this specific version.
155 pub status: RuleStatus,
156 /// When the version was registered with the registry.
157 pub registered_at: SystemTime,
158}
159
/// Where a particular rule version currently sits in its lifecycle.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RuleStatus {
    /// Loaded and validated, but not (yet) the active version.
    Registered,
    /// The version currently executing on the device.
    Active,
    /// In the process of being drained ahead of a swap.
    Quiescing,
    /// Replaced by a newer version; the payload is that newer version.
    Superseded(u64),
    /// A formerly `Active` version that the user chose to revert away
    /// from via rollback.
    Rolledback,
    /// Validation or the swap backend failed, so this version is unusable.
    Failed,
}
176
/// Summary produced once a reload (or a rollback) has completed
/// successfully.
#[derive(Clone, Debug)]
pub struct ReloadReport {
    /// Identifier of the rule that was reloaded.
    pub rule_id: String,
    /// Version that was left behind; `0` when this was the initial
    /// activation.
    pub from_version: u64,
    /// Version that is `Active` now.
    pub to_version: u64,
    /// How long draining the old actor took.
    pub quiesce_duration: Duration,
    /// How long the atomic pointer swap itself took.
    pub swap_duration: Duration,
    /// Number of messages in flight during the swap window, as reported
    /// by the swap backend.
    pub messages_in_flight_during_swap: u64,
    /// `true` when the previous version is still retained in history and
    /// can therefore be targeted by a subsequent rollback.
    pub rollback_available: bool,
}
197
198/// Errors produced by the rule registry.
199#[derive(Debug, thiserror::Error)]
200pub enum RuleError {
201 /// No such rule in the registry.
202 #[error("rule not found: {0}")]
203 NotFound(String),
204
205 /// Incoming version is not strictly newer than the current active version.
206 #[error("version downgrade rejected: current={current}, proposed={proposed}")]
207 VersionDowngrade {
208 /// Currently active version.
209 current: u64,
210 /// Version the caller tried to install.
211 proposed: u64,
212 },
213
214 /// Rule targets a compute capability the device does not meet.
215 #[error("compute capability mismatch: rule={required}, device={available}")]
216 ComputeCapMismatch {
217 /// Compute cap the rule requires.
218 required: String,
219 /// Compute cap the device actually has.
220 available: String,
221 },
222
223 /// Rule depends on another rule that is not registered.
224 #[error("dependency missing: {0}")]
225 MissingDependency(String),
226
227 /// Signature check did not succeed.
228 #[error("signature verification failed")]
229 InvalidSignature,
230
231 /// Caller asked to roll back to a version no longer in history.
232 #[error("rollback target not in history: version={0}")]
233 RollbackTargetMissing(u64),
234
235 /// No version is currently active — nothing to roll back from.
236 #[error("no active version to rollback")]
237 NoActiveVersion,
238
239 /// Quiesce window elapsed before the actor finished draining.
240 #[error("quiesce timeout after {0:?}")]
241 QuiesceTimeout(Duration),
242
243 /// Swap backend refused the operation (wraps backend-specific detail).
244 #[error("swap backend error: {0}")]
245 BackendError(String),
246
247 /// Version was already registered and we do not allow re-register of
248 /// the same `(rule_id, version)` tuple.
249 #[error("duplicate version: rule={rule_id}, version={version}")]
250 DuplicateVersion {
251 /// Rule identifier.
252 rule_id: String,
253 /// Version that was already present.
254 version: u64,
255 },
256}