khive_runtime/validation.rs
1//! Validation pipeline types for pack-contributed KG rules (ADR-034).
2//!
3//! This module defines the trait surface and supporting types used by packs
4//! to contribute domain-specific validation rules. Rules are compiled into the
5//! pack binary and collected at boot time via the `Pack::VALIDATION_RULES` IDs
6//! plus runtime rule implementations registered through `PackRuntime`.
7//!
8//! # Two rule shapes
9//!
10//! ADR-034 §9a defines two complementary rule shapes:
11//!
12//! - **`CorpusCheck`**: whole-corpus rules that receive all entities and edges
13//! together. Right for rules that need cross-entity joins (referential
14//! integrity, remote resolution, min-edge-density).
15//!
16//! - **`StreamingRule`**: per-record rules that evaluate one record at a time.
17//! Cheaper for rules that check individual entities or edges without joins
18//! (required properties, naming conventions, no-self-loops).
19//!
20//! Both shapes return `Vec<Violation>` per invocation. The validator aggregates
21//! them into a `ValidationReport`.
22
23use std::collections::BTreeMap;
24
25// ── Rule identity ─────────────────────────────────────────────────────────────
26
/// Stable identifier for a validation rule.
///
/// Rules contributed by a pack MUST be namespaced with the pack name, giving
/// the form `"<pack>/<rule-id>"` (e.g. `"biology/required-taxa-rank"`).
/// Built-in rules are the exception: they carry no namespace prefix
/// (e.g. `"min-edge-density"`).
pub type RuleId = &'static str;
32
/// How serious a validation finding is (ADR-034 §1).
///
/// Variants are declared from least to most severe, so the derived ordering
/// yields `Info < Warning < Error`.
///
/// Effect on the exit code of `kkernel kg validate`:
///
/// - `Info`: none; purely informational.
/// - `Warning`: none unless `--strict` is given; the finding is still reported.
/// - `Error`: the command exits with code 1.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Informational finding.
    Info,
    /// Reported finding that only matters for the exit code under `--strict`.
    Warning,
    /// Finding that fails validation.
    Error,
}
44
45// ── Corpus snapshot ───────────────────────────────────────────────────────────
46
/// Opaque snapshot of the KG corpus passed to `CorpusCheck::check`.
///
/// v1 exposes only the bare field set needed by the built-in rules. Pack
/// authors that need richer access should open an ADR to extend this surface —
/// do NOT reach through this struct to the storage layer.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot build
/// it with a struct literal. `Default` (both counts zero) is provided as the
/// forward-compatible starting point; the public fields can then be assigned.
#[derive(Debug, Clone, Default, PartialEq)]
#[non_exhaustive]
pub struct GraphSnapshot {
    /// Total entity count in the snapshot.
    pub entity_count: usize,
    /// Total edge count in the snapshot.
    pub edge_count: usize,
}
59
60/// Context passed to all rule implementations.
61///
62/// Carries configuration overrides from `.khive/kg/rules.yaml` merged with
63/// pack defaults. Rules read per-rule config from `config[rule_id]`.
64#[non_exhaustive]
65pub struct ValidationContext<'a> {
66 /// The corpus snapshot for whole-corpus rules.
67 pub snapshot: &'a GraphSnapshot,
68 /// Per-rule config overrides, keyed by rule ID.
69 pub config: &'a BTreeMap<&'static str, serde_json::Value>,
70}
71
72// ── Violation ─────────────────────────────────────────────────────────────────
73
74/// A single rule violation produced by a rule implementation (ADR-034 §5).
75#[non_exhaustive]
76pub struct Violation {
77 /// The rule that produced this violation.
78 pub rule_id: &'static str,
79 /// Violation severity (may differ from rule-level severity for pack rules
80 /// that emit mixed-severity output within one rule).
81 pub severity: Severity,
82 /// Human-readable explanation of the violation.
83 pub message: String,
84 /// Whether the violation can be fixed by `kkernel kg validate --fix`.
85 pub fixable: bool,
86 /// Optional entity UUID (short-form) that the violation targets.
87 pub entity_id: Option<String>,
88 /// Optional edge UUID (short-form) that the violation targets.
89 pub edge_id: Option<String>,
90}
91
92impl Violation {
93 /// Construct a non-fixable violation without a specific entity/edge target.
94 pub fn new(rule_id: &'static str, severity: Severity, message: impl Into<String>) -> Self {
95 Self {
96 rule_id,
97 severity,
98 message: message.into(),
99 fixable: false,
100 entity_id: None,
101 edge_id: None,
102 }
103 }
104
105 /// Attach an entity identifier to an existing violation.
106 pub fn with_entity(mut self, id: impl Into<String>) -> Self {
107 self.entity_id = Some(id.into());
108 self
109 }
110}
111
112// ── Rule function type ────────────────────────────────────────────────────────
113
114/// Whole-corpus check function type (ADR-034 §2, §9a).
115///
116/// Receives the corpus snapshot and config context; returns all violations
117/// produced by the rule in one call.
118pub type RuleFn = fn(&ValidationContext<'_>) -> Vec<Violation>;
119
120/// Optional auto-fix function type (ADR-034 §7).
121///
122/// Receives the context and violations emitted by the corresponding `RuleFn`.
123/// Returns a `GraphPatch` (opaque in v1 — see below) that the validator applies
124/// before writing NDJSON. Returning `None` leaves the graph unchanged.
125///
126/// `GraphPatch` is a placeholder type in v1; the git-native write path
127/// (ADR-020) is out of scope for this cluster.
128pub type FixFn = fn(&ValidationContext<'_>, &[Violation]) -> Option<GraphPatch>;
129
/// Opaque graph patch produced by a fix function (ADR-034 §7).
///
/// v1 carries no fields — the auto-fix machinery is stubbed. The type exists
/// so pack authors can already write `fix: Some(my_fix as FixFn)` and will not
/// need a signature change once the fix path is wired up.
///
/// Because the type is `#[non_exhaustive]`, it cannot be constructed outside
/// this crate; until a constructor is exposed, a pack's fix function can only
/// return `None`.
#[derive(Debug)]
#[non_exhaustive]
pub struct GraphPatch;
137
138// ── ValidationRule ────────────────────────────────────────────────────────────
139
140/// A pack-contributed validation rule (ADR-034 §9).
141///
142/// Pack authors declare an array of these in their `Pack` implementation
143/// (through the runtime `PackRuntime::validation_rules()` method). Rule IDs
144/// must follow the `<pack>/<rule-id>` namespace convention.
145///
146/// # Example
147///
148/// ```ignore
149/// use khive_runtime::validation::{ValidationRule, Severity};
150///
151/// fn check_taxa(ctx: &ValidationContext<'_>) -> Vec<Violation> {
152/// // ... domain-specific check ...
153/// vec![]
154/// }
155///
156/// pub const RULES: &[ValidationRule] = &[
157/// ValidationRule {
158/// id: "biology/required-taxa-rank",
159/// severity: Severity::Warning,
160/// description: "All species entities must carry a taxa_rank property",
161/// check: check_taxa,
162/// fix: None,
163/// },
164/// ];
165/// ```
166pub struct ValidationRule {
167 /// Stable rule identifier in `<pack>/<rule-id>` format.
168 pub id: RuleId,
169 /// Default severity; can be overridden in `.khive/kg/rules.yaml`.
170 pub severity: Severity,
171 /// Human-readable description shown in `kkernel kg validate` output.
172 pub description: &'static str,
173 /// Whole-corpus check function.
174 pub check: RuleFn,
175 /// Optional auto-fix function (ADR-034 §7). `None` for unfixable rules.
176 pub fix: Option<FixFn>,
177}
178
179// ── Aggregated report ─────────────────────────────────────────────────────────
180
181/// Aggregated result of running the full rule pipeline (ADR-034 §5).
182#[derive(Default)]
183pub struct ValidationReport {
184 /// Violations grouped by rule ID, sorted canonically per ADR-034 §9a.
185 pub violations_by_rule: BTreeMap<String, Vec<Violation>>,
186}
187
188impl ValidationReport {
189 /// Add violations for a given rule to the report.
190 pub fn add(&mut self, rule_id: &str, violations: Vec<Violation>) {
191 self.violations_by_rule
192 .entry(rule_id.to_string())
193 .or_default()
194 .extend(violations);
195 }
196
197 /// Total number of violations at `Severity::Error` across all rules.
198 pub fn error_count(&self) -> usize {
199 self.violations_by_rule
200 .values()
201 .flat_map(|vs| vs.iter())
202 .filter(|v| v.severity == Severity::Error)
203 .count()
204 }
205
206 /// Total number of violations at `Severity::Warning` across all rules.
207 pub fn warning_count(&self) -> usize {
208 self.violations_by_rule
209 .values()
210 .flat_map(|vs| vs.iter())
211 .filter(|v| v.severity == Severity::Warning)
212 .count()
213 }
214
215 /// `true` when no errors were found (the standard exit-0 condition).
216 pub fn passed(&self) -> bool {
217 self.error_count() == 0
218 }
219}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// `new` followed by `with_entity` sets exactly the fields it is given.
    #[test]
    fn violation_builder() {
        let base = Violation::new("test/rule", Severity::Warning, "something is off");
        let violation = base.with_entity("abc123");

        assert_eq!(violation.rule_id, "test/rule");
        assert_eq!(violation.severity, Severity::Warning);
        assert!(!violation.fixable);
        assert_eq!(violation.entity_id.as_deref(), Some("abc123"));
    }

    /// Errors and warnings are tallied separately; one error fails the report.
    #[test]
    fn report_error_count() {
        let findings = vec![
            Violation::new("test/rule", Severity::Error, "bad"),
            Violation::new("test/rule", Severity::Warning, "meh"),
        ];
        let mut report = ValidationReport::default();
        report.add("test/rule", findings);

        assert_eq!(report.error_count(), 1);
        assert_eq!(report.warning_count(), 1);
        assert!(!report.passed());
    }

    /// A report holding only warnings still passes.
    #[test]
    fn report_passed_when_no_errors() {
        let only_warning = vec![Violation::new("test/rule", Severity::Warning, "meh")];
        let mut report = ValidationReport::default();
        report.add("test/rule", only_warning);

        assert!(report.passed());
    }

    /// The placeholder patch type can be named and constructed in-crate.
    #[test]
    fn graph_patch_is_constructible() {
        let _patch: GraphPatch = GraphPatch;
    }

    /// A rule descriptor can be written as a plain struct literal.
    #[test]
    fn validation_rule_fields() {
        fn no_findings(_ctx: &ValidationContext<'_>) -> Vec<Violation> {
            Vec::new()
        }

        let rule = ValidationRule {
            id: "bio/taxa",
            severity: Severity::Warning,
            description: "taxa must exist",
            check: no_findings,
            fix: None,
        };

        assert_eq!(rule.id, "bio/taxa");
        assert!(rule.fix.is_none());
    }
}
281}