Skip to main content

khive_runtime/
validation.rs

//! Validation pipeline types for pack-contributed KG rules (ADR-034).
//!
//! This module defines the trait surface and supporting types used by packs
//! to contribute domain-specific validation rules. Rules are compiled into the
//! pack binary and collected at boot time via the `Pack::VALIDATION_RULES` IDs
//! plus runtime rule implementations registered through `PackRuntime`.
//!
//! # Two rule shapes
//!
//! ADR-034 §9a defines two complementary rule shapes:
//!
//! - **`CorpusCheck`**: whole-corpus rules that receive all entities and edges
//!   together. Right for rules that need cross-entity joins (referential
//!   integrity, remote resolution, min-edge-density).
//!
//! - **`StreamingRule`**: per-record rules that evaluate one record at a time.
//!   Cheaper for rules that check individual entities or edges without joins
//!   (required properties, naming conventions, no-self-loops).
//!
//! Both shapes return `Vec<Violation>` per invocation. The validator aggregates
//! them into a `ValidationReport`.
22
23use std::collections::BTreeMap;
24
25// ── Rule identity ─────────────────────────────────────────────────────────────
26
/// Stable identifier of a validation rule.
///
/// Naming convention:
///
/// - Pack-contributed rules MUST carry their pack as a namespace prefix, in
///   the form `"<pack>/<rule-id>"` (e.g. `"biology/required-taxa-rank"`).
/// - Built-in rules carry no prefix (e.g. `"min-edge-density"`).
///
/// The alias is a plain `&'static str`, so identifiers are string literals
/// (or other `'static` strings) and the convention above is not enforced by
/// the type itself.
pub type RuleId = &'static str;
32
/// How serious a validation finding is (ADR-034 §1).
///
/// Variants are declared from least to most severe, so the derived ordering
/// gives `Info < Warning < Error`. Keep that order when adding variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Informational only; has no effect on the exit code.
    Info,
    /// Reported, but does not affect the exit code unless `--strict` is used.
    Warning,
    /// Causes `kkernel kg validate` to exit with code 1.
    Error,
}
44
45// ── Corpus snapshot ───────────────────────────────────────────────────────────
46
/// Opaque snapshot of the KG corpus passed to `CorpusCheck::check`.
///
/// v1 exposes the bare field set needed for the built-in rules. Pack authors
/// that need richer access should open an ADR to extend this surface — do NOT
/// reach through this struct to the storage layer.
///
/// # Construction
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot build
/// it with a struct literal. `Default` yields an empty snapshot (both counts
/// zero) whose public fields can then be assigned, which lets downstream
/// crates build fixtures for their own rule tests:
///
/// ```ignore
/// let mut snapshot = GraphSnapshot::default();
/// snapshot.entity_count = 3;
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[non_exhaustive]
pub struct GraphSnapshot {
    /// Total entity count in the snapshot.
    pub entity_count: usize,
    /// Total edge count in the snapshot.
    pub edge_count: usize,
}
59
60/// Context passed to all rule implementations.
61///
62/// Carries configuration overrides from `.khive/kg/rules.yaml` merged with
63/// pack defaults. Rules read per-rule config from `config[rule_id]`.
64#[non_exhaustive]
65pub struct ValidationContext<'a> {
66    /// The corpus snapshot for whole-corpus rules.
67    pub snapshot: &'a GraphSnapshot,
68    /// Per-rule config overrides, keyed by rule ID.
69    pub config: &'a BTreeMap<&'static str, serde_json::Value>,
70}
71
72// ── Violation ─────────────────────────────────────────────────────────────────
73
74/// A single rule violation produced by a rule implementation (ADR-034 §5).
75#[non_exhaustive]
76pub struct Violation {
77    /// The rule that produced this violation.
78    pub rule_id: &'static str,
79    /// Violation severity (may differ from rule-level severity for pack rules
80    /// that emit mixed-severity output within one rule).
81    pub severity: Severity,
82    /// Human-readable explanation of the violation.
83    pub message: String,
84    /// Whether the violation can be fixed by `kkernel kg validate --fix`.
85    pub fixable: bool,
86    /// Optional entity UUID (short-form) that the violation targets.
87    pub entity_id: Option<String>,
88    /// Optional edge UUID (short-form) that the violation targets.
89    pub edge_id: Option<String>,
90}
91
92impl Violation {
93    /// Construct a non-fixable violation without a specific entity/edge target.
94    pub fn new(rule_id: &'static str, severity: Severity, message: impl Into<String>) -> Self {
95        Self {
96            rule_id,
97            severity,
98            message: message.into(),
99            fixable: false,
100            entity_id: None,
101            edge_id: None,
102        }
103    }
104
105    /// Attach an entity identifier to an existing violation.
106    pub fn with_entity(mut self, id: impl Into<String>) -> Self {
107        self.entity_id = Some(id.into());
108        self
109    }
110}
111
112// ── Rule function type ────────────────────────────────────────────────────────
113
114/// Whole-corpus check function type (ADR-034 §2, §9a).
115///
116/// Receives the corpus snapshot and config context; returns all violations
117/// produced by the rule in one call.
118pub type RuleFn = fn(&ValidationContext<'_>) -> Vec<Violation>;
119
120/// Optional auto-fix function type (ADR-034 §7).
121///
122/// Receives the context and violations emitted by the corresponding `RuleFn`.
123/// Returns a `GraphPatch` (opaque in v1 — see below) that the validator applies
124/// before writing NDJSON. Returning `None` leaves the graph unchanged.
125///
126/// `GraphPatch` is a placeholder type in v1; the git-native write path
127/// (ADR-020) is out of scope for this cluster.
128pub type FixFn = fn(&ValidationContext<'_>, &[Violation]) -> Option<GraphPatch>;
129
/// Placeholder for the graph patch returned by a fix function (ADR-034 §7).
///
/// The v1 type has no fields because the auto-fix machinery is still a stub.
/// It exists so that pack authors can already write
/// `fix: Some(my_fix as FixFn)` and will not need a compile-time change once
/// the fix path is wired up.
///
/// NOTE: the type is `#[non_exhaustive]`, so only this crate can construct a
/// value; a fix function defined in another crate can currently only return
/// `None`.
#[non_exhaustive]
pub struct GraphPatch;
137
138// ── ValidationRule ────────────────────────────────────────────────────────────
139
140/// A pack-contributed validation rule (ADR-034 §9).
141///
142/// Pack authors declare an array of these in their `Pack` implementation
143/// (through the runtime `PackRuntime::validation_rules()` method). Rule IDs
144/// must follow the `<pack>/<rule-id>` namespace convention.
145///
146/// # Example
147///
148/// ```ignore
149/// use khive_runtime::validation::{ValidationRule, Severity};
150///
151/// fn check_taxa(ctx: &ValidationContext<'_>) -> Vec<Violation> {
152///     // ... domain-specific check ...
153///     vec![]
154/// }
155///
156/// pub const RULES: &[ValidationRule] = &[
157///     ValidationRule {
158///         id: "biology/required-taxa-rank",
159///         severity: Severity::Warning,
160///         description: "All species entities must carry a taxa_rank property",
161///         check: check_taxa,
162///         fix: None,
163///     },
164/// ];
165/// ```
166pub struct ValidationRule {
167    /// Stable rule identifier in `<pack>/<rule-id>` format.
168    pub id: RuleId,
169    /// Default severity; can be overridden in `.khive/kg/rules.yaml`.
170    pub severity: Severity,
171    /// Human-readable description shown in `kkernel kg validate` output.
172    pub description: &'static str,
173    /// Whole-corpus check function.
174    pub check: RuleFn,
175    /// Optional auto-fix function (ADR-034 §7). `None` for unfixable rules.
176    pub fix: Option<FixFn>,
177}
178
179// ── Aggregated report ─────────────────────────────────────────────────────────
180
181/// Aggregated result of running the full rule pipeline (ADR-034 §5).
182#[derive(Default)]
183pub struct ValidationReport {
184    /// Violations grouped by rule ID, sorted canonically per ADR-034 §9a.
185    pub violations_by_rule: BTreeMap<String, Vec<Violation>>,
186}
187
188impl ValidationReport {
189    /// Add violations for a given rule to the report.
190    pub fn add(&mut self, rule_id: &str, violations: Vec<Violation>) {
191        self.violations_by_rule
192            .entry(rule_id.to_string())
193            .or_default()
194            .extend(violations);
195    }
196
197    /// Total number of violations at `Severity::Error` across all rules.
198    pub fn error_count(&self) -> usize {
199        self.violations_by_rule
200            .values()
201            .flat_map(|vs| vs.iter())
202            .filter(|v| v.severity == Severity::Error)
203            .count()
204    }
205
206    /// Total number of violations at `Severity::Warning` across all rules.
207    pub fn warning_count(&self) -> usize {
208        self.violations_by_rule
209            .values()
210            .flat_map(|vs| vs.iter())
211            .filter(|v| v.severity == Severity::Warning)
212            .count()
213    }
214
215    /// `true` when no errors were found (the standard exit-0 condition).
216    pub fn passed(&self) -> bool {
217        self.error_count() == 0
218    }
219}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a target-less violation attributed to `test/rule`.
    fn violation(severity: Severity, message: &str) -> Violation {
        Violation::new("test/rule", severity, message)
    }

    #[test]
    fn violation_builder() {
        let v = Violation::new("test/rule", Severity::Warning, "something is off")
            .with_entity("abc123");
        assert_eq!(v.rule_id, "test/rule");
        assert_eq!(v.severity, Severity::Warning);
        assert_eq!(v.message, "something is off");
        assert!(!v.fixable);
        assert_eq!(v.entity_id.as_deref(), Some("abc123"));
        // `with_entity` must not touch the edge target.
        assert!(v.edge_id.is_none());
    }

    #[test]
    fn severity_is_ordered_by_urgency() {
        assert!(Severity::Info < Severity::Warning);
        assert!(Severity::Warning < Severity::Error);
    }

    #[test]
    fn empty_report_passes() {
        let report = ValidationReport::default();
        assert_eq!(report.error_count(), 0);
        assert_eq!(report.warning_count(), 0);
        assert!(report.passed());
    }

    #[test]
    fn report_error_count() {
        let mut report = ValidationReport::default();
        report.add(
            "test/rule",
            vec![
                violation(Severity::Error, "bad"),
                violation(Severity::Warning, "meh"),
            ],
        );
        assert_eq!(report.error_count(), 1);
        assert_eq!(report.warning_count(), 1);
        assert!(!report.passed());
    }

    #[test]
    fn report_passed_when_no_errors() {
        let mut report = ValidationReport::default();
        report.add("test/rule", vec![violation(Severity::Warning, "meh")]);
        assert!(report.passed());
    }

    #[test]
    fn info_violations_are_not_counted() {
        let mut report = ValidationReport::default();
        report.add("test/rule", vec![violation(Severity::Info, "fyi")]);
        assert_eq!(report.error_count(), 0);
        assert_eq!(report.warning_count(), 0);
        assert!(report.passed());
    }

    #[test]
    fn add_accumulates_per_rule() {
        let mut report = ValidationReport::default();
        report.add("test/rule", vec![violation(Severity::Warning, "first")]);
        report.add("test/rule", vec![violation(Severity::Error, "second")]);
        assert_eq!(report.violations_by_rule.len(), 1);
        assert_eq!(report.violations_by_rule["test/rule"].len(), 2);
        assert_eq!(report.error_count(), 1);
        assert_eq!(report.warning_count(), 1);
    }

    #[test]
    fn graph_patch_is_constructible() {
        // Ensure the placeholder type can be named and constructed.
        let _patch = GraphPatch;
    }

    #[test]
    fn validation_rule_fields() {
        fn dummy_check(_ctx: &ValidationContext<'_>) -> Vec<Violation> {
            vec![]
        }
        let rule = ValidationRule {
            id: "bio/taxa",
            severity: Severity::Warning,
            description: "taxa must exist",
            check: dummy_check,
            fix: None,
        };
        assert_eq!(rule.id, "bio/taxa");
        assert_eq!(rule.severity, Severity::Warning);
        assert_eq!(rule.description, "taxa must exist");
        assert!(rule.fix.is_none());

        // The stored function pointer must be callable with a real context.
        let snapshot = GraphSnapshot {
            entity_count: 0,
            edge_count: 0,
        };
        let config = BTreeMap::new();
        let ctx = ValidationContext {
            snapshot: &snapshot,
            config: &config,
        };
        assert!((rule.check)(&ctx).is_empty());
    }
}