Skip to main content

khive_runtime/
validation.rs

1//! Validation pipeline types for pack-contributed KG rules.
2//!
3//! Defines the trait surface for `CorpusCheck` (whole-corpus, cross-entity joins)
4//! and `StreamingRule` (per-record) shapes. Both return `Vec<Violation>` aggregated
5//! into a `ValidationReport`.
6
7use std::collections::BTreeMap;
8
9// ── Rule identity ─────────────────────────────────────────────────────────────
10
/// Stable identifier of a validation rule.
///
/// Pack-contributed rules MUST be namespaced by pack as `"<pack>/<rule-id>"`
/// (e.g. `"biology/required-taxa-rank"`).
/// Built-in rules use no namespace prefix (e.g. `"min-edge-density"`).
///
/// The alias is a `&'static str`, so identifiers have to be string literals
/// or other data that lives for the whole program.
pub type RuleId = &'static str;
16
/// Severity of a validation finding.
///
/// Variants are declared from least to most severe, so the derived `Ord`
/// yields `Info < Warning < Error`. Reordering the variants changes every
/// comparison made on this type.
///
/// `Hash` is derived so a severity can key a `HashMap`/`HashSet`
/// (for example when tallying findings per severity).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Severity {
    /// Informational; no exit-code effect.
    Info,
    /// Reported but does not affect the exit code (unless `--strict`).
    Warning,
    /// Causes `kkernel kg validate` to exit with code 1.
    Error,
}
28
29// ── Corpus snapshot ───────────────────────────────────────────────────────────
30
/// Summary snapshot of the KG corpus passed to whole-corpus rules.
///
/// v1 exposes the bare field set needed for the built-in rules. Pack authors
/// that need richer access should open a design review to extend this surface —
/// do NOT reach through this struct to the storage layer.
///
/// The struct is `#[non_exhaustive]`: code outside this crate can read the
/// fields but cannot build a value with a struct literal, which leaves room
/// for new fields.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GraphSnapshot {
    /// Total entity count in the snapshot.
    pub entity_count: usize,
    /// Total edge count in the snapshot.
    pub edge_count: usize,
}
43
44/// Context passed to all rule implementations.
45///
46/// Carries configuration overrides from `.khive/kg/rules.toml` merged with
47/// pack defaults. Rules read per-rule config from `config[rule_id]`.
48#[non_exhaustive]
49pub struct ValidationContext<'a> {
50    /// The corpus snapshot for whole-corpus rules.
51    pub snapshot: &'a GraphSnapshot,
52    /// Per-rule config overrides, keyed by rule ID.
53    pub config: &'a BTreeMap<&'static str, serde_json::Value>,
54}
55
56// ── Violation ─────────────────────────────────────────────────────────────────
57
58/// A single rule violation produced by a rule implementation.
59#[non_exhaustive]
60pub struct Violation {
61    /// The rule that produced this violation.
62    pub rule_id: &'static str,
63    /// Violation severity (may differ from rule-level severity for pack rules
64    /// that emit mixed-severity output within one rule).
65    pub severity: Severity,
66    /// Human-readable explanation of the violation.
67    pub message: String,
68    /// Whether the violation can be fixed by `kkernel kg validate --fix`.
69    pub fixable: bool,
70    /// Optional entity UUID (short-form) that the violation targets.
71    pub entity_id: Option<String>,
72    /// Optional edge UUID (short-form) that the violation targets.
73    pub edge_id: Option<String>,
74}
75
76impl Violation {
77    /// Construct a non-fixable violation without a specific entity/edge target.
78    pub fn new(rule_id: &'static str, severity: Severity, message: impl Into<String>) -> Self {
79        Self {
80            rule_id,
81            severity,
82            message: message.into(),
83            fixable: false,
84            entity_id: None,
85            edge_id: None,
86        }
87    }
88
89    /// Attach an entity identifier to an existing violation.
90    pub fn with_entity(mut self, id: impl Into<String>) -> Self {
91        self.entity_id = Some(id.into());
92        self
93    }
94}
95
96// ── Rule function type ────────────────────────────────────────────────────────
97
/// Whole-corpus check function type.
///
/// Receives the [`ValidationContext`] (corpus snapshot plus per-rule config)
/// and returns every violation the rule produces in a single call. An empty
/// vector means the rule found nothing to report.
pub type RuleFn = fn(&ValidationContext<'_>) -> Vec<Violation>;
103
/// Optional auto-fix function type.
///
/// Receives the context and the violations emitted by the corresponding
/// `RuleFn`. Returns a `GraphPatch` (opaque in v1 — see its definition) that
/// the validator applies before writing NDJSON. Returning `None` leaves the
/// graph unchanged.
///
/// `GraphPatch` is a placeholder type in v1; the auto-fix write path is out of
/// scope for this cluster.
pub type FixFn = fn(&ValidationContext<'_>, &[Violation]) -> Option<GraphPatch>;
113
/// Opaque graph patch produced by a fix function.
///
/// v1 carries no fields — the auto-fix machinery is stubbed. The type exists
/// so pack authors can write `fix: Some(my_fix as FixFn)` today and need no
/// source change once the fix path is wired up.
///
/// `#[non_exhaustive]` keeps construction inside this crate, which leaves
/// room to add fields later.
#[non_exhaustive]
#[derive(Debug)]
pub struct GraphPatch;
121
122// ── ValidationRule ────────────────────────────────────────────────────────────
123
124/// A pack-contributed validation rule.
125///
126/// Rule IDs must follow the `<pack>/<rule-id>` namespace convention.
127/// See `docs/validation.md` for declaration examples and severity override rules.
128pub struct ValidationRule {
129    /// Stable rule identifier in `<pack>/<rule-id>` format.
130    pub id: RuleId,
131    /// Default severity; can be overridden in `.khive/kg/rules.toml`.
132    pub severity: Severity,
133    /// Human-readable description shown in `kkernel kg validate` output.
134    pub description: &'static str,
135    /// Whole-corpus check function.
136    pub check: RuleFn,
137    /// Optional auto-fix function. `None` for unfixable rules.
138    pub fix: Option<FixFn>,
139}
140
141// ── Aggregated report ─────────────────────────────────────────────────────────
142
143/// Aggregated result of running the full rule pipeline.
144#[derive(Default)]
145pub struct ValidationReport {
146    /// Violations grouped by rule ID, sorted canonically by rule ID.
147    pub violations_by_rule: BTreeMap<String, Vec<Violation>>,
148}
149
150impl ValidationReport {
151    /// Add violations for a given rule to the report.
152    pub fn add(&mut self, rule_id: &str, violations: Vec<Violation>) {
153        self.violations_by_rule
154            .entry(rule_id.to_string())
155            .or_default()
156            .extend(violations);
157    }
158
159    /// Total number of violations at `Severity::Error` across all rules.
160    pub fn error_count(&self) -> usize {
161        self.violations_by_rule
162            .values()
163            .flat_map(|vs| vs.iter())
164            .filter(|v| v.severity == Severity::Error)
165            .count()
166    }
167
168    /// Total number of violations at `Severity::Warning` across all rules.
169    pub fn warning_count(&self) -> usize {
170        self.violations_by_rule
171            .values()
172            .flat_map(|vs| vs.iter())
173            .filter(|v| v.severity == Severity::Warning)
174            .count()
175    }
176
177    /// `true` when no errors were found (the standard exit-0 condition).
178    pub fn passed(&self) -> bool {
179        self.error_count() == 0
180    }
181}
182
#[cfg(test)]
mod tests {
    use super::*;

    /// `Violation::new` plus `with_entity` fill the expected fields and leave
    /// the remaining ones at their defaults.
    #[test]
    fn violation_builder() {
        let v = Violation::new("test/rule", Severity::Warning, "something is off")
            .with_entity("abc123");
        assert_eq!(v.rule_id, "test/rule");
        assert_eq!(v.severity, Severity::Warning);
        assert_eq!(v.message, "something is off");
        assert!(!v.fixable);
        assert_eq!(v.entity_id.as_deref(), Some("abc123"));
        assert!(v.edge_id.is_none());
    }

    /// The derived ordering follows declaration order: least to most severe.
    #[test]
    fn severity_ordering() {
        assert!(Severity::Info < Severity::Warning);
        assert!(Severity::Warning < Severity::Error);
    }

    #[test]
    fn report_error_count() {
        let mut report = ValidationReport::default();
        report.add(
            "test/rule",
            vec![
                Violation::new("test/rule", Severity::Error, "bad"),
                Violation::new("test/rule", Severity::Warning, "meh"),
            ],
        );
        assert_eq!(report.error_count(), 1);
        assert_eq!(report.warning_count(), 1);
        assert!(!report.passed());
    }

    #[test]
    fn report_passed_when_no_errors() {
        let mut report = ValidationReport::default();
        report.add(
            "test/rule",
            vec![Violation::new("test/rule", Severity::Warning, "meh")],
        );
        assert!(report.passed());
    }

    /// A report nothing was added to has no findings and passes.
    #[test]
    fn empty_report_passes() {
        let report = ValidationReport::default();
        assert_eq!(report.error_count(), 0);
        assert_eq!(report.warning_count(), 0);
        assert!(report.passed());
    }

    /// Two `add` calls for the same rule end up in one group.
    #[test]
    fn report_add_merges_same_rule() {
        let mut report = ValidationReport::default();
        report.add(
            "test/rule",
            vec![Violation::new("test/rule", Severity::Info, "first")],
        );
        report.add(
            "test/rule",
            vec![Violation::new("test/rule", Severity::Error, "second")],
        );
        assert_eq!(report.violations_by_rule.len(), 1);
        assert_eq!(report.violations_by_rule["test/rule"].len(), 2);
        assert_eq!(report.error_count(), 1);
    }

    #[test]
    fn graph_patch_is_constructible() {
        // Ensure the placeholder type can be named and constructed.
        let _patch = GraphPatch;
    }

    #[test]
    fn validation_rule_fields() {
        fn dummy_check(_ctx: &ValidationContext<'_>) -> Vec<Violation> {
            vec![]
        }
        let rule = ValidationRule {
            id: "bio/taxa",
            severity: Severity::Warning,
            description: "taxa must exist",
            check: dummy_check,
            fix: None,
        };
        assert_eq!(rule.id, "bio/taxa");
        assert!(rule.fix.is_none());
    }

    /// A rule's `check` pointer can be invoked with a context built in-crate
    /// and sees the snapshot it was given.
    #[test]
    fn validation_rule_check_runs_against_context() {
        fn flag_when_no_edges(ctx: &ValidationContext<'_>) -> Vec<Violation> {
            if ctx.snapshot.edge_count == 0 {
                vec![Violation::new("test/no-edges", Severity::Warning, "no edges")]
            } else {
                vec![]
            }
        }
        let rule = ValidationRule {
            id: "test/no-edges",
            severity: Severity::Warning,
            description: "corpus must have edges",
            check: flag_when_no_edges,
            fix: None,
        };

        let config = BTreeMap::new();
        let sparse = GraphSnapshot {
            entity_count: 2,
            edge_count: 0,
        };
        let found = (rule.check)(&ValidationContext {
            snapshot: &sparse,
            config: &config,
        });
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].rule_id, "test/no-edges");

        let connected = GraphSnapshot {
            entity_count: 2,
            edge_count: 1,
        };
        let found = (rule.check)(&ValidationContext {
            snapshot: &connected,
            config: &config,
        });
        assert!(found.is_empty());
    }
}