Skip to main content

haystack_core/ontology/
graph_validation.rs

1// Graph-wide validation against a DefNamespace.
2
3use std::collections::{HashMap, HashSet};
4
5use crate::graph::EntityGraph;
6use crate::kinds::Kind;
7
8use super::namespace::DefNamespace;
9use super::validation::FitIssue;
10
/// Summary statistics for a validation run.
///
/// All counts are numbers of entities. As filled in by `validate_graph`,
/// `valid + with_errors + untyped == total_entities`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ValidationSummary {
    /// Number of entities in the validated graph.
    pub total_entities: usize,
    /// Typed entities for which no fit issue was recorded.
    pub valid: usize,
    /// Entities with warnings. `validate_graph` currently always reports 0.
    pub with_warnings: usize,
    /// Entities with at least one recorded fit issue.
    pub with_errors: usize,
    /// Entities not counted as typed: they carry no marker tag known to the
    /// namespace, or they have no `id` and were skipped.
    pub untyped: usize,
}
20
21/// Full validation report for a graph.
22#[derive(Debug)]
23pub struct ValidationReport {
24    /// Per-entity fit issues (entity_id → issues).
25    pub entity_issues: HashMap<String, Vec<FitIssue>>,
26    /// Dangling references: (entity_id, tag_name, missing_ref_val).
27    pub dangling_refs: Vec<(String, String, String)>,
28    /// Fraction of entities that match at least one known spec (0.0 - 1.0).
29    pub spec_coverage: f64,
30    /// Summary statistics.
31    pub summary: ValidationSummary,
32}
33
34/// Validate all entities in the graph against loaded specs.
35///
36/// Checks every entity against the namespace for spec conformance,
37/// collects dangling ref issues, and computes coverage statistics.
38pub fn validate_graph(graph: &EntityGraph, ns: &DefNamespace) -> ValidationReport {
39    let all = graph.all();
40    let total = all.len();
41
42    // Collect all entity ids for dangling-ref checking.
43    let id_set: HashSet<&str> = all
44        .iter()
45        .filter_map(|e| e.id().map(|r| r.val.as_str()))
46        .collect();
47
48    let mut entity_issues: HashMap<String, Vec<FitIssue>> = HashMap::new();
49    let mut dangling_refs: Vec<(String, String, String)> = Vec::new();
50    let mut typed_count: usize = 0;
51    let mut error_count: usize = 0;
52    let mut mandatory_cache: HashMap<&str, HashSet<String>> = HashMap::new();
53
54    for entity in &all {
55        let entity_id = match entity.id() {
56            Some(r) => r.val.clone(),
57            None => continue,
58        };
59
60        // Determine types this entity claims via marker tags that are known defs.
61        let mut is_typed = false;
62        let mut issues: Vec<FitIssue> = Vec::new();
63
64        for (tag_name, val) in entity.iter() {
65            if !matches!(val, Kind::Marker) {
66                continue;
67            }
68            if !ns.contains(tag_name) {
69                continue;
70            }
71            is_typed = true;
72            let mandatory = mandatory_cache
73                .entry(tag_name)
74                .or_insert_with(|| ns.mandatory_tags(tag_name));
75            for tag in mandatory.iter() {
76                if entity.missing(tag) {
77                    issues.push(FitIssue::MissingMarker {
78                        tag: tag.clone(),
79                        spec: tag_name.to_string(),
80                    });
81                }
82            }
83        }
84
85        // Check dangling refs.
86        for (tag_name, val) in entity.iter() {
87            if tag_name == "id" {
88                continue;
89            }
90            if let Kind::Ref(href) = val
91                && !id_set.contains(href.val.as_str())
92            {
93                dangling_refs.push((entity_id.clone(), tag_name.to_string(), href.val.clone()));
94            }
95        }
96
97        if !issues.is_empty() {
98            error_count += 1;
99            entity_issues.insert(entity_id, issues);
100        }
101        if is_typed {
102            typed_count += 1;
103        }
104    }
105
106    let untyped = total - typed_count;
107    let spec_coverage = if total == 0 {
108        0.0
109    } else {
110        typed_count as f64 / total as f64
111    };
112
113    ValidationReport {
114        entity_issues,
115        dangling_refs,
116        spec_coverage,
117        summary: ValidationSummary {
118            total_entities: total,
119            valid: total - error_count - untyped,
120            with_warnings: 0,
121            with_errors: error_count,
122            untyped,
123        },
124    }
125}
126
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data::HDict;
    use crate::graph::EntityGraph;
    use crate::kinds::{HRef, Kind};
    use crate::ontology::DefNamespace;

    /// A freshly constructed namespace with nothing loaded into it.
    fn empty_ns() -> DefNamespace {
        DefNamespace::new()
    }

    /// Build an entity dict with the given `id` ref plus the extra `tags`.
    fn make_entity(id: &str, tags: &[(&str, Kind)]) -> HDict {
        let mut dict = HDict::new();
        dict.set("id", Kind::Ref(HRef::from_val(id)));
        for &(name, ref value) in tags {
            dict.set(name, value.clone());
        }
        dict
    }

    /// Load `entities` into a new graph and validate it against `empty_ns()`.
    fn validate(entities: Vec<HDict>) -> ValidationReport {
        let mut graph = EntityGraph::new();
        for entity in entities {
            graph.add(entity).unwrap();
        }
        validate_graph(&graph, &empty_ns())
    }

    #[test]
    fn validate_graph_empty_graph() {
        let report = validate(Vec::new());
        let s = &report.summary;
        assert_eq!(
            (
                s.total_entities,
                s.valid,
                s.with_warnings,
                s.with_errors,
                s.untyped
            ),
            (0, 0, 0, 0, 0)
        );
        assert!(report.entity_issues.is_empty());
        assert!(report.dangling_refs.is_empty());
        assert_eq!(report.spec_coverage, 0.0);
    }

    #[test]
    fn validate_graph_dangling_refs() {
        let report = validate(vec![make_entity(
            "e1",
            &[("siteRef", Kind::Ref(HRef::from_val("missing")))],
        )]);
        // Exactly one dangling ref: e1.siteRef -> missing.
        assert_eq!(
            report.dangling_refs,
            vec![(
                "e1".to_string(),
                "siteRef".to_string(),
                "missing".to_string()
            )]
        );
    }

    #[test]
    fn validate_graph_no_dangling_refs() {
        let report = validate(vec![
            make_entity("site1", &[("dis", Kind::Str("Site 1".into()))]),
            make_entity("equip1", &[("siteRef", Kind::Ref(HRef::from_val("site1")))]),
        ]);
        assert!(report.dangling_refs.is_empty());
    }

    #[test]
    fn validate_graph_summary_counts() {
        // Two entities, neither has marker tags that are defs → both untyped
        let report = validate(vec![
            make_entity("a", &[("dis", Kind::Str("A".into()))]),
            make_entity("b", &[("dis", Kind::Str("B".into()))]),
        ]);
        let s = &report.summary;
        assert_eq!((s.total_entities, s.untyped, s.with_errors), (2, 2, 0));
        assert_eq!(report.spec_coverage, 0.0);
    }
}