Skip to main content

kg/
validate.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use crate::graph::GraphFile;
5
/// Result of validating a graph: two lists of human-readable messages.
///
/// `validate_graph` fills both lists and sorts each one before returning, so
/// the order of messages does not depend on the order of nodes or edges.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ValidationReport {
    /// Messages for problems reported as errors (invalid types, broken edges,
    /// malformed ids, orphan nodes, ...).
    pub errors: Vec<String>,
    /// Messages for quality issues reported as warnings (missing description,
    /// key facts or provenance, confidence out of range).
    pub warnings: Vec<String>,
}
10
11// ---------------------------------------------------------------------------
12// Static ontology data
13// ---------------------------------------------------------------------------
14
/// Node type names accepted by the ontology.
///
/// `validate_graph` reports an error for every node whose `type` is not one
/// of these exact (case-sensitive) strings.
pub const VALID_TYPES: &[&str] = &[
    "Concept",
    "Process",
    "DataStore",
    "Interface",
    "Rule",
    "Feature",
    "Decision",
    "Convention",
    "Note",
    "Bug",
];
27
/// Edge relation names accepted by the ontology.
///
/// `validate_graph` reports an error for every edge whose `relation` is not
/// one of these exact (case-sensitive) strings.
pub const VALID_RELATIONS: &[&str] = &[
    "HAS",
    "STORED_IN",
    "TRIGGERS",
    "CREATED_BY",
    "AFFECTED_BY",
    "AVAILABLE_IN",
    "DOCUMENTED_IN",
    "DEPENDS_ON",
    "TRANSITIONS",
    "DECIDED_BY",
    "GOVERNED_BY",
    "USES",
    "READS_FROM",
];
43
/// Pairs of `(node type, expected id prefix)`.
///
/// A node id has the form `prefix:suffix`; `validate_graph` compares the part
/// before the first `:` against the prefix listed here for the node's type.
/// Every prefix in this table is the lowercase form of its type name.
pub const TYPE_TO_PREFIX: &[(&str, &str)] = &[
    ("Concept", "concept"),
    ("Process", "process"),
    ("DataStore", "datastore"),
    ("Interface", "interface"),
    ("Rule", "rule"),
    ("Feature", "feature"),
    ("Decision", "decision"),
    ("Convention", "convention"),
    ("Note", "note"),
    ("Bug", "bug"),
];
57
/// Per-relation type constraints: `(relation, valid_source_types, valid_target_types)`.
///
/// An empty slice means "no constraint" for that side of the edge. A relation
/// with no entry in this table is not type-checked at all by `validate_graph`;
/// that is currently the case for `USES` and `READS_FROM`.
pub const EDGE_TYPE_RULES: &[(&str, &[&str], &[&str])] = &[
    (
        "HAS",
        &["Concept", "Process", "Interface"],
        &["Concept", "Feature", "DataStore", "Rule", "Interface"],
    ),
    ("STORED_IN", &["Concept", "Process", "Rule"], &["DataStore"]),
    (
        "CREATED_BY",
        &["Concept", "DataStore", "Interface", "Decision"],
        &["Process"],
    ),
    (
        "TRIGGERS",
        &["Process", "Rule"],
        &["Process", "Bug", "Rule"],
    ),
    (
        "AFFECTED_BY",
        &["Concept", "Process", "Decision"],
        &["Bug", "Rule", "Decision"],
    ),
    (
        "AVAILABLE_IN",
        &["Feature", "DataStore", "Concept"],
        &["Interface"],
    ),
    (
        "DOCUMENTED_IN",
        &["Concept", "Process", "Decision", "Rule", "Feature", "Bug"],
        &["Interface", "Note"],
    ),
    (
        "DEPENDS_ON",
        &["Feature", "Process", "Interface"],
        &["Feature", "DataStore", "Interface", "Concept"],
    ),
    ("TRANSITIONS", &["Process", "Rule"], &["Process", "Rule"]),
    (
        "DECIDED_BY",
        &["Concept", "Process", "Interface"],
        &["Decision"],
    ),
    (
        "GOVERNED_BY",
        &["Process", "Interface", "DataStore"],
        &["Convention", "Rule"],
    ),
];
109
110// ---------------------------------------------------------------------------
111// Core validation
112// ---------------------------------------------------------------------------
113
114pub fn validate_graph(
115    graph: &GraphFile,
116    cwd: &Path,
117    deep: bool,
118    base_dir: Option<&str>,
119) -> ValidationReport {
120    let mut errors = Vec::new();
121    let mut warnings = Vec::new();
122
123    let type_to_prefix: HashMap<&str, &str> = TYPE_TO_PREFIX.iter().copied().collect();
124    let edge_rules: HashMap<&str, (&[&str], &[&str])> = EDGE_TYPE_RULES
125        .iter()
126        .map(|(rel, src, tgt)| (*rel, (*src, *tgt)))
127        .collect();
128
129    // -- metadata --
130    if graph.metadata.name.trim().is_empty() {
131        errors.push("metadata.name missing".to_owned());
132    }
133
134    // -- nodes --
135    let mut id_counts = HashMap::<&str, usize>::new();
136    for node in &graph.nodes {
137        *id_counts.entry(node.id.as_str()).or_insert(0) += 1;
138
139        if !VALID_TYPES.contains(&node.r#type.as_str()) {
140            errors.push(format!("node {} has invalid type {}", node.id, node.r#type));
141        }
142        if node.name.trim().is_empty() {
143            errors.push(format!("node {} missing name", node.id));
144        }
145        if node.source_files.is_empty() {
146            errors.push(format!("node {} missing source_files", node.id));
147        }
148
149        // id convention: prefix:snake_case
150        match node.id.split_once(':') {
151            Some((prefix, suffix)) => {
152                let valid_suffix = !suffix.is_empty()
153                    && suffix
154                        .chars()
155                        .next()
156                        .is_some_and(|c| c.is_ascii_lowercase())
157                    && suffix
158                        .chars()
159                        .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_');
160                if !valid_suffix {
161                    errors.push(format!(
162                        "node id {} does not match prefix:snake_case",
163                        node.id
164                    ));
165                }
166                if let Some(expected) = type_to_prefix.get(node.r#type.as_str()) {
167                    if prefix != *expected {
168                        errors.push(format!(
169                            "node {} prefix {} does not match type {}",
170                            node.id, prefix, node.r#type
171                        ));
172                    }
173                }
174            }
175            None => {
176                errors.push(format!(
177                    "node id {} does not match prefix:snake_case",
178                    node.id
179                ));
180            }
181        }
182
183        // quality warnings (skip Feature nodes)
184        if node.r#type != "Feature" {
185            if node.properties.description.trim().is_empty() {
186                warnings.push(format!("node {} missing description", node.id));
187            }
188            if node.properties.key_facts.is_empty() {
189                warnings.push(format!("node {} missing key_facts", node.id));
190            }
191            if node.properties.provenance.trim().is_empty() {
192                warnings.push(format!("node {} missing provenance", node.id));
193            }
194        }
195        if let Some(confidence) = node.properties.confidence {
196            if !(0.0..=1.0).contains(&confidence) {
197                warnings.push(format!(
198                    "node {} confidence out of range: {}",
199                    node.id, confidence
200                ));
201            }
202        }
203        if !(1..=6).contains(&node.properties.importance) {
204            errors.push(format!(
205                "node {} importance out of range: {}",
206                node.id, node.properties.importance
207            ));
208        }
209    }
210    for (node_id, count) in &id_counts {
211        if *count > 1 {
212            errors.push(format!("duplicate node id: {} ({})", node_id, count));
213        }
214    }
215
216    // -- edges --
217    let node_type_map: HashMap<&str, &str> = graph
218        .nodes
219        .iter()
220        .map(|node| (node.id.as_str(), node.r#type.as_str()))
221        .collect();
222    let node_ids: HashSet<&str> = node_type_map.keys().copied().collect();
223    let mut touched = HashSet::new();
224    let mut edge_keys = HashSet::new();
225
226    for edge in &graph.edges {
227        if !VALID_RELATIONS.contains(&edge.relation.as_str()) {
228            errors.push(format!(
229                "edge has invalid relation: {} {} {}",
230                edge.source_id, edge.relation, edge.target_id
231            ));
232        }
233        if !node_ids.contains(edge.source_id.as_str()) {
234            errors.push(format!(
235                "edge source missing: {} {} {}",
236                edge.source_id, edge.relation, edge.target_id
237            ));
238        }
239        if !node_ids.contains(edge.target_id.as_str()) {
240            errors.push(format!(
241                "edge target missing: {} {} {}",
242                edge.source_id, edge.relation, edge.target_id
243            ));
244        }
245
246        // Enforce relation semantics from decision table rules.
247        if let (Some(src_type), Some(tgt_type)) = (
248            node_type_map.get(edge.source_id.as_str()),
249            node_type_map.get(edge.target_id.as_str()),
250        ) {
251            if let Some((valid_src, valid_tgt)) = edge_rules.get(edge.relation.as_str()) {
252                if !valid_src.is_empty() && !valid_src.contains(src_type) {
253                    errors.push(format!(
254                        "edge source type invalid for relation: {} {} {} (got {}, expected one of {:?})",
255                        edge.source_id, edge.relation, edge.target_id, src_type, valid_src
256                    ));
257                }
258                if !valid_tgt.is_empty() && !valid_tgt.contains(tgt_type) {
259                    errors.push(format!(
260                        "edge target type invalid for relation: {} {} {} (got {}, expected one of {:?})",
261                        edge.source_id, edge.relation, edge.target_id, tgt_type, valid_tgt
262                    ));
263                }
264            }
265        }
266
267        touched.insert(edge.source_id.as_str());
268        touched.insert(edge.target_id.as_str());
269        let key = format!("{}|{}|{}", edge.source_id, edge.relation, edge.target_id);
270        if !edge_keys.insert(key.clone()) {
271            errors.push(format!("duplicate edge: {}", key.replace('|', " ")));
272        }
273    }
274
275    // orphan nodes = errors (not connected to any edge)
276    for node in &graph.nodes {
277        if !touched.contains(node.id.as_str()) {
278            errors.push(format!("orphan node: {}", node.id));
279        }
280    }
281
282    // deep: verify source files exist on disk
283    if deep {
284        let base = base_dir
285            .map(|d| cwd.join(d))
286            .unwrap_or_else(|| cwd.to_path_buf());
287        for node in &graph.nodes {
288            for source in &node.source_files {
289                if !base.join(source).exists() {
290                    errors.push(format!("missing source file: {} -> {}", node.id, source));
291                }
292            }
293        }
294    }
295
296    errors.sort();
297    warnings.sort();
298    ValidationReport { errors, warnings }
299}