Skip to main content

kg/
validate.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use crate::graph::GraphFile;
5
/// Outcome of a graph validation run.
///
/// `validate_graph` fills both lists and sorts each of them before returning.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ValidationReport {
    /// Problems recorded as errors (for example invalid node types, missing edge endpoints,
    /// duplicate ids, orphan nodes).
    pub errors: Vec<String>,
    /// Quality findings recorded as warnings (for example a missing description or an
    /// out-of-range confidence).
    pub warnings: Vec<String>,
}
10
11// ---------------------------------------------------------------------------
12// Static ontology data
13// ---------------------------------------------------------------------------
14
/// Node type names accepted by the validator.
///
/// `validate_graph` records an error for every node whose `type` is not in this list.
pub const VALID_TYPES: &[&str] = &[
    "Concept",
    "Process",
    "DataStore",
    "Interface",
    "Rule",
    "Feature",
    "Decision",
    "Convention",
    "Note",
    "Bug",
];
27
/// Edge relation names accepted by the validator.
///
/// `validate_graph` records an error for every edge whose `relation` is not in this list.
///
/// NOTE(review): `USES` and `READS_FROM` are listed here but have no entry in
/// `EDGE_TYPE_RULES`, so edges with those relations get no endpoint type check —
/// confirm that this is intentional.
pub const VALID_RELATIONS: &[&str] = &[
    "HAS",
    "STORED_IN",
    "TRIGGERS",
    "CREATED_BY",
    "AFFECTED_BY",
    "AVAILABLE_IN",
    "DOCUMENTED_IN",
    "DEPENDS_ON",
    "TRANSITIONS",
    "DECIDED_BY",
    "GOVERNED_BY",
    "USES",
    "READS_FROM",
];
43
/// Maps node type -> expected id prefix.
///
/// Each entry is `(type, prefix)`. `validate_graph` compares the part of a node id before
/// the first `:` with the prefix listed for the node's type and records an error when they
/// differ. Node types that have no entry here are not prefix-checked.
pub const TYPE_TO_PREFIX: &[(&str, &str)] = &[
    ("Concept", "concept"),
    ("Process", "process"),
    ("DataStore", "datastore"),
    ("Interface", "interface"),
    ("Rule", "rule"),
    ("Feature", "feature"),
    ("Decision", "decision"),
    ("Convention", "convention"),
    ("Note", "note"),
    ("Bug", "bug"),
];
57
/// Endpoint type constraints per relation, stored as
/// `(relation, valid_source_types, valid_target_types)`.
///
/// Empty slice = no constraint for that side.
///
/// `edge_type_rule` returns the first entry whose relation matches; relations without an
/// entry get no endpoint type check in `validate_graph`.
pub const EDGE_TYPE_RULES: &[(&str, &[&str], &[&str])] = &[
    (
        "HAS",
        &["Concept", "Process", "Interface"],
        &["Concept", "Feature", "DataStore", "Rule", "Interface"],
    ),
    ("STORED_IN", &["Concept", "Process", "Rule"], &["DataStore"]),
    (
        "CREATED_BY",
        &["Concept", "DataStore", "Interface", "Decision"],
        &["Process"],
    ),
    (
        "TRIGGERS",
        &["Process", "Rule"],
        &["Process", "Bug", "Rule"],
    ),
    (
        "AFFECTED_BY",
        &[
            "Concept",
            "Process",
            "DataStore",
            "Interface",
            "Rule",
            "Feature",
            "Decision",
            "Bug",
        ],
        &[
            "Concept",
            "Process",
            "DataStore",
            "Interface",
            "Rule",
            "Feature",
            "Decision",
            "Convention",
            "Bug",
        ],
    ),
    (
        "AVAILABLE_IN",
        &["Feature", "DataStore", "Concept", "Process"],
        &["Interface"],
    ),
    (
        "DOCUMENTED_IN",
        &["Concept", "Process", "Decision", "Rule", "Feature", "Bug"],
        &["Interface", "Note"],
    ),
    (
        "DEPENDS_ON",
        &["Feature", "Process", "Interface"],
        &["Feature", "DataStore", "Interface", "Concept"],
    ),
    ("TRANSITIONS", &["Process", "Rule"], &["Process", "Rule"]),
    (
        "DECIDED_BY",
        &["Concept", "Process", "Interface"],
        &["Decision"],
    ),
    (
        "GOVERNED_BY",
        &["Process", "Interface", "DataStore"],
        &["Convention", "Rule"],
    ),
];
128
129// ---------------------------------------------------------------------------
130// Core validation
131// ---------------------------------------------------------------------------
132
133pub fn edge_type_rule(
134    relation: &str,
135) -> Option<(&'static [&'static str], &'static [&'static str])> {
136    EDGE_TYPE_RULES
137        .iter()
138        .find(|(rule_relation, _, _)| *rule_relation == relation)
139        .map(|(_, source_types, target_types)| (*source_types, *target_types))
140}
141
/// Builds the message used when a node type is not allowed as the source of a relation.
///
/// The result has the form `"<source_type> cannot be source of <relation> (allowed: a, b)"`,
/// with `allowed_source_types` joined by `", "` in the order given.
pub fn format_edge_source_type_error(
    source_type: &str,
    relation: &str,
    allowed_source_types: &[impl AsRef<str>],
) -> String {
    let mut allowed = String::new();
    for (position, entry) in allowed_source_types.iter().enumerate() {
        if position > 0 {
            allowed.push_str(", ");
        }
        allowed.push_str(entry.as_ref());
    }
    format!("{source_type} cannot be source of {relation} (allowed: {allowed})")
}
158
/// Builds the message used when a node type is not allowed as the target of a relation.
///
/// The result has the form `"<target_type> cannot be target of <relation> (allowed: a, b)"`,
/// with `allowed_target_types` joined by `", "` in the order given.
pub fn format_edge_target_type_error(
    target_type: &str,
    relation: &str,
    allowed_target_types: &[impl AsRef<str>],
) -> String {
    let mut allowed = String::new();
    for (position, entry) in allowed_target_types.iter().enumerate() {
        if position > 0 {
            allowed.push_str(", ");
        }
        allowed.push_str(entry.as_ref());
    }
    format!("{target_type} cannot be target of {relation} (allowed: {allowed})")
}
175
176pub fn validate_graph(
177    graph: &GraphFile,
178    cwd: &Path,
179    deep: bool,
180    base_dir: Option<&str>,
181) -> ValidationReport {
182    let mut errors = Vec::new();
183    let mut warnings = Vec::new();
184
185    let type_to_prefix: HashMap<&str, &str> = TYPE_TO_PREFIX.iter().copied().collect();
186    // -- metadata --
187    if graph.metadata.name.trim().is_empty() {
188        errors.push("metadata.name missing".to_owned());
189    }
190
191    // -- nodes --
192    let mut id_counts = HashMap::<&str, usize>::new();
193    for node in &graph.nodes {
194        *id_counts.entry(node.id.as_str()).or_insert(0) += 1;
195
196        if !VALID_TYPES.contains(&node.r#type.as_str()) {
197            errors.push(format!("node {} has invalid type {}", node.id, node.r#type));
198        }
199        if node.name.trim().is_empty() {
200            errors.push(format!("node {} missing name", node.id));
201        }
202        if node.source_files.is_empty() {
203            errors.push(format!("node {} missing source_files", node.id));
204        }
205
206        // id convention: prefix:snake_case
207        match node.id.split_once(':') {
208            Some((prefix, suffix)) => {
209                let valid_suffix = !suffix.is_empty()
210                    && suffix
211                        .chars()
212                        .next()
213                        .is_some_and(|c| c.is_ascii_lowercase())
214                    && suffix
215                        .chars()
216                        .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_');
217                if !valid_suffix {
218                    errors.push(format!(
219                        "node id {} does not match prefix:snake_case",
220                        node.id
221                    ));
222                }
223                if let Some(expected) = type_to_prefix.get(node.r#type.as_str()) {
224                    if prefix != *expected {
225                        errors.push(format!(
226                            "node {} prefix {} does not match type {}",
227                            node.id, prefix, node.r#type
228                        ));
229                    }
230                }
231            }
232            None => {
233                errors.push(format!(
234                    "node id {} does not match prefix:snake_case",
235                    node.id
236                ));
237            }
238        }
239
240        // quality warnings (skip Feature nodes)
241        if node.r#type != "Feature" {
242            if node.properties.description.trim().is_empty() {
243                warnings.push(format!("node {} missing description", node.id));
244            }
245            if node.properties.key_facts.is_empty() {
246                warnings.push(format!("node {} missing key_facts", node.id));
247            }
248            if node.properties.provenance.trim().is_empty() {
249                warnings.push(format!("node {} missing provenance", node.id));
250            }
251        }
252        if let Some(confidence) = node.properties.confidence {
253            if !(0.0..=1.0).contains(&confidence) {
254                warnings.push(format!(
255                    "node {} confidence out of range: {}",
256                    node.id, confidence
257                ));
258            }
259        }
260        if !(1..=6).contains(&node.properties.importance) {
261            errors.push(format!(
262                "node {} importance out of range: {}",
263                node.id, node.properties.importance
264            ));
265        }
266    }
267    for (node_id, count) in &id_counts {
268        if *count > 1 {
269            errors.push(format!("duplicate node id: {} ({})", node_id, count));
270        }
271    }
272
273    // -- edges --
274    let node_type_map: HashMap<&str, &str> = graph
275        .nodes
276        .iter()
277        .map(|node| (node.id.as_str(), node.r#type.as_str()))
278        .collect();
279    let node_ids: HashSet<&str> = node_type_map.keys().copied().collect();
280    let mut touched = HashSet::new();
281    let mut edge_keys = HashSet::new();
282
283    for edge in &graph.edges {
284        if !VALID_RELATIONS.contains(&edge.relation.as_str()) {
285            errors.push(format!(
286                "edge has invalid relation: {} {} {}",
287                edge.source_id, edge.relation, edge.target_id
288            ));
289        }
290        if !node_ids.contains(edge.source_id.as_str()) {
291            errors.push(format!(
292                "edge source missing: {} {} {}",
293                edge.source_id, edge.relation, edge.target_id
294            ));
295        }
296        if !node_ids.contains(edge.target_id.as_str()) {
297            errors.push(format!(
298                "edge target missing: {} {} {}",
299                edge.source_id, edge.relation, edge.target_id
300            ));
301        }
302
303        // Enforce relation semantics from decision table rules.
304        if let (Some(src_type), Some(tgt_type)) = (
305            node_type_map.get(edge.source_id.as_str()),
306            node_type_map.get(edge.target_id.as_str()),
307        ) {
308            if let Some((valid_src, valid_tgt)) = edge_type_rule(edge.relation.as_str()) {
309                if !valid_src.is_empty() && !valid_src.contains(src_type) {
310                    errors.push(format!(
311                        "edge {} {} {} invalid: {}",
312                        edge.source_id,
313                        edge.relation,
314                        edge.target_id,
315                        format_edge_source_type_error(src_type, edge.relation.as_str(), valid_src)
316                    ));
317                }
318                if !valid_tgt.is_empty() && !valid_tgt.contains(tgt_type) {
319                    errors.push(format!(
320                        "edge {} {} {} invalid: {}",
321                        edge.source_id,
322                        edge.relation,
323                        edge.target_id,
324                        format_edge_target_type_error(tgt_type, edge.relation.as_str(), valid_tgt)
325                    ));
326                }
327            }
328        }
329
330        touched.insert(edge.source_id.as_str());
331        touched.insert(edge.target_id.as_str());
332        let key = format!("{}|{}|{}", edge.source_id, edge.relation, edge.target_id);
333        if !edge_keys.insert(key.clone()) {
334            errors.push(format!("duplicate edge: {}", key.replace('|', " ")));
335        }
336    }
337
338    // orphan nodes = errors (not connected to any edge)
339    for node in &graph.nodes {
340        if !touched.contains(node.id.as_str()) {
341            errors.push(format!("orphan node: {}", node.id));
342        }
343    }
344
345    // deep: verify source files exist on disk
346    if deep {
347        let base = base_dir
348            .map(|d| cwd.join(d))
349            .unwrap_or_else(|| cwd.to_path_buf());
350        for node in &graph.nodes {
351            for source in &node.source_files {
352                if !base.join(source).exists() {
353                    errors.push(format!("missing source file: {} -> {}", node.id, source));
354                }
355            }
356        }
357    }
358
359    errors.sort();
360    warnings.sort();
361    ValidationReport { errors, warnings }
362}