Skip to main content

kg/
validate.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use crate::graph::GraphFile;
5
/// Outcome of a graph validation run.
///
/// `errors` holds problems that make the graph invalid; `warnings` holds
/// quality issues that do not. `validate_graph` returns both lists sorted.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ValidationReport {
    /// Hard validation failures.
    pub errors: Vec<String>,
    /// Non-fatal quality findings.
    pub warnings: Vec<String>,
}
10
// ---------------------------------------------------------------------------
// Static ontology data
// ---------------------------------------------------------------------------
14
/// Built-in node type names.
///
/// Types outside this list are still accepted by `is_valid_node_type` when
/// they pass the custom-token check.
pub const VALID_TYPES: &[&str] = &[
    "Concept",
    "Process",
    "DataStore",
    "Interface",
    "Rule",
    "Feature",
    "Decision",
    "Convention",
    "Note",
    "Bug",
];
27
/// Built-in edge relation names.
///
/// Relations outside this list are still accepted by `is_valid_relation`
/// when they pass the custom-token check.
pub const VALID_RELATIONS: &[&str] = &[
    "HAS",
    "STORED_IN",
    "TRIGGERS",
    "CREATED_BY",
    "AFFECTED_BY",
    "AVAILABLE_IN",
    "DOCUMENTED_IN",
    "DEPENDS_ON",
    "TRANSITIONS",
    "DECIDED_BY",
    "GOVERNED_BY",
    "USES",
    "READS_FROM",
];
43
/// Dictionary of provenance codes; `validate_graph` warns when a node carries
/// a non-empty provenance value that is not listed here.
pub const VALID_PROVENANCE_CODES: &[&str] = &["U", "D", "A"];
45
/// Type tags accepted as the first whitespace-separated token of a source
/// reference (see `validate_source_reference`).
pub const VALID_SOURCE_TYPES: &[&str] = &[
    "URL",
    "SVN",
    "SOURCECODE",
    "WIKI",
    "CONFLUENCE",
    "CONVERSATION",
    "GIT_COMMIT",
    "PULL_REQUEST",
    "ISSUE",
    "DOC",
    "LOG",
    "OTHER",
];
60
/// Longest custom node type token (in bytes) accepted by `is_valid_node_type`.
const MAX_CUSTOM_TYPE_LEN: usize = 48;
/// Longest custom relation token (in bytes) accepted by `is_valid_relation`.
const MAX_CUSTOM_RELATION_LEN: usize = 64;
63
/// Lookup table pairing each built-in node type with the lowercase id prefix
/// expected in `prefix:snake_case` node ids.
pub const TYPE_TO_PREFIX: &[(&str, &str)] = &[
    ("Concept", "concept"),
    ("Process", "process"),
    ("DataStore", "datastore"),
    ("Interface", "interface"),
    ("Rule", "rule"),
    ("Feature", "feature"),
    ("Decision", "decision"),
    ("Convention", "convention"),
    ("Note", "note"),
    ("Bug", "bug"),
];
77
/// Lookup table pairing each built-in node type with the short code that may
/// be used as the type marker in `CODE:snake_case` node ids.
pub const TYPE_TO_CODE: &[(&str, &str)] = &[
    ("Concept", "K"),
    ("Process", "P"),
    ("DataStore", "D"),
    ("Interface", "I"),
    ("Rule", "R"),
    ("Feature", "F"),
    ("Decision", "Z"),
    ("Convention", "C"),
    ("Note", "N"),
    ("Bug", "B"),
];
91
/// Per-relation type constraints, one tuple per relation:
/// `(relation, allowed source node types, allowed target node types)`.
///
/// An empty slice on either side means that side is unconstrained. Relations
/// that have no entry here are not type-checked at all.
pub const EDGE_TYPE_RULES: &[(&str, &[&str], &[&str])] = &[
    (
        "HAS",
        &["Concept", "Process", "Interface"],
        &["Concept", "Feature", "DataStore", "Rule", "Interface"],
    ),
    ("STORED_IN", &["Concept", "Process", "Rule"], &["DataStore"]),
    (
        "CREATED_BY",
        &["Concept", "DataStore", "Interface", "Decision"],
        &["Process"],
    ),
    (
        "TRIGGERS",
        &["Process", "Rule"],
        &["Process", "Bug", "Rule"],
    ),
    (
        "AFFECTED_BY",
        &[
            "Concept",
            "Process",
            "DataStore",
            "Interface",
            "Rule",
            "Feature",
            "Decision",
            "Bug",
        ],
        &[
            "Concept",
            "Process",
            "DataStore",
            "Interface",
            "Rule",
            "Feature",
            "Decision",
            "Convention",
            "Bug",
        ],
    ),
    (
        "AVAILABLE_IN",
        &["Feature", "DataStore", "Concept", "Process"],
        &["Interface"],
    ),
    (
        "DOCUMENTED_IN",
        &["Concept", "Process", "Decision", "Rule", "Feature", "Bug"],
        &["Interface", "Note"],
    ),
    (
        "DEPENDS_ON",
        &["Feature", "Process", "Interface"],
        &["Feature", "DataStore", "Interface", "Concept"],
    ),
    ("TRANSITIONS", &["Process", "Rule"], &["Process", "Rule"]),
    (
        "DECIDED_BY",
        &["Concept", "Process", "Interface"],
        &["Decision"],
    ),
    (
        "GOVERNED_BY",
        &["Process", "Interface", "DataStore"],
        &["Convention", "Rule"],
    ),
];
162
// ---------------------------------------------------------------------------
// Core validation
// ---------------------------------------------------------------------------
166
167pub fn edge_type_rule(
168    relation: &str,
169) -> Option<(&'static [&'static str], &'static [&'static str])> {
170    EDGE_TYPE_RULES
171        .iter()
172        .find(|(rule_relation, _, _)| *rule_relation == relation)
173        .map(|(_, source_types, target_types)| (*source_types, *target_types))
174}
175
176pub fn canonical_type_code_for(node_type: &str) -> Option<&'static str> {
177    TYPE_TO_CODE
178        .iter()
179        .find(|(typ, _)| *typ == node_type)
180        .map(|(_, code)| *code)
181}
182
183fn type_for_prefix(prefix: &str) -> Option<&'static str> {
184    TYPE_TO_PREFIX
185        .iter()
186        .find(|(_, known_prefix)| *known_prefix == prefix)
187        .map(|(typ, _)| *typ)
188}
189
190fn type_for_code(code: &str) -> Option<&'static str> {
191    TYPE_TO_CODE
192        .iter()
193        .find(|(_, known_code)| *known_code == code)
194        .map(|(typ, _)| *typ)
195}
196
/// Check that `suffix` is a snake_case identifier: it must start with an
/// ASCII lowercase letter and continue with ASCII lowercase letters, ASCII
/// digits or underscores only. The empty string is rejected.
fn valid_id_suffix(suffix: &str) -> bool {
    let mut symbols = suffix.chars();
    match symbols.next() {
        Some(first) if first.is_ascii_lowercase() => {
            symbols.all(|symbol| matches!(symbol, 'a'..='z' | '0'..='9' | '_'))
        }
        _ => false,
    }
}
207
/// Check that `token` is usable as a custom type or relation name: between
/// 1 and `max_len` bytes long and made only of ASCII graphic characters
/// (which excludes every kind of whitespace and all non-ASCII text).
fn is_valid_custom_token(token: &str, max_len: usize) -> bool {
    let length_ok = (1..=max_len).contains(&token.len());
    length_ok && token.bytes().all(|byte| byte.is_ascii_graphic())
}
217
218pub fn is_valid_node_type(value: &str) -> bool {
219    VALID_TYPES.contains(&value) || is_valid_custom_token(value, MAX_CUSTOM_TYPE_LEN)
220}
221
222pub fn is_valid_relation(value: &str) -> bool {
223    VALID_RELATIONS.contains(&value) || is_valid_custom_token(value, MAX_CUSTOM_RELATION_LEN)
224}
225
/// Parse `value` (surrounding whitespace ignored) as a similarity score.
///
/// Returns the number only when it parses as `f64` and lies in `0.0..=1.0`;
/// unparsable text, out-of-range values and NaN all yield `None`.
fn parse_similarity_score(value: &str) -> Option<f64> {
    value
        .trim()
        .parse::<f64>()
        .ok()
        .filter(|score| (0.0..=1.0).contains(score))
}
234
/// A score component label is an uppercase `C` followed by one or more
/// ASCII digits (for example `C1` or `C12`).
fn is_valid_score_component_label(value: &str) -> bool {
    match value.strip_prefix('C') {
        Some(digits) => !digits.is_empty() && digits.chars().all(|ch| ch.is_ascii_digit()),
        None => false,
    }
}
241
242pub fn validate_bidirectional_similarity_edge(
243    source_id: &str,
244    relation: &str,
245    target_id: &str,
246    detail: &str,
247    bidirectional: bool,
248) -> Result<(), String> {
249    if !bidirectional {
250        return Ok(());
251    }
252    if relation != "~" {
253        return Err(format!(
254            "bidirectional edge requires '~' relation: {} {} {}",
255            source_id, relation, target_id
256        ));
257    }
258    if source_id > target_id {
259        return Err(format!(
260            "bidirectional edge must be canonicalized (source <= target): {} ~ {}",
261            source_id, target_id
262        ));
263    }
264    if parse_similarity_score(detail).is_none() {
265        return Err(format!(
266            "bidirectional similarity edge requires score in range 0..1: {} ~ {}",
267            source_id, target_id
268        ));
269    }
270    Ok(())
271}
272
/// Number of days in `month` of the proleptic Gregorian `year`.
/// Returns 0 for a month outside `1..=12`.
fn days_in_month(year: u32, month: u32) -> u32 {
    match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 => {
            let is_leap = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
            if is_leap {
                29
            } else {
                28
            }
        }
        _ => 0,
    }
}

/// Parse a short, non-empty run of ASCII digits; `None` if any byte is not a digit.
fn parse_ascii_digits(digits: &[u8]) -> Option<u32> {
    if digits.is_empty() {
        return None;
    }
    digits.iter().try_fold(0u32, |acc, &byte| {
        byte.is_ascii_digit()
            .then(|| acc * 10 + u32::from(byte - b'0'))
    })
}

/// Check that `date` is exactly `YYYY-MM-DD` and names a real calendar day.
fn is_valid_calendar_date(date: &[u8]) -> bool {
    if date.len() != 10 || date[4] != b'-' || date[7] != b'-' {
        return false;
    }
    let (Some(year), Some(month), Some(day)) = (
        parse_ascii_digits(&date[0..4]),
        parse_ascii_digits(&date[5..7]),
        parse_ascii_digits(&date[8..10]),
    ) else {
        return false;
    };
    // An invalid month yields 0 days, which makes the range empty.
    (1..=days_in_month(year, month)).contains(&day)
}

/// Check that `value` is a UTC timestamp of the exact form
/// `YYYY-MM-DDTHH:MM:SSZ` (20 bytes, no fractional seconds, no offset).
///
/// The date part must be a real calendar day (month lengths and leap years
/// are enforced, so `2023-02-30` is rejected); hours must be `00..=23`,
/// minutes and seconds `00..=59`.
pub fn is_valid_iso_utc_timestamp(value: &str) -> bool {
    let bytes = value.as_bytes();
    let shape_ok = bytes.len() == 20
        && bytes[10] == b'T'
        && bytes[13] == b':'
        && bytes[16] == b':'
        && bytes[19] == b'Z';
    if !shape_ok || !is_valid_calendar_date(&bytes[..10]) {
        return false;
    }
    let (Some(hour), Some(minute), Some(second)) = (
        parse_ascii_digits(&bytes[11..13]),
        parse_ascii_digits(&bytes[14..16]),
        parse_ascii_digits(&bytes[17..19]),
    ) else {
        return false;
    };
    hour <= 23 && minute <= 59 && second <= 59
}

/// Check that `value` is a date of the exact form `YYYY-MM-DD` that names a
/// real calendar day (month lengths and leap years are enforced).
pub fn is_valid_iso_date(value: &str) -> bool {
    is_valid_calendar_date(value.as_bytes())
}
338
339pub fn validate_source_reference(value: &str) -> Result<(), String> {
340    let trimmed = value.trim();
341    if trimmed.is_empty() {
342        return Err("source entry cannot be empty".to_owned());
343    }
344
345    let parts: Vec<&str> = trimmed.split_whitespace().collect();
346    if parts.len() < 2 {
347        return Err(format!(
348            "source '{}' must have format '<TYPE> <LINK_OR_DATE> <OPTIONAL_DETAILS>'",
349            value
350        ));
351    }
352
353    let source_type = parts[0];
354    if !VALID_SOURCE_TYPES.contains(&source_type) {
355        return Err(format!(
356            "source '{}' uses invalid type '{}'; valid types: {}",
357            value,
358            source_type,
359            VALID_SOURCE_TYPES.join(", ")
360        ));
361    }
362
363    match source_type {
364        "CONVERSATION" => {
365            if !is_valid_iso_date(parts[1]) {
366                return Err(format!(
367                    "source '{}' must use date format YYYY-MM-DD for CONVERSATION",
368                    value
369                ));
370            }
371        }
372        "GIT_COMMIT" => {
373            if parts.len() < 3 {
374                return Err(format!(
375                    "source '{}' must use format 'GIT_COMMIT <REPO_URL_OR_NAME> <COMMIT_SHA> <OPTIONAL_DETAILS>'",
376                    value
377                ));
378            }
379        }
380        _ => {}
381    }
382
383    Ok(())
384}
385
386pub fn normalize_source_reference(value: &str) -> String {
387    let trimmed = value.trim();
388    if trimmed.is_empty() {
389        return String::new();
390    }
391    let source_type = trimmed.split_whitespace().next().unwrap_or_default();
392    if VALID_SOURCE_TYPES.contains(&source_type) {
393        return trimmed.to_owned();
394    }
395    format!("DOC {trimmed}")
396}
397
/// Importance is valid when it lies in the closed interval `0.0..=1.0`
/// (NaN is therefore invalid).
pub fn is_valid_importance(value: f64) -> bool {
    let unit_interval = 0.0..=1.0;
    unit_interval.contains(&value)
}
401
/// Detect an importance value written on the legacy integer scale.
///
/// Only whole numbers strictly above 1 and at most 6 qualify (2, 3, 4, 5, 6);
/// 1.0 is not treated as legacy because it is also valid on the `0.0..=1.0`
/// scale.
pub fn is_legacy_importance(value: f64) -> bool {
    let is_whole = value.fract() == 0.0;
    is_whole && (2.0..=6.0).contains(&value)
}
405
406/// Normalize a node id to legacy `<type_prefix>:snake_case` when possible.
407///
408/// Accepted inputs include both canonical `TYPE_CODE:snake_case` and legacy
409/// `prefix:snake_case` forms. Unknown prefixes are returned unchanged.
410pub fn normalize_node_id(id: &str) -> String {
411    let Some((head, suffix)) = id.split_once(':') else {
412        return id.to_owned();
413    };
414    let Some(node_type) = type_for_code(head).or_else(|| type_for_prefix(head)) else {
415        return id.to_owned();
416    };
417    let Some(prefix) = TYPE_TO_PREFIX
418        .iter()
419        .find(|(typ, _)| *typ == node_type)
420        .map(|(_, prefix)| *prefix)
421    else {
422        return id.to_owned();
423    };
424    format!("{prefix}:{suffix}")
425}
426
427/// Validate and canonicalize a node id for a concrete node type.
428///
429/// Returns canonical legacy `prefix:snake_case` on success.
430pub fn canonicalize_node_id_for_type(id: &str, node_type: &str) -> Result<String, String> {
431    let Some((head, suffix)) = id.split_once(':') else {
432        return Err(format!(
433            "node id '{}' must be in format <type_code>:snake_case",
434            id
435        ));
436    };
437    if !valid_id_suffix(suffix) {
438        return Err(format!(
439            "node id '{}' must use snake_case suffix (lowercase, digits, underscore only)",
440            id
441        ));
442    }
443
444    if !is_valid_node_type(node_type) {
445        return Err(format!("invalid node type '{node_type}'"));
446    }
447
448    let Some(expected_code) = canonical_type_code_for(node_type) else {
449        if head == node_type {
450            return Ok(format!("{node_type}:{suffix}"));
451        }
452        return Err(format!(
453            "node id '{}' has type marker '{}'; expected '{}' for custom node type",
454            id, head, node_type
455        ));
456    };
457    let Some(expected_prefix) = TYPE_TO_PREFIX
458        .iter()
459        .find(|(typ, _)| *typ == node_type)
460        .map(|(_, prefix)| *prefix)
461    else {
462        return Err(format!("invalid node type '{node_type}'"));
463    };
464
465    if head == expected_code || head == expected_prefix {
466        return Ok(format!("{expected_prefix}:{suffix}"));
467    }
468
469    if let Some(actual_type) = type_for_code(head).or_else(|| type_for_prefix(head)) {
470        return Err(format!(
471            "node id '{}' has type marker '{}' (type '{}') but node_type is '{}'",
472            id, head, actual_type, node_type
473        ));
474    }
475
476    Err(format!(
477        "node id '{}' has unknown type marker '{}'; expected '{}' or '{}'",
478        id, head, expected_code, expected_prefix
479    ))
480}
481
/// Build the message used when a node of `source_type` is not allowed as the
/// source of `relation`. The allowed types are listed comma-separated.
pub fn format_edge_source_type_error(
    source_type: &str,
    relation: &str,
    allowed_source_types: &[impl AsRef<str>],
) -> String {
    let mut allowed = String::new();
    for (index, entry) in allowed_source_types.iter().enumerate() {
        if index > 0 {
            allowed.push_str(", ");
        }
        allowed.push_str(entry.as_ref());
    }
    format!(
        "{} cannot be source of {} (allowed: {})",
        source_type, relation, allowed
    )
}
498
/// Build the message used when a node of `target_type` is not allowed as the
/// target of `relation`. The allowed types are listed comma-separated.
pub fn format_edge_target_type_error(
    target_type: &str,
    relation: &str,
    allowed_target_types: &[impl AsRef<str>],
) -> String {
    let mut allowed = String::new();
    for (index, entry) in allowed_target_types.iter().enumerate() {
        if index > 0 {
            allowed.push_str(", ");
        }
        allowed.push_str(entry.as_ref());
    }
    format!(
        "{} cannot be target of {} (allowed: {})",
        target_type, relation, allowed
    )
}
515
516pub fn validate_graph(
517    graph: &GraphFile,
518    cwd: &Path,
519    deep: bool,
520    base_dir: Option<&str>,
521) -> ValidationReport {
522    let mut errors = Vec::new();
523    let mut warnings = Vec::new();
524
525    let type_to_prefix: HashMap<&str, &str> = TYPE_TO_PREFIX.iter().copied().collect();
526    let type_to_code: HashMap<&str, &str> = TYPE_TO_CODE.iter().copied().collect();
527    // -- metadata --
528    if graph.metadata.name.trim().is_empty() {
529        errors.push("metadata.name missing".to_owned());
530    }
531
532    // -- nodes --
533    let mut id_counts = HashMap::<&str, usize>::new();
534    for node in &graph.nodes {
535        *id_counts.entry(node.id.as_str()).or_insert(0) += 1;
536
537        if !is_valid_node_type(&node.r#type) {
538            errors.push(format!("node {} has invalid type {}", node.id, node.r#type));
539        }
540        if node.name.trim().is_empty() {
541            errors.push(format!("node {} missing name", node.id));
542        }
543        if node.source_files.is_empty() {
544            errors.push(format!("node {} missing source_files", node.id));
545        }
546
547        match canonicalize_node_id_for_type(&node.id, &node.r#type) {
548            Ok(_) => {}
549            Err(_) => {
550                if let Some((head, _)) = node.id.split_once(':') {
551                    if let (Some(expected_code), Some(expected_prefix)) = (
552                        type_to_code.get(node.r#type.as_str()),
553                        type_to_prefix.get(node.r#type.as_str()),
554                    ) {
555                        errors.push(format!(
556                            "node id {} invalid for type {} (expected {}:* or {}:*)",
557                            node.id, node.r#type, expected_code, expected_prefix
558                        ));
559                        if type_for_code(head).is_none() && type_for_prefix(head).is_none() {
560                            errors.push(format!(
561                                "node id {} has unknown type marker '{}'",
562                                node.id, head
563                            ));
564                        }
565                    } else {
566                        errors.push(format!(
567                            "node id {} invalid for custom type {} (expected {}:*)",
568                            node.id, node.r#type, node.r#type
569                        ));
570                    }
571                } else {
572                    errors.push(format!(
573                        "node id {} does not match prefix:snake_case",
574                        node.id
575                    ));
576                }
577            }
578        }
579
580        // quality warnings (skip Feature nodes)
581        if node.r#type != "Feature" {
582            if node.properties.description.trim().is_empty() {
583                warnings.push(format!("node {} missing description", node.id));
584            }
585            if node.properties.key_facts.is_empty() {
586                warnings.push(format!("node {} missing key_facts", node.id));
587            }
588            if node.properties.provenance.trim().is_empty() {
589                warnings.push(format!("node {} missing provenance", node.id));
590            }
591        }
592        if let Some(confidence) = node.properties.confidence {
593            if !(0.0..=1.0).contains(&confidence) {
594                warnings.push(format!(
595                    "node {} confidence out of range: {}",
596                    node.id, confidence
597                ));
598            }
599        }
600        if is_legacy_importance(node.properties.importance) {
601            warnings.push(format!(
602                "node {} uses legacy importance scale (1..6): {}",
603                node.id, node.properties.importance
604            ));
605        } else if !is_valid_importance(node.properties.importance) {
606            errors.push(format!(
607                "node {} importance out of range: {}",
608                node.id, node.properties.importance
609            ));
610        }
611
612        if !node.properties.provenance.trim().is_empty()
613            && !VALID_PROVENANCE_CODES.contains(&node.properties.provenance.as_str())
614        {
615            warnings.push(format!(
616                "node {} has non-dictionary provenance '{}' (expected one of: {})",
617                node.id,
618                node.properties.provenance,
619                VALID_PROVENANCE_CODES.join(", ")
620            ));
621        }
622
623        for source in &node.source_files {
624            if let Err(err) = validate_source_reference(source) {
625                warnings.push(format!(
626                    "node {} has non-standard source '{}': {}",
627                    node.id, source, err
628                ));
629            }
630        }
631    }
632    for (node_id, count) in &id_counts {
633        if *count > 1 {
634            errors.push(format!("duplicate node id: {} ({})", node_id, count));
635        }
636    }
637
638    // -- edges --
639    let node_type_map: HashMap<&str, &str> = graph
640        .nodes
641        .iter()
642        .map(|node| (node.id.as_str(), node.r#type.as_str()))
643        .collect();
644    let node_ids: HashSet<&str> = node_type_map.keys().copied().collect();
645    let mut touched = HashSet::new();
646    let mut edge_keys = HashSet::new();
647
648    for edge in &graph.edges {
649        if !is_valid_relation(&edge.relation) {
650            errors.push(format!(
651                "edge has invalid relation: {} {} {}",
652                edge.source_id, edge.relation, edge.target_id
653            ));
654        }
655        if !node_ids.contains(edge.source_id.as_str()) {
656            errors.push(format!(
657                "edge source missing: {} {} {}",
658                edge.source_id, edge.relation, edge.target_id
659            ));
660        }
661        if !node_ids.contains(edge.target_id.as_str()) {
662            errors.push(format!(
663                "edge target missing: {} {} {}",
664                edge.source_id, edge.relation, edge.target_id
665            ));
666        }
667
668        if let Err(err) = validate_bidirectional_similarity_edge(
669            &edge.source_id,
670            &edge.relation,
671            &edge.target_id,
672            &edge.properties.detail,
673            edge.properties.bidirectional,
674        ) {
675            errors.push(err);
676        }
677
678        for (label, score) in &edge.properties.score_components {
679            if !is_valid_score_component_label(label) {
680                errors.push(format!(
681                    "edge {} {} {} has invalid score component label '{}'",
682                    edge.source_id, edge.relation, edge.target_id, label
683                ));
684            }
685            if !(0.0..=1.0).contains(score) {
686                errors.push(format!(
687                    "edge {} {} {} score component '{}' out of range: {}",
688                    edge.source_id, edge.relation, edge.target_id, label, score
689                ));
690            }
691        }
692
693        // Enforce relation semantics from decision table rules.
694        if let (Some(src_type), Some(tgt_type)) = (
695            node_type_map.get(edge.source_id.as_str()),
696            node_type_map.get(edge.target_id.as_str()),
697        ) {
698            if VALID_TYPES.contains(src_type) && VALID_TYPES.contains(tgt_type) {
699                if let Some((valid_src, valid_tgt)) = edge_type_rule(edge.relation.as_str()) {
700                    if !valid_src.is_empty() && !valid_src.contains(src_type) {
701                        errors.push(format!(
702                            "edge {} {} {} invalid: {}",
703                            edge.source_id,
704                            edge.relation,
705                            edge.target_id,
706                            format_edge_source_type_error(
707                                src_type,
708                                edge.relation.as_str(),
709                                valid_src
710                            )
711                        ));
712                    }
713                    if !valid_tgt.is_empty() && !valid_tgt.contains(tgt_type) {
714                        errors.push(format!(
715                            "edge {} {} {} invalid: {}",
716                            edge.source_id,
717                            edge.relation,
718                            edge.target_id,
719                            format_edge_target_type_error(
720                                tgt_type,
721                                edge.relation.as_str(),
722                                valid_tgt
723                            )
724                        ));
725                    }
726                }
727            }
728        }
729
730        touched.insert(edge.source_id.as_str());
731        touched.insert(edge.target_id.as_str());
732        let key = format!("{}|{}|{}", edge.source_id, edge.relation, edge.target_id);
733        if !edge_keys.insert(key.clone()) {
734            errors.push(format!("duplicate edge: {}", key.replace('|', " ")));
735        }
736    }
737
738    // orphan nodes = errors (not connected to any edge)
739    for node in &graph.nodes {
740        if !touched.contains(node.id.as_str()) {
741            errors.push(format!("orphan node: {}", node.id));
742        }
743    }
744
745    // deep: verify source files exist on disk
746    if deep {
747        let base = base_dir
748            .map(|d| cwd.join(d))
749            .unwrap_or_else(|| cwd.to_path_buf());
750        for node in &graph.nodes {
751            for source in &node.source_files {
752                if !base.join(source).exists() {
753                    errors.push(format!("missing source file: {} -> {}", node.id, source));
754                }
755            }
756        }
757    }
758
759    errors.sort();
760    warnings.sort();
761    ValidationReport { errors, warnings }
762}
763
#[cfg(test)]
mod tests {
    use super::{
        canonicalize_node_id_for_type, is_valid_node_type, is_valid_relation,
        is_valid_score_component_label, validate_bidirectional_similarity_edge,
    };

    /// A custom type uses its own name as the id marker.
    #[test]
    fn canonicalize_node_id_allows_custom_type_marker() {
        let outcome = canonicalize_node_id_for_type("~:dedupe_anchor", "~");
        assert_eq!(outcome.as_deref(), Ok("~:dedupe_anchor"));
    }

    /// A marker that differs from the custom type name is rejected.
    #[test]
    fn canonicalize_node_id_rejects_mismatched_custom_marker() {
        let outcome = canonicalize_node_id_for_type("custom:dedupe_anchor", "~");
        assert!(matches!(
            outcome,
            Err(message) if message.contains("expected '~' for custom node type")
        ));
    }

    /// Custom tokens are accepted; empty and whitespace-only values are not.
    #[test]
    fn relation_and_node_type_validation_accepts_custom_tokens() {
        assert!(is_valid_node_type("~"));
        assert!(is_valid_relation("~"));
        assert!(!is_valid_node_type(""));
        assert!(!is_valid_relation(" "));
    }

    /// Bidirectional edges need an in-range score and ordered endpoints.
    #[test]
    fn bidirectional_similarity_validation_requires_score_and_canonical_order() {
        assert!(validate_bidirectional_similarity_edge("~:a", "~", "~:b", "0.8", true).is_ok());

        let failing_cases = [
            ("~:a", "~:b", "1.8", "requires score in range 0..1"),
            ("~:b", "~:a", "0.8", "must be canonicalized"),
        ];
        for (source, target, detail, expected_fragment) in failing_cases {
            let message = validate_bidirectional_similarity_edge(source, "~", target, detail, true)
                .unwrap_err();
            assert!(message.contains(expected_fragment));
        }
    }

    /// Labels must be `C` followed by at least one digit.
    #[test]
    fn score_component_label_validation_accepts_only_c_numeric() {
        for accepted in ["C1", "C2"] {
            assert!(is_valid_score_component_label(accepted));
        }
        for rejected in ["DESC", "C"] {
            assert!(!is_valid_score_component_label(rejected));
        }
    }
}
811}