1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use crate::graph::GraphFile;
5
/// Outcome of a validation run: two lists of human-readable messages.
///
/// The standard derives make the report printable in diagnostics, comparable
/// in tests and constructible as an empty report via `Default`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ValidationReport {
    /// Findings reported as errors.
    pub errors: Vec<String>,
    /// Findings reported as warnings.
    pub warnings: Vec<String>,
}
10
/// Built-in node type names.
///
/// Besides the ten long-form names, the list also contains the short
/// markers `"D"` and `"F"`.
#[rustfmt::skip]
pub const VALID_TYPES: &[&str] = &[
    "Concept", "Process", "DataStore", "Interface", "Rule",
    "Feature", "Decision", "Convention", "Note", "Bug",
    "D", "F",
];
29
/// Built-in edge relation names.
#[rustfmt::skip]
pub const VALID_RELATIONS: &[&str] = &[
    "HAS", "STORED_IN", "TRIGGERS", "CREATED_BY", "AFFECTED_BY",
    "AVAILABLE_IN", "DOCUMENTED_IN", "DEPENDS_ON", "TRANSITIONS",
    "DECIDED_BY", "GOVERNED_BY", "USES", "READS_FROM",
];
45
/// Provenance codes accepted without a warning by graph validation.
pub const VALID_PROVENANCE_CODES: &[&str] = &[
    "U",
    "D",
    "A",
    "G",
];
47
/// Keywords accepted as the first token of a source reference.
#[rustfmt::skip]
pub const VALID_SOURCE_TYPES: &[&str] = &[
    "URL", "SVN", "SOURCECODE", "WIKI", "CONFLUENCE", "CONVERSATION",
    "GIT_COMMIT", "PULL_REQUEST", "ISSUE", "DOC", "LOG", "OTHER",
];
62
/// Upper bound, in bytes, on the length of a custom node type token.
const MAX_CUSTOM_TYPE_LEN: usize = 48;

/// Upper bound, in bytes, on the length of a custom relation token.
const MAX_CUSTOM_RELATION_LEN: usize = 64;
65
/// `(node type, long id prefix)` pairs for the built-in long-form node types.
///
/// The prefix is the part of a canonical node id before the `:`.
#[rustfmt::skip]
pub const TYPE_TO_PREFIX: &[(&str, &str)] = &[
    ("Concept", "concept"),       ("Process", "process"),
    ("DataStore", "datastore"),   ("Interface", "interface"),
    ("Rule", "rule"),             ("Feature", "feature"),
    ("Decision", "decision"),     ("Convention", "convention"),
    ("Note", "note"),             ("Bug", "bug"),
];
79
/// `(node type, one-letter code)` pairs for the built-in long-form node types.
///
/// The code is accepted as a short alternative to the long id prefix.
#[rustfmt::skip]
pub const TYPE_TO_CODE: &[(&str, &str)] = &[
    ("Concept", "K"),   ("Process", "P"),    ("DataStore", "D"),
    ("Interface", "I"), ("Rule", "R"),       ("Feature", "F"),
    ("Decision", "Z"),  ("Convention", "C"), ("Note", "N"),
    ("Bug", "B"),
];
93
/// Per-relation typing rules as `(relation, allowed source types, allowed target types)`.
///
/// Relations without an entry here (for example `USES` and `READS_FROM`)
/// have no typing rule in this table.
#[rustfmt::skip]
pub const EDGE_TYPE_RULES: &[(&str, &[&str], &[&str])] = &[
    (
        "HAS",
        &["Concept", "Process", "Interface", "D", "F"],
        &["Concept", "Feature", "DataStore", "Rule", "Interface", "D", "F"],
    ),
    (
        "STORED_IN",
        &["Concept", "Process", "Rule"],
        &["DataStore"],
    ),
    (
        "CREATED_BY",
        &["Concept", "DataStore", "Interface", "Decision"],
        &["Process"],
    ),
    (
        "TRIGGERS",
        &["Process", "Rule"],
        &["Process", "Bug", "Rule"],
    ),
    (
        "AFFECTED_BY",
        &["Concept", "Process", "DataStore", "Interface", "Rule", "Feature", "Decision", "Bug"],
        &["Concept", "Process", "DataStore", "Interface", "Rule", "Feature", "Decision", "Convention", "Bug"],
    ),
    (
        "AVAILABLE_IN",
        &["Feature", "DataStore", "Concept", "Process"],
        &["Interface"],
    ),
    (
        "DOCUMENTED_IN",
        &["Concept", "Process", "Decision", "Rule", "Feature", "Bug"],
        &["Interface", "Note"],
    ),
    (
        "DEPENDS_ON",
        &["Feature", "Process", "Interface"],
        &["Feature", "DataStore", "Interface", "Concept"],
    ),
    (
        "TRANSITIONS",
        &["Process", "Rule"],
        &["Process", "Rule"],
    ),
    (
        "DECIDED_BY",
        &["Concept", "Process", "Interface"],
        &["Decision"],
    ),
    (
        "GOVERNED_BY",
        &["Process", "Interface", "DataStore"],
        &["Convention", "Rule"],
    ),
];
172
173pub fn edge_type_rule(
178 relation: &str,
179) -> Option<(&'static [&'static str], &'static [&'static str])> {
180 EDGE_TYPE_RULES
181 .iter()
182 .find(|(rule_relation, _, _)| *rule_relation == relation)
183 .map(|(_, source_types, target_types)| (*source_types, *target_types))
184}
185
186pub fn canonical_type_code_for(node_type: &str) -> Option<&'static str> {
187 TYPE_TO_CODE
188 .iter()
189 .find(|(typ, _)| *typ == node_type)
190 .map(|(_, code)| *code)
191}
192
193fn type_for_prefix(prefix: &str) -> Option<&'static str> {
194 TYPE_TO_PREFIX
195 .iter()
196 .find(|(_, known_prefix)| *known_prefix == prefix)
197 .map(|(typ, _)| *typ)
198}
199
200fn type_for_code(code: &str) -> Option<&'static str> {
201 TYPE_TO_CODE
202 .iter()
203 .find(|(_, known_code)| *known_code == code)
204 .map(|(typ, _)| *typ)
205}
206
/// Checks the part of a node id after the `:` for regular node types.
///
/// A valid suffix is non-empty, starts with an ASCII lowercase letter and
/// consists only of ASCII lowercase letters, ASCII digits and `_`.
fn valid_id_suffix(suffix: &str) -> bool {
    let bytes = suffix.as_bytes();
    match bytes.first() {
        // Any non-ASCII character contributes bytes >= 0x80, which fail the
        // per-byte test below just as the character itself would.
        Some(first) if first.is_ascii_lowercase() => bytes
            .iter()
            .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'_')),
        _ => false,
    }
}
217
/// Checks the id suffix used for generated node types (and the `D`/`F` types).
///
/// The suffix must be non-empty and free of line breaks. It may end in one
/// `:<digits>` segment; apart from that optional segment the remaining name
/// must be non-empty and must not contain `:`.
fn valid_generated_node_suffix(suffix: &str) -> bool {
    if suffix.is_empty() || suffix.chars().any(|ch| matches!(ch, '\n' | '\r')) {
        return false;
    }

    let all_digits = |text: &str| !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit());

    // Drop the trailing `:<digits>` segment when present; otherwise the whole
    // suffix is the name.
    let name = suffix
        .rsplit_once(':')
        .filter(|&(_, tail)| all_digits(tail))
        .map_or(suffix, |(head, _)| head);

    !(name.is_empty() || name.contains(':'))
}
232
/// Reports whether `node_type` is a generated type marker: a `G` followed by
/// at least one more byte.
pub fn is_generated_node_type(node_type: &str) -> bool {
    node_type
        .strip_prefix('G')
        .is_some_and(|rest| !rest.is_empty())
}
236
/// Reports whether `value` is a generated relation marker: a `G` followed by
/// at least one more byte.
///
/// This is purely a prefix test, so the built-in relation `GOVERNED_BY`
/// also satisfies it.
pub fn is_generated_relation(value: &str) -> bool {
    matches!(value.as_bytes(), [b'G', _, ..])
}
240
/// Checks a custom type/relation token.
///
/// A token is accepted when it is non-empty, at most `max_len` bytes long and
/// made up solely of ASCII graphic characters (`!` through `~`), which
/// excludes every kind of whitespace and all non-ASCII text.
fn is_valid_custom_token(token: &str, max_len: usize) -> bool {
    let length_ok = (1..=max_len).contains(&token.len());
    // Non-ASCII characters encode to bytes >= 0x80, none of which is graphic.
    length_ok && token.bytes().all(|b| b.is_ascii_graphic())
}
250
251pub fn is_valid_node_type(value: &str) -> bool {
252 VALID_TYPES.contains(&value) || is_valid_custom_token(value, MAX_CUSTOM_TYPE_LEN)
253}
254
255pub fn is_valid_relation(value: &str) -> bool {
256 VALID_RELATIONS.contains(&value) || is_valid_custom_token(value, MAX_CUSTOM_RELATION_LEN)
257}
258
/// Parses `value` (surrounding whitespace ignored) as a similarity score.
///
/// Returns the number only when it parses as `f64` and lies in `0.0..=1.0`;
/// unparsable text, out-of-range values and NaN all yield `None`.
fn parse_similarity_score(value: &str) -> Option<f64> {
    value
        .trim()
        .parse::<f64>()
        .ok()
        .filter(|score| (0.0..=1.0).contains(score))
}
267
/// Checks that a score component label is `C` followed by one or more ASCII
/// digits (for example `C1`).
fn is_valid_score_component_label(value: &str) -> bool {
    match value.strip_prefix('C') {
        Some(digits) => !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()),
        None => false,
    }
}
274
275pub fn validate_bidirectional_similarity_edge(
276 source_id: &str,
277 relation: &str,
278 target_id: &str,
279 detail: &str,
280 bidirectional: bool,
281) -> Result<(), String> {
282 if !bidirectional {
283 return Ok(());
284 }
285 if relation != "~" {
286 return Err(format!(
287 "bidirectional edge requires '~' relation: {} {} {}",
288 source_id, relation, target_id
289 ));
290 }
291 if source_id > target_id {
292 return Err(format!(
293 "bidirectional edge must be canonicalized (source <= target): {} ~ {}",
294 source_id, target_id
295 ));
296 }
297 if parse_similarity_score(detail).is_none() {
298 return Err(format!(
299 "bidirectional similarity edge requires score in range 0..1: {} ~ {}",
300 source_id, target_id
301 ));
302 }
303 Ok(())
304}
305
/// Checks that `value` is a UTC timestamp of the exact form
/// `YYYY-MM-DDTHH:MM:SSZ`.
///
/// Besides the fixed 20-byte layout, the fields must describe a real
/// calendar date and time of day: month `01..=12`, a day that exists in that
/// month (February 29 only in leap years), hour `00..=23`, minute and second
/// `00..=59`. Fractional seconds, offsets other than `Z` and a seconds value
/// of 60 are rejected.
pub fn is_valid_iso_utc_timestamp(value: &str) -> bool {
    /// Days in `month` of `year` (proleptic Gregorian); 0 for an invalid month.
    fn days_in_month(year: u32, month: u32) -> u32 {
        match month {
            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
            4 | 6 | 9 | 11 => 30,
            2 if (year % 4 == 0 && year % 100 != 0) || year % 400 == 0 => 29,
            2 => 28,
            _ => 0,
        }
    }

    // `#` stands for an ASCII digit; every other byte must match literally.
    const SHAPE: &[u8] = b"####-##-##T##:##:##Z";

    let bytes = value.as_bytes();
    if bytes.len() != SHAPE.len() {
        return false;
    }
    let well_formed = bytes.iter().zip(SHAPE).all(|(&got, &want)| {
        if want == b'#' {
            got.is_ascii_digit()
        } else {
            got == want
        }
    });
    if !well_formed {
        return false;
    }

    // Every digit position was verified above, so this arithmetic cannot fail.
    let number = |range: std::ops::Range<usize>| {
        bytes[range]
            .iter()
            .fold(0u32, |acc, &digit| acc * 10 + u32::from(digit - b'0'))
    };
    let (year, month, day) = (number(0..4), number(5..7), number(8..10));
    let (hour, minute, second) = (number(11..13), number(14..16), number(17..19));

    // An invalid month yields 0 days, which makes the day range empty.
    (1..=days_in_month(year, month)).contains(&day) && hour <= 23 && minute <= 59 && second <= 59
}
347
/// Checks that `value` is a calendar date of the exact form `YYYY-MM-DD`.
///
/// Besides the fixed 10-byte layout, the month must be `01..=12` and the day
/// must exist in that month (February 29 only in leap years).
pub fn is_valid_iso_date(value: &str) -> bool {
    /// Days in `month` of `year` (proleptic Gregorian); 0 for an invalid month.
    fn days_in_month(year: u32, month: u32) -> u32 {
        match month {
            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
            4 | 6 | 9 | 11 => 30,
            2 if (year % 4 == 0 && year % 100 != 0) || year % 400 == 0 => 29,
            2 => 28,
            _ => 0,
        }
    }

    // `#` stands for an ASCII digit; every other byte must match literally.
    const SHAPE: &[u8] = b"####-##-##";

    let bytes = value.as_bytes();
    if bytes.len() != SHAPE.len() {
        return false;
    }
    let well_formed = bytes.iter().zip(SHAPE).all(|(&got, &want)| {
        if want == b'#' {
            got.is_ascii_digit()
        } else {
            got == want
        }
    });
    if !well_formed {
        return false;
    }

    // Every digit position was verified above, so this arithmetic cannot fail.
    let number = |range: std::ops::Range<usize>| {
        bytes[range]
            .iter()
            .fold(0u32, |acc, &digit| acc * 10 + u32::from(digit - b'0'))
    };
    let (year, month, day) = (number(0..4), number(5..7), number(8..10));

    // An invalid month yields 0 days, which makes the day range empty.
    (1..=days_in_month(year, month)).contains(&day)
}
371
372pub fn validate_source_reference(value: &str) -> Result<(), String> {
373 let trimmed = value.trim();
374 if trimmed.is_empty() {
375 return Err("source entry cannot be empty".to_owned());
376 }
377
378 let parts: Vec<&str> = trimmed.split_whitespace().collect();
379 if parts.len() < 2 {
380 return Err(format!(
381 "source '{}' must have format '<TYPE> <LINK_OR_DATE> <OPTIONAL_DETAILS>'",
382 value
383 ));
384 }
385
386 let source_type = parts[0];
387 if !VALID_SOURCE_TYPES.contains(&source_type) {
388 return Err(format!(
389 "source '{}' uses invalid type '{}'; valid types: {}",
390 value,
391 source_type,
392 VALID_SOURCE_TYPES.join(", ")
393 ));
394 }
395
396 match source_type {
397 "CONVERSATION" => {
398 if !is_valid_iso_date(parts[1]) {
399 return Err(format!(
400 "source '{}' must use date format YYYY-MM-DD for CONVERSATION",
401 value
402 ));
403 }
404 }
405 "GIT_COMMIT" => {
406 if parts.len() < 3 {
407 return Err(format!(
408 "source '{}' must use format 'GIT_COMMIT <REPO_URL_OR_NAME> <COMMIT_SHA> <OPTIONAL_DETAILS>'",
409 value
410 ));
411 }
412 }
413 _ => {}
414 }
415
416 Ok(())
417}
418
419pub fn normalize_source_reference(value: &str) -> String {
420 let trimmed = value.trim();
421 if trimmed.is_empty() {
422 return String::new();
423 }
424 let source_type = trimmed.split_whitespace().next().unwrap_or_default();
425 if VALID_SOURCE_TYPES.contains(&source_type) {
426 return trimmed.to_owned();
427 }
428 format!("DOC {trimmed}")
429}
430
/// Reports whether `value` lies in the closed interval `0.0..=1.0`.
///
/// NaN is never valid.
pub fn is_valid_importance(value: f64) -> bool {
    let unit_interval = 0.0..=1.0;
    unit_interval.contains(&value)
}
434
/// Reports whether `value` looks like an importance on the legacy integer
/// scale: a whole number greater than 1 and at most 6.
///
/// `1.0` is not treated as legacy because it is also a valid value on the
/// `0.0..=1.0` scale.
pub fn is_legacy_importance(value: f64) -> bool {
    let above_unit_scale = value > 1.0;
    let within_legacy_scale = value <= 6.0;
    let whole_number = value.fract() == 0.0;
    above_unit_scale && within_legacy_scale && whole_number
}
438
439pub fn normalize_node_id(id: &str) -> String {
444 let Some((head, suffix)) = id.split_once(':') else {
445 return id.to_owned();
446 };
447 let Some(node_type) = type_for_code(head).or_else(|| type_for_prefix(head)) else {
448 return id.to_owned();
449 };
450 let Some(prefix) = TYPE_TO_PREFIX
451 .iter()
452 .find(|(typ, _)| *typ == node_type)
453 .map(|(_, prefix)| *prefix)
454 else {
455 return id.to_owned();
456 };
457 format!("{prefix}:{suffix}")
458}
459
460pub fn canonicalize_node_id_for_type(id: &str, node_type: &str) -> Result<String, String> {
464 if is_generated_node_type(node_type) {
465 let suffix = match id.split_once(':') {
466 Some((head, suffix)) if head == node_type => suffix,
467 Some((head, _)) => {
468 return Err(format!(
469 "node id '{}' has type marker '{}'; expected '{}' or a path-only id",
470 id, head, node_type
471 ));
472 }
473 None => id,
474 };
475 if !valid_generated_node_suffix(suffix) {
476 return Err(format!(
477 "node id '{}' has invalid suffix for type '{}'",
478 id, node_type
479 ));
480 }
481 return Ok(format!("{node_type}:{suffix}"));
482 }
483
484 let Some((head, suffix)) = id.split_once(':') else {
485 return Err(format!(
486 "node id '{}' must be in format <type_code>:snake_case",
487 id
488 ));
489 };
490 let suffix_valid = if matches!(node_type, "D" | "F") {
491 valid_generated_node_suffix(suffix)
492 } else {
493 valid_id_suffix(suffix)
494 };
495 if !suffix_valid {
496 return Err(format!(
497 "node id '{}' has invalid suffix for type '{}'",
498 id, node_type
499 ));
500 }
501
502 if !is_valid_node_type(node_type) {
503 return Err(format!("invalid node type '{node_type}'"));
504 }
505
506 let Some(expected_code) = canonical_type_code_for(node_type) else {
507 if head == node_type {
508 return Ok(format!("{node_type}:{suffix}"));
509 }
510 return Err(format!(
511 "node id '{}' has type marker '{}'; expected '{}' for custom node type",
512 id, head, node_type
513 ));
514 };
515 let Some(expected_prefix) = TYPE_TO_PREFIX
516 .iter()
517 .find(|(typ, _)| *typ == node_type)
518 .map(|(_, prefix)| *prefix)
519 else {
520 return Err(format!("invalid node type '{node_type}'"));
521 };
522
523 if head == expected_code || head == expected_prefix {
524 return Ok(format!("{expected_prefix}:{suffix}"));
525 }
526
527 if let Some(actual_type) = type_for_code(head).or_else(|| type_for_prefix(head)) {
528 return Err(format!(
529 "node id '{}' has type marker '{}' (type '{}') but node_type is '{}'",
530 id, head, actual_type, node_type
531 ));
532 }
533
534 Err(format!(
535 "node id '{}' has unknown type marker '{}'; expected '{}' or '{}'",
536 id, head, expected_code, expected_prefix
537 ))
538}
539
/// Builds the message used when a node type may not be the source of a
/// relation, listing the allowed source types separated by `", "`.
pub fn format_edge_source_type_error(
    source_type: &str,
    relation: &str,
    allowed_source_types: &[impl AsRef<str>],
) -> String {
    let mut allowed = String::new();
    for (index, entry) in allowed_source_types.iter().enumerate() {
        if index > 0 {
            allowed.push_str(", ");
        }
        allowed.push_str(entry.as_ref());
    }
    format!("{source_type} cannot be source of {relation} (allowed: {allowed})")
}
556
/// Builds the message used when a node type may not be the target of a
/// relation, listing the allowed target types separated by `", "`.
pub fn format_edge_target_type_error(
    target_type: &str,
    relation: &str,
    allowed_target_types: &[impl AsRef<str>],
) -> String {
    let allowed: Vec<&str> = allowed_target_types
        .iter()
        .map(|entry| entry.as_ref())
        .collect();
    let allowed = allowed.join(", ");
    format!("{target_type} cannot be target of {relation} (allowed: {allowed})")
}
573
574pub fn validate_graph(
575 graph: &GraphFile,
576 cwd: &Path,
577 deep: bool,
578 base_dir: Option<&str>,
579) -> ValidationReport {
580 let mut errors = Vec::new();
581 let mut warnings = Vec::new();
582
583 let type_to_prefix: HashMap<&str, &str> = TYPE_TO_PREFIX.iter().copied().collect();
584 let type_to_code: HashMap<&str, &str> = TYPE_TO_CODE.iter().copied().collect();
585 if graph.metadata.name.trim().is_empty() {
587 errors.push("metadata.name missing".to_owned());
588 }
589
590 let mut id_counts = HashMap::<&str, usize>::new();
592 for node in &graph.nodes {
593 *id_counts.entry(node.id.as_str()).or_insert(0) += 1;
594 let generated = is_generated_node_type(&node.r#type);
595
596 if !is_valid_node_type(&node.r#type) {
597 errors.push(format!("node {} has invalid type {}", node.id, node.r#type));
598 }
599 if node.name.trim().is_empty() && !generated && node.properties.provenance != "G" {
600 errors.push(format!("node {} missing name", node.id));
601 }
602 if !generated && node.source_files.is_empty() {
603 errors.push(format!("node {} missing source_files", node.id));
604 }
605
606 match canonicalize_node_id_for_type(&node.id, &node.r#type) {
607 Ok(_) => {}
608 Err(_) => {
609 if let Some((head, _)) = node.id.split_once(':') {
610 if let (Some(expected_code), Some(expected_prefix)) = (
611 type_to_code.get(node.r#type.as_str()),
612 type_to_prefix.get(node.r#type.as_str()),
613 ) {
614 errors.push(format!(
615 "node id {} invalid for type {} (expected {}:* or {}:*)",
616 node.id, node.r#type, expected_code, expected_prefix
617 ));
618 if type_for_code(head).is_none() && type_for_prefix(head).is_none() {
619 errors.push(format!(
620 "node id {} has unknown type marker '{}'",
621 node.id, head
622 ));
623 }
624 } else {
625 errors.push(format!(
626 "node id {} invalid for custom type {} (expected {}:*)",
627 node.id, node.r#type, node.r#type
628 ));
629 }
630 } else {
631 errors.push(format!(
632 "node id {} does not match prefix:snake_case",
633 node.id
634 ));
635 }
636 }
637 }
638
639 if !generated && node.r#type != "Feature" && node.properties.provenance != "G" {
641 if node.properties.description.trim().is_empty() {
642 warnings.push(format!("node {} missing description", node.id));
643 }
644 if node.properties.key_facts.is_empty() {
645 warnings.push(format!("node {} missing key_facts", node.id));
646 }
647 if node.properties.provenance.trim().is_empty() {
648 warnings.push(format!("node {} missing provenance", node.id));
649 }
650 }
651 if let Some(confidence) = node.properties.confidence {
652 if !(0.0..=1.0).contains(&confidence) {
653 warnings.push(format!(
654 "node {} confidence out of range: {}",
655 node.id, confidence
656 ));
657 }
658 }
659 if !generated && is_legacy_importance(node.properties.importance) {
660 warnings.push(format!(
661 "node {} uses legacy importance scale (1..6): {}",
662 node.id, node.properties.importance
663 ));
664 } else if !generated && !is_valid_importance(node.properties.importance) {
665 errors.push(format!(
666 "node {} importance out of range: {}",
667 node.id, node.properties.importance
668 ));
669 }
670
671 if !generated
672 && !node.properties.provenance.trim().is_empty()
673 && !VALID_PROVENANCE_CODES.contains(&node.properties.provenance.as_str())
674 {
675 warnings.push(format!(
676 "node {} has non-dictionary provenance '{}' (expected one of: {})",
677 node.id,
678 node.properties.provenance,
679 VALID_PROVENANCE_CODES.join(", ")
680 ));
681 }
682
683 if !generated {
684 for source in &node.source_files {
685 if let Err(err) = validate_source_reference(source) {
686 warnings.push(format!(
687 "node {} has non-standard source '{}': {}",
688 node.id, source, err
689 ));
690 }
691 }
692 }
693 }
694 for (node_id, count) in &id_counts {
695 if *count > 1 {
696 errors.push(format!("duplicate node id: {} ({})", node_id, count));
697 }
698 }
699
700 let node_type_map: HashMap<&str, &str> = graph
702 .nodes
703 .iter()
704 .map(|node| (node.id.as_str(), node.r#type.as_str()))
705 .collect();
706 let node_ids: HashSet<&str> = node_type_map.keys().copied().collect();
707 let mut touched = HashSet::new();
708 let mut edge_keys = HashSet::new();
709
710 for edge in &graph.edges {
711 if !is_valid_relation(&edge.relation) {
712 errors.push(format!(
713 "edge has invalid relation: {} {} {}",
714 edge.source_id, edge.relation, edge.target_id
715 ));
716 }
717 if !node_ids.contains(edge.source_id.as_str()) {
718 errors.push(format!(
719 "edge source missing: {} {} {}",
720 edge.source_id, edge.relation, edge.target_id
721 ));
722 }
723 if !node_ids.contains(edge.target_id.as_str()) {
724 errors.push(format!(
725 "edge target missing: {} {} {}",
726 edge.source_id, edge.relation, edge.target_id
727 ));
728 }
729
730 if let Err(err) = validate_bidirectional_similarity_edge(
731 &edge.source_id,
732 &edge.relation,
733 &edge.target_id,
734 &edge.properties.detail,
735 edge.properties.bidirectional,
736 ) {
737 errors.push(err);
738 }
739
740 for (label, score) in &edge.properties.score_components {
741 if !is_valid_score_component_label(label) {
742 errors.push(format!(
743 "edge {} {} {} has invalid score component label '{}'",
744 edge.source_id, edge.relation, edge.target_id, label
745 ));
746 }
747 if !(0.0..=1.0).contains(score) {
748 errors.push(format!(
749 "edge {} {} {} score component '{}' out of range: {}",
750 edge.source_id, edge.relation, edge.target_id, label, score
751 ));
752 }
753 }
754
755 if let (Some(src_type), Some(tgt_type)) = (
757 node_type_map.get(edge.source_id.as_str()),
758 node_type_map.get(edge.target_id.as_str()),
759 ) {
760 if VALID_TYPES.contains(src_type) && VALID_TYPES.contains(tgt_type) {
761 if let Some((valid_src, valid_tgt)) = edge_type_rule(edge.relation.as_str()) {
762 if !valid_src.is_empty() && !valid_src.contains(src_type) {
763 errors.push(format!(
764 "edge {} {} {} invalid: {}",
765 edge.source_id,
766 edge.relation,
767 edge.target_id,
768 format_edge_source_type_error(
769 src_type,
770 edge.relation.as_str(),
771 valid_src
772 )
773 ));
774 }
775 if !valid_tgt.is_empty() && !valid_tgt.contains(tgt_type) {
776 errors.push(format!(
777 "edge {} {} {} invalid: {}",
778 edge.source_id,
779 edge.relation,
780 edge.target_id,
781 format_edge_target_type_error(
782 tgt_type,
783 edge.relation.as_str(),
784 valid_tgt
785 )
786 ));
787 }
788 }
789 }
790 }
791
792 touched.insert(edge.source_id.as_str());
793 touched.insert(edge.target_id.as_str());
794 let key = format!("{}|{}|{}", edge.source_id, edge.relation, edge.target_id);
795 if !edge_keys.insert(key.clone()) {
796 errors.push(format!("duplicate edge: {}", key.replace('|', " ")));
797 }
798 }
799
800 for node in &graph.nodes {
802 if !touched.contains(node.id.as_str()) {
803 errors.push(format!("orphan node: {}", node.id));
804 }
805 }
806
807 if deep {
809 let base = base_dir
810 .map(|d| cwd.join(d))
811 .unwrap_or_else(|| cwd.to_path_buf());
812 for node in &graph.nodes {
813 for source in &node.source_files {
814 if !base.join(source).exists() {
815 errors.push(format!("missing source file: {} -> {}", node.id, source));
816 }
817 }
818 }
819 }
820
821 errors.sort();
822 warnings.sort();
823 ValidationReport { errors, warnings }
824}
825
#[cfg(test)]
mod tests {
    use super::{
        canonicalize_node_id_for_type, is_valid_node_type, is_valid_relation,
        is_valid_score_component_label, validate_bidirectional_similarity_edge,
    };

    /// A custom type uses itself as the id marker and the id is kept as-is.
    #[test]
    fn canonicalize_node_id_allows_custom_type_marker() {
        let canonical = canonicalize_node_id_for_type("~:dedupe_anchor", "~");
        assert_eq!(canonical.expect("custom id"), "~:dedupe_anchor");
    }

    /// Generated types accept suffixes that are not snake_case.
    #[test]
    fn canonicalize_node_id_allows_generated_type_marker() {
        let canonical = canonicalize_node_id_for_type("GDIR:App", "GDIR");
        assert_eq!(canonical.expect("generated id"), "GDIR:App");
    }

    /// A marker that differs from the custom type is rejected.
    #[test]
    fn canonicalize_node_id_rejects_mismatched_custom_marker() {
        let message = canonicalize_node_id_for_type("custom:dedupe_anchor", "~").unwrap_err();
        assert!(message.contains("expected '~' for custom node type"));
    }

    /// Custom tokens are valid for both node types and relations; blank ones are not.
    #[test]
    fn relation_and_node_type_validation_accepts_custom_tokens() {
        assert!(is_valid_node_type("~"));
        assert!(is_valid_relation("~"));
        assert!(!is_valid_node_type(""));
        assert!(!is_valid_relation(" "));
    }

    /// Bidirectional edges need an in-range score and ordered endpoints.
    #[test]
    fn bidirectional_similarity_validation_requires_score_and_canonical_order() {
        let check = |source: &str, target: &str, detail: &str| {
            validate_bidirectional_similarity_edge(source, "~", target, detail, true)
        };

        assert!(check("~:a", "~:b", "0.8").is_ok());

        let invalid_score = check("~:a", "~:b", "1.8").unwrap_err();
        assert!(invalid_score.contains("requires score in range 0..1"));

        let invalid_order = check("~:b", "~:a", "0.8").unwrap_err();
        assert!(invalid_order.contains("must be canonicalized"));
    }

    /// Only `C<digits>` labels are accepted.
    #[test]
    fn score_component_label_validation_accepts_only_c_numeric() {
        for accepted in ["C1", "C2"] {
            assert!(is_valid_score_component_label(accepted));
        }
        for rejected in ["DESC", "C"] {
            assert!(!is_valid_score_component_label(rejected));
        }
    }
}
879}