1use std::collections::{BTreeMap, HashMap};
2use std::fs;
3use std::io::Write;
4use std::path::{Path, PathBuf};
5use std::time::{SystemTime, UNIX_EPOCH};
6
7use anyhow::{Context, Result};
8use flate2::Compression;
9use flate2::write::GzEncoder;
10use serde::{Deserialize, Serialize};
11
// Reserved identifiers built around the "^" marker.
// NOTE(review): none of these constants is referenced in the part of the file visible here;
// presumably they describe a special graph-info node whose key facts carry the graph UUID —
// confirm against the code that uses them.
const GRAPH_INFO_NODE_ID: &str = "^:graph_info";
const GRAPH_INFO_NODE_TYPE: &str = "^";
const GRAPH_UUID_FACT_PREFIX: &str = "graph_uuid=";
15
16fn atomic_write(dest: &Path, data: &str) -> Result<()> {
21 let unique = SystemTime::now()
22 .duration_since(UNIX_EPOCH)
23 .unwrap_or_default()
24 .as_nanos();
25 let tmp = dest.with_extension(format!("tmp.{}.{}", std::process::id(), unique));
26 fs::write(&tmp, data).with_context(|| format!("failed to write tmp: {}", tmp.display()))?;
27 if dest.exists() {
28 let bak = backup_bak_path(dest)?;
29 if should_refresh_bak(&bak)? {
30 fs::copy(dest, &bak)
31 .with_context(|| format!("failed to create backup: {}", bak.display()))?;
32 }
33 }
34 fs::rename(&tmp, dest).with_context(|| format!("failed to rename tmp to {}", dest.display()))
35}
36
/// Age in seconds (5 minutes) after which `should_refresh_bak` treats an existing `.bak`
/// copy as stale.
const BACKUP_BAK_STALE_SECS: u64 = 5 * 60;
/// Minimum gap in seconds (1 hour) between two timestamped `.bck.<ts>.gz` backups written by
/// `backup_graph_if_stale`.
const BACKUP_STALE_SECS: u64 = 60 * 60;
39
40fn should_refresh_bak(bak_path: &Path) -> Result<bool> {
41 if !bak_path.exists() {
42 return Ok(true);
43 }
44 let modified = fs::metadata(bak_path)
45 .and_then(|m| m.modified())
46 .with_context(|| format!("failed to read backup mtime: {}", bak_path.display()))?;
47 let age_secs = SystemTime::now()
48 .duration_since(modified)
49 .unwrap_or_default()
50 .as_secs();
51 Ok(age_secs >= BACKUP_BAK_STALE_SECS)
52}
53
54fn backup_graph_if_stale(path: &Path, data: &str) -> Result<()> {
55 let cache_dir = backup_cache_dir(path)?;
56 let stem = match path.file_stem().and_then(|s| s.to_str()) {
57 Some(stem) => stem,
58 None => return Ok(()),
59 };
60 let ext = path.extension().and_then(|s| s.to_str()).unwrap_or("json");
61 let backup_prefix = format!("{stem}.{ext}");
62 let now = SystemTime::now()
63 .duration_since(UNIX_EPOCH)
64 .context("time went backwards")?
65 .as_secs();
66 if let Some(latest) = latest_backup_ts(&cache_dir, &backup_prefix)? {
67 if now.saturating_sub(latest) < BACKUP_STALE_SECS {
68 return Ok(());
69 }
70 }
71
72 let backup_path = cache_dir.join(format!("{backup_prefix}.bck.{now}.gz"));
73 let tmp_path = backup_path.with_extension("tmp");
74 let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
75 encoder.write_all(data.as_bytes())?;
76 let encoded = encoder.finish()?;
77 fs::write(&tmp_path, encoded)
78 .with_context(|| format!("failed to write tmp: {}", tmp_path.display()))?;
79 fs::rename(&tmp_path, &backup_path)
80 .with_context(|| format!("failed to rename tmp to {}", backup_path.display()))?;
81 Ok(())
82}
83
84fn backup_cache_dir(path: &Path) -> Result<PathBuf> {
85 let dir = crate::cache_paths::cache_root_for_graph(path);
86 fs::create_dir_all(&dir)
87 .with_context(|| format!("failed to create cache directory: {}", dir.display()))?;
88 Ok(dir)
89}
90
91fn backup_bak_path(dest: &Path) -> Result<PathBuf> {
92 let cache_dir = backup_cache_dir(dest)?;
93 let stem = dest.file_stem().and_then(|s| s.to_str()).unwrap_or("graph");
94 let ext = dest.extension().and_then(|s| s.to_str()).unwrap_or("json");
95 Ok(cache_dir.join(format!("{stem}.{ext}.bak")))
96}
97
98fn latest_backup_ts(dir: &Path, stem: &str) -> Result<Option<u64>> {
99 let prefix = format!("{stem}.bck.");
100 let suffix = ".gz";
101 let mut latest = None;
102 for entry in fs::read_dir(dir).with_context(|| format!("read dir: {}", dir.display()))? {
103 let entry = entry?;
104 let name = entry.file_name();
105 let name = name.to_string_lossy();
106 if !name.starts_with(&prefix) || !name.ends_with(suffix) {
107 continue;
108 }
109 let ts_part = &name[prefix.len()..name.len() - suffix.len()];
110 if let Ok(ts) = ts_part.parse::<u64>() {
111 match latest {
112 Some(current) => {
113 if ts > current {
114 latest = Some(ts);
115 }
116 }
117 None => latest = Some(ts),
118 }
119 }
120 }
121 Ok(latest)
122}
123
/// Maps a full node type name (e.g. `"Feature"`) to its one-letter `.kg` code (e.g. `"F"`).
///
/// Names that are not in the table are returned unchanged. Matching is case-sensitive.
fn node_type_to_code(node_type: &str) -> &str {
    const TYPE_CODES: &[(&str, &str)] = &[
        ("Feature", "F"),
        ("Concept", "K"),
        ("Interface", "I"),
        ("Process", "P"),
        ("DataStore", "D"),
        ("Attribute", "A"),
        ("Entity", "Y"),
        ("Note", "N"),
        ("Rule", "R"),
        ("Convention", "C"),
        ("Bug", "B"),
        ("Decision", "Z"),
        ("OpenQuestion", "O"),
        ("Claim", "Q"),
        ("Insight", "W"),
        ("Reference", "M"),
        ("Term", "T"),
        ("Status", "S"),
        ("Doubt", "L"),
    ];
    for (name, code) in TYPE_CODES {
        if *name == node_type {
            return code;
        }
    }
    node_type
}
148
/// Maps a one-letter `.kg` node code (e.g. `"F"`) back to its full type name
/// (e.g. `"Feature"`).
///
/// Codes that are not in the table are returned unchanged. Matching is case-sensitive.
fn code_to_node_type(code: &str) -> &str {
    const CODE_TYPES: &[(&str, &str)] = &[
        ("F", "Feature"),
        ("K", "Concept"),
        ("I", "Interface"),
        ("P", "Process"),
        ("D", "DataStore"),
        ("A", "Attribute"),
        ("Y", "Entity"),
        ("N", "Note"),
        ("R", "Rule"),
        ("C", "Convention"),
        ("B", "Bug"),
        ("Z", "Decision"),
        ("O", "OpenQuestion"),
        ("Q", "Claim"),
        ("W", "Insight"),
        ("M", "Reference"),
        ("T", "Term"),
        ("S", "Status"),
        ("L", "Doubt"),
    ];
    for (short, full) in CODE_TYPES {
        if *short == code {
            return full;
        }
    }
    code
}
173
/// Maps a relation name to its one-letter `.kg` code.
///
/// Several relations have two accepted spellings that share one code (e.g. `DOCUMENTED_IN`
/// and `DOCUMENTS` both become `"D"`). Relations not in the table are returned unchanged.
fn relation_to_code(relation: &str) -> &str {
    const RELATION_CODES: &[(&[&str], &str)] = &[
        (&["DOCUMENTED_IN", "DOCUMENTS"], "D"),
        (&["HAS"], "H"),
        (&["TRIGGERS"], "T"),
        (&["AFFECTED_BY", "AFFECTS"], "A"),
        (&["READS_FROM", "READS"], "R"),
        (&["GOVERNED_BY", "GOVERNS"], "G"),
        (&["DEPENDS_ON"], "O"),
        (&["AVAILABLE_IN"], "I"),
        (&["SUPPORTS"], "S"),
        (&["SUMMARIZES"], "U"),
        (&["RELATED_TO"], "L"),
        (&["CONTRADICTS"], "V"),
        (&["CREATED_BY", "CREATES"], "C"),
    ];
    for (spellings, code) in RELATION_CODES {
        if spellings.iter().any(|spelling| *spelling == relation) {
            return code;
        }
    }
    relation
}
192
/// Maps a one-letter `.kg` relation code back to its relation name.
///
/// Codes shared by two spellings expand to the first one (`"D"` becomes `DOCUMENTED_IN`).
/// Anything not in the table, including `"~"`, is returned unchanged.
fn code_to_relation(code: &str) -> &str {
    const CODE_RELATIONS: &[(&str, &str)] = &[
        ("D", "DOCUMENTED_IN"),
        ("H", "HAS"),
        ("T", "TRIGGERS"),
        ("A", "AFFECTED_BY"),
        ("R", "READS_FROM"),
        ("G", "GOVERNED_BY"),
        ("O", "DEPENDS_ON"),
        ("I", "AVAILABLE_IN"),
        ("S", "SUPPORTS"),
        ("U", "SUMMARIZES"),
        ("L", "RELATED_TO"),
        ("V", "CONTRADICTS"),
        ("C", "CREATED_BY"),
    ];
    for (short, full) in CODE_RELATIONS {
        if *short == code {
            return full;
        }
    }
    code
}
211
/// Orders two ids so the lexicographically smaller one comes first, returning owned copies.
///
/// Used so that a bidirectional edge has the same (source, target) pair no matter which
/// endpoint it was declared under.
fn canonicalize_bidirectional_pair(a: &str, b: &str) -> (String, String) {
    let (first, second) = if b < a { (b, a) } else { (a, b) };
    (first.to_owned(), second.to_owned())
}
219
/// Returns `true` when `value` is an uppercase `C` followed by one or more ASCII digits
/// (e.g. `C1`, `C12`).
fn is_score_component_label(value: &str) -> bool {
    match value.strip_prefix('C') {
        Some(digits) => !digits.is_empty() && digits.chars().all(|ch| ch.is_ascii_digit()),
        None => false,
    }
}
226
/// Returns a sorted copy of `values`, ordered by ASCII-lowercased text first and by the
/// original text second, so strings differing only in case get a deterministic order.
fn sort_case_insensitive(values: &[String]) -> Vec<String> {
    // Decorate each value with its lowercase key once; tuple ordering then compares the key
    // first and falls back to the original string.
    let mut keyed: Vec<(String, &String)> = values
        .iter()
        .map(|value| (value.to_ascii_lowercase(), value))
        .collect();
    keyed.sort();
    keyed.into_iter().map(|(_, value)| value.clone()).collect()
}
236
/// Reverses `.kg` text escaping: `\n` becomes a line feed, `\r` a carriage return and `\\` a
/// single backslash.
///
/// Any other escape sequence is kept verbatim (backslash plus the following character), and
/// a lone trailing backslash is kept as-is.
fn decode_kg_text(value: &str) -> String {
    let mut decoded = String::new();
    let mut rest = value.chars();
    loop {
        match rest.next() {
            None => break,
            Some('\\') => match rest.next() {
                Some('n') => decoded.push('\n'),
                Some('r') => decoded.push('\r'),
                Some('\\') => decoded.push('\\'),
                Some(unknown) => decoded.extend(['\\', unknown]),
                None => decoded.push('\\'),
            },
            Some(plain) => decoded.push(plain),
        }
    }
    decoded
}
258
/// Escapes text so it fits on a single `.kg` line: backslash becomes `\\`, line feed `\n`
/// and carriage return `\r`. All other characters are copied unchanged.
fn escape_kg_text(value: &str) -> String {
    value.chars().fold(String::new(), |mut escaped, ch| {
        let replacement = match ch {
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            _ => None,
        };
        match replacement {
            Some(sequence) => escaped.push_str(sequence),
            None => escaped.push(ch),
        }
        escaped
    })
}
271
/// Decodes the raw value of a text field read from a `.kg` line.
///
/// Currently a direct delegation to `decode_kg_text`; the value is not trimmed.
fn parse_text_field(value: &str) -> String {
    decode_kg_text(value)
}
275
276fn push_text_line(out: &mut String, key: &str, value: &str) {
277 out.push_str(key);
278 out.push(' ');
279 out.push_str(&escape_kg_text(value));
280 out.push('\n');
281}
282
/// Removes entries that equal an earlier entry when compared ASCII-case-insensitively.
///
/// The first occurrence of each value is kept with its original casing, and the relative
/// order of the surviving entries is preserved.
fn dedupe_case_insensitive(values: Vec<String>) -> Vec<String> {
    let mut seen_keys = std::collections::HashSet::new();
    values
        .into_iter()
        // `insert` returns false for a key that is already present, which filters duplicates.
        .filter(|candidate| seen_keys.insert(candidate.to_ascii_lowercase()))
        .collect()
}
294
/// Checks whether `value` has the exact shape `YYYY-MM-DDTHH:MM:SSZ` with plausible field
/// ranges.
///
/// Despite the name, nothing is parsed into a time value; the result only says whether the
/// text is acceptable. Ranges are checked per field (month 1–12, day 1–31, hour 0–23,
/// minute and second 0–59). The day is not validated against the month, so `02-31` passes.
fn parse_utc_timestamp(value: &str) -> bool {
    // 'd' stands for "any ASCII digit"; every other byte must match literally.
    const SHAPE: &[u8; 20] = b"dddd-dd-ddTdd:dd:ddZ";

    let bytes = value.as_bytes();
    if bytes.len() != SHAPE.len() {
        return false;
    }
    let shape_matches = bytes
        .iter()
        .zip(SHAPE.iter())
        .all(|(&actual, &expected)| match expected {
            b'd' => actual.is_ascii_digit(),
            literal => actual == literal,
        });
    if !shape_matches {
        return false;
    }

    // Safe after the shape check: both bytes at each offset are ASCII digits.
    let two_digits =
        |start: usize| u32::from(bytes[start] - b'0') * 10 + u32::from(bytes[start + 1] - b'0');
    let month = two_digits(5);
    let day = two_digits(8);
    let hour = two_digits(11);
    let minute = two_digits(14);
    let second = two_digits(17);

    (1..=12).contains(&month)
        && (1..=31).contains(&day)
        && hour <= 23
        && minute <= 59
        && second <= 59
}
336
/// Reports whether strict format checking is requested through the `KG_STRICT_FORMAT`
/// environment variable.
///
/// The value is trimmed and compared case-insensitively against `1`, `true`, `yes` and `on`.
/// An unset or unreadable variable, or any other value, means `false`.
fn strict_kg_mode() -> bool {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];
    match std::env::var("KG_STRICT_FORMAT") {
        Ok(raw) => {
            let normalized = raw.trim().to_ascii_lowercase();
            TRUTHY.iter().any(|accepted| *accepted == normalized.as_str())
        }
        Err(_) => false,
    }
}
346
/// Trims `line` and shortens it to at most 160 characters, appending `...` when anything
/// was cut off. Counting is per `char`, so multi-byte text is never split mid-character.
fn abbreviated_line(line: &str) -> String {
    const MAX_CHARS: usize = 160;
    let mut remaining = line.trim().chars();
    let mut shortened: String = remaining.by_ref().take(MAX_CHARS).collect();
    // Anything left in the iterator means the input was longer than the limit.
    if remaining.next().is_some() {
        shortened.push_str("...");
    }
    shortened
}
360
361fn line_fragment(line: &str) -> String {
362 let snippet = abbreviated_line(line);
363 if snippet.is_empty() {
364 "fragment: <empty line>".to_owned()
365 } else {
366 format!("fragment: {snippet}")
367 }
368}
369
370fn json_error_detail(label: &str, path: &Path, raw: &str, error: &serde_json::Error) -> String {
371 let line_no = error.line();
372 let column = error.column();
373 let fragment = raw
374 .lines()
375 .nth(line_no.saturating_sub(1))
376 .map(line_fragment)
377 .unwrap_or_else(|| "fragment: <unavailable>".to_owned());
378 format!(
379 "{label}: {} at line {line_no}, column {column}: {error}\n{fragment}",
380 path.display()
381 )
382}
383
384fn validate_len(
385 line_no: usize,
386 field: &str,
387 value: &str,
388 raw_line: &str,
389 min: usize,
390 max: usize,
391 strict: bool,
392) -> Result<()> {
393 let len = value.chars().count();
394 if strict && (len < min || len > max) {
395 return Err(anyhow::anyhow!(
396 "invalid {field} length at line {line_no}: expected {min}..={max}, got {len}\n{}",
397 line_fragment(raw_line)
398 ));
399 }
400 Ok(())
401}
402
403fn enforce_field_order(
404 line_no: usize,
405 key: &str,
406 rank: u8,
407 last_rank: &mut u8,
408 section: &str,
409 raw_line: &str,
410 strict: bool,
411) -> Result<()> {
412 if strict && rank < *last_rank {
413 return Err(anyhow::anyhow!(
414 "invalid field order at line {line_no}: {key} in {section} block\n{}",
415 line_fragment(raw_line)
416 ));
417 }
418 if rank > *last_rank {
419 *last_rank = rank;
420 }
421 Ok(())
422}
423
/// Extracts the value of a `key value` line.
///
/// Returns `Some("")` when the line is exactly `key`, `Some(value)` when the line is `key`
/// followed by one space and the value (further spaces stay part of the value), and `None`
/// otherwise, including when `key` is only a prefix of a longer word.
fn field_value<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    let after_key = line.strip_prefix(key)?;
    if after_key.is_empty() {
        Some(after_key)
    } else {
        after_key.strip_prefix(' ')
    }
}
432
433fn fail_or_warn(strict: bool, warnings: &mut Vec<String>, message: String) -> Result<()> {
434 if strict {
435 Err(anyhow::anyhow!(message))
436 } else {
437 warnings.push(message);
438 Ok(())
439 }
440}
441
/// Test-only convenience wrapper around `parse_kg_with_warnings` that discards the
/// collected warnings and returns only the parsed graph.
#[cfg(test)]
fn parse_kg(raw: &str, graph_name: &str, strict: bool) -> Result<GraphFile> {
    let (graph, _warnings) = parse_kg_with_warnings(raw, graph_name, strict)?;
    Ok(graph)
}
446
/// Parses the line-oriented `.kg` text in `raw` into a `GraphFile` named `graph_name`,
/// returning the graph together with the warnings collected along the way.
///
/// Recognised lines:
/// - blank lines and lines starting with `#` are skipped;
/// - `@ <type>:<id>` opens a node block, `! <note-id> <node-id>` opens a note block;
/// - inside a note block: `b` body, `t` tag, `a` author, `e` created-at, `p` provenance,
///   `s` source file;
/// - inside a node block: `N` name, `D` description, `A` alias, `F` key fact, `E` created-at,
///   `C` confidence, `V` importance, `P` provenance, `S` source file, `- <key> <value>`
///   custom fields, `> <relation> <target>` directed edges, `= ~ <target>` bidirectional
///   edges, and the edge attributes `d` (detail or `C<n> <score>` component), `i`
///   (valid-from) and `x` (valid-to).
///
/// Field letters are matched against the line with only a trailing `\r` removed, so they
/// must start in the first column; headers and edge lines are matched after trimming.
///
/// After the last line, aliases, key facts, tags and source files are de-duplicated
/// (case-insensitively) and sorted, edges and notes are sorted, and `refresh_counts` is
/// called on the graph.
///
/// # Errors
/// With `strict` set, the first malformed line, out-of-order field or out-of-range field
/// length aborts parsing with an error. Without it, those problems are pushed to the
/// returned warnings (or silently tolerated) and the line is skipped. One exception: an
/// unparseable score in a `d C<n> <score>` line is an error in both modes.
fn parse_kg_with_warnings(
    raw: &str,
    graph_name: &str,
    strict: bool,
) -> Result<(GraphFile, Vec<String>)> {
    let mut graph = GraphFile::new(graph_name);
    let mut warnings = Vec::new();
    // Parser state: the node or note block currently being filled, and the index (into
    // `graph.edges`) of the edge that attribute lines attach to.
    let mut current_node: Option<Node> = None;
    let mut current_note: Option<Note> = None;
    let mut current_edge_index: Option<usize> = None;
    // Highest field rank seen so far in the current node / note / edge block.
    let mut last_node_rank: u8 = 0;
    let mut last_note_rank: u8 = 0;
    let mut last_edge_rank: u8 = 0;

    for (idx, line) in raw.lines().enumerate() {
        let line_no = idx + 1;
        let raw_line = line.strip_suffix('\r').unwrap_or(line);
        let trimmed = raw_line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        // Node header. Whatever block was open is flushed into the graph first.
        if let Some(rest) = trimmed.strip_prefix("@ ") {
            if let Some(note) = current_note.take() {
                graph.notes.push(note);
            }
            if let Some(node) = current_node.take() {
                graph.nodes.push(node);
            }
            let Some((type_code, node_id)) = rest.split_once(':') else {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!("invalid node header at line {line_no}: {trimmed}"),
                )?;
                current_edge_index = None;
                continue;
            };
            // Ids that already contain ':' are normalized as written. Otherwise the type
            // code is prepended, and the result is normalized only when the code is a known
            // one (i.e. `code_to_node_type` maps it to something different).
            let parsed_id = {
                let raw_id = node_id.trim();
                if raw_id.contains(':') {
                    crate::validate::normalize_node_id(raw_id)
                } else if code_to_node_type(type_code.trim()) != type_code.trim() {
                    crate::validate::normalize_node_id(&format!("{}:{raw_id}", type_code.trim()))
                } else {
                    format!("{}:{raw_id}", type_code.trim())
                }
            };
            current_node = Some(Node {
                id: parsed_id,
                r#type: code_to_node_type(type_code.trim()).to_owned(),
                name: String::new(),
                properties: NodeProperties::default(),
                source_files: Vec::new(),
            });
            current_edge_index = None;
            last_node_rank = 0;
            last_edge_rank = 0;
            continue;
        }

        // Note header: `! <note-id> <node-id>`; extra tokens after the two ids are ignored.
        if let Some(rest) = trimmed.strip_prefix("! ") {
            if let Some(node) = current_node.take() {
                graph.nodes.push(node);
            }
            if let Some(note) = current_note.take() {
                graph.notes.push(note);
            }
            let mut parts = rest.split_whitespace();
            let Some(id) = parts.next() else {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!("invalid note header at line {line_no}: {trimmed}"),
                )?;
                current_edge_index = None;
                continue;
            };
            let Some(node_id) = parts.next() else {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!("invalid note header at line {line_no}: {trimmed}"),
                )?;
                current_edge_index = None;
                continue;
            };
            current_note = Some(Note {
                id: id.to_owned(),
                node_id: node_id.to_owned(),
                ..Default::default()
            });
            current_edge_index = None;
            last_note_rank = 0;
            continue;
        }

        // While a note block is open, every remaining line is interpreted as a note field;
        // node and edge syntax is not considered until the next header.
        if let Some(note) = current_note.as_mut() {
            if let Some(rest) = field_value(raw_line, "b") {
                enforce_field_order(
                    line_no,
                    "b",
                    1,
                    &mut last_note_rank,
                    "note",
                    raw_line,
                    strict,
                )?;
                note.body = parse_text_field(rest);
                continue;
            }
            if let Some(rest) = field_value(raw_line, "t") {
                enforce_field_order(
                    line_no,
                    "t",
                    2,
                    &mut last_note_rank,
                    "note",
                    raw_line,
                    strict,
                )?;
                let value = parse_text_field(rest);
                if !value.is_empty() {
                    note.tags.push(value);
                }
                continue;
            }
            if let Some(rest) = field_value(raw_line, "a") {
                enforce_field_order(
                    line_no,
                    "a",
                    3,
                    &mut last_note_rank,
                    "note",
                    raw_line,
                    strict,
                )?;
                note.author = parse_text_field(rest);
                continue;
            }
            if let Some(rest) = field_value(raw_line, "e") {
                enforce_field_order(
                    line_no,
                    "e",
                    4,
                    &mut last_note_rank,
                    "note",
                    raw_line,
                    strict,
                )?;
                // Unlike the node `E` field, the note timestamp is stored without validation.
                note.created_at = rest.trim().to_owned();
                continue;
            }
            if let Some(rest) = field_value(raw_line, "p") {
                enforce_field_order(
                    line_no,
                    "p",
                    5,
                    &mut last_note_rank,
                    "note",
                    raw_line,
                    strict,
                )?;
                note.provenance = parse_text_field(rest);
                continue;
            }
            if let Some(rest) = field_value(raw_line, "s") {
                enforce_field_order(
                    line_no,
                    "s",
                    6,
                    &mut last_note_rank,
                    "note",
                    raw_line,
                    strict,
                )?;
                let value = parse_text_field(rest);
                if !value.is_empty() {
                    note.source_files.push(value);
                }
                continue;
            }
            fail_or_warn(
                strict,
                &mut warnings,
                format!("unrecognized note line at {line_no}: {trimmed}"),
            )?;
            continue;
        }

        // Everything below needs an open node block.
        let Some(node) = current_node.as_mut() else {
            fail_or_warn(
                strict,
                &mut warnings,
                format!("unexpected line before first node at line {line_no}: {trimmed}"),
            )?;
            continue;
        };

        // Node fields. Length limits passed to `validate_len` only apply in strict mode.
        if let Some(rest) = field_value(raw_line, "N") {
            enforce_field_order(
                line_no,
                "N",
                1,
                &mut last_node_rank,
                "node",
                raw_line,
                strict,
            )?;
            let value = parse_text_field(rest);
            validate_len(line_no, "N", &value, raw_line, 1, 120, strict)?;
            node.name = value;
            continue;
        }
        if let Some(rest) = field_value(raw_line, "D") {
            enforce_field_order(
                line_no,
                "D",
                2,
                &mut last_node_rank,
                "node",
                raw_line,
                strict,
            )?;
            let value = parse_text_field(rest);
            validate_len(line_no, "D", &value, raw_line, 1, 200, strict)?;
            node.properties.description = value;
            continue;
        }
        if let Some(rest) = field_value(raw_line, "A") {
            enforce_field_order(
                line_no,
                "A",
                3,
                &mut last_node_rank,
                "node",
                raw_line,
                strict,
            )?;
            let value = parse_text_field(rest);
            validate_len(line_no, "A", &value, raw_line, 1, 80, strict)?;
            node.properties.alias.push(value);
            continue;
        }
        if let Some(rest) = field_value(raw_line, "F") {
            enforce_field_order(
                line_no,
                "F",
                4,
                &mut last_node_rank,
                "node",
                raw_line,
                strict,
            )?;
            let value = parse_text_field(rest);
            validate_len(line_no, "F", &value, raw_line, 1, 200, strict)?;
            node.properties.key_facts.push(value);
            continue;
        }
        if let Some(rest) = field_value(raw_line, "E") {
            enforce_field_order(
                line_no,
                "E",
                5,
                &mut last_node_rank,
                "node",
                raw_line,
                strict,
            )?;
            let value = rest.trim();
            // An empty value is accepted; a non-empty one must look like a UTC timestamp,
            // otherwise the field is reported and left unset.
            if !value.is_empty() && !parse_utc_timestamp(value) {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!(
                        "invalid E timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ\n{}",
                        line_fragment(raw_line)
                    ),
                )?;
                continue;
            }
            node.properties.created_at = value.to_owned();
            continue;
        }
        if let Some(rest) = field_value(raw_line, "C") {
            enforce_field_order(
                line_no,
                "C",
                6,
                &mut last_node_rank,
                "node",
                raw_line,
                strict,
            )?;
            // A non-numeric confidence silently becomes `None`.
            if !rest.trim().is_empty() {
                node.properties.confidence = rest.trim().parse::<f64>().ok();
            }
            continue;
        }
        if let Some(rest) = field_value(raw_line, "V") {
            enforce_field_order(
                line_no,
                "V",
                7,
                &mut last_node_rank,
                "node",
                raw_line,
                strict,
            )?;
            // A non-numeric importance is ignored, keeping the default value.
            if let Ok(value) = rest.trim().parse::<f64>() {
                node.properties.importance = value;
            }
            continue;
        }
        if let Some(rest) = field_value(raw_line, "P") {
            enforce_field_order(
                line_no,
                "P",
                8,
                &mut last_node_rank,
                "node",
                raw_line,
                strict,
            )?;
            node.properties.provenance = parse_text_field(rest);
            continue;
        }
        if let Some(rest) = field_value(raw_line, "S") {
            // Rank 10 leaves rank 9 for the `-` custom node fields handled further down.
            enforce_field_order(
                line_no,
                "S",
                10,
                &mut last_node_rank,
                "node",
                raw_line,
                strict,
            )?;
            let value = parse_text_field(rest);
            validate_len(line_no, "S", &value, raw_line, 1, 200, strict)?;
            node.source_files.push(value);
            continue;
        }

        // Directed edge: `> <relation> <target>`, with the current node as source.
        if let Some(rest) = trimmed.strip_prefix("> ") {
            let mut parts = rest.split_whitespace();
            let Some(relation) = parts.next() else {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!("missing relation in edge at line {line_no}: {trimmed}"),
                )?;
                current_edge_index = None;
                continue;
            };
            let Some(target_id) = parts.next() else {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!("missing target id in edge at line {line_no}: {trimmed}"),
                )?;
                current_edge_index = None;
                continue;
            };
            graph.edges.push(Edge {
                source_id: node.id.clone(),
                relation: code_to_relation(relation).to_owned(),
                target_id: crate::validate::normalize_node_id(target_id),
                properties: EdgeProperties::default(),
            });
            current_edge_index = Some(graph.edges.len() - 1);
            last_edge_rank = 0;
            continue;
        }

        // Bidirectional edge: `= ~ <target>`. Only the `~` relation is accepted, and the two
        // endpoints are stored in canonical (sorted) order.
        if let Some(rest) = trimmed.strip_prefix("= ") {
            let mut parts = rest.split_whitespace();
            let Some(relation) = parts.next() else {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!("missing relation in bidirectional edge at line {line_no}: {trimmed}"),
                )?;
                current_edge_index = None;
                continue;
            };
            let Some(target_id) = parts.next() else {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!("missing target id in bidirectional edge at line {line_no}: {trimmed}"),
                )?;
                current_edge_index = None;
                continue;
            };
            let relation = code_to_relation(relation).to_owned();
            if relation != "~" {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!(
                        "invalid bidirectional relation at line {line_no}: expected '~', got '{}'",
                        relation
                    ),
                )?;
                current_edge_index = None;
                continue;
            }

            let target_id = crate::validate::normalize_node_id(target_id);
            let (source_id, target_id) = canonicalize_bidirectional_pair(&node.id, &target_id);
            graph.edges.push(Edge {
                source_id,
                relation,
                target_id,
                properties: EdgeProperties {
                    bidirectional: true,
                    ..EdgeProperties::default()
                },
            });
            current_edge_index = Some(graph.edges.len() - 1);
            last_edge_rank = 0;
            continue;
        }

        // Edge attribute `d`: either a score component (`d C<n> <score>`) or free-text detail.
        if let Some(rest) = field_value(raw_line, "d") {
            enforce_field_order(
                line_no,
                "d",
                1,
                &mut last_edge_rank,
                "edge",
                raw_line,
                strict,
            )?;
            let Some(edge_idx) = current_edge_index else {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!(
                        "edge detail without preceding edge at line {line_no}\n{}",
                        line_fragment(raw_line)
                    ),
                )?;
                continue;
            };
            let trimmed_rest = rest.trim();
            let mut parts = trimmed_rest.split_whitespace();
            // Exactly two tokens with a `C<digits>` label are treated as a score component.
            if let (Some(label), Some(raw_score), None) = (parts.next(), parts.next(), parts.next())
            {
                if is_score_component_label(label) {
                    // NOTE: this error is returned regardless of `strict`.
                    let score = raw_score.parse::<f64>().map_err(|_| {
                        anyhow::anyhow!(
                            "invalid score component value at line {line_no}: expected number in '{}', got '{}'",
                            line_fragment(raw_line),
                            raw_score
                        )
                    })?;
                    graph.edges[edge_idx]
                        .properties
                        .score_components
                        .insert(label.to_owned(), score);
                    continue;
                }
            }

            let value = parse_text_field(rest);
            validate_len(line_no, "d", &value, raw_line, 1, 200, strict)?;
            graph.edges[edge_idx].properties.detail = value;
            continue;
        }

        // Edge attribute `i`: valid-from timestamp.
        if let Some(rest) = field_value(raw_line, "i") {
            enforce_field_order(
                line_no,
                "i",
                2,
                &mut last_edge_rank,
                "edge",
                raw_line,
                strict,
            )?;
            let Some(edge_idx) = current_edge_index else {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!(
                        "edge valid_from without preceding edge at line {line_no}\n{}",
                        line_fragment(raw_line)
                    ),
                )?;
                continue;
            };
            let value = rest.trim();
            if !value.is_empty() && !parse_utc_timestamp(value) {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!(
                        "invalid i timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ\n{}",
                        line_fragment(raw_line)
                    ),
                )?;
                continue;
            }
            graph.edges[edge_idx].properties.valid_from = value.to_owned();
            continue;
        }

        // Edge attribute `x`: valid-to timestamp.
        if let Some(rest) = field_value(raw_line, "x") {
            enforce_field_order(
                line_no,
                "x",
                3,
                &mut last_edge_rank,
                "edge",
                raw_line,
                strict,
            )?;
            let Some(edge_idx) = current_edge_index else {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!(
                        "edge valid_to without preceding edge at line {line_no}\n{}",
                        line_fragment(raw_line)
                    ),
                )?;
                continue;
            };
            let value = rest.trim();
            if !value.is_empty() && !parse_utc_timestamp(value) {
                fail_or_warn(
                    strict,
                    &mut warnings,
                    format!(
                        "invalid x timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ\n{}",
                        line_fragment(raw_line)
                    ),
                )?;
                continue;
            }
            graph.edges[edge_idx].properties.valid_to = value.to_owned();
            continue;
        }

        // Custom `- <key> <value>` fields. Keys starting with `edge_feedback_` belong to the
        // current edge (rank 4 in the edge block); all others belong to the node (rank 9).
        if let Some(rest) = field_value(raw_line, "-") {
            let (key, value) = rest
                .split_once(char::is_whitespace)
                .map(|(key, value)| (key.trim(), value))
                .unwrap_or((rest.trim(), ""));
            let is_edge_custom = matches!(
                key,
                "edge_feedback_score" | "edge_feedback_count" | "edge_feedback_last_ts_ms"
            );
            if is_edge_custom {
                enforce_field_order(
                    line_no,
                    "-",
                    4,
                    &mut last_edge_rank,
                    "edge",
                    raw_line,
                    strict,
                )?;
            } else {
                enforce_field_order(
                    line_no,
                    "-",
                    9,
                    &mut last_node_rank,
                    "node",
                    raw_line,
                    strict,
                )?;
            }
            // Unparseable numbers fall back to 0 / `None`; edge keys without an open edge
            // and unknown keys are silently ignored.
            match key {
                "domain_area" => node.properties.domain_area = parse_text_field(value),
                "feedback_score" => {
                    node.properties.feedback_score = value.trim().parse::<f64>().unwrap_or(0.0)
                }
                "feedback_count" => {
                    node.properties.feedback_count = value.trim().parse::<u64>().unwrap_or(0)
                }
                "feedback_last_ts_ms" => {
                    node.properties.feedback_last_ts_ms = value.trim().parse::<u64>().ok()
                }
                "edge_feedback_score" => {
                    if let Some(edge_idx) = current_edge_index {
                        graph.edges[edge_idx].properties.feedback_score =
                            value.trim().parse::<f64>().unwrap_or(0.0);
                    }
                }
                "edge_feedback_count" => {
                    if let Some(edge_idx) = current_edge_index {
                        graph.edges[edge_idx].properties.feedback_count =
                            value.trim().parse::<u64>().unwrap_or(0);
                    }
                }
                "edge_feedback_last_ts_ms" => {
                    if let Some(edge_idx) = current_edge_index {
                        graph.edges[edge_idx].properties.feedback_last_ts_ms =
                            value.trim().parse::<u64>().ok();
                    }
                }
                _ => {}
            }
            continue;
        }

        fail_or_warn(
            strict,
            &mut warnings,
            format!("unrecognized line at {line_no}: {trimmed}"),
        )?;
    }

    // Flush whichever block was still open at end of input.
    if let Some(node) = current_node.take() {
        graph.nodes.push(node);
    }
    if let Some(note) = current_note.take() {
        graph.notes.push(note);
    }

    // Normalize list-valued fields: case-insensitive de-duplication, then sorting.
    for node in &mut graph.nodes {
        node.properties.alias =
            sort_case_insensitive(&dedupe_case_insensitive(node.properties.alias.clone()));
        node.properties.key_facts =
            sort_case_insensitive(&dedupe_case_insensitive(node.properties.key_facts.clone()));
        node.source_files =
            sort_case_insensitive(&dedupe_case_insensitive(node.source_files.clone()));
    }

    graph.edges.sort_by(|a, b| {
        a.source_id
            .cmp(&b.source_id)
            .then_with(|| a.relation.cmp(&b.relation))
            .then_with(|| a.target_id.cmp(&b.target_id))
            .then_with(|| a.properties.bidirectional.cmp(&b.properties.bidirectional))
            .then_with(|| a.properties.detail.cmp(&b.properties.detail))
    });

    for note in &mut graph.notes {
        note.tags = sort_case_insensitive(&dedupe_case_insensitive(note.tags.clone()));
        note.source_files =
            sort_case_insensitive(&dedupe_case_insensitive(note.source_files.clone()));
    }
    graph.notes.sort_by(|a, b| {
        a.id.cmp(&b.id)
            .then_with(|| a.node_id.cmp(&b.node_id))
            .then_with(|| a.created_at.cmp(&b.created_at))
    });

    graph.refresh_counts();
    Ok((graph, warnings))
}
1102
1103fn serialize_kg(graph: &GraphFile) -> String {
1104 let mut out = String::new();
1105 let mut nodes = graph.nodes.clone();
1106 nodes.sort_by(|a, b| a.id.cmp(&b.id));
1107
1108 for node in nodes {
1109 out.push_str(&format!(
1110 "@ {}:{}\n",
1111 node_type_to_code(&node.r#type),
1112 node.id
1113 ));
1114 push_text_line(&mut out, "N", &node.name);
1115 push_text_line(&mut out, "D", &node.properties.description);
1116
1117 for alias in sort_case_insensitive(&node.properties.alias) {
1118 push_text_line(&mut out, "A", &alias);
1119 }
1120 for fact in sort_case_insensitive(&node.properties.key_facts) {
1121 push_text_line(&mut out, "F", &fact);
1122 }
1123
1124 if !node.properties.created_at.is_empty() {
1125 out.push_str(&format!("E {}\n", node.properties.created_at));
1126 }
1127 if let Some(confidence) = node.properties.confidence {
1128 out.push_str(&format!("C {}\n", confidence));
1129 }
1130 out.push_str(&format!("V {}\n", node.properties.importance));
1131 if !node.properties.provenance.is_empty() {
1132 push_text_line(&mut out, "P", &node.properties.provenance);
1133 }
1134 if !node.properties.domain_area.is_empty() {
1135 out.push_str("- domain_area ");
1136 out.push_str(&escape_kg_text(&node.properties.domain_area));
1137 out.push('\n');
1138 }
1139 if node.properties.feedback_score != 0.0 {
1140 out.push_str(&format!(
1141 "- feedback_score {}\n",
1142 node.properties.feedback_score
1143 ));
1144 }
1145 if node.properties.feedback_count != 0 {
1146 out.push_str(&format!(
1147 "- feedback_count {}\n",
1148 node.properties.feedback_count
1149 ));
1150 }
1151 if let Some(ts) = node.properties.feedback_last_ts_ms {
1152 out.push_str(&format!("- feedback_last_ts_ms {}\n", ts));
1153 }
1154
1155 for source in sort_case_insensitive(&node.source_files) {
1156 push_text_line(&mut out, "S", &source);
1157 }
1158
1159 let mut edges: Vec<Edge> = graph
1160 .edges
1161 .iter()
1162 .filter(|edge| edge.source_id == node.id)
1163 .cloned()
1164 .collect();
1165 edges.sort_by(|a, b| {
1166 a.relation
1167 .cmp(&b.relation)
1168 .then_with(|| a.target_id.cmp(&b.target_id))
1169 .then_with(|| a.properties.bidirectional.cmp(&b.properties.bidirectional))
1170 .then_with(|| a.properties.detail.cmp(&b.properties.detail))
1171 });
1172
1173 for edge in edges {
1174 let op = if edge.properties.bidirectional && edge.relation == "~" {
1175 "="
1176 } else {
1177 ">"
1178 };
1179 out.push_str(&format!(
1180 "{} {} {}\n",
1181 op,
1182 relation_to_code(&edge.relation),
1183 edge.target_id
1184 ));
1185 for (label, score) in &edge.properties.score_components {
1186 out.push_str(&format!("d {} {:.6}\n", label, score));
1187 }
1188 if !edge.properties.detail.is_empty() {
1189 push_text_line(&mut out, "d", &edge.properties.detail);
1190 }
1191 if !edge.properties.valid_from.is_empty() {
1192 out.push_str(&format!("i {}\n", edge.properties.valid_from));
1193 }
1194 if !edge.properties.valid_to.is_empty() {
1195 out.push_str(&format!("x {}\n", edge.properties.valid_to));
1196 }
1197 if edge.properties.feedback_score != 0.0 {
1198 out.push_str(&format!(
1199 "- edge_feedback_score {}\n",
1200 edge.properties.feedback_score
1201 ));
1202 }
1203 if edge.properties.feedback_count != 0 {
1204 out.push_str(&format!(
1205 "- edge_feedback_count {}\n",
1206 edge.properties.feedback_count
1207 ));
1208 }
1209 if let Some(ts) = edge.properties.feedback_last_ts_ms {
1210 out.push_str(&format!("- edge_feedback_last_ts_ms {}\n", ts));
1211 }
1212 }
1213
1214 out.push('\n');
1215 }
1216
1217 let mut notes = graph.notes.clone();
1218 notes.sort_by(|a, b| {
1219 a.id.cmp(&b.id)
1220 .then_with(|| a.node_id.cmp(&b.node_id))
1221 .then_with(|| a.created_at.cmp(&b.created_at))
1222 });
1223 for note in notes {
1224 out.push_str(&format!("! {} {}\n", note.id, note.node_id));
1225 push_text_line(&mut out, "b", ¬e.body);
1226 for tag in sort_case_insensitive(¬e.tags) {
1227 push_text_line(&mut out, "t", &tag);
1228 }
1229 if !note.author.is_empty() {
1230 push_text_line(&mut out, "a", ¬e.author);
1231 }
1232 if !note.created_at.is_empty() {
1233 out.push_str(&format!("e {}\n", note.created_at));
1234 }
1235 if !note.provenance.is_empty() {
1236 push_text_line(&mut out, "p", ¬e.provenance);
1237 }
1238 for source in sort_case_insensitive(¬e.source_files) {
1239 push_text_line(&mut out, "s", &source);
1240 }
1241 out.push('\n');
1242 }
1243
1244 out
1245}
1246
/// In-memory representation of a whole knowledge graph file.
///
/// Serialized and deserialized through serde; `nodes`, `edges` and `notes`
/// fall back to empty lists when absent from the input, while `metadata`
/// is required.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphFile {
    /// Descriptive header (name, version, description, cached counts).
    pub metadata: Metadata,
    /// All nodes of the graph; empty when the field is missing on load.
    #[serde(default)]
    pub nodes: Vec<Node>,
    /// All edges of the graph; empty when the field is missing on load.
    #[serde(default)]
    pub edges: Vec<Edge>,
    /// Notes attached to nodes; empty when the field is missing on load.
    #[serde(default)]
    pub notes: Vec<Note>,
}
1257
/// Header information stored alongside the graph content.
///
/// None of the fields carries a serde default, so all of them must be
/// present when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Metadata {
    /// Graph name.
    pub name: String,
    /// Format/version string (`GraphFile::new` writes `"1.0"`).
    pub version: String,
    /// Free-form description of the graph.
    pub description: String,
    /// Cached number of nodes; recomputed by `GraphFile::refresh_counts`.
    pub node_count: usize,
    /// Cached number of edges; recomputed by `GraphFile::refresh_counts`.
    pub edge_count: usize,
}
1266
/// A single graph node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Node identifier; normalized by `normalize_graph_ids` when a graph is loaded.
    pub id: String,
    /// Node type, stored under the key `type` in the serialized form.
    #[serde(rename = "type")]
    pub r#type: String,
    /// Display name of the node.
    pub name: String,
    /// Optional descriptive properties; defaults apply when missing.
    #[serde(default)]
    pub properties: NodeProperties,
    /// Source file references for this node; empty when missing.
    #[serde(default)]
    pub source_files: Vec<String>,
}
1278
/// Descriptive attributes of a [`Node`].
///
/// Every field is optional on input: missing fields take their type's
/// default, except `importance`, which falls back to `default_importance()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeProperties {
    /// Free-form description of the node.
    #[serde(default)]
    pub description: String,
    /// Domain area label (the graph-info node uses `internal_metadata`).
    #[serde(default)]
    pub domain_area: String,
    /// Provenance marker (e.g. the graph-info node is created with `"A"`).
    #[serde(default)]
    pub provenance: String,
    /// Optional confidence value; `None` when not recorded.
    #[serde(default)]
    pub confidence: Option<f64>,
    /// Creation timestamp kept as a string; empty when unknown.
    #[serde(default)]
    pub created_at: String,
    /// Importance value; defaults to `default_importance()` when missing.
    #[serde(default = "default_importance")]
    pub importance: f64,
    /// Free-form facts; the graph-info node stores its `graph_uuid=` entry here.
    #[serde(default)]
    pub key_facts: Vec<String>,
    /// Alternative names for the node.
    #[serde(default)]
    pub alias: Vec<String>,
    /// Accumulated feedback score; `0.0` when no feedback was recorded.
    #[serde(default)]
    pub feedback_score: f64,
    /// Number of feedback events recorded.
    #[serde(default)]
    pub feedback_count: u64,
    /// Timestamp of the last feedback event (milliseconds, per the field name).
    #[serde(default)]
    pub feedback_last_ts_ms: Option<u64>,
}
1304
/// Importance assigned to a node when the input does not specify one.
///
/// Referenced by the serde `default` attribute on `NodeProperties::importance`
/// and by `NodeProperties::default`.
fn default_importance() -> f64 {
    const DEFAULT_IMPORTANCE: f64 = 0.5;
    DEFAULT_IMPORTANCE
}
1308
1309impl Default for NodeProperties {
1310 fn default() -> Self {
1311 Self {
1312 description: String::new(),
1313 domain_area: String::new(),
1314 provenance: String::new(),
1315 confidence: None,
1316 created_at: String::new(),
1317 importance: default_importance(),
1318 key_facts: Vec::new(),
1319 alias: Vec::new(),
1320 feedback_score: 0.0,
1321 feedback_count: 0,
1322 feedback_last_ts_ms: None,
1323 }
1324 }
1325}
1326
/// A directed (or, when flagged in its properties, bidirectional) relation
/// between two nodes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Identifier of the node the edge starts from.
    pub source_id: String,
    /// Relation name (e.g. `READS_FROM`, or `~` for similarity edges).
    pub relation: String,
    /// Identifier of the node the edge points to.
    pub target_id: String,
    /// Optional edge attributes; defaults apply when missing.
    #[serde(default)]
    pub properties: EdgeProperties,
}
1335
/// Attributes attached to an [`Edge`]; every field is optional on input and
/// takes its type's default when missing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EdgeProperties {
    /// Free-form detail text for the edge.
    #[serde(default)]
    pub detail: String,
    /// Start of the validity window, kept as a string; empty when unset.
    #[serde(default)]
    pub valid_from: String,
    /// End of the validity window, kept as a string; empty when unset.
    #[serde(default)]
    pub valid_to: String,
    /// Accumulated feedback score; `0.0` when no feedback was recorded.
    #[serde(default)]
    pub feedback_score: f64,
    /// Number of feedback events recorded.
    #[serde(default)]
    pub feedback_count: u64,
    /// Timestamp of the last feedback event (milliseconds, per the field name).
    #[serde(default)]
    pub feedback_last_ts_ms: Option<u64>,
    /// Marks the edge as bidirectional; endpoints of such edges are put into
    /// canonical order by `normalize_graph_ids`.
    #[serde(default)]
    pub bidirectional: bool,
    /// Named score components (label -> value), kept sorted by label.
    #[serde(default)]
    pub score_components: BTreeMap<String, f64>,
}
1355
/// A free-form note attached to a node.
///
/// `id` and `node_id` are required on input; all other fields default to
/// empty values when missing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Note {
    /// Identifier of the note itself.
    pub id: String,
    /// Identifier of the node the note belongs to; remapped together with
    /// node ids by `normalize_graph_ids`.
    pub node_id: String,
    /// Note text.
    #[serde(default)]
    pub body: String,
    /// Tags attached to the note.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Author of the note; empty when unknown.
    #[serde(default)]
    pub author: String,
    /// Creation timestamp kept as a string; empty when unknown.
    #[serde(default)]
    pub created_at: String,
    /// Provenance marker; empty when unknown.
    #[serde(default)]
    pub provenance: String,
    /// Source file references for the note.
    #[serde(default)]
    pub source_files: Vec<String>,
}
1373
1374impl GraphFile {
1375 pub fn new(name: &str) -> Self {
1376 Self {
1377 metadata: Metadata {
1378 name: name.to_owned(),
1379 version: "1.0".to_owned(),
1380 description: format!("Knowledge graph: {name}"),
1381 node_count: 0,
1382 edge_count: 0,
1383 },
1384 nodes: Vec::new(),
1385 edges: Vec::new(),
1386 notes: Vec::new(),
1387 }
1388 }
1389
1390 pub fn load(path: &Path) -> Result<Self> {
1391 let raw = fs::read_to_string(path)
1392 .with_context(|| format!("failed to read graph: {}", path.display()))?;
1393 let ext = path
1394 .extension()
1395 .and_then(|ext| ext.to_str())
1396 .unwrap_or("json");
1397 let mut graph = if ext == "kg" {
1398 if raw.trim_start().starts_with('{') {
1399 serde_json::from_str(&raw).map_err(|error| {
1400 anyhow::anyhow!(json_error_detail(
1401 "invalid legacy JSON payload in .kg file",
1402 path,
1403 &raw,
1404 &error,
1405 ))
1406 })?
1407 } else {
1408 let graph_name = path
1409 .file_stem()
1410 .and_then(|stem| stem.to_str())
1411 .unwrap_or("graph");
1412 let (graph, warnings) = parse_kg_with_warnings(&raw, graph_name, strict_kg_mode())
1413 .with_context(|| format!("failed to parse .kg graph: {}", path.display()))?;
1414 for warning in warnings {
1415 let _ = crate::kg_sidecar::append_warning(
1416 path,
1417 &format!(
1418 "ignored invalid graph entry in {}: {warning}",
1419 path.display()
1420 ),
1421 );
1422 }
1423 graph
1424 }
1425 } else {
1426 serde_json::from_str(&raw).map_err(|error| {
1427 anyhow::anyhow!(json_error_detail("invalid JSON", path, &raw, &error))
1428 })?
1429 };
1430 normalize_graph_ids(&mut graph);
1431 let created_graph_info = ensure_graph_info_node(&mut graph);
1432 graph.refresh_counts();
1433 if created_graph_info {
1434 graph.save(path)?;
1435 }
1436 Ok(graph)
1437 }
1438
1439 pub fn save(&self, path: &Path) -> Result<()> {
1440 let mut graph = self.clone();
1441 ensure_graph_info_node(&mut graph);
1442 graph.refresh_counts();
1443 let ext = path
1444 .extension()
1445 .and_then(|ext| ext.to_str())
1446 .unwrap_or("json");
1447 let raw = if ext == "kg" {
1448 serialize_kg(&graph)
1449 } else {
1450 serde_json::to_string_pretty(&graph).context("failed to serialize graph")?
1451 };
1452 atomic_write(path, &raw)?;
1453 backup_graph_if_stale(path, &raw)
1454 }
1455
1456 pub fn refresh_counts(&mut self) {
1457 self.metadata.node_count = self.nodes.len();
1458 self.metadata.edge_count = self.edges.len();
1459 }
1460
1461 pub fn node_by_id(&self, id: &str) -> Option<&Node> {
1462 self.nodes.iter().find(|node| node.id == id)
1463 }
1464
1465 pub fn node_by_id_sorted(&self, id: &str) -> Option<&Node> {
1466 self.nodes
1467 .binary_search_by(|node| node.id.as_str().cmp(id))
1468 .ok()
1469 .and_then(|idx| self.nodes.get(idx))
1470 }
1471
1472 pub fn node_by_id_mut(&mut self, id: &str) -> Option<&mut Node> {
1473 self.nodes.iter_mut().find(|node| node.id == id)
1474 }
1475
1476 pub fn has_edge(&self, source_id: &str, relation: &str, target_id: &str) -> bool {
1477 self.edges.iter().any(|edge| {
1478 edge.source_id == source_id && edge.relation == relation && edge.target_id == target_id
1479 })
1480 }
1481}
1482
1483fn normalize_graph_ids(graph: &mut GraphFile) {
1484 let mut remap: HashMap<String, String> = HashMap::new();
1485 for node in &mut graph.nodes {
1486 let normalized = crate::validate::normalize_node_id(&node.id);
1487 if normalized != node.id {
1488 remap.insert(node.id.clone(), normalized.clone());
1489 node.id = normalized;
1490 }
1491 }
1492
1493 for edge in &mut graph.edges {
1494 edge.source_id = remap
1495 .get(&edge.source_id)
1496 .cloned()
1497 .unwrap_or_else(|| crate::validate::normalize_node_id(&edge.source_id));
1498 edge.target_id = remap
1499 .get(&edge.target_id)
1500 .cloned()
1501 .unwrap_or_else(|| crate::validate::normalize_node_id(&edge.target_id));
1502 if edge.properties.bidirectional {
1503 let (source_id, target_id) =
1504 canonicalize_bidirectional_pair(&edge.source_id, &edge.target_id);
1505 edge.source_id = source_id;
1506 edge.target_id = target_id;
1507 }
1508 }
1509
1510 for note in &mut graph.notes {
1511 note.node_id = remap
1512 .get(¬e.node_id)
1513 .cloned()
1514 .unwrap_or_else(|| crate::validate::normalize_node_id(¬e.node_id));
1515 }
1516}
1517
1518fn ensure_graph_info_node(graph: &mut GraphFile) -> bool {
1519 if let Some(node) = graph.node_by_id_mut(GRAPH_INFO_NODE_ID) {
1520 let mut changed = false;
1521 if node.r#type != GRAPH_INFO_NODE_TYPE {
1522 node.r#type = GRAPH_INFO_NODE_TYPE.to_owned();
1523 changed = true;
1524 }
1525 if node.name.is_empty() {
1526 node.name = "Graph Metadata".to_owned();
1527 changed = true;
1528 }
1529 if node.properties.description.is_empty() {
1530 node.properties.description =
1531 "Internal graph metadata for cross-graph linking".to_owned();
1532 changed = true;
1533 }
1534 if !node
1535 .properties
1536 .key_facts
1537 .iter()
1538 .any(|fact| fact.starts_with(GRAPH_UUID_FACT_PREFIX))
1539 {
1540 node.properties
1541 .key_facts
1542 .push(format!("{GRAPH_UUID_FACT_PREFIX}{}", generate_graph_uuid()));
1543 changed = true;
1544 }
1545 return changed;
1546 }
1547
1548 graph.nodes.push(Node {
1549 id: GRAPH_INFO_NODE_ID.to_owned(),
1550 r#type: GRAPH_INFO_NODE_TYPE.to_owned(),
1551 name: "Graph Metadata".to_owned(),
1552 properties: NodeProperties {
1553 description: "Internal graph metadata for cross-graph linking".to_owned(),
1554 domain_area: "internal_metadata".to_owned(),
1555 provenance: "A".to_owned(),
1556 importance: 1.0,
1557 key_facts: vec![format!("{GRAPH_UUID_FACT_PREFIX}{}", generate_graph_uuid())],
1558 ..NodeProperties::default()
1559 },
1560 source_files: vec!["DOC .kg/internal/graph_info".to_owned()],
1561 });
1562 true
1563}
1564
/// Produces a 20-character lowercase hex identifier from 10 bytes.
///
/// The bytes are read from `/dev/urandom`. If that read fails for any reason,
/// they are derived instead from the current time in nanoseconds mixed with
/// the process id.
fn generate_graph_uuid() -> String {
    use std::io::Read;

    let mut entropy = [0u8; 10];
    let os_random =
        fs::File::open("/dev/urandom").and_then(|mut device| device.read_exact(&mut entropy));

    if os_random.is_err() {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_nanos();
        let pid = std::process::id() as u128;
        let mixed = nanos ^ (pid << 64) ^ (nanos.rotate_left(17));
        // Keep the low 10 bytes of the 16-byte big-endian representation.
        let mixed_bytes = mixed.to_be_bytes();
        entropy.copy_from_slice(&mixed_bytes[6..16]);
    }

    entropy.iter().map(|byte| format!("{byte:02x}")).collect()
}
1588
// Round-trip and parser tests for graph loading and saving. Most tests write
// into a temporary directory and go through `GraphFile::save` / `GraphFile::load`;
// the parser-only tests call `parse_kg` directly.
#[cfg(test)]
mod tests {
    use super::{
        GRAPH_INFO_NODE_ID, GRAPH_INFO_NODE_TYPE, GRAPH_UUID_FACT_PREFIX, GraphFile, parse_kg,
    };

    /// Saving to `.kg` and loading again keeps the node and edge fields checked below.
    #[test]
    fn save_and_load_kg_roundtrip_keeps_core_fields() {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join("graph.kg");

        let mut graph = GraphFile::new("graph");
        graph.nodes.push(crate::Node {
            id: "concept:refrigerator".to_owned(),
            r#type: "Concept".to_owned(),
            name: "Lodowka".to_owned(),
            properties: crate::NodeProperties {
                description: "Urzadzenie chlodzace".to_owned(),
                provenance: "U".to_owned(),
                created_at: "2026-04-04T12:00:00Z".to_owned(),
                importance: 5.0,
                key_facts: vec!["A".to_owned(), "b".to_owned()],
                alias: vec!["Fridge".to_owned()],
                ..Default::default()
            },
            source_files: vec!["docs/fridge.md".to_owned()],
        });
        graph.edges.push(crate::Edge {
            source_id: "concept:refrigerator".to_owned(),
            relation: "READS_FROM".to_owned(),
            target_id: "datastore:settings".to_owned(),
            properties: crate::EdgeProperties {
                detail: "runtime read".to_owned(),
                valid_from: "2026-04-04T12:00:00Z".to_owned(),
                valid_to: "2026-04-05T12:00:00Z".to_owned(),
                ..Default::default()
            },
        });

        graph.save(&path).expect("save kg");
        let raw = std::fs::read_to_string(&path).expect("read kg");
        assert!(raw.contains("@ K:concept:refrigerator"));
        assert!(raw.contains("> R datastore:settings"));

        let loaded = GraphFile::load(&path).expect("load kg");
        // The domain node plus the graph-info node that `save` adds.
        assert_eq!(loaded.nodes.len(), 2);
        assert_eq!(loaded.edges.len(), 1);
        let node = loaded
            .node_by_id("concept:refrigerator")
            .expect("domain node");
        assert_eq!(node.properties.importance, 5.0);
        assert_eq!(node.properties.provenance, "U");
        assert_eq!(node.name, "Lodowka");
        assert_eq!(loaded.edges[0].relation, "READS_FROM");
        assert_eq!(loaded.edges[0].properties.detail, "runtime read");
        assert_eq!(
            loaded.edges[0].properties.valid_from,
            "2026-04-04T12:00:00Z"
        );
        assert_eq!(loaded.edges[0].properties.valid_to, "2026-04-05T12:00:00Z");
    }

    /// A `.kg` file whose content is a JSON object is still loaded as JSON.
    #[test]
    fn load_supports_legacy_json_payload_with_kg_extension() {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join("legacy.kg");
        std::fs::write(
            &path,
            r#"{
 "metadata": {"name": "legacy", "version": "1.0", "description": "x", "node_count": 0, "edge_count": 0},
 "nodes": [],
 "edges": [],
 "notes": []
}"#,
        )
        .expect("write legacy payload");

        let loaded = GraphFile::load(&path).expect("load legacy kg");
        assert_eq!(loaded.metadata.name, "legacy");
        // Only the graph-info node generated during load.
        assert_eq!(loaded.nodes.len(), 1);
        assert!(loaded.node_by_id(GRAPH_INFO_NODE_ID).is_some());
    }

    /// A node `E` line that does not match the expected timestamp format is
    /// dropped instead of failing the load.
    #[test]
    fn load_kg_ignores_invalid_timestamp_format() {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join("invalid-ts.kg");
        std::fs::write(
            &path,
            "@ K:concept:x\nN X\nD Desc\nE 2026-04-04 12:00:00\nV 4\nP U\n",
        )
        .expect("write kg");

        let loaded = GraphFile::load(&path).expect("invalid timestamp should be ignored");
        assert_eq!(loaded.nodes.len(), 2);
        assert!(
            loaded
                .node_by_id("concept:x")
                .expect("concept node")
                .properties
                .created_at
                .is_empty()
        );
    }

    /// An edge `i` line with an invalid timestamp leaves `valid_from` empty
    /// while the edge itself is kept.
    #[test]
    fn load_kg_ignores_invalid_edge_timestamp_format() {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join("invalid-edge-ts.kg");
        std::fs::write(
            &path,
            "@ K:concept:x\nN X\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n> H concept:y\ni 2026-04-04 12:00:00\n",
        )
        .expect("write kg");

        let loaded = GraphFile::load(&path).expect("invalid edge timestamp should be ignored");
        assert_eq!(loaded.edges.len(), 1);
        assert!(loaded.edges[0].properties.valid_from.is_empty());
    }

    /// Surrounding whitespace in text fields survives loading; exact duplicate
    /// aliases and source files collapse, while facts differing only in case
    /// are both kept.
    // NOTE(review): the expected name/description start with a space that the
    // input literal below does not obviously supply after the field marker —
    // confirm the fixture against the parser's separator handling.
    #[test]
    fn load_kg_preserves_whitespace_and_dedupes_exact_duplicates() {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join("normalize.kg");
        std::fs::write(
            &path,
            "@ K:concept:x\nN Name With Spaces \nD Desc with spaces \nA Alias\nA Alias\nF fact one\nF FACT one\nS docs/a.md\nS docs/a.md\nE 2026-04-04T12:00:00Z\nV 4\nP U\n",
        )
        .expect("write kg");

        let loaded = GraphFile::load(&path).expect("load kg");
        let node = loaded.node_by_id("concept:x").expect("concept node");
        assert_eq!(node.name, " Name With Spaces ");
        assert_eq!(node.properties.description, " Desc with spaces ");
        assert_eq!(node.properties.alias.len(), 1);
        assert_eq!(node.properties.key_facts.len(), 2);
        assert_eq!(node.source_files.len(), 1);
    }

    /// Notes are written in the native `.kg` format (no JSON fallback) and
    /// come back with duplicate tags and source files collapsed.
    #[test]
    fn save_and_load_kg_roundtrip_keeps_notes_without_json_fallback() {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join("graph-notes.kg");

        let mut graph = GraphFile::new("graph-notes");
        graph.nodes.push(crate::Node {
            id: "concept:refrigerator".to_owned(),
            r#type: "Concept".to_owned(),
            name: "Lodowka".to_owned(),
            properties: crate::NodeProperties {
                description: "Urzadzenie chlodzace".to_owned(),
                provenance: "U".to_owned(),
                created_at: "2026-04-04T12:00:00Z".to_owned(),
                ..Default::default()
            },
            source_files: vec!["docs/fridge.md".to_owned()],
        });
        graph.notes.push(crate::Note {
            id: "note:1".to_owned(),
            node_id: "concept:refrigerator".to_owned(),
            body: "Important maintenance insight".to_owned(),
            tags: vec!["Maintenance".to_owned(), "maintenance".to_owned()],
            author: "alice".to_owned(),
            created_at: "1712345678".to_owned(),
            provenance: "U".to_owned(),
            source_files: vec!["docs/a.md".to_owned(), "docs/a.md".to_owned()],
        });

        graph.save(&path).expect("save kg");
        let raw = std::fs::read_to_string(&path).expect("read kg");
        assert!(raw.contains("! note:1 concept:refrigerator"));
        // The file must not be a JSON document.
        assert!(!raw.trim_start().starts_with('{'));

        let loaded = GraphFile::load(&path).expect("load kg");
        assert_eq!(loaded.notes.len(), 1);
        let note = &loaded.notes[0];
        assert_eq!(note.id, "note:1");
        assert_eq!(note.node_id, "concept:refrigerator");
        assert_eq!(note.body, "Important maintenance insight");
        // The two tags differ only in case and come back as one.
        assert_eq!(note.tags.len(), 1);
        assert_eq!(note.source_files.len(), 1);
    }

    /// Text containing real newlines and literal backslash sequences is
    /// escaped on save and restored exactly on load.
    #[test]
    fn save_and_load_kg_roundtrip_preserves_multiline_text_fields() {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join("graph-multiline.kg");

        let mut graph = GraphFile::new("graph-multiline");
        graph.nodes.push(crate::Node {
            id: "concept:refrigerator".to_owned(),
            r#type: "Concept".to_owned(),
            name: "Lodowka\nSmart".to_owned(),
            properties: crate::NodeProperties {
                description: "Linia 1\nLinia 2\\nliteral".to_owned(),
                provenance: "user\nimport".to_owned(),
                created_at: "2026-04-04T12:00:00Z".to_owned(),
                importance: 5.0,
                key_facts: vec!["Fakt 1\nFakt 2".to_owned()],
                alias: vec!["Alias\nA".to_owned()],
                domain_area: "ops\nfield".to_owned(),
                ..Default::default()
            },
            source_files: vec!["docs/fridge\nnotes.md".to_owned()],
        });
        graph.edges.push(crate::Edge {
            source_id: "concept:refrigerator".to_owned(),
            relation: "READS_FROM".to_owned(),
            target_id: "datastore:settings".to_owned(),
            properties: crate::EdgeProperties {
                detail: "runtime\nread".to_owned(),
                valid_from: "2026-04-04T12:00:00Z".to_owned(),
                valid_to: "2026-04-05T12:00:00Z".to_owned(),
                ..Default::default()
            },
        });
        graph.notes.push(crate::Note {
            id: "note:1".to_owned(),
            node_id: "concept:refrigerator".to_owned(),
            body: "line1\nline2\\nkeep".to_owned(),
            tags: vec!["multi\nline".to_owned()],
            author: "alice\nbob".to_owned(),
            created_at: "1712345678".to_owned(),
            provenance: "manual\nentry".to_owned(),
            source_files: vec!["docs/a\nb.md".to_owned()],
        });

        graph.save(&path).expect("save kg");
        let raw = std::fs::read_to_string(&path).expect("read kg");
        // On disk: newline becomes the two characters `\n`, a literal
        // backslash is doubled.
        assert!(raw.contains("N Lodowka\\nSmart"));
        assert!(raw.contains("D Linia 1\\nLinia 2\\\\nliteral"));
        assert!(raw.contains("- domain_area ops\\nfield"));
        assert!(raw.contains("d runtime\\nread"));
        assert!(raw.contains("b line1\\nline2\\\\nkeep"));

        let loaded = GraphFile::load(&path).expect("load kg");
        let node = loaded
            .node_by_id("concept:refrigerator")
            .expect("domain node");
        assert_eq!(node.name, "Lodowka\nSmart");
        assert_eq!(node.properties.description, "Linia 1\nLinia 2\\nliteral");
        assert_eq!(node.properties.provenance, "user\nimport");
        assert_eq!(node.properties.alias, vec!["Alias\nA".to_owned()]);
        assert_eq!(node.properties.key_facts, vec!["Fakt 1\nFakt 2".to_owned()]);
        assert_eq!(node.properties.domain_area, "ops\nfield");
        assert_eq!(node.source_files, vec!["docs/fridge\nnotes.md".to_owned()]);
        assert_eq!(loaded.edges[0].properties.detail, "runtime\nread");
        let note = &loaded.notes[0];
        assert_eq!(note.body, "line1\nline2\\nkeep");
        assert_eq!(note.tags, vec!["multi\nline".to_owned()]);
        assert_eq!(note.author, "alice\nbob");
        assert_eq!(note.provenance, "manual\nentry");
        assert_eq!(note.source_files, vec!["docs/a\nb.md".to_owned()]);
    }

    /// A `=` similarity edge declared on `dedupe_b` towards `dedupe_a` is
    /// stored with its endpoints in canonical order, and `d <label> <value>`
    /// lines become score components while the bare `d` value is the detail.
    #[test]
    fn parse_bidirectional_similarity_edge_is_canonical_and_scored() {
        let raw = "@ ~:dedupe_b\nN B\nD Desc\nV 0.5\nP U\nS docs/b.md\n= ~ ~:dedupe_a\nd C1 0.11\nd C2 0.83\nd 0.91\n\n@ ~:dedupe_a\nN A\nD Desc\nV 0.5\nP U\nS docs/a.md\n";
        let graph = parse_kg(raw, "virt", true).expect("parse kg");

        assert_eq!(graph.nodes.len(), 2);
        assert_eq!(graph.edges.len(), 1);
        let edge = &graph.edges[0];
        assert_eq!(edge.relation, "~");
        assert_eq!(edge.source_id, "~:dedupe_a");
        assert_eq!(edge.target_id, "~:dedupe_b");
        assert_eq!(edge.properties.detail, "0.91");
        assert!(edge.properties.bidirectional);
        assert_eq!(edge.properties.score_components.get("C1"), Some(&0.11));
        assert_eq!(edge.properties.score_components.get("C2"), Some(&0.83));
    }

    /// A bidirectional `~` edge is written with the `=` operator (never `>`),
    /// score components are written with six decimals, and everything
    /// round-trips through load.
    #[test]
    fn serialize_bidirectional_similarity_edge_uses_equals_operator() {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join("virt.kg");
        let mut graph = GraphFile::new("virt");
        graph.nodes.push(crate::Node {
            id: "~:dedupe_a".to_owned(),
            r#type: "~".to_owned(),
            name: "A".to_owned(),
            properties: crate::NodeProperties {
                description: "Desc".to_owned(),
                provenance: "U".to_owned(),
                created_at: "2026-04-10T00:00:00Z".to_owned(),
                importance: 0.6,
                ..Default::default()
            },
            source_files: vec!["docs/a.md".to_owned()],
        });
        graph.nodes.push(crate::Node {
            id: "~:dedupe_b".to_owned(),
            r#type: "~".to_owned(),
            name: "B".to_owned(),
            properties: crate::NodeProperties {
                description: "Desc".to_owned(),
                provenance: "U".to_owned(),
                created_at: "2026-04-10T00:00:00Z".to_owned(),
                importance: 0.6,
                ..Default::default()
            },
            source_files: vec!["docs/b.md".to_owned()],
        });
        graph.edges.push(crate::Edge {
            source_id: "~:dedupe_a".to_owned(),
            relation: "~".to_owned(),
            target_id: "~:dedupe_b".to_owned(),
            properties: crate::EdgeProperties {
                detail: "0.75".to_owned(),
                bidirectional: true,
                score_components: std::collections::BTreeMap::from([
                    ("C1".to_owned(), 0.2),
                    ("C2".to_owned(), 0.8),
                ]),
                ..Default::default()
            },
        });

        graph.save(&path).expect("save");
        let raw = std::fs::read_to_string(&path).expect("read");
        assert!(raw.contains("= ~ ~:dedupe_b"));
        assert!(raw.contains("d C1 0.200000"));
        assert!(raw.contains("d C2 0.800000"));
        assert!(!raw.contains("> ~ ~:dedupe_b"));

        let loaded = GraphFile::load(&path).expect("load");
        assert_eq!(loaded.edges.len(), 1);
        assert!(loaded.edges[0].properties.bidirectional);
        assert_eq!(loaded.edges[0].properties.detail, "0.75");
        assert_eq!(
            loaded.edges[0].properties.score_components.get("C1"),
            Some(&0.2)
        );
        assert_eq!(
            loaded.edges[0].properties.score_components.get("C2"),
            Some(&0.8)
        );
    }

    /// In strict mode the `=` operator is only accepted with the `~` relation.
    #[test]
    fn strict_mode_rejects_bidirectional_relation_other_than_similarity() {
        let raw = "@ K:concept:a\nN A\nD Desc\nV 0.5\nP U\nS docs/a.md\n= HAS concept:b\n";
        let err = parse_kg(raw, "x", true).expect_err("strict mode should reject invalid '='");
        assert!(format!("{err:#}").contains("expected '~'"));
    }

    /// In strict mode a `D` line before the `N` line is a field-order error.
    #[test]
    fn strict_mode_rejects_out_of_order_node_fields() {
        let raw = "@ K:concept:x\nD Desc\nN Name\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n";
        let err = parse_kg(raw, "x", true).expect_err("strict mode should fail on field order");
        assert!(format!("{err:#}").contains("invalid field order"));
    }

    /// A 121-character name is rejected in strict mode but accepted when
    /// strict mode is off.
    #[test]
    fn strict_mode_rejects_overlong_name_but_compat_mode_allows_it() {
        let long_name = "N ".to_owned() + &"X".repeat(121);
        let raw = format!(
            "@ K:concept:x\n{}\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n",
            long_name
        );

        let strict_err = parse_kg(&raw, "x", true).expect_err("strict mode should fail on length");
        assert!(format!("{strict_err:#}").contains("invalid N length"));

        parse_kg(&raw, "x", false).expect("compat mode keeps permissive behavior");
    }

    /// Empty `created_at` / `provenance` values do not produce bare `E` or
    /// `P` lines in the saved file.
    #[test]
    fn save_kg_skips_empty_e_and_p_fields() {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join("no-empty-ep.kg");

        let mut graph = GraphFile::new("graph");
        graph.nodes.push(crate::Node {
            id: "concept:x".to_owned(),
            r#type: "Concept".to_owned(),
            name: "X".to_owned(),
            properties: crate::NodeProperties {
                description: "Desc".to_owned(),
                provenance: String::new(),
                created_at: String::new(),
                ..Default::default()
            },
            source_files: vec!["docs/a.md".to_owned()],
        });

        graph.save(&path).expect("save kg");
        let raw = std::fs::read_to_string(&path).expect("read kg");
        assert!(!raw.contains("\nE \n"));
        assert!(!raw.contains("\nP \n"));
    }

    /// Loading a file without a graph-info node creates one with a
    /// `graph_uuid=` fact and writes it back to the same file.
    #[test]
    fn load_generates_graph_info_node_when_missing() {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join("meta.kg");
        let raw = "@ K:concept:x\nN X\nD Desc\nV 0.5\nP U\nS docs/a.md\n";
        std::fs::write(&path, raw).expect("write kg");

        let loaded = GraphFile::load(&path).expect("load kg");
        let info = loaded
            .node_by_id(GRAPH_INFO_NODE_ID)
            .expect("graph info node should be generated");
        assert_eq!(info.r#type, GRAPH_INFO_NODE_TYPE);
        assert!(
            info.properties
                .key_facts
                .iter()
                .any(|fact| fact.starts_with(GRAPH_UUID_FACT_PREFIX))
        );

        // `load` persists the generated node, so the file on disk changes.
        let persisted = std::fs::read_to_string(&path).expect("read persisted kg");
        assert!(persisted.contains("graph_info"));
        assert!(persisted.contains("graph_uuid="));
    }
}