1use std::fs;
2use std::io::Write;
3use std::path::Path;
4use std::time::{SystemTime, UNIX_EPOCH};
5
6use anyhow::{Context, Result};
7use flate2::Compression;
8use flate2::write::GzEncoder;
9use serde::{Deserialize, Serialize};
10
11fn atomic_write(dest: &Path, data: &str) -> Result<()> {
16 let tmp = dest.with_extension("tmp");
17 fs::write(&tmp, data).with_context(|| format!("failed to write tmp: {}", tmp.display()))?;
18 if dest.exists() {
19 let bak = dest.with_extension("bak");
20 fs::copy(dest, &bak)
21 .with_context(|| format!("failed to create backup: {}", bak.display()))?;
22 }
23 fs::rename(&tmp, dest).with_context(|| format!("failed to rename tmp to {}", dest.display()))
24}
25
/// Age, in seconds, the newest timestamped backup must reach before
/// `backup_graph_if_stale` writes another one (one hour).
const BACKUP_STALE_SECS: u64 = 3_600;
27
28fn backup_graph_if_stale(path: &Path, data: &str) -> Result<()> {
29 let parent = match path.parent() {
30 Some(parent) => parent,
31 None => return Ok(()),
32 };
33 let stem = match path.file_stem().and_then(|s| s.to_str()) {
34 Some(stem) => stem,
35 None => return Ok(()),
36 };
37 let now = SystemTime::now()
38 .duration_since(UNIX_EPOCH)
39 .context("time went backwards")?
40 .as_secs();
41 if let Some(latest) = latest_backup_ts(parent, stem)? {
42 if now.saturating_sub(latest) < BACKUP_STALE_SECS {
43 return Ok(());
44 }
45 }
46
47 let backup_path = parent.join(format!("{stem}.bck.{now}.gz"));
48 let tmp_path = backup_path.with_extension("tmp");
49 let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
50 encoder.write_all(data.as_bytes())?;
51 let encoded = encoder.finish()?;
52 fs::write(&tmp_path, encoded)
53 .with_context(|| format!("failed to write tmp: {}", tmp_path.display()))?;
54 fs::rename(&tmp_path, &backup_path)
55 .with_context(|| format!("failed to rename tmp to {}", backup_path.display()))?;
56 Ok(())
57}
58
59fn latest_backup_ts(dir: &Path, stem: &str) -> Result<Option<u64>> {
60 let prefix = format!("{stem}.bck.");
61 let suffix = ".gz";
62 let mut latest = None;
63 for entry in fs::read_dir(dir).with_context(|| format!("read dir: {}", dir.display()))? {
64 let entry = entry?;
65 let name = entry.file_name();
66 let name = name.to_string_lossy();
67 if !name.starts_with(&prefix) || !name.ends_with(suffix) {
68 continue;
69 }
70 let ts_part = &name[prefix.len()..name.len() - suffix.len()];
71 if let Ok(ts) = ts_part.parse::<u64>() {
72 match latest {
73 Some(current) => {
74 if ts > current {
75 latest = Some(ts);
76 }
77 }
78 None => latest = Some(ts),
79 }
80 }
81 }
82 Ok(latest)
83}
84
/// Maps a node type name to its one-letter `.kg` code.
///
/// Names without a known code are returned unchanged.
fn node_type_to_code(node_type: &str) -> &str {
    const CODES: &[(&str, &str)] = &[
        ("Feature", "F"),
        ("Concept", "K"),
        ("Interface", "I"),
        ("Process", "P"),
        ("DataStore", "D"),
        ("Attribute", "A"),
        ("Entity", "Y"),
        ("Note", "N"),
        ("Rule", "R"),
        ("Convention", "C"),
        ("Bug", "B"),
        ("Decision", "Z"),
        ("OpenQuestion", "O"),
        ("Claim", "Q"),
        ("Insight", "W"),
        ("Reference", "M"),
        ("Term", "T"),
        ("Status", "S"),
        ("Doubt", "L"),
    ];

    CODES
        .iter()
        .find(|(name, _)| *name == node_type)
        .map_or(node_type, |&(_, code)| code)
}
109
/// Expands a one-letter `.kg` node type code to the full type name.
///
/// Codes that are not recognised are returned unchanged.
fn code_to_node_type(code: &str) -> &str {
    const NAMES: &[(&str, &str)] = &[
        ("F", "Feature"),
        ("K", "Concept"),
        ("I", "Interface"),
        ("P", "Process"),
        ("D", "DataStore"),
        ("A", "Attribute"),
        ("Y", "Entity"),
        ("N", "Note"),
        ("R", "Rule"),
        ("C", "Convention"),
        ("B", "Bug"),
        ("Z", "Decision"),
        ("O", "OpenQuestion"),
        ("Q", "Claim"),
        ("W", "Insight"),
        ("M", "Reference"),
        ("T", "Term"),
        ("S", "Status"),
        ("L", "Doubt"),
    ];

    for &(short, name) in NAMES {
        if short == code {
            return name;
        }
    }
    code
}
134
/// Maps a relation name to its one-letter `.kg` code.
///
/// Several relations have two accepted spellings that share a code (for
/// example `DOCUMENTED_IN` and `DOCUMENTS`). Unknown relations are returned
/// unchanged.
fn relation_to_code(relation: &str) -> &str {
    const CODES: &[(&str, &str)] = &[
        ("DOCUMENTED_IN", "D"),
        ("DOCUMENTS", "D"),
        ("HAS", "H"),
        ("TRIGGERS", "T"),
        ("AFFECTED_BY", "A"),
        ("AFFECTS", "A"),
        ("READS_FROM", "R"),
        ("READS", "R"),
        ("GOVERNED_BY", "G"),
        ("GOVERNS", "G"),
        ("DEPENDS_ON", "O"),
        ("AVAILABLE_IN", "I"),
        ("SUPPORTS", "S"),
        ("SUMMARIZES", "U"),
        ("RELATED_TO", "L"),
        ("CONTRADICTS", "V"),
        ("CREATED_BY", "C"),
        ("CREATES", "C"),
    ];

    CODES
        .iter()
        .find(|(name, _)| *name == relation)
        .map_or(relation, |&(_, code)| code)
}
153
/// Expands a one-letter `.kg` relation code to its relation name.
///
/// Codes that are not recognised are returned unchanged.
fn code_to_relation(code: &str) -> &str {
    const RELATIONS: &[(&str, &str)] = &[
        ("D", "DOCUMENTED_IN"),
        ("H", "HAS"),
        ("T", "TRIGGERS"),
        ("A", "AFFECTED_BY"),
        ("R", "READS_FROM"),
        ("G", "GOVERNED_BY"),
        ("O", "DEPENDS_ON"),
        ("I", "AVAILABLE_IN"),
        ("S", "SUPPORTS"),
        ("U", "SUMMARIZES"),
        ("L", "RELATED_TO"),
        ("V", "CONTRADICTS"),
        ("C", "CREATED_BY"),
    ];

    for &(short, relation) in RELATIONS {
        if short == code {
            return relation;
        }
    }
    code
}
172
/// Returns a sorted copy of `values`, ordered by their ASCII-lowercased form.
///
/// Values that are equal ignoring ASCII case are ordered by their exact
/// contents.
fn sort_case_insensitive(values: &[String]) -> Vec<String> {
    let mut ordered: Vec<String> = values.iter().cloned().collect();
    ordered.sort_by(|lhs, rhs| {
        // Comparing the lowercased bytes gives the same order as comparing
        // ASCII-lowercased copies of the strings.
        let folded_lhs = lhs.bytes().map(|byte| byte.to_ascii_lowercase());
        let folded_rhs = rhs.bytes().map(|byte| byte.to_ascii_lowercase());
        folded_lhs.cmp(folded_rhs).then_with(|| lhs.cmp(rhs))
    });
    ordered
}
182
/// Reverses the `.kg` text escaping.
///
/// `\n` becomes a line feed, `\r` a carriage return and `\\` a single
/// backslash. A backslash followed by any other character, or a trailing
/// lone backslash, is kept as written.
fn decode_kg_text(value: &str) -> String {
    let mut decoded = String::new();
    let mut pending_backslash = false;

    for ch in value.chars() {
        if pending_backslash {
            pending_backslash = false;
            match ch {
                'n' => decoded.push('\n'),
                'r' => decoded.push('\r'),
                '\\' => decoded.push('\\'),
                other => {
                    // Unknown escape: keep both characters untouched.
                    decoded.push('\\');
                    decoded.push(other);
                }
            }
        } else if ch == '\\' {
            pending_backslash = true;
        } else {
            decoded.push(ch);
        }
    }

    // Input ended right after a backslash.
    if pending_backslash {
        decoded.push('\\');
    }
    decoded
}
204
/// Escapes text so it fits on a single `.kg` line.
///
/// Backslashes are doubled; line feeds and carriage returns are written as
/// the two-character sequences `\n` and `\r`. All other characters pass
/// through unchanged.
fn escape_kg_text(value: &str) -> String {
    value.chars().fold(String::new(), |mut escaped, ch| {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            other => escaped.push(other),
        }
        escaped
    })
}
217
/// Decodes the value of a text field read from a `.kg` line.
///
/// Delegates to `decode_kg_text`, so the value is returned with its escape
/// sequences expanded.
fn parse_text_field(value: &str) -> String {
    decode_kg_text(value)
}
221
222fn push_text_line(out: &mut String, key: &str, value: &str) {
223 out.push_str(key);
224 out.push(' ');
225 out.push_str(&escape_kg_text(value));
226 out.push('\n');
227}
228
/// Removes values that repeat an earlier value when ASCII case is ignored.
///
/// The first occurrence of each value is kept with its original spelling and
/// the relative order of the kept values is preserved.
fn dedupe_case_insensitive(values: Vec<String>) -> Vec<String> {
    let mut seen_keys = std::collections::HashSet::new();
    values
        .into_iter()
        // `insert` returns false when the lowercased key was already present.
        .filter(|candidate| seen_keys.insert(candidate.to_ascii_lowercase()))
        .collect()
}
240
/// Checks whether `value` has the exact shape `YYYY-MM-DDTHH:MM:SSZ`.
///
/// Besides the layout, the month must be 1-12, the day 1-31, the hour 0-23
/// and minute and second 0-59. The day is not checked against the month, so
/// a date such as February 31st is accepted.
fn parse_utc_timestamp(value: &str) -> bool {
    // `0` marks a position that must hold an ASCII digit; every other byte
    // must match literally.
    const TEMPLATE: &[u8] = b"0000-00-00T00:00:00Z";

    let bytes = value.as_bytes();
    if bytes.len() != TEMPLATE.len() {
        return false;
    }
    let shape_matches = bytes.iter().zip(TEMPLATE).all(|(&actual, &expected)| {
        if expected == b'0' {
            actual.is_ascii_digit()
        } else {
            actual == expected
        }
    });
    if !shape_matches {
        return false;
    }

    // The shape check guarantees both bytes at `at` are ASCII digits.
    let two_digits =
        |at: usize| u32::from(bytes[at] - b'0') * 10 + u32::from(bytes[at + 1] - b'0');

    let month = two_digits(5);
    let day = two_digits(8);
    let hour = two_digits(11);
    let minute = two_digits(14);
    let second = two_digits(17);

    (1..=12).contains(&month)
        && (1..=31).contains(&day)
        && hour <= 23
        && minute <= 59
        && second <= 59
}
282
/// Reports whether the `KG_STRICT_FORMAT` environment variable is switched on.
///
/// The value is trimmed and compared ignoring ASCII case against `1`, `true`,
/// `yes` and `on`. An unset or unreadable variable, or any other value,
/// yields `false`.
fn strict_kg_mode() -> bool {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];

    match std::env::var("KG_STRICT_FORMAT") {
        Ok(raw) => {
            let normalized = raw.trim().to_ascii_lowercase();
            TRUTHY.contains(&normalized.as_str())
        }
        Err(_) => false,
    }
}
292
/// Trims `line` and shortens it to at most 160 characters.
///
/// When the trimmed line is longer than that, the first 160 characters are
/// kept and `...` is appended.
fn abbreviated_line(line: &str) -> String {
    const MAX_CHARS: usize = 160;

    let trimmed = line.trim();
    // The byte offset of character number MAX_CHARS exists only when the
    // line is too long; cutting there always lands on a char boundary.
    match trimmed.char_indices().nth(MAX_CHARS) {
        Some((cut, _)) => format!("{}...", &trimmed[..cut]),
        None => trimmed.to_owned(),
    }
}
306
307fn line_fragment(line: &str) -> String {
308 let snippet = abbreviated_line(line);
309 if snippet.is_empty() {
310 "fragment: <empty line>".to_owned()
311 } else {
312 format!("fragment: {snippet}")
313 }
314}
315
316fn json_error_detail(label: &str, path: &Path, raw: &str, error: &serde_json::Error) -> String {
317 let line_no = error.line();
318 let column = error.column();
319 let fragment = raw
320 .lines()
321 .nth(line_no.saturating_sub(1))
322 .map(line_fragment)
323 .unwrap_or_else(|| "fragment: <unavailable>".to_owned());
324 format!(
325 "{label}: {} at line {line_no}, column {column}: {error}\n{fragment}",
326 path.display()
327 )
328}
329
330fn validate_len(
331 line_no: usize,
332 field: &str,
333 value: &str,
334 raw_line: &str,
335 min: usize,
336 max: usize,
337 strict: bool,
338) -> Result<()> {
339 let len = value.chars().count();
340 if strict && (len < min || len > max) {
341 return Err(anyhow::anyhow!(
342 "invalid {field} length at line {line_no}: expected {min}..={max}, got {len}\n{}",
343 line_fragment(raw_line)
344 ));
345 }
346 Ok(())
347}
348
349fn enforce_field_order(
350 line_no: usize,
351 key: &str,
352 rank: u8,
353 last_rank: &mut u8,
354 section: &str,
355 raw_line: &str,
356 strict: bool,
357) -> Result<()> {
358 if strict && rank < *last_rank {
359 return Err(anyhow::anyhow!(
360 "invalid field order at line {line_no}: {key} in {section} block\n{}",
361 line_fragment(raw_line)
362 ));
363 }
364 if rank > *last_rank {
365 *last_rank = rank;
366 }
367 Ok(())
368}
369
/// Extracts the value of a `<key> <value>` line.
///
/// Returns `Some("")` when the line is exactly `key`, the text after the
/// first space when the line starts with `key` followed by a space, and
/// `None` otherwise. The value is returned as written, without trimming.
fn field_value<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    match line.strip_prefix(key) {
        // The line consists of the key alone.
        Some("") => Some(""),
        // The key must be followed by exactly one separating space.
        Some(rest) => rest.strip_prefix(' '),
        None => None,
    }
}
378
379fn fail_or_warn(strict: bool, warnings: &mut Vec<String>, message: String) -> Result<()> {
380 if strict {
381 Err(anyhow::anyhow!(message))
382 } else {
383 warnings.push(message);
384 Ok(())
385 }
386}
387
/// Test-only convenience wrapper around `parse_kg_with_warnings` that returns
/// just the parsed graph and discards the collected warnings.
#[cfg(test)]
fn parse_kg(raw: &str, graph_name: &str, strict: bool) -> Result<GraphFile> {
    let (graph, _warnings) = parse_kg_with_warnings(raw, graph_name, strict)?;
    Ok(graph)
}
392
393fn parse_kg_with_warnings(
394 raw: &str,
395 graph_name: &str,
396 strict: bool,
397) -> Result<(GraphFile, Vec<String>)> {
398 let mut graph = GraphFile::new(graph_name);
399 let mut warnings = Vec::new();
400 let mut current_node: Option<Node> = None;
401 let mut current_note: Option<Note> = None;
402 let mut current_edge_index: Option<usize> = None;
403 let mut last_node_rank: u8 = 0;
404 let mut last_note_rank: u8 = 0;
405 let mut last_edge_rank: u8 = 0;
406
407 for (idx, line) in raw.lines().enumerate() {
408 let line_no = idx + 1;
409 let raw_line = line.strip_suffix('\r').unwrap_or(line);
410 let trimmed = raw_line.trim();
411 if trimmed.is_empty() || trimmed.starts_with('#') {
412 continue;
413 }
414
415 if let Some(rest) = trimmed.strip_prefix("@ ") {
416 if let Some(note) = current_note.take() {
417 graph.notes.push(note);
418 }
419 if let Some(node) = current_node.take() {
420 graph.nodes.push(node);
421 }
422 let Some((type_code, node_id)) = rest.split_once(':') else {
423 fail_or_warn(
424 strict,
425 &mut warnings,
426 format!("invalid node header at line {line_no}: {trimmed}"),
427 )?;
428 current_edge_index = None;
429 continue;
430 };
431 current_node = Some(Node {
432 id: node_id.trim().to_owned(),
433 r#type: code_to_node_type(type_code.trim()).to_owned(),
434 name: String::new(),
435 properties: NodeProperties::default(),
436 source_files: Vec::new(),
437 });
438 current_edge_index = None;
439 last_node_rank = 0;
440 last_edge_rank = 0;
441 continue;
442 }
443
444 if let Some(rest) = trimmed.strip_prefix("! ") {
445 if let Some(node) = current_node.take() {
446 graph.nodes.push(node);
447 }
448 if let Some(note) = current_note.take() {
449 graph.notes.push(note);
450 }
451 let mut parts = rest.split_whitespace();
452 let Some(id) = parts.next() else {
453 fail_or_warn(
454 strict,
455 &mut warnings,
456 format!("invalid note header at line {line_no}: {trimmed}"),
457 )?;
458 current_edge_index = None;
459 continue;
460 };
461 let Some(node_id) = parts.next() else {
462 fail_or_warn(
463 strict,
464 &mut warnings,
465 format!("invalid note header at line {line_no}: {trimmed}"),
466 )?;
467 current_edge_index = None;
468 continue;
469 };
470 current_note = Some(Note {
471 id: id.to_owned(),
472 node_id: node_id.to_owned(),
473 ..Default::default()
474 });
475 current_edge_index = None;
476 last_note_rank = 0;
477 continue;
478 }
479
480 if let Some(note) = current_note.as_mut() {
481 if let Some(rest) = field_value(raw_line, "b") {
482 enforce_field_order(
483 line_no,
484 "b",
485 1,
486 &mut last_note_rank,
487 "note",
488 raw_line,
489 strict,
490 )?;
491 note.body = parse_text_field(rest);
492 continue;
493 }
494 if let Some(rest) = field_value(raw_line, "t") {
495 enforce_field_order(
496 line_no,
497 "t",
498 2,
499 &mut last_note_rank,
500 "note",
501 raw_line,
502 strict,
503 )?;
504 let value = parse_text_field(rest);
505 if !value.is_empty() {
506 note.tags.push(value);
507 }
508 continue;
509 }
510 if let Some(rest) = field_value(raw_line, "a") {
511 enforce_field_order(
512 line_no,
513 "a",
514 3,
515 &mut last_note_rank,
516 "note",
517 raw_line,
518 strict,
519 )?;
520 note.author = parse_text_field(rest);
521 continue;
522 }
523 if let Some(rest) = field_value(raw_line, "e") {
524 enforce_field_order(
525 line_no,
526 "e",
527 4,
528 &mut last_note_rank,
529 "note",
530 raw_line,
531 strict,
532 )?;
533 note.created_at = rest.trim().to_owned();
534 continue;
535 }
536 if let Some(rest) = field_value(raw_line, "p") {
537 enforce_field_order(
538 line_no,
539 "p",
540 5,
541 &mut last_note_rank,
542 "note",
543 raw_line,
544 strict,
545 )?;
546 note.provenance = parse_text_field(rest);
547 continue;
548 }
549 if let Some(rest) = field_value(raw_line, "s") {
550 enforce_field_order(
551 line_no,
552 "s",
553 6,
554 &mut last_note_rank,
555 "note",
556 raw_line,
557 strict,
558 )?;
559 let value = parse_text_field(rest);
560 if !value.is_empty() {
561 note.source_files.push(value);
562 }
563 continue;
564 }
565 fail_or_warn(
566 strict,
567 &mut warnings,
568 format!("unrecognized note line at {line_no}: {trimmed}"),
569 )?;
570 continue;
571 }
572
573 let Some(node) = current_node.as_mut() else {
574 fail_or_warn(
575 strict,
576 &mut warnings,
577 format!("unexpected line before first node at line {line_no}: {trimmed}"),
578 )?;
579 continue;
580 };
581
582 if let Some(rest) = field_value(raw_line, "N") {
583 enforce_field_order(
584 line_no,
585 "N",
586 1,
587 &mut last_node_rank,
588 "node",
589 raw_line,
590 strict,
591 )?;
592 let value = parse_text_field(rest);
593 validate_len(line_no, "N", &value, raw_line, 1, 120, strict)?;
594 node.name = value;
595 continue;
596 }
597 if let Some(rest) = field_value(raw_line, "D") {
598 enforce_field_order(
599 line_no,
600 "D",
601 2,
602 &mut last_node_rank,
603 "node",
604 raw_line,
605 strict,
606 )?;
607 let value = parse_text_field(rest);
608 validate_len(line_no, "D", &value, raw_line, 1, 200, strict)?;
609 node.properties.description = value;
610 continue;
611 }
612 if let Some(rest) = field_value(raw_line, "A") {
613 enforce_field_order(
614 line_no,
615 "A",
616 3,
617 &mut last_node_rank,
618 "node",
619 raw_line,
620 strict,
621 )?;
622 let value = parse_text_field(rest);
623 validate_len(line_no, "A", &value, raw_line, 1, 80, strict)?;
624 node.properties.alias.push(value);
625 continue;
626 }
627 if let Some(rest) = field_value(raw_line, "F") {
628 enforce_field_order(
629 line_no,
630 "F",
631 4,
632 &mut last_node_rank,
633 "node",
634 raw_line,
635 strict,
636 )?;
637 let value = parse_text_field(rest);
638 validate_len(line_no, "F", &value, raw_line, 1, 200, strict)?;
639 node.properties.key_facts.push(value);
640 continue;
641 }
642 if let Some(rest) = field_value(raw_line, "E") {
643 enforce_field_order(
644 line_no,
645 "E",
646 5,
647 &mut last_node_rank,
648 "node",
649 raw_line,
650 strict,
651 )?;
652 let value = rest.trim();
653 if !value.is_empty() && !parse_utc_timestamp(value) {
654 fail_or_warn(
655 strict,
656 &mut warnings,
657 format!(
658 "invalid E timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ\n{}",
659 line_fragment(raw_line)
660 ),
661 )?;
662 continue;
663 }
664 node.properties.created_at = value.to_owned();
665 continue;
666 }
667 if let Some(rest) = field_value(raw_line, "C") {
668 enforce_field_order(
669 line_no,
670 "C",
671 6,
672 &mut last_node_rank,
673 "node",
674 raw_line,
675 strict,
676 )?;
677 if !rest.trim().is_empty() {
678 node.properties.confidence = rest.trim().parse::<f64>().ok();
679 }
680 continue;
681 }
682 if let Some(rest) = field_value(raw_line, "V") {
683 enforce_field_order(
684 line_no,
685 "V",
686 7,
687 &mut last_node_rank,
688 "node",
689 raw_line,
690 strict,
691 )?;
692 if let Ok(value) = rest.trim().parse::<u8>() {
693 node.properties.importance = value;
694 }
695 continue;
696 }
697 if let Some(rest) = field_value(raw_line, "P") {
698 enforce_field_order(
699 line_no,
700 "P",
701 8,
702 &mut last_node_rank,
703 "node",
704 raw_line,
705 strict,
706 )?;
707 node.properties.provenance = parse_text_field(rest);
708 continue;
709 }
710 if let Some(rest) = field_value(raw_line, "S") {
711 enforce_field_order(
712 line_no,
713 "S",
714 10,
715 &mut last_node_rank,
716 "node",
717 raw_line,
718 strict,
719 )?;
720 let value = parse_text_field(rest);
721 validate_len(line_no, "S", &value, raw_line, 1, 200, strict)?;
722 node.source_files.push(value);
723 continue;
724 }
725
726 if let Some(rest) = trimmed.strip_prefix("> ") {
727 let mut parts = rest.split_whitespace();
728 let Some(relation) = parts.next() else {
729 fail_or_warn(
730 strict,
731 &mut warnings,
732 format!("missing relation in edge at line {line_no}: {trimmed}"),
733 )?;
734 current_edge_index = None;
735 continue;
736 };
737 let Some(target_id) = parts.next() else {
738 fail_or_warn(
739 strict,
740 &mut warnings,
741 format!("missing target id in edge at line {line_no}: {trimmed}"),
742 )?;
743 current_edge_index = None;
744 continue;
745 };
746 graph.edges.push(Edge {
747 source_id: node.id.clone(),
748 relation: code_to_relation(relation).to_owned(),
749 target_id: target_id.to_owned(),
750 properties: EdgeProperties::default(),
751 });
752 current_edge_index = Some(graph.edges.len() - 1);
753 last_edge_rank = 0;
754 continue;
755 }
756
757 if let Some(rest) = field_value(raw_line, "d") {
758 enforce_field_order(
759 line_no,
760 "d",
761 1,
762 &mut last_edge_rank,
763 "edge",
764 raw_line,
765 strict,
766 )?;
767 let Some(edge_idx) = current_edge_index else {
768 fail_or_warn(
769 strict,
770 &mut warnings,
771 format!(
772 "edge detail without preceding edge at line {line_no}\n{}",
773 line_fragment(raw_line)
774 ),
775 )?;
776 continue;
777 };
778 let value = parse_text_field(rest);
779 validate_len(line_no, "d", &value, raw_line, 1, 200, strict)?;
780 graph.edges[edge_idx].properties.detail = value;
781 continue;
782 }
783
784 if let Some(rest) = field_value(raw_line, "i") {
785 enforce_field_order(
786 line_no,
787 "i",
788 2,
789 &mut last_edge_rank,
790 "edge",
791 raw_line,
792 strict,
793 )?;
794 let Some(edge_idx) = current_edge_index else {
795 fail_or_warn(
796 strict,
797 &mut warnings,
798 format!(
799 "edge valid_from without preceding edge at line {line_no}\n{}",
800 line_fragment(raw_line)
801 ),
802 )?;
803 continue;
804 };
805 let value = rest.trim();
806 if !value.is_empty() && !parse_utc_timestamp(value) {
807 fail_or_warn(
808 strict,
809 &mut warnings,
810 format!(
811 "invalid i timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ\n{}",
812 line_fragment(raw_line)
813 ),
814 )?;
815 continue;
816 }
817 graph.edges[edge_idx].properties.valid_from = value.to_owned();
818 continue;
819 }
820
821 if let Some(rest) = field_value(raw_line, "x") {
822 enforce_field_order(
823 line_no,
824 "x",
825 3,
826 &mut last_edge_rank,
827 "edge",
828 raw_line,
829 strict,
830 )?;
831 let Some(edge_idx) = current_edge_index else {
832 fail_or_warn(
833 strict,
834 &mut warnings,
835 format!(
836 "edge valid_to without preceding edge at line {line_no}\n{}",
837 line_fragment(raw_line)
838 ),
839 )?;
840 continue;
841 };
842 let value = rest.trim();
843 if !value.is_empty() && !parse_utc_timestamp(value) {
844 fail_or_warn(
845 strict,
846 &mut warnings,
847 format!(
848 "invalid x timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ\n{}",
849 line_fragment(raw_line)
850 ),
851 )?;
852 continue;
853 }
854 graph.edges[edge_idx].properties.valid_to = value.to_owned();
855 continue;
856 }
857
858 if let Some(rest) = field_value(raw_line, "-") {
859 let (key, value) = rest
860 .split_once(char::is_whitespace)
861 .map(|(key, value)| (key.trim(), value))
862 .unwrap_or((rest.trim(), ""));
863 let is_edge_custom = matches!(
864 key,
865 "edge_feedback_score" | "edge_feedback_count" | "edge_feedback_last_ts_ms"
866 );
867 if is_edge_custom {
868 enforce_field_order(
869 line_no,
870 "-",
871 4,
872 &mut last_edge_rank,
873 "edge",
874 raw_line,
875 strict,
876 )?;
877 } else {
878 enforce_field_order(
879 line_no,
880 "-",
881 9,
882 &mut last_node_rank,
883 "node",
884 raw_line,
885 strict,
886 )?;
887 }
888 match key {
889 "domain_area" => node.properties.domain_area = parse_text_field(value),
890 "feedback_score" => {
891 node.properties.feedback_score = value.trim().parse::<f64>().unwrap_or(0.0)
892 }
893 "feedback_count" => {
894 node.properties.feedback_count = value.trim().parse::<u64>().unwrap_or(0)
895 }
896 "feedback_last_ts_ms" => {
897 node.properties.feedback_last_ts_ms = value.trim().parse::<u64>().ok()
898 }
899 "edge_feedback_score" => {
900 if let Some(edge_idx) = current_edge_index {
901 graph.edges[edge_idx].properties.feedback_score =
902 value.trim().parse::<f64>().unwrap_or(0.0);
903 }
904 }
905 "edge_feedback_count" => {
906 if let Some(edge_idx) = current_edge_index {
907 graph.edges[edge_idx].properties.feedback_count =
908 value.trim().parse::<u64>().unwrap_or(0);
909 }
910 }
911 "edge_feedback_last_ts_ms" => {
912 if let Some(edge_idx) = current_edge_index {
913 graph.edges[edge_idx].properties.feedback_last_ts_ms =
914 value.trim().parse::<u64>().ok();
915 }
916 }
917 _ => {}
918 }
919 continue;
920 }
921
922 fail_or_warn(
923 strict,
924 &mut warnings,
925 format!("unrecognized line at {line_no}: {trimmed}"),
926 )?;
927 }
928
929 if let Some(node) = current_node.take() {
930 graph.nodes.push(node);
931 }
932 if let Some(note) = current_note.take() {
933 graph.notes.push(note);
934 }
935
936 for node in &mut graph.nodes {
937 node.properties.alias =
938 sort_case_insensitive(&dedupe_case_insensitive(node.properties.alias.clone()));
939 node.properties.key_facts =
940 sort_case_insensitive(&dedupe_case_insensitive(node.properties.key_facts.clone()));
941 node.source_files =
942 sort_case_insensitive(&dedupe_case_insensitive(node.source_files.clone()));
943 }
944
945 graph.edges.sort_by(|a, b| {
946 a.source_id
947 .cmp(&b.source_id)
948 .then_with(|| a.relation.cmp(&b.relation))
949 .then_with(|| a.target_id.cmp(&b.target_id))
950 .then_with(|| a.properties.detail.cmp(&b.properties.detail))
951 });
952
953 for note in &mut graph.notes {
954 note.tags = sort_case_insensitive(&dedupe_case_insensitive(note.tags.clone()));
955 note.source_files =
956 sort_case_insensitive(&dedupe_case_insensitive(note.source_files.clone()));
957 }
958 graph.notes.sort_by(|a, b| {
959 a.id.cmp(&b.id)
960 .then_with(|| a.node_id.cmp(&b.node_id))
961 .then_with(|| a.created_at.cmp(&b.created_at))
962 });
963
964 graph.refresh_counts();
965 Ok((graph, warnings))
966}
967
968fn serialize_kg(graph: &GraphFile) -> String {
969 let mut out = String::new();
970 let mut nodes = graph.nodes.clone();
971 nodes.sort_by(|a, b| a.id.cmp(&b.id));
972
973 for node in nodes {
974 out.push_str(&format!(
975 "@ {}:{}\n",
976 node_type_to_code(&node.r#type),
977 node.id
978 ));
979 push_text_line(&mut out, "N", &node.name);
980 push_text_line(&mut out, "D", &node.properties.description);
981
982 for alias in sort_case_insensitive(&node.properties.alias) {
983 push_text_line(&mut out, "A", &alias);
984 }
985 for fact in sort_case_insensitive(&node.properties.key_facts) {
986 push_text_line(&mut out, "F", &fact);
987 }
988
989 if !node.properties.created_at.is_empty() {
990 out.push_str(&format!("E {}\n", node.properties.created_at));
991 }
992 if let Some(confidence) = node.properties.confidence {
993 out.push_str(&format!("C {}\n", confidence));
994 }
995 out.push_str(&format!("V {}\n", node.properties.importance));
996 if !node.properties.provenance.is_empty() {
997 push_text_line(&mut out, "P", &node.properties.provenance);
998 }
999 if !node.properties.domain_area.is_empty() {
1000 out.push_str("- domain_area ");
1001 out.push_str(&escape_kg_text(&node.properties.domain_area));
1002 out.push('\n');
1003 }
1004 if node.properties.feedback_score != 0.0 {
1005 out.push_str(&format!(
1006 "- feedback_score {}\n",
1007 node.properties.feedback_score
1008 ));
1009 }
1010 if node.properties.feedback_count != 0 {
1011 out.push_str(&format!(
1012 "- feedback_count {}\n",
1013 node.properties.feedback_count
1014 ));
1015 }
1016 if let Some(ts) = node.properties.feedback_last_ts_ms {
1017 out.push_str(&format!("- feedback_last_ts_ms {}\n", ts));
1018 }
1019
1020 for source in sort_case_insensitive(&node.source_files) {
1021 push_text_line(&mut out, "S", &source);
1022 }
1023
1024 let mut edges: Vec<Edge> = graph
1025 .edges
1026 .iter()
1027 .filter(|edge| edge.source_id == node.id)
1028 .cloned()
1029 .collect();
1030 edges.sort_by(|a, b| {
1031 a.relation
1032 .cmp(&b.relation)
1033 .then_with(|| a.target_id.cmp(&b.target_id))
1034 .then_with(|| a.properties.detail.cmp(&b.properties.detail))
1035 });
1036
1037 for edge in edges {
1038 out.push_str(&format!(
1039 "> {} {}\n",
1040 relation_to_code(&edge.relation),
1041 edge.target_id
1042 ));
1043 if !edge.properties.detail.is_empty() {
1044 push_text_line(&mut out, "d", &edge.properties.detail);
1045 }
1046 if !edge.properties.valid_from.is_empty() {
1047 out.push_str(&format!("i {}\n", edge.properties.valid_from));
1048 }
1049 if !edge.properties.valid_to.is_empty() {
1050 out.push_str(&format!("x {}\n", edge.properties.valid_to));
1051 }
1052 if edge.properties.feedback_score != 0.0 {
1053 out.push_str(&format!(
1054 "- edge_feedback_score {}\n",
1055 edge.properties.feedback_score
1056 ));
1057 }
1058 if edge.properties.feedback_count != 0 {
1059 out.push_str(&format!(
1060 "- edge_feedback_count {}\n",
1061 edge.properties.feedback_count
1062 ));
1063 }
1064 if let Some(ts) = edge.properties.feedback_last_ts_ms {
1065 out.push_str(&format!("- edge_feedback_last_ts_ms {}\n", ts));
1066 }
1067 }
1068
1069 out.push('\n');
1070 }
1071
1072 let mut notes = graph.notes.clone();
1073 notes.sort_by(|a, b| {
1074 a.id.cmp(&b.id)
1075 .then_with(|| a.node_id.cmp(&b.node_id))
1076 .then_with(|| a.created_at.cmp(&b.created_at))
1077 });
1078 for note in notes {
1079 out.push_str(&format!("! {} {}\n", note.id, note.node_id));
1080 push_text_line(&mut out, "b", ¬e.body);
1081 for tag in sort_case_insensitive(¬e.tags) {
1082 push_text_line(&mut out, "t", &tag);
1083 }
1084 if !note.author.is_empty() {
1085 push_text_line(&mut out, "a", ¬e.author);
1086 }
1087 if !note.created_at.is_empty() {
1088 out.push_str(&format!("e {}\n", note.created_at));
1089 }
1090 if !note.provenance.is_empty() {
1091 push_text_line(&mut out, "p", ¬e.provenance);
1092 }
1093 for source in sort_case_insensitive(¬e.source_files) {
1094 push_text_line(&mut out, "s", &source);
1095 }
1096 out.push('\n');
1097 }
1098
1099 out
1100}
1101
/// A knowledge graph as held in memory and stored on disk: metadata plus the
/// lists of nodes, edges and notes.
///
/// When deserializing, `nodes`, `edges` and `notes` default to empty lists if
/// they are absent; `metadata` is required.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphFile {
    /// Descriptive information and cached counts for the graph.
    pub metadata: Metadata,
    /// All nodes of the graph.
    #[serde(default)]
    pub nodes: Vec<Node>,
    /// All edges; each refers to its endpoints by node id.
    #[serde(default)]
    pub edges: Vec<Edge>,
    /// Notes; each refers to a node through `node_id`.
    #[serde(default)]
    pub notes: Vec<Note>,
}
1112
/// Descriptive information about a graph. Every field is required when
/// deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Metadata {
    /// Name of the graph.
    pub name: String,
    /// Version string; `GraphFile::new` sets it to `1.0`.
    pub version: String,
    /// Free-text description of the graph.
    pub description: String,
    /// Number of nodes, as last set by `GraphFile::refresh_counts`.
    pub node_count: usize,
    /// Number of edges, as last set by `GraphFile::refresh_counts`.
    pub edge_count: usize,
}
1121
/// A single node of the graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Identifier that edges and notes use to refer to this node.
    pub id: String,
    /// Node type name (for example `Concept`); serialized under the key `type`.
    #[serde(rename = "type")]
    pub r#type: String,
    /// Display name of the node.
    pub name: String,
    /// Additional attributes; defaults apply when absent from the input.
    #[serde(default)]
    pub properties: NodeProperties,
    /// Source file references attached to the node; empty when absent.
    #[serde(default)]
    pub source_files: Vec<String>,
}
1133
/// Optional attributes of a [`Node`]. Every field falls back to a default
/// when it is absent from the deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeProperties {
    /// Description text (the `D` field of the `.kg` format).
    #[serde(default)]
    pub description: String,
    /// Domain area (the `- domain_area` custom field of the `.kg` format).
    #[serde(default)]
    pub domain_area: String,
    /// Provenance marker (the `P` field of the `.kg` format).
    #[serde(default)]
    pub provenance: String,
    /// Confidence value, if one was recorded (the `C` field).
    #[serde(default)]
    pub confidence: Option<f64>,
    /// Creation timestamp (the `E` field); the `.kg` parser expects
    /// `YYYY-MM-DDTHH:MM:SSZ`. Empty when unknown.
    #[serde(default)]
    pub created_at: String,
    /// Importance value (the `V` field); defaults to `default_importance()`.
    #[serde(default = "default_importance")]
    pub importance: u8,
    /// Key facts about the node (the `F` fields).
    #[serde(default)]
    pub key_facts: Vec<String>,
    /// Alternative names of the node (the `A` fields).
    #[serde(default)]
    pub alias: Vec<String>,
    /// Feedback score; `0.0` when none was recorded.
    #[serde(default)]
    pub feedback_score: f64,
    /// Feedback count; `0` when none was recorded.
    #[serde(default)]
    pub feedback_count: u64,
    /// Timestamp of the last feedback, if any (presumably milliseconds, per
    /// the `_ms` suffix; confirm against the writer).
    #[serde(default)]
    pub feedback_last_ts_ms: Option<u64>,
}
1159
/// Importance assigned to a node when none is given; used both as the serde
/// default and by `NodeProperties::default`.
fn default_importance() -> u8 {
    const DEFAULT_IMPORTANCE: u8 = 4;
    DEFAULT_IMPORTANCE
}
1163
1164impl Default for NodeProperties {
1165 fn default() -> Self {
1166 Self {
1167 description: String::new(),
1168 domain_area: String::new(),
1169 provenance: String::new(),
1170 confidence: None,
1171 created_at: String::new(),
1172 importance: default_importance(),
1173 key_facts: Vec::new(),
1174 alias: Vec::new(),
1175 feedback_score: 0.0,
1176 feedback_count: 0,
1177 feedback_last_ts_ms: None,
1178 }
1179 }
1180}
1181
/// A directed, typed connection between two nodes, referenced by id.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Id of the node the edge starts from.
    pub source_id: String,
    /// Relation name (for example `DEPENDS_ON`).
    pub relation: String,
    /// Id of the node the edge points to.
    pub target_id: String,
    /// Additional attributes; defaults apply when absent from the input.
    #[serde(default)]
    pub properties: EdgeProperties,
}
1190
/// Optional attributes of an [`Edge`]. Every field falls back to its type's
/// default when it is absent from the deserialized input.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EdgeProperties {
    /// Free-text detail about the edge (the `d` field of the `.kg` format).
    #[serde(default)]
    pub detail: String,
    /// Start of validity (the `i` field); the `.kg` parser expects
    /// `YYYY-MM-DDTHH:MM:SSZ`. Empty when unset.
    #[serde(default)]
    pub valid_from: String,
    /// End of validity (the `x` field); same format as `valid_from`.
    #[serde(default)]
    pub valid_to: String,
    /// Feedback score; `0.0` when none was recorded.
    #[serde(default)]
    pub feedback_score: f64,
    /// Feedback count; `0` when none was recorded.
    #[serde(default)]
    pub feedback_count: u64,
    /// Timestamp of the last feedback, if any (presumably milliseconds, per
    /// the `_ms` suffix; confirm against the writer).
    #[serde(default)]
    pub feedback_last_ts_ms: Option<u64>,
}
1206
/// A note attached to a node. Only `id` and `node_id` are required when
/// deserializing; the remaining fields default to empty.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Note {
    /// Identifier of the note.
    pub id: String,
    /// Id of the node the note refers to.
    pub node_id: String,
    /// Note text (the `b` field of the `.kg` format).
    #[serde(default)]
    pub body: String,
    /// Tags attached to the note (the `t` fields).
    #[serde(default)]
    pub tags: Vec<String>,
    /// Author of the note (the `a` field).
    #[serde(default)]
    pub author: String,
    /// Creation timestamp (the `e` field); stored as written, without
    /// format validation by the `.kg` parser.
    #[serde(default)]
    pub created_at: String,
    /// Provenance marker (the `p` field).
    #[serde(default)]
    pub provenance: String,
    /// Source file references attached to the note (the `s` fields).
    #[serde(default)]
    pub source_files: Vec<String>,
}
1224
1225impl GraphFile {
1226 pub fn new(name: &str) -> Self {
1227 Self {
1228 metadata: Metadata {
1229 name: name.to_owned(),
1230 version: "1.0".to_owned(),
1231 description: format!("Knowledge graph: {name}"),
1232 node_count: 0,
1233 edge_count: 0,
1234 },
1235 nodes: Vec::new(),
1236 edges: Vec::new(),
1237 notes: Vec::new(),
1238 }
1239 }
1240
1241 pub fn load(path: &Path) -> Result<Self> {
1242 let raw = fs::read_to_string(path)
1243 .with_context(|| format!("failed to read graph: {}", path.display()))?;
1244 let ext = path
1245 .extension()
1246 .and_then(|ext| ext.to_str())
1247 .unwrap_or("json");
1248 let mut graph = if ext == "kg" {
1249 if raw.trim_start().starts_with('{') {
1250 serde_json::from_str(&raw).map_err(|error| {
1251 anyhow::anyhow!(json_error_detail(
1252 "invalid legacy JSON payload in .kg file",
1253 path,
1254 &raw,
1255 &error,
1256 ))
1257 })?
1258 } else {
1259 let graph_name = path
1260 .file_stem()
1261 .and_then(|stem| stem.to_str())
1262 .unwrap_or("graph");
1263 let (graph, warnings) = parse_kg_with_warnings(&raw, graph_name, strict_kg_mode())
1264 .with_context(|| format!("failed to parse .kg graph: {}", path.display()))?;
1265 for warning in warnings {
1266 let _ = crate::kg_sidecar::append_warning(
1267 path,
1268 &format!(
1269 "ignored invalid graph entry in {}: {warning}",
1270 path.display()
1271 ),
1272 );
1273 }
1274 graph
1275 }
1276 } else {
1277 serde_json::from_str(&raw).map_err(|error| {
1278 anyhow::anyhow!(json_error_detail("invalid JSON", path, &raw, &error))
1279 })?
1280 };
1281 graph.refresh_counts();
1282 Ok(graph)
1283 }
1284
1285 pub fn save(&self, path: &Path) -> Result<()> {
1286 let mut graph = self.clone();
1287 graph.refresh_counts();
1288 let ext = path
1289 .extension()
1290 .and_then(|ext| ext.to_str())
1291 .unwrap_or("json");
1292 let raw = if ext == "kg" {
1293 serialize_kg(&graph)
1294 } else {
1295 serde_json::to_string_pretty(&graph).context("failed to serialize graph")?
1296 };
1297 atomic_write(path, &raw)?;
1298 backup_graph_if_stale(path, &raw)
1299 }
1300
1301 pub fn refresh_counts(&mut self) {
1302 self.metadata.node_count = self.nodes.len();
1303 self.metadata.edge_count = self.edges.len();
1304 }
1305
1306 pub fn node_by_id(&self, id: &str) -> Option<&Node> {
1307 self.nodes.iter().find(|node| node.id == id)
1308 }
1309
1310 pub fn node_by_id_sorted(&self, id: &str) -> Option<&Node> {
1311 self.nodes
1312 .binary_search_by(|node| node.id.as_str().cmp(id))
1313 .ok()
1314 .and_then(|idx| self.nodes.get(idx))
1315 }
1316
1317 pub fn node_by_id_mut(&mut self, id: &str) -> Option<&mut Node> {
1318 self.nodes.iter_mut().find(|node| node.id == id)
1319 }
1320
1321 pub fn has_edge(&self, source_id: &str, relation: &str, target_id: &str) -> bool {
1322 self.edges.iter().any(|edge| {
1323 edge.source_id == source_id && edge.relation == relation && edge.target_id == target_id
1324 })
1325 }
1326}
1327
1328#[cfg(test)]
1329mod tests {
1330 use super::{GraphFile, parse_kg};
1331
1332 #[test]
1333 fn save_and_load_kg_roundtrip_keeps_core_fields() {
1334 let dir = tempfile::tempdir().expect("temp dir");
1335 let path = dir.path().join("graph.kg");
1336
1337 let mut graph = GraphFile::new("graph");
1338 graph.nodes.push(crate::Node {
1339 id: "concept:refrigerator".to_owned(),
1340 r#type: "Concept".to_owned(),
1341 name: "Lodowka".to_owned(),
1342 properties: crate::NodeProperties {
1343 description: "Urzadzenie chlodzace".to_owned(),
1344 provenance: "U".to_owned(),
1345 created_at: "2026-04-04T12:00:00Z".to_owned(),
1346 importance: 5,
1347 key_facts: vec!["A".to_owned(), "b".to_owned()],
1348 alias: vec!["Fridge".to_owned()],
1349 ..Default::default()
1350 },
1351 source_files: vec!["docs/fridge.md".to_owned()],
1352 });
1353 graph.edges.push(crate::Edge {
1354 source_id: "concept:refrigerator".to_owned(),
1355 relation: "READS_FROM".to_owned(),
1356 target_id: "datastore:settings".to_owned(),
1357 properties: crate::EdgeProperties {
1358 detail: "runtime read".to_owned(),
1359 valid_from: "2026-04-04T12:00:00Z".to_owned(),
1360 valid_to: "2026-04-05T12:00:00Z".to_owned(),
1361 ..Default::default()
1362 },
1363 });
1364
1365 graph.save(&path).expect("save kg");
1366 let raw = std::fs::read_to_string(&path).expect("read kg");
1367 assert!(raw.contains("@ K:concept:refrigerator"));
1368 assert!(raw.contains("> R datastore:settings"));
1369
1370 let loaded = GraphFile::load(&path).expect("load kg");
1371 assert_eq!(loaded.nodes.len(), 1);
1372 assert_eq!(loaded.edges.len(), 1);
1373 let node = &loaded.nodes[0];
1374 assert_eq!(node.properties.importance, 5);
1375 assert_eq!(node.properties.provenance, "U");
1376 assert_eq!(node.name, "Lodowka");
1377 assert_eq!(loaded.edges[0].relation, "READS_FROM");
1378 assert_eq!(loaded.edges[0].properties.detail, "runtime read");
1379 assert_eq!(
1380 loaded.edges[0].properties.valid_from,
1381 "2026-04-04T12:00:00Z"
1382 );
1383 assert_eq!(loaded.edges[0].properties.valid_to, "2026-04-05T12:00:00Z");
1384 }
1385
1386 #[test]
1387 fn load_supports_legacy_json_payload_with_kg_extension() {
1388 let dir = tempfile::tempdir().expect("temp dir");
1389 let path = dir.path().join("legacy.kg");
1390 std::fs::write(
1391 &path,
1392 r#"{
1393 "metadata": {"name": "legacy", "version": "1.0", "description": "x", "node_count": 0, "edge_count": 0},
1394 "nodes": [],
1395 "edges": [],
1396 "notes": []
1397}"#,
1398 )
1399 .expect("write legacy payload");
1400
1401 let loaded = GraphFile::load(&path).expect("load legacy kg");
1402 assert_eq!(loaded.metadata.name, "legacy");
1403 assert!(loaded.nodes.is_empty());
1404 }
1405
1406 #[test]
1407 fn load_kg_ignores_invalid_timestamp_format() {
1408 let dir = tempfile::tempdir().expect("temp dir");
1409 let path = dir.path().join("invalid-ts.kg");
1410 std::fs::write(
1411 &path,
1412 "@ K:concept:x\nN X\nD Desc\nE 2026-04-04 12:00:00\nV 4\nP U\n",
1413 )
1414 .expect("write kg");
1415
1416 let loaded = GraphFile::load(&path).expect("invalid timestamp should be ignored");
1417 assert_eq!(loaded.nodes.len(), 1);
1418 assert!(loaded.nodes[0].properties.created_at.is_empty());
1419 }
1420
1421 #[test]
1422 fn load_kg_ignores_invalid_edge_timestamp_format() {
1423 let dir = tempfile::tempdir().expect("temp dir");
1424 let path = dir.path().join("invalid-edge-ts.kg");
1425 std::fs::write(
1426 &path,
1427 "@ K:concept:x\nN X\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n> H concept:y\ni 2026-04-04 12:00:00\n",
1428 )
1429 .expect("write kg");
1430
1431 let loaded = GraphFile::load(&path).expect("invalid edge timestamp should be ignored");
1432 assert_eq!(loaded.edges.len(), 1);
1433 assert!(loaded.edges[0].properties.valid_from.is_empty());
1434 }
1435
1436 #[test]
1437 fn load_kg_preserves_whitespace_and_dedupes_exact_duplicates() {
1438 let dir = tempfile::tempdir().expect("temp dir");
1439 let path = dir.path().join("normalize.kg");
1440 std::fs::write(
1441 &path,
1442 "@ K:concept:x\nN Name With Spaces \nD Desc with spaces \nA Alias\nA Alias\nF fact one\nF FACT one\nS docs/a.md\nS docs/a.md\nE 2026-04-04T12:00:00Z\nV 4\nP U\n",
1443 )
1444 .expect("write kg");
1445
1446 let loaded = GraphFile::load(&path).expect("load kg");
1447 let node = &loaded.nodes[0];
1448 assert_eq!(node.name, " Name With Spaces ");
1449 assert_eq!(node.properties.description, " Desc with spaces ");
1450 assert_eq!(node.properties.alias.len(), 1);
1451 assert_eq!(node.properties.key_facts.len(), 2);
1452 assert_eq!(node.source_files.len(), 1);
1453 }
1454
1455 #[test]
1456 fn save_and_load_kg_roundtrip_keeps_notes_without_json_fallback() {
1457 let dir = tempfile::tempdir().expect("temp dir");
1458 let path = dir.path().join("graph-notes.kg");
1459
1460 let mut graph = GraphFile::new("graph-notes");
1461 graph.nodes.push(crate::Node {
1462 id: "concept:refrigerator".to_owned(),
1463 r#type: "Concept".to_owned(),
1464 name: "Lodowka".to_owned(),
1465 properties: crate::NodeProperties {
1466 description: "Urzadzenie chlodzace".to_owned(),
1467 provenance: "U".to_owned(),
1468 created_at: "2026-04-04T12:00:00Z".to_owned(),
1469 ..Default::default()
1470 },
1471 source_files: vec!["docs/fridge.md".to_owned()],
1472 });
1473 graph.notes.push(crate::Note {
1474 id: "note:1".to_owned(),
1475 node_id: "concept:refrigerator".to_owned(),
1476 body: "Important maintenance insight".to_owned(),
1477 tags: vec!["Maintenance".to_owned(), "maintenance".to_owned()],
1478 author: "alice".to_owned(),
1479 created_at: "1712345678".to_owned(),
1480 provenance: "U".to_owned(),
1481 source_files: vec!["docs/a.md".to_owned(), "docs/a.md".to_owned()],
1482 });
1483
1484 graph.save(&path).expect("save kg");
1485 let raw = std::fs::read_to_string(&path).expect("read kg");
1486 assert!(raw.contains("! note:1 concept:refrigerator"));
1487 assert!(!raw.trim_start().starts_with('{'));
1488
1489 let loaded = GraphFile::load(&path).expect("load kg");
1490 assert_eq!(loaded.notes.len(), 1);
1491 let note = &loaded.notes[0];
1492 assert_eq!(note.id, "note:1");
1493 assert_eq!(note.node_id, "concept:refrigerator");
1494 assert_eq!(note.body, "Important maintenance insight");
1495 assert_eq!(note.tags.len(), 1);
1496 assert_eq!(note.source_files.len(), 1);
1497 }
1498
1499 #[test]
1500 fn save_and_load_kg_roundtrip_preserves_multiline_text_fields() {
1501 let dir = tempfile::tempdir().expect("temp dir");
1502 let path = dir.path().join("graph-multiline.kg");
1503
1504 let mut graph = GraphFile::new("graph-multiline");
1505 graph.nodes.push(crate::Node {
1506 id: "concept:refrigerator".to_owned(),
1507 r#type: "Concept".to_owned(),
1508 name: "Lodowka\nSmart".to_owned(),
1509 properties: crate::NodeProperties {
1510 description: "Linia 1\nLinia 2\\nliteral".to_owned(),
1511 provenance: "user\nimport".to_owned(),
1512 created_at: "2026-04-04T12:00:00Z".to_owned(),
1513 importance: 5,
1514 key_facts: vec!["Fakt 1\nFakt 2".to_owned()],
1515 alias: vec!["Alias\nA".to_owned()],
1516 domain_area: "ops\nfield".to_owned(),
1517 ..Default::default()
1518 },
1519 source_files: vec!["docs/fridge\nnotes.md".to_owned()],
1520 });
1521 graph.edges.push(crate::Edge {
1522 source_id: "concept:refrigerator".to_owned(),
1523 relation: "READS_FROM".to_owned(),
1524 target_id: "datastore:settings".to_owned(),
1525 properties: crate::EdgeProperties {
1526 detail: "runtime\nread".to_owned(),
1527 valid_from: "2026-04-04T12:00:00Z".to_owned(),
1528 valid_to: "2026-04-05T12:00:00Z".to_owned(),
1529 ..Default::default()
1530 },
1531 });
1532 graph.notes.push(crate::Note {
1533 id: "note:1".to_owned(),
1534 node_id: "concept:refrigerator".to_owned(),
1535 body: "line1\nline2\\nkeep".to_owned(),
1536 tags: vec!["multi\nline".to_owned()],
1537 author: "alice\nbob".to_owned(),
1538 created_at: "1712345678".to_owned(),
1539 provenance: "manual\nentry".to_owned(),
1540 source_files: vec!["docs/a\nb.md".to_owned()],
1541 });
1542
1543 graph.save(&path).expect("save kg");
1544 let raw = std::fs::read_to_string(&path).expect("read kg");
1545 assert!(raw.contains("N Lodowka\\nSmart"));
1546 assert!(raw.contains("D Linia 1\\nLinia 2\\\\nliteral"));
1547 assert!(raw.contains("- domain_area ops\\nfield"));
1548 assert!(raw.contains("d runtime\\nread"));
1549 assert!(raw.contains("b line1\\nline2\\\\nkeep"));
1550
1551 let loaded = GraphFile::load(&path).expect("load kg");
1552 let node = &loaded.nodes[0];
1553 assert_eq!(node.name, "Lodowka\nSmart");
1554 assert_eq!(node.properties.description, "Linia 1\nLinia 2\\nliteral");
1555 assert_eq!(node.properties.provenance, "user\nimport");
1556 assert_eq!(node.properties.alias, vec!["Alias\nA".to_owned()]);
1557 assert_eq!(node.properties.key_facts, vec!["Fakt 1\nFakt 2".to_owned()]);
1558 assert_eq!(node.properties.domain_area, "ops\nfield");
1559 assert_eq!(node.source_files, vec!["docs/fridge\nnotes.md".to_owned()]);
1560 assert_eq!(loaded.edges[0].properties.detail, "runtime\nread");
1561 let note = &loaded.notes[0];
1562 assert_eq!(note.body, "line1\nline2\\nkeep");
1563 assert_eq!(note.tags, vec!["multi\nline".to_owned()]);
1564 assert_eq!(note.author, "alice\nbob");
1565 assert_eq!(note.provenance, "manual\nentry");
1566 assert_eq!(note.source_files, vec!["docs/a\nb.md".to_owned()]);
1567 }
1568
1569 #[test]
1570 fn strict_mode_rejects_out_of_order_node_fields() {
1571 let raw = "@ K:concept:x\nD Desc\nN Name\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n";
1572 let err = parse_kg(raw, "x", true).expect_err("strict mode should fail on field order");
1573 assert!(format!("{err:#}").contains("invalid field order"));
1574 }
1575
1576 #[test]
1577 fn strict_mode_rejects_overlong_name_but_compat_mode_allows_it() {
1578 let long_name = "N ".to_owned() + &"X".repeat(121);
1579 let raw = format!(
1580 "@ K:concept:x\n{}\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n",
1581 long_name
1582 );
1583
1584 let strict_err = parse_kg(&raw, "x", true).expect_err("strict mode should fail on length");
1585 assert!(format!("{strict_err:#}").contains("invalid N length"));
1586
1587 parse_kg(&raw, "x", false).expect("compat mode keeps permissive behavior");
1588 }
1589
1590 #[test]
1591 fn save_kg_skips_empty_e_and_p_fields() {
1592 let dir = tempfile::tempdir().expect("temp dir");
1593 let path = dir.path().join("no-empty-ep.kg");
1594
1595 let mut graph = GraphFile::new("graph");
1596 graph.nodes.push(crate::Node {
1597 id: "concept:x".to_owned(),
1598 r#type: "Concept".to_owned(),
1599 name: "X".to_owned(),
1600 properties: crate::NodeProperties {
1601 description: "Desc".to_owned(),
1602 provenance: String::new(),
1603 created_at: String::new(),
1604 ..Default::default()
1605 },
1606 source_files: vec!["docs/a.md".to_owned()],
1607 });
1608
1609 graph.save(&path).expect("save kg");
1610 let raw = std::fs::read_to_string(&path).expect("read kg");
1611 assert!(!raw.contains("\nE \n"));
1612 assert!(!raw.contains("\nP \n"));
1613 }
1614}