1use std::fs;
2use std::io::Write;
3use std::path::Path;
4use std::time::{SystemTime, UNIX_EPOCH};
5
6use anyhow::{Context, Result};
7use flate2::Compression;
8use flate2::write::GzEncoder;
9use serde::{Deserialize, Serialize};
10
11fn atomic_write(dest: &Path, data: &str) -> Result<()> {
16 let tmp = dest.with_extension("tmp");
17 fs::write(&tmp, data).with_context(|| format!("failed to write tmp: {}", tmp.display()))?;
18 if dest.exists() {
19 let bak = dest.with_extension("bak");
20 fs::copy(dest, &bak)
21 .with_context(|| format!("failed to create backup: {}", bak.display()))?;
22 }
23 fs::rename(&tmp, dest).with_context(|| format!("failed to rename tmp to {}", dest.display()))
24}
25
/// Minimum age, in seconds, of the newest timestamped backup before
/// `backup_graph_if_stale` writes another one (one hour).
const BACKUP_STALE_SECS: u64 = 60 * 60;
27
28fn backup_graph_if_stale(path: &Path, data: &str) -> Result<()> {
29 let parent = match path.parent() {
30 Some(parent) => parent,
31 None => return Ok(()),
32 };
33 let stem = match path.file_stem().and_then(|s| s.to_str()) {
34 Some(stem) => stem,
35 None => return Ok(()),
36 };
37 let now = SystemTime::now()
38 .duration_since(UNIX_EPOCH)
39 .context("time went backwards")?
40 .as_secs();
41 if let Some(latest) = latest_backup_ts(parent, stem)? {
42 if now.saturating_sub(latest) < BACKUP_STALE_SECS {
43 return Ok(());
44 }
45 }
46
47 let backup_path = parent.join(format!("{stem}.bck.{now}.gz"));
48 let tmp_path = backup_path.with_extension("tmp");
49 let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
50 encoder.write_all(data.as_bytes())?;
51 let encoded = encoder.finish()?;
52 fs::write(&tmp_path, encoded)
53 .with_context(|| format!("failed to write tmp: {}", tmp_path.display()))?;
54 fs::rename(&tmp_path, &backup_path)
55 .with_context(|| format!("failed to rename tmp to {}", backup_path.display()))?;
56 Ok(())
57}
58
59fn latest_backup_ts(dir: &Path, stem: &str) -> Result<Option<u64>> {
60 let prefix = format!("{stem}.bck.");
61 let suffix = ".gz";
62 let mut latest = None;
63 for entry in fs::read_dir(dir).with_context(|| format!("read dir: {}", dir.display()))? {
64 let entry = entry?;
65 let name = entry.file_name();
66 let name = name.to_string_lossy();
67 if !name.starts_with(&prefix) || !name.ends_with(suffix) {
68 continue;
69 }
70 let ts_part = &name[prefix.len()..name.len() - suffix.len()];
71 if let Ok(ts) = ts_part.parse::<u64>() {
72 match latest {
73 Some(current) => {
74 if ts > current {
75 latest = Some(ts);
76 }
77 }
78 None => latest = Some(ts),
79 }
80 }
81 }
82 Ok(latest)
83}
84
/// Maps a full node type name to its single-letter `.kg` code.
///
/// The match is case-sensitive; names without a known code are returned unchanged.
fn node_type_to_code(node_type: &str) -> &str {
    // (type name, code) pairs.
    const TYPE_CODES: &[(&str, &str)] = &[
        ("Feature", "F"),
        ("Concept", "K"),
        ("Interface", "I"),
        ("Process", "P"),
        ("DataStore", "D"),
        ("Attribute", "A"),
        ("Entity", "Y"),
        ("Note", "N"),
        ("Rule", "R"),
        ("Convention", "C"),
        ("Bug", "B"),
        ("Decision", "Z"),
        ("OpenQuestion", "O"),
        ("Claim", "Q"),
        ("Insight", "W"),
        ("Reference", "M"),
        ("Term", "T"),
        ("Status", "S"),
        ("Doubt", "L"),
    ];

    TYPE_CODES
        .iter()
        .find(|(name, _)| *name == node_type)
        .map_or(node_type, |(_, code)| *code)
}
109
/// Expands a single-letter `.kg` node code into the full node type name.
///
/// The match is case-sensitive; codes that are not recognised are returned unchanged.
fn code_to_node_type(code: &str) -> &str {
    // (code, type name) pairs.
    const CODE_TYPES: &[(&str, &str)] = &[
        ("F", "Feature"),
        ("K", "Concept"),
        ("I", "Interface"),
        ("P", "Process"),
        ("D", "DataStore"),
        ("A", "Attribute"),
        ("Y", "Entity"),
        ("N", "Note"),
        ("R", "Rule"),
        ("C", "Convention"),
        ("B", "Bug"),
        ("Z", "Decision"),
        ("O", "OpenQuestion"),
        ("Q", "Claim"),
        ("W", "Insight"),
        ("M", "Reference"),
        ("T", "Term"),
        ("S", "Status"),
        ("L", "Doubt"),
    ];

    for &(short, full_name) in CODE_TYPES {
        if short == code {
            return full_name;
        }
    }
    code
}
134
/// Maps an edge relation name to its single-letter `.kg` code.
///
/// Several relations have an alias that shares the same code (for example
/// `DOCUMENTED_IN` and `DOCUMENTS`). Relations without a code are returned unchanged.
fn relation_to_code(relation: &str) -> &str {
    // (relation name or alias, code) pairs.
    const RELATION_CODES: &[(&str, &str)] = &[
        ("DOCUMENTED_IN", "D"),
        ("DOCUMENTS", "D"),
        ("HAS", "H"),
        ("TRIGGERS", "T"),
        ("AFFECTED_BY", "A"),
        ("AFFECTS", "A"),
        ("READS_FROM", "R"),
        ("READS", "R"),
        ("GOVERNED_BY", "G"),
        ("GOVERNS", "G"),
        ("DEPENDS_ON", "O"),
        ("AVAILABLE_IN", "I"),
        ("SUPPORTS", "S"),
        ("SUMMARIZES", "U"),
        ("RELATED_TO", "L"),
        ("CONTRADICTS", "V"),
        ("CREATED_BY", "C"),
        ("CREATES", "C"),
    ];

    match RELATION_CODES.iter().find(|(name, _)| *name == relation) {
        Some(&(_, code)) => code,
        None => relation,
    }
}
153
/// Expands a single-letter `.kg` relation code into its relation name.
///
/// Codes that are not recognised are returned unchanged.
fn code_to_relation(code: &str) -> &str {
    // (code, relation name) pairs.
    const CODE_RELATIONS: &[(&str, &str)] = &[
        ("D", "DOCUMENTED_IN"),
        ("H", "HAS"),
        ("T", "TRIGGERS"),
        ("A", "AFFECTED_BY"),
        ("R", "READS_FROM"),
        ("G", "GOVERNED_BY"),
        ("O", "DEPENDS_ON"),
        ("I", "AVAILABLE_IN"),
        ("S", "SUPPORTS"),
        ("U", "SUMMARIZES"),
        ("L", "RELATED_TO"),
        ("V", "CONTRADICTS"),
        ("C", "CREATED_BY"),
    ];

    CODE_RELATIONS
        .iter()
        .find_map(|&(short, relation)| if short == code { Some(relation) } else { None })
        .unwrap_or(code)
}
172
/// Returns a sorted copy of `values`, ordered by ASCII-lowercased content first and by
/// exact content second, so strings differing only in ASCII case get a fixed order.
fn sort_case_insensitive(values: &[String]) -> Vec<String> {
    // Byte-wise comparison with ASCII letters folded to lowercase. Only ASCII bytes are
    // affected by the fold, so this orders exactly like comparing lowercased copies.
    fn folded_cmp(left: &str, right: &str) -> std::cmp::Ordering {
        let left_bytes = left.bytes().map(|byte| byte.to_ascii_lowercase());
        let right_bytes = right.bytes().map(|byte| byte.to_ascii_lowercase());
        left_bytes.cmp(right_bytes)
    }

    let mut ordered: Vec<String> = values.iter().cloned().collect();
    ordered.sort_by(|left, right| folded_cmp(left, right).then_with(|| left.cmp(right)));
    ordered
}
182
/// Reverses the `.kg` text escaping: `\n` becomes a newline, `\r` a carriage return and
/// `\\` a single backslash.
///
/// A backslash followed by any other character is kept verbatim together with that
/// character, and a lone trailing backslash is kept as-is.
fn decode_kg_text(value: &str) -> String {
    let mut decoded = String::new();
    // True while the previous character was an unconsumed backslash.
    let mut after_backslash = false;

    for current in value.chars() {
        if after_backslash {
            after_backslash = false;
            match current {
                'n' => decoded.push('\n'),
                'r' => decoded.push('\r'),
                '\\' => decoded.push('\\'),
                unknown => {
                    decoded.push('\\');
                    decoded.push(unknown);
                }
            }
        } else if current == '\\' {
            after_backslash = true;
        } else {
            decoded.push(current);
        }
    }

    // Input ended right after a backslash: emit it unchanged.
    if after_backslash {
        decoded.push('\\');
    }
    decoded
}
204
/// Escapes text for a single `.kg` line: backslash becomes `\\`, newline becomes `\n`
/// and carriage return becomes `\r`. All other characters are copied unchanged.
fn escape_kg_text(value: &str) -> String {
    // Backslashes must be doubled first so the backslashes introduced for the
    // newline / carriage-return escapes are not doubled again.
    value
        .replace('\\', "\\\\")
        .replace('\n', "\\n")
        .replace('\r', "\\r")
}
217
/// Decodes the value part of a text field line by delegating to `decode_kg_text`.
fn parse_text_field(value: &str) -> String {
    decode_kg_text(value)
}
221
222fn push_text_line(out: &mut String, key: &str, value: &str) {
223 out.push_str(key);
224 out.push(' ');
225 out.push_str(&escape_kg_text(value));
226 out.push('\n');
227}
228
/// Removes entries that equal an earlier entry when compared ASCII-case-insensitively.
///
/// The first occurrence of each value is kept, with its original casing and position.
fn dedupe_case_insensitive(values: Vec<String>) -> Vec<String> {
    let mut seen_keys = std::collections::HashSet::new();
    values
        .into_iter()
        // `insert` returns false when the lowercased key was already present.
        .filter(|candidate| seen_keys.insert(candidate.to_ascii_lowercase()))
        .collect()
}
240
/// Checks that `value` has the exact shape `YYYY-MM-DDTHH:MM:SSZ` with plausible fields.
///
/// Month must be 1-12, day 1-31, hour 0-23, minute and second 0-59. The day is not
/// checked against the month's length, so e.g. `2026-02-31T00:00:00Z` is accepted.
fn parse_utc_timestamp(value: &str) -> bool {
    // `d` marks a digit position; every other byte must match literally.
    const SHAPE: &[u8; 20] = b"dddd-dd-ddTdd:dd:ddZ";

    let bytes = value.as_bytes();
    if bytes.len() != SHAPE.len() {
        return false;
    }
    let shape_matches = bytes.iter().zip(SHAPE.iter()).all(|(&actual, &expected)| {
        if expected == b'd' {
            actual.is_ascii_digit()
        } else {
            actual == expected
        }
    });
    if !shape_matches {
        return false;
    }

    // The shape check guarantees ASCII digits at these offsets.
    let two_digits =
        |at: usize| u32::from(bytes[at] - b'0') * 10 + u32::from(bytes[at + 1] - b'0');
    let within = |at: usize, low: u32, high: u32| (low..=high).contains(&two_digits(at));

    within(5, 1, 12)
        && within(8, 1, 31)
        && within(11, 0, 23)
        && within(14, 0, 59)
        && within(17, 0, 59)
}
282
/// Reports whether strict `.kg` validation is enabled through the `KG_STRICT_FORMAT`
/// environment variable.
///
/// The value is trimmed and compared ASCII-case-insensitively against `1`, `true`, `yes`
/// and `on`. An unset or unreadable variable, or any other value, yields `false`.
fn strict_kg_mode() -> bool {
    std::env::var("KG_STRICT_FORMAT")
        .map(|raw| {
            let normalized = raw.trim().to_ascii_lowercase();
            ["1", "true", "yes", "on"].contains(&normalized.as_str())
        })
        .unwrap_or(false)
}
292
293fn validate_len(
294 line_no: usize,
295 field: &str,
296 value: &str,
297 min: usize,
298 max: usize,
299 strict: bool,
300) -> Result<()> {
301 let len = value.chars().count();
302 if strict && (len < min || len > max) {
303 return Err(anyhow::anyhow!(
304 "invalid {field} length at line {line_no}: expected {min}..={max}, got {len}"
305 ));
306 }
307 Ok(())
308}
309
310fn enforce_field_order(
311 line_no: usize,
312 key: &str,
313 rank: u8,
314 last_rank: &mut u8,
315 section: &str,
316 strict: bool,
317) -> Result<()> {
318 if strict && rank < *last_rank {
319 return Err(anyhow::anyhow!(
320 "invalid field order at line {line_no}: {key} in {section} block"
321 ));
322 }
323 if rank > *last_rank {
324 *last_rank = rank;
325 }
326 Ok(())
327}
328
/// Extracts the value of a `<key> <value>` line.
///
/// Returns `Some("")` when the line is exactly `key`, `Some(value)` when the line is
/// `key`, one space, then `value` (which is returned untrimmed), and `None` otherwise.
fn field_value<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    let after_key = line.strip_prefix(key)?;
    if after_key.is_empty() {
        // The line consists of the key alone.
        return Some("");
    }
    // Exactly one separating space is consumed; anything after it belongs to the value.
    after_key.strip_prefix(' ')
}
337
/// Parses the line-oriented `.kg` text format into a [`GraphFile`] named `graph_name`.
///
/// Blank lines and lines starting with `#` are skipped. Recognised lines:
/// - `@ <type-code>:<id>` opens a node block. Node fields are `N`, `D`, `A`, `F`, `E`,
///   `C`, `V`, `P`, `S` and `- <key> <value>` custom properties.
/// - `> <relation-code> <target-id>` adds an edge from the current node. Edge fields are
///   `d`, `i`, `x` and `- edge_feedback_*` custom properties.
/// - `! <note-id> <node-id>` opens a note block. Note fields are `b`, `t`, `a`, `e`,
///   `p`, `s`.
///
/// When `strict` is true, field order within a block and text lengths are validated.
/// The `E`, `i` and `x` timestamps are validated regardless of `strict`.
///
/// After all lines are read, list fields are de-duplicated case-insensitively and
/// sorted, edges and notes are sorted, and the metadata counts are refreshed.
///
/// # Errors
/// Returns an error for malformed node/note/edge headers, unrecognised lines, content
/// before the first node, `d`/`i`/`x` fields without a preceding edge, invalid
/// timestamps, and (in strict mode) out-of-order fields or out-of-range lengths.
fn parse_kg(raw: &str, graph_name: &str, strict: bool) -> Result<GraphFile> {
    let mut graph = GraphFile::new(graph_name);
    // At most one of these is "open" at a time; it is pushed into `graph` when the next
    // header starts or the input ends.
    let mut current_node: Option<Node> = None;
    let mut current_note: Option<Note> = None;
    // Index into `graph.edges` of the edge that `d`/`i`/`x`/`- edge_*` lines apply to.
    let mut current_edge_index: Option<usize> = None;
    // Highest field rank seen so far in the current node / note / edge block.
    let mut last_node_rank: u8 = 0;
    let mut last_note_rank: u8 = 0;
    let mut last_edge_rank: u8 = 0;

    for (idx, line) in raw.lines().enumerate() {
        let line_no = idx + 1;
        let raw_line = line.strip_suffix('\r').unwrap_or(line);
        // Headers, comments and edges are matched on `trimmed`; field lines are matched
        // on `raw_line`, so text values keep their surrounding whitespace.
        let trimmed = raw_line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        // Node header: `@ <type-code>:<id>`.
        if let Some(rest) = trimmed.strip_prefix("@ ") {
            if let Some(note) = current_note.take() {
                graph.notes.push(note);
            }
            if let Some(node) = current_node.take() {
                graph.nodes.push(node);
            }
            // Split at the first ':' only, so the id itself may contain colons.
            let (type_code, node_id) = rest.split_once(':').ok_or_else(|| {
                anyhow::anyhow!("invalid node header at line {line_no}: {trimmed}")
            })?;
            current_node = Some(Node {
                id: node_id.trim().to_owned(),
                r#type: code_to_node_type(type_code.trim()).to_owned(),
                name: String::new(),
                properties: NodeProperties::default(),
                source_files: Vec::new(),
            });
            current_edge_index = None;
            last_node_rank = 0;
            last_edge_rank = 0;
            continue;
        }

        // Note header: `! <note-id> <node-id>`.
        if let Some(rest) = trimmed.strip_prefix("! ") {
            if let Some(node) = current_node.take() {
                graph.nodes.push(node);
            }
            if let Some(note) = current_note.take() {
                graph.notes.push(note);
            }
            let mut parts = rest.split_whitespace();
            let id = parts.next().ok_or_else(|| {
                anyhow::anyhow!("invalid note header at line {line_no}: {trimmed}")
            })?;
            let node_id = parts.next().ok_or_else(|| {
                anyhow::anyhow!("invalid note header at line {line_no}: {trimmed}")
            })?;
            current_note = Some(Note {
                id: id.to_owned(),
                node_id: node_id.to_owned(),
                ..Default::default()
            });
            current_edge_index = None;
            last_note_rank = 0;
            continue;
        }

        // Inside a note block every remaining line must be one of the note fields.
        if let Some(note) = current_note.as_mut() {
            if let Some(rest) = field_value(raw_line, "b") {
                enforce_field_order(line_no, "b", 1, &mut last_note_rank, "note", strict)?;
                note.body = parse_text_field(rest);
                continue;
            }
            if let Some(rest) = field_value(raw_line, "t") {
                enforce_field_order(line_no, "t", 2, &mut last_note_rank, "note", strict)?;
                let value = parse_text_field(rest);
                if !value.is_empty() {
                    note.tags.push(value);
                }
                continue;
            }
            if let Some(rest) = field_value(raw_line, "a") {
                enforce_field_order(line_no, "a", 3, &mut last_note_rank, "note", strict)?;
                note.author = parse_text_field(rest);
                continue;
            }
            if let Some(rest) = field_value(raw_line, "e") {
                enforce_field_order(line_no, "e", 4, &mut last_note_rank, "note", strict)?;
                // Stored as-is (trimmed); unlike node `E`, no timestamp validation here.
                note.created_at = rest.trim().to_owned();
                continue;
            }
            if let Some(rest) = field_value(raw_line, "p") {
                enforce_field_order(line_no, "p", 5, &mut last_note_rank, "note", strict)?;
                note.provenance = parse_text_field(rest);
                continue;
            }
            if let Some(rest) = field_value(raw_line, "s") {
                enforce_field_order(line_no, "s", 6, &mut last_note_rank, "note", strict)?;
                let value = parse_text_field(rest);
                if !value.is_empty() {
                    note.source_files.push(value);
                }
                continue;
            }
            return Err(anyhow::anyhow!(
                "unrecognized note line at {line_no}: {trimmed}"
            ));
        }

        // Everything below belongs to a node block, so a node must be open.
        let Some(node) = current_node.as_mut() else {
            return Err(anyhow::anyhow!(
                "unexpected line before first node at line {line_no}: {trimmed}"
            ));
        };

        if let Some(rest) = field_value(raw_line, "N") {
            enforce_field_order(line_no, "N", 1, &mut last_node_rank, "node", strict)?;
            let value = parse_text_field(rest);
            validate_len(line_no, "N", &value, 1, 120, strict)?;
            node.name = value;
            continue;
        }
        if let Some(rest) = field_value(raw_line, "D") {
            enforce_field_order(line_no, "D", 2, &mut last_node_rank, "node", strict)?;
            let value = parse_text_field(rest);
            validate_len(line_no, "D", &value, 1, 200, strict)?;
            node.properties.description = value;
            continue;
        }
        if let Some(rest) = field_value(raw_line, "A") {
            enforce_field_order(line_no, "A", 3, &mut last_node_rank, "node", strict)?;
            let value = parse_text_field(rest);
            validate_len(line_no, "A", &value, 1, 80, strict)?;
            node.properties.alias.push(value);
            continue;
        }
        if let Some(rest) = field_value(raw_line, "F") {
            enforce_field_order(line_no, "F", 4, &mut last_node_rank, "node", strict)?;
            let value = parse_text_field(rest);
            validate_len(line_no, "F", &value, 1, 200, strict)?;
            node.properties.key_facts.push(value);
            continue;
        }
        if let Some(rest) = field_value(raw_line, "E") {
            enforce_field_order(line_no, "E", 5, &mut last_node_rank, "node", strict)?;
            let value = rest.trim();
            // An empty timestamp is allowed; a non-empty one must be well-formed.
            if !value.is_empty() && !parse_utc_timestamp(value) {
                return Err(anyhow::anyhow!(
                    "invalid E timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ"
                ));
            }
            node.properties.created_at = value.to_owned();
            continue;
        }
        if let Some(rest) = field_value(raw_line, "C") {
            enforce_field_order(line_no, "C", 6, &mut last_node_rank, "node", strict)?;
            // An unparseable confidence becomes `None` rather than an error.
            if !rest.trim().is_empty() {
                node.properties.confidence = rest.trim().parse::<f64>().ok();
            }
            continue;
        }
        if let Some(rest) = field_value(raw_line, "V") {
            enforce_field_order(line_no, "V", 7, &mut last_node_rank, "node", strict)?;
            // An unparseable importance leaves the current (default) value in place.
            if let Ok(value) = rest.trim().parse::<u8>() {
                node.properties.importance = value;
            }
            continue;
        }
        if let Some(rest) = field_value(raw_line, "P") {
            enforce_field_order(line_no, "P", 8, &mut last_node_rank, "node", strict)?;
            node.properties.provenance = parse_text_field(rest);
            continue;
        }
        if let Some(rest) = field_value(raw_line, "S") {
            // Rank 10: rank 9 is used by node-level `-` custom properties below.
            enforce_field_order(line_no, "S", 10, &mut last_node_rank, "node", strict)?;
            let value = parse_text_field(rest);
            validate_len(line_no, "S", &value, 1, 200, strict)?;
            node.source_files.push(value);
            continue;
        }

        // Edge header: `> <relation-code> <target-id>`, sourced at the current node.
        if let Some(rest) = trimmed.strip_prefix("> ") {
            let mut parts = rest.split_whitespace();
            let relation = parts.next().ok_or_else(|| {
                anyhow::anyhow!("missing relation in edge at line {line_no}: {trimmed}")
            })?;
            let target_id = parts.next().ok_or_else(|| {
                anyhow::anyhow!("missing target id in edge at line {line_no}: {trimmed}")
            })?;
            graph.edges.push(Edge {
                source_id: node.id.clone(),
                relation: code_to_relation(relation).to_owned(),
                target_id: target_id.to_owned(),
                properties: EdgeProperties::default(),
            });
            current_edge_index = Some(graph.edges.len() - 1);
            last_edge_rank = 0;
            continue;
        }

        if let Some(rest) = field_value(raw_line, "d") {
            enforce_field_order(line_no, "d", 1, &mut last_edge_rank, "edge", strict)?;
            let edge_idx = current_edge_index.ok_or_else(|| {
                anyhow::anyhow!("edge detail without preceding edge at line {line_no}")
            })?;
            let value = parse_text_field(rest);
            validate_len(line_no, "d", &value, 1, 200, strict)?;
            graph.edges[edge_idx].properties.detail = value;
            continue;
        }

        if let Some(rest) = field_value(raw_line, "i") {
            enforce_field_order(line_no, "i", 2, &mut last_edge_rank, "edge", strict)?;
            let edge_idx = current_edge_index.ok_or_else(|| {
                anyhow::anyhow!("edge valid_from without preceding edge at line {line_no}")
            })?;
            let value = rest.trim();
            if !value.is_empty() && !parse_utc_timestamp(value) {
                return Err(anyhow::anyhow!(
                    "invalid i timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ"
                ));
            }
            graph.edges[edge_idx].properties.valid_from = value.to_owned();
            continue;
        }

        if let Some(rest) = field_value(raw_line, "x") {
            enforce_field_order(line_no, "x", 3, &mut last_edge_rank, "edge", strict)?;
            let edge_idx = current_edge_index.ok_or_else(|| {
                anyhow::anyhow!("edge valid_to without preceding edge at line {line_no}")
            })?;
            let value = rest.trim();
            if !value.is_empty() && !parse_utc_timestamp(value) {
                return Err(anyhow::anyhow!(
                    "invalid x timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ"
                ));
            }
            graph.edges[edge_idx].properties.valid_to = value.to_owned();
            continue;
        }

        // Custom property: `- <key> <value>`. Keys starting with `edge_feedback_` apply
        // to the current edge; all others apply to the node.
        if let Some(rest) = field_value(raw_line, "-") {
            let (key, value) = rest
                .split_once(char::is_whitespace)
                .map(|(key, value)| (key.trim(), value))
                .unwrap_or((rest.trim(), ""));
            let is_edge_custom = matches!(
                key,
                "edge_feedback_score" | "edge_feedback_count" | "edge_feedback_last_ts_ms"
            );
            if is_edge_custom {
                enforce_field_order(line_no, "-", 4, &mut last_edge_rank, "edge", strict)?;
            } else {
                enforce_field_order(line_no, "-", 9, &mut last_node_rank, "node", strict)?;
            }
            match key {
                "domain_area" => node.properties.domain_area = parse_text_field(value),
                "feedback_score" => {
                    node.properties.feedback_score = value.trim().parse::<f64>().unwrap_or(0.0)
                }
                "feedback_count" => {
                    node.properties.feedback_count = value.trim().parse::<u64>().unwrap_or(0)
                }
                "feedback_last_ts_ms" => {
                    node.properties.feedback_last_ts_ms = value.trim().parse::<u64>().ok()
                }
                // Edge properties are silently dropped when no edge is open.
                "edge_feedback_score" => {
                    if let Some(edge_idx) = current_edge_index {
                        graph.edges[edge_idx].properties.feedback_score =
                            value.trim().parse::<f64>().unwrap_or(0.0);
                    }
                }
                "edge_feedback_count" => {
                    if let Some(edge_idx) = current_edge_index {
                        graph.edges[edge_idx].properties.feedback_count =
                            value.trim().parse::<u64>().unwrap_or(0);
                    }
                }
                "edge_feedback_last_ts_ms" => {
                    if let Some(edge_idx) = current_edge_index {
                        graph.edges[edge_idx].properties.feedback_last_ts_ms =
                            value.trim().parse::<u64>().ok();
                    }
                }
                // Unknown custom keys are ignored.
                _ => {}
            }
            continue;
        }

        return Err(anyhow::anyhow!("unrecognized line at {line_no}: {trimmed}"));
    }

    // Flush whichever block was still open at end of input.
    if let Some(node) = current_node.take() {
        graph.nodes.push(node);
    }
    if let Some(note) = current_note.take() {
        graph.notes.push(note);
    }

    // Normalise list fields: drop case-insensitive duplicates, then sort.
    for node in &mut graph.nodes {
        node.properties.alias =
            sort_case_insensitive(&dedupe_case_insensitive(node.properties.alias.clone()));
        node.properties.key_facts =
            sort_case_insensitive(&dedupe_case_insensitive(node.properties.key_facts.clone()));
        node.source_files =
            sort_case_insensitive(&dedupe_case_insensitive(node.source_files.clone()));
    }

    // Order edges by source, relation, target, then detail.
    graph.edges.sort_by(|a, b| {
        a.source_id
            .cmp(&b.source_id)
            .then_with(|| a.relation.cmp(&b.relation))
            .then_with(|| a.target_id.cmp(&b.target_id))
            .then_with(|| a.properties.detail.cmp(&b.properties.detail))
    });

    for note in &mut graph.notes {
        note.tags = sort_case_insensitive(&dedupe_case_insensitive(note.tags.clone()));
        note.source_files =
            sort_case_insensitive(&dedupe_case_insensitive(note.source_files.clone()));
    }
    // Order notes by id, attached node id, then creation timestamp.
    graph.notes.sort_by(|a, b| {
        a.id.cmp(&b.id)
            .then_with(|| a.node_id.cmp(&b.node_id))
            .then_with(|| a.created_at.cmp(&b.created_at))
    });

    graph.refresh_counts();
    Ok(graph)
}
665
666fn serialize_kg(graph: &GraphFile) -> String {
667 let mut out = String::new();
668 let mut nodes = graph.nodes.clone();
669 nodes.sort_by(|a, b| a.id.cmp(&b.id));
670
671 for node in nodes {
672 out.push_str(&format!(
673 "@ {}:{}\n",
674 node_type_to_code(&node.r#type),
675 node.id
676 ));
677 push_text_line(&mut out, "N", &node.name);
678 push_text_line(&mut out, "D", &node.properties.description);
679
680 for alias in sort_case_insensitive(&node.properties.alias) {
681 push_text_line(&mut out, "A", &alias);
682 }
683 for fact in sort_case_insensitive(&node.properties.key_facts) {
684 push_text_line(&mut out, "F", &fact);
685 }
686
687 if !node.properties.created_at.is_empty() {
688 out.push_str(&format!("E {}\n", node.properties.created_at));
689 }
690 if let Some(confidence) = node.properties.confidence {
691 out.push_str(&format!("C {}\n", confidence));
692 }
693 out.push_str(&format!("V {}\n", node.properties.importance));
694 if !node.properties.provenance.is_empty() {
695 push_text_line(&mut out, "P", &node.properties.provenance);
696 }
697 if !node.properties.domain_area.is_empty() {
698 out.push_str("- domain_area ");
699 out.push_str(&escape_kg_text(&node.properties.domain_area));
700 out.push('\n');
701 }
702 if node.properties.feedback_score != 0.0 {
703 out.push_str(&format!(
704 "- feedback_score {}\n",
705 node.properties.feedback_score
706 ));
707 }
708 if node.properties.feedback_count != 0 {
709 out.push_str(&format!(
710 "- feedback_count {}\n",
711 node.properties.feedback_count
712 ));
713 }
714 if let Some(ts) = node.properties.feedback_last_ts_ms {
715 out.push_str(&format!("- feedback_last_ts_ms {}\n", ts));
716 }
717
718 for source in sort_case_insensitive(&node.source_files) {
719 push_text_line(&mut out, "S", &source);
720 }
721
722 let mut edges: Vec<Edge> = graph
723 .edges
724 .iter()
725 .filter(|edge| edge.source_id == node.id)
726 .cloned()
727 .collect();
728 edges.sort_by(|a, b| {
729 a.relation
730 .cmp(&b.relation)
731 .then_with(|| a.target_id.cmp(&b.target_id))
732 .then_with(|| a.properties.detail.cmp(&b.properties.detail))
733 });
734
735 for edge in edges {
736 out.push_str(&format!(
737 "> {} {}\n",
738 relation_to_code(&edge.relation),
739 edge.target_id
740 ));
741 if !edge.properties.detail.is_empty() {
742 push_text_line(&mut out, "d", &edge.properties.detail);
743 }
744 if !edge.properties.valid_from.is_empty() {
745 out.push_str(&format!("i {}\n", edge.properties.valid_from));
746 }
747 if !edge.properties.valid_to.is_empty() {
748 out.push_str(&format!("x {}\n", edge.properties.valid_to));
749 }
750 if edge.properties.feedback_score != 0.0 {
751 out.push_str(&format!(
752 "- edge_feedback_score {}\n",
753 edge.properties.feedback_score
754 ));
755 }
756 if edge.properties.feedback_count != 0 {
757 out.push_str(&format!(
758 "- edge_feedback_count {}\n",
759 edge.properties.feedback_count
760 ));
761 }
762 if let Some(ts) = edge.properties.feedback_last_ts_ms {
763 out.push_str(&format!("- edge_feedback_last_ts_ms {}\n", ts));
764 }
765 }
766
767 out.push('\n');
768 }
769
770 let mut notes = graph.notes.clone();
771 notes.sort_by(|a, b| {
772 a.id.cmp(&b.id)
773 .then_with(|| a.node_id.cmp(&b.node_id))
774 .then_with(|| a.created_at.cmp(&b.created_at))
775 });
776 for note in notes {
777 out.push_str(&format!("! {} {}\n", note.id, note.node_id));
778 push_text_line(&mut out, "b", ¬e.body);
779 for tag in sort_case_insensitive(¬e.tags) {
780 push_text_line(&mut out, "t", &tag);
781 }
782 if !note.author.is_empty() {
783 push_text_line(&mut out, "a", ¬e.author);
784 }
785 if !note.created_at.is_empty() {
786 out.push_str(&format!("e {}\n", note.created_at));
787 }
788 if !note.provenance.is_empty() {
789 push_text_line(&mut out, "p", ¬e.provenance);
790 }
791 for source in sort_case_insensitive(¬e.source_files) {
792 push_text_line(&mut out, "s", &source);
793 }
794 out.push('\n');
795 }
796
797 out
798}
799
/// A knowledge graph as loaded from or saved to a file: metadata plus nodes, edges
/// and notes.
///
/// `nodes`, `edges` and `notes` default to empty when absent from deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphFile {
    /// Name, version, description and cached element counts.
    pub metadata: Metadata,
    /// All nodes of the graph.
    #[serde(default)]
    pub nodes: Vec<Node>,
    /// All edges of the graph; each refers to nodes by id.
    #[serde(default)]
    pub edges: Vec<Edge>,
    /// Notes, each carrying the id of a node in `node_id`.
    #[serde(default)]
    pub notes: Vec<Note>,
}
810
/// Descriptive header of a [`GraphFile`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Metadata {
    /// Graph name.
    pub name: String,
    /// Version string; `GraphFile::new` sets it to `"1.0"`.
    pub version: String,
    /// Free-text description of the graph.
    pub description: String,
    /// Number of nodes; overwritten by `GraphFile::refresh_counts`.
    pub node_count: usize,
    /// Number of edges; overwritten by `GraphFile::refresh_counts`.
    pub edge_count: usize,
}
819
/// A single node of the knowledge graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Identifier used by edges and notes to refer to this node.
    pub id: String,
    /// Node type name (for example `Concept`); serialized under the key `type`.
    #[serde(rename = "type")]
    pub r#type: String,
    /// Node name (the `N` field in `.kg` files).
    pub name: String,
    /// Additional properties; defaults to `NodeProperties::default()` when absent.
    #[serde(default)]
    pub properties: NodeProperties,
    /// Source file entries (the `S` fields in `.kg` files).
    #[serde(default)]
    pub source_files: Vec<String>,
}
831
/// Optional properties attached to a [`Node`]. Every field has a serde default, so any
/// of them may be missing from deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeProperties {
    /// Description text (the `D` field in `.kg` files).
    #[serde(default)]
    pub description: String,
    /// Domain area (the `- domain_area` custom property in `.kg` files).
    #[serde(default)]
    pub domain_area: String,
    /// Provenance text (the `P` field in `.kg` files).
    #[serde(default)]
    pub provenance: String,
    /// Confidence value, if any (the `C` field in `.kg` files).
    #[serde(default)]
    pub confidence: Option<f64>,
    /// Creation timestamp (the `E` field); the `.kg` parser requires the form
    /// `YYYY-MM-DDTHH:MM:SSZ` when non-empty.
    #[serde(default)]
    pub created_at: String,
    /// Importance (the `V` field); defaults to `default_importance()`.
    #[serde(default = "default_importance")]
    pub importance: u8,
    /// Key facts (the `F` fields in `.kg` files).
    #[serde(default)]
    pub key_facts: Vec<String>,
    /// Alternative names (the `A` fields in `.kg` files).
    #[serde(default)]
    pub alias: Vec<String>,
    /// Feedback score; omitted from `.kg` output when it is `0.0`.
    #[serde(default)]
    pub feedback_score: f64,
    /// Feedback count; omitted from `.kg` output when it is `0`.
    #[serde(default)]
    pub feedback_count: u64,
    /// Timestamp of the last feedback, if any (presumably Unix milliseconds, going by
    /// the `_ts_ms` suffix; confirm against the code that sets it).
    #[serde(default)]
    pub feedback_last_ts_ms: Option<u64>,
}
857
/// Importance given to a node when none is specified; used both as the serde default
/// for `NodeProperties::importance` and by `NodeProperties::default()`.
fn default_importance() -> u8 {
    4
}
861
/// Hand-written instead of derived so that `importance` starts at
/// `default_importance()` rather than `0`.
impl Default for NodeProperties {
    /// Returns properties with empty strings and lists, no confidence, zero feedback
    /// and the default importance.
    fn default() -> Self {
        Self {
            description: String::new(),
            domain_area: String::new(),
            provenance: String::new(),
            confidence: None,
            created_at: String::new(),
            importance: default_importance(),
            key_facts: Vec::new(),
            alias: Vec::new(),
            feedback_score: 0.0,
            feedback_count: 0,
            feedback_last_ts_ms: None,
        }
    }
}
879
/// A directed, typed relation between two nodes, referenced by id.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Id of the node the edge starts from.
    pub source_id: String,
    /// Relation name (for example `READS_FROM`).
    pub relation: String,
    /// Id of the node the edge points to.
    pub target_id: String,
    /// Additional properties; defaults to `EdgeProperties::default()` when absent.
    #[serde(default)]
    pub properties: EdgeProperties,
}
888
/// Optional properties attached to an [`Edge`]. Every field has a serde default.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EdgeProperties {
    /// Detail text (the `d` field in `.kg` files).
    #[serde(default)]
    pub detail: String,
    /// Start of validity (the `i` field); the `.kg` parser requires the form
    /// `YYYY-MM-DDTHH:MM:SSZ` when non-empty.
    #[serde(default)]
    pub valid_from: String,
    /// End of validity (the `x` field); same format requirement as `valid_from`.
    #[serde(default)]
    pub valid_to: String,
    /// Feedback score; omitted from `.kg` output when it is `0.0`.
    #[serde(default)]
    pub feedback_score: f64,
    /// Feedback count; omitted from `.kg` output when it is `0`.
    #[serde(default)]
    pub feedback_count: u64,
    /// Timestamp of the last feedback, if any (presumably Unix milliseconds, going by
    /// the `_ts_ms` suffix; confirm against the code that sets it).
    #[serde(default)]
    pub feedback_last_ts_ms: Option<u64>,
}
904
/// A note attached to a node.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Note {
    /// Identifier of the note.
    pub id: String,
    /// Id of the node this note belongs to.
    pub node_id: String,
    /// Note text (the `b` field in `.kg` files).
    #[serde(default)]
    pub body: String,
    /// Tags (the `t` fields in `.kg` files).
    #[serde(default)]
    pub tags: Vec<String>,
    /// Author (the `a` field in `.kg` files).
    #[serde(default)]
    pub author: String,
    /// Creation timestamp (the `e` field); stored as given, without format validation
    /// by the `.kg` parser.
    #[serde(default)]
    pub created_at: String,
    /// Provenance text (the `p` field in `.kg` files).
    #[serde(default)]
    pub provenance: String,
    /// Source file entries (the `s` fields in `.kg` files).
    #[serde(default)]
    pub source_files: Vec<String>,
}
922
923impl GraphFile {
924 pub fn new(name: &str) -> Self {
925 Self {
926 metadata: Metadata {
927 name: name.to_owned(),
928 version: "1.0".to_owned(),
929 description: format!("Knowledge graph: {name}"),
930 node_count: 0,
931 edge_count: 0,
932 },
933 nodes: Vec::new(),
934 edges: Vec::new(),
935 notes: Vec::new(),
936 }
937 }
938
939 pub fn load(path: &Path) -> Result<Self> {
940 let raw = fs::read_to_string(path)
941 .with_context(|| format!("failed to read graph: {}", path.display()))?;
942 let ext = path
943 .extension()
944 .and_then(|ext| ext.to_str())
945 .unwrap_or("json");
946 let mut graph = if ext == "kg" {
947 if raw.trim_start().starts_with('{') {
948 serde_json::from_str(&raw).with_context(|| {
949 format!(
950 "invalid legacy JSON payload in .kg file: {}",
951 path.display()
952 )
953 })?
954 } else {
955 let graph_name = path
956 .file_stem()
957 .and_then(|stem| stem.to_str())
958 .unwrap_or("graph");
959 parse_kg(&raw, graph_name, strict_kg_mode())?
960 }
961 } else {
962 serde_json::from_str(&raw)
963 .with_context(|| format!("invalid JSON: {}", path.display()))?
964 };
965 graph.refresh_counts();
966 Ok(graph)
967 }
968
969 pub fn save(&self, path: &Path) -> Result<()> {
970 let mut graph = self.clone();
971 graph.refresh_counts();
972 let ext = path
973 .extension()
974 .and_then(|ext| ext.to_str())
975 .unwrap_or("json");
976 let raw = if ext == "kg" {
977 serialize_kg(&graph)
978 } else {
979 serde_json::to_string_pretty(&graph).context("failed to serialize graph")?
980 };
981 atomic_write(path, &raw)?;
982 backup_graph_if_stale(path, &raw)
983 }
984
985 pub fn refresh_counts(&mut self) {
986 self.metadata.node_count = self.nodes.len();
987 self.metadata.edge_count = self.edges.len();
988 }
989
990 pub fn node_by_id(&self, id: &str) -> Option<&Node> {
991 self.nodes.iter().find(|node| node.id == id)
992 }
993
994 pub fn node_by_id_sorted(&self, id: &str) -> Option<&Node> {
995 self.nodes
996 .binary_search_by(|node| node.id.as_str().cmp(id))
997 .ok()
998 .and_then(|idx| self.nodes.get(idx))
999 }
1000
1001 pub fn node_by_id_mut(&mut self, id: &str) -> Option<&mut Node> {
1002 self.nodes.iter_mut().find(|node| node.id == id)
1003 }
1004
1005 pub fn has_edge(&self, source_id: &str, relation: &str, target_id: &str) -> bool {
1006 self.edges.iter().any(|edge| {
1007 edge.source_id == source_id && edge.relation == relation && edge.target_id == target_id
1008 })
1009 }
1010}
1011
1012#[cfg(test)]
1013mod tests {
1014 use super::{GraphFile, parse_kg};
1015
1016 #[test]
1017 fn save_and_load_kg_roundtrip_keeps_core_fields() {
1018 let dir = tempfile::tempdir().expect("temp dir");
1019 let path = dir.path().join("graph.kg");
1020
1021 let mut graph = GraphFile::new("graph");
1022 graph.nodes.push(crate::Node {
1023 id: "concept:refrigerator".to_owned(),
1024 r#type: "Concept".to_owned(),
1025 name: "Lodowka".to_owned(),
1026 properties: crate::NodeProperties {
1027 description: "Urzadzenie chlodzace".to_owned(),
1028 provenance: "U".to_owned(),
1029 created_at: "2026-04-04T12:00:00Z".to_owned(),
1030 importance: 5,
1031 key_facts: vec!["A".to_owned(), "b".to_owned()],
1032 alias: vec!["Fridge".to_owned()],
1033 ..Default::default()
1034 },
1035 source_files: vec!["docs/fridge.md".to_owned()],
1036 });
1037 graph.edges.push(crate::Edge {
1038 source_id: "concept:refrigerator".to_owned(),
1039 relation: "READS_FROM".to_owned(),
1040 target_id: "datastore:settings".to_owned(),
1041 properties: crate::EdgeProperties {
1042 detail: "runtime read".to_owned(),
1043 valid_from: "2026-04-04T12:00:00Z".to_owned(),
1044 valid_to: "2026-04-05T12:00:00Z".to_owned(),
1045 ..Default::default()
1046 },
1047 });
1048
1049 graph.save(&path).expect("save kg");
1050 let raw = std::fs::read_to_string(&path).expect("read kg");
1051 assert!(raw.contains("@ K:concept:refrigerator"));
1052 assert!(raw.contains("> R datastore:settings"));
1053
1054 let loaded = GraphFile::load(&path).expect("load kg");
1055 assert_eq!(loaded.nodes.len(), 1);
1056 assert_eq!(loaded.edges.len(), 1);
1057 let node = &loaded.nodes[0];
1058 assert_eq!(node.properties.importance, 5);
1059 assert_eq!(node.properties.provenance, "U");
1060 assert_eq!(node.name, "Lodowka");
1061 assert_eq!(loaded.edges[0].relation, "READS_FROM");
1062 assert_eq!(loaded.edges[0].properties.detail, "runtime read");
1063 assert_eq!(
1064 loaded.edges[0].properties.valid_from,
1065 "2026-04-04T12:00:00Z"
1066 );
1067 assert_eq!(loaded.edges[0].properties.valid_to, "2026-04-05T12:00:00Z");
1068 }
1069
1070 #[test]
1071 fn load_supports_legacy_json_payload_with_kg_extension() {
1072 let dir = tempfile::tempdir().expect("temp dir");
1073 let path = dir.path().join("legacy.kg");
1074 std::fs::write(
1075 &path,
1076 r#"{
1077 "metadata": {"name": "legacy", "version": "1.0", "description": "x", "node_count": 0, "edge_count": 0},
1078 "nodes": [],
1079 "edges": [],
1080 "notes": []
1081}"#,
1082 )
1083 .expect("write legacy payload");
1084
1085 let loaded = GraphFile::load(&path).expect("load legacy kg");
1086 assert_eq!(loaded.metadata.name, "legacy");
1087 assert!(loaded.nodes.is_empty());
1088 }
1089
1090 #[test]
1091 fn load_kg_rejects_invalid_timestamp_format() {
1092 let dir = tempfile::tempdir().expect("temp dir");
1093 let path = dir.path().join("invalid-ts.kg");
1094 std::fs::write(
1095 &path,
1096 "@ K:concept:x\nN X\nD Desc\nE 2026-04-04 12:00:00\nV 4\nP U\n",
1097 )
1098 .expect("write kg");
1099
1100 let err = GraphFile::load(&path).expect_err("invalid timestamp should fail");
1101 let msg = format!("{err:#}");
1102 assert!(msg.contains("invalid E timestamp"));
1103 }
1104
1105 #[test]
1106 fn load_kg_rejects_invalid_edge_timestamp_format() {
1107 let dir = tempfile::tempdir().expect("temp dir");
1108 let path = dir.path().join("invalid-edge-ts.kg");
1109 std::fs::write(
1110 &path,
1111 "@ K:concept:x\nN X\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n> H concept:y\ni 2026-04-04 12:00:00\n",
1112 )
1113 .expect("write kg");
1114
1115 let err = GraphFile::load(&path).expect_err("invalid edge timestamp should fail");
1116 let msg = format!("{err:#}");
1117 assert!(msg.contains("invalid i timestamp"));
1118 }
1119
1120 #[test]
1121 fn load_kg_preserves_whitespace_and_dedupes_exact_duplicates() {
1122 let dir = tempfile::tempdir().expect("temp dir");
1123 let path = dir.path().join("normalize.kg");
1124 std::fs::write(
1125 &path,
1126 "@ K:concept:x\nN Name With Spaces \nD Desc with spaces \nA Alias\nA Alias\nF fact one\nF FACT one\nS docs/a.md\nS docs/a.md\nE 2026-04-04T12:00:00Z\nV 4\nP U\n",
1127 )
1128 .expect("write kg");
1129
1130 let loaded = GraphFile::load(&path).expect("load kg");
1131 let node = &loaded.nodes[0];
1132 assert_eq!(node.name, " Name With Spaces ");
1133 assert_eq!(node.properties.description, " Desc with spaces ");
1134 assert_eq!(node.properties.alias.len(), 1);
1135 assert_eq!(node.properties.key_facts.len(), 2);
1136 assert_eq!(node.source_files.len(), 1);
1137 }
1138
1139 #[test]
1140 fn save_and_load_kg_roundtrip_keeps_notes_without_json_fallback() {
1141 let dir = tempfile::tempdir().expect("temp dir");
1142 let path = dir.path().join("graph-notes.kg");
1143
1144 let mut graph = GraphFile::new("graph-notes");
1145 graph.nodes.push(crate::Node {
1146 id: "concept:refrigerator".to_owned(),
1147 r#type: "Concept".to_owned(),
1148 name: "Lodowka".to_owned(),
1149 properties: crate::NodeProperties {
1150 description: "Urzadzenie chlodzace".to_owned(),
1151 provenance: "U".to_owned(),
1152 created_at: "2026-04-04T12:00:00Z".to_owned(),
1153 ..Default::default()
1154 },
1155 source_files: vec!["docs/fridge.md".to_owned()],
1156 });
1157 graph.notes.push(crate::Note {
1158 id: "note:1".to_owned(),
1159 node_id: "concept:refrigerator".to_owned(),
1160 body: "Important maintenance insight".to_owned(),
1161 tags: vec!["Maintenance".to_owned(), "maintenance".to_owned()],
1162 author: "alice".to_owned(),
1163 created_at: "1712345678".to_owned(),
1164 provenance: "U".to_owned(),
1165 source_files: vec!["docs/a.md".to_owned(), "docs/a.md".to_owned()],
1166 });
1167
1168 graph.save(&path).expect("save kg");
1169 let raw = std::fs::read_to_string(&path).expect("read kg");
1170 assert!(raw.contains("! note:1 concept:refrigerator"));
1171 assert!(!raw.trim_start().starts_with('{'));
1172
1173 let loaded = GraphFile::load(&path).expect("load kg");
1174 assert_eq!(loaded.notes.len(), 1);
1175 let note = &loaded.notes[0];
1176 assert_eq!(note.id, "note:1");
1177 assert_eq!(note.node_id, "concept:refrigerator");
1178 assert_eq!(note.body, "Important maintenance insight");
1179 assert_eq!(note.tags.len(), 1);
1180 assert_eq!(note.source_files.len(), 1);
1181 }
1182
1183 #[test]
1184 fn save_and_load_kg_roundtrip_preserves_multiline_text_fields() {
1185 let dir = tempfile::tempdir().expect("temp dir");
1186 let path = dir.path().join("graph-multiline.kg");
1187
1188 let mut graph = GraphFile::new("graph-multiline");
1189 graph.nodes.push(crate::Node {
1190 id: "concept:refrigerator".to_owned(),
1191 r#type: "Concept".to_owned(),
1192 name: "Lodowka\nSmart".to_owned(),
1193 properties: crate::NodeProperties {
1194 description: "Linia 1\nLinia 2\\nliteral".to_owned(),
1195 provenance: "user\nimport".to_owned(),
1196 created_at: "2026-04-04T12:00:00Z".to_owned(),
1197 importance: 5,
1198 key_facts: vec!["Fakt 1\nFakt 2".to_owned()],
1199 alias: vec!["Alias\nA".to_owned()],
1200 domain_area: "ops\nfield".to_owned(),
1201 ..Default::default()
1202 },
1203 source_files: vec!["docs/fridge\nnotes.md".to_owned()],
1204 });
1205 graph.edges.push(crate::Edge {
1206 source_id: "concept:refrigerator".to_owned(),
1207 relation: "READS_FROM".to_owned(),
1208 target_id: "datastore:settings".to_owned(),
1209 properties: crate::EdgeProperties {
1210 detail: "runtime\nread".to_owned(),
1211 valid_from: "2026-04-04T12:00:00Z".to_owned(),
1212 valid_to: "2026-04-05T12:00:00Z".to_owned(),
1213 ..Default::default()
1214 },
1215 });
1216 graph.notes.push(crate::Note {
1217 id: "note:1".to_owned(),
1218 node_id: "concept:refrigerator".to_owned(),
1219 body: "line1\nline2\\nkeep".to_owned(),
1220 tags: vec!["multi\nline".to_owned()],
1221 author: "alice\nbob".to_owned(),
1222 created_at: "1712345678".to_owned(),
1223 provenance: "manual\nentry".to_owned(),
1224 source_files: vec!["docs/a\nb.md".to_owned()],
1225 });
1226
1227 graph.save(&path).expect("save kg");
1228 let raw = std::fs::read_to_string(&path).expect("read kg");
1229 assert!(raw.contains("N Lodowka\\nSmart"));
1230 assert!(raw.contains("D Linia 1\\nLinia 2\\\\nliteral"));
1231 assert!(raw.contains("- domain_area ops\\nfield"));
1232 assert!(raw.contains("d runtime\\nread"));
1233 assert!(raw.contains("b line1\\nline2\\\\nkeep"));
1234
1235 let loaded = GraphFile::load(&path).expect("load kg");
1236 let node = &loaded.nodes[0];
1237 assert_eq!(node.name, "Lodowka\nSmart");
1238 assert_eq!(node.properties.description, "Linia 1\nLinia 2\\nliteral");
1239 assert_eq!(node.properties.provenance, "user\nimport");
1240 assert_eq!(node.properties.alias, vec!["Alias\nA".to_owned()]);
1241 assert_eq!(node.properties.key_facts, vec!["Fakt 1\nFakt 2".to_owned()]);
1242 assert_eq!(node.properties.domain_area, "ops\nfield");
1243 assert_eq!(node.source_files, vec!["docs/fridge\nnotes.md".to_owned()]);
1244 assert_eq!(loaded.edges[0].properties.detail, "runtime\nread");
1245 let note = &loaded.notes[0];
1246 assert_eq!(note.body, "line1\nline2\\nkeep");
1247 assert_eq!(note.tags, vec!["multi\nline".to_owned()]);
1248 assert_eq!(note.author, "alice\nbob");
1249 assert_eq!(note.provenance, "manual\nentry");
1250 assert_eq!(note.source_files, vec!["docs/a\nb.md".to_owned()]);
1251 }
1252
1253 #[test]
1254 fn strict_mode_rejects_out_of_order_node_fields() {
1255 let raw = "@ K:concept:x\nD Desc\nN Name\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n";
1256 let err = parse_kg(raw, "x", true).expect_err("strict mode should fail on field order");
1257 assert!(format!("{err:#}").contains("invalid field order"));
1258 }
1259
1260 #[test]
1261 fn strict_mode_rejects_overlong_name_but_compat_mode_allows_it() {
1262 let long_name = "N ".to_owned() + &"X".repeat(121);
1263 let raw = format!(
1264 "@ K:concept:x\n{}\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n",
1265 long_name
1266 );
1267
1268 let strict_err = parse_kg(&raw, "x", true).expect_err("strict mode should fail on length");
1269 assert!(format!("{strict_err:#}").contains("invalid N length"));
1270
1271 parse_kg(&raw, "x", false).expect("compat mode keeps permissive behavior");
1272 }
1273
1274 #[test]
1275 fn save_kg_skips_empty_e_and_p_fields() {
1276 let dir = tempfile::tempdir().expect("temp dir");
1277 let path = dir.path().join("no-empty-ep.kg");
1278
1279 let mut graph = GraphFile::new("graph");
1280 graph.nodes.push(crate::Node {
1281 id: "concept:x".to_owned(),
1282 r#type: "Concept".to_owned(),
1283 name: "X".to_owned(),
1284 properties: crate::NodeProperties {
1285 description: "Desc".to_owned(),
1286 provenance: String::new(),
1287 created_at: String::new(),
1288 ..Default::default()
1289 },
1290 source_files: vec!["docs/a.md".to_owned()],
1291 });
1292
1293 graph.save(&path).expect("save kg");
1294 let raw = std::fs::read_to_string(&path).expect("read kg");
1295 assert!(!raw.contains("\nE \n"));
1296 assert!(!raw.contains("\nP \n"));
1297 }
1298}