1use std::fs;
2use std::io::Write;
3use std::path::Path;
4use std::time::{SystemTime, UNIX_EPOCH};
5
6use anyhow::{Context, Result};
7use flate2::Compression;
8use flate2::write::GzEncoder;
9use serde::{Deserialize, Serialize};
10
11fn atomic_write(dest: &Path, data: &str) -> Result<()> {
16 let tmp = dest.with_extension("tmp");
17 fs::write(&tmp, data).with_context(|| format!("failed to write tmp: {}", tmp.display()))?;
18 if dest.exists() {
19 let bak = dest.with_extension("bak");
20 fs::copy(dest, &bak)
21 .with_context(|| format!("failed to create backup: {}", bak.display()))?;
22 }
23 fs::rename(&tmp, dest).with_context(|| format!("failed to rename tmp to {}", dest.display()))
24}
25
26const BACKUP_STALE_SECS: u64 = 60 * 60;
27
28fn backup_graph_if_stale(path: &Path, data: &str) -> Result<()> {
29 let parent = match path.parent() {
30 Some(parent) => parent,
31 None => return Ok(()),
32 };
33 let stem = match path.file_stem().and_then(|s| s.to_str()) {
34 Some(stem) => stem,
35 None => return Ok(()),
36 };
37 let now = SystemTime::now()
38 .duration_since(UNIX_EPOCH)
39 .context("time went backwards")?
40 .as_secs();
41 if let Some(latest) = latest_backup_ts(parent, stem)? {
42 if now.saturating_sub(latest) < BACKUP_STALE_SECS {
43 return Ok(());
44 }
45 }
46
47 let backup_path = parent.join(format!("{stem}.bck.{now}.gz"));
48 let tmp_path = backup_path.with_extension("tmp");
49 let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
50 encoder.write_all(data.as_bytes())?;
51 let encoded = encoder.finish()?;
52 fs::write(&tmp_path, encoded)
53 .with_context(|| format!("failed to write tmp: {}", tmp_path.display()))?;
54 fs::rename(&tmp_path, &backup_path)
55 .with_context(|| format!("failed to rename tmp to {}", backup_path.display()))?;
56 Ok(())
57}
58
59fn latest_backup_ts(dir: &Path, stem: &str) -> Result<Option<u64>> {
60 let prefix = format!("{stem}.bck.");
61 let suffix = ".gz";
62 let mut latest = None;
63 for entry in fs::read_dir(dir).with_context(|| format!("read dir: {}", dir.display()))? {
64 let entry = entry?;
65 let name = entry.file_name();
66 let name = name.to_string_lossy();
67 if !name.starts_with(&prefix) || !name.ends_with(suffix) {
68 continue;
69 }
70 let ts_part = &name[prefix.len()..name.len() - suffix.len()];
71 if let Ok(ts) = ts_part.parse::<u64>() {
72 match latest {
73 Some(current) => {
74 if ts > current {
75 latest = Some(ts);
76 }
77 }
78 None => latest = Some(ts),
79 }
80 }
81 }
82 Ok(latest)
83}
84
/// Map a full node-type name to its one-letter .kg code.
/// Unknown names pass through unchanged, keeping the format forward-compatible.
fn node_type_to_code(node_type: &str) -> &str {
    const CODES: &[(&str, &str)] = &[
        ("Feature", "F"),
        ("Concept", "K"),
        ("Interface", "I"),
        ("Process", "P"),
        ("DataStore", "D"),
        ("Attribute", "A"),
        ("Entity", "Y"),
        ("Note", "N"),
        ("Rule", "R"),
        ("Convention", "C"),
        ("Bug", "B"),
        ("Decision", "Z"),
        ("OpenQuestion", "O"),
        ("Claim", "Q"),
        ("Insight", "W"),
        ("Reference", "M"),
        ("Term", "T"),
        ("Status", "S"),
        ("Doubt", "L"),
    ];
    CODES
        .iter()
        .find(|(full, _)| *full == node_type)
        .map_or(node_type, |(_, code)| code)
}
109
/// Inverse of `node_type_to_code`: expand a one-letter .kg code into the full
/// node-type name. Unknown codes pass through unchanged.
fn code_to_node_type(code: &str) -> &str {
    const TYPES: &[(&str, &str)] = &[
        ("F", "Feature"),
        ("K", "Concept"),
        ("I", "Interface"),
        ("P", "Process"),
        ("D", "DataStore"),
        ("A", "Attribute"),
        ("Y", "Entity"),
        ("N", "Note"),
        ("R", "Rule"),
        ("C", "Convention"),
        ("B", "Bug"),
        ("Z", "Decision"),
        ("O", "OpenQuestion"),
        ("Q", "Claim"),
        ("W", "Insight"),
        ("M", "Reference"),
        ("T", "Term"),
        ("S", "Status"),
        ("L", "Doubt"),
    ];
    TYPES
        .iter()
        .find(|(short, _)| *short == code)
        .map_or(code, |(_, full)| full)
}
134
/// Map a relation name to its one-letter .kg code. Several directional
/// aliases collapse to the same code (e.g. DOCUMENTED_IN and DOCUMENTS both
/// become "D"); unknown relations pass through unchanged.
fn relation_to_code(relation: &str) -> &str {
    const CODES: &[(&str, &str)] = &[
        ("DOCUMENTED_IN", "D"),
        ("DOCUMENTS", "D"),
        ("HAS", "H"),
        ("TRIGGERS", "T"),
        ("AFFECTED_BY", "A"),
        ("AFFECTS", "A"),
        ("READS_FROM", "R"),
        ("READS", "R"),
        ("GOVERNED_BY", "G"),
        ("GOVERNS", "G"),
        ("DEPENDS_ON", "O"),
        ("AVAILABLE_IN", "I"),
        ("SUPPORTS", "S"),
        ("SUMMARIZES", "U"),
        ("RELATED_TO", "L"),
        ("CONTRADICTS", "V"),
        ("CREATED_BY", "C"),
        ("CREATES", "C"),
    ];
    CODES
        .iter()
        .find(|(name, _)| *name == relation)
        .map_or(relation, |(_, code)| code)
}
153
/// Expand a one-letter edge code into its canonical relation name.
/// Codes that collapsed aliases on encode (see `relation_to_code`) expand to
/// the canonical spelling only (e.g. "D" -> DOCUMENTED_IN, not DOCUMENTS).
/// Unknown codes pass through unchanged.
fn code_to_relation(code: &str) -> &str {
    const RELATIONS: &[(&str, &str)] = &[
        ("D", "DOCUMENTED_IN"),
        ("H", "HAS"),
        ("T", "TRIGGERS"),
        ("A", "AFFECTED_BY"),
        ("R", "READS_FROM"),
        ("G", "GOVERNED_BY"),
        ("O", "DEPENDS_ON"),
        ("I", "AVAILABLE_IN"),
        ("S", "SUPPORTS"),
        ("U", "SUMMARIZES"),
        ("L", "RELATED_TO"),
        ("V", "CONTRADICTS"),
        ("C", "CREATED_BY"),
    ];
    RELATIONS
        .iter()
        .find(|(short, _)| *short == code)
        .map_or(code, |(_, name)| name)
}
172
/// Return a sorted copy of `values`, ordered case-insensitively (ASCII) with
/// the original byte order as a tiebreaker so the result is deterministic.
fn sort_case_insensitive(values: &[String]) -> Vec<String> {
    let mut sorted = values.to_vec();
    // Key = (lowercased form, original form): same total order as comparing
    // lowercase first and falling back to the raw string on ties.
    sorted.sort_by_cached_key(|value| (value.to_ascii_lowercase(), value.clone()));
    sorted
}
182
/// Collapse all runs of whitespace in `value` to single spaces and strip
/// leading/trailing whitespace.
fn normalize_text(value: &str) -> String {
    let mut out = String::with_capacity(value.len());
    for (i, word) in value.split_whitespace().enumerate() {
        if i > 0 {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
186
/// Drop later duplicates that differ only by ASCII case, keeping the first
/// occurrence and the original relative order.
fn dedupe_case_insensitive(values: Vec<String>) -> Vec<String> {
    let mut values = values;
    let mut seen = std::collections::HashSet::new();
    // `insert` returns false for an already-seen lowercase key, so `retain`
    // keeps exactly the first spelling of each case-folded value.
    values.retain(|value| seen.insert(value.to_ascii_lowercase()));
    values
}
198
/// Validate that `value` is exactly the 20-byte UTC form
/// `YYYY-MM-DDTHH:MM:SSZ` with every numeric field in range.
///
/// Day-of-month is only range-checked (1..=31), not validated against the
/// month, so e.g. "2026-02-31T00:00:00Z" is accepted.
fn parse_utc_timestamp(value: &str) -> bool {
    let bytes = value.as_bytes();
    if bytes.len() != 20 {
        return false;
    }
    // Fixed separator positions; every other byte must be an ASCII digit.
    const SEPARATORS: [(usize, u8); 6] = [
        (4, b'-'),
        (7, b'-'),
        (10, b'T'),
        (13, b':'),
        (16, b':'),
        (19, b'Z'),
    ];
    for (pos, byte) in bytes.iter().copied().enumerate() {
        let ok = match SEPARATORS.iter().find(|(p, _)| *p == pos) {
            Some(&(_, sep)) => byte == sep,
            None => byte.is_ascii_digit(),
        };
        if !ok {
            return false;
        }
    }

    // Every numeric position is an ASCII digit now, so slicing is safe and
    // the parses below cannot fail; the range checks do the real work.
    let num = |lo: usize, hi: usize| value[lo..hi].parse::<u32>().ok();
    matches!(num(5, 7), Some(1..=12))
        && matches!(num(8, 10), Some(1..=31))
        && matches!(num(11, 13), Some(0..=23))
        && matches!(num(14, 16), Some(0..=59))
        && matches!(num(17, 19), Some(0..=59))
}
240
/// True when the `KG_STRICT_FORMAT` environment variable holds a truthy
/// value ("1", "true", "yes", "on"; case-insensitive, surrounding whitespace
/// ignored). Unset or unreadable means non-strict.
fn strict_kg_mode() -> bool {
    std::env::var("KG_STRICT_FORMAT")
        .map(|value| {
            matches!(
                value.trim().to_ascii_lowercase().as_str(),
                "1" | "true" | "yes" | "on"
            )
        })
        .unwrap_or(false)
}
250
251fn validate_len(
252 line_no: usize,
253 field: &str,
254 value: &str,
255 min: usize,
256 max: usize,
257 strict: bool,
258) -> Result<()> {
259 let len = value.chars().count();
260 if strict && (len < min || len > max) {
261 return Err(anyhow::anyhow!(
262 "invalid {field} length at line {line_no}: expected {min}..={max}, got {len}"
263 ));
264 }
265 Ok(())
266}
267
268fn enforce_field_order(
269 line_no: usize,
270 key: &str,
271 rank: u8,
272 last_rank: &mut u8,
273 section: &str,
274 strict: bool,
275) -> Result<()> {
276 if strict && rank < *last_rank {
277 return Err(anyhow::anyhow!(
278 "invalid field order at line {line_no}: {key} in {section} block"
279 ));
280 }
281 if rank > *last_rank {
282 *last_rank = rank;
283 }
284 Ok(())
285}
286
/// Extract the value of a `<key> <value>` line.
///
/// Returns `Some("")` when the line is exactly the key, `Some(value)` when
/// the key is followed by a single space, and `None` otherwise — including
/// when the key is merely a prefix of a longer token (e.g. "Nx" for key "N").
fn field_value<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    match line.strip_prefix(key) {
        Some("") => Some(""),
        Some(rest) => rest.strip_prefix(' '),
        None => None,
    }
}
295
/// Parse the compact line-oriented .kg text format into a `GraphFile`.
///
/// Format overview (as handled below):
/// - `@ <type-code>:<id>` opens a node block; `! <note-id> <node-id>` opens a
///   note block; `> <relation-code> <target-id>` appends an edge under the
///   current node.
/// - Uppercase single-letter fields (N/D/A/F/E/C/V/P/S) attach to the open
///   node; lowercase fields (b/t/a/e/p/s) to the open note; lowercase d/i/x
///   to the most recent edge; `- <key> <value>` carries custom node or edge
///   properties.
/// - Blank lines and `#` comments are skipped.
///
/// When `strict` is true, field order (via `enforce_field_order`) and length
/// limits (via `validate_len`) are enforced; timestamps in E/i/x are always
/// validated. After parsing, multi-valued fields are deduplicated and sorted
/// case-insensitively, edges and notes are put into a stable order, and the
/// metadata counts are refreshed.
fn parse_kg(raw: &str, graph_name: &str, strict: bool) -> Result<GraphFile> {
    let mut graph = GraphFile::new(graph_name);
    // Parser state: at most one node OR note block is open at a time;
    // `current_edge_index` points at the edge that d/i/x lines decorate.
    let mut current_node: Option<Node> = None;
    let mut current_note: Option<Note> = None;
    let mut current_edge_index: Option<usize> = None;
    // Highest field rank seen so far per section, for strict order checks.
    let mut last_node_rank: u8 = 0;
    let mut last_note_rank: u8 = 0;
    let mut last_edge_rank: u8 = 0;

    for (idx, line) in raw.lines().enumerate() {
        // 1-based line number for error messages.
        let line_no = idx + 1;
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        // Node header: flush whatever block is open, then start a new node.
        if let Some(rest) = trimmed.strip_prefix("@ ") {
            if let Some(note) = current_note.take() {
                graph.notes.push(note);
            }
            if let Some(node) = current_node.take() {
                graph.nodes.push(node);
            }
            let (type_code, node_id) = rest.split_once(':').ok_or_else(|| {
                anyhow::anyhow!("invalid node header at line {line_no}: {trimmed}")
            })?;
            current_node = Some(Node {
                id: node_id.trim().to_owned(),
                r#type: code_to_node_type(type_code.trim()).to_owned(),
                name: String::new(),
                properties: NodeProperties::default(),
                source_files: Vec::new(),
            });
            current_edge_index = None;
            last_node_rank = 0;
            last_edge_rank = 0;
            continue;
        }

        // Note header: "! <note-id> <node-id>"; flushes any open block.
        if let Some(rest) = trimmed.strip_prefix("! ") {
            if let Some(node) = current_node.take() {
                graph.nodes.push(node);
            }
            if let Some(note) = current_note.take() {
                graph.notes.push(note);
            }
            let mut parts = rest.split_whitespace();
            let id = parts.next().ok_or_else(|| {
                anyhow::anyhow!("invalid note header at line {line_no}: {trimmed}")
            })?;
            let node_id = parts.next().ok_or_else(|| {
                anyhow::anyhow!("invalid note header at line {line_no}: {trimmed}")
            })?;
            current_note = Some(Note {
                id: id.to_owned(),
                node_id: node_id.to_owned(),
                ..Default::default()
            });
            current_edge_index = None;
            last_note_rank = 0;
            continue;
        }

        // While a note block is open, ONLY note fields are legal; anything
        // else is an error (this branch never falls through to node fields).
        if let Some(note) = current_note.as_mut() {
            if let Some(rest) = field_value(trimmed, "b") {
                enforce_field_order(line_no, "b", 1, &mut last_note_rank, "note", strict)?;
                note.body = normalize_text(rest.trim());
                continue;
            }
            if let Some(rest) = field_value(trimmed, "t") {
                enforce_field_order(line_no, "t", 2, &mut last_note_rank, "note", strict)?;
                let value = normalize_text(rest.trim());
                if !value.is_empty() {
                    note.tags.push(value);
                }
                continue;
            }
            if let Some(rest) = field_value(trimmed, "a") {
                enforce_field_order(line_no, "a", 3, &mut last_note_rank, "note", strict)?;
                note.author = normalize_text(rest.trim());
                continue;
            }
            if let Some(rest) = field_value(trimmed, "e") {
                enforce_field_order(line_no, "e", 4, &mut last_note_rank, "note", strict)?;
                // NOTE(review): note timestamps are stored verbatim, without
                // the UTC format check applied to node/edge timestamps.
                note.created_at = rest.trim().to_owned();
                continue;
            }
            if let Some(rest) = field_value(trimmed, "p") {
                enforce_field_order(line_no, "p", 5, &mut last_note_rank, "note", strict)?;
                note.provenance = normalize_text(rest.trim());
                continue;
            }
            if let Some(rest) = field_value(trimmed, "s") {
                enforce_field_order(line_no, "s", 6, &mut last_note_rank, "note", strict)?;
                let value = normalize_text(rest.trim());
                if !value.is_empty() {
                    note.source_files.push(value);
                }
                continue;
            }
            return Err(anyhow::anyhow!(
                "unrecognized note line at {line_no}: {trimmed}"
            ));
        }

        // Every remaining line kind requires an open node.
        let Some(node) = current_node.as_mut() else {
            return Err(anyhow::anyhow!(
                "unexpected line before first node at line {line_no}: {trimmed}"
            ));
        };

        // Node scalar / multi-value fields, in their canonical rank order.
        if let Some(rest) = field_value(trimmed, "N") {
            enforce_field_order(line_no, "N", 1, &mut last_node_rank, "node", strict)?;
            let value = normalize_text(rest.trim());
            validate_len(line_no, "N", &value, 1, 120, strict)?;
            node.name = value;
            continue;
        }
        if let Some(rest) = field_value(trimmed, "D") {
            enforce_field_order(line_no, "D", 2, &mut last_node_rank, "node", strict)?;
            let value = normalize_text(rest.trim());
            validate_len(line_no, "D", &value, 1, 200, strict)?;
            node.properties.description = value;
            continue;
        }
        if let Some(rest) = field_value(trimmed, "A") {
            enforce_field_order(line_no, "A", 3, &mut last_node_rank, "node", strict)?;
            let value = normalize_text(rest.trim());
            validate_len(line_no, "A", &value, 1, 80, strict)?;
            node.properties.alias.push(value);
            continue;
        }
        if let Some(rest) = field_value(trimmed, "F") {
            enforce_field_order(line_no, "F", 4, &mut last_node_rank, "node", strict)?;
            let value = normalize_text(rest.trim());
            validate_len(line_no, "F", &value, 1, 200, strict)?;
            node.properties.key_facts.push(value);
            continue;
        }
        if let Some(rest) = field_value(trimmed, "E") {
            enforce_field_order(line_no, "E", 5, &mut last_node_rank, "node", strict)?;
            let value = rest.trim();
            // Timestamp format is enforced even outside strict mode.
            if !value.is_empty() && !parse_utc_timestamp(value) {
                return Err(anyhow::anyhow!(
                    "invalid E timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ"
                ));
            }
            node.properties.created_at = value.to_owned();
            continue;
        }
        if let Some(rest) = field_value(trimmed, "C") {
            enforce_field_order(line_no, "C", 6, &mut last_node_rank, "node", strict)?;
            // Unparseable confidence values are silently dropped.
            if !rest.trim().is_empty() {
                node.properties.confidence = rest.trim().parse::<f64>().ok();
            }
            continue;
        }
        if let Some(rest) = field_value(trimmed, "V") {
            enforce_field_order(line_no, "V", 7, &mut last_node_rank, "node", strict)?;
            // Unparseable importance keeps the default (see default_importance).
            if let Ok(value) = rest.trim().parse::<u8>() {
                node.properties.importance = value;
            }
            continue;
        }
        if let Some(rest) = field_value(trimmed, "P") {
            enforce_field_order(line_no, "P", 8, &mut last_node_rank, "node", strict)?;
            node.properties.provenance = normalize_text(rest.trim());
            continue;
        }
        if let Some(rest) = field_value(trimmed, "S") {
            // Rank 10 — node custom "-" fields occupy rank 9 below.
            enforce_field_order(line_no, "S", 10, &mut last_node_rank, "node", strict)?;
            let value = normalize_text(rest.trim());
            validate_len(line_no, "S", &value, 1, 200, strict)?;
            node.source_files.push(value);
            continue;
        }

        // Edge header: "> <relation-code> <target-id>" under the open node.
        if let Some(rest) = trimmed.strip_prefix("> ") {
            let mut parts = rest.split_whitespace();
            let relation = parts.next().ok_or_else(|| {
                anyhow::anyhow!("missing relation in edge at line {line_no}: {trimmed}")
            })?;
            let target_id = parts.next().ok_or_else(|| {
                anyhow::anyhow!("missing target id in edge at line {line_no}: {trimmed}")
            })?;
            graph.edges.push(Edge {
                source_id: node.id.clone(),
                relation: code_to_relation(relation).to_owned(),
                target_id: target_id.to_owned(),
                properties: EdgeProperties::default(),
            });
            current_edge_index = Some(graph.edges.len() - 1);
            last_edge_rank = 0;
            continue;
        }

        // Edge detail fields attach to the most recently opened edge.
        if let Some(rest) = field_value(trimmed, "d") {
            enforce_field_order(line_no, "d", 1, &mut last_edge_rank, "edge", strict)?;
            let edge_idx = current_edge_index.ok_or_else(|| {
                anyhow::anyhow!("edge detail without preceding edge at line {line_no}")
            })?;
            let value = normalize_text(rest.trim());
            validate_len(line_no, "d", &value, 1, 200, strict)?;
            graph.edges[edge_idx].properties.detail = value;
            continue;
        }

        if let Some(rest) = field_value(trimmed, "i") {
            enforce_field_order(line_no, "i", 2, &mut last_edge_rank, "edge", strict)?;
            let edge_idx = current_edge_index.ok_or_else(|| {
                anyhow::anyhow!("edge valid_from without preceding edge at line {line_no}")
            })?;
            let value = rest.trim();
            if !value.is_empty() && !parse_utc_timestamp(value) {
                return Err(anyhow::anyhow!(
                    "invalid i timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ"
                ));
            }
            graph.edges[edge_idx].properties.valid_from = value.to_owned();
            continue;
        }

        if let Some(rest) = field_value(trimmed, "x") {
            enforce_field_order(line_no, "x", 3, &mut last_edge_rank, "edge", strict)?;
            let edge_idx = current_edge_index.ok_or_else(|| {
                anyhow::anyhow!("edge valid_to without preceding edge at line {line_no}")
            })?;
            let value = rest.trim();
            if !value.is_empty() && !parse_utc_timestamp(value) {
                return Err(anyhow::anyhow!(
                    "invalid x timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ"
                ));
            }
            graph.edges[edge_idx].properties.valid_to = value.to_owned();
            continue;
        }

        // Custom "- <key> <value>" properties. The key decides whether the
        // line belongs to the current edge (edge_feedback_*) or the node;
        // unknown keys are accepted and ignored.
        if let Some(rest) = field_value(trimmed, "-") {
            let mut parts = rest.trim().splitn(2, char::is_whitespace);
            let key = parts.next().unwrap_or("").trim();
            let value = parts.next().unwrap_or("").trim();
            let is_edge_custom = matches!(
                key,
                "edge_feedback_score" | "edge_feedback_count" | "edge_feedback_last_ts_ms"
            );
            if is_edge_custom {
                enforce_field_order(line_no, "-", 4, &mut last_edge_rank, "edge", strict)?;
            } else {
                enforce_field_order(line_no, "-", 9, &mut last_node_rank, "node", strict)?;
            }
            match key {
                "domain_area" => node.properties.domain_area = value.to_owned(),
                "feedback_score" => {
                    node.properties.feedback_score = value.parse::<f64>().unwrap_or(0.0)
                }
                "feedback_count" => {
                    node.properties.feedback_count = value.parse::<u64>().unwrap_or(0)
                }
                "feedback_last_ts_ms" => {
                    node.properties.feedback_last_ts_ms = value.parse::<u64>().ok()
                }
                // Edge feedback fields are silently dropped when no edge is
                // open for this node.
                "edge_feedback_score" => {
                    if let Some(edge_idx) = current_edge_index {
                        graph.edges[edge_idx].properties.feedback_score =
                            value.parse::<f64>().unwrap_or(0.0);
                    }
                }
                "edge_feedback_count" => {
                    if let Some(edge_idx) = current_edge_index {
                        graph.edges[edge_idx].properties.feedback_count =
                            value.parse::<u64>().unwrap_or(0);
                    }
                }
                "edge_feedback_last_ts_ms" => {
                    if let Some(edge_idx) = current_edge_index {
                        graph.edges[edge_idx].properties.feedback_last_ts_ms =
                            value.parse::<u64>().ok();
                    }
                }
                _ => {}
            }
            continue;
        }

        return Err(anyhow::anyhow!("unrecognized line at {line_no}: {trimmed}"));
    }

    // Flush the final open block.
    if let Some(node) = current_node.take() {
        graph.nodes.push(node);
    }
    if let Some(note) = current_note.take() {
        graph.notes.push(note);
    }

    // Canonicalize multi-valued fields: dedupe case-insensitively, then sort.
    for node in &mut graph.nodes {
        node.properties.alias =
            sort_case_insensitive(&dedupe_case_insensitive(node.properties.alias.clone()));
        node.properties.key_facts =
            sort_case_insensitive(&dedupe_case_insensitive(node.properties.key_facts.clone()));
        node.source_files =
            sort_case_insensitive(&dedupe_case_insensitive(node.source_files.clone()));
    }

    // Stable edge order: (source, relation, target, detail).
    graph.edges.sort_by(|a, b| {
        a.source_id
            .cmp(&b.source_id)
            .then_with(|| a.relation.cmp(&b.relation))
            .then_with(|| a.target_id.cmp(&b.target_id))
            .then_with(|| a.properties.detail.cmp(&b.properties.detail))
    });

    for note in &mut graph.notes {
        note.tags = sort_case_insensitive(&dedupe_case_insensitive(note.tags.clone()));
        note.source_files =
            sort_case_insensitive(&dedupe_case_insensitive(note.source_files.clone()));
    }
    // Stable note order: (id, node_id, created_at).
    graph.notes.sort_by(|a, b| {
        a.id.cmp(&b.id)
            .then_with(|| a.node_id.cmp(&b.node_id))
            .then_with(|| a.created_at.cmp(&b.created_at))
    });

    graph.refresh_counts();
    Ok(graph)
}
621
622fn serialize_kg(graph: &GraphFile) -> String {
623 let mut out = String::new();
624 let mut nodes = graph.nodes.clone();
625 nodes.sort_by(|a, b| a.id.cmp(&b.id));
626
627 for node in nodes {
628 out.push_str(&format!(
629 "@ {}:{}\n",
630 node_type_to_code(&node.r#type),
631 node.id
632 ));
633 out.push_str(&format!("N {}\n", node.name));
634 out.push_str(&format!("D {}\n", node.properties.description));
635
636 for alias in sort_case_insensitive(&node.properties.alias) {
637 out.push_str(&format!("A {}\n", alias));
638 }
639 for fact in sort_case_insensitive(&node.properties.key_facts) {
640 out.push_str(&format!("F {}\n", fact));
641 }
642
643 if !node.properties.created_at.is_empty() {
644 out.push_str(&format!("E {}\n", node.properties.created_at));
645 }
646 if let Some(confidence) = node.properties.confidence {
647 out.push_str(&format!("C {}\n", confidence));
648 }
649 out.push_str(&format!("V {}\n", node.properties.importance));
650 if !node.properties.provenance.is_empty() {
651 out.push_str(&format!("P {}\n", node.properties.provenance));
652 }
653 if !node.properties.domain_area.is_empty() {
654 out.push_str(&format!("- domain_area {}\n", node.properties.domain_area));
655 }
656 if node.properties.feedback_score != 0.0 {
657 out.push_str(&format!(
658 "- feedback_score {}\n",
659 node.properties.feedback_score
660 ));
661 }
662 if node.properties.feedback_count != 0 {
663 out.push_str(&format!(
664 "- feedback_count {}\n",
665 node.properties.feedback_count
666 ));
667 }
668 if let Some(ts) = node.properties.feedback_last_ts_ms {
669 out.push_str(&format!("- feedback_last_ts_ms {}\n", ts));
670 }
671
672 for source in sort_case_insensitive(&node.source_files) {
673 out.push_str(&format!("S {}\n", source));
674 }
675
676 let mut edges: Vec<Edge> = graph
677 .edges
678 .iter()
679 .filter(|edge| edge.source_id == node.id)
680 .cloned()
681 .collect();
682 edges.sort_by(|a, b| {
683 a.relation
684 .cmp(&b.relation)
685 .then_with(|| a.target_id.cmp(&b.target_id))
686 .then_with(|| a.properties.detail.cmp(&b.properties.detail))
687 });
688
689 for edge in edges {
690 out.push_str(&format!(
691 "> {} {}\n",
692 relation_to_code(&edge.relation),
693 edge.target_id
694 ));
695 if !edge.properties.detail.is_empty() {
696 out.push_str(&format!("d {}\n", edge.properties.detail));
697 }
698 if !edge.properties.valid_from.is_empty() {
699 out.push_str(&format!("i {}\n", edge.properties.valid_from));
700 }
701 if !edge.properties.valid_to.is_empty() {
702 out.push_str(&format!("x {}\n", edge.properties.valid_to));
703 }
704 if edge.properties.feedback_score != 0.0 {
705 out.push_str(&format!(
706 "- edge_feedback_score {}\n",
707 edge.properties.feedback_score
708 ));
709 }
710 if edge.properties.feedback_count != 0 {
711 out.push_str(&format!(
712 "- edge_feedback_count {}\n",
713 edge.properties.feedback_count
714 ));
715 }
716 if let Some(ts) = edge.properties.feedback_last_ts_ms {
717 out.push_str(&format!("- edge_feedback_last_ts_ms {}\n", ts));
718 }
719 }
720
721 out.push('\n');
722 }
723
724 let mut notes = graph.notes.clone();
725 notes.sort_by(|a, b| {
726 a.id.cmp(&b.id)
727 .then_with(|| a.node_id.cmp(&b.node_id))
728 .then_with(|| a.created_at.cmp(&b.created_at))
729 });
730 for note in notes {
731 out.push_str(&format!("! {} {}\n", note.id, note.node_id));
732 out.push_str(&format!("b {}\n", note.body));
733 for tag in sort_case_insensitive(¬e.tags) {
734 out.push_str(&format!("t {}\n", tag));
735 }
736 if !note.author.is_empty() {
737 out.push_str(&format!("a {}\n", note.author));
738 }
739 if !note.created_at.is_empty() {
740 out.push_str(&format!("e {}\n", note.created_at));
741 }
742 if !note.provenance.is_empty() {
743 out.push_str(&format!("p {}\n", note.provenance));
744 }
745 for source in sort_case_insensitive(¬e.source_files) {
746 out.push_str(&format!("s {}\n", source));
747 }
748 out.push('\n');
749 }
750
751 out
752}
753
/// On-disk representation of a whole knowledge graph: metadata plus flat
/// lists of nodes, edges, and free-form notes. Serialized either as JSON or
/// as the compact .kg text format (see `parse_kg` / `serialize_kg`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphFile {
    pub metadata: Metadata,
    /// May be absent in serialized payloads; defaults to empty.
    #[serde(default)]
    pub nodes: Vec<Node>,
    #[serde(default)]
    pub edges: Vec<Edge>,
    #[serde(default)]
    pub notes: Vec<Note>,
}
764
/// Graph-level header information.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Metadata {
    pub name: String,
    pub version: String,
    pub description: String,
    /// Cached counts; kept in sync with the vectors by
    /// `GraphFile::refresh_counts`, not authoritative on their own.
    pub node_count: usize,
    pub edge_count: usize,
}
773
/// A single graph node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    pub id: String,
    /// Full type name (e.g. "Concept"); serialized as "type". The .kg format
    /// stores it as a one-letter code (see `node_type_to_code`).
    #[serde(rename = "type")]
    pub r#type: String,
    pub name: String,
    #[serde(default)]
    pub properties: NodeProperties,
    #[serde(default)]
    pub source_files: Vec<String>,
}
785
/// Scalar and multi-valued attributes attached to a `Node`. All fields are
/// optional in serialized form; `importance` falls back to
/// `default_importance()` (4) instead of zero.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeProperties {
    #[serde(default)]
    pub description: String,
    #[serde(default)]
    pub domain_area: String,
    #[serde(default)]
    pub provenance: String,
    /// `None` when never set; the .kg parser drops unparseable values.
    #[serde(default)]
    pub confidence: Option<f64>,
    /// Creation timestamp; the .kg parser enforces `YYYY-MM-DDTHH:MM:SSZ`.
    #[serde(default)]
    pub created_at: String,
    /// Importance rank; serde default is 4 (see `default_importance`).
    #[serde(default = "default_importance")]
    pub importance: u8,
    #[serde(default)]
    pub key_facts: Vec<String>,
    #[serde(default)]
    pub alias: Vec<String>,
    /// Feedback aggregates; zero / `None` are the "unset" defaults and are
    /// omitted by `serialize_kg`.
    #[serde(default)]
    pub feedback_score: f64,
    #[serde(default)]
    pub feedback_count: u64,
    #[serde(default)]
    pub feedback_last_ts_ms: Option<u64>,
}
811
/// Serde default for `NodeProperties::importance`; also used by the manual
/// `Default` impl so both construction paths agree.
fn default_importance() -> u8 {
    4
}
815
816impl Default for NodeProperties {
817 fn default() -> Self {
818 Self {
819 description: String::new(),
820 domain_area: String::new(),
821 provenance: String::new(),
822 confidence: None,
823 created_at: String::new(),
824 importance: default_importance(),
825 key_facts: Vec::new(),
826 alias: Vec::new(),
827 feedback_score: 0.0,
828 feedback_count: 0,
829 feedback_last_ts_ms: None,
830 }
831 }
832}
833
/// Directed edge `source_id --relation--> target_id`. The relation is the
/// full name (e.g. "READS_FROM"); .kg files store a one-letter code (see
/// `relation_to_code`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    pub source_id: String,
    pub relation: String,
    pub target_id: String,
    #[serde(default)]
    pub properties: EdgeProperties,
}
842
/// Optional edge attributes; empty strings / zeros / `None` mean "unset" and
/// are omitted by `serialize_kg`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EdgeProperties {
    #[serde(default)]
    pub detail: String,
    /// Validity window bounds; the .kg parser enforces the
    /// `YYYY-MM-DDTHH:MM:SSZ` format when non-empty.
    #[serde(default)]
    pub valid_from: String,
    #[serde(default)]
    pub valid_to: String,
    /// Feedback aggregates, mirroring the node-level fields.
    #[serde(default)]
    pub feedback_score: f64,
    #[serde(default)]
    pub feedback_count: u64,
    #[serde(default)]
    pub feedback_last_ts_ms: Option<u64>,
}
858
/// Free-form annotation attached to a node by id (`node_id`).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Note {
    pub id: String,
    /// Id of the node this note belongs to; not validated against `nodes`.
    pub node_id: String,
    #[serde(default)]
    pub body: String,
    #[serde(default)]
    pub tags: Vec<String>,
    #[serde(default)]
    pub author: String,
    /// Stored verbatim — the .kg parser does not format-check note timestamps.
    #[serde(default)]
    pub created_at: String,
    #[serde(default)]
    pub provenance: String,
    #[serde(default)]
    pub source_files: Vec<String>,
}
876
impl GraphFile {
    /// Create an empty graph with default metadata for `name`.
    pub fn new(name: &str) -> Self {
        Self {
            metadata: Metadata {
                name: name.to_owned(),
                version: "1.0".to_owned(),
                description: format!("Knowledge graph: {name}"),
                node_count: 0,
                edge_count: 0,
            },
            nodes: Vec::new(),
            edges: Vec::new(),
            notes: Vec::new(),
        }
    }

    /// Load a graph from disk.
    ///
    /// `.kg` files are parsed with the textual parser unless the payload
    /// starts with `{`, in which case they are treated as a legacy JSON
    /// payload; any other extension (defaulting to "json" when absent) is
    /// parsed as JSON. Counts are refreshed after loading.
    pub fn load(path: &Path) -> Result<Self> {
        let raw = fs::read_to_string(path)
            .with_context(|| format!("failed to read graph: {}", path.display()))?;
        let ext = path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("json");
        let mut graph = if ext == "kg" {
            if raw.trim_start().starts_with('{') {
                serde_json::from_str(&raw).with_context(|| {
                    format!(
                        "invalid legacy JSON payload in .kg file: {}",
                        path.display()
                    )
                })?
            } else {
                // Strictness comes from the KG_STRICT_FORMAT env var.
                let graph_name = path
                    .file_stem()
                    .and_then(|stem| stem.to_str())
                    .unwrap_or("graph");
                parse_kg(&raw, graph_name, strict_kg_mode())?
            }
        } else {
            serde_json::from_str(&raw)
                .with_context(|| format!("invalid JSON: {}", path.display()))?
        };
        graph.refresh_counts();
        Ok(graph)
    }

    /// Serialize and persist the graph (format chosen by extension, as in
    /// `load`), writing atomically and then creating a periodic compressed
    /// backup via `backup_graph_if_stale`. `self` is cloned so counts can be
    /// refreshed without mutating the caller's value.
    pub fn save(&self, path: &Path) -> Result<()> {
        let mut graph = self.clone();
        graph.refresh_counts();
        let ext = path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("json");
        let raw = if ext == "kg" {
            serialize_kg(&graph)
        } else {
            serde_json::to_string_pretty(&graph).context("failed to serialize graph")?
        };
        atomic_write(path, &raw)?;
        backup_graph_if_stale(path, &raw)
    }

    /// Recompute the cached metadata counts from the vectors.
    pub fn refresh_counts(&mut self) {
        self.metadata.node_count = self.nodes.len();
        self.metadata.edge_count = self.edges.len();
    }

    /// Linear-scan lookup of a node by id.
    pub fn node_by_id(&self, id: &str) -> Option<&Node> {
        self.nodes.iter().find(|node| node.id == id)
    }

    /// Binary-search lookup of a node by id.
    ///
    /// NOTE(review): correct only when `self.nodes` is sorted by id —
    /// `parse_kg` keeps file order and does not sort nodes, so callers must
    /// guarantee the ordering themselves; confirm at call sites.
    pub fn node_by_id_sorted(&self, id: &str) -> Option<&Node> {
        self.nodes
            .binary_search_by(|node| node.id.as_str().cmp(id))
            .ok()
            .and_then(|idx| self.nodes.get(idx))
    }

    /// Mutable linear-scan lookup of a node by id.
    pub fn node_by_id_mut(&mut self, id: &str) -> Option<&mut Node> {
        self.nodes.iter_mut().find(|node| node.id == id)
    }

    /// True if an edge with exactly this (source, relation, target) triple
    /// exists; edge properties are not considered.
    pub fn has_edge(&self, source_id: &str, relation: &str, target_id: &str) -> bool {
        self.edges.iter().any(|edge| {
            edge.source_id == source_id && edge.relation == relation && edge.target_id == target_id
        })
    }
}
965
966#[cfg(test)]
967mod tests {
968 use super::{GraphFile, parse_kg};
969
970 #[test]
971 fn save_and_load_kg_roundtrip_keeps_core_fields() {
972 let dir = tempfile::tempdir().expect("temp dir");
973 let path = dir.path().join("graph.kg");
974
975 let mut graph = GraphFile::new("graph");
976 graph.nodes.push(crate::Node {
977 id: "concept:refrigerator".to_owned(),
978 r#type: "Concept".to_owned(),
979 name: "Lodowka".to_owned(),
980 properties: crate::NodeProperties {
981 description: "Urzadzenie chlodzace".to_owned(),
982 provenance: "U".to_owned(),
983 created_at: "2026-04-04T12:00:00Z".to_owned(),
984 importance: 5,
985 key_facts: vec!["A".to_owned(), "b".to_owned()],
986 alias: vec!["Fridge".to_owned()],
987 ..Default::default()
988 },
989 source_files: vec!["docs/fridge.md".to_owned()],
990 });
991 graph.edges.push(crate::Edge {
992 source_id: "concept:refrigerator".to_owned(),
993 relation: "READS_FROM".to_owned(),
994 target_id: "datastore:settings".to_owned(),
995 properties: crate::EdgeProperties {
996 detail: "runtime read".to_owned(),
997 valid_from: "2026-04-04T12:00:00Z".to_owned(),
998 valid_to: "2026-04-05T12:00:00Z".to_owned(),
999 ..Default::default()
1000 },
1001 });
1002
1003 graph.save(&path).expect("save kg");
1004 let raw = std::fs::read_to_string(&path).expect("read kg");
1005 assert!(raw.contains("@ K:concept:refrigerator"));
1006 assert!(raw.contains("> R datastore:settings"));
1007
1008 let loaded = GraphFile::load(&path).expect("load kg");
1009 assert_eq!(loaded.nodes.len(), 1);
1010 assert_eq!(loaded.edges.len(), 1);
1011 let node = &loaded.nodes[0];
1012 assert_eq!(node.properties.importance, 5);
1013 assert_eq!(node.properties.provenance, "U");
1014 assert_eq!(node.name, "Lodowka");
1015 assert_eq!(loaded.edges[0].relation, "READS_FROM");
1016 assert_eq!(loaded.edges[0].properties.detail, "runtime read");
1017 assert_eq!(
1018 loaded.edges[0].properties.valid_from,
1019 "2026-04-04T12:00:00Z"
1020 );
1021 assert_eq!(loaded.edges[0].properties.valid_to, "2026-04-05T12:00:00Z");
1022 }
1023
1024 #[test]
1025 fn load_supports_legacy_json_payload_with_kg_extension() {
1026 let dir = tempfile::tempdir().expect("temp dir");
1027 let path = dir.path().join("legacy.kg");
1028 std::fs::write(
1029 &path,
1030 r#"{
1031 "metadata": {"name": "legacy", "version": "1.0", "description": "x", "node_count": 0, "edge_count": 0},
1032 "nodes": [],
1033 "edges": [],
1034 "notes": []
1035}"#,
1036 )
1037 .expect("write legacy payload");
1038
1039 let loaded = GraphFile::load(&path).expect("load legacy kg");
1040 assert_eq!(loaded.metadata.name, "legacy");
1041 assert!(loaded.nodes.is_empty());
1042 }
1043
1044 #[test]
1045 fn load_kg_rejects_invalid_timestamp_format() {
1046 let dir = tempfile::tempdir().expect("temp dir");
1047 let path = dir.path().join("invalid-ts.kg");
1048 std::fs::write(
1049 &path,
1050 "@ K:concept:x\nN X\nD Desc\nE 2026-04-04 12:00:00\nV 4\nP U\n",
1051 )
1052 .expect("write kg");
1053
1054 let err = GraphFile::load(&path).expect_err("invalid timestamp should fail");
1055 let msg = format!("{err:#}");
1056 assert!(msg.contains("invalid E timestamp"));
1057 }
1058
1059 #[test]
1060 fn load_kg_rejects_invalid_edge_timestamp_format() {
1061 let dir = tempfile::tempdir().expect("temp dir");
1062 let path = dir.path().join("invalid-edge-ts.kg");
1063 std::fs::write(
1064 &path,
1065 "@ K:concept:x\nN X\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n> H concept:y\ni 2026-04-04 12:00:00\n",
1066 )
1067 .expect("write kg");
1068
1069 let err = GraphFile::load(&path).expect_err("invalid edge timestamp should fail");
1070 let msg = format!("{err:#}");
1071 assert!(msg.contains("invalid i timestamp"));
1072 }
1073
1074 #[test]
1075 fn load_kg_normalizes_and_dedupes_multivalue_fields() {
1076 let dir = tempfile::tempdir().expect("temp dir");
1077 let path = dir.path().join("normalize.kg");
1078 std::fs::write(
1079 &path,
1080 "@ K:concept:x\nN Name With Spaces \nD Desc with spaces \nA Alias\nA alias\nF fact one\nF FACT one\nS docs/a.md\nS docs/a.md\nE 2026-04-04T12:00:00Z\nV 4\nP U\n",
1081 )
1082 .expect("write kg");
1083
1084 let loaded = GraphFile::load(&path).expect("load kg");
1085 let node = &loaded.nodes[0];
1086 assert_eq!(node.name, "Name With Spaces");
1087 assert_eq!(node.properties.description, "Desc with spaces");
1088 assert_eq!(node.properties.alias.len(), 1);
1089 assert_eq!(node.properties.key_facts.len(), 1);
1090 assert_eq!(node.source_files.len(), 1);
1091 }
1092
1093 #[test]
1094 fn save_and_load_kg_roundtrip_keeps_notes_without_json_fallback() {
1095 let dir = tempfile::tempdir().expect("temp dir");
1096 let path = dir.path().join("graph-notes.kg");
1097
1098 let mut graph = GraphFile::new("graph-notes");
1099 graph.nodes.push(crate::Node {
1100 id: "concept:refrigerator".to_owned(),
1101 r#type: "Concept".to_owned(),
1102 name: "Lodowka".to_owned(),
1103 properties: crate::NodeProperties {
1104 description: "Urzadzenie chlodzace".to_owned(),
1105 provenance: "U".to_owned(),
1106 created_at: "2026-04-04T12:00:00Z".to_owned(),
1107 ..Default::default()
1108 },
1109 source_files: vec!["docs/fridge.md".to_owned()],
1110 });
1111 graph.notes.push(crate::Note {
1112 id: "note:1".to_owned(),
1113 node_id: "concept:refrigerator".to_owned(),
1114 body: "Important maintenance insight".to_owned(),
1115 tags: vec!["Maintenance".to_owned(), "maintenance".to_owned()],
1116 author: "alice".to_owned(),
1117 created_at: "1712345678".to_owned(),
1118 provenance: "U".to_owned(),
1119 source_files: vec!["docs/a.md".to_owned(), "docs/a.md".to_owned()],
1120 });
1121
1122 graph.save(&path).expect("save kg");
1123 let raw = std::fs::read_to_string(&path).expect("read kg");
1124 assert!(raw.contains("! note:1 concept:refrigerator"));
1125 assert!(!raw.trim_start().starts_with('{'));
1126
1127 let loaded = GraphFile::load(&path).expect("load kg");
1128 assert_eq!(loaded.notes.len(), 1);
1129 let note = &loaded.notes[0];
1130 assert_eq!(note.id, "note:1");
1131 assert_eq!(note.node_id, "concept:refrigerator");
1132 assert_eq!(note.body, "Important maintenance insight");
1133 assert_eq!(note.tags.len(), 1);
1134 assert_eq!(note.source_files.len(), 1);
1135 }
1136
1137 #[test]
1138 fn strict_mode_rejects_out_of_order_node_fields() {
1139 let raw = "@ K:concept:x\nD Desc\nN Name\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n";
1140 let err = parse_kg(raw, "x", true).expect_err("strict mode should fail on field order");
1141 assert!(format!("{err:#}").contains("invalid field order"));
1142 }
1143
1144 #[test]
1145 fn strict_mode_rejects_overlong_name_but_compat_mode_allows_it() {
1146 let long_name = "N ".to_owned() + &"X".repeat(121);
1147 let raw = format!(
1148 "@ K:concept:x\n{}\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n",
1149 long_name
1150 );
1151
1152 let strict_err = parse_kg(&raw, "x", true).expect_err("strict mode should fail on length");
1153 assert!(format!("{strict_err:#}").contains("invalid N length"));
1154
1155 parse_kg(&raw, "x", false).expect("compat mode keeps permissive behavior");
1156 }
1157
1158 #[test]
1159 fn save_kg_skips_empty_e_and_p_fields() {
1160 let dir = tempfile::tempdir().expect("temp dir");
1161 let path = dir.path().join("no-empty-ep.kg");
1162
1163 let mut graph = GraphFile::new("graph");
1164 graph.nodes.push(crate::Node {
1165 id: "concept:x".to_owned(),
1166 r#type: "Concept".to_owned(),
1167 name: "X".to_owned(),
1168 properties: crate::NodeProperties {
1169 description: "Desc".to_owned(),
1170 provenance: String::new(),
1171 created_at: String::new(),
1172 ..Default::default()
1173 },
1174 source_files: vec!["docs/a.md".to_owned()],
1175 });
1176
1177 graph.save(&path).expect("save kg");
1178 let raw = std::fs::read_to_string(&path).expect("read kg");
1179 assert!(!raw.contains("\nE \n"));
1180 assert!(!raw.contains("\nP \n"));
1181 }
1182}