Skip to main content

kg/
graph.rs

1use std::fs;
2use std::io::Write;
3use std::path::Path;
4use std::time::{SystemTime, UNIX_EPOCH};
5
6use anyhow::{Context, Result};
7use flate2::Compression;
8use flate2::write::GzEncoder;
9use serde::{Deserialize, Serialize};
10
11/// Write `data` to `dest` atomically:
12/// 1. Write to `dest.tmp`
13/// 2. If `dest` already exists, copy it to `dest.bak`
14/// 3. Rename `dest.tmp` -> `dest`
15fn atomic_write(dest: &Path, data: &str) -> Result<()> {
16    let tmp = dest.with_extension("tmp");
17    fs::write(&tmp, data).with_context(|| format!("failed to write tmp: {}", tmp.display()))?;
18    if dest.exists() {
19        let bak = dest.with_extension("bak");
20        fs::copy(dest, &bak)
21            .with_context(|| format!("failed to create backup: {}", bak.display()))?;
22    }
23    fs::rename(&tmp, dest).with_context(|| format!("failed to rename tmp to {}", dest.display()))
24}
25
26const BACKUP_STALE_SECS: u64 = 60 * 60;
27
28fn backup_graph_if_stale(path: &Path, data: &str) -> Result<()> {
29    let parent = match path.parent() {
30        Some(parent) => parent,
31        None => return Ok(()),
32    };
33    let stem = match path.file_stem().and_then(|s| s.to_str()) {
34        Some(stem) => stem,
35        None => return Ok(()),
36    };
37    let now = SystemTime::now()
38        .duration_since(UNIX_EPOCH)
39        .context("time went backwards")?
40        .as_secs();
41    if let Some(latest) = latest_backup_ts(parent, stem)? {
42        if now.saturating_sub(latest) < BACKUP_STALE_SECS {
43            return Ok(());
44        }
45    }
46
47    let backup_path = parent.join(format!("{stem}.bck.{now}.gz"));
48    let tmp_path = backup_path.with_extension("tmp");
49    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
50    encoder.write_all(data.as_bytes())?;
51    let encoded = encoder.finish()?;
52    fs::write(&tmp_path, encoded)
53        .with_context(|| format!("failed to write tmp: {}", tmp_path.display()))?;
54    fs::rename(&tmp_path, &backup_path)
55        .with_context(|| format!("failed to rename tmp to {}", backup_path.display()))?;
56    Ok(())
57}
58
59fn latest_backup_ts(dir: &Path, stem: &str) -> Result<Option<u64>> {
60    let prefix = format!("{stem}.bck.");
61    let suffix = ".gz";
62    let mut latest = None;
63    for entry in fs::read_dir(dir).with_context(|| format!("read dir: {}", dir.display()))? {
64        let entry = entry?;
65        let name = entry.file_name();
66        let name = name.to_string_lossy();
67        if !name.starts_with(&prefix) || !name.ends_with(suffix) {
68            continue;
69        }
70        let ts_part = &name[prefix.len()..name.len() - suffix.len()];
71        if let Ok(ts) = ts_part.parse::<u64>() {
72            match latest {
73                Some(current) => {
74                    if ts > current {
75                        latest = Some(ts);
76                    }
77                }
78                None => latest = Some(ts),
79            }
80        }
81    }
82    Ok(latest)
83}
84
/// Maps a full node type name (for example `Feature`) to its one-letter `.kg` code.
///
/// Matching is case-sensitive. A name without a dedicated code is returned unchanged.
fn node_type_to_code(node_type: &str) -> &str {
    const TYPE_CODES: &[(&str, &str)] = &[
        ("Feature", "F"),
        ("Concept", "K"),
        ("Interface", "I"),
        ("Process", "P"),
        ("DataStore", "D"),
        ("Attribute", "A"),
        ("Entity", "Y"),
        ("Note", "N"),
        ("Rule", "R"),
        ("Convention", "C"),
        ("Bug", "B"),
        ("Decision", "Z"),
        ("OpenQuestion", "O"),
        ("Claim", "Q"),
        ("Insight", "W"),
        ("Reference", "M"),
        ("Term", "T"),
        ("Status", "S"),
        ("Doubt", "L"),
    ];

    for &(name, code) in TYPE_CODES {
        if name == node_type {
            return code;
        }
    }
    node_type
}
109
/// Expands a one-letter `.kg` node code (for example `F`) to its full type name.
///
/// Anything that is not exactly one known upper-case letter is returned unchanged.
fn code_to_node_type(code: &str) -> &str {
    // Only single-character inputs can be codes; everything else passes through.
    let mut letters = code.chars();
    let (Some(letter), None) = (letters.next(), letters.next()) else {
        return code;
    };

    match letter {
        'F' => "Feature",
        'K' => "Concept",
        'I' => "Interface",
        'P' => "Process",
        'D' => "DataStore",
        'A' => "Attribute",
        'Y' => "Entity",
        'N' => "Note",
        'R' => "Rule",
        'C' => "Convention",
        'B' => "Bug",
        'Z' => "Decision",
        'O' => "OpenQuestion",
        'Q' => "Claim",
        'W' => "Insight",
        'M' => "Reference",
        'T' => "Term",
        'S' => "Status",
        'L' => "Doubt",
        _ => code,
    }
}
134
/// Maps an edge relation name to its one-letter `.kg` code.
///
/// Several relations have two accepted spellings that share one code (for example
/// `DOCUMENTED_IN` and `DOCUMENTS` both map to `D`). Matching is case-sensitive, and a
/// relation without a dedicated code is returned unchanged.
fn relation_to_code(relation: &str) -> &str {
    const RELATION_CODES: &[(&str, &str)] = &[
        ("DOCUMENTED_IN", "D"),
        ("DOCUMENTS", "D"),
        ("HAS", "H"),
        ("TRIGGERS", "T"),
        ("AFFECTED_BY", "A"),
        ("AFFECTS", "A"),
        ("READS_FROM", "R"),
        ("READS", "R"),
        ("GOVERNED_BY", "G"),
        ("GOVERNS", "G"),
        ("DEPENDS_ON", "O"),
        ("AVAILABLE_IN", "I"),
        ("SUPPORTS", "S"),
        ("SUMMARIZES", "U"),
        ("RELATED_TO", "L"),
        ("CONTRADICTS", "V"),
        ("CREATED_BY", "C"),
        ("CREATES", "C"),
    ];

    for &(name, code) in RELATION_CODES {
        if name == relation {
            return code;
        }
    }
    relation
}
153
/// Expands a one-letter `.kg` relation code to its canonical relation name.
///
/// Each code expands to a single spelling (for example `D` becomes `DOCUMENTED_IN`).
/// Unknown codes are returned unchanged; matching is case-sensitive.
fn code_to_relation(code: &str) -> &str {
    const RELATIONS: &[(&str, &str)] = &[
        ("D", "DOCUMENTED_IN"),
        ("H", "HAS"),
        ("T", "TRIGGERS"),
        ("A", "AFFECTED_BY"),
        ("R", "READS_FROM"),
        ("G", "GOVERNED_BY"),
        ("O", "DEPENDS_ON"),
        ("I", "AVAILABLE_IN"),
        ("S", "SUPPORTS"),
        ("U", "SUMMARIZES"),
        ("L", "RELATED_TO"),
        ("V", "CONTRADICTS"),
        ("C", "CREATED_BY"),
    ];

    RELATIONS
        .iter()
        .find(|&&(short, _)| short == code)
        .map_or(code, |&(_, full)| full)
}
172
/// Returns a sorted copy of `values`, ordered by ASCII-lowercased text first and by the
/// exact text second, so strings differing only in ASCII case still get a fixed order.
fn sort_case_insensitive(values: &[String]) -> Vec<String> {
    /// Bytes of `text` with ASCII upper-case letters folded to lower case.
    fn ascii_folded(text: &str) -> impl Iterator<Item = u8> + '_ {
        text.bytes().map(|byte| byte.to_ascii_lowercase())
    }

    let mut ordered = values.to_vec();
    ordered.sort_by(|lhs, rhs| {
        ascii_folded(lhs)
            .cmp(ascii_folded(rhs))
            .then_with(|| lhs.cmp(rhs))
    });
    ordered
}
182
/// Collapses every run of whitespace in `value` to a single space and drops leading and
/// trailing whitespace.
fn normalize_text(value: &str) -> String {
    let mut normalized = String::with_capacity(value.len());
    for word in value.split_whitespace() {
        // Words are never empty, so a non-empty buffer means a separator is needed.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
186
/// Removes entries that equal an earlier entry when compared ignoring ASCII case.
///
/// The first occurrence of each value is kept with its original spelling, and the
/// relative order of the kept entries is preserved.
fn dedupe_case_insensitive(values: Vec<String>) -> Vec<String> {
    let mut seen_keys = std::collections::HashSet::with_capacity(values.len());
    values
        .into_iter()
        // `insert` returns `false` for a key that is already present.
        .filter(|candidate| seen_keys.insert(candidate.to_ascii_lowercase()))
        .collect()
}
198
/// Returns `true` when `value` is a valid UTC timestamp of the exact form
/// `YYYY-MM-DDTHH:MM:SSZ` (20 ASCII bytes, upper-case `T` and `Z`).
///
/// Besides the shape, the fields must describe a real calendar instant: month `01..=12`,
/// day within the length of that month (29 February only in leap years), hour `00..=23`,
/// minute and second `00..=59`. Despite the name, nothing is returned but the verdict.
fn parse_utc_timestamp(value: &str) -> bool {
    // `d` stands for any ASCII digit; every other byte must match literally.
    const SHAPE: &[u8; 20] = b"dddd-dd-ddTdd:dd:ddZ";

    let bytes = value.as_bytes();
    if bytes.len() != SHAPE.len() {
        return false;
    }
    let shape_matches = bytes.iter().zip(SHAPE.iter()).all(|(&actual, &expected)| {
        if expected == b'd' {
            actual.is_ascii_digit()
        } else {
            actual == expected
        }
    });
    if !shape_matches {
        return false;
    }

    // Every digit position was verified above, so these conversions cannot misread.
    let digit = |at: usize| u32::from(bytes[at] - b'0');
    let two_digits = |at: usize| digit(at) * 10 + digit(at + 1);
    let year = two_digits(0) * 100 + two_digits(2);
    let month = two_digits(5);
    let day = two_digits(8);
    let hour = two_digits(11);
    let minute = two_digits(14);
    let second = two_digits(17);

    let is_leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    let days_in_month = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap_year => 29,
        2 => 28,
        _ => return false,
    };

    (1..=days_in_month).contains(&day) && hour <= 23 && minute <= 59 && second <= 59
}
240
/// Reports whether strict `.kg` parsing was requested through the environment.
///
/// Reads `KG_STRICT_FORMAT`; after trimming and ASCII-lowercasing, the values `1`,
/// `true`, `yes` and `on` enable strict mode. An unset or unreadable variable, or any
/// other value, disables it.
fn strict_kg_mode() -> bool {
    std::env::var("KG_STRICT_FORMAT")
        .map(|raw| {
            let flag = raw.trim().to_ascii_lowercase();
            ["1", "true", "yes", "on"].contains(&flag.as_str())
        })
        .unwrap_or(false)
}
250
251fn validate_len(
252    line_no: usize,
253    field: &str,
254    value: &str,
255    min: usize,
256    max: usize,
257    strict: bool,
258) -> Result<()> {
259    let len = value.chars().count();
260    if strict && (len < min || len > max) {
261        return Err(anyhow::anyhow!(
262            "invalid {field} length at line {line_no}: expected {min}..={max}, got {len}"
263        ));
264    }
265    Ok(())
266}
267
268fn enforce_field_order(
269    line_no: usize,
270    key: &str,
271    rank: u8,
272    last_rank: &mut u8,
273    section: &str,
274    strict: bool,
275) -> Result<()> {
276    if strict && rank < *last_rank {
277        return Err(anyhow::anyhow!(
278            "invalid field order at line {line_no}: {key} in {section} block"
279        ));
280    }
281    if rank > *last_rank {
282        *last_rank = rank;
283    }
284    Ok(())
285}
286
/// Extracts the value of a `<key> <value>` line.
///
/// Returns `Some("")` when `line` is exactly `key`, `Some(rest)` when `line` starts with
/// `key` followed by one space (only that single space is removed), and `None` otherwise.
fn field_value<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    let after_key = line.strip_prefix(key)?;
    if after_key.is_empty() {
        // A bare key is a field with an empty value.
        return Some("");
    }
    after_key.strip_prefix(' ')
}
295
296fn parse_kg(raw: &str, graph_name: &str, strict: bool) -> Result<GraphFile> {
297    let mut graph = GraphFile::new(graph_name);
298    let mut current_node: Option<Node> = None;
299    let mut current_note: Option<Note> = None;
300    let mut current_edge_index: Option<usize> = None;
301    let mut last_node_rank: u8 = 0;
302    let mut last_note_rank: u8 = 0;
303    let mut last_edge_rank: u8 = 0;
304
305    for (idx, line) in raw.lines().enumerate() {
306        let line_no = idx + 1;
307        let trimmed = line.trim();
308        if trimmed.is_empty() || trimmed.starts_with('#') {
309            continue;
310        }
311
312        if let Some(rest) = trimmed.strip_prefix("@ ") {
313            if let Some(note) = current_note.take() {
314                graph.notes.push(note);
315            }
316            if let Some(node) = current_node.take() {
317                graph.nodes.push(node);
318            }
319            let (type_code, node_id) = rest.split_once(':').ok_or_else(|| {
320                anyhow::anyhow!("invalid node header at line {line_no}: {trimmed}")
321            })?;
322            current_node = Some(Node {
323                id: node_id.trim().to_owned(),
324                r#type: code_to_node_type(type_code.trim()).to_owned(),
325                name: String::new(),
326                properties: NodeProperties::default(),
327                source_files: Vec::new(),
328            });
329            current_edge_index = None;
330            last_node_rank = 0;
331            last_edge_rank = 0;
332            continue;
333        }
334
335        if let Some(rest) = trimmed.strip_prefix("! ") {
336            if let Some(node) = current_node.take() {
337                graph.nodes.push(node);
338            }
339            if let Some(note) = current_note.take() {
340                graph.notes.push(note);
341            }
342            let mut parts = rest.split_whitespace();
343            let id = parts.next().ok_or_else(|| {
344                anyhow::anyhow!("invalid note header at line {line_no}: {trimmed}")
345            })?;
346            let node_id = parts.next().ok_or_else(|| {
347                anyhow::anyhow!("invalid note header at line {line_no}: {trimmed}")
348            })?;
349            current_note = Some(Note {
350                id: id.to_owned(),
351                node_id: node_id.to_owned(),
352                ..Default::default()
353            });
354            current_edge_index = None;
355            last_note_rank = 0;
356            continue;
357        }
358
359        if let Some(note) = current_note.as_mut() {
360            if let Some(rest) = field_value(trimmed, "b") {
361                enforce_field_order(line_no, "b", 1, &mut last_note_rank, "note", strict)?;
362                note.body = normalize_text(rest.trim());
363                continue;
364            }
365            if let Some(rest) = field_value(trimmed, "t") {
366                enforce_field_order(line_no, "t", 2, &mut last_note_rank, "note", strict)?;
367                let value = normalize_text(rest.trim());
368                if !value.is_empty() {
369                    note.tags.push(value);
370                }
371                continue;
372            }
373            if let Some(rest) = field_value(trimmed, "a") {
374                enforce_field_order(line_no, "a", 3, &mut last_note_rank, "note", strict)?;
375                note.author = normalize_text(rest.trim());
376                continue;
377            }
378            if let Some(rest) = field_value(trimmed, "e") {
379                enforce_field_order(line_no, "e", 4, &mut last_note_rank, "note", strict)?;
380                note.created_at = rest.trim().to_owned();
381                continue;
382            }
383            if let Some(rest) = field_value(trimmed, "p") {
384                enforce_field_order(line_no, "p", 5, &mut last_note_rank, "note", strict)?;
385                note.provenance = normalize_text(rest.trim());
386                continue;
387            }
388            if let Some(rest) = field_value(trimmed, "s") {
389                enforce_field_order(line_no, "s", 6, &mut last_note_rank, "note", strict)?;
390                let value = normalize_text(rest.trim());
391                if !value.is_empty() {
392                    note.source_files.push(value);
393                }
394                continue;
395            }
396            return Err(anyhow::anyhow!(
397                "unrecognized note line at {line_no}: {trimmed}"
398            ));
399        }
400
401        let Some(node) = current_node.as_mut() else {
402            return Err(anyhow::anyhow!(
403                "unexpected line before first node at line {line_no}: {trimmed}"
404            ));
405        };
406
407        if let Some(rest) = field_value(trimmed, "N") {
408            enforce_field_order(line_no, "N", 1, &mut last_node_rank, "node", strict)?;
409            let value = normalize_text(rest.trim());
410            validate_len(line_no, "N", &value, 1, 120, strict)?;
411            node.name = value;
412            continue;
413        }
414        if let Some(rest) = field_value(trimmed, "D") {
415            enforce_field_order(line_no, "D", 2, &mut last_node_rank, "node", strict)?;
416            let value = normalize_text(rest.trim());
417            validate_len(line_no, "D", &value, 1, 200, strict)?;
418            node.properties.description = value;
419            continue;
420        }
421        if let Some(rest) = field_value(trimmed, "A") {
422            enforce_field_order(line_no, "A", 3, &mut last_node_rank, "node", strict)?;
423            let value = normalize_text(rest.trim());
424            validate_len(line_no, "A", &value, 1, 80, strict)?;
425            node.properties.alias.push(value);
426            continue;
427        }
428        if let Some(rest) = field_value(trimmed, "F") {
429            enforce_field_order(line_no, "F", 4, &mut last_node_rank, "node", strict)?;
430            let value = normalize_text(rest.trim());
431            validate_len(line_no, "F", &value, 1, 200, strict)?;
432            node.properties.key_facts.push(value);
433            continue;
434        }
435        if let Some(rest) = field_value(trimmed, "E") {
436            enforce_field_order(line_no, "E", 5, &mut last_node_rank, "node", strict)?;
437            let value = rest.trim();
438            if !value.is_empty() && !parse_utc_timestamp(value) {
439                return Err(anyhow::anyhow!(
440                    "invalid E timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ"
441                ));
442            }
443            node.properties.created_at = value.to_owned();
444            continue;
445        }
446        if let Some(rest) = field_value(trimmed, "C") {
447            enforce_field_order(line_no, "C", 6, &mut last_node_rank, "node", strict)?;
448            if !rest.trim().is_empty() {
449                node.properties.confidence = rest.trim().parse::<f64>().ok();
450            }
451            continue;
452        }
453        if let Some(rest) = field_value(trimmed, "V") {
454            enforce_field_order(line_no, "V", 7, &mut last_node_rank, "node", strict)?;
455            if let Ok(value) = rest.trim().parse::<u8>() {
456                node.properties.importance = value;
457            }
458            continue;
459        }
460        if let Some(rest) = field_value(trimmed, "P") {
461            enforce_field_order(line_no, "P", 8, &mut last_node_rank, "node", strict)?;
462            node.properties.provenance = normalize_text(rest.trim());
463            continue;
464        }
465        if let Some(rest) = field_value(trimmed, "S") {
466            enforce_field_order(line_no, "S", 10, &mut last_node_rank, "node", strict)?;
467            let value = normalize_text(rest.trim());
468            validate_len(line_no, "S", &value, 1, 200, strict)?;
469            node.source_files.push(value);
470            continue;
471        }
472
473        if let Some(rest) = trimmed.strip_prefix("> ") {
474            let mut parts = rest.split_whitespace();
475            let relation = parts.next().ok_or_else(|| {
476                anyhow::anyhow!("missing relation in edge at line {line_no}: {trimmed}")
477            })?;
478            let target_id = parts.next().ok_or_else(|| {
479                anyhow::anyhow!("missing target id in edge at line {line_no}: {trimmed}")
480            })?;
481            graph.edges.push(Edge {
482                source_id: node.id.clone(),
483                relation: code_to_relation(relation).to_owned(),
484                target_id: target_id.to_owned(),
485                properties: EdgeProperties::default(),
486            });
487            current_edge_index = Some(graph.edges.len() - 1);
488            last_edge_rank = 0;
489            continue;
490        }
491
492        if let Some(rest) = field_value(trimmed, "d") {
493            enforce_field_order(line_no, "d", 1, &mut last_edge_rank, "edge", strict)?;
494            let edge_idx = current_edge_index.ok_or_else(|| {
495                anyhow::anyhow!("edge detail without preceding edge at line {line_no}")
496            })?;
497            let value = normalize_text(rest.trim());
498            validate_len(line_no, "d", &value, 1, 200, strict)?;
499            graph.edges[edge_idx].properties.detail = value;
500            continue;
501        }
502
503        if let Some(rest) = field_value(trimmed, "i") {
504            enforce_field_order(line_no, "i", 2, &mut last_edge_rank, "edge", strict)?;
505            let edge_idx = current_edge_index.ok_or_else(|| {
506                anyhow::anyhow!("edge valid_from without preceding edge at line {line_no}")
507            })?;
508            let value = rest.trim();
509            if !value.is_empty() && !parse_utc_timestamp(value) {
510                return Err(anyhow::anyhow!(
511                    "invalid i timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ"
512                ));
513            }
514            graph.edges[edge_idx].properties.valid_from = value.to_owned();
515            continue;
516        }
517
518        if let Some(rest) = field_value(trimmed, "x") {
519            enforce_field_order(line_no, "x", 3, &mut last_edge_rank, "edge", strict)?;
520            let edge_idx = current_edge_index.ok_or_else(|| {
521                anyhow::anyhow!("edge valid_to without preceding edge at line {line_no}")
522            })?;
523            let value = rest.trim();
524            if !value.is_empty() && !parse_utc_timestamp(value) {
525                return Err(anyhow::anyhow!(
526                    "invalid x timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ"
527                ));
528            }
529            graph.edges[edge_idx].properties.valid_to = value.to_owned();
530            continue;
531        }
532
533        if let Some(rest) = field_value(trimmed, "-") {
534            let mut parts = rest.trim().splitn(2, char::is_whitespace);
535            let key = parts.next().unwrap_or("").trim();
536            let value = parts.next().unwrap_or("").trim();
537            let is_edge_custom = matches!(
538                key,
539                "edge_feedback_score" | "edge_feedback_count" | "edge_feedback_last_ts_ms"
540            );
541            if is_edge_custom {
542                enforce_field_order(line_no, "-", 4, &mut last_edge_rank, "edge", strict)?;
543            } else {
544                enforce_field_order(line_no, "-", 9, &mut last_node_rank, "node", strict)?;
545            }
546            match key {
547                "domain_area" => node.properties.domain_area = value.to_owned(),
548                "feedback_score" => {
549                    node.properties.feedback_score = value.parse::<f64>().unwrap_or(0.0)
550                }
551                "feedback_count" => {
552                    node.properties.feedback_count = value.parse::<u64>().unwrap_or(0)
553                }
554                "feedback_last_ts_ms" => {
555                    node.properties.feedback_last_ts_ms = value.parse::<u64>().ok()
556                }
557                "edge_feedback_score" => {
558                    if let Some(edge_idx) = current_edge_index {
559                        graph.edges[edge_idx].properties.feedback_score =
560                            value.parse::<f64>().unwrap_or(0.0);
561                    }
562                }
563                "edge_feedback_count" => {
564                    if let Some(edge_idx) = current_edge_index {
565                        graph.edges[edge_idx].properties.feedback_count =
566                            value.parse::<u64>().unwrap_or(0);
567                    }
568                }
569                "edge_feedback_last_ts_ms" => {
570                    if let Some(edge_idx) = current_edge_index {
571                        graph.edges[edge_idx].properties.feedback_last_ts_ms =
572                            value.parse::<u64>().ok();
573                    }
574                }
575                _ => {}
576            }
577            continue;
578        }
579
580        return Err(anyhow::anyhow!("unrecognized line at {line_no}: {trimmed}"));
581    }
582
583    if let Some(node) = current_node.take() {
584        graph.nodes.push(node);
585    }
586    if let Some(note) = current_note.take() {
587        graph.notes.push(note);
588    }
589
590    for node in &mut graph.nodes {
591        node.properties.alias =
592            sort_case_insensitive(&dedupe_case_insensitive(node.properties.alias.clone()));
593        node.properties.key_facts =
594            sort_case_insensitive(&dedupe_case_insensitive(node.properties.key_facts.clone()));
595        node.source_files =
596            sort_case_insensitive(&dedupe_case_insensitive(node.source_files.clone()));
597    }
598
599    graph.edges.sort_by(|a, b| {
600        a.source_id
601            .cmp(&b.source_id)
602            .then_with(|| a.relation.cmp(&b.relation))
603            .then_with(|| a.target_id.cmp(&b.target_id))
604            .then_with(|| a.properties.detail.cmp(&b.properties.detail))
605    });
606
607    for note in &mut graph.notes {
608        note.tags = sort_case_insensitive(&dedupe_case_insensitive(note.tags.clone()));
609        note.source_files =
610            sort_case_insensitive(&dedupe_case_insensitive(note.source_files.clone()));
611    }
612    graph.notes.sort_by(|a, b| {
613        a.id.cmp(&b.id)
614            .then_with(|| a.node_id.cmp(&b.node_id))
615            .then_with(|| a.created_at.cmp(&b.created_at))
616    });
617
618    graph.refresh_counts();
619    Ok(graph)
620}
621
622fn serialize_kg(graph: &GraphFile) -> String {
623    let mut out = String::new();
624    let mut nodes = graph.nodes.clone();
625    nodes.sort_by(|a, b| a.id.cmp(&b.id));
626
627    for node in nodes {
628        out.push_str(&format!(
629            "@ {}:{}\n",
630            node_type_to_code(&node.r#type),
631            node.id
632        ));
633        out.push_str(&format!("N {}\n", node.name));
634        out.push_str(&format!("D {}\n", node.properties.description));
635
636        for alias in sort_case_insensitive(&node.properties.alias) {
637            out.push_str(&format!("A {}\n", alias));
638        }
639        for fact in sort_case_insensitive(&node.properties.key_facts) {
640            out.push_str(&format!("F {}\n", fact));
641        }
642
643        if !node.properties.created_at.is_empty() {
644            out.push_str(&format!("E {}\n", node.properties.created_at));
645        }
646        if let Some(confidence) = node.properties.confidence {
647            out.push_str(&format!("C {}\n", confidence));
648        }
649        out.push_str(&format!("V {}\n", node.properties.importance));
650        if !node.properties.provenance.is_empty() {
651            out.push_str(&format!("P {}\n", node.properties.provenance));
652        }
653        if !node.properties.domain_area.is_empty() {
654            out.push_str(&format!("- domain_area {}\n", node.properties.domain_area));
655        }
656        if node.properties.feedback_score != 0.0 {
657            out.push_str(&format!(
658                "- feedback_score {}\n",
659                node.properties.feedback_score
660            ));
661        }
662        if node.properties.feedback_count != 0 {
663            out.push_str(&format!(
664                "- feedback_count {}\n",
665                node.properties.feedback_count
666            ));
667        }
668        if let Some(ts) = node.properties.feedback_last_ts_ms {
669            out.push_str(&format!("- feedback_last_ts_ms {}\n", ts));
670        }
671
672        for source in sort_case_insensitive(&node.source_files) {
673            out.push_str(&format!("S {}\n", source));
674        }
675
676        let mut edges: Vec<Edge> = graph
677            .edges
678            .iter()
679            .filter(|edge| edge.source_id == node.id)
680            .cloned()
681            .collect();
682        edges.sort_by(|a, b| {
683            a.relation
684                .cmp(&b.relation)
685                .then_with(|| a.target_id.cmp(&b.target_id))
686                .then_with(|| a.properties.detail.cmp(&b.properties.detail))
687        });
688
689        for edge in edges {
690            out.push_str(&format!(
691                "> {} {}\n",
692                relation_to_code(&edge.relation),
693                edge.target_id
694            ));
695            if !edge.properties.detail.is_empty() {
696                out.push_str(&format!("d {}\n", edge.properties.detail));
697            }
698            if !edge.properties.valid_from.is_empty() {
699                out.push_str(&format!("i {}\n", edge.properties.valid_from));
700            }
701            if !edge.properties.valid_to.is_empty() {
702                out.push_str(&format!("x {}\n", edge.properties.valid_to));
703            }
704            if edge.properties.feedback_score != 0.0 {
705                out.push_str(&format!(
706                    "- edge_feedback_score {}\n",
707                    edge.properties.feedback_score
708                ));
709            }
710            if edge.properties.feedback_count != 0 {
711                out.push_str(&format!(
712                    "- edge_feedback_count {}\n",
713                    edge.properties.feedback_count
714                ));
715            }
716            if let Some(ts) = edge.properties.feedback_last_ts_ms {
717                out.push_str(&format!("- edge_feedback_last_ts_ms {}\n", ts));
718            }
719        }
720
721        out.push('\n');
722    }
723
724    let mut notes = graph.notes.clone();
725    notes.sort_by(|a, b| {
726        a.id.cmp(&b.id)
727            .then_with(|| a.node_id.cmp(&b.node_id))
728            .then_with(|| a.created_at.cmp(&b.created_at))
729    });
730    for note in notes {
731        out.push_str(&format!("! {} {}\n", note.id, note.node_id));
732        out.push_str(&format!("b {}\n", note.body));
733        for tag in sort_case_insensitive(&note.tags) {
734            out.push_str(&format!("t {}\n", tag));
735        }
736        if !note.author.is_empty() {
737            out.push_str(&format!("a {}\n", note.author));
738        }
739        if !note.created_at.is_empty() {
740            out.push_str(&format!("e {}\n", note.created_at));
741        }
742        if !note.provenance.is_empty() {
743            out.push_str(&format!("p {}\n", note.provenance));
744        }
745        for source in sort_case_insensitive(&note.source_files) {
746            out.push_str(&format!("s {}\n", source));
747        }
748        out.push('\n');
749    }
750
751    out
752}
753
754#[derive(Debug, Clone, Serialize, Deserialize)]
755pub struct GraphFile {
756    pub metadata: Metadata,
757    #[serde(default)]
758    pub nodes: Vec<Node>,
759    #[serde(default)]
760    pub edges: Vec<Edge>,
761    #[serde(default)]
762    pub notes: Vec<Note>,
763}
764
765#[derive(Debug, Clone, Serialize, Deserialize)]
766pub struct Metadata {
767    pub name: String,
768    pub version: String,
769    pub description: String,
770    pub node_count: usize,
771    pub edge_count: usize,
772}
773
774#[derive(Debug, Clone, Serialize, Deserialize)]
775pub struct Node {
776    pub id: String,
777    #[serde(rename = "type")]
778    pub r#type: String,
779    pub name: String,
780    #[serde(default)]
781    pub properties: NodeProperties,
782    #[serde(default)]
783    pub source_files: Vec<String>,
784}
785
786#[derive(Debug, Clone, Serialize, Deserialize)]
787pub struct NodeProperties {
788    #[serde(default)]
789    pub description: String,
790    #[serde(default)]
791    pub domain_area: String,
792    #[serde(default)]
793    pub provenance: String,
794    #[serde(default)]
795    pub confidence: Option<f64>,
796    #[serde(default)]
797    pub created_at: String,
798    #[serde(default = "default_importance")]
799    pub importance: u8,
800    #[serde(default)]
801    pub key_facts: Vec<String>,
802    #[serde(default)]
803    pub alias: Vec<String>,
804    #[serde(default)]
805    pub feedback_score: f64,
806    #[serde(default)]
807    pub feedback_count: u64,
808    #[serde(default)]
809    pub feedback_last_ts_ms: Option<u64>,
810}
811
/// Importance assigned to a node when none is specified.
fn default_importance() -> u8 {
    const DEFAULT_IMPORTANCE: u8 = 4;
    DEFAULT_IMPORTANCE
}
815
816impl Default for NodeProperties {
817    fn default() -> Self {
818        Self {
819            description: String::new(),
820            domain_area: String::new(),
821            provenance: String::new(),
822            confidence: None,
823            created_at: String::new(),
824            importance: default_importance(),
825            key_facts: Vec::new(),
826            alias: Vec::new(),
827            feedback_score: 0.0,
828            feedback_count: 0,
829            feedback_last_ts_ms: None,
830        }
831    }
832}
833
834#[derive(Debug, Clone, Serialize, Deserialize)]
835pub struct Edge {
836    pub source_id: String,
837    pub relation: String,
838    pub target_id: String,
839    #[serde(default)]
840    pub properties: EdgeProperties,
841}
842
843#[derive(Debug, Clone, Default, Serialize, Deserialize)]
844pub struct EdgeProperties {
845    #[serde(default)]
846    pub detail: String,
847    #[serde(default)]
848    pub valid_from: String,
849    #[serde(default)]
850    pub valid_to: String,
851    #[serde(default)]
852    pub feedback_score: f64,
853    #[serde(default)]
854    pub feedback_count: u64,
855    #[serde(default)]
856    pub feedback_last_ts_ms: Option<u64>,
857}
858
859#[derive(Debug, Clone, Default, Serialize, Deserialize)]
860pub struct Note {
861    pub id: String,
862    pub node_id: String,
863    #[serde(default)]
864    pub body: String,
865    #[serde(default)]
866    pub tags: Vec<String>,
867    #[serde(default)]
868    pub author: String,
869    #[serde(default)]
870    pub created_at: String,
871    #[serde(default)]
872    pub provenance: String,
873    #[serde(default)]
874    pub source_files: Vec<String>,
875}
876
877impl GraphFile {
878    pub fn new(name: &str) -> Self {
879        Self {
880            metadata: Metadata {
881                name: name.to_owned(),
882                version: "1.0".to_owned(),
883                description: format!("Knowledge graph: {name}"),
884                node_count: 0,
885                edge_count: 0,
886            },
887            nodes: Vec::new(),
888            edges: Vec::new(),
889            notes: Vec::new(),
890        }
891    }
892
893    pub fn load(path: &Path) -> Result<Self> {
894        let raw = fs::read_to_string(path)
895            .with_context(|| format!("failed to read graph: {}", path.display()))?;
896        let ext = path
897            .extension()
898            .and_then(|ext| ext.to_str())
899            .unwrap_or("json");
900        let mut graph = if ext == "kg" {
901            if raw.trim_start().starts_with('{') {
902                serde_json::from_str(&raw).with_context(|| {
903                    format!(
904                        "invalid legacy JSON payload in .kg file: {}",
905                        path.display()
906                    )
907                })?
908            } else {
909                let graph_name = path
910                    .file_stem()
911                    .and_then(|stem| stem.to_str())
912                    .unwrap_or("graph");
913                parse_kg(&raw, graph_name, strict_kg_mode())?
914            }
915        } else {
916            serde_json::from_str(&raw)
917                .with_context(|| format!("invalid JSON: {}", path.display()))?
918        };
919        graph.refresh_counts();
920        Ok(graph)
921    }
922
923    pub fn save(&self, path: &Path) -> Result<()> {
924        let mut graph = self.clone();
925        graph.refresh_counts();
926        let ext = path
927            .extension()
928            .and_then(|ext| ext.to_str())
929            .unwrap_or("json");
930        let raw = if ext == "kg" {
931            serialize_kg(&graph)
932        } else {
933            serde_json::to_string_pretty(&graph).context("failed to serialize graph")?
934        };
935        atomic_write(path, &raw)?;
936        backup_graph_if_stale(path, &raw)
937    }
938
939    pub fn refresh_counts(&mut self) {
940        self.metadata.node_count = self.nodes.len();
941        self.metadata.edge_count = self.edges.len();
942    }
943
944    pub fn node_by_id(&self, id: &str) -> Option<&Node> {
945        self.nodes.iter().find(|node| node.id == id)
946    }
947
948    pub fn node_by_id_sorted(&self, id: &str) -> Option<&Node> {
949        self.nodes
950            .binary_search_by(|node| node.id.as_str().cmp(id))
951            .ok()
952            .and_then(|idx| self.nodes.get(idx))
953    }
954
955    pub fn node_by_id_mut(&mut self, id: &str) -> Option<&mut Node> {
956        self.nodes.iter_mut().find(|node| node.id == id)
957    }
958
959    pub fn has_edge(&self, source_id: &str, relation: &str, target_id: &str) -> bool {
960        self.edges.iter().any(|edge| {
961            edge.source_id == source_id && edge.relation == relation && edge.target_id == target_id
962        })
963    }
964}
965
#[cfg(test)]
mod tests {
    use super::{GraphFile, parse_kg};

    /// Creates a temporary directory and returns it together with the path of
    /// `file_name` inside it. The directory handle must be kept alive for as
    /// long as the path is in use.
    fn scratch_file(file_name: &str) -> (tempfile::TempDir, std::path::PathBuf) {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join(file_name);
        (dir, path)
    }

    /// Saves a one-node, one-edge graph as `.kg`, checks the expected node and
    /// edge lines appear in the file, and verifies the reloaded values.
    #[test]
    fn save_and_load_kg_roundtrip_keeps_core_fields() {
        let (_dir, kg_path) = scratch_file("graph.kg");

        let fridge = crate::Node {
            id: String::from("concept:refrigerator"),
            r#type: String::from("Concept"),
            name: String::from("Lodowka"),
            properties: crate::NodeProperties {
                description: String::from("Urzadzenie chlodzace"),
                provenance: String::from("U"),
                created_at: String::from("2026-04-04T12:00:00Z"),
                importance: 5,
                key_facts: vec![String::from("A"), String::from("b")],
                alias: vec![String::from("Fridge")],
                ..Default::default()
            },
            source_files: vec![String::from("docs/fridge.md")],
        };
        let reads_settings = crate::Edge {
            source_id: String::from("concept:refrigerator"),
            relation: String::from("READS_FROM"),
            target_id: String::from("datastore:settings"),
            properties: crate::EdgeProperties {
                detail: String::from("runtime read"),
                valid_from: String::from("2026-04-04T12:00:00Z"),
                valid_to: String::from("2026-04-05T12:00:00Z"),
                ..Default::default()
            },
        };

        let mut original = GraphFile::new("graph");
        original.nodes.push(fridge);
        original.edges.push(reads_settings);
        original.save(&kg_path).expect("save kg");

        let on_disk = std::fs::read_to_string(&kg_path).expect("read kg");
        assert!(on_disk.contains("@ K:concept:refrigerator"));
        assert!(on_disk.contains("> R datastore:settings"));

        let reloaded = GraphFile::load(&kg_path).expect("load kg");
        assert_eq!(reloaded.nodes.len(), 1);
        assert_eq!(reloaded.edges.len(), 1);

        let node = &reloaded.nodes[0];
        assert_eq!(node.properties.importance, 5);
        assert_eq!(node.properties.provenance, "U");
        assert_eq!(node.name, "Lodowka");

        let edge = &reloaded.edges[0];
        assert_eq!(edge.relation, "READS_FROM");
        assert_eq!(edge.properties.detail, "runtime read");
        assert_eq!(edge.properties.valid_from, "2026-04-04T12:00:00Z");
        assert_eq!(edge.properties.valid_to, "2026-04-05T12:00:00Z");
    }

    /// A `.kg` file whose content is a JSON object is still loaded as JSON.
    #[test]
    fn load_supports_legacy_json_payload_with_kg_extension() {
        let (_dir, kg_path) = scratch_file("legacy.kg");
        let legacy_json = r#"{
  "metadata": {"name": "legacy", "version": "1.0", "description": "x", "node_count": 0, "edge_count": 0},
  "nodes": [],
  "edges": [],
  "notes": []
}"#;
        std::fs::write(&kg_path, legacy_json).expect("write legacy payload");

        let reloaded = GraphFile::load(&kg_path).expect("load legacy kg");
        assert_eq!(reloaded.metadata.name, "legacy");
        assert!(reloaded.nodes.is_empty());
    }

    /// Loading fails when the node `E` line holds `2026-04-04 12:00:00`.
    #[test]
    fn load_kg_rejects_invalid_timestamp_format() {
        let (_dir, kg_path) = scratch_file("invalid-ts.kg");
        let fixture = concat!(
            "@ K:concept:x\n",
            "N X\n",
            "D Desc\n",
            "E 2026-04-04 12:00:00\n",
            "V 4\n",
            "P U\n",
        );
        std::fs::write(&kg_path, fixture).expect("write kg");

        let err = GraphFile::load(&kg_path).expect_err("invalid timestamp should fail");
        let rendered = format!("{err:#}");
        assert!(rendered.contains("invalid E timestamp"));
    }

    /// Loading fails when the edge `i` line holds `2026-04-04 12:00:00`.
    #[test]
    fn load_kg_rejects_invalid_edge_timestamp_format() {
        let (_dir, kg_path) = scratch_file("invalid-edge-ts.kg");
        let fixture = concat!(
            "@ K:concept:x\n",
            "N X\n",
            "D Desc\n",
            "E 2026-04-04T12:00:00Z\n",
            "V 4\n",
            "P U\n",
            "S docs/a.md\n",
            "> H concept:y\n",
            "i 2026-04-04 12:00:00\n",
        );
        std::fs::write(&kg_path, fixture).expect("write kg");

        let err = GraphFile::load(&kg_path).expect_err("invalid edge timestamp should fail");
        let rendered = format!("{err:#}");
        assert!(rendered.contains("invalid i timestamp"));
    }

    /// Repeated whitespace in `N`/`D` is collapsed, and duplicate `A`, `F` and
    /// `S` entries end up as a single entry each.
    #[test]
    fn load_kg_normalizes_and_dedupes_multivalue_fields() {
        let (_dir, kg_path) = scratch_file("normalize.kg");
        let fixture = concat!(
            "@ K:concept:x\n",
            "N  Name   With   Spaces \n",
            "D  Desc   with   spaces \n",
            "A Alias\n",
            "A alias\n",
            "F fact one\n",
            "F FACT   one\n",
            "S docs/a.md\n",
            "S docs/a.md\n",
            "E 2026-04-04T12:00:00Z\n",
            "V 4\n",
            "P U\n",
        );
        std::fs::write(&kg_path, fixture).expect("write kg");

        let reloaded = GraphFile::load(&kg_path).expect("load kg");
        let node = &reloaded.nodes[0];
        assert_eq!(node.name, "Name With Spaces");
        assert_eq!(node.properties.description, "Desc with spaces");
        assert_eq!(node.properties.alias.len(), 1);
        assert_eq!(node.properties.key_facts.len(), 1);
        assert_eq!(node.source_files.len(), 1);
    }

    /// Notes survive a `.kg` save/load cycle, the file is not written as JSON,
    /// and duplicate tags / source files come back as one entry each.
    #[test]
    fn save_and_load_kg_roundtrip_keeps_notes_without_json_fallback() {
        let (_dir, kg_path) = scratch_file("graph-notes.kg");

        let fridge = crate::Node {
            id: String::from("concept:refrigerator"),
            r#type: String::from("Concept"),
            name: String::from("Lodowka"),
            properties: crate::NodeProperties {
                description: String::from("Urzadzenie chlodzace"),
                provenance: String::from("U"),
                created_at: String::from("2026-04-04T12:00:00Z"),
                ..Default::default()
            },
            source_files: vec![String::from("docs/fridge.md")],
        };
        let maintenance_note = crate::Note {
            id: String::from("note:1"),
            node_id: String::from("concept:refrigerator"),
            body: String::from("Important maintenance insight"),
            tags: vec![String::from("Maintenance"), String::from("maintenance")],
            author: String::from("alice"),
            created_at: String::from("1712345678"),
            provenance: String::from("U"),
            source_files: vec![String::from("docs/a.md"), String::from("docs/a.md")],
        };

        let mut original = GraphFile::new("graph-notes");
        original.nodes.push(fridge);
        original.notes.push(maintenance_note);
        original.save(&kg_path).expect("save kg");

        let on_disk = std::fs::read_to_string(&kg_path).expect("read kg");
        assert!(on_disk.contains("! note:1 concept:refrigerator"));
        assert!(!on_disk.trim_start().starts_with('{'));

        let reloaded = GraphFile::load(&kg_path).expect("load kg");
        assert_eq!(reloaded.notes.len(), 1);

        let note = &reloaded.notes[0];
        assert_eq!(note.id, "note:1");
        assert_eq!(note.node_id, "concept:refrigerator");
        assert_eq!(note.body, "Important maintenance insight");
        assert_eq!(note.tags.len(), 1);
        assert_eq!(note.source_files.len(), 1);
    }

    /// With strict parsing, a `D` line placed before the `N` line is rejected.
    #[test]
    fn strict_mode_rejects_out_of_order_node_fields() {
        let fixture = concat!(
            "@ K:concept:x\n",
            "D Desc\n",
            "N Name\n",
            "E 2026-04-04T12:00:00Z\n",
            "V 4\n",
            "P U\n",
            "S docs/a.md\n",
        );
        let err = parse_kg(fixture, "x", true).expect_err("strict mode should fail on field order");
        assert!(format!("{err:#}").contains("invalid field order"));
    }

    /// A 121-character name fails strict parsing but is accepted otherwise.
    #[test]
    fn strict_mode_rejects_overlong_name_but_compat_mode_allows_it() {
        let oversized = "X".repeat(121);
        let fixture = format!(
            "@ K:concept:x\nN {oversized}\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n"
        );

        let strict_err =
            parse_kg(&fixture, "x", true).expect_err("strict mode should fail on length");
        assert!(format!("{strict_err:#}").contains("invalid N length"));

        parse_kg(&fixture, "x", false).expect("compat mode keeps permissive behavior");
    }

    /// Empty `created_at` / `provenance` must not produce bare `E ` or `P `
    /// lines in the saved file.
    #[test]
    fn save_kg_skips_empty_e_and_p_fields() {
        let (_dir, kg_path) = scratch_file("no-empty-ep.kg");

        let bare_node = crate::Node {
            id: String::from("concept:x"),
            r#type: String::from("Concept"),
            name: String::from("X"),
            properties: crate::NodeProperties {
                description: String::from("Desc"),
                provenance: String::new(),
                created_at: String::new(),
                ..Default::default()
            },
            source_files: vec![String::from("docs/a.md")],
        };

        let mut original = GraphFile::new("graph");
        original.nodes.push(bare_node);
        original.save(&kg_path).expect("save kg");

        let on_disk = std::fs::read_to_string(&kg_path).expect("read kg");
        assert!(!on_disk.contains("\nE \n"));
        assert!(!on_disk.contains("\nP \n"));
    }
}