Skip to main content

kg/
graph.rs

1use std::collections::{BTreeMap, HashMap};
2use std::fs;
3use std::io::Write;
4use std::path::{Path, PathBuf};
5use std::time::{SystemTime, UNIX_EPOCH};
6
7use anyhow::{Context, Result};
8use flate2::Compression;
9use flate2::write::GzEncoder;
10use serde::{Deserialize, Serialize};
11
// NOTE(review): none of these three constants is referenced in the part of the
// file visible here; the descriptions below are inferred from the names and
// values only — confirm against the code that uses them.

/// Id of the graph-info node (presumably a reserved metadata node).
const GRAPH_INFO_NODE_ID: &str = "^:graph_info";
/// Node type string that goes with `GRAPH_INFO_NODE_ID` (its id prefix is the same `^`).
const GRAPH_INFO_NODE_TYPE: &str = "^";
/// Prefix of a fact string that presumably carries the graph's UUID — TODO confirm.
const GRAPH_UUID_FACT_PREFIX: &str = "graph_uuid=";
15
16/// Write `data` to `dest` atomically:
17/// 1. Write to `dest.tmp`
18/// 2. If `dest` already exists, copy it to `dest.bak`
19/// 3. Rename `dest.tmp` -> `dest`
20fn atomic_write(dest: &Path, data: &str) -> Result<()> {
21    let unique = SystemTime::now()
22        .duration_since(UNIX_EPOCH)
23        .unwrap_or_default()
24        .as_nanos();
25    let tmp = dest.with_extension(format!("tmp.{}.{}", std::process::id(), unique));
26    fs::write(&tmp, data).with_context(|| format!("failed to write tmp: {}", tmp.display()))?;
27    if dest.exists() {
28        let bak = backup_bak_path(dest)?;
29        if should_refresh_bak(&bak)? {
30            fs::copy(dest, &bak)
31                .with_context(|| format!("failed to create backup: {}", bak.display()))?;
32        }
33    }
34    fs::rename(&tmp, dest).with_context(|| format!("failed to rename tmp to {}", dest.display()))
35}
36
/// Age in seconds (5 minutes) an existing `.bak` copy must reach before
/// `should_refresh_bak` reports that it should be rewritten.
const BACKUP_BAK_STALE_SECS: u64 = 5 * 60;
/// Minimum spacing in seconds (1 hour) between timestamped gzip backups;
/// `backup_graph_if_stale` skips writing when the newest one is younger.
const BACKUP_STALE_SECS: u64 = 60 * 60;
39
40fn should_refresh_bak(bak_path: &Path) -> Result<bool> {
41    if !bak_path.exists() {
42        return Ok(true);
43    }
44    let modified = fs::metadata(bak_path)
45        .and_then(|m| m.modified())
46        .with_context(|| format!("failed to read backup mtime: {}", bak_path.display()))?;
47    let age_secs = SystemTime::now()
48        .duration_since(modified)
49        .unwrap_or_default()
50        .as_secs();
51    Ok(age_secs >= BACKUP_BAK_STALE_SECS)
52}
53
54fn backup_graph_if_stale(path: &Path, data: &str) -> Result<()> {
55    let cache_dir = backup_cache_dir(path)?;
56    let stem = match path.file_stem().and_then(|s| s.to_str()) {
57        Some(stem) => stem,
58        None => return Ok(()),
59    };
60    let ext = path.extension().and_then(|s| s.to_str()).unwrap_or("json");
61    let backup_prefix = format!("{stem}.{ext}");
62    let now = SystemTime::now()
63        .duration_since(UNIX_EPOCH)
64        .context("time went backwards")?
65        .as_secs();
66    if let Some(latest) = latest_backup_ts(&cache_dir, &backup_prefix)? {
67        if now.saturating_sub(latest) < BACKUP_STALE_SECS {
68            return Ok(());
69        }
70    }
71
72    let backup_path = cache_dir.join(format!("{backup_prefix}.bck.{now}.gz"));
73    let tmp_path = backup_path.with_extension("tmp");
74    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
75    encoder.write_all(data.as_bytes())?;
76    let encoded = encoder.finish()?;
77    fs::write(&tmp_path, encoded)
78        .with_context(|| format!("failed to write tmp: {}", tmp_path.display()))?;
79    fs::rename(&tmp_path, &backup_path)
80        .with_context(|| format!("failed to rename tmp to {}", backup_path.display()))?;
81    Ok(())
82}
83
84fn backup_cache_dir(path: &Path) -> Result<PathBuf> {
85    let dir = crate::cache_paths::cache_root_for_graph(path);
86    fs::create_dir_all(&dir)
87        .with_context(|| format!("failed to create cache directory: {}", dir.display()))?;
88    Ok(dir)
89}
90
91fn backup_bak_path(dest: &Path) -> Result<PathBuf> {
92    let cache_dir = backup_cache_dir(dest)?;
93    let stem = dest.file_stem().and_then(|s| s.to_str()).unwrap_or("graph");
94    let ext = dest.extension().and_then(|s| s.to_str()).unwrap_or("json");
95    Ok(cache_dir.join(format!("{stem}.{ext}.bak")))
96}
97
98fn latest_backup_ts(dir: &Path, stem: &str) -> Result<Option<u64>> {
99    let prefix = format!("{stem}.bck.");
100    let suffix = ".gz";
101    let mut latest = None;
102    for entry in fs::read_dir(dir).with_context(|| format!("read dir: {}", dir.display()))? {
103        let entry = entry?;
104        let name = entry.file_name();
105        let name = name.to_string_lossy();
106        if !name.starts_with(&prefix) || !name.ends_with(suffix) {
107            continue;
108        }
109        let ts_part = &name[prefix.len()..name.len() - suffix.len()];
110        if let Ok(ts) = ts_part.parse::<u64>() {
111            match latest {
112                Some(current) => {
113                    if ts > current {
114                        latest = Some(ts);
115                    }
116                }
117                None => latest = Some(ts),
118            }
119        }
120    }
121    Ok(latest)
122}
123
/// Maps a full node type name (e.g. `"Feature"`) to its one-letter code.
///
/// Matching is exact and case-sensitive; a name that is not in the table is
/// returned unchanged.
fn node_type_to_code(node_type: &str) -> &str {
    const TYPE_CODES: &[(&str, &str)] = &[
        ("Feature", "F"),
        ("Concept", "K"),
        ("Interface", "I"),
        ("Process", "P"),
        ("DataStore", "D"),
        ("Attribute", "A"),
        ("Entity", "Y"),
        ("Note", "N"),
        ("Rule", "R"),
        ("Convention", "C"),
        ("Bug", "B"),
        ("Decision", "Z"),
        ("OpenQuestion", "O"),
        ("Claim", "Q"),
        ("Insight", "W"),
        ("Reference", "M"),
        ("Term", "T"),
        ("Status", "S"),
        ("Doubt", "L"),
    ];
    TYPE_CODES
        .iter()
        .find(|&&(name, _)| name == node_type)
        .map_or(node_type, |&(_, code)| code)
}
148
/// Expands a one-letter node type code (e.g. `"F"`) to its full type name.
///
/// Only exact single-character, upper-case codes are recognised; any other
/// input is returned unchanged.
fn code_to_node_type(code: &str) -> &str {
    // Every known code is exactly one character, so anything longer or empty
    // passes straight through.
    let mut letters = code.chars();
    let (Some(letter), None) = (letters.next(), letters.next()) else {
        return code;
    };
    match letter {
        'F' => "Feature",
        'K' => "Concept",
        'I' => "Interface",
        'P' => "Process",
        'D' => "DataStore",
        'A' => "Attribute",
        'Y' => "Entity",
        'N' => "Note",
        'R' => "Rule",
        'C' => "Convention",
        'B' => "Bug",
        'Z' => "Decision",
        'O' => "OpenQuestion",
        'Q' => "Claim",
        'W' => "Insight",
        'M' => "Reference",
        'T' => "Term",
        'S' => "Status",
        'L' => "Doubt",
        _ => code,
    }
}
173
/// Maps a relation name (e.g. `"DEPENDS_ON"`) to its one-letter code.
///
/// Several relations have two spellings that share one code (for example
/// `DOCUMENTED_IN` and `DOCUMENTS` both map to `D`). Matching is exact; an
/// unknown relation is returned unchanged.
fn relation_to_code(relation: &str) -> &str {
    const RELATION_CODES: &[(&str, &str)] = &[
        ("DOCUMENTED_IN", "D"),
        ("DOCUMENTS", "D"),
        ("HAS", "H"),
        ("TRIGGERS", "T"),
        ("AFFECTED_BY", "A"),
        ("AFFECTS", "A"),
        ("READS_FROM", "R"),
        ("READS", "R"),
        ("GOVERNED_BY", "G"),
        ("GOVERNS", "G"),
        ("DEPENDS_ON", "O"),
        ("AVAILABLE_IN", "I"),
        ("SUPPORTS", "S"),
        ("SUMMARIZES", "U"),
        ("RELATED_TO", "L"),
        ("CONTRADICTS", "V"),
        ("CREATED_BY", "C"),
        ("CREATES", "C"),
    ];
    RELATION_CODES
        .iter()
        .find(|&&(name, _)| name == relation)
        .map_or(relation, |&(_, code)| code)
}
192
/// Expands a one-letter relation code (e.g. `"O"`) to its relation name.
///
/// Codes shared by two spellings expand to the first one (`D` becomes
/// `DOCUMENTED_IN`, `A` becomes `AFFECTED_BY`, and so on). Anything that is not
/// a known single-character code is returned unchanged.
fn code_to_relation(code: &str) -> &str {
    // Known codes are exactly one character long.
    let mut letters = code.chars();
    let (Some(letter), None) = (letters.next(), letters.next()) else {
        return code;
    };
    match letter {
        'D' => "DOCUMENTED_IN",
        'H' => "HAS",
        'T' => "TRIGGERS",
        'A' => "AFFECTED_BY",
        'R' => "READS_FROM",
        'G' => "GOVERNED_BY",
        'O' => "DEPENDS_ON",
        'I' => "AVAILABLE_IN",
        'S' => "SUPPORTS",
        'U' => "SUMMARIZES",
        'L' => "RELATED_TO",
        'V' => "CONTRADICTS",
        'C' => "CREATED_BY",
        _ => code,
    }
}
211
/// Returns the two ids as an owned pair ordered so the first is not greater
/// than the second (plain `str` comparison), regardless of argument order.
fn canonicalize_bidirectional_pair(a: &str, b: &str) -> (String, String) {
    let (first, second) = if a <= b { (a, b) } else { (b, a) };
    (first.to_owned(), second.to_owned())
}
219
/// Returns `true` when `value` is an upper-case `C` followed by one or more
/// ASCII digits (e.g. `C1`, `C12`), and `false` for anything else.
fn is_score_component_label(value: &str) -> bool {
    value
        .strip_prefix('C')
        .is_some_and(|digits| !digits.is_empty() && digits.chars().all(|ch| ch.is_ascii_digit()))
}
226
/// Returns a sorted copy of `values`, ordered by their ASCII-lower-cased form
/// and, where those are equal, by the original strings.
fn sort_case_insensitive(values: &[String]) -> Vec<String> {
    // Compares the ASCII-lower-cased bytes lazily. `str` ordering is bytewise,
    // so this yields the same ordering as comparing two lower-cased copies.
    fn cmp_ascii_folded(a: &str, b: &str) -> std::cmp::Ordering {
        let fold = |byte: u8| byte.to_ascii_lowercase();
        a.bytes().map(fold).cmp(b.bytes().map(fold))
    }

    let mut ordered: Vec<String> = values.to_vec();
    ordered.sort_by(|a, b| cmp_ascii_folded(a, b).then_with(|| a.cmp(b)));
    ordered
}
236
/// Reverses the backslash escaping applied to text fields.
///
/// `\n` becomes a newline, `\r` a carriage return and `\\` a single backslash.
/// Any other escape (`\x`) and a lone trailing backslash are kept verbatim.
fn decode_kg_text(value: &str) -> String {
    let mut decoded = String::new();
    let mut remaining = value;
    // Copy everything up to the next backslash, then interpret the character
    // that follows it.
    while let Some(slash_at) = remaining.find('\\') {
        decoded.push_str(&remaining[..slash_at]);
        let mut after_slash = remaining[slash_at + 1..].chars();
        match after_slash.next() {
            Some('n') => decoded.push('\n'),
            Some('r') => decoded.push('\r'),
            Some('\\') => decoded.push('\\'),
            Some(unknown) => {
                decoded.push('\\');
                decoded.push(unknown);
            }
            None => decoded.push('\\'),
        }
        remaining = after_slash.as_str();
    }
    decoded.push_str(remaining);
    decoded
}
258
/// Escapes text so it fits on a single line: a backslash becomes `\\`, a
/// newline becomes `\n` and a carriage return becomes `\r`. All other
/// characters are copied unchanged.
fn escape_kg_text(value: &str) -> String {
    value
        .chars()
        .fold(String::with_capacity(value.len()), |mut escaped, ch| {
            match ch {
                '\\' => escaped.push_str("\\\\"),
                '\n' => escaped.push_str("\\n"),
                '\r' => escaped.push_str("\\r"),
                other => escaped.push(other),
            }
            escaped
        })
}
271
/// Decodes the raw value of a text field by delegating to `decode_kg_text`
/// (which unescapes `\n`, `\r` and `\\`).
fn parse_text_field(value: &str) -> String {
    decode_kg_text(value)
}
275
276fn push_text_line(out: &mut String, key: &str, value: &str) {
277    out.push_str(key);
278    out.push(' ');
279    out.push_str(&escape_kg_text(value));
280    out.push('\n');
281}
282
/// Removes entries that duplicate an earlier one when compared
/// ASCII-case-insensitively. The first occurrence is kept with its original
/// casing, and the relative order of the survivors is unchanged.
fn dedupe_case_insensitive(values: Vec<String>) -> Vec<String> {
    let mut seen_keys = std::collections::HashSet::new();
    values
        .into_iter()
        // `insert` returns false for a key that was already present.
        .filter(|candidate| seen_keys.insert(candidate.to_ascii_lowercase()))
        .collect()
}
294
/// Checks that `value` has the exact shape `YYYY-MM-DDTHH:MM:SSZ` with
/// plausible field ranges.
///
/// The month must be 1-12, the day 1-31, the hour 0-23, and the minute and
/// second 0-59. The day is not checked against the month's length, so a date
/// such as `2024-02-31` is accepted. Despite the name, nothing is parsed into a
/// value; the function only validates.
fn parse_utc_timestamp(value: &str) -> bool {
    // `0` stands for "any ASCII digit"; every other byte must match literally.
    const SHAPE: &[u8; 20] = b"0000-00-00T00:00:00Z";

    let bytes = value.as_bytes();
    if bytes.len() != SHAPE.len() {
        return false;
    }
    let shape_matches = bytes.iter().zip(SHAPE).all(|(&actual, &expected)| {
        if expected == b'0' {
            actual.is_ascii_digit()
        } else {
            actual == expected
        }
    });
    if !shape_matches {
        return false;
    }

    // Both bytes are known to be ASCII digits once the shape check has passed.
    let two_digits = |start: usize| -> u32 {
        u32::from(bytes[start] - b'0') * 10 + u32::from(bytes[start + 1] - b'0')
    };
    let month = two_digits(5);
    let day = two_digits(8);
    let hour = two_digits(11);
    let minute = two_digits(14);
    let second = two_digits(17);

    (1..=12).contains(&month)
        && (1..=31).contains(&day)
        && hour <= 23
        && minute <= 59
        && second <= 59
}
336
/// Reports whether strict format checking is requested through the
/// `KG_STRICT_FORMAT` environment variable.
///
/// Returns `true` only when the variable is set and its trimmed,
/// ASCII-lower-cased value is `1`, `true`, `yes` or `on`. An unset or
/// unreadable variable counts as `false`.
fn strict_kg_mode() -> bool {
    match std::env::var("KG_STRICT_FORMAT") {
        Ok(raw) => {
            let flag = raw.trim().to_ascii_lowercase();
            ["1", "true", "yes", "on"].contains(&flag.as_str())
        }
        Err(_) => false,
    }
}
346
/// Trims `line` and returns at most its first 160 characters, with `...`
/// appended when anything was cut off.
///
/// The limit is counted in `char`s, so multi-byte characters are never split.
fn abbreviated_line(line: &str) -> String {
    const MAX_CHARS: usize = 160;
    let mut remaining = line.trim().chars();
    let mut shortened: String = remaining.by_ref().take(MAX_CHARS).collect();
    // Anything still left in the iterator means the line was truncated.
    if remaining.next().is_some() {
        shortened.push_str("...");
    }
    shortened
}
360
361fn line_fragment(line: &str) -> String {
362    let snippet = abbreviated_line(line);
363    if snippet.is_empty() {
364        "fragment: <empty line>".to_owned()
365    } else {
366        format!("fragment: {snippet}")
367    }
368}
369
370fn json_error_detail(label: &str, path: &Path, raw: &str, error: &serde_json::Error) -> String {
371    let line_no = error.line();
372    let column = error.column();
373    let fragment = raw
374        .lines()
375        .nth(line_no.saturating_sub(1))
376        .map(line_fragment)
377        .unwrap_or_else(|| "fragment: <unavailable>".to_owned());
378    format!(
379        "{label}: {} at line {line_no}, column {column}: {error}\n{fragment}",
380        path.display()
381    )
382}
383
384fn validate_len(
385    line_no: usize,
386    field: &str,
387    value: &str,
388    raw_line: &str,
389    min: usize,
390    max: usize,
391    strict: bool,
392) -> Result<()> {
393    let len = value.chars().count();
394    if strict && (len < min || len > max) {
395        return Err(anyhow::anyhow!(
396            "invalid {field} length at line {line_no}: expected {min}..={max}, got {len}\n{}",
397            line_fragment(raw_line)
398        ));
399    }
400    Ok(())
401}
402
403fn enforce_field_order(
404    line_no: usize,
405    key: &str,
406    rank: u8,
407    last_rank: &mut u8,
408    section: &str,
409    raw_line: &str,
410    strict: bool,
411) -> Result<()> {
412    if strict && rank < *last_rank {
413        return Err(anyhow::anyhow!(
414            "invalid field order at line {line_no}: {key} in {section} block\n{}",
415            line_fragment(raw_line)
416        ));
417    }
418    if rank > *last_rank {
419        *last_rank = rank;
420    }
421    Ok(())
422}
423
/// Extracts the value of a `<key> <value>` line.
///
/// Returns `Some("")` when the line is exactly `key`, `Some(value)` when the
/// line is `key`, one space, then `value` (only that single space is removed),
/// and `None` in every other case, including when `key` is followed by
/// anything other than a space.
fn field_value<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    match line.strip_prefix(key)? {
        // The line is the bare key: an empty value.
        "" => Some(""),
        after_key => after_key.strip_prefix(' '),
    }
}
432
433fn fail_or_warn(strict: bool, warnings: &mut Vec<String>, message: String) -> Result<()> {
434    if strict {
435        Err(anyhow::anyhow!(message))
436    } else {
437        warnings.push(message);
438        Ok(())
439    }
440}
441
/// Test-only convenience wrapper around `parse_kg_with_warnings` that returns
/// just the parsed graph and discards any collected warnings.
#[cfg(test)]
fn parse_kg(raw: &str, graph_name: &str, strict: bool) -> Result<GraphFile> {
    parse_kg_with_warnings(raw, graph_name, strict).map(|(graph, _warnings)| graph)
}
446
447fn parse_kg_with_warnings(
448    raw: &str,
449    graph_name: &str,
450    strict: bool,
451) -> Result<(GraphFile, Vec<String>)> {
452    let mut graph = GraphFile::new(graph_name);
453    let mut warnings = Vec::new();
454    let mut current_node: Option<Node> = None;
455    let mut current_note: Option<Note> = None;
456    let mut current_edge_index: Option<usize> = None;
457    let mut last_node_rank: u8 = 0;
458    let mut last_note_rank: u8 = 0;
459    let mut last_edge_rank: u8 = 0;
460
461    for (idx, line) in raw.lines().enumerate() {
462        let line_no = idx + 1;
463        let raw_line = line.strip_suffix('\r').unwrap_or(line);
464        let trimmed = raw_line.trim();
465        if trimmed.is_empty() || trimmed.starts_with('#') {
466            continue;
467        }
468
469        if let Some(rest) = trimmed.strip_prefix("@ ") {
470            if let Some(note) = current_note.take() {
471                graph.notes.push(note);
472            }
473            if let Some(node) = current_node.take() {
474                graph.nodes.push(node);
475            }
476            let Some((type_code, node_id)) = rest.split_once(':') else {
477                fail_or_warn(
478                    strict,
479                    &mut warnings,
480                    format!("invalid node header at line {line_no}: {trimmed}"),
481                )?;
482                current_edge_index = None;
483                continue;
484            };
485            let parsed_id = {
486                let raw_id = node_id.trim();
487                if raw_id.contains(':') {
488                    crate::validate::normalize_node_id(raw_id)
489                } else if code_to_node_type(type_code.trim()) != type_code.trim() {
490                    crate::validate::normalize_node_id(&format!("{}:{raw_id}", type_code.trim()))
491                } else {
492                    format!("{}:{raw_id}", type_code.trim())
493                }
494            };
495            current_node = Some(Node {
496                id: parsed_id,
497                r#type: code_to_node_type(type_code.trim()).to_owned(),
498                name: String::new(),
499                properties: NodeProperties::default(),
500                source_files: Vec::new(),
501            });
502            current_edge_index = None;
503            last_node_rank = 0;
504            last_edge_rank = 0;
505            continue;
506        }
507
508        if let Some(rest) = trimmed.strip_prefix("! ") {
509            if let Some(node) = current_node.take() {
510                graph.nodes.push(node);
511            }
512            if let Some(note) = current_note.take() {
513                graph.notes.push(note);
514            }
515            let mut parts = rest.split_whitespace();
516            let Some(id) = parts.next() else {
517                fail_or_warn(
518                    strict,
519                    &mut warnings,
520                    format!("invalid note header at line {line_no}: {trimmed}"),
521                )?;
522                current_edge_index = None;
523                continue;
524            };
525            let Some(node_id) = parts.next() else {
526                fail_or_warn(
527                    strict,
528                    &mut warnings,
529                    format!("invalid note header at line {line_no}: {trimmed}"),
530                )?;
531                current_edge_index = None;
532                continue;
533            };
534            current_note = Some(Note {
535                id: id.to_owned(),
536                node_id: node_id.to_owned(),
537                ..Default::default()
538            });
539            current_edge_index = None;
540            last_note_rank = 0;
541            continue;
542        }
543
544        if let Some(note) = current_note.as_mut() {
545            if let Some(rest) = field_value(raw_line, "b") {
546                enforce_field_order(
547                    line_no,
548                    "b",
549                    1,
550                    &mut last_note_rank,
551                    "note",
552                    raw_line,
553                    strict,
554                )?;
555                note.body = parse_text_field(rest);
556                continue;
557            }
558            if let Some(rest) = field_value(raw_line, "t") {
559                enforce_field_order(
560                    line_no,
561                    "t",
562                    2,
563                    &mut last_note_rank,
564                    "note",
565                    raw_line,
566                    strict,
567                )?;
568                let value = parse_text_field(rest);
569                if !value.is_empty() {
570                    note.tags.push(value);
571                }
572                continue;
573            }
574            if let Some(rest) = field_value(raw_line, "a") {
575                enforce_field_order(
576                    line_no,
577                    "a",
578                    3,
579                    &mut last_note_rank,
580                    "note",
581                    raw_line,
582                    strict,
583                )?;
584                note.author = parse_text_field(rest);
585                continue;
586            }
587            if let Some(rest) = field_value(raw_line, "e") {
588                enforce_field_order(
589                    line_no,
590                    "e",
591                    4,
592                    &mut last_note_rank,
593                    "note",
594                    raw_line,
595                    strict,
596                )?;
597                note.created_at = rest.trim().to_owned();
598                continue;
599            }
600            if let Some(rest) = field_value(raw_line, "p") {
601                enforce_field_order(
602                    line_no,
603                    "p",
604                    5,
605                    &mut last_note_rank,
606                    "note",
607                    raw_line,
608                    strict,
609                )?;
610                note.provenance = parse_text_field(rest);
611                continue;
612            }
613            if let Some(rest) = field_value(raw_line, "s") {
614                enforce_field_order(
615                    line_no,
616                    "s",
617                    6,
618                    &mut last_note_rank,
619                    "note",
620                    raw_line,
621                    strict,
622                )?;
623                let value = parse_text_field(rest);
624                if !value.is_empty() {
625                    note.source_files.push(value);
626                }
627                continue;
628            }
629            fail_or_warn(
630                strict,
631                &mut warnings,
632                format!("unrecognized note line at {line_no}: {trimmed}"),
633            )?;
634            continue;
635        }
636
637        let Some(node) = current_node.as_mut() else {
638            fail_or_warn(
639                strict,
640                &mut warnings,
641                format!("unexpected line before first node at line {line_no}: {trimmed}"),
642            )?;
643            continue;
644        };
645
646        if let Some(rest) = field_value(raw_line, "N") {
647            enforce_field_order(
648                line_no,
649                "N",
650                1,
651                &mut last_node_rank,
652                "node",
653                raw_line,
654                strict,
655            )?;
656            let value = parse_text_field(rest);
657            validate_len(line_no, "N", &value, raw_line, 1, 120, strict)?;
658            node.name = value;
659            continue;
660        }
661        if let Some(rest) = field_value(raw_line, "D") {
662            enforce_field_order(
663                line_no,
664                "D",
665                2,
666                &mut last_node_rank,
667                "node",
668                raw_line,
669                strict,
670            )?;
671            let value = parse_text_field(rest);
672            validate_len(line_no, "D", &value, raw_line, 1, 200, strict)?;
673            node.properties.description = value;
674            continue;
675        }
676        if let Some(rest) = field_value(raw_line, "A") {
677            enforce_field_order(
678                line_no,
679                "A",
680                3,
681                &mut last_node_rank,
682                "node",
683                raw_line,
684                strict,
685            )?;
686            let value = parse_text_field(rest);
687            validate_len(line_no, "A", &value, raw_line, 1, 80, strict)?;
688            node.properties.alias.push(value);
689            continue;
690        }
691        if let Some(rest) = field_value(raw_line, "F") {
692            enforce_field_order(
693                line_no,
694                "F",
695                4,
696                &mut last_node_rank,
697                "node",
698                raw_line,
699                strict,
700            )?;
701            let value = parse_text_field(rest);
702            validate_len(line_no, "F", &value, raw_line, 1, 200, strict)?;
703            node.properties.key_facts.push(value);
704            continue;
705        }
706        if let Some(rest) = field_value(raw_line, "E") {
707            enforce_field_order(
708                line_no,
709                "E",
710                5,
711                &mut last_node_rank,
712                "node",
713                raw_line,
714                strict,
715            )?;
716            let value = rest.trim();
717            if !value.is_empty() && !parse_utc_timestamp(value) {
718                fail_or_warn(
719                    strict,
720                    &mut warnings,
721                    format!(
722                        "invalid E timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ\n{}",
723                        line_fragment(raw_line)
724                    ),
725                )?;
726                continue;
727            }
728            node.properties.created_at = value.to_owned();
729            continue;
730        }
731        if let Some(rest) = field_value(raw_line, "C") {
732            enforce_field_order(
733                line_no,
734                "C",
735                6,
736                &mut last_node_rank,
737                "node",
738                raw_line,
739                strict,
740            )?;
741            if !rest.trim().is_empty() {
742                node.properties.confidence = rest.trim().parse::<f64>().ok();
743            }
744            continue;
745        }
746        if let Some(rest) = field_value(raw_line, "V") {
747            enforce_field_order(
748                line_no,
749                "V",
750                7,
751                &mut last_node_rank,
752                "node",
753                raw_line,
754                strict,
755            )?;
756            if let Ok(value) = rest.trim().parse::<f64>() {
757                node.properties.importance = value;
758            }
759            continue;
760        }
761        if let Some(rest) = field_value(raw_line, "P") {
762            enforce_field_order(
763                line_no,
764                "P",
765                8,
766                &mut last_node_rank,
767                "node",
768                raw_line,
769                strict,
770            )?;
771            node.properties.provenance = parse_text_field(rest);
772            continue;
773        }
774        if let Some(rest) = field_value(raw_line, "S") {
775            enforce_field_order(
776                line_no,
777                "S",
778                10,
779                &mut last_node_rank,
780                "node",
781                raw_line,
782                strict,
783            )?;
784            let value = parse_text_field(rest);
785            validate_len(line_no, "S", &value, raw_line, 1, 200, strict)?;
786            node.source_files.push(value);
787            continue;
788        }
789
790        if let Some(rest) = trimmed.strip_prefix("> ") {
791            let mut parts = rest.split_whitespace();
792            let Some(relation) = parts.next() else {
793                fail_or_warn(
794                    strict,
795                    &mut warnings,
796                    format!("missing relation in edge at line {line_no}: {trimmed}"),
797                )?;
798                current_edge_index = None;
799                continue;
800            };
801            let Some(target_id) = parts.next() else {
802                fail_or_warn(
803                    strict,
804                    &mut warnings,
805                    format!("missing target id in edge at line {line_no}: {trimmed}"),
806                )?;
807                current_edge_index = None;
808                continue;
809            };
810            graph.edges.push(Edge {
811                source_id: node.id.clone(),
812                relation: code_to_relation(relation).to_owned(),
813                target_id: crate::validate::normalize_node_id(target_id),
814                properties: EdgeProperties::default(),
815            });
816            current_edge_index = Some(graph.edges.len() - 1);
817            last_edge_rank = 0;
818            continue;
819        }
820
821        if let Some(rest) = trimmed.strip_prefix("= ") {
822            let mut parts = rest.split_whitespace();
823            let Some(relation) = parts.next() else {
824                fail_or_warn(
825                    strict,
826                    &mut warnings,
827                    format!("missing relation in bidirectional edge at line {line_no}: {trimmed}"),
828                )?;
829                current_edge_index = None;
830                continue;
831            };
832            let Some(target_id) = parts.next() else {
833                fail_or_warn(
834                    strict,
835                    &mut warnings,
836                    format!("missing target id in bidirectional edge at line {line_no}: {trimmed}"),
837                )?;
838                current_edge_index = None;
839                continue;
840            };
841            let relation = code_to_relation(relation).to_owned();
842            if relation != "~" {
843                fail_or_warn(
844                    strict,
845                    &mut warnings,
846                    format!(
847                        "invalid bidirectional relation at line {line_no}: expected '~', got '{}'",
848                        relation
849                    ),
850                )?;
851                current_edge_index = None;
852                continue;
853            }
854
855            let target_id = crate::validate::normalize_node_id(target_id);
856            let (source_id, target_id) = canonicalize_bidirectional_pair(&node.id, &target_id);
857            graph.edges.push(Edge {
858                source_id,
859                relation,
860                target_id,
861                properties: EdgeProperties {
862                    bidirectional: true,
863                    ..EdgeProperties::default()
864                },
865            });
866            current_edge_index = Some(graph.edges.len() - 1);
867            last_edge_rank = 0;
868            continue;
869        }
870
871        if let Some(rest) = field_value(raw_line, "d") {
872            enforce_field_order(
873                line_no,
874                "d",
875                1,
876                &mut last_edge_rank,
877                "edge",
878                raw_line,
879                strict,
880            )?;
881            let Some(edge_idx) = current_edge_index else {
882                fail_or_warn(
883                    strict,
884                    &mut warnings,
885                    format!(
886                        "edge detail without preceding edge at line {line_no}\n{}",
887                        line_fragment(raw_line)
888                    ),
889                )?;
890                continue;
891            };
892            let trimmed_rest = rest.trim();
893            let mut parts = trimmed_rest.split_whitespace();
894            if let (Some(label), Some(raw_score), None) = (parts.next(), parts.next(), parts.next())
895            {
896                if is_score_component_label(label) {
897                    let score = raw_score.parse::<f64>().map_err(|_| {
898                        anyhow::anyhow!(
899                            "invalid score component value at line {line_no}: expected number in '{}', got '{}'",
900                            line_fragment(raw_line),
901                            raw_score
902                        )
903                    })?;
904                    graph.edges[edge_idx]
905                        .properties
906                        .score_components
907                        .insert(label.to_owned(), score);
908                    continue;
909                }
910            }
911
912            let value = parse_text_field(rest);
913            validate_len(line_no, "d", &value, raw_line, 1, 200, strict)?;
914            graph.edges[edge_idx].properties.detail = value;
915            continue;
916        }
917
918        if let Some(rest) = field_value(raw_line, "i") {
919            enforce_field_order(
920                line_no,
921                "i",
922                2,
923                &mut last_edge_rank,
924                "edge",
925                raw_line,
926                strict,
927            )?;
928            let Some(edge_idx) = current_edge_index else {
929                fail_or_warn(
930                    strict,
931                    &mut warnings,
932                    format!(
933                        "edge valid_from without preceding edge at line {line_no}\n{}",
934                        line_fragment(raw_line)
935                    ),
936                )?;
937                continue;
938            };
939            let value = rest.trim();
940            if !value.is_empty() && !parse_utc_timestamp(value) {
941                fail_or_warn(
942                    strict,
943                    &mut warnings,
944                    format!(
945                        "invalid i timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ\n{}",
946                        line_fragment(raw_line)
947                    ),
948                )?;
949                continue;
950            }
951            graph.edges[edge_idx].properties.valid_from = value.to_owned();
952            continue;
953        }
954
955        if let Some(rest) = field_value(raw_line, "x") {
956            enforce_field_order(
957                line_no,
958                "x",
959                3,
960                &mut last_edge_rank,
961                "edge",
962                raw_line,
963                strict,
964            )?;
965            let Some(edge_idx) = current_edge_index else {
966                fail_or_warn(
967                    strict,
968                    &mut warnings,
969                    format!(
970                        "edge valid_to without preceding edge at line {line_no}\n{}",
971                        line_fragment(raw_line)
972                    ),
973                )?;
974                continue;
975            };
976            let value = rest.trim();
977            if !value.is_empty() && !parse_utc_timestamp(value) {
978                fail_or_warn(
979                    strict,
980                    &mut warnings,
981                    format!(
982                        "invalid x timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ\n{}",
983                        line_fragment(raw_line)
984                    ),
985                )?;
986                continue;
987            }
988            graph.edges[edge_idx].properties.valid_to = value.to_owned();
989            continue;
990        }
991
992        if let Some(rest) = field_value(raw_line, "-") {
993            let (key, value) = rest
994                .split_once(char::is_whitespace)
995                .map(|(key, value)| (key.trim(), value))
996                .unwrap_or((rest.trim(), ""));
997            let is_edge_custom = matches!(
998                key,
999                "edge_feedback_score" | "edge_feedback_count" | "edge_feedback_last_ts_ms"
1000            );
1001            if is_edge_custom {
1002                enforce_field_order(
1003                    line_no,
1004                    "-",
1005                    4,
1006                    &mut last_edge_rank,
1007                    "edge",
1008                    raw_line,
1009                    strict,
1010                )?;
1011            } else {
1012                enforce_field_order(
1013                    line_no,
1014                    "-",
1015                    9,
1016                    &mut last_node_rank,
1017                    "node",
1018                    raw_line,
1019                    strict,
1020                )?;
1021            }
1022            match key {
1023                "domain_area" => node.properties.domain_area = parse_text_field(value),
1024                "feedback_score" => {
1025                    node.properties.feedback_score = value.trim().parse::<f64>().unwrap_or(0.0)
1026                }
1027                "feedback_count" => {
1028                    node.properties.feedback_count = value.trim().parse::<u64>().unwrap_or(0)
1029                }
1030                "feedback_last_ts_ms" => {
1031                    node.properties.feedback_last_ts_ms = value.trim().parse::<u64>().ok()
1032                }
1033                "edge_feedback_score" => {
1034                    if let Some(edge_idx) = current_edge_index {
1035                        graph.edges[edge_idx].properties.feedback_score =
1036                            value.trim().parse::<f64>().unwrap_or(0.0);
1037                    }
1038                }
1039                "edge_feedback_count" => {
1040                    if let Some(edge_idx) = current_edge_index {
1041                        graph.edges[edge_idx].properties.feedback_count =
1042                            value.trim().parse::<u64>().unwrap_or(0);
1043                    }
1044                }
1045                "edge_feedback_last_ts_ms" => {
1046                    if let Some(edge_idx) = current_edge_index {
1047                        graph.edges[edge_idx].properties.feedback_last_ts_ms =
1048                            value.trim().parse::<u64>().ok();
1049                    }
1050                }
1051                _ => {}
1052            }
1053            continue;
1054        }
1055
1056        fail_or_warn(
1057            strict,
1058            &mut warnings,
1059            format!("unrecognized line at {line_no}: {trimmed}"),
1060        )?;
1061    }
1062
1063    if let Some(node) = current_node.take() {
1064        graph.nodes.push(node);
1065    }
1066    if let Some(note) = current_note.take() {
1067        graph.notes.push(note);
1068    }
1069
1070    for node in &mut graph.nodes {
1071        node.properties.alias =
1072            sort_case_insensitive(&dedupe_case_insensitive(node.properties.alias.clone()));
1073        node.properties.key_facts =
1074            sort_case_insensitive(&dedupe_case_insensitive(node.properties.key_facts.clone()));
1075        node.source_files =
1076            sort_case_insensitive(&dedupe_case_insensitive(node.source_files.clone()));
1077    }
1078
1079    graph.edges.sort_by(|a, b| {
1080        a.source_id
1081            .cmp(&b.source_id)
1082            .then_with(|| a.relation.cmp(&b.relation))
1083            .then_with(|| a.target_id.cmp(&b.target_id))
1084            .then_with(|| a.properties.bidirectional.cmp(&b.properties.bidirectional))
1085            .then_with(|| a.properties.detail.cmp(&b.properties.detail))
1086    });
1087
1088    for note in &mut graph.notes {
1089        note.tags = sort_case_insensitive(&dedupe_case_insensitive(note.tags.clone()));
1090        note.source_files =
1091            sort_case_insensitive(&dedupe_case_insensitive(note.source_files.clone()));
1092    }
1093    graph.notes.sort_by(|a, b| {
1094        a.id.cmp(&b.id)
1095            .then_with(|| a.node_id.cmp(&b.node_id))
1096            .then_with(|| a.created_at.cmp(&b.created_at))
1097    });
1098
1099    graph.refresh_counts();
1100    Ok((graph, warnings))
1101}
1102
1103fn serialize_kg(graph: &GraphFile) -> String {
1104    let mut out = String::new();
1105    let mut nodes = graph.nodes.clone();
1106    nodes.sort_by(|a, b| a.id.cmp(&b.id));
1107
1108    for node in nodes {
1109        out.push_str(&format!(
1110            "@ {}:{}\n",
1111            node_type_to_code(&node.r#type),
1112            node.id
1113        ));
1114        push_text_line(&mut out, "N", &node.name);
1115        push_text_line(&mut out, "D", &node.properties.description);
1116
1117        for alias in sort_case_insensitive(&node.properties.alias) {
1118            push_text_line(&mut out, "A", &alias);
1119        }
1120        for fact in sort_case_insensitive(&node.properties.key_facts) {
1121            push_text_line(&mut out, "F", &fact);
1122        }
1123
1124        if !node.properties.created_at.is_empty() {
1125            out.push_str(&format!("E {}\n", node.properties.created_at));
1126        }
1127        if let Some(confidence) = node.properties.confidence {
1128            out.push_str(&format!("C {}\n", confidence));
1129        }
1130        out.push_str(&format!("V {}\n", node.properties.importance));
1131        if !node.properties.provenance.is_empty() {
1132            push_text_line(&mut out, "P", &node.properties.provenance);
1133        }
1134        if !node.properties.domain_area.is_empty() {
1135            out.push_str("- domain_area ");
1136            out.push_str(&escape_kg_text(&node.properties.domain_area));
1137            out.push('\n');
1138        }
1139        if node.properties.feedback_score != 0.0 {
1140            out.push_str(&format!(
1141                "- feedback_score {}\n",
1142                node.properties.feedback_score
1143            ));
1144        }
1145        if node.properties.feedback_count != 0 {
1146            out.push_str(&format!(
1147                "- feedback_count {}\n",
1148                node.properties.feedback_count
1149            ));
1150        }
1151        if let Some(ts) = node.properties.feedback_last_ts_ms {
1152            out.push_str(&format!("- feedback_last_ts_ms {}\n", ts));
1153        }
1154
1155        for source in sort_case_insensitive(&node.source_files) {
1156            push_text_line(&mut out, "S", &source);
1157        }
1158
1159        let mut edges: Vec<Edge> = graph
1160            .edges
1161            .iter()
1162            .filter(|edge| edge.source_id == node.id)
1163            .cloned()
1164            .collect();
1165        edges.sort_by(|a, b| {
1166            a.relation
1167                .cmp(&b.relation)
1168                .then_with(|| a.target_id.cmp(&b.target_id))
1169                .then_with(|| a.properties.bidirectional.cmp(&b.properties.bidirectional))
1170                .then_with(|| a.properties.detail.cmp(&b.properties.detail))
1171        });
1172
1173        for edge in edges {
1174            let op = if edge.properties.bidirectional && edge.relation == "~" {
1175                "="
1176            } else {
1177                ">"
1178            };
1179            out.push_str(&format!(
1180                "{} {} {}\n",
1181                op,
1182                relation_to_code(&edge.relation),
1183                edge.target_id
1184            ));
1185            for (label, score) in &edge.properties.score_components {
1186                out.push_str(&format!("d {} {:.6}\n", label, score));
1187            }
1188            if !edge.properties.detail.is_empty() {
1189                push_text_line(&mut out, "d", &edge.properties.detail);
1190            }
1191            if !edge.properties.valid_from.is_empty() {
1192                out.push_str(&format!("i {}\n", edge.properties.valid_from));
1193            }
1194            if !edge.properties.valid_to.is_empty() {
1195                out.push_str(&format!("x {}\n", edge.properties.valid_to));
1196            }
1197            if edge.properties.feedback_score != 0.0 {
1198                out.push_str(&format!(
1199                    "- edge_feedback_score {}\n",
1200                    edge.properties.feedback_score
1201                ));
1202            }
1203            if edge.properties.feedback_count != 0 {
1204                out.push_str(&format!(
1205                    "- edge_feedback_count {}\n",
1206                    edge.properties.feedback_count
1207                ));
1208            }
1209            if let Some(ts) = edge.properties.feedback_last_ts_ms {
1210                out.push_str(&format!("- edge_feedback_last_ts_ms {}\n", ts));
1211            }
1212        }
1213
1214        out.push('\n');
1215    }
1216
1217    let mut notes = graph.notes.clone();
1218    notes.sort_by(|a, b| {
1219        a.id.cmp(&b.id)
1220            .then_with(|| a.node_id.cmp(&b.node_id))
1221            .then_with(|| a.created_at.cmp(&b.created_at))
1222    });
1223    for note in notes {
1224        out.push_str(&format!("! {} {}\n", note.id, note.node_id));
1225        push_text_line(&mut out, "b", &note.body);
1226        for tag in sort_case_insensitive(&note.tags) {
1227            push_text_line(&mut out, "t", &tag);
1228        }
1229        if !note.author.is_empty() {
1230            push_text_line(&mut out, "a", &note.author);
1231        }
1232        if !note.created_at.is_empty() {
1233            out.push_str(&format!("e {}\n", note.created_at));
1234        }
1235        if !note.provenance.is_empty() {
1236            push_text_line(&mut out, "p", &note.provenance);
1237        }
1238        for source in sort_case_insensitive(&note.source_files) {
1239            push_text_line(&mut out, "s", &source);
1240        }
1241        out.push('\n');
1242    }
1243
1244    out
1245}
1246
1247#[derive(Debug, Clone, Serialize, Deserialize)]
1248pub struct GraphFile {
1249    pub metadata: Metadata,
1250    #[serde(default)]
1251    pub nodes: Vec<Node>,
1252    #[serde(default)]
1253    pub edges: Vec<Edge>,
1254    #[serde(default)]
1255    pub notes: Vec<Note>,
1256}
1257
1258#[derive(Debug, Clone, Serialize, Deserialize)]
1259pub struct Metadata {
1260    pub name: String,
1261    pub version: String,
1262    pub description: String,
1263    pub node_count: usize,
1264    pub edge_count: usize,
1265}
1266
1267#[derive(Debug, Clone, Serialize, Deserialize)]
1268pub struct Node {
1269    pub id: String,
1270    #[serde(rename = "type")]
1271    pub r#type: String,
1272    pub name: String,
1273    #[serde(default)]
1274    pub properties: NodeProperties,
1275    #[serde(default)]
1276    pub source_files: Vec<String>,
1277}
1278
1279#[derive(Debug, Clone, Serialize, Deserialize)]
1280pub struct NodeProperties {
1281    #[serde(default)]
1282    pub description: String,
1283    #[serde(default)]
1284    pub domain_area: String,
1285    #[serde(default)]
1286    pub provenance: String,
1287    #[serde(default)]
1288    pub confidence: Option<f64>,
1289    #[serde(default)]
1290    pub created_at: String,
1291    #[serde(default = "default_importance")]
1292    pub importance: f64,
1293    #[serde(default)]
1294    pub key_facts: Vec<String>,
1295    #[serde(default)]
1296    pub alias: Vec<String>,
1297    #[serde(default)]
1298    pub feedback_score: f64,
1299    #[serde(default)]
1300    pub feedback_count: u64,
1301    #[serde(default)]
1302    pub feedback_last_ts_ms: Option<u64>,
1303}
1304
/// Importance assigned to a node when none is given explicitly.
///
/// Referenced by name from the serde `default` attribute on
/// `NodeProperties::importance` and used by `NodeProperties::default`.
fn default_importance() -> f64 {
    const DEFAULT_IMPORTANCE: f64 = 0.5;
    DEFAULT_IMPORTANCE
}
1308
1309impl Default for NodeProperties {
1310    fn default() -> Self {
1311        Self {
1312            description: String::new(),
1313            domain_area: String::new(),
1314            provenance: String::new(),
1315            confidence: None,
1316            created_at: String::new(),
1317            importance: default_importance(),
1318            key_facts: Vec::new(),
1319            alias: Vec::new(),
1320            feedback_score: 0.0,
1321            feedback_count: 0,
1322            feedback_last_ts_ms: None,
1323        }
1324    }
1325}
1326
1327#[derive(Debug, Clone, Serialize, Deserialize)]
1328pub struct Edge {
1329    pub source_id: String,
1330    pub relation: String,
1331    pub target_id: String,
1332    #[serde(default)]
1333    pub properties: EdgeProperties,
1334}
1335
1336#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1337pub struct EdgeProperties {
1338    #[serde(default)]
1339    pub detail: String,
1340    #[serde(default)]
1341    pub valid_from: String,
1342    #[serde(default)]
1343    pub valid_to: String,
1344    #[serde(default)]
1345    pub feedback_score: f64,
1346    #[serde(default)]
1347    pub feedback_count: u64,
1348    #[serde(default)]
1349    pub feedback_last_ts_ms: Option<u64>,
1350    #[serde(default)]
1351    pub bidirectional: bool,
1352    #[serde(default)]
1353    pub score_components: BTreeMap<String, f64>,
1354}
1355
1356#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1357pub struct Note {
1358    pub id: String,
1359    pub node_id: String,
1360    #[serde(default)]
1361    pub body: String,
1362    #[serde(default)]
1363    pub tags: Vec<String>,
1364    #[serde(default)]
1365    pub author: String,
1366    #[serde(default)]
1367    pub created_at: String,
1368    #[serde(default)]
1369    pub provenance: String,
1370    #[serde(default)]
1371    pub source_files: Vec<String>,
1372}
1373
1374impl GraphFile {
1375    pub fn new(name: &str) -> Self {
1376        Self {
1377            metadata: Metadata {
1378                name: name.to_owned(),
1379                version: "1.0".to_owned(),
1380                description: format!("Knowledge graph: {name}"),
1381                node_count: 0,
1382                edge_count: 0,
1383            },
1384            nodes: Vec::new(),
1385            edges: Vec::new(),
1386            notes: Vec::new(),
1387        }
1388    }
1389
1390    pub fn load(path: &Path) -> Result<Self> {
1391        let raw = fs::read_to_string(path)
1392            .with_context(|| format!("failed to read graph: {}", path.display()))?;
1393        let ext = path
1394            .extension()
1395            .and_then(|ext| ext.to_str())
1396            .unwrap_or("json");
1397        let mut graph = if ext == "kg" {
1398            if raw.trim_start().starts_with('{') {
1399                serde_json::from_str(&raw).map_err(|error| {
1400                    anyhow::anyhow!(json_error_detail(
1401                        "invalid legacy JSON payload in .kg file",
1402                        path,
1403                        &raw,
1404                        &error,
1405                    ))
1406                })?
1407            } else {
1408                let graph_name = path
1409                    .file_stem()
1410                    .and_then(|stem| stem.to_str())
1411                    .unwrap_or("graph");
1412                let (graph, warnings) = parse_kg_with_warnings(&raw, graph_name, strict_kg_mode())
1413                    .with_context(|| format!("failed to parse .kg graph: {}", path.display()))?;
1414                for warning in warnings {
1415                    let _ = crate::kg_sidecar::append_warning(
1416                        path,
1417                        &format!(
1418                            "ignored invalid graph entry in {}: {warning}",
1419                            path.display()
1420                        ),
1421                    );
1422                }
1423                graph
1424            }
1425        } else {
1426            serde_json::from_str(&raw).map_err(|error| {
1427                anyhow::anyhow!(json_error_detail("invalid JSON", path, &raw, &error))
1428            })?
1429        };
1430        normalize_graph_ids(&mut graph);
1431        let created_graph_info = ensure_graph_info_node(&mut graph);
1432        graph.refresh_counts();
1433        if created_graph_info {
1434            graph.save(path)?;
1435        }
1436        Ok(graph)
1437    }
1438
1439    pub fn save(&self, path: &Path) -> Result<()> {
1440        let mut graph = self.clone();
1441        ensure_graph_info_node(&mut graph);
1442        graph.refresh_counts();
1443        let ext = path
1444            .extension()
1445            .and_then(|ext| ext.to_str())
1446            .unwrap_or("json");
1447        let raw = if ext == "kg" {
1448            serialize_kg(&graph)
1449        } else {
1450            serde_json::to_string_pretty(&graph).context("failed to serialize graph")?
1451        };
1452        atomic_write(path, &raw)?;
1453        backup_graph_if_stale(path, &raw)
1454    }
1455
1456    pub fn refresh_counts(&mut self) {
1457        self.metadata.node_count = self.nodes.len();
1458        self.metadata.edge_count = self.edges.len();
1459    }
1460
1461    pub fn node_by_id(&self, id: &str) -> Option<&Node> {
1462        self.nodes.iter().find(|node| node.id == id)
1463    }
1464
1465    pub fn node_by_id_sorted(&self, id: &str) -> Option<&Node> {
1466        self.nodes
1467            .binary_search_by(|node| node.id.as_str().cmp(id))
1468            .ok()
1469            .and_then(|idx| self.nodes.get(idx))
1470    }
1471
1472    pub fn node_by_id_mut(&mut self, id: &str) -> Option<&mut Node> {
1473        self.nodes.iter_mut().find(|node| node.id == id)
1474    }
1475
1476    pub fn has_edge(&self, source_id: &str, relation: &str, target_id: &str) -> bool {
1477        self.edges.iter().any(|edge| {
1478            edge.source_id == source_id && edge.relation == relation && edge.target_id == target_id
1479        })
1480    }
1481}
1482
1483fn normalize_graph_ids(graph: &mut GraphFile) {
1484    let mut remap: HashMap<String, String> = HashMap::new();
1485    for node in &mut graph.nodes {
1486        let normalized = crate::validate::normalize_node_id(&node.id);
1487        if normalized != node.id {
1488            remap.insert(node.id.clone(), normalized.clone());
1489            node.id = normalized;
1490        }
1491    }
1492
1493    for edge in &mut graph.edges {
1494        edge.source_id = remap
1495            .get(&edge.source_id)
1496            .cloned()
1497            .unwrap_or_else(|| crate::validate::normalize_node_id(&edge.source_id));
1498        edge.target_id = remap
1499            .get(&edge.target_id)
1500            .cloned()
1501            .unwrap_or_else(|| crate::validate::normalize_node_id(&edge.target_id));
1502        if edge.properties.bidirectional {
1503            let (source_id, target_id) =
1504                canonicalize_bidirectional_pair(&edge.source_id, &edge.target_id);
1505            edge.source_id = source_id;
1506            edge.target_id = target_id;
1507        }
1508    }
1509
1510    for note in &mut graph.notes {
1511        note.node_id = remap
1512            .get(&note.node_id)
1513            .cloned()
1514            .unwrap_or_else(|| crate::validate::normalize_node_id(&note.node_id));
1515    }
1516}
1517
1518fn ensure_graph_info_node(graph: &mut GraphFile) -> bool {
1519    if let Some(node) = graph.node_by_id_mut(GRAPH_INFO_NODE_ID) {
1520        let mut changed = false;
1521        if node.r#type != GRAPH_INFO_NODE_TYPE {
1522            node.r#type = GRAPH_INFO_NODE_TYPE.to_owned();
1523            changed = true;
1524        }
1525        if node.name.is_empty() {
1526            node.name = "Graph Metadata".to_owned();
1527            changed = true;
1528        }
1529        if node.properties.description.is_empty() {
1530            node.properties.description =
1531                "Internal graph metadata for cross-graph linking".to_owned();
1532            changed = true;
1533        }
1534        if !node
1535            .properties
1536            .key_facts
1537            .iter()
1538            .any(|fact| fact.starts_with(GRAPH_UUID_FACT_PREFIX))
1539        {
1540            node.properties
1541                .key_facts
1542                .push(format!("{GRAPH_UUID_FACT_PREFIX}{}", generate_graph_uuid()));
1543            changed = true;
1544        }
1545        return changed;
1546    }
1547
1548    graph.nodes.push(Node {
1549        id: GRAPH_INFO_NODE_ID.to_owned(),
1550        r#type: GRAPH_INFO_NODE_TYPE.to_owned(),
1551        name: "Graph Metadata".to_owned(),
1552        properties: NodeProperties {
1553            description: "Internal graph metadata for cross-graph linking".to_owned(),
1554            domain_area: "internal_metadata".to_owned(),
1555            provenance: "A".to_owned(),
1556            importance: 1.0,
1557            key_facts: vec![format!("{GRAPH_UUID_FACT_PREFIX}{}", generate_graph_uuid())],
1558            ..NodeProperties::default()
1559        },
1560        source_files: vec!["DOC .kg/internal/graph_info".to_owned()],
1561    });
1562    true
1563}
1564
/// Produces a 20-character lowercase hexadecimal identifier (10 bytes).
///
/// The bytes are read from `/dev/urandom`. If that file cannot be opened or
/// read, they are derived instead from the current time in nanoseconds mixed
/// with the process id.
fn generate_graph_uuid() -> String {
    use std::io::Read;

    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut entropy = [0u8; 10];
    let os_random = fs::File::open("/dev/urandom")
        .and_then(|mut device| device.read_exact(&mut entropy));

    if os_random.is_err() {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_nanos();
        let pid = std::process::id() as u128;
        let mixed = nanos ^ (pid << 64) ^ (nanos.rotate_left(17));
        // Keep the 10 least significant bytes of the 16-byte value.
        entropy.copy_from_slice(&mixed.to_be_bytes()[6..]);
    }

    let mut out = String::with_capacity(entropy.len() * 2);
    for byte in entropy {
        out.push(HEX_DIGITS[usize::from(byte >> 4)] as char);
        out.push(HEX_DIGITS[usize::from(byte & 0x0f)] as char);
    }
    out
}
1588
1589#[cfg(test)]
1590mod tests {
1591    use super::{
1592        GRAPH_INFO_NODE_ID, GRAPH_INFO_NODE_TYPE, GRAPH_UUID_FACT_PREFIX, GraphFile, parse_kg,
1593    };
1594
1595    #[test]
1596    fn save_and_load_kg_roundtrip_keeps_core_fields() {
1597        let dir = tempfile::tempdir().expect("temp dir");
1598        let path = dir.path().join("graph.kg");
1599
1600        let mut graph = GraphFile::new("graph");
1601        graph.nodes.push(crate::Node {
1602            id: "concept:refrigerator".to_owned(),
1603            r#type: "Concept".to_owned(),
1604            name: "Lodowka".to_owned(),
1605            properties: crate::NodeProperties {
1606                description: "Urzadzenie chlodzace".to_owned(),
1607                provenance: "U".to_owned(),
1608                created_at: "2026-04-04T12:00:00Z".to_owned(),
1609                importance: 5.0,
1610                key_facts: vec!["A".to_owned(), "b".to_owned()],
1611                alias: vec!["Fridge".to_owned()],
1612                ..Default::default()
1613            },
1614            source_files: vec!["docs/fridge.md".to_owned()],
1615        });
1616        graph.edges.push(crate::Edge {
1617            source_id: "concept:refrigerator".to_owned(),
1618            relation: "READS_FROM".to_owned(),
1619            target_id: "datastore:settings".to_owned(),
1620            properties: crate::EdgeProperties {
1621                detail: "runtime read".to_owned(),
1622                valid_from: "2026-04-04T12:00:00Z".to_owned(),
1623                valid_to: "2026-04-05T12:00:00Z".to_owned(),
1624                ..Default::default()
1625            },
1626        });
1627
1628        graph.save(&path).expect("save kg");
1629        let raw = std::fs::read_to_string(&path).expect("read kg");
1630        assert!(raw.contains("@ K:concept:refrigerator"));
1631        assert!(raw.contains("> R datastore:settings"));
1632
1633        let loaded = GraphFile::load(&path).expect("load kg");
1634        assert_eq!(loaded.nodes.len(), 2);
1635        assert_eq!(loaded.edges.len(), 1);
1636        let node = loaded
1637            .node_by_id("concept:refrigerator")
1638            .expect("domain node");
1639        assert_eq!(node.properties.importance, 5.0);
1640        assert_eq!(node.properties.provenance, "U");
1641        assert_eq!(node.name, "Lodowka");
1642        assert_eq!(loaded.edges[0].relation, "READS_FROM");
1643        assert_eq!(loaded.edges[0].properties.detail, "runtime read");
1644        assert_eq!(
1645            loaded.edges[0].properties.valid_from,
1646            "2026-04-04T12:00:00Z"
1647        );
1648        assert_eq!(loaded.edges[0].properties.valid_to, "2026-04-05T12:00:00Z");
1649    }
1650
1651    #[test]
1652    fn load_supports_legacy_json_payload_with_kg_extension() {
1653        let dir = tempfile::tempdir().expect("temp dir");
1654        let path = dir.path().join("legacy.kg");
1655        std::fs::write(
1656            &path,
1657            r#"{
1658  "metadata": {"name": "legacy", "version": "1.0", "description": "x", "node_count": 0, "edge_count": 0},
1659  "nodes": [],
1660  "edges": [],
1661  "notes": []
1662}"#,
1663        )
1664        .expect("write legacy payload");
1665
1666        let loaded = GraphFile::load(&path).expect("load legacy kg");
1667        assert_eq!(loaded.metadata.name, "legacy");
1668        assert_eq!(loaded.nodes.len(), 1);
1669        assert!(loaded.node_by_id(GRAPH_INFO_NODE_ID).is_some());
1670    }
1671
1672    #[test]
1673    fn load_kg_ignores_invalid_timestamp_format() {
1674        let dir = tempfile::tempdir().expect("temp dir");
1675        let path = dir.path().join("invalid-ts.kg");
1676        std::fs::write(
1677            &path,
1678            "@ K:concept:x\nN X\nD Desc\nE 2026-04-04 12:00:00\nV 4\nP U\n",
1679        )
1680        .expect("write kg");
1681
1682        let loaded = GraphFile::load(&path).expect("invalid timestamp should be ignored");
1683        assert_eq!(loaded.nodes.len(), 2);
1684        assert!(
1685            loaded
1686                .node_by_id("concept:x")
1687                .expect("concept node")
1688                .properties
1689                .created_at
1690                .is_empty()
1691        );
1692    }
1693
1694    #[test]
1695    fn load_kg_ignores_invalid_edge_timestamp_format() {
1696        let dir = tempfile::tempdir().expect("temp dir");
1697        let path = dir.path().join("invalid-edge-ts.kg");
1698        std::fs::write(
1699            &path,
1700            "@ K:concept:x\nN X\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n> H concept:y\ni 2026-04-04 12:00:00\n",
1701        )
1702        .expect("write kg");
1703
1704        let loaded = GraphFile::load(&path).expect("invalid edge timestamp should be ignored");
1705        assert_eq!(loaded.edges.len(), 1);
1706        assert!(loaded.edges[0].properties.valid_from.is_empty());
1707    }
1708
1709    #[test]
1710    fn load_kg_preserves_whitespace_and_dedupes_exact_duplicates() {
1711        let dir = tempfile::tempdir().expect("temp dir");
1712        let path = dir.path().join("normalize.kg");
1713        std::fs::write(
1714            &path,
1715            "@ K:concept:x\nN  Name   With   Spaces \nD  Desc   with   spaces \nA Alias\nA Alias\nF fact one\nF FACT   one\nS docs/a.md\nS docs/a.md\nE 2026-04-04T12:00:00Z\nV 4\nP U\n",
1716        )
1717        .expect("write kg");
1718
1719        let loaded = GraphFile::load(&path).expect("load kg");
1720        let node = loaded.node_by_id("concept:x").expect("concept node");
1721        assert_eq!(node.name, " Name   With   Spaces ");
1722        assert_eq!(node.properties.description, " Desc   with   spaces ");
1723        assert_eq!(node.properties.alias.len(), 1);
1724        assert_eq!(node.properties.key_facts.len(), 2);
1725        assert_eq!(node.source_files.len(), 1);
1726    }
1727
1728    #[test]
1729    fn save_and_load_kg_roundtrip_keeps_notes_without_json_fallback() {
1730        let dir = tempfile::tempdir().expect("temp dir");
1731        let path = dir.path().join("graph-notes.kg");
1732
1733        let mut graph = GraphFile::new("graph-notes");
1734        graph.nodes.push(crate::Node {
1735            id: "concept:refrigerator".to_owned(),
1736            r#type: "Concept".to_owned(),
1737            name: "Lodowka".to_owned(),
1738            properties: crate::NodeProperties {
1739                description: "Urzadzenie chlodzace".to_owned(),
1740                provenance: "U".to_owned(),
1741                created_at: "2026-04-04T12:00:00Z".to_owned(),
1742                ..Default::default()
1743            },
1744            source_files: vec!["docs/fridge.md".to_owned()],
1745        });
1746        graph.notes.push(crate::Note {
1747            id: "note:1".to_owned(),
1748            node_id: "concept:refrigerator".to_owned(),
1749            body: "Important maintenance insight".to_owned(),
1750            tags: vec!["Maintenance".to_owned(), "maintenance".to_owned()],
1751            author: "alice".to_owned(),
1752            created_at: "1712345678".to_owned(),
1753            provenance: "U".to_owned(),
1754            source_files: vec!["docs/a.md".to_owned(), "docs/a.md".to_owned()],
1755        });
1756
1757        graph.save(&path).expect("save kg");
1758        let raw = std::fs::read_to_string(&path).expect("read kg");
1759        assert!(raw.contains("! note:1 concept:refrigerator"));
1760        assert!(!raw.trim_start().starts_with('{'));
1761
1762        let loaded = GraphFile::load(&path).expect("load kg");
1763        assert_eq!(loaded.notes.len(), 1);
1764        let note = &loaded.notes[0];
1765        assert_eq!(note.id, "note:1");
1766        assert_eq!(note.node_id, "concept:refrigerator");
1767        assert_eq!(note.body, "Important maintenance insight");
1768        assert_eq!(note.tags.len(), 1);
1769        assert_eq!(note.source_files.len(), 1);
1770    }
1771
1772    #[test]
1773    fn save_and_load_kg_roundtrip_preserves_multiline_text_fields() {
1774        let dir = tempfile::tempdir().expect("temp dir");
1775        let path = dir.path().join("graph-multiline.kg");
1776
1777        let mut graph = GraphFile::new("graph-multiline");
1778        graph.nodes.push(crate::Node {
1779            id: "concept:refrigerator".to_owned(),
1780            r#type: "Concept".to_owned(),
1781            name: "Lodowka\nSmart".to_owned(),
1782            properties: crate::NodeProperties {
1783                description: "Linia 1\nLinia 2\\nliteral".to_owned(),
1784                provenance: "user\nimport".to_owned(),
1785                created_at: "2026-04-04T12:00:00Z".to_owned(),
1786                importance: 5.0,
1787                key_facts: vec!["Fakt 1\nFakt 2".to_owned()],
1788                alias: vec!["Alias\nA".to_owned()],
1789                domain_area: "ops\nfield".to_owned(),
1790                ..Default::default()
1791            },
1792            source_files: vec!["docs/fridge\nnotes.md".to_owned()],
1793        });
1794        graph.edges.push(crate::Edge {
1795            source_id: "concept:refrigerator".to_owned(),
1796            relation: "READS_FROM".to_owned(),
1797            target_id: "datastore:settings".to_owned(),
1798            properties: crate::EdgeProperties {
1799                detail: "runtime\nread".to_owned(),
1800                valid_from: "2026-04-04T12:00:00Z".to_owned(),
1801                valid_to: "2026-04-05T12:00:00Z".to_owned(),
1802                ..Default::default()
1803            },
1804        });
1805        graph.notes.push(crate::Note {
1806            id: "note:1".to_owned(),
1807            node_id: "concept:refrigerator".to_owned(),
1808            body: "line1\nline2\\nkeep".to_owned(),
1809            tags: vec!["multi\nline".to_owned()],
1810            author: "alice\nbob".to_owned(),
1811            created_at: "1712345678".to_owned(),
1812            provenance: "manual\nentry".to_owned(),
1813            source_files: vec!["docs/a\nb.md".to_owned()],
1814        });
1815
1816        graph.save(&path).expect("save kg");
1817        let raw = std::fs::read_to_string(&path).expect("read kg");
1818        assert!(raw.contains("N Lodowka\\nSmart"));
1819        assert!(raw.contains("D Linia 1\\nLinia 2\\\\nliteral"));
1820        assert!(raw.contains("- domain_area ops\\nfield"));
1821        assert!(raw.contains("d runtime\\nread"));
1822        assert!(raw.contains("b line1\\nline2\\\\nkeep"));
1823
1824        let loaded = GraphFile::load(&path).expect("load kg");
1825        let node = loaded
1826            .node_by_id("concept:refrigerator")
1827            .expect("domain node");
1828        assert_eq!(node.name, "Lodowka\nSmart");
1829        assert_eq!(node.properties.description, "Linia 1\nLinia 2\\nliteral");
1830        assert_eq!(node.properties.provenance, "user\nimport");
1831        assert_eq!(node.properties.alias, vec!["Alias\nA".to_owned()]);
1832        assert_eq!(node.properties.key_facts, vec!["Fakt 1\nFakt 2".to_owned()]);
1833        assert_eq!(node.properties.domain_area, "ops\nfield");
1834        assert_eq!(node.source_files, vec!["docs/fridge\nnotes.md".to_owned()]);
1835        assert_eq!(loaded.edges[0].properties.detail, "runtime\nread");
1836        let note = &loaded.notes[0];
1837        assert_eq!(note.body, "line1\nline2\\nkeep");
1838        assert_eq!(note.tags, vec!["multi\nline".to_owned()]);
1839        assert_eq!(note.author, "alice\nbob");
1840        assert_eq!(note.provenance, "manual\nentry");
1841        assert_eq!(note.source_files, vec!["docs/a\nb.md".to_owned()]);
1842    }
1843
1844    #[test]
1845    fn parse_bidirectional_similarity_edge_is_canonical_and_scored() {
1846        let raw = "@ ~:dedupe_b\nN B\nD Desc\nV 0.5\nP U\nS docs/b.md\n= ~ ~:dedupe_a\nd C1 0.11\nd C2 0.83\nd 0.91\n\n@ ~:dedupe_a\nN A\nD Desc\nV 0.5\nP U\nS docs/a.md\n";
1847        let graph = parse_kg(raw, "virt", true).expect("parse kg");
1848
1849        assert_eq!(graph.nodes.len(), 2);
1850        assert_eq!(graph.edges.len(), 1);
1851        let edge = &graph.edges[0];
1852        assert_eq!(edge.relation, "~");
1853        assert_eq!(edge.source_id, "~:dedupe_a");
1854        assert_eq!(edge.target_id, "~:dedupe_b");
1855        assert_eq!(edge.properties.detail, "0.91");
1856        assert!(edge.properties.bidirectional);
1857        assert_eq!(edge.properties.score_components.get("C1"), Some(&0.11));
1858        assert_eq!(edge.properties.score_components.get("C2"), Some(&0.83));
1859    }
1860
1861    #[test]
1862    fn serialize_bidirectional_similarity_edge_uses_equals_operator() {
1863        let dir = tempfile::tempdir().expect("temp dir");
1864        let path = dir.path().join("virt.kg");
1865        let mut graph = GraphFile::new("virt");
1866        graph.nodes.push(crate::Node {
1867            id: "~:dedupe_a".to_owned(),
1868            r#type: "~".to_owned(),
1869            name: "A".to_owned(),
1870            properties: crate::NodeProperties {
1871                description: "Desc".to_owned(),
1872                provenance: "U".to_owned(),
1873                created_at: "2026-04-10T00:00:00Z".to_owned(),
1874                importance: 0.6,
1875                ..Default::default()
1876            },
1877            source_files: vec!["docs/a.md".to_owned()],
1878        });
1879        graph.nodes.push(crate::Node {
1880            id: "~:dedupe_b".to_owned(),
1881            r#type: "~".to_owned(),
1882            name: "B".to_owned(),
1883            properties: crate::NodeProperties {
1884                description: "Desc".to_owned(),
1885                provenance: "U".to_owned(),
1886                created_at: "2026-04-10T00:00:00Z".to_owned(),
1887                importance: 0.6,
1888                ..Default::default()
1889            },
1890            source_files: vec!["docs/b.md".to_owned()],
1891        });
1892        graph.edges.push(crate::Edge {
1893            source_id: "~:dedupe_a".to_owned(),
1894            relation: "~".to_owned(),
1895            target_id: "~:dedupe_b".to_owned(),
1896            properties: crate::EdgeProperties {
1897                detail: "0.75".to_owned(),
1898                bidirectional: true,
1899                score_components: std::collections::BTreeMap::from([
1900                    ("C1".to_owned(), 0.2),
1901                    ("C2".to_owned(), 0.8),
1902                ]),
1903                ..Default::default()
1904            },
1905        });
1906
1907        graph.save(&path).expect("save");
1908        let raw = std::fs::read_to_string(&path).expect("read");
1909        assert!(raw.contains("= ~ ~:dedupe_b"));
1910        assert!(raw.contains("d C1 0.200000"));
1911        assert!(raw.contains("d C2 0.800000"));
1912        assert!(!raw.contains("> ~ ~:dedupe_b"));
1913
1914        let loaded = GraphFile::load(&path).expect("load");
1915        assert_eq!(loaded.edges.len(), 1);
1916        assert!(loaded.edges[0].properties.bidirectional);
1917        assert_eq!(loaded.edges[0].properties.detail, "0.75");
1918        assert_eq!(
1919            loaded.edges[0].properties.score_components.get("C1"),
1920            Some(&0.2)
1921        );
1922        assert_eq!(
1923            loaded.edges[0].properties.score_components.get("C2"),
1924            Some(&0.8)
1925        );
1926    }
1927
1928    #[test]
1929    fn strict_mode_rejects_bidirectional_relation_other_than_similarity() {
1930        let raw = "@ K:concept:a\nN A\nD Desc\nV 0.5\nP U\nS docs/a.md\n= HAS concept:b\n";
1931        let err = parse_kg(raw, "x", true).expect_err("strict mode should reject invalid '='");
1932        assert!(format!("{err:#}").contains("expected '~'"));
1933    }
1934
1935    #[test]
1936    fn strict_mode_rejects_out_of_order_node_fields() {
1937        let raw = "@ K:concept:x\nD Desc\nN Name\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n";
1938        let err = parse_kg(raw, "x", true).expect_err("strict mode should fail on field order");
1939        assert!(format!("{err:#}").contains("invalid field order"));
1940    }
1941
1942    #[test]
1943    fn strict_mode_rejects_overlong_name_but_compat_mode_allows_it() {
1944        let long_name = "N ".to_owned() + &"X".repeat(121);
1945        let raw = format!(
1946            "@ K:concept:x\n{}\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n",
1947            long_name
1948        );
1949
1950        let strict_err = parse_kg(&raw, "x", true).expect_err("strict mode should fail on length");
1951        assert!(format!("{strict_err:#}").contains("invalid N length"));
1952
1953        parse_kg(&raw, "x", false).expect("compat mode keeps permissive behavior");
1954    }
1955
1956    #[test]
1957    fn save_kg_skips_empty_e_and_p_fields() {
1958        let dir = tempfile::tempdir().expect("temp dir");
1959        let path = dir.path().join("no-empty-ep.kg");
1960
1961        let mut graph = GraphFile::new("graph");
1962        graph.nodes.push(crate::Node {
1963            id: "concept:x".to_owned(),
1964            r#type: "Concept".to_owned(),
1965            name: "X".to_owned(),
1966            properties: crate::NodeProperties {
1967                description: "Desc".to_owned(),
1968                provenance: String::new(),
1969                created_at: String::new(),
1970                ..Default::default()
1971            },
1972            source_files: vec!["docs/a.md".to_owned()],
1973        });
1974
1975        graph.save(&path).expect("save kg");
1976        let raw = std::fs::read_to_string(&path).expect("read kg");
1977        assert!(!raw.contains("\nE \n"));
1978        assert!(!raw.contains("\nP \n"));
1979    }
1980
1981    #[test]
1982    fn load_generates_graph_info_node_when_missing() {
1983        let dir = tempfile::tempdir().expect("temp dir");
1984        let path = dir.path().join("meta.kg");
1985        let raw = "@ K:concept:x\nN X\nD Desc\nV 0.5\nP U\nS docs/a.md\n";
1986        std::fs::write(&path, raw).expect("write kg");
1987
1988        let loaded = GraphFile::load(&path).expect("load kg");
1989        let info = loaded
1990            .node_by_id(GRAPH_INFO_NODE_ID)
1991            .expect("graph info node should be generated");
1992        assert_eq!(info.r#type, GRAPH_INFO_NODE_TYPE);
1993        assert!(
1994            info.properties
1995                .key_facts
1996                .iter()
1997                .any(|fact| fact.starts_with(GRAPH_UUID_FACT_PREFIX))
1998        );
1999
2000        let persisted = std::fs::read_to_string(&path).expect("read persisted kg");
2001        assert!(persisted.contains("graph_info"));
2002        assert!(persisted.contains("graph_uuid="));
2003    }
2004}