Skip to main content

kg/
graph.rs

1use std::fs;
2use std::io::Write;
3use std::path::Path;
4use std::time::{SystemTime, UNIX_EPOCH};
5
6use anyhow::{Context, Result};
7use flate2::Compression;
8use flate2::write::GzEncoder;
9use serde::{Deserialize, Serialize};
10
11/// Write `data` to `dest` atomically:
12/// 1. Write to `dest.tmp`
13/// 2. If `dest` already exists, copy it to `dest.bak`
14/// 3. Rename `dest.tmp` -> `dest`
15fn atomic_write(dest: &Path, data: &str) -> Result<()> {
16    let tmp = dest.with_extension("tmp");
17    fs::write(&tmp, data).with_context(|| format!("failed to write tmp: {}", tmp.display()))?;
18    if dest.exists() {
19        let bak = dest.with_extension("bak");
20        fs::copy(dest, &bak)
21            .with_context(|| format!("failed to create backup: {}", bak.display()))?;
22    }
23    fs::rename(&tmp, dest).with_context(|| format!("failed to rename tmp to {}", dest.display()))
24}
25
/// Minimum age, in seconds (one hour), that the newest existing backup must
/// reach before `backup_graph_if_stale` writes another one.
const BACKUP_STALE_SECS: u64 = 60 * 60;
27
28fn backup_graph_if_stale(path: &Path, data: &str) -> Result<()> {
29    let parent = match path.parent() {
30        Some(parent) => parent,
31        None => return Ok(()),
32    };
33    let stem = match path.file_stem().and_then(|s| s.to_str()) {
34        Some(stem) => stem,
35        None => return Ok(()),
36    };
37    let now = SystemTime::now()
38        .duration_since(UNIX_EPOCH)
39        .context("time went backwards")?
40        .as_secs();
41    if let Some(latest) = latest_backup_ts(parent, stem)? {
42        if now.saturating_sub(latest) < BACKUP_STALE_SECS {
43            return Ok(());
44        }
45    }
46
47    let backup_path = parent.join(format!("{stem}.bck.{now}.gz"));
48    let tmp_path = backup_path.with_extension("tmp");
49    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
50    encoder.write_all(data.as_bytes())?;
51    let encoded = encoder.finish()?;
52    fs::write(&tmp_path, encoded)
53        .with_context(|| format!("failed to write tmp: {}", tmp_path.display()))?;
54    fs::rename(&tmp_path, &backup_path)
55        .with_context(|| format!("failed to rename tmp to {}", backup_path.display()))?;
56    Ok(())
57}
58
59fn latest_backup_ts(dir: &Path, stem: &str) -> Result<Option<u64>> {
60    let prefix = format!("{stem}.bck.");
61    let suffix = ".gz";
62    let mut latest = None;
63    for entry in fs::read_dir(dir).with_context(|| format!("read dir: {}", dir.display()))? {
64        let entry = entry?;
65        let name = entry.file_name();
66        let name = name.to_string_lossy();
67        if !name.starts_with(&prefix) || !name.ends_with(suffix) {
68            continue;
69        }
70        let ts_part = &name[prefix.len()..name.len() - suffix.len()];
71        if let Ok(ts) = ts_part.parse::<u64>() {
72            match latest {
73                Some(current) => {
74                    if ts > current {
75                        latest = Some(ts);
76                    }
77                }
78                None => latest = Some(ts),
79            }
80        }
81    }
82    Ok(latest)
83}
84
/// Maps a node type name to its one-letter code in the KG text format.
///
/// Names without a known code are returned unchanged; matching is case-sensitive.
fn node_type_to_code(node_type: &str) -> &str {
    const TYPE_CODES: &[(&str, &str)] = &[
        ("Feature", "F"),
        ("Concept", "K"),
        ("Interface", "I"),
        ("Process", "P"),
        ("DataStore", "D"),
        ("Attribute", "A"),
        ("Entity", "Y"),
        ("Note", "N"),
        ("Rule", "R"),
        ("Convention", "C"),
        ("Bug", "B"),
        ("Decision", "Z"),
        ("OpenQuestion", "O"),
        ("Claim", "Q"),
        ("Insight", "W"),
        ("Reference", "M"),
        ("Term", "T"),
        ("Status", "S"),
        ("Doubt", "L"),
    ];
    TYPE_CODES
        .iter()
        .find(|(name, _)| *name == node_type)
        .map_or(node_type, |(_, code)| *code)
}
109
/// Expands a one-letter node type code from the KG text format to its full name.
///
/// Codes that are not recognised are returned unchanged; matching is case-sensitive.
fn code_to_node_type(code: &str) -> &str {
    const CODE_TYPES: &[(&str, &str)] = &[
        ("F", "Feature"),
        ("K", "Concept"),
        ("I", "Interface"),
        ("P", "Process"),
        ("D", "DataStore"),
        ("A", "Attribute"),
        ("Y", "Entity"),
        ("N", "Note"),
        ("R", "Rule"),
        ("C", "Convention"),
        ("B", "Bug"),
        ("Z", "Decision"),
        ("O", "OpenQuestion"),
        ("Q", "Claim"),
        ("W", "Insight"),
        ("M", "Reference"),
        ("T", "Term"),
        ("S", "Status"),
        ("L", "Doubt"),
    ];
    CODE_TYPES
        .iter()
        .find(|(short, _)| *short == code)
        .map_or(code, |(_, name)| *name)
}
134
/// Maps an edge relation name to its one-letter code in the KG text format.
///
/// Several relations have a second accepted spelling that shares the same code
/// (for example `DOCUMENTED_IN` and `DOCUMENTS` both map to `D`). Relations
/// without a known code are returned unchanged.
fn relation_to_code(relation: &str) -> &str {
    const RELATION_CODES: &[(&str, &str)] = &[
        ("DOCUMENTED_IN", "D"),
        ("DOCUMENTS", "D"),
        ("HAS", "H"),
        ("TRIGGERS", "T"),
        ("AFFECTED_BY", "A"),
        ("AFFECTS", "A"),
        ("READS_FROM", "R"),
        ("READS", "R"),
        ("GOVERNED_BY", "G"),
        ("GOVERNS", "G"),
        ("DEPENDS_ON", "O"),
        ("AVAILABLE_IN", "I"),
        ("SUPPORTS", "S"),
        ("SUMMARIZES", "U"),
        ("RELATED_TO", "L"),
        ("CONTRADICTS", "V"),
        ("CREATED_BY", "C"),
        ("CREATES", "C"),
    ];
    RELATION_CODES
        .iter()
        .find(|(name, _)| *name == relation)
        .map_or(relation, |(_, code)| *code)
}
153
/// Expands a one-letter relation code from the KG text format to its relation name.
///
/// Codes that are not recognised are returned unchanged; matching is case-sensitive.
fn code_to_relation(code: &str) -> &str {
    const CODE_RELATIONS: &[(&str, &str)] = &[
        ("D", "DOCUMENTED_IN"),
        ("H", "HAS"),
        ("T", "TRIGGERS"),
        ("A", "AFFECTED_BY"),
        ("R", "READS_FROM"),
        ("G", "GOVERNED_BY"),
        ("O", "DEPENDS_ON"),
        ("I", "AVAILABLE_IN"),
        ("S", "SUPPORTS"),
        ("U", "SUMMARIZES"),
        ("L", "RELATED_TO"),
        ("V", "CONTRADICTS"),
        ("C", "CREATED_BY"),
    ];
    CODE_RELATIONS
        .iter()
        .find(|(short, _)| *short == code)
        .map_or(code, |(_, name)| *name)
}
172
/// Returns a sorted copy of `values`, ordered by ASCII-case-insensitive comparison.
///
/// Entries that compare equal once ASCII letters are lowercased are ordered by
/// their exact byte-wise comparison, so the result is fully deterministic.
fn sort_case_insensitive(values: &[String]) -> Vec<String> {
    let mut ordered: Vec<String> = values.iter().cloned().collect();
    ordered.sort_by(|lhs, rhs| {
        // Lowercasing ASCII byte by byte gives the same ordering as comparing
        // the `to_ascii_lowercase()` strings.
        let folded_lhs = lhs.bytes().map(|byte| byte.to_ascii_lowercase());
        let folded_rhs = rhs.bytes().map(|byte| byte.to_ascii_lowercase());
        folded_lhs.cmp(folded_rhs).then_with(|| lhs.cmp(rhs))
    });
    ordered
}
182
/// Reverses the backslash escaping used for text values in the KG text format.
///
/// `\n` becomes a line feed, `\r` a carriage return and `\\` a single
/// backslash. Any other escape, and a lone trailing backslash, is copied
/// through unchanged.
fn decode_kg_text(value: &str) -> String {
    let mut decoded = String::with_capacity(value.len());
    let mut rest = value.chars();
    loop {
        match rest.next() {
            None => break,
            Some('\\') => match rest.next() {
                Some('n') => decoded.push('\n'),
                Some('r') => decoded.push('\r'),
                Some('\\') => decoded.push('\\'),
                // Unknown escape: keep both characters as they were written.
                Some(unknown) => {
                    decoded.push('\\');
                    decoded.push(unknown);
                }
                // Backslash at the very end of the input.
                None => decoded.push('\\'),
            },
            Some(plain) => decoded.push(plain),
        }
    }
    decoded
}
204
/// Escapes a text value so it fits on one line of the KG text format.
///
/// Backslashes are doubled, and line feeds and carriage returns are written as
/// the two-character sequences `\n` and `\r`. All other characters are copied
/// as they are.
fn escape_kg_text(value: &str) -> String {
    value.chars().fold(String::new(), |mut escaped, ch| {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            other => escaped.push(other),
        }
        escaped
    })
}
217
/// Decodes the raw value of a text field by delegating to `decode_kg_text`.
///
/// The value is not trimmed here, so leading and trailing whitespace in
/// `value` is preserved.
fn parse_text_field(value: &str) -> String {
    decode_kg_text(value)
}
221
222fn push_text_line(out: &mut String, key: &str, value: &str) {
223    out.push_str(key);
224    out.push(' ');
225    out.push_str(&escape_kg_text(value));
226    out.push('\n');
227}
228
/// Drops entries that repeat an earlier entry when compared ASCII-case-insensitively.
///
/// The first occurrence of each value is kept with its original spelling, and
/// the relative order of the kept entries is unchanged.
fn dedupe_case_insensitive(values: Vec<String>) -> Vec<String> {
    let mut seen_keys = std::collections::HashSet::new();
    values
        .into_iter()
        // `insert` returns false for a key that is already present.
        .filter(|candidate| seen_keys.insert(candidate.to_ascii_lowercase()))
        .collect()
}
240
/// Reports whether `value` has the exact shape `YYYY-MM-DDTHH:MM:SSZ` with in-range fields.
///
/// The month must be 1-12, the day 1-31, the hour 0-23, and the minute and
/// second 0-59. The day is not checked against the month's length, and the
/// year may be any four digits.
fn parse_utc_timestamp(value: &str) -> bool {
    // `d` stands for an ASCII digit; every other byte must match literally.
    const SHAPE: &[u8] = b"dddd-dd-ddTdd:dd:ddZ";

    let bytes = value.as_bytes();
    if bytes.len() != SHAPE.len() {
        return false;
    }
    let shape_matches = bytes.iter().zip(SHAPE.iter()).all(|(&actual, &expected)| {
        if expected == b'd' {
            actual.is_ascii_digit()
        } else {
            actual == expected
        }
    });
    if !shape_matches {
        return false;
    }

    // Both bytes are known to be ASCII digits once the shape check has passed.
    let two_digits = |at: usize| -> u32 {
        u32::from(bytes[at] - b'0') * 10 + u32::from(bytes[at + 1] - b'0')
    };
    let (month, day) = (two_digits(5), two_digits(8));
    let (hour, minute, second) = (two_digits(11), two_digits(14), two_digits(17));

    (1..=12).contains(&month)
        && (1..=31).contains(&day)
        && hour <= 23
        && minute <= 59
        && second <= 59
}
282
/// Reports whether strict format checking is enabled through `KG_STRICT_FORMAT`.
///
/// Returns `true` when the variable, trimmed and ASCII-lowercased, is one of
/// `1`, `true`, `yes` or `on`. Returns `false` for any other value and when
/// the variable cannot be read.
fn strict_kg_mode() -> bool {
    match std::env::var("KG_STRICT_FORMAT") {
        Ok(raw) => {
            let normalized = raw.trim().to_ascii_lowercase();
            ["1", "true", "yes", "on"].contains(&normalized.as_str())
        }
        Err(_) => false,
    }
}
292
293fn validate_len(
294    line_no: usize,
295    field: &str,
296    value: &str,
297    min: usize,
298    max: usize,
299    strict: bool,
300) -> Result<()> {
301    let len = value.chars().count();
302    if strict && (len < min || len > max) {
303        return Err(anyhow::anyhow!(
304            "invalid {field} length at line {line_no}: expected {min}..={max}, got {len}"
305        ));
306    }
307    Ok(())
308}
309
310fn enforce_field_order(
311    line_no: usize,
312    key: &str,
313    rank: u8,
314    last_rank: &mut u8,
315    section: &str,
316    strict: bool,
317) -> Result<()> {
318    if strict && rank < *last_rank {
319        return Err(anyhow::anyhow!(
320            "invalid field order at line {line_no}: {key} in {section} block"
321        ));
322    }
323    if rank > *last_rank {
324        *last_rank = rank;
325    }
326    Ok(())
327}
328
/// Extracts the value part of a `key value` line.
///
/// Returns `Some("")` when `line` is exactly `key`, and `Some(rest)` when
/// `line` is `key`, one space, then `rest`. Only a single space is consumed,
/// so further spaces stay in the value. Returns `None` in every other case.
fn field_value<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    let after_key = line.strip_prefix(key)?;
    if after_key.is_empty() {
        // The key on its own counts as an empty value.
        return Some("");
    }
    after_key.strip_prefix(' ')
}
337
338fn parse_kg(raw: &str, graph_name: &str, strict: bool) -> Result<GraphFile> {
339    let mut graph = GraphFile::new(graph_name);
340    let mut current_node: Option<Node> = None;
341    let mut current_note: Option<Note> = None;
342    let mut current_edge_index: Option<usize> = None;
343    let mut last_node_rank: u8 = 0;
344    let mut last_note_rank: u8 = 0;
345    let mut last_edge_rank: u8 = 0;
346
347    for (idx, line) in raw.lines().enumerate() {
348        let line_no = idx + 1;
349        let raw_line = line.strip_suffix('\r').unwrap_or(line);
350        let trimmed = raw_line.trim();
351        if trimmed.is_empty() || trimmed.starts_with('#') {
352            continue;
353        }
354
355        if let Some(rest) = trimmed.strip_prefix("@ ") {
356            if let Some(note) = current_note.take() {
357                graph.notes.push(note);
358            }
359            if let Some(node) = current_node.take() {
360                graph.nodes.push(node);
361            }
362            let (type_code, node_id) = rest.split_once(':').ok_or_else(|| {
363                anyhow::anyhow!("invalid node header at line {line_no}: {trimmed}")
364            })?;
365            current_node = Some(Node {
366                id: node_id.trim().to_owned(),
367                r#type: code_to_node_type(type_code.trim()).to_owned(),
368                name: String::new(),
369                properties: NodeProperties::default(),
370                source_files: Vec::new(),
371            });
372            current_edge_index = None;
373            last_node_rank = 0;
374            last_edge_rank = 0;
375            continue;
376        }
377
378        if let Some(rest) = trimmed.strip_prefix("! ") {
379            if let Some(node) = current_node.take() {
380                graph.nodes.push(node);
381            }
382            if let Some(note) = current_note.take() {
383                graph.notes.push(note);
384            }
385            let mut parts = rest.split_whitespace();
386            let id = parts.next().ok_or_else(|| {
387                anyhow::anyhow!("invalid note header at line {line_no}: {trimmed}")
388            })?;
389            let node_id = parts.next().ok_or_else(|| {
390                anyhow::anyhow!("invalid note header at line {line_no}: {trimmed}")
391            })?;
392            current_note = Some(Note {
393                id: id.to_owned(),
394                node_id: node_id.to_owned(),
395                ..Default::default()
396            });
397            current_edge_index = None;
398            last_note_rank = 0;
399            continue;
400        }
401
402        if let Some(note) = current_note.as_mut() {
403            if let Some(rest) = field_value(raw_line, "b") {
404                enforce_field_order(line_no, "b", 1, &mut last_note_rank, "note", strict)?;
405                note.body = parse_text_field(rest);
406                continue;
407            }
408            if let Some(rest) = field_value(raw_line, "t") {
409                enforce_field_order(line_no, "t", 2, &mut last_note_rank, "note", strict)?;
410                let value = parse_text_field(rest);
411                if !value.is_empty() {
412                    note.tags.push(value);
413                }
414                continue;
415            }
416            if let Some(rest) = field_value(raw_line, "a") {
417                enforce_field_order(line_no, "a", 3, &mut last_note_rank, "note", strict)?;
418                note.author = parse_text_field(rest);
419                continue;
420            }
421            if let Some(rest) = field_value(raw_line, "e") {
422                enforce_field_order(line_no, "e", 4, &mut last_note_rank, "note", strict)?;
423                note.created_at = rest.trim().to_owned();
424                continue;
425            }
426            if let Some(rest) = field_value(raw_line, "p") {
427                enforce_field_order(line_no, "p", 5, &mut last_note_rank, "note", strict)?;
428                note.provenance = parse_text_field(rest);
429                continue;
430            }
431            if let Some(rest) = field_value(raw_line, "s") {
432                enforce_field_order(line_no, "s", 6, &mut last_note_rank, "note", strict)?;
433                let value = parse_text_field(rest);
434                if !value.is_empty() {
435                    note.source_files.push(value);
436                }
437                continue;
438            }
439            return Err(anyhow::anyhow!(
440                "unrecognized note line at {line_no}: {trimmed}"
441            ));
442        }
443
444        let Some(node) = current_node.as_mut() else {
445            return Err(anyhow::anyhow!(
446                "unexpected line before first node at line {line_no}: {trimmed}"
447            ));
448        };
449
450        if let Some(rest) = field_value(raw_line, "N") {
451            enforce_field_order(line_no, "N", 1, &mut last_node_rank, "node", strict)?;
452            let value = parse_text_field(rest);
453            validate_len(line_no, "N", &value, 1, 120, strict)?;
454            node.name = value;
455            continue;
456        }
457        if let Some(rest) = field_value(raw_line, "D") {
458            enforce_field_order(line_no, "D", 2, &mut last_node_rank, "node", strict)?;
459            let value = parse_text_field(rest);
460            validate_len(line_no, "D", &value, 1, 200, strict)?;
461            node.properties.description = value;
462            continue;
463        }
464        if let Some(rest) = field_value(raw_line, "A") {
465            enforce_field_order(line_no, "A", 3, &mut last_node_rank, "node", strict)?;
466            let value = parse_text_field(rest);
467            validate_len(line_no, "A", &value, 1, 80, strict)?;
468            node.properties.alias.push(value);
469            continue;
470        }
471        if let Some(rest) = field_value(raw_line, "F") {
472            enforce_field_order(line_no, "F", 4, &mut last_node_rank, "node", strict)?;
473            let value = parse_text_field(rest);
474            validate_len(line_no, "F", &value, 1, 200, strict)?;
475            node.properties.key_facts.push(value);
476            continue;
477        }
478        if let Some(rest) = field_value(raw_line, "E") {
479            enforce_field_order(line_no, "E", 5, &mut last_node_rank, "node", strict)?;
480            let value = rest.trim();
481            if !value.is_empty() && !parse_utc_timestamp(value) {
482                return Err(anyhow::anyhow!(
483                    "invalid E timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ"
484                ));
485            }
486            node.properties.created_at = value.to_owned();
487            continue;
488        }
489        if let Some(rest) = field_value(raw_line, "C") {
490            enforce_field_order(line_no, "C", 6, &mut last_node_rank, "node", strict)?;
491            if !rest.trim().is_empty() {
492                node.properties.confidence = rest.trim().parse::<f64>().ok();
493            }
494            continue;
495        }
496        if let Some(rest) = field_value(raw_line, "V") {
497            enforce_field_order(line_no, "V", 7, &mut last_node_rank, "node", strict)?;
498            if let Ok(value) = rest.trim().parse::<u8>() {
499                node.properties.importance = value;
500            }
501            continue;
502        }
503        if let Some(rest) = field_value(raw_line, "P") {
504            enforce_field_order(line_no, "P", 8, &mut last_node_rank, "node", strict)?;
505            node.properties.provenance = parse_text_field(rest);
506            continue;
507        }
508        if let Some(rest) = field_value(raw_line, "S") {
509            enforce_field_order(line_no, "S", 10, &mut last_node_rank, "node", strict)?;
510            let value = parse_text_field(rest);
511            validate_len(line_no, "S", &value, 1, 200, strict)?;
512            node.source_files.push(value);
513            continue;
514        }
515
516        if let Some(rest) = trimmed.strip_prefix("> ") {
517            let mut parts = rest.split_whitespace();
518            let relation = parts.next().ok_or_else(|| {
519                anyhow::anyhow!("missing relation in edge at line {line_no}: {trimmed}")
520            })?;
521            let target_id = parts.next().ok_or_else(|| {
522                anyhow::anyhow!("missing target id in edge at line {line_no}: {trimmed}")
523            })?;
524            graph.edges.push(Edge {
525                source_id: node.id.clone(),
526                relation: code_to_relation(relation).to_owned(),
527                target_id: target_id.to_owned(),
528                properties: EdgeProperties::default(),
529            });
530            current_edge_index = Some(graph.edges.len() - 1);
531            last_edge_rank = 0;
532            continue;
533        }
534
535        if let Some(rest) = field_value(raw_line, "d") {
536            enforce_field_order(line_no, "d", 1, &mut last_edge_rank, "edge", strict)?;
537            let edge_idx = current_edge_index.ok_or_else(|| {
538                anyhow::anyhow!("edge detail without preceding edge at line {line_no}")
539            })?;
540            let value = parse_text_field(rest);
541            validate_len(line_no, "d", &value, 1, 200, strict)?;
542            graph.edges[edge_idx].properties.detail = value;
543            continue;
544        }
545
546        if let Some(rest) = field_value(raw_line, "i") {
547            enforce_field_order(line_no, "i", 2, &mut last_edge_rank, "edge", strict)?;
548            let edge_idx = current_edge_index.ok_or_else(|| {
549                anyhow::anyhow!("edge valid_from without preceding edge at line {line_no}")
550            })?;
551            let value = rest.trim();
552            if !value.is_empty() && !parse_utc_timestamp(value) {
553                return Err(anyhow::anyhow!(
554                    "invalid i timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ"
555                ));
556            }
557            graph.edges[edge_idx].properties.valid_from = value.to_owned();
558            continue;
559        }
560
561        if let Some(rest) = field_value(raw_line, "x") {
562            enforce_field_order(line_no, "x", 3, &mut last_edge_rank, "edge", strict)?;
563            let edge_idx = current_edge_index.ok_or_else(|| {
564                anyhow::anyhow!("edge valid_to without preceding edge at line {line_no}")
565            })?;
566            let value = rest.trim();
567            if !value.is_empty() && !parse_utc_timestamp(value) {
568                return Err(anyhow::anyhow!(
569                    "invalid x timestamp at line {line_no}: expected YYYY-MM-DDTHH:MM:SSZ"
570                ));
571            }
572            graph.edges[edge_idx].properties.valid_to = value.to_owned();
573            continue;
574        }
575
576        if let Some(rest) = field_value(raw_line, "-") {
577            let (key, value) = rest
578                .split_once(char::is_whitespace)
579                .map(|(key, value)| (key.trim(), value))
580                .unwrap_or((rest.trim(), ""));
581            let is_edge_custom = matches!(
582                key,
583                "edge_feedback_score" | "edge_feedback_count" | "edge_feedback_last_ts_ms"
584            );
585            if is_edge_custom {
586                enforce_field_order(line_no, "-", 4, &mut last_edge_rank, "edge", strict)?;
587            } else {
588                enforce_field_order(line_no, "-", 9, &mut last_node_rank, "node", strict)?;
589            }
590            match key {
591                "domain_area" => node.properties.domain_area = parse_text_field(value),
592                "feedback_score" => {
593                    node.properties.feedback_score = value.trim().parse::<f64>().unwrap_or(0.0)
594                }
595                "feedback_count" => {
596                    node.properties.feedback_count = value.trim().parse::<u64>().unwrap_or(0)
597                }
598                "feedback_last_ts_ms" => {
599                    node.properties.feedback_last_ts_ms = value.trim().parse::<u64>().ok()
600                }
601                "edge_feedback_score" => {
602                    if let Some(edge_idx) = current_edge_index {
603                        graph.edges[edge_idx].properties.feedback_score =
604                            value.trim().parse::<f64>().unwrap_or(0.0);
605                    }
606                }
607                "edge_feedback_count" => {
608                    if let Some(edge_idx) = current_edge_index {
609                        graph.edges[edge_idx].properties.feedback_count =
610                            value.trim().parse::<u64>().unwrap_or(0);
611                    }
612                }
613                "edge_feedback_last_ts_ms" => {
614                    if let Some(edge_idx) = current_edge_index {
615                        graph.edges[edge_idx].properties.feedback_last_ts_ms =
616                            value.trim().parse::<u64>().ok();
617                    }
618                }
619                _ => {}
620            }
621            continue;
622        }
623
624        return Err(anyhow::anyhow!("unrecognized line at {line_no}: {trimmed}"));
625    }
626
627    if let Some(node) = current_node.take() {
628        graph.nodes.push(node);
629    }
630    if let Some(note) = current_note.take() {
631        graph.notes.push(note);
632    }
633
634    for node in &mut graph.nodes {
635        node.properties.alias =
636            sort_case_insensitive(&dedupe_case_insensitive(node.properties.alias.clone()));
637        node.properties.key_facts =
638            sort_case_insensitive(&dedupe_case_insensitive(node.properties.key_facts.clone()));
639        node.source_files =
640            sort_case_insensitive(&dedupe_case_insensitive(node.source_files.clone()));
641    }
642
643    graph.edges.sort_by(|a, b| {
644        a.source_id
645            .cmp(&b.source_id)
646            .then_with(|| a.relation.cmp(&b.relation))
647            .then_with(|| a.target_id.cmp(&b.target_id))
648            .then_with(|| a.properties.detail.cmp(&b.properties.detail))
649    });
650
651    for note in &mut graph.notes {
652        note.tags = sort_case_insensitive(&dedupe_case_insensitive(note.tags.clone()));
653        note.source_files =
654            sort_case_insensitive(&dedupe_case_insensitive(note.source_files.clone()));
655    }
656    graph.notes.sort_by(|a, b| {
657        a.id.cmp(&b.id)
658            .then_with(|| a.node_id.cmp(&b.node_id))
659            .then_with(|| a.created_at.cmp(&b.created_at))
660    });
661
662    graph.refresh_counts();
663    Ok(graph)
664}
665
666fn serialize_kg(graph: &GraphFile) -> String {
667    let mut out = String::new();
668    let mut nodes = graph.nodes.clone();
669    nodes.sort_by(|a, b| a.id.cmp(&b.id));
670
671    for node in nodes {
672        out.push_str(&format!(
673            "@ {}:{}\n",
674            node_type_to_code(&node.r#type),
675            node.id
676        ));
677        push_text_line(&mut out, "N", &node.name);
678        push_text_line(&mut out, "D", &node.properties.description);
679
680        for alias in sort_case_insensitive(&node.properties.alias) {
681            push_text_line(&mut out, "A", &alias);
682        }
683        for fact in sort_case_insensitive(&node.properties.key_facts) {
684            push_text_line(&mut out, "F", &fact);
685        }
686
687        if !node.properties.created_at.is_empty() {
688            out.push_str(&format!("E {}\n", node.properties.created_at));
689        }
690        if let Some(confidence) = node.properties.confidence {
691            out.push_str(&format!("C {}\n", confidence));
692        }
693        out.push_str(&format!("V {}\n", node.properties.importance));
694        if !node.properties.provenance.is_empty() {
695            push_text_line(&mut out, "P", &node.properties.provenance);
696        }
697        if !node.properties.domain_area.is_empty() {
698            out.push_str("- domain_area ");
699            out.push_str(&escape_kg_text(&node.properties.domain_area));
700            out.push('\n');
701        }
702        if node.properties.feedback_score != 0.0 {
703            out.push_str(&format!(
704                "- feedback_score {}\n",
705                node.properties.feedback_score
706            ));
707        }
708        if node.properties.feedback_count != 0 {
709            out.push_str(&format!(
710                "- feedback_count {}\n",
711                node.properties.feedback_count
712            ));
713        }
714        if let Some(ts) = node.properties.feedback_last_ts_ms {
715            out.push_str(&format!("- feedback_last_ts_ms {}\n", ts));
716        }
717
718        for source in sort_case_insensitive(&node.source_files) {
719            push_text_line(&mut out, "S", &source);
720        }
721
722        let mut edges: Vec<Edge> = graph
723            .edges
724            .iter()
725            .filter(|edge| edge.source_id == node.id)
726            .cloned()
727            .collect();
728        edges.sort_by(|a, b| {
729            a.relation
730                .cmp(&b.relation)
731                .then_with(|| a.target_id.cmp(&b.target_id))
732                .then_with(|| a.properties.detail.cmp(&b.properties.detail))
733        });
734
735        for edge in edges {
736            out.push_str(&format!(
737                "> {} {}\n",
738                relation_to_code(&edge.relation),
739                edge.target_id
740            ));
741            if !edge.properties.detail.is_empty() {
742                push_text_line(&mut out, "d", &edge.properties.detail);
743            }
744            if !edge.properties.valid_from.is_empty() {
745                out.push_str(&format!("i {}\n", edge.properties.valid_from));
746            }
747            if !edge.properties.valid_to.is_empty() {
748                out.push_str(&format!("x {}\n", edge.properties.valid_to));
749            }
750            if edge.properties.feedback_score != 0.0 {
751                out.push_str(&format!(
752                    "- edge_feedback_score {}\n",
753                    edge.properties.feedback_score
754                ));
755            }
756            if edge.properties.feedback_count != 0 {
757                out.push_str(&format!(
758                    "- edge_feedback_count {}\n",
759                    edge.properties.feedback_count
760                ));
761            }
762            if let Some(ts) = edge.properties.feedback_last_ts_ms {
763                out.push_str(&format!("- edge_feedback_last_ts_ms {}\n", ts));
764            }
765        }
766
767        out.push('\n');
768    }
769
770    let mut notes = graph.notes.clone();
771    notes.sort_by(|a, b| {
772        a.id.cmp(&b.id)
773            .then_with(|| a.node_id.cmp(&b.node_id))
774            .then_with(|| a.created_at.cmp(&b.created_at))
775    });
776    for note in notes {
777        out.push_str(&format!("! {} {}\n", note.id, note.node_id));
778        push_text_line(&mut out, "b", &note.body);
779        for tag in sort_case_insensitive(&note.tags) {
780            push_text_line(&mut out, "t", &tag);
781        }
782        if !note.author.is_empty() {
783            push_text_line(&mut out, "a", &note.author);
784        }
785        if !note.created_at.is_empty() {
786            out.push_str(&format!("e {}\n", note.created_at));
787        }
788        if !note.provenance.is_empty() {
789            push_text_line(&mut out, "p", &note.provenance);
790        }
791        for source in sort_case_insensitive(&note.source_files) {
792            push_text_line(&mut out, "s", &source);
793        }
794        out.push('\n');
795    }
796
797    out
798}
799
/// A complete knowledge graph: descriptive metadata plus its nodes, edges and notes.
///
/// When deserialized with serde, a missing `nodes`, `edges` or `notes` field
/// becomes an empty list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphFile {
    /// Name, version, description and counts for the graph.
    pub metadata: Metadata,
    /// All nodes of the graph.
    #[serde(default)]
    pub nodes: Vec<Node>,
    /// All edges; each refers to its endpoints by node id.
    #[serde(default)]
    pub edges: Vec<Edge>,
    /// Notes; each carries the id of a node in `node_id`.
    #[serde(default)]
    pub notes: Vec<Note>,
}
810
/// Descriptive header of a [`GraphFile`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Metadata {
    /// Graph name.
    pub name: String,
    /// Version string; `GraphFile::new` sets it to `"1.0"`.
    pub version: String,
    /// Free-text description; `GraphFile::new` sets it to `Knowledge graph: {name}`.
    pub description: String,
    /// Stored node count; starts at 0 in `GraphFile::new`.
    /// NOTE(review): presumably updated by `refresh_counts` — confirm.
    pub node_count: usize,
    /// Stored edge count; starts at 0 in `GraphFile::new`.
    /// NOTE(review): presumably updated by `refresh_counts` — confirm.
    pub edge_count: usize,
}
819
/// A single node of the knowledge graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Identifier; edges and notes refer to a node by this value.
    pub id: String,
    /// Node type name such as `Feature` or `Concept`; serialized by serde as `type`.
    #[serde(rename = "type")]
    pub r#type: String,
    /// Node name (the `N` line in the KG text format).
    pub name: String,
    /// Further attributes of the node; defaults apply when absent.
    #[serde(default)]
    pub properties: NodeProperties,
    /// Source file entries (the `S` lines in the KG text format).
    #[serde(default)]
    pub source_files: Vec<String>,
}
831
/// Optional attributes attached to a [`Node`].
///
/// Every field has a serde default, so any of them may be missing from
/// serialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeProperties {
    /// Description text (the `D` line in the KG text format).
    #[serde(default)]
    pub description: String,
    /// Domain area (the `- domain_area` line in the KG text format).
    #[serde(default)]
    pub domain_area: String,
    /// Provenance text (the `P` line in the KG text format).
    #[serde(default)]
    pub provenance: String,
    /// Confidence value (the `C` line); `None` when not given.
    #[serde(default)]
    pub confidence: Option<f64>,
    /// Creation timestamp (the `E` line); empty when not given. The KG text
    /// parser accepts only the form `YYYY-MM-DDTHH:MM:SSZ`.
    #[serde(default)]
    pub created_at: String,
    /// Importance (the `V` line); falls back to `default_importance()` when absent.
    #[serde(default = "default_importance")]
    pub importance: u8,
    /// Key facts (the `F` lines in the KG text format).
    #[serde(default)]
    pub key_facts: Vec<String>,
    /// Aliases (the `A` lines in the KG text format).
    #[serde(default)]
    pub alias: Vec<String>,
    /// Feedback score (the `- feedback_score` line); 0.0 when absent.
    #[serde(default)]
    pub feedback_score: f64,
    /// Feedback count (the `- feedback_count` line); 0 when absent.
    #[serde(default)]
    pub feedback_count: u64,
    /// Value of the `- feedback_last_ts_ms` line, if any.
    /// NOTE(review): the name suggests a millisecond timestamp — confirm.
    #[serde(default)]
    pub feedback_last_ts_ms: Option<u64>,
}
857
/// Importance given to a node when no explicit value is supplied.
///
/// Used both as the serde default for `NodeProperties::importance` and by
/// `NodeProperties::default`.
fn default_importance() -> u8 {
    const FALLBACK_IMPORTANCE: u8 = 4;
    FALLBACK_IMPORTANCE
}
861
862impl Default for NodeProperties {
863    fn default() -> Self {
864        Self {
865            description: String::new(),
866            domain_area: String::new(),
867            provenance: String::new(),
868            confidence: None,
869            created_at: String::new(),
870            importance: default_importance(),
871            key_facts: Vec::new(),
872            alias: Vec::new(),
873            feedback_score: 0.0,
874            feedback_count: 0,
875            feedback_last_ts_ms: None,
876        }
877    }
878}
879
880#[derive(Debug, Clone, Serialize, Deserialize)]
881pub struct Edge {
882    pub source_id: String,
883    pub relation: String,
884    pub target_id: String,
885    #[serde(default)]
886    pub properties: EdgeProperties,
887}
888
889#[derive(Debug, Clone, Default, Serialize, Deserialize)]
890pub struct EdgeProperties {
891    #[serde(default)]
892    pub detail: String,
893    #[serde(default)]
894    pub valid_from: String,
895    #[serde(default)]
896    pub valid_to: String,
897    #[serde(default)]
898    pub feedback_score: f64,
899    #[serde(default)]
900    pub feedback_count: u64,
901    #[serde(default)]
902    pub feedback_last_ts_ms: Option<u64>,
903}
904
905#[derive(Debug, Clone, Default, Serialize, Deserialize)]
906pub struct Note {
907    pub id: String,
908    pub node_id: String,
909    #[serde(default)]
910    pub body: String,
911    #[serde(default)]
912    pub tags: Vec<String>,
913    #[serde(default)]
914    pub author: String,
915    #[serde(default)]
916    pub created_at: String,
917    #[serde(default)]
918    pub provenance: String,
919    #[serde(default)]
920    pub source_files: Vec<String>,
921}
922
923impl GraphFile {
924    pub fn new(name: &str) -> Self {
925        Self {
926            metadata: Metadata {
927                name: name.to_owned(),
928                version: "1.0".to_owned(),
929                description: format!("Knowledge graph: {name}"),
930                node_count: 0,
931                edge_count: 0,
932            },
933            nodes: Vec::new(),
934            edges: Vec::new(),
935            notes: Vec::new(),
936        }
937    }
938
939    pub fn load(path: &Path) -> Result<Self> {
940        let raw = fs::read_to_string(path)
941            .with_context(|| format!("failed to read graph: {}", path.display()))?;
942        let ext = path
943            .extension()
944            .and_then(|ext| ext.to_str())
945            .unwrap_or("json");
946        let mut graph = if ext == "kg" {
947            if raw.trim_start().starts_with('{') {
948                serde_json::from_str(&raw).with_context(|| {
949                    format!(
950                        "invalid legacy JSON payload in .kg file: {}",
951                        path.display()
952                    )
953                })?
954            } else {
955                let graph_name = path
956                    .file_stem()
957                    .and_then(|stem| stem.to_str())
958                    .unwrap_or("graph");
959                parse_kg(&raw, graph_name, strict_kg_mode())?
960            }
961        } else {
962            serde_json::from_str(&raw)
963                .with_context(|| format!("invalid JSON: {}", path.display()))?
964        };
965        graph.refresh_counts();
966        Ok(graph)
967    }
968
969    pub fn save(&self, path: &Path) -> Result<()> {
970        let mut graph = self.clone();
971        graph.refresh_counts();
972        let ext = path
973            .extension()
974            .and_then(|ext| ext.to_str())
975            .unwrap_or("json");
976        let raw = if ext == "kg" {
977            serialize_kg(&graph)
978        } else {
979            serde_json::to_string_pretty(&graph).context("failed to serialize graph")?
980        };
981        atomic_write(path, &raw)?;
982        backup_graph_if_stale(path, &raw)
983    }
984
985    pub fn refresh_counts(&mut self) {
986        self.metadata.node_count = self.nodes.len();
987        self.metadata.edge_count = self.edges.len();
988    }
989
990    pub fn node_by_id(&self, id: &str) -> Option<&Node> {
991        self.nodes.iter().find(|node| node.id == id)
992    }
993
994    pub fn node_by_id_sorted(&self, id: &str) -> Option<&Node> {
995        self.nodes
996            .binary_search_by(|node| node.id.as_str().cmp(id))
997            .ok()
998            .and_then(|idx| self.nodes.get(idx))
999    }
1000
1001    pub fn node_by_id_mut(&mut self, id: &str) -> Option<&mut Node> {
1002        self.nodes.iter_mut().find(|node| node.id == id)
1003    }
1004
1005    pub fn has_edge(&self, source_id: &str, relation: &str, target_id: &str) -> bool {
1006        self.edges.iter().any(|edge| {
1007            edge.source_id == source_id && edge.relation == relation && edge.target_id == target_id
1008        })
1009    }
1010}
1011
#[cfg(test)]
mod tests {
    use std::fs;
    use std::path::PathBuf;

    use super::{GraphFile, parse_kg};
    use crate::{Edge, EdgeProperties, Node, NodeProperties, Note};

    /// Creates a fresh temporary directory and returns it together with the
    /// path of `file_name` inside it.
    ///
    /// Callers keep the returned guard bound (as `_dir`, not `_`) so it is
    /// not dropped before the test has finished using the path.
    fn scratch_file(file_name: &str) -> (tempfile::TempDir, PathBuf) {
        let dir = tempfile::tempdir().expect("temp dir");
        let path = dir.path().join(file_name);
        (dir, path)
    }

    /// A node and an edge written as `.kg` come back with their core fields.
    #[test]
    fn save_and_load_kg_roundtrip_keeps_core_fields() {
        let (_dir, path) = scratch_file("graph.kg");

        let fridge = Node {
            id: "concept:refrigerator".into(),
            r#type: "Concept".into(),
            name: "Lodowka".into(),
            properties: NodeProperties {
                description: "Urzadzenie chlodzace".into(),
                provenance: "U".into(),
                created_at: "2026-04-04T12:00:00Z".into(),
                importance: 5,
                key_facts: vec!["A".into(), "b".into()],
                alias: vec!["Fridge".into()],
                ..NodeProperties::default()
            },
            source_files: vec!["docs/fridge.md".into()],
        };
        let reads_settings = Edge {
            source_id: "concept:refrigerator".into(),
            relation: "READS_FROM".into(),
            target_id: "datastore:settings".into(),
            properties: EdgeProperties {
                detail: "runtime read".into(),
                valid_from: "2026-04-04T12:00:00Z".into(),
                valid_to: "2026-04-05T12:00:00Z".into(),
                ..EdgeProperties::default()
            },
        };

        let mut graph = GraphFile::new("graph");
        graph.nodes.push(fridge);
        graph.edges.push(reads_settings);
        graph.save(&path).expect("save kg");

        let raw = fs::read_to_string(&path).expect("read kg");
        assert!(raw.contains("@ K:concept:refrigerator"));
        assert!(raw.contains("> R datastore:settings"));

        let loaded = GraphFile::load(&path).expect("load kg");
        assert_eq!(loaded.nodes.len(), 1);
        assert_eq!(loaded.edges.len(), 1);

        let node = &loaded.nodes[0];
        assert_eq!(node.properties.importance, 5);
        assert_eq!(node.properties.provenance, "U");
        assert_eq!(node.name, "Lodowka");

        let edge = &loaded.edges[0];
        assert_eq!(edge.relation, "READS_FROM");
        assert_eq!(edge.properties.detail, "runtime read");
        assert_eq!(edge.properties.valid_from, "2026-04-04T12:00:00Z");
        assert_eq!(edge.properties.valid_to, "2026-04-05T12:00:00Z");
    }

    /// A `.kg` file that actually contains JSON is still loadable.
    #[test]
    fn load_supports_legacy_json_payload_with_kg_extension() {
        const LEGACY_PAYLOAD: &str = r#"{
  "metadata": {"name": "legacy", "version": "1.0", "description": "x", "node_count": 0, "edge_count": 0},
  "nodes": [],
  "edges": [],
  "notes": []
}"#;

        let (_dir, path) = scratch_file("legacy.kg");
        fs::write(&path, LEGACY_PAYLOAD).expect("write legacy payload");

        let loaded = GraphFile::load(&path).expect("load legacy kg");
        assert_eq!(loaded.metadata.name, "legacy");
        assert!(loaded.nodes.is_empty());
    }

    /// A node `E` line with a space-separated timestamp is rejected.
    #[test]
    fn load_kg_rejects_invalid_timestamp_format() {
        let (_dir, path) = scratch_file("invalid-ts.kg");
        let kg_text = "@ K:concept:x\nN X\nD Desc\nE 2026-04-04 12:00:00\nV 4\nP U\n";
        fs::write(&path, kg_text).expect("write kg");

        let err = GraphFile::load(&path).expect_err("invalid timestamp should fail");
        let msg = format!("{err:#}");
        assert!(msg.contains("invalid E timestamp"));
    }

    /// An edge `i` line with a space-separated timestamp is rejected.
    #[test]
    fn load_kg_rejects_invalid_edge_timestamp_format() {
        let (_dir, path) = scratch_file("invalid-edge-ts.kg");
        let kg_text = "@ K:concept:x\nN X\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n> H concept:y\ni 2026-04-04 12:00:00\n";
        fs::write(&path, kg_text).expect("write kg");

        let err = GraphFile::load(&path).expect_err("invalid edge timestamp should fail");
        let msg = format!("{err:#}");
        assert!(msg.contains("invalid i timestamp"));
    }

    /// Inner and trailing whitespace survives loading, while exact duplicate
    /// list entries collapse to one.
    #[test]
    fn load_kg_preserves_whitespace_and_dedupes_exact_duplicates() {
        let (_dir, path) = scratch_file("normalize.kg");
        let kg_text = "@ K:concept:x\nN  Name   With   Spaces \nD  Desc   with   spaces \nA Alias\nA Alias\nF fact one\nF FACT   one\nS docs/a.md\nS docs/a.md\nE 2026-04-04T12:00:00Z\nV 4\nP U\n";
        fs::write(&path, kg_text).expect("write kg");

        let loaded = GraphFile::load(&path).expect("load kg");
        let node = &loaded.nodes[0];
        assert_eq!(node.name, " Name   With   Spaces ");
        assert_eq!(node.properties.description, " Desc   with   spaces ");
        assert_eq!(node.properties.alias.len(), 1);
        assert_eq!(node.properties.key_facts.len(), 2);
        assert_eq!(node.source_files.len(), 1);
    }

    /// Notes are written in the native `.kg` syntax (not JSON) and read back.
    #[test]
    fn save_and_load_kg_roundtrip_keeps_notes_without_json_fallback() {
        let (_dir, path) = scratch_file("graph-notes.kg");

        let fridge = Node {
            id: "concept:refrigerator".into(),
            r#type: "Concept".into(),
            name: "Lodowka".into(),
            properties: NodeProperties {
                description: "Urzadzenie chlodzace".into(),
                provenance: "U".into(),
                created_at: "2026-04-04T12:00:00Z".into(),
                ..NodeProperties::default()
            },
            source_files: vec!["docs/fridge.md".into()],
        };
        let maintenance_note = Note {
            id: "note:1".into(),
            node_id: "concept:refrigerator".into(),
            body: "Important maintenance insight".into(),
            tags: vec!["Maintenance".into(), "maintenance".into()],
            author: "alice".into(),
            created_at: "1712345678".into(),
            provenance: "U".into(),
            source_files: vec!["docs/a.md".into(), "docs/a.md".into()],
        };

        let mut graph = GraphFile::new("graph-notes");
        graph.nodes.push(fridge);
        graph.notes.push(maintenance_note);
        graph.save(&path).expect("save kg");

        let raw = fs::read_to_string(&path).expect("read kg");
        assert!(raw.contains("! note:1 concept:refrigerator"));
        assert!(!raw.trim_start().starts_with('{'));

        let loaded = GraphFile::load(&path).expect("load kg");
        assert_eq!(loaded.notes.len(), 1);

        let note = &loaded.notes[0];
        assert_eq!(note.id, "note:1");
        assert_eq!(note.node_id, "concept:refrigerator");
        assert_eq!(note.body, "Important maintenance insight");
        assert_eq!(note.tags.len(), 1);
        assert_eq!(note.source_files.len(), 1);
    }

    /// Newlines and literal backslash-n sequences in text fields are escaped
    /// on save and restored on load.
    #[test]
    fn save_and_load_kg_roundtrip_preserves_multiline_text_fields() {
        let (_dir, path) = scratch_file("graph-multiline.kg");

        let fridge = Node {
            id: "concept:refrigerator".into(),
            r#type: "Concept".into(),
            name: "Lodowka\nSmart".into(),
            properties: NodeProperties {
                description: "Linia 1\nLinia 2\\nliteral".into(),
                provenance: "user\nimport".into(),
                created_at: "2026-04-04T12:00:00Z".into(),
                importance: 5,
                key_facts: vec!["Fakt 1\nFakt 2".into()],
                alias: vec!["Alias\nA".into()],
                domain_area: "ops\nfield".into(),
                ..NodeProperties::default()
            },
            source_files: vec!["docs/fridge\nnotes.md".into()],
        };
        let reads_settings = Edge {
            source_id: "concept:refrigerator".into(),
            relation: "READS_FROM".into(),
            target_id: "datastore:settings".into(),
            properties: EdgeProperties {
                detail: "runtime\nread".into(),
                valid_from: "2026-04-04T12:00:00Z".into(),
                valid_to: "2026-04-05T12:00:00Z".into(),
                ..EdgeProperties::default()
            },
        };
        let multiline_note = Note {
            id: "note:1".into(),
            node_id: "concept:refrigerator".into(),
            body: "line1\nline2\\nkeep".into(),
            tags: vec!["multi\nline".into()],
            author: "alice\nbob".into(),
            created_at: "1712345678".into(),
            provenance: "manual\nentry".into(),
            source_files: vec!["docs/a\nb.md".into()],
        };

        let mut graph = GraphFile::new("graph-multiline");
        graph.nodes.push(fridge);
        graph.edges.push(reads_settings);
        graph.notes.push(multiline_note);
        graph.save(&path).expect("save kg");

        // On disk every field stays on one line: newlines are escaped and
        // pre-existing backslashes are doubled.
        let raw = fs::read_to_string(&path).expect("read kg");
        assert!(raw.contains("N Lodowka\\nSmart"));
        assert!(raw.contains("D Linia 1\\nLinia 2\\\\nliteral"));
        assert!(raw.contains("- domain_area ops\\nfield"));
        assert!(raw.contains("d runtime\\nread"));
        assert!(raw.contains("b line1\\nline2\\\\nkeep"));

        let loaded = GraphFile::load(&path).expect("load kg");

        let node = &loaded.nodes[0];
        assert_eq!(node.name, "Lodowka\nSmart");
        assert_eq!(node.properties.description, "Linia 1\nLinia 2\\nliteral");
        assert_eq!(node.properties.provenance, "user\nimport");
        assert_eq!(node.properties.alias, ["Alias\nA"]);
        assert_eq!(node.properties.key_facts, ["Fakt 1\nFakt 2"]);
        assert_eq!(node.properties.domain_area, "ops\nfield");
        assert_eq!(node.source_files, ["docs/fridge\nnotes.md"]);

        assert_eq!(loaded.edges[0].properties.detail, "runtime\nread");

        let note = &loaded.notes[0];
        assert_eq!(note.body, "line1\nline2\\nkeep");
        assert_eq!(note.tags, ["multi\nline"]);
        assert_eq!(note.author, "alice\nbob");
        assert_eq!(note.provenance, "manual\nentry");
        assert_eq!(note.source_files, ["docs/a\nb.md"]);
    }

    /// In strict mode a `D` line placed before `N` is a field-order error.
    #[test]
    fn strict_mode_rejects_out_of_order_node_fields() {
        let raw = "@ K:concept:x\nD Desc\nN Name\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n";

        let err = parse_kg(raw, "x", true).expect_err("strict mode should fail on field order");
        assert!(format!("{err:#}").contains("invalid field order"));
    }

    /// A 121-character name fails in strict mode but parses in compat mode.
    #[test]
    fn strict_mode_rejects_overlong_name_but_compat_mode_allows_it() {
        let long_name = format!("N {}", "X".repeat(121));
        let raw = format!(
            "@ K:concept:x\n{long_name}\nD Desc\nE 2026-04-04T12:00:00Z\nV 4\nP U\nS docs/a.md\n"
        );

        let strict_err = parse_kg(&raw, "x", true).expect_err("strict mode should fail on length");
        assert!(format!("{strict_err:#}").contains("invalid N length"));

        parse_kg(&raw, "x", false).expect("compat mode keeps permissive behavior");
    }

    /// Empty `created_at` / `provenance` values produce no bare `E` or `P`
    /// lines in the saved file.
    #[test]
    fn save_kg_skips_empty_e_and_p_fields() {
        let (_dir, path) = scratch_file("no-empty-ep.kg");

        let bare_node = Node {
            id: "concept:x".into(),
            r#type: "Concept".into(),
            name: "X".into(),
            properties: NodeProperties {
                description: "Desc".into(),
                provenance: String::new(),
                created_at: String::new(),
                ..NodeProperties::default()
            },
            source_files: vec!["docs/a.md".into()],
        };

        let mut graph = GraphFile::new("graph");
        graph.nodes.push(bare_node);
        graph.save(&path).expect("save kg");

        let raw = fs::read_to_string(&path).expect("read kg");
        assert!(!raw.contains("\nE \n"));
        assert!(!raw.contains("\nP \n"));
    }
}