Skip to main content

kg/
graph.rs

1use std::fs;
2use std::io::Write;
3use std::path::Path;
4use std::time::{SystemTime, UNIX_EPOCH};
5
6use anyhow::{Context, Result};
7use flate2::Compression;
8use flate2::write::GzEncoder;
9use serde::{Deserialize, Serialize};
10
11/// Write `data` to `dest` atomically:
12/// 1. Write to `dest.tmp`
13/// 2. If `dest` already exists, copy it to `dest.bak`
14/// 3. Rename `dest.tmp` -> `dest`
15fn atomic_write(dest: &Path, data: &str) -> Result<()> {
16    let tmp = dest.with_extension("tmp");
17    fs::write(&tmp, data).with_context(|| format!("failed to write tmp: {}", tmp.display()))?;
18    if dest.exists() {
19        let bak = dest.with_extension("bak");
20        fs::copy(dest, &bak)
21            .with_context(|| format!("failed to create backup: {}", bak.display()))?;
22    }
23    fs::rename(&tmp, dest).with_context(|| format!("failed to rename tmp to {}", dest.display()))
24}
25
26const BACKUP_STALE_SECS: u64 = 60 * 60;
27
28fn backup_graph_if_stale(path: &Path, data: &str) -> Result<()> {
29    let parent = match path.parent() {
30        Some(parent) => parent,
31        None => return Ok(()),
32    };
33    let stem = match path.file_stem().and_then(|s| s.to_str()) {
34        Some(stem) => stem,
35        None => return Ok(()),
36    };
37    let now = SystemTime::now()
38        .duration_since(UNIX_EPOCH)
39        .context("time went backwards")?
40        .as_secs();
41    if let Some(latest) = latest_backup_ts(parent, stem)? {
42        if now.saturating_sub(latest) < BACKUP_STALE_SECS {
43            return Ok(());
44        }
45    }
46
47    let backup_path = parent.join(format!("{stem}.bck.{now}.gz"));
48    let tmp_path = backup_path.with_extension("tmp");
49    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
50    encoder.write_all(data.as_bytes())?;
51    let encoded = encoder.finish()?;
52    fs::write(&tmp_path, encoded)
53        .with_context(|| format!("failed to write tmp: {}", tmp_path.display()))?;
54    fs::rename(&tmp_path, &backup_path)
55        .with_context(|| format!("failed to rename tmp to {}", backup_path.display()))?;
56    Ok(())
57}
58
59fn latest_backup_ts(dir: &Path, stem: &str) -> Result<Option<u64>> {
60    let prefix = format!("{stem}.bck.");
61    let suffix = ".gz";
62    let mut latest = None;
63    for entry in fs::read_dir(dir).with_context(|| format!("read dir: {}", dir.display()))? {
64        let entry = entry?;
65        let name = entry.file_name();
66        let name = name.to_string_lossy();
67        if !name.starts_with(&prefix) || !name.ends_with(suffix) {
68            continue;
69        }
70        let ts_part = &name[prefix.len()..name.len() - suffix.len()];
71        if let Ok(ts) = ts_part.parse::<u64>() {
72            match latest {
73                Some(current) => {
74                    if ts > current {
75                        latest = Some(ts);
76                    }
77                }
78                None => latest = Some(ts),
79            }
80        }
81    }
82    Ok(latest)
83}
84
85#[derive(Debug, Clone, Serialize, Deserialize)]
86pub struct GraphFile {
87    pub metadata: Metadata,
88    #[serde(default)]
89    pub nodes: Vec<Node>,
90    #[serde(default)]
91    pub edges: Vec<Edge>,
92    #[serde(default)]
93    pub notes: Vec<Note>,
94}
95
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub struct Metadata {
98    pub name: String,
99    pub version: String,
100    pub description: String,
101    pub node_count: usize,
102    pub edge_count: usize,
103}
104
105#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct Node {
107    pub id: String,
108    #[serde(rename = "type")]
109    pub r#type: String,
110    pub name: String,
111    #[serde(default)]
112    pub properties: NodeProperties,
113    #[serde(default)]
114    pub source_files: Vec<String>,
115}
116
117#[derive(Debug, Clone, Default, Serialize, Deserialize)]
118pub struct NodeProperties {
119    #[serde(default)]
120    pub description: String,
121    #[serde(default)]
122    pub domain_area: String,
123    #[serde(default)]
124    pub provenance: String,
125    #[serde(default)]
126    pub confidence: Option<f64>,
127    #[serde(default)]
128    pub created_at: String,
129    #[serde(default)]
130    pub key_facts: Vec<String>,
131    #[serde(default)]
132    pub alias: Vec<String>,
133    #[serde(default)]
134    pub feedback_score: f64,
135    #[serde(default)]
136    pub feedback_count: u64,
137    #[serde(default)]
138    pub feedback_last_ts_ms: Option<u64>,
139}
140
141#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct Edge {
143    pub source_id: String,
144    pub relation: String,
145    pub target_id: String,
146    #[serde(default)]
147    pub properties: EdgeProperties,
148}
149
150#[derive(Debug, Clone, Default, Serialize, Deserialize)]
151pub struct EdgeProperties {
152    #[serde(default)]
153    pub detail: String,
154    #[serde(default)]
155    pub feedback_score: f64,
156    #[serde(default)]
157    pub feedback_count: u64,
158    #[serde(default)]
159    pub feedback_last_ts_ms: Option<u64>,
160}
161
162#[derive(Debug, Clone, Default, Serialize, Deserialize)]
163pub struct Note {
164    pub id: String,
165    pub node_id: String,
166    #[serde(default)]
167    pub body: String,
168    #[serde(default)]
169    pub tags: Vec<String>,
170    #[serde(default)]
171    pub author: String,
172    #[serde(default)]
173    pub created_at: String,
174    #[serde(default)]
175    pub provenance: String,
176    #[serde(default)]
177    pub source_files: Vec<String>,
178}
179
180impl GraphFile {
181    pub fn new(name: &str) -> Self {
182        Self {
183            metadata: Metadata {
184                name: name.to_owned(),
185                version: "1.0".to_owned(),
186                description: format!("Knowledge graph: {name}"),
187                node_count: 0,
188                edge_count: 0,
189            },
190            nodes: Vec::new(),
191            edges: Vec::new(),
192            notes: Vec::new(),
193        }
194    }
195
196    pub fn load(path: &Path) -> Result<Self> {
197        let raw = fs::read_to_string(path)
198            .with_context(|| format!("failed to read graph: {}", path.display()))?;
199        let mut graph: GraphFile = serde_json::from_str(&raw)
200            .with_context(|| format!("invalid JSON: {}", path.display()))?;
201        graph.refresh_counts();
202        Ok(graph)
203    }
204
205    pub fn save(&self, path: &Path) -> Result<()> {
206        let mut graph = self.clone();
207        graph.refresh_counts();
208        let raw = serde_json::to_string_pretty(&graph).context("failed to serialize graph")?;
209        atomic_write(path, &raw)?;
210        backup_graph_if_stale(path, &raw)
211    }
212
213    pub fn refresh_counts(&mut self) {
214        self.metadata.node_count = self.nodes.len();
215        self.metadata.edge_count = self.edges.len();
216    }
217
218    pub fn node_by_id(&self, id: &str) -> Option<&Node> {
219        self.nodes.iter().find(|node| node.id == id)
220    }
221
222    pub fn node_by_id_mut(&mut self, id: &str) -> Option<&mut Node> {
223        self.nodes.iter_mut().find(|node| node.id == id)
224    }
225
226    pub fn has_edge(&self, source_id: &str, relation: &str, target_id: &str) -> bool {
227        self.edges.iter().any(|edge| {
228            edge.source_id == source_id && edge.relation == relation && edge.target_id == target_id
229        })
230    }
231}