Skip to main content

talon_core/graph/
build.rs

1//! Build the persisted graph artifact from indexed notes and resolved links.
2
3use std::collections::{BTreeMap, BTreeSet};
4
5use rusqlite::Connection;
6use serde_json::Value;
7
8use crate::TalonError;
9use crate::indexing::migrations::read_db_version;
10
11use super::snapshot::{GraphEdge, GraphNode};
12
13/// Graph rebuild options.
14#[derive(Debug, Clone, Default, PartialEq, Eq)]
15pub struct GraphBuildInput;
16
17/// Statistics returned after rebuilding the graph artifact.
18#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
19#[serde(rename_all = "camelCase")]
20pub struct GraphBuildStats {
21    /// Active graph node count.
22    pub node_count: u32,
23    /// Active-active directed edge count.
24    pub edge_count: u32,
25    /// Normalized source citation rows.
26    pub source_count: u32,
27}
28
29#[derive(Debug, Clone, Default, PartialEq)]
30pub(super) struct BuiltGraph {
31    pub(super) db_version: u64,
32    pub(super) nodes: BTreeMap<String, GraphNode>,
33    pub(super) edges: Vec<GraphEdge>,
34    pub(super) source_citations: BTreeMap<String, BTreeSet<String>>,
35    pub(super) communities: Vec<super::CommunityInfo>,
36    pub(super) missing_links: Vec<super::LinkSuggestion>,
37}
38
39/// Rebuilds graph tables from active notes and active-active resolved links.
40///
41/// # Errors
42///
43/// Returns [`TalonError::Sqlite`] when the index cannot be queried or written.
44pub fn rebuild_graph(
45    conn: &mut Connection,
46    input: &GraphBuildInput,
47) -> Result<GraphBuildStats, TalonError> {
48    rebuild_graph_inner(conn, input)
49}
50
51fn rebuild_graph_inner(
52    conn: &mut Connection,
53    _input: &GraphBuildInput,
54) -> Result<GraphBuildStats, TalonError> {
55    crate::indexing::migrations::run_migrations(conn)?;
56    let graph = build_graph(conn)?;
57    let stats = GraphBuildStats {
58        node_count: graph.nodes.len().try_into().unwrap_or(u32::MAX),
59        edge_count: graph.edges.len().try_into().unwrap_or(u32::MAX),
60        source_count: graph
61            .source_citations
62            .values()
63            .map(BTreeSet::len)
64            .sum::<usize>()
65            .try_into()
66            .unwrap_or(u32::MAX),
67    };
68    super::storage::replace_graph(conn, &graph)?;
69    Ok(stats)
70}
71
72fn build_graph(conn: &Connection) -> Result<BuiltGraph, TalonError> {
73    let mut graph = BuiltGraph {
74        db_version: read_db_version(conn),
75        ..BuiltGraph::default()
76    };
77    load_nodes(conn, &mut graph)?;
78    load_edges(conn, &mut graph)?;
79    populate_degrees(&mut graph);
80    let mut snapshot = super::GraphSnapshot {
81        db_version: graph.db_version,
82        built_at: None,
83        nodes: graph.nodes.clone(),
84        edges: graph.edges.clone(),
85        source_citations: graph.source_citations.clone(),
86    };
87    graph.communities = super::detect_communities(&mut snapshot);
88    graph.missing_links = super::build_suggestions::build_link_suggestions(conn, &snapshot)?;
89    graph.nodes = snapshot.nodes;
90    Ok(graph)
91}
92
93fn load_nodes(conn: &Connection, graph: &mut BuiltGraph) -> Result<(), TalonError> {
94    let mut stmt = conn
95        .prepare(
96            "SELECT id, vault_path, COALESCE(title, vault_path), aliases, tags, scope
97             FROM notes
98             WHERE active = 1
99             ORDER BY vault_path",
100        )
101        .map_err(|source| TalonError::Sqlite {
102            context: "load graph source notes",
103            source,
104        })?;
105    let rows = stmt
106        .query_map([], |row| {
107            Ok((
108                row.get::<_, i64>(0)?,
109                row.get::<_, String>(1)?,
110                row.get::<_, String>(2)?,
111                row.get::<_, String>(3)?,
112                row.get::<_, String>(4)?,
113                row.get::<_, String>(5)?,
114            ))
115        })
116        .map_err(|source| TalonError::Sqlite {
117            context: "load graph source notes",
118            source,
119        })?;
120
121    for row in rows {
122        let (note_id, vault_path, title, aliases_json, tags_json, scope) =
123            row.map_err(|source| TalonError::Sqlite {
124                context: "load graph source notes",
125                source,
126            })?;
127        let sources = load_sources(conn, note_id, &vault_path)?;
128        for source in &sources {
129            graph
130                .source_citations
131                .entry(source.clone())
132                .or_default()
133                .insert(vault_path.clone());
134        }
135        graph.nodes.insert(
136            vault_path.clone(),
137            GraphNode {
138                structural: is_structural_page(&vault_path),
139                vault_path,
140                title,
141                aliases: parse_string_vec(&aliases_json),
142                tags: parse_string_vec(&tags_json),
143                scope,
144                note_type: load_frontmatter_value(conn, note_id, "type")?,
145                sources,
146                outgoing_degree: 0,
147                backlink_degree: 0,
148                total_degree: 0,
149                community_id: None,
150                community_cohesion: 0.0,
151                community_neighbor_count: 0,
152                bridge_weight: 0.0,
153            },
154        );
155    }
156    Ok(())
157}
158
159fn load_edges(conn: &Connection, graph: &mut BuiltGraph) -> Result<(), TalonError> {
160    let mut stmt = conn
161        .prepare(
162            "SELECT l.from_path, l.to_path, MIN(COALESCE(l.alias, l.raw_target, l.to_path)), COUNT(*)
163             FROM links l
164             JOIN notes nf ON nf.vault_path = l.from_path AND nf.active = 1
165             JOIN notes nt ON nt.vault_path = l.to_path AND nt.active = 1
166             GROUP BY l.from_path, l.to_path
167             ORDER BY l.from_path, l.to_path",
168        )
169        .map_err(|source| TalonError::Sqlite {
170            context: "load graph edges",
171            source,
172        })?;
173    graph.edges = stmt
174        .query_map([], |row| {
175            Ok(GraphEdge {
176                from_path: row.get(0)?,
177                to_path: row.get(1)?,
178                link_text: row.get(2)?,
179                weight: row.get(3)?,
180            })
181        })
182        .map_err(|source| TalonError::Sqlite {
183            context: "load graph edges",
184            source,
185        })?
186        .collect::<Result<Vec<_>, _>>()
187        .map_err(|source| TalonError::Sqlite {
188            context: "load graph edges",
189            source,
190        })?;
191    Ok(())
192}
193
194fn populate_degrees(graph: &mut BuiltGraph) {
195    let mut outgoing: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
196    let mut backlinks: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
197    for edge in &graph.edges {
198        outgoing
199            .entry(edge.from_path.clone())
200            .or_default()
201            .insert(edge.to_path.clone());
202        backlinks
203            .entry(edge.to_path.clone())
204            .or_default()
205            .insert(edge.from_path.clone());
206    }
207    for (path, node) in &mut graph.nodes {
208        let out = outgoing.get(path).cloned().unwrap_or_default();
209        let back = backlinks.get(path).cloned().unwrap_or_default();
210        let total = out.union(&back).count();
211        node.outgoing_degree = out.len().try_into().unwrap_or(u32::MAX);
212        node.backlink_degree = back.len().try_into().unwrap_or(u32::MAX);
213        node.total_degree = total.try_into().unwrap_or(u32::MAX);
214    }
215}
216
217fn load_sources(
218    conn: &Connection,
219    note_id: i64,
220    vault_path: &str,
221) -> Result<Vec<String>, TalonError> {
222    let mut stmt = conn
223        .prepare(
224            "SELECT value FROM note_frontmatter_fields
225             WHERE note_id = ?1 AND field = 'sources'
226             ORDER BY value",
227        )
228        .map_err(|source| TalonError::Sqlite {
229            context: "load graph sources",
230            source,
231        })?;
232    let rows = stmt
233        .query_map([note_id], |row| row.get::<_, String>(0))
234        .map_err(|source| TalonError::Sqlite {
235            context: "load graph sources",
236            source,
237        })?;
238    let mut seen = BTreeSet::new();
239    for row in rows {
240        let source = row.map_err(|source| TalonError::Sqlite {
241            context: "load graph sources",
242            source,
243        })?;
244        let cleaned = clean_source_reference(vault_path, &source);
245        if !cleaned.is_empty() {
246            seen.insert(cleaned);
247        }
248    }
249    Ok(seen.into_iter().collect())
250}
251
252fn load_frontmatter_value(
253    conn: &Connection,
254    note_id: i64,
255    field: &str,
256) -> Result<Option<String>, TalonError> {
257    match conn.query_row(
258        "SELECT value FROM note_frontmatter_fields
259         WHERE note_id = ?1 AND field = ?2
260         ORDER BY value
261         LIMIT 1",
262        (note_id, field),
263        |row| row.get(0),
264    ) {
265        Ok(value) => Ok(Some(value)),
266        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
267        Err(source) => Err(TalonError::Sqlite {
268            context: "load graph frontmatter",
269            source,
270        }),
271    }
272}
273
274fn parse_string_vec(raw: &str) -> Vec<String> {
275    match serde_json::from_str::<Value>(raw) {
276        Ok(Value::Array(values)) => values
277            .into_iter()
278            .filter_map(|value| value.as_str().map(str::to_string))
279            .collect(),
280        _ => Vec::new(),
281    }
282}
283
284fn is_structural_page(path: &str) -> bool {
285    let Some(file_name) = std::path::Path::new(path)
286        .file_name()
287        .and_then(std::ffi::OsStr::to_str)
288    else {
289        return false;
290    };
291    let lower = file_name.to_ascii_lowercase();
292    matches!(lower.as_str(), "index.md" | "readme.md" | "overview.md")
293        || lower.ends_with("_index.md")
294        || lower.ends_with("-index.md")
295}
296
297pub(super) fn clean_source_reference(from_path: &str, value: &str) -> String {
298    let trimmed = value.trim();
299    let without_link = trimmed
300        .strip_prefix("[[")
301        .and_then(|inner| inner.strip_suffix("]]"))
302        .unwrap_or(trimmed);
303    let without_alias = without_link
304        .split_once('|')
305        .map_or(without_link, |(target, _alias)| target);
306    let target = without_alias
307        .split_once('#')
308        .map_or(without_alias, |(target, _heading)| target)
309        .trim();
310    if target.is_empty() {
311        return String::new();
312    }
313    if target.contains("://") {
314        return target.to_string();
315    }
316    if has_markdown_extension(target) || target.contains('/') {
317        return target.replace('\\', "/");
318    }
319    let Some(parent) = std::path::Path::new(from_path).parent() else {
320        return format!("{target}.md");
321    };
322    let joined = parent.join(format!("{target}.md"));
323    joined.to_string_lossy().replace('\\', "/")
324}
325
326fn has_markdown_extension(target: &str) -> bool {
327    std::path::Path::new(target)
328        .extension()
329        .is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
330}