1use std::collections::{BTreeMap, BTreeSet};
4
5use rusqlite::Connection;
6use serde_json::Value;
7
8use crate::TalonError;
9use crate::indexing::migrations::read_db_version;
10
11use super::snapshot::{GraphEdge, GraphNode};
12
13#[derive(Debug, Clone, Default, PartialEq, Eq)]
15pub struct GraphBuildInput;
16
17#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
19#[serde(rename_all = "camelCase")]
20pub struct GraphBuildStats {
21 pub node_count: u32,
23 pub edge_count: u32,
25 pub source_count: u32,
27}
28
29#[derive(Debug, Clone, Default, PartialEq)]
30pub(super) struct BuiltGraph {
31 pub(super) db_version: u64,
32 pub(super) nodes: BTreeMap<String, GraphNode>,
33 pub(super) edges: Vec<GraphEdge>,
34 pub(super) source_citations: BTreeMap<String, BTreeSet<String>>,
35 pub(super) communities: Vec<super::CommunityInfo>,
36 pub(super) missing_links: Vec<super::LinkSuggestion>,
37}
38
39pub fn rebuild_graph(
45 conn: &mut Connection,
46 input: &GraphBuildInput,
47) -> Result<GraphBuildStats, TalonError> {
48 rebuild_graph_inner(conn, input)
49}
50
51fn rebuild_graph_inner(
52 conn: &mut Connection,
53 _input: &GraphBuildInput,
54) -> Result<GraphBuildStats, TalonError> {
55 crate::indexing::migrations::run_migrations(conn)?;
56 let graph = build_graph(conn)?;
57 let stats = GraphBuildStats {
58 node_count: graph.nodes.len().try_into().unwrap_or(u32::MAX),
59 edge_count: graph.edges.len().try_into().unwrap_or(u32::MAX),
60 source_count: graph
61 .source_citations
62 .values()
63 .map(BTreeSet::len)
64 .sum::<usize>()
65 .try_into()
66 .unwrap_or(u32::MAX),
67 };
68 super::storage::replace_graph(conn, &graph)?;
69 Ok(stats)
70}
71
72fn build_graph(conn: &Connection) -> Result<BuiltGraph, TalonError> {
73 let mut graph = BuiltGraph {
74 db_version: read_db_version(conn),
75 ..BuiltGraph::default()
76 };
77 load_nodes(conn, &mut graph)?;
78 load_edges(conn, &mut graph)?;
79 populate_degrees(&mut graph);
80 let mut snapshot = super::GraphSnapshot {
81 db_version: graph.db_version,
82 built_at: None,
83 nodes: graph.nodes.clone(),
84 edges: graph.edges.clone(),
85 source_citations: graph.source_citations.clone(),
86 };
87 graph.communities = super::detect_communities(&mut snapshot);
88 graph.missing_links = super::build_suggestions::build_link_suggestions(conn, &snapshot)?;
89 graph.nodes = snapshot.nodes;
90 Ok(graph)
91}
92
93fn load_nodes(conn: &Connection, graph: &mut BuiltGraph) -> Result<(), TalonError> {
94 let mut stmt = conn
95 .prepare(
96 "SELECT id, vault_path, COALESCE(title, vault_path), aliases, tags, scope
97 FROM notes
98 WHERE active = 1
99 ORDER BY vault_path",
100 )
101 .map_err(|source| TalonError::Sqlite {
102 context: "load graph source notes",
103 source,
104 })?;
105 let rows = stmt
106 .query_map([], |row| {
107 Ok((
108 row.get::<_, i64>(0)?,
109 row.get::<_, String>(1)?,
110 row.get::<_, String>(2)?,
111 row.get::<_, String>(3)?,
112 row.get::<_, String>(4)?,
113 row.get::<_, String>(5)?,
114 ))
115 })
116 .map_err(|source| TalonError::Sqlite {
117 context: "load graph source notes",
118 source,
119 })?;
120
121 for row in rows {
122 let (note_id, vault_path, title, aliases_json, tags_json, scope) =
123 row.map_err(|source| TalonError::Sqlite {
124 context: "load graph source notes",
125 source,
126 })?;
127 let sources = load_sources(conn, note_id, &vault_path)?;
128 for source in &sources {
129 graph
130 .source_citations
131 .entry(source.clone())
132 .or_default()
133 .insert(vault_path.clone());
134 }
135 graph.nodes.insert(
136 vault_path.clone(),
137 GraphNode {
138 structural: is_structural_page(&vault_path),
139 vault_path,
140 title,
141 aliases: parse_string_vec(&aliases_json),
142 tags: parse_string_vec(&tags_json),
143 scope,
144 note_type: load_frontmatter_value(conn, note_id, "type")?,
145 sources,
146 outgoing_degree: 0,
147 backlink_degree: 0,
148 total_degree: 0,
149 community_id: None,
150 community_cohesion: 0.0,
151 community_neighbor_count: 0,
152 bridge_weight: 0.0,
153 },
154 );
155 }
156 Ok(())
157}
158
159fn load_edges(conn: &Connection, graph: &mut BuiltGraph) -> Result<(), TalonError> {
160 let mut stmt = conn
161 .prepare(
162 "SELECT l.from_path, l.to_path, MIN(COALESCE(l.alias, l.raw_target, l.to_path)), COUNT(*)
163 FROM links l
164 JOIN notes nf ON nf.vault_path = l.from_path AND nf.active = 1
165 JOIN notes nt ON nt.vault_path = l.to_path AND nt.active = 1
166 GROUP BY l.from_path, l.to_path
167 ORDER BY l.from_path, l.to_path",
168 )
169 .map_err(|source| TalonError::Sqlite {
170 context: "load graph edges",
171 source,
172 })?;
173 graph.edges = stmt
174 .query_map([], |row| {
175 Ok(GraphEdge {
176 from_path: row.get(0)?,
177 to_path: row.get(1)?,
178 link_text: row.get(2)?,
179 weight: row.get(3)?,
180 })
181 })
182 .map_err(|source| TalonError::Sqlite {
183 context: "load graph edges",
184 source,
185 })?
186 .collect::<Result<Vec<_>, _>>()
187 .map_err(|source| TalonError::Sqlite {
188 context: "load graph edges",
189 source,
190 })?;
191 Ok(())
192}
193
194fn populate_degrees(graph: &mut BuiltGraph) {
195 let mut outgoing: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
196 let mut backlinks: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
197 for edge in &graph.edges {
198 outgoing
199 .entry(edge.from_path.clone())
200 .or_default()
201 .insert(edge.to_path.clone());
202 backlinks
203 .entry(edge.to_path.clone())
204 .or_default()
205 .insert(edge.from_path.clone());
206 }
207 for (path, node) in &mut graph.nodes {
208 let out = outgoing.get(path).cloned().unwrap_or_default();
209 let back = backlinks.get(path).cloned().unwrap_or_default();
210 let total = out.union(&back).count();
211 node.outgoing_degree = out.len().try_into().unwrap_or(u32::MAX);
212 node.backlink_degree = back.len().try_into().unwrap_or(u32::MAX);
213 node.total_degree = total.try_into().unwrap_or(u32::MAX);
214 }
215}
216
217fn load_sources(
218 conn: &Connection,
219 note_id: i64,
220 vault_path: &str,
221) -> Result<Vec<String>, TalonError> {
222 let mut stmt = conn
223 .prepare(
224 "SELECT value FROM note_frontmatter_fields
225 WHERE note_id = ?1 AND field = 'sources'
226 ORDER BY value",
227 )
228 .map_err(|source| TalonError::Sqlite {
229 context: "load graph sources",
230 source,
231 })?;
232 let rows = stmt
233 .query_map([note_id], |row| row.get::<_, String>(0))
234 .map_err(|source| TalonError::Sqlite {
235 context: "load graph sources",
236 source,
237 })?;
238 let mut seen = BTreeSet::new();
239 for row in rows {
240 let source = row.map_err(|source| TalonError::Sqlite {
241 context: "load graph sources",
242 source,
243 })?;
244 let cleaned = clean_source_reference(vault_path, &source);
245 if !cleaned.is_empty() {
246 seen.insert(cleaned);
247 }
248 }
249 Ok(seen.into_iter().collect())
250}
251
252fn load_frontmatter_value(
253 conn: &Connection,
254 note_id: i64,
255 field: &str,
256) -> Result<Option<String>, TalonError> {
257 match conn.query_row(
258 "SELECT value FROM note_frontmatter_fields
259 WHERE note_id = ?1 AND field = ?2
260 ORDER BY value
261 LIMIT 1",
262 (note_id, field),
263 |row| row.get(0),
264 ) {
265 Ok(value) => Ok(Some(value)),
266 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
267 Err(source) => Err(TalonError::Sqlite {
268 context: "load graph frontmatter",
269 source,
270 }),
271 }
272}
273
274fn parse_string_vec(raw: &str) -> Vec<String> {
275 match serde_json::from_str::<Value>(raw) {
276 Ok(Value::Array(values)) => values
277 .into_iter()
278 .filter_map(|value| value.as_str().map(str::to_string))
279 .collect(),
280 _ => Vec::new(),
281 }
282}
283
284fn is_structural_page(path: &str) -> bool {
285 let Some(file_name) = std::path::Path::new(path)
286 .file_name()
287 .and_then(std::ffi::OsStr::to_str)
288 else {
289 return false;
290 };
291 let lower = file_name.to_ascii_lowercase();
292 matches!(lower.as_str(), "index.md" | "readme.md" | "overview.md")
293 || lower.ends_with("_index.md")
294 || lower.ends_with("-index.md")
295}
296
297pub(super) fn clean_source_reference(from_path: &str, value: &str) -> String {
298 let trimmed = value.trim();
299 let without_link = trimmed
300 .strip_prefix("[[")
301 .and_then(|inner| inner.strip_suffix("]]"))
302 .unwrap_or(trimmed);
303 let without_alias = without_link
304 .split_once('|')
305 .map_or(without_link, |(target, _alias)| target);
306 let target = without_alias
307 .split_once('#')
308 .map_or(without_alias, |(target, _heading)| target)
309 .trim();
310 if target.is_empty() {
311 return String::new();
312 }
313 if target.contains("://") {
314 return target.to_string();
315 }
316 if has_markdown_extension(target) || target.contains('/') {
317 return target.replace('\\', "/");
318 }
319 let Some(parent) = std::path::Path::new(from_path).parent() else {
320 return format!("{target}.md");
321 };
322 let joined = parent.join(format!("{target}.md"));
323 joined.to_string_lossy().replace('\\', "/")
324}
325
326fn has_markdown_extension(target: &str) -> bool {
327 std::path::Path::new(target)
328 .extension()
329 .is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
330}