Skip to main content

talon_core/
links.rs

1//! Link graph types and resolution logic.
2//!
3//! Implements wikilink resolution against the indexed note set, producing
4//! directed edges for the link graph with backlink computation.
5
6use crate::numeric::count_u32;
7use crate::text::frontmatter::{WikiLink, normalize_keyword, normalize_vault_path};
8use serde::{Deserialize, Serialize};
9use std::collections::HashSet;
10use std::hash::BuildHasher;
11
12mod resolver;
13pub use resolver::LinkResolver;
14
15// ── Link graph types ────────────────────────────────────────────────────────
16
17/// A resolved link between two notes in the vault.
18#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
19pub struct ResolvedLink {
20    /// Source note path (where the link appears).
21    pub from_path: String,
22    /// Target note path (where the link resolves to).
23    pub to_path: String,
24    /// Display alias (if `[[target|alias]]`).
25    pub alias: Option<String>,
26    /// Section heading anchor (if `[[target#heading]]`).
27    pub heading: Option<String>,
28    /// Raw target text (before resolution).
29    pub raw_target: String,
30}
31
32/// A note reference used for wikilink resolution.
33#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
34pub struct NoteReference {
35    /// Vault-relative path.
36    pub vault_path: String,
37    /// Note title (from frontmatter or filename).
38    pub title: Option<String>,
39    /// Normalized aliases from frontmatter.
40    pub aliases: Vec<String>,
41}
42
43/// Link graph edge for database storage.
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45pub struct LinkEdge {
46    /// Source note path.
47    pub from_path: String,
48    /// Target note path.
49    pub to_path: String,
50    /// Whether the target was resolved.
51    pub resolved: bool,
52    /// Raw target text.
53    pub raw_target: String,
54    /// Display alias.
55    pub alias: Option<String>,
56    /// Section heading anchor.
57    pub heading: Option<String>,
58}
59
60/// Link graph statistics.
61#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
62pub struct LinkGraphStats {
63    /// Total number of links (edges).
64    pub total_links: u32,
65    /// Number of resolved links.
66    pub resolved_links: u32,
67    /// Number of unresolved links.
68    pub unresolved_links: u32,
69    /// Number of unique target paths.
70    pub unique_targets: u32,
71    /// Number of nodes with no outgoing links.
72    pub isolated_nodes: u32,
73}
74
75/// Resolves a single wikilink target against the note reference set.
76///
77/// Returns the resolved vault path, or `None` if the target doesn't match any note.
78///
79/// # Algorithm
80/// 1. Normalize the target (NFD, lowercase, remove `.md` suffix).
81/// 2. For each note, check:
82///    - Path match (exact, with/without `.md`)
83///    - Unique path suffix or basename match (Obsidian-style shortest path)
84///    - Title match
85///    - Alias match
86/// 3. Return first match.
87#[must_use]
88pub fn resolve_wiki_link_target(target: &str, notes: &[NoteReference]) -> Option<String> {
89    let normalized_target = normalize_keyword(&normalize_vault_path(target));
90    let normalized_stem = normalized_target
91        .strip_suffix(".md")
92        .unwrap_or(&normalized_target)
93        .to_string();
94    let normalized_with_ext = if has_markdown_extension(&normalized_target) {
95        normalized_target.clone()
96    } else {
97        format!("{normalized_target}.md")
98    };
99    let target_contains_path = normalized_stem.contains('/');
100    let mut suffix_matches = Vec::new();
101
102    for note in notes {
103        let normalized_path = normalize_keyword(&normalize_vault_path(&note.vault_path));
104        let normalized_path_stem = normalized_path
105            .strip_suffix(".md")
106            .unwrap_or(&normalized_path)
107            .to_string();
108
109        if normalized_path == normalized_target
110            || normalized_path == normalized_with_ext
111            || normalized_path_stem == normalized_stem
112        {
113            return Some(note.vault_path.clone());
114        }
115
116        let suffix_matches_note = if target_contains_path {
117            path_has_component_suffix(&normalized_path_stem, &normalized_stem)
118        } else {
119            basename(&normalized_path_stem) == normalized_stem
120        };
121        if suffix_matches_note {
122            suffix_matches.push(note.vault_path.clone());
123        }
124    }
125
126    if suffix_matches.len() == 1 {
127        return suffix_matches.into_iter().next();
128    }
129
130    for note in notes {
131        let normalized_title = note
132            .title
133            .as_ref()
134            .map(|t| normalize_keyword(t))
135            .unwrap_or_default();
136        let normalized_aliases: HashSet<String> = note
137            .aliases
138            .iter()
139            .map(|alias| normalize_keyword(alias))
140            .collect();
141
142        // Title match
143        let matches_title =
144            normalized_title == normalized_target || normalized_title == normalized_stem;
145
146        // Alias match
147        let matches_alias = normalized_aliases.contains(&normalized_target)
148            || normalized_aliases.contains(&normalized_stem);
149
150        if matches_title || matches_alias {
151            return Some(note.vault_path.clone());
152        }
153    }
154
155    None
156}
157
158fn basename(path_stem: &str) -> &str {
159    path_stem.rsplit('/').next().unwrap_or(path_stem)
160}
161
162fn path_has_component_suffix(path_stem: &str, target_stem: &str) -> bool {
163    path_stem
164        .strip_suffix(target_stem)
165        .is_some_and(|prefix| prefix.is_empty() || prefix.ends_with('/'))
166}
167
168fn has_markdown_extension(path: &str) -> bool {
169    std::path::Path::new(path)
170        .extension()
171        .is_some_and(|extension| extension.eq_ignore_ascii_case("md"))
172}
173
174/// Resolves all wikilinks from a source note against the note reference set.
175///
176/// Returns resolved links (those that match an indexed note).
177#[must_use]
178pub fn resolve_wiki_links(
179    from_path: &str,
180    links: &[WikiLink],
181    notes: &[NoteReference],
182) -> Vec<ResolvedLink> {
183    let mut resolved = Vec::new();
184
185    for link in links {
186        let to_path = resolve_wiki_link_target(&link.target, notes);
187        if let Some(to_path) = to_path {
188            resolved.push(ResolvedLink {
189                from_path: from_path.to_string(),
190                to_path,
191                alias: link.alias.clone(),
192                heading: link.heading.clone(),
193                raw_target: link.raw_target.clone(),
194            });
195        }
196    }
197
198    resolved
199}
200
201/// Builds a link graph edge list from resolved links.
202#[must_use]
203pub fn build_link_edges(from_path: &str, resolved: &[ResolvedLink]) -> Vec<LinkEdge> {
204    resolved
205        .iter()
206        .map(|r| LinkEdge {
207            from_path: from_path.to_string(),
208            to_path: r.to_path.clone(),
209            resolved: true,
210            raw_target: r.raw_target.clone(),
211            alias: r.alias.clone(),
212            heading: r.heading.clone(),
213        })
214        .collect()
215}
216
217/// Computes backlinks: for each target, find all sources that link to it.
218#[must_use]
219pub fn compute_backlinks(edges: &[LinkEdge]) -> std::collections::BTreeMap<String, Vec<String>> {
220    let mut backlinks: std::collections::BTreeMap<String, std::collections::BTreeSet<String>> =
221        std::collections::BTreeMap::new();
222
223    for edge in edges {
224        if edge.resolved {
225            backlinks
226                .entry(edge.to_path.clone())
227                .or_default()
228                .insert(edge.from_path.clone());
229        }
230    }
231
232    backlinks
233        .into_iter()
234        .map(|(k, v)| (k, v.into_iter().collect()))
235        .collect()
236}
237
238/// Finds unresolved links (links whose targets don't resolve to any indexed note).
239#[must_use]
240pub fn find_unresolved_links<S: BuildHasher>(
241    from_path: &str,
242    links: &[WikiLink],
243    notes: &[NoteReference],
244    ignored_link_targets: &HashSet<String, S>,
245) -> Vec<ResolvedLink> {
246    let mut unresolved = Vec::new();
247
248    for link in links {
249        if resolve_wiki_link_target(&link.target, notes).is_none()
250            && !is_ignored_link_target(&link.target, ignored_link_targets)
251        {
252            unresolved.push(ResolvedLink {
253                from_path: from_path.to_string(),
254                to_path: String::new(),
255                alias: link.alias.clone(),
256                heading: link.heading.clone(),
257                raw_target: link.raw_target.clone(),
258            });
259        }
260    }
261
262    unresolved
263}
264
265fn is_ignored_link_target<S: BuildHasher>(
266    target: &str,
267    ignored_link_targets: &HashSet<String, S>,
268) -> bool {
269    let normalized_target = normalize_keyword(&normalize_vault_path(target));
270    if ignored_link_targets.contains(&normalized_target) {
271        return true;
272    }
273
274    normalized_target
275        .rsplit('/')
276        .next()
277        .is_some_and(|name| ignored_link_targets.contains(name))
278}
279
280/// Computes link graph statistics.
281#[must_use]
282pub fn compute_link_stats(edges: &[LinkEdge], note_paths: &[String]) -> LinkGraphStats {
283    let total_links = count_u32(edges.len());
284    let resolved_links = count_u32(edges.iter().filter(|e| e.resolved).count());
285    let unresolved_links = count_u32(edges.iter().filter(|e| !e.resolved).count());
286
287    let unique_targets: std::collections::BTreeSet<String> = edges
288        .iter()
289        .filter(|e| e.resolved)
290        .map(|e| e.to_path.clone())
291        .collect();
292
293    let sources_with_outgoing: std::collections::BTreeSet<String> =
294        edges.iter().map(|e| e.from_path.clone()).collect();
295    let isolated_nodes = note_paths
296        .iter()
297        .filter(|p| !sources_with_outgoing.contains(p.as_str()))
298        .count();
299
300    LinkGraphStats {
301        total_links,
302        resolved_links,
303        unresolved_links,
304        unique_targets: count_u32(unique_targets.len()),
305        isolated_nodes: count_u32(isolated_nodes),
306    }
307}
308
309#[cfg(test)]
310#[allow(clippy::unwrap_used)]
311mod tests;