1use std::collections::{BTreeMap, BTreeSet};
7
8use regex::Regex;
9use std::sync::LazyLock;
10
11use crate::record::{Record, Value};
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
17pub enum Direction {
18 Outgoing,
19 Incoming,
20 Both,
21}
22
23#[derive(Debug, Clone)]
25pub enum GraphScope {
26 All,
27 Folder(String),
28 Where(crate::query::Expr),
29}
30
31#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
34pub struct UnresolvedLink {
35 pub source: String,
36 pub target: String,
37}
38
39static WIKI_LINK_RE: LazyLock<Regex> =
40 LazyLock::new(|| Regex::new(r"\[\[([^\]\|#]+)(?:#[^\]\|]*)?\|?[^\]]*\]\]").unwrap());
41
42static FENCED_CODE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?s)```.*?```").unwrap());
43
44static INLINE_CODE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`[^`]+`").unwrap());
45
46fn strip_code_blocks(content: &str) -> String {
48 let without_fenced = FENCED_CODE_RE.replace_all(content, "");
49 INLINE_CODE_RE.replace_all(&without_fenced, "").into_owned()
50}
51
52fn extract_links_from_str(text: &str) -> Vec<String> {
55 WIKI_LINK_RE
56 .captures_iter(text)
57 .map(|cap| cap[1].trim().to_string())
58 .collect()
59}
60
61pub fn extract_links(content: &str) -> BTreeSet<String> {
64 let cleaned = strip_code_blocks(content);
65 let mut links = BTreeSet::new();
66 for link in extract_links_from_str(&cleaned) {
67 links.insert(link);
68 }
69 links
70}
71
72pub fn record_links(record: &Record) -> BTreeSet<String> {
74 match &record.raw_content {
75 Some(content) => extract_links(content),
76 None => BTreeSet::new(),
77 }
78}
79
80#[derive(Debug, Default)]
86pub struct LinkGraph {
87 outgoing: BTreeMap<String, BTreeSet<String>>,
89 incoming: BTreeMap<String, BTreeSet<String>>,
91 name_to_paths: BTreeMap<String, Vec<String>>,
93 records_by_name: BTreeMap<String, Record>,
95}
96
97impl LinkGraph {
98 pub fn build(records: &[Record]) -> Self {
101 Self::build_with_root(records, None)
102 }
103
104 pub fn build_with_root(records: &[Record], vault_root: Option<&std::path::Path>) -> Self {
106 let mut index = LinkGraph::default();
107
108 for record in records {
110 let name = record.virtual_name();
111 let rel_path = match vault_root {
112 Some(root) => record.virtual_path(root),
113 None => record.path.to_string_lossy().into_owned(),
114 };
115 index
116 .name_to_paths
117 .entry(name.clone())
118 .or_default()
119 .push(rel_path);
120 index
121 .records_by_name
122 .entry(name)
123 .or_insert_with(|| record.clone());
124 }
125
126 for record in records {
128 let name = record.virtual_name();
129 let links = record_links(record);
130
131 for target in &links {
133 let resolved = index.resolve_link_target(target);
134 index
135 .incoming
136 .entry(resolved.clone())
137 .or_default()
138 .insert(name.clone());
139 }
140
141 index.outgoing.insert(name, links);
142 }
143
144 index
145 }
146
147 pub fn record_by_name(&self, name: &str) -> Option<&Record> {
149 self.records_by_name.get(name)
150 }
151
152 fn resolve_link_target(&self, target: &str) -> String {
155 if target.contains('/') {
156 target.rsplit('/').next().unwrap_or(target).to_string()
158 } else {
159 target.to_string()
160 }
161 }
162
163 pub fn is_ambiguous(&self, name: &str) -> bool {
165 self.name_to_paths
166 .get(name)
167 .is_some_and(|paths| paths.len() > 1)
168 }
169
170 pub fn paths_for_name(&self, name: &str) -> Vec<&str> {
172 self.name_to_paths
173 .get(name)
174 .map(|paths| paths.iter().map(|s| s.as_str()).collect())
175 .unwrap_or_default()
176 }
177
178 pub fn outgoing_links(&self, name: &str) -> Vec<&str> {
180 self.outgoing
181 .get(name)
182 .map(|s| s.iter().map(|s| s.as_str()).collect())
183 .unwrap_or_default()
184 }
185
186 pub fn incoming_links(&self, name: &str) -> Vec<&str> {
188 self.incoming
189 .get(name)
190 .map(|s| s.iter().map(|s| s.as_str()).collect())
191 .unwrap_or_default()
192 }
193
194 pub fn outgoing_count(&self, name: &str) -> usize {
196 self.outgoing.get(name).map(|s| s.len()).unwrap_or(0)
197 }
198
199 pub fn incoming_count(&self, name: &str) -> usize {
201 self.incoming.get(name).map(|s| s.len()).unwrap_or(0)
202 }
203
204 pub fn traverse(
208 &self,
209 start: &str,
210 max_depth: usize,
211 direction: Direction,
212 ) -> Vec<(String, usize)> {
213 use std::collections::VecDeque;
214
215 let mut visited = BTreeSet::new();
216 let mut queue = VecDeque::new();
217 let mut results = Vec::new();
218
219 visited.insert(start.to_string());
220 queue.push_back((start.to_string(), 0usize));
221 results.push((start.to_string(), 0));
222
223 while let Some((current, depth)) = queue.pop_front() {
224 if depth >= max_depth {
225 continue;
226 }
227
228 let neighbors: Vec<&str> = match direction {
229 Direction::Outgoing => self.outgoing_links(¤t),
230 Direction::Incoming => self.incoming_links(¤t),
231 Direction::Both => {
232 let mut all = self.outgoing_links(¤t);
233 all.extend(self.incoming_links(¤t));
234 all
235 }
236 };
237
238 for neighbor in neighbors {
239 if visited.insert(neighbor.to_string()) {
240 let next_depth = depth + 1;
241 results.push((neighbor.to_string(), next_depth));
242 queue.push_back((neighbor.to_string(), next_depth));
243 }
244 }
245 }
246
247 results
248 }
249
250 pub fn has_link_to(&self, from: &str, to: &str) -> bool {
252 self.outgoing
253 .get(from)
254 .is_some_and(|links| links.contains(to))
255 }
256
257 pub fn has_link_from(&self, to: &str, from: &str) -> bool {
259 self.incoming
260 .get(to)
261 .is_some_and(|links| links.contains(from))
262 }
263
264 pub fn unresolved(&self) -> Vec<UnresolvedLink> {
268 let mut out = Vec::new();
269 for (source, targets) in &self.outgoing {
270 for target in targets {
271 let resolved = self.resolve_link_target(target);
272 if !self.name_to_paths.contains_key(&resolved) {
273 out.push(UnresolvedLink {
274 source: source.clone(),
275 target: target.clone(),
276 });
277 }
278 }
279 }
280 out
281 }
282
283 pub fn traverse_from(&self, start: &str, depth: usize, direction: Direction) -> Vec<String> {
286 self.traverse(start, depth, direction)
287 .into_iter()
288 .filter(|(name, d)| name != start && *d > 0)
289 .map(|(name, _)| name)
290 .collect()
291 }
292
293 pub fn virtual_fields(&self, name: &str) -> Vec<(&'static str, Value)> {
295 let out_links = self.outgoing_links(name);
296 let in_links = self.incoming_links(name);
297
298 vec![
299 (
300 "_links",
301 Value::List(
302 out_links
303 .iter()
304 .map(|s| Value::String(s.to_string()))
305 .collect(),
306 ),
307 ),
308 ("_link_count", Value::Integer(out_links.len() as i64)),
309 (
310 "_backlinks",
311 Value::List(
312 in_links
313 .iter()
314 .map(|s| Value::String(s.to_string()))
315 .collect(),
316 ),
317 ),
318 ("_backlink_count", Value::Integer(in_links.len() as i64)),
319 ]
320 }
321}
322
323#[cfg(test)]
324mod tests {
325 use super::*;
326 use std::collections::BTreeMap;
327 use std::path::PathBuf;
328
329 #[test]
330 fn extract_simple_link() {
331 let links = extract_links("Some text with [[React]] and [[Node.js]] links.");
332 assert!(links.contains("React"));
333 assert!(links.contains("Node.js"));
334 assert_eq!(links.len(), 2);
335 }
336
337 #[test]
338 fn extract_link_with_alias() {
339 let links = extract_links("See [[React|the React framework]] for details.");
340 assert!(links.contains("React"));
341 assert_eq!(links.len(), 1);
342 }
343
344 #[test]
345 fn extract_link_with_section() {
346 let links = extract_links("Check [[React#Hooks]] and [[React#State Management|state]].");
347 assert!(links.contains("React"));
348 assert_eq!(links.len(), 1); }
350
351 #[test]
352 fn extract_chinese_links() {
353 let links = extract_links("Zıt anlamlısı [[慢]] ile birlikte [[快]] kullanılır.");
354 assert!(links.contains("慢"));
355 assert!(links.contains("快"));
356 }
357
358 #[test]
359 fn extract_links_from_frontmatter_value() {
360 let content =
361 "---\nrelated-to:\n - \"[[Watchlist]]\"\n - \"[[2FA Setup]]\"\n---\nBody.\n";
362 let links = extract_links(content);
363 assert!(links.contains("Watchlist"));
364 assert!(links.contains("2FA Setup"));
365 }
366
367 #[test]
368 fn extract_no_links() {
369 let links = extract_links("Plain text with no links at all.");
370 assert!(links.is_empty());
371 }
372
373 #[test]
374 fn ignores_links_in_fenced_code_block() {
375 let content = "Real link [[React]].\n```\n[[FakeLink]] in code\n```\nMore text.";
376 let links = extract_links(content);
377 assert!(links.contains("React"));
378 assert!(!links.contains("FakeLink"));
379 }
380
381 #[test]
382 fn ignores_links_in_inline_code() {
383 let content = "Use `[[NotALink]]` but also see [[RealLink]].";
384 let links = extract_links(content);
385 assert!(links.contains("RealLink"));
386 assert!(!links.contains("NotALink"));
387 }
388
389 #[test]
390 fn build_link_index() {
391 let records = vec![
392 Record {
393 path: PathBuf::from("/vault/A.md"),
394 fields: BTreeMap::new(),
395 raw_content: Some("Links to [[B]] and [[C]].".into()),
396 },
397 Record {
398 path: PathBuf::from("/vault/B.md"),
399 fields: BTreeMap::new(),
400 raw_content: Some("Links to [[C]].".into()),
401 },
402 Record {
403 path: PathBuf::from("/vault/C.md"),
404 fields: BTreeMap::new(),
405 raw_content: Some("No links here.".into()),
406 },
407 ];
408
409 let index = LinkGraph::build(&records);
410
411 assert_eq!(index.outgoing_count("A"), 2);
413 assert_eq!(index.outgoing_count("B"), 1);
414 assert_eq!(index.outgoing_count("C"), 0);
415
416 assert_eq!(index.incoming_count("A"), 0); assert_eq!(index.incoming_count("B"), 1); assert_eq!(index.incoming_count("C"), 2); let c_backlinks = index.incoming_links("C");
423 assert!(c_backlinks.contains(&"A"));
424 assert!(c_backlinks.contains(&"B"));
425 }
426
427 #[test]
428 fn virtual_fields_from_index() {
429 let records = vec![
430 Record {
431 path: PathBuf::from("/vault/A.md"),
432 fields: BTreeMap::new(),
433 raw_content: Some("Links to [[B]] and [[C]].".into()),
434 },
435 Record {
436 path: PathBuf::from("/vault/B.md"),
437 fields: BTreeMap::new(),
438 raw_content: Some("Links back to [[A]].".into()),
439 },
440 ];
441
442 let index = LinkGraph::build(&records);
443 let fields = index.virtual_fields("A");
444
445 let link_count = fields.iter().find(|(k, _)| *k == "_link_count").unwrap();
446 assert_eq!(link_count.1, Value::Integer(2));
447
448 let backlink_count = fields
449 .iter()
450 .find(|(k, _)| *k == "_backlink_count")
451 .unwrap();
452 assert_eq!(backlink_count.1, Value::Integer(1));
453 }
454
455 #[test]
456 fn unresolved_returns_dangling_targets() {
457 let records = vec![
458 Record {
459 path: PathBuf::from("/vault/a.md"),
460 fields: BTreeMap::new(),
461 raw_content: Some("Links to [[ghost]] and [[b]].".into()),
462 },
463 Record {
464 path: PathBuf::from("/vault/b.md"),
465 fields: BTreeMap::new(),
466 raw_content: Some("".into()),
467 },
468 ];
469
470 let index = LinkGraph::build(&records);
471 let unresolved = index.unresolved();
472 assert_eq!(
473 unresolved.len(),
474 1,
475 "expected one dangling link, got {:?}",
476 unresolved
477 );
478 assert_eq!(unresolved[0].source, "a");
479 assert_eq!(unresolved[0].target, "ghost");
480 }
481
482 #[test]
483 fn unresolved_empty_when_all_resolved() {
484 let records = vec![
485 Record {
486 path: PathBuf::from("/vault/a.md"),
487 fields: BTreeMap::new(),
488 raw_content: Some("Links to [[b]].".into()),
489 },
490 Record {
491 path: PathBuf::from("/vault/b.md"),
492 fields: BTreeMap::new(),
493 raw_content: Some("".into()),
494 },
495 ];
496
497 let index = LinkGraph::build(&records);
498 assert!(index.unresolved().is_empty());
499 }
500
501 #[test]
502 fn traverse_from_outgoing_skips_self() {
503 let mk = |name: &str, content: &str| Record {
504 path: PathBuf::from(format!("/vault/{}.md", name)),
505 fields: BTreeMap::new(),
506 raw_content: Some(content.into()),
507 };
508 let records = vec![mk("a", "[[b]]"), mk("b", "[[c]]"), mk("c", "")];
509 let index = LinkGraph::build(&records);
510 let names = index.traverse_from("a", 2, Direction::Outgoing);
511 assert!(names.contains(&"b".to_string()));
512 assert!(names.contains(&"c".to_string()));
513 assert!(
514 !names.contains(&"a".to_string()),
515 "starting node should not be in the result"
516 );
517 }
518
519 #[test]
520 fn traverse_from_respects_depth() {
521 let mk = |name: &str, content: &str| Record {
522 path: PathBuf::from(format!("/vault/{}.md", name)),
523 fields: BTreeMap::new(),
524 raw_content: Some(content.into()),
525 };
526 let records = vec![
527 mk("a", "[[b]]"),
528 mk("b", "[[c]]"),
529 mk("c", "[[d]]"),
530 mk("d", ""),
531 ];
532 let index = LinkGraph::build(&records);
533 let names = index.traverse_from("a", 1, Direction::Outgoing);
534 assert!(names.contains(&"b".to_string()));
535 assert!(
536 !names.contains(&"c".to_string()),
537 "depth=1 should not reach c"
538 );
539 }
540}