1use std::collections::{BTreeMap, BTreeSet};
7
8use regex::Regex;
9use std::sync::LazyLock;
10
11use crate::record::{Record, Value};
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
17pub enum Direction {
18 Outgoing,
19 Incoming,
20 Both,
21}
22
23#[derive(Debug, Clone)]
25pub enum GraphScope {
26 All,
27 Folder(String),
28 Where(crate::query::Expr),
29}
30
31#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
34pub struct UnresolvedLink {
35 pub source: String,
36 pub target: String,
37}
38
39static WIKI_LINK_RE: LazyLock<Regex> =
40 LazyLock::new(|| Regex::new(r"\[\[([^\]\|#]+)(?:#[^\]\|]*)?\|?[^\]]*\]\]").unwrap());
41
42static FENCED_CODE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?s)```.*?```").unwrap());
43
44static INLINE_CODE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`[^`]+`").unwrap());
45
46static MD_LINK_RE: LazyLock<Regex> =
48 LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap());
49
50fn strip_code_blocks(content: &str) -> String {
52 let without_fenced = FENCED_CODE_RE.replace_all(content, "");
53 INLINE_CODE_RE.replace_all(&without_fenced, "").into_owned()
54}
55
56fn extract_links_from_str(text: &str) -> Vec<String> {
59 WIKI_LINK_RE
60 .captures_iter(text)
61 .map(|cap| cap[1].trim().to_string())
62 .collect()
63}
64
65pub fn extract_links(content: &str) -> BTreeSet<String> {
68 let cleaned = strip_code_blocks(content);
69 let mut links = BTreeSet::new();
70 for link in extract_links_from_str(&cleaned) {
71 links.insert(link);
72 }
73 links
74}
75
76pub fn extract_markdown_links(content: &str) -> Vec<(String, String)> {
81 let cleaned = strip_code_blocks(content);
82 let mut out = Vec::new();
83 for cap in MD_LINK_RE.captures_iter(&cleaned) {
84 let whole = cap.get(0).expect("capture group 0 always present");
85 if cleaned[..whole.start()].ends_with('!') {
87 continue;
88 }
89 out.push((cap[1].trim().to_string(), cap[2].trim().to_string()));
90 }
91 out
92}
93
94pub fn record_links(record: &Record) -> BTreeSet<String> {
96 match &record.raw_content {
97 Some(content) => extract_links(content),
98 None => BTreeSet::new(),
99 }
100}
101
102#[derive(Debug, Default)]
108pub struct LinkGraph {
109 outgoing: BTreeMap<String, BTreeSet<String>>,
111 incoming: BTreeMap<String, BTreeSet<String>>,
113 name_to_paths: BTreeMap<String, Vec<String>>,
115 records_by_name: BTreeMap<String, Record>,
117}
118
119impl LinkGraph {
120 pub fn build(records: &[Record]) -> Self {
123 Self::build_with_root(records, None)
124 }
125
126 pub fn build_with_root(records: &[Record], vault_root: Option<&std::path::Path>) -> Self {
128 let mut index = LinkGraph::default();
129
130 for record in records {
132 let name = record.virtual_name();
133 let rel_path = match vault_root {
134 Some(root) => record.virtual_path(root),
135 None => record.path.to_string_lossy().into_owned(),
136 };
137 index
138 .name_to_paths
139 .entry(name.clone())
140 .or_default()
141 .push(rel_path);
142 index
143 .records_by_name
144 .entry(name)
145 .or_insert_with(|| record.clone());
146 }
147
148 for record in records {
150 let name = record.virtual_name();
151 let links = record_links(record);
152
153 for target in &links {
155 let resolved = index.resolve_link_target(target);
156 index
157 .incoming
158 .entry(resolved.clone())
159 .or_default()
160 .insert(name.clone());
161 }
162
163 index.outgoing.insert(name, links);
164 }
165
166 index
167 }
168
169 pub fn record_by_name(&self, name: &str) -> Option<&Record> {
171 self.records_by_name.get(name)
172 }
173
174 fn resolve_link_target(&self, target: &str) -> String {
177 if target.contains('/') {
178 target.rsplit('/').next().unwrap_or(target).to_string()
180 } else {
181 target.to_string()
182 }
183 }
184
185 pub fn is_ambiguous(&self, name: &str) -> bool {
187 self.name_to_paths
188 .get(name)
189 .is_some_and(|paths| paths.len() > 1)
190 }
191
192 pub fn paths_for_name(&self, name: &str) -> Vec<&str> {
194 self.name_to_paths
195 .get(name)
196 .map(|paths| paths.iter().map(|s| s.as_str()).collect())
197 .unwrap_or_default()
198 }
199
200 pub fn outgoing_links(&self, name: &str) -> Vec<&str> {
202 self.outgoing
203 .get(name)
204 .map(|s| s.iter().map(|s| s.as_str()).collect())
205 .unwrap_or_default()
206 }
207
208 pub fn incoming_links(&self, name: &str) -> Vec<&str> {
210 self.incoming
211 .get(name)
212 .map(|s| s.iter().map(|s| s.as_str()).collect())
213 .unwrap_or_default()
214 }
215
216 pub fn outgoing_count(&self, name: &str) -> usize {
218 self.outgoing.get(name).map(|s| s.len()).unwrap_or(0)
219 }
220
221 pub fn incoming_count(&self, name: &str) -> usize {
223 self.incoming.get(name).map(|s| s.len()).unwrap_or(0)
224 }
225
226 pub fn traverse(
230 &self,
231 start: &str,
232 max_depth: usize,
233 direction: Direction,
234 ) -> Vec<(String, usize)> {
235 use std::collections::VecDeque;
236
237 let mut visited = BTreeSet::new();
238 let mut queue = VecDeque::new();
239 let mut results = Vec::new();
240
241 visited.insert(start.to_string());
242 queue.push_back((start.to_string(), 0usize));
243 results.push((start.to_string(), 0));
244
245 while let Some((current, depth)) = queue.pop_front() {
246 if depth >= max_depth {
247 continue;
248 }
249
250 let neighbors: Vec<&str> = match direction {
251 Direction::Outgoing => self.outgoing_links(¤t),
252 Direction::Incoming => self.incoming_links(¤t),
253 Direction::Both => {
254 let mut all = self.outgoing_links(¤t);
255 all.extend(self.incoming_links(¤t));
256 all
257 }
258 };
259
260 for neighbor in neighbors {
261 if visited.insert(neighbor.to_string()) {
262 let next_depth = depth + 1;
263 results.push((neighbor.to_string(), next_depth));
264 queue.push_back((neighbor.to_string(), next_depth));
265 }
266 }
267 }
268
269 results
270 }
271
272 pub fn has_link_to(&self, from: &str, to: &str) -> bool {
274 self.outgoing
275 .get(from)
276 .is_some_and(|links| links.contains(to))
277 }
278
279 pub fn has_link_from(&self, to: &str, from: &str) -> bool {
281 self.incoming
282 .get(to)
283 .is_some_and(|links| links.contains(from))
284 }
285
286 pub fn unresolved(&self) -> Vec<UnresolvedLink> {
290 let mut out = Vec::new();
291 for (source, targets) in &self.outgoing {
292 for target in targets {
293 let resolved = self.resolve_link_target(target);
294 if !self.name_to_paths.contains_key(&resolved) {
295 out.push(UnresolvedLink {
296 source: source.clone(),
297 target: target.clone(),
298 });
299 }
300 }
301 }
302 out
303 }
304
305 pub fn traverse_from(&self, start: &str, depth: usize, direction: Direction) -> Vec<String> {
308 self.traverse(start, depth, direction)
309 .into_iter()
310 .filter(|(name, d)| name != start && *d > 0)
311 .map(|(name, _)| name)
312 .collect()
313 }
314
315 pub fn virtual_fields(&self, name: &str) -> Vec<(&'static str, Value)> {
317 let out_links = self.outgoing_links(name);
318 let in_links = self.incoming_links(name);
319
320 vec![
321 (
322 "_links",
323 Value::List(
324 out_links
325 .iter()
326 .map(|s| Value::String(s.to_string()))
327 .collect(),
328 ),
329 ),
330 ("_link_count", Value::Integer(out_links.len() as i64)),
331 (
332 "_backlinks",
333 Value::List(
334 in_links
335 .iter()
336 .map(|s| Value::String(s.to_string()))
337 .collect(),
338 ),
339 ),
340 ("_backlink_count", Value::Integer(in_links.len() as i64)),
341 ]
342 }
343}
344
345#[cfg(test)]
346mod tests {
347 use super::*;
348 use std::collections::BTreeMap;
349 use std::path::PathBuf;
350
351 #[test]
352 fn extract_simple_link() {
353 let links = extract_links("Some text with [[React]] and [[Node.js]] links.");
354 assert!(links.contains("React"));
355 assert!(links.contains("Node.js"));
356 assert_eq!(links.len(), 2);
357 }
358
359 #[test]
360 fn extract_link_with_alias() {
361 let links = extract_links("See [[React|the React framework]] for details.");
362 assert!(links.contains("React"));
363 assert_eq!(links.len(), 1);
364 }
365
366 #[test]
367 fn extract_link_with_section() {
368 let links = extract_links("Check [[React#Hooks]] and [[React#State Management|state]].");
369 assert!(links.contains("React"));
370 assert_eq!(links.len(), 1); }
372
373 #[test]
374 fn extract_chinese_links() {
375 let links = extract_links("Zıt anlamlısı [[慢]] ile birlikte [[快]] kullanılır.");
376 assert!(links.contains("慢"));
377 assert!(links.contains("快"));
378 }
379
380 #[test]
381 fn extract_links_from_frontmatter_value() {
382 let content =
383 "---\nrelated-to:\n - \"[[Watchlist]]\"\n - \"[[2FA Setup]]\"\n---\nBody.\n";
384 let links = extract_links(content);
385 assert!(links.contains("Watchlist"));
386 assert!(links.contains("2FA Setup"));
387 }
388
389 #[test]
390 fn extract_no_links() {
391 let links = extract_links("Plain text with no links at all.");
392 assert!(links.is_empty());
393 }
394
395 #[test]
396 fn ignores_links_in_fenced_code_block() {
397 let content = "Real link [[React]].\n```\n[[FakeLink]] in code\n```\nMore text.";
398 let links = extract_links(content);
399 assert!(links.contains("React"));
400 assert!(!links.contains("FakeLink"));
401 }
402
403 #[test]
404 fn ignores_links_in_inline_code() {
405 let content = "Use `[[NotALink]]` but also see [[RealLink]].";
406 let links = extract_links(content);
407 assert!(links.contains("RealLink"));
408 assert!(!links.contains("NotALink"));
409 }
410
411 #[test]
412 fn extract_markdown_links_basic() {
413 let md = "See [Docs](https://example.com/docs) and [Home](https://example.com).";
414 assert_eq!(
415 extract_markdown_links(md),
416 vec![
417 ("Docs".to_string(), "https://example.com/docs".to_string()),
418 ("Home".to_string(), "https://example.com".to_string()),
419 ]
420 );
421 }
422
423 #[test]
424 fn extract_markdown_links_skips_images_wikilinks_and_code() {
425 let md = " [Real](https://real.test) [[WikiNote]] `[Code](https://code.test)`";
426 assert_eq!(
427 extract_markdown_links(md),
428 vec![("Real".to_string(), "https://real.test".to_string())]
429 );
430 }
431
432 #[test]
433 fn build_link_index() {
434 let records = vec![
435 Record {
436 path: PathBuf::from("/vault/A.md"),
437 fields: BTreeMap::new(),
438 raw_content: Some("Links to [[B]] and [[C]].".into()),
439 },
440 Record {
441 path: PathBuf::from("/vault/B.md"),
442 fields: BTreeMap::new(),
443 raw_content: Some("Links to [[C]].".into()),
444 },
445 Record {
446 path: PathBuf::from("/vault/C.md"),
447 fields: BTreeMap::new(),
448 raw_content: Some("No links here.".into()),
449 },
450 ];
451
452 let index = LinkGraph::build(&records);
453
454 assert_eq!(index.outgoing_count("A"), 2);
456 assert_eq!(index.outgoing_count("B"), 1);
457 assert_eq!(index.outgoing_count("C"), 0);
458
459 assert_eq!(index.incoming_count("A"), 0); assert_eq!(index.incoming_count("B"), 1); assert_eq!(index.incoming_count("C"), 2); let c_backlinks = index.incoming_links("C");
466 assert!(c_backlinks.contains(&"A"));
467 assert!(c_backlinks.contains(&"B"));
468 }
469
470 #[test]
471 fn virtual_fields_from_index() {
472 let records = vec![
473 Record {
474 path: PathBuf::from("/vault/A.md"),
475 fields: BTreeMap::new(),
476 raw_content: Some("Links to [[B]] and [[C]].".into()),
477 },
478 Record {
479 path: PathBuf::from("/vault/B.md"),
480 fields: BTreeMap::new(),
481 raw_content: Some("Links back to [[A]].".into()),
482 },
483 ];
484
485 let index = LinkGraph::build(&records);
486 let fields = index.virtual_fields("A");
487
488 let link_count = fields.iter().find(|(k, _)| *k == "_link_count").unwrap();
489 assert_eq!(link_count.1, Value::Integer(2));
490
491 let backlink_count = fields
492 .iter()
493 .find(|(k, _)| *k == "_backlink_count")
494 .unwrap();
495 assert_eq!(backlink_count.1, Value::Integer(1));
496 }
497
498 #[test]
499 fn unresolved_returns_dangling_targets() {
500 let records = vec![
501 Record {
502 path: PathBuf::from("/vault/a.md"),
503 fields: BTreeMap::new(),
504 raw_content: Some("Links to [[ghost]] and [[b]].".into()),
505 },
506 Record {
507 path: PathBuf::from("/vault/b.md"),
508 fields: BTreeMap::new(),
509 raw_content: Some("".into()),
510 },
511 ];
512
513 let index = LinkGraph::build(&records);
514 let unresolved = index.unresolved();
515 assert_eq!(
516 unresolved.len(),
517 1,
518 "expected one dangling link, got {:?}",
519 unresolved
520 );
521 assert_eq!(unresolved[0].source, "a");
522 assert_eq!(unresolved[0].target, "ghost");
523 }
524
525 #[test]
526 fn unresolved_empty_when_all_resolved() {
527 let records = vec![
528 Record {
529 path: PathBuf::from("/vault/a.md"),
530 fields: BTreeMap::new(),
531 raw_content: Some("Links to [[b]].".into()),
532 },
533 Record {
534 path: PathBuf::from("/vault/b.md"),
535 fields: BTreeMap::new(),
536 raw_content: Some("".into()),
537 },
538 ];
539
540 let index = LinkGraph::build(&records);
541 assert!(index.unresolved().is_empty());
542 }
543
544 #[test]
545 fn traverse_from_outgoing_skips_self() {
546 let mk = |name: &str, content: &str| Record {
547 path: PathBuf::from(format!("/vault/{}.md", name)),
548 fields: BTreeMap::new(),
549 raw_content: Some(content.into()),
550 };
551 let records = vec![mk("a", "[[b]]"), mk("b", "[[c]]"), mk("c", "")];
552 let index = LinkGraph::build(&records);
553 let names = index.traverse_from("a", 2, Direction::Outgoing);
554 assert!(names.contains(&"b".to_string()));
555 assert!(names.contains(&"c".to_string()));
556 assert!(
557 !names.contains(&"a".to_string()),
558 "starting node should not be in the result"
559 );
560 }
561
562 #[test]
563 fn traverse_from_respects_depth() {
564 let mk = |name: &str, content: &str| Record {
565 path: PathBuf::from(format!("/vault/{}.md", name)),
566 fields: BTreeMap::new(),
567 raw_content: Some(content.into()),
568 };
569 let records = vec![
570 mk("a", "[[b]]"),
571 mk("b", "[[c]]"),
572 mk("c", "[[d]]"),
573 mk("d", ""),
574 ];
575 let index = LinkGraph::build(&records);
576 let names = index.traverse_from("a", 1, Direction::Outgoing);
577 assert!(names.contains(&"b".to_string()));
578 assert!(
579 !names.contains(&"c".to_string()),
580 "depth=1 should not reach c"
581 );
582 }
583}