use std::collections::{BTreeMap, BTreeSet};
use regex::Regex;
use std::sync::LazyLock;
use crate::record::{Record, Value};
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum Direction {
Outgoing,
Incoming,
Both,
}
#[derive(Debug, Clone)]
pub enum GraphScope {
All,
Folder(String),
Where(crate::query::Expr),
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct UnresolvedLink {
pub source: String,
pub target: String,
}
static WIKI_LINK_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[\[([^\]\|#]+)(?:#[^\]\|]*)?\|?[^\]]*\]\]").unwrap());
static FENCED_CODE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?s)```.*?```").unwrap());
static INLINE_CODE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`[^`]+`").unwrap());
static MD_LINK_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap());
fn strip_code_blocks(content: &str) -> String {
let without_fenced = FENCED_CODE_RE.replace_all(content, "");
INLINE_CODE_RE.replace_all(&without_fenced, "").into_owned()
}
fn extract_links_from_str(text: &str) -> Vec<String> {
WIKI_LINK_RE
.captures_iter(text)
.map(|cap| cap[1].trim().to_string())
.collect()
}
pub fn extract_links(content: &str) -> BTreeSet<String> {
let cleaned = strip_code_blocks(content);
let mut links = BTreeSet::new();
for link in extract_links_from_str(&cleaned) {
links.insert(link);
}
links
}
pub fn extract_markdown_links(content: &str) -> Vec<(String, String)> {
let cleaned = strip_code_blocks(content);
let mut out = Vec::new();
for cap in MD_LINK_RE.captures_iter(&cleaned) {
let whole = cap.get(0).expect("capture group 0 always present");
if cleaned[..whole.start()].ends_with('!') {
continue;
}
out.push((cap[1].trim().to_string(), cap[2].trim().to_string()));
}
out
}
pub fn record_links(record: &Record) -> BTreeSet<String> {
match &record.raw_content {
Some(content) => extract_links(content),
None => BTreeSet::new(),
}
}
#[derive(Debug, Default)]
pub struct LinkGraph {
outgoing: BTreeMap<String, BTreeSet<String>>,
incoming: BTreeMap<String, BTreeSet<String>>,
name_to_paths: BTreeMap<String, Vec<String>>,
records_by_name: BTreeMap<String, Record>,
}
impl LinkGraph {
pub fn build(records: &[Record]) -> Self {
Self::build_with_root(records, None)
}
pub fn build_with_root(records: &[Record], vault_root: Option<&std::path::Path>) -> Self {
let mut index = LinkGraph::default();
for record in records {
let name = record.virtual_name();
let rel_path = match vault_root {
Some(root) => record.virtual_path(root),
None => record.path.to_string_lossy().into_owned(),
};
index
.name_to_paths
.entry(name.clone())
.or_default()
.push(rel_path);
index
.records_by_name
.entry(name)
.or_insert_with(|| record.clone());
}
for record in records {
let name = record.virtual_name();
let links = record_links(record);
for target in &links {
let resolved = index.resolve_link_target(target);
index
.incoming
.entry(resolved.clone())
.or_default()
.insert(name.clone());
}
index.outgoing.insert(name, links);
}
index
}
pub fn record_by_name(&self, name: &str) -> Option<&Record> {
self.records_by_name.get(name)
}
fn resolve_link_target(&self, target: &str) -> String {
if target.contains('/') {
target.rsplit('/').next().unwrap_or(target).to_string()
} else {
target.to_string()
}
}
pub fn is_ambiguous(&self, name: &str) -> bool {
self.name_to_paths
.get(name)
.is_some_and(|paths| paths.len() > 1)
}
pub fn paths_for_name(&self, name: &str) -> Vec<&str> {
self.name_to_paths
.get(name)
.map(|paths| paths.iter().map(|s| s.as_str()).collect())
.unwrap_or_default()
}
pub fn outgoing_links(&self, name: &str) -> Vec<&str> {
self.outgoing
.get(name)
.map(|s| s.iter().map(|s| s.as_str()).collect())
.unwrap_or_default()
}
pub fn incoming_links(&self, name: &str) -> Vec<&str> {
self.incoming
.get(name)
.map(|s| s.iter().map(|s| s.as_str()).collect())
.unwrap_or_default()
}
pub fn outgoing_count(&self, name: &str) -> usize {
self.outgoing.get(name).map(|s| s.len()).unwrap_or(0)
}
pub fn incoming_count(&self, name: &str) -> usize {
self.incoming.get(name).map(|s| s.len()).unwrap_or(0)
}
pub fn traverse(
&self,
start: &str,
max_depth: usize,
direction: Direction,
) -> Vec<(String, usize)> {
use std::collections::VecDeque;
let mut visited = BTreeSet::new();
let mut queue = VecDeque::new();
let mut results = Vec::new();
visited.insert(start.to_string());
queue.push_back((start.to_string(), 0usize));
results.push((start.to_string(), 0));
while let Some((current, depth)) = queue.pop_front() {
if depth >= max_depth {
continue;
}
let neighbors: Vec<&str> = match direction {
Direction::Outgoing => self.outgoing_links(¤t),
Direction::Incoming => self.incoming_links(¤t),
Direction::Both => {
let mut all = self.outgoing_links(¤t);
all.extend(self.incoming_links(¤t));
all
}
};
for neighbor in neighbors {
if visited.insert(neighbor.to_string()) {
let next_depth = depth + 1;
results.push((neighbor.to_string(), next_depth));
queue.push_back((neighbor.to_string(), next_depth));
}
}
}
results
}
pub fn has_link_to(&self, from: &str, to: &str) -> bool {
self.outgoing
.get(from)
.is_some_and(|links| links.contains(to))
}
pub fn has_link_from(&self, to: &str, from: &str) -> bool {
self.incoming
.get(to)
.is_some_and(|links| links.contains(from))
}
pub fn unresolved(&self) -> Vec<UnresolvedLink> {
let mut out = Vec::new();
for (source, targets) in &self.outgoing {
for target in targets {
let resolved = self.resolve_link_target(target);
if !self.name_to_paths.contains_key(&resolved) {
out.push(UnresolvedLink {
source: source.clone(),
target: target.clone(),
});
}
}
}
out
}
pub fn traverse_from(&self, start: &str, depth: usize, direction: Direction) -> Vec<String> {
self.traverse(start, depth, direction)
.into_iter()
.filter(|(name, d)| name != start && *d > 0)
.map(|(name, _)| name)
.collect()
}
pub fn virtual_fields(&self, name: &str) -> Vec<(&'static str, Value)> {
let out_links = self.outgoing_links(name);
let in_links = self.incoming_links(name);
vec![
(
"_links",
Value::List(
out_links
.iter()
.map(|s| Value::String(s.to_string()))
.collect(),
),
),
("_link_count", Value::Integer(out_links.len() as i64)),
(
"_backlinks",
Value::List(
in_links
.iter()
.map(|s| Value::String(s.to_string()))
.collect(),
),
),
("_backlink_count", Value::Integer(in_links.len() as i64)),
]
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::BTreeMap;
use std::path::PathBuf;
#[test]
fn extract_simple_link() {
let links = extract_links("Some text with [[React]] and [[Node.js]] links.");
assert!(links.contains("React"));
assert!(links.contains("Node.js"));
assert_eq!(links.len(), 2);
}
#[test]
fn extract_link_with_alias() {
let links = extract_links("See [[React|the React framework]] for details.");
assert!(links.contains("React"));
assert_eq!(links.len(), 1);
}
#[test]
fn extract_link_with_section() {
let links = extract_links("Check [[React#Hooks]] and [[React#State Management|state]].");
assert!(links.contains("React"));
assert_eq!(links.len(), 1); }
#[test]
fn extract_chinese_links() {
let links = extract_links("Zıt anlamlısı [[慢]] ile birlikte [[快]] kullanılır.");
assert!(links.contains("慢"));
assert!(links.contains("快"));
}
#[test]
fn extract_links_from_frontmatter_value() {
let content =
"---\nrelated-to:\n - \"[[Watchlist]]\"\n - \"[[2FA Setup]]\"\n---\nBody.\n";
let links = extract_links(content);
assert!(links.contains("Watchlist"));
assert!(links.contains("2FA Setup"));
}
#[test]
fn extract_no_links() {
let links = extract_links("Plain text with no links at all.");
assert!(links.is_empty());
}
#[test]
fn ignores_links_in_fenced_code_block() {
let content = "Real link [[React]].\n```\n[[FakeLink]] in code\n```\nMore text.";
let links = extract_links(content);
assert!(links.contains("React"));
assert!(!links.contains("FakeLink"));
}
#[test]
fn ignores_links_in_inline_code() {
let content = "Use `[[NotALink]]` but also see [[RealLink]].";
let links = extract_links(content);
assert!(links.contains("RealLink"));
assert!(!links.contains("NotALink"));
}
#[test]
fn extract_markdown_links_basic() {
let md = "See [Docs](https://example.com/docs) and [Home](https://example.com).";
assert_eq!(
extract_markdown_links(md),
vec![
("Docs".to_string(), "https://example.com/docs".to_string()),
("Home".to_string(), "https://example.com".to_string()),
]
);
}
#[test]
fn extract_markdown_links_skips_images_wikilinks_and_code() {
let md = " [Real](https://real.test) [[WikiNote]] `[Code](https://code.test)`";
assert_eq!(
extract_markdown_links(md),
vec![("Real".to_string(), "https://real.test".to_string())]
);
}
#[test]
fn build_link_index() {
let records = vec![
Record {
path: PathBuf::from("/vault/A.md"),
fields: BTreeMap::new(),
raw_content: Some("Links to [[B]] and [[C]].".into()),
},
Record {
path: PathBuf::from("/vault/B.md"),
fields: BTreeMap::new(),
raw_content: Some("Links to [[C]].".into()),
},
Record {
path: PathBuf::from("/vault/C.md"),
fields: BTreeMap::new(),
raw_content: Some("No links here.".into()),
},
];
let index = LinkGraph::build(&records);
assert_eq!(index.outgoing_count("A"), 2);
assert_eq!(index.outgoing_count("B"), 1);
assert_eq!(index.outgoing_count("C"), 0);
assert_eq!(index.incoming_count("A"), 0); assert_eq!(index.incoming_count("B"), 1); assert_eq!(index.incoming_count("C"), 2);
let c_backlinks = index.incoming_links("C");
assert!(c_backlinks.contains(&"A"));
assert!(c_backlinks.contains(&"B"));
}
#[test]
fn virtual_fields_from_index() {
let records = vec![
Record {
path: PathBuf::from("/vault/A.md"),
fields: BTreeMap::new(),
raw_content: Some("Links to [[B]] and [[C]].".into()),
},
Record {
path: PathBuf::from("/vault/B.md"),
fields: BTreeMap::new(),
raw_content: Some("Links back to [[A]].".into()),
},
];
let index = LinkGraph::build(&records);
let fields = index.virtual_fields("A");
let link_count = fields.iter().find(|(k, _)| *k == "_link_count").unwrap();
assert_eq!(link_count.1, Value::Integer(2));
let backlink_count = fields
.iter()
.find(|(k, _)| *k == "_backlink_count")
.unwrap();
assert_eq!(backlink_count.1, Value::Integer(1));
}
#[test]
fn unresolved_returns_dangling_targets() {
let records = vec![
Record {
path: PathBuf::from("/vault/a.md"),
fields: BTreeMap::new(),
raw_content: Some("Links to [[ghost]] and [[b]].".into()),
},
Record {
path: PathBuf::from("/vault/b.md"),
fields: BTreeMap::new(),
raw_content: Some("".into()),
},
];
let index = LinkGraph::build(&records);
let unresolved = index.unresolved();
assert_eq!(
unresolved.len(),
1,
"expected one dangling link, got {:?}",
unresolved
);
assert_eq!(unresolved[0].source, "a");
assert_eq!(unresolved[0].target, "ghost");
}
#[test]
fn unresolved_empty_when_all_resolved() {
let records = vec![
Record {
path: PathBuf::from("/vault/a.md"),
fields: BTreeMap::new(),
raw_content: Some("Links to [[b]].".into()),
},
Record {
path: PathBuf::from("/vault/b.md"),
fields: BTreeMap::new(),
raw_content: Some("".into()),
},
];
let index = LinkGraph::build(&records);
assert!(index.unresolved().is_empty());
}
#[test]
fn traverse_from_outgoing_skips_self() {
let mk = |name: &str, content: &str| Record {
path: PathBuf::from(format!("/vault/{}.md", name)),
fields: BTreeMap::new(),
raw_content: Some(content.into()),
};
let records = vec![mk("a", "[[b]]"), mk("b", "[[c]]"), mk("c", "")];
let index = LinkGraph::build(&records);
let names = index.traverse_from("a", 2, Direction::Outgoing);
assert!(names.contains(&"b".to_string()));
assert!(names.contains(&"c".to_string()));
assert!(
!names.contains(&"a".to_string()),
"starting node should not be in the result"
);
}
#[test]
fn traverse_from_respects_depth() {
let mk = |name: &str, content: &str| Record {
path: PathBuf::from(format!("/vault/{}.md", name)),
fields: BTreeMap::new(),
raw_content: Some(content.into()),
};
let records = vec![
mk("a", "[[b]]"),
mk("b", "[[c]]"),
mk("c", "[[d]]"),
mk("d", ""),
];
let index = LinkGraph::build(&records);
let names = index.traverse_from("a", 1, Direction::Outgoing);
assert!(names.contains(&"b".to_string()));
assert!(
!names.contains(&"c".to_string()),
"depth=1 should not reach c"
);
}
}