use std::sync::LazyLock;
use crate::InlineLocation;
use regex::Regex;
/// A link recognised in note text.
///
/// `heading` and `alias` are `None` when the corresponding `#…` / `|…` part is
/// absent from the link; `target` is the empty string when nothing precedes
/// those parts.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Link {
    /// A wiki-style link: `[[target]]`, `[[target#heading]]`,
    /// `[[target|alias]]` or `[[target#heading|alias]]`.
    Wiki {
        /// Text before the first `#` or `|`.
        target: String,
        /// Text after `#`, up to `|` or the closing brackets.
        heading: Option<String>,
        /// Text after `|`, up to the closing brackets.
        alias: Option<String>,
    },
    /// A Markdown inline link: `[text](url)`.
    Markdown {
        /// The bracketed link text.
        text: String,
        /// The parenthesised destination.
        url: String,
    },
    /// An embed: the wiki-link syntax prefixed with `!`, e.g. `![[image.png]]`.
    Embed {
        /// Text before the first `#` or `|`.
        target: String,
        /// Text after `#`, up to `|` or the closing brackets.
        heading: Option<String>,
        /// Text after `|`, up to the closing brackets.
        alias: Option<String>,
    },
}
/// A [`Link`] paired with the position at which it was found.
#[derive(Clone)]
pub struct LocatedLink {
/// The parsed link.
pub link: Link,
/// Line and column span of the link, as filled in by `parse_links`
/// (`line`, `col_start`, `col_end`).
pub location: InlineLocation,
}
/// Matches a fenced code block: three backticks, the rest of that line, a
/// newline, then (lazily, with `.` also matching newlines) everything up to
/// and including the next three backticks.
pub(crate) static FENCED_CODE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?s)```[^\n]*\n.*?```").unwrap());
/// Matches an inline code span: a backtick, one or more characters that are
/// neither a backtick nor a newline, and a closing backtick.
pub(crate) static INLINE_CODE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`[^`\n]+`").unwrap());
// Matches any of the three supported link syntaxes, tried in this order.
// Capture-group layout (relied on by `parse_links`):
//   1  whole embed  `![[...]]`   2 target   3 heading (after `#`)   4 alias (after `|`)
//   5  whole wiki   `[[...]]`    6 target   7 heading (after `#`)   8 alias (after `|`)
//   9  whole Markdown link `[text](url)`    10 text                 11 url
// The embed alternative comes first so that `![[x]]` is not matched as a wiki
// link preceded by a stray `!`. The Markdown url may not contain `)` or a newline.
static LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(!\[\[([^\]#|]*?)(?:#([^\]|]*?))?(?:\|([^\]]*?))?\]\])|(\[\[([^\]#|]*?)(?:#([^\]|]*?))?(?:\|([^\]]*?))?\]\])|(\[([^\]]+?)\]\(([^)\n]+?)\))",
)
.unwrap()
});
/// Converts a byte offset into `text` to a `(line, column)` pair.
///
/// The line number is 1-based. The column is the 0-based number of `char`s
/// between the start of that line and `byte_pos`.
///
/// # Panics
///
/// Panics if `byte_pos` is greater than `text.len()` or does not lie on a
/// UTF-8 character boundary, because `text` is sliced at that offset.
pub(crate) fn byte_to_line_col(text: &str, byte_pos: usize) -> (usize, usize) {
    let prefix = &text[..byte_pos];
    // The current line begins just past the last newline, or at offset 0.
    let line_start = prefix.rfind('\n').map_or(0, |newline| newline + 1);
    // `\n` is a single ASCII byte, so counting bytes equals counting newline chars.
    let line_number = 1 + prefix.bytes().filter(|&byte| byte == b'\n').count();
    let column = prefix[line_start..].chars().count();
    (line_number, column)
}
/// Extracts every embed, wiki link and Markdown link from `content`, in order
/// of appearance.
///
/// Links located inside fenced code blocks or inline code spans are ignored.
/// Each returned location has a 1-based `line`, a 0-based `col_start` counted
/// in `char`s, and `col_end = col_start + <number of chars in the link>`.
///
/// Link fields (target, heading, alias, text, url) are taken from the original
/// `content`, so an inline code span inside a link keeps its real characters
/// instead of the blanks used internally for matching.
pub(crate) fn parse_links(content: &str) -> Vec<LocatedLink> {
    // Overwrite code with the same number of ASCII spaces. The byte length is
    // preserved, so every offset found in `sanitized` is valid in `content`.
    let mut sanitized = content.to_string();
    for fenced in FENCED_CODE_RE.find_iter(content) {
        sanitized.replace_range(fenced.range(), &" ".repeat(fenced.len()));
    }
    // Collect the ranges first so the immutable borrow ends before mutating,
    // avoiding a full copy of the document.
    let inline_spans: Vec<_> = INLINE_CODE_RE
        .find_iter(&sanitized)
        .map(|m| m.range())
        .collect();
    for span in inline_spans {
        let blank = " ".repeat(span.len());
        sanitized.replace_range(span, &blank);
    }

    let mut links = Vec::new();
    for caps in LINK_RE.captures_iter(&sanitized) {
        let whole = caps
            .get(0)
            .expect("capture group 0 is the whole match and always present");

        // Read a capture group's text from the ORIGINAL content. Group
        // boundaries always touch a delimiter (`[`, `]`, `#`, `|`, `(`, `)`),
        // which is never inside a blanked region, so the slice is on char
        // boundaries.
        let field = |group: usize| caps.get(group).map(|g| content[g.range()].to_string());

        // Groups 1, 5 and 9 wrap the embed, wiki and Markdown alternatives.
        let link = if caps.get(1).is_some() {
            Link::Embed {
                target: field(2).unwrap_or_default(),
                heading: field(3),
                alias: field(4),
            }
        } else if caps.get(5).is_some() {
            Link::Wiki {
                target: field(6).unwrap_or_default(),
                heading: field(7),
                alias: field(8),
            }
        } else if caps.get(9).is_some() {
            Link::Markdown {
                text: field(10).unwrap_or_default(),
                url: field(11).unwrap_or_default(),
            }
        } else {
            continue;
        };

        let (line, col_start) = byte_to_line_col(content, whole.start());
        let col_end = col_start + content[whole.range()].chars().count();
        links.push(LocatedLink {
            link,
            location: InlineLocation {
                line,
                col_start,
                col_end,
            },
        });
    }
    links
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Note;

    /// Asserts that `link` is a `Link::Wiki` with exactly these parts.
    fn assert_wiki(link: &Link, target: &str, heading: Option<&str>, alias: Option<&str>) {
        match link {
            Link::Wiki {
                target: t,
                heading: h,
                alias: a,
            } => {
                assert_eq!(t, target);
                assert_eq!(h.as_deref(), heading);
                assert_eq!(a.as_deref(), alias);
            }
            _ => panic!("expected Wiki link"),
        }
    }

    /// Asserts that `link` is a `Link::Markdown` with exactly this text and url.
    fn assert_md(link: &Link, text: &str, url: &str) {
        match link {
            Link::Markdown { text: t, url: u } => {
                assert_eq!(t, text);
                assert_eq!(u, url);
            }
            _ => panic!("expected Markdown link"),
        }
    }

    /// Asserts that `link` is a `Link::Embed` with exactly these parts.
    fn assert_embed(link: &Link, target: &str, heading: Option<&str>, alias: Option<&str>) {
        match link {
            Link::Embed {
                target: t,
                heading: h,
                alias: a,
            } => {
                assert_eq!(t, target);
                assert_eq!(h.as_deref(), heading);
                assert_eq!(a.as_deref(), alias);
            }
            _ => panic!("expected Embed link"),
        }
    }

    #[test]
    fn wiki_basic() {
        let links = parse_links("See [[target]].");
        assert_eq!(links.len(), 1);
        assert_wiki(&links[0].link, "target", None, None);
    }

    #[test]
    fn wiki_basic_multi_word() {
        let links = parse_links("See [[some target]].");
        assert_eq!(links.len(), 1);
        assert_wiki(&links[0].link, "some target", None, None);
    }

    #[test]
    fn wiki_with_heading() {
        let links = parse_links("See [[target#heading]].");
        assert_eq!(links.len(), 1);
        assert_wiki(&links[0].link, "target", Some("heading"), None);
    }

    #[test]
    fn wiki_with_alias() {
        let links = parse_links("See [[target|alias]].");
        assert_eq!(links.len(), 1);
        assert_wiki(&links[0].link, "target", None, Some("alias"));
    }

    #[test]
    fn wiki_with_multi_word_alias() {
        let links = parse_links("See [[target|some alias]].");
        assert_eq!(links.len(), 1);
        assert_wiki(&links[0].link, "target", None, Some("some alias"));
    }

    #[test]
    fn wiki_multi_word_with_alias() {
        let links = parse_links("See [[some target|alias]].");
        assert_eq!(links.len(), 1);
        assert_wiki(&links[0].link, "some target", None, Some("alias"));
    }

    #[test]
    fn wiki_with_heading_and_alias() {
        let links = parse_links("See [[target#heading|alias]].");
        assert_eq!(links.len(), 1);
        assert_wiki(&links[0].link, "target", Some("heading"), Some("alias"));
    }

    #[test]
    fn markdown_link() {
        let links = parse_links("See [some text](https://example.com).");
        assert_eq!(links.len(), 1);
        assert_md(&links[0].link, "some text", "https://example.com");
    }

    #[test]
    fn embed_basic() {
        let links = parse_links("![[image.png]]");
        assert_eq!(links.len(), 1);
        assert_embed(&links[0].link, "image.png", None, None);
    }

    #[test]
    fn embed_with_heading_and_alias() {
        let links = parse_links("![[note#section|caption]]");
        assert_eq!(links.len(), 1);
        assert_embed(&links[0].link, "note", Some("section"), Some("caption"));
    }

    #[test]
    fn links_inside_fenced_code_block_excluded() {
        let content = "Before.\n```\n[[hidden]]\n```\nAfter.";
        let links = parse_links(content);
        assert!(links.is_empty(), "expected no links, got {}", links.len());
    }

    #[test]
    fn links_inside_inline_code_excluded() {
        let content = "Text `[[hidden]]` more.";
        let links = parse_links(content);
        assert!(links.is_empty(), "expected no links, got {}", links.len());
    }

    #[test]
    fn mixed_content() {
        let content = "[[wiki]] and [md](url) and ![[embed]]";
        let links = parse_links(content);
        assert_eq!(links.len(), 3);
        assert_wiki(&links[0].link, "wiki", None, None);
        assert_md(&links[1].link, "md", "url");
        assert_embed(&links[2].link, "embed", None, None);
    }

    #[test]
    fn empty_content() {
        let links = parse_links("");
        assert!(links.is_empty());
    }

    #[test]
    fn location_first_line() {
        let links = parse_links("[[target]]");
        assert_eq!(links.len(), 1);
        let loc = &links[0].location;
        assert_eq!(loc.line, 1);
        assert_eq!(loc.col_start, 0);
        assert_eq!(loc.col_end, 10);
    }

    #[test]
    fn location_with_prefix() {
        let links = parse_links("See [[target]].");
        let loc = &links[0].location;
        assert_eq!(loc.line, 1);
        assert_eq!(loc.col_start, 4);
        assert_eq!(loc.col_end, 14);
    }

    #[test]
    fn location_second_line() {
        let content = "First line.\n[[target]]";
        let links = parse_links(content);
        assert_eq!(links.len(), 1);
        let loc = &links[0].location;
        assert_eq!(loc.line, 2);
        assert_eq!(loc.col_start, 0);
        assert_eq!(loc.col_end, 10);
    }

    #[test]
    fn location_markdown_link() {
        let links = parse_links("[text](url)");
        let loc = &links[0].location;
        assert_eq!(loc.line, 1);
        assert_eq!(loc.col_start, 0);
        assert_eq!(loc.col_end, 11);
    }

    // The two tests below go through `Note::parse` and check the links it exposes.
    #[test]
    fn note_links_delegates() {
        let note = Note::parse("/vault/note.md", "See [[target]] and [text](url).");
        assert_eq!(note.links.len(), 2);
        assert_wiki(&note.links[0].link, "target", None, None);
        assert_md(&note.links[1].link, "text", "url");
    }

    #[test]
    fn note_links_location_offset_by_frontmatter() {
        // The link sits on line 4 of the full file, after three frontmatter lines.
        let content = "---\ntitle: T\n---\n[[target]]";
        let note = Note::parse("/vault/note.md", content);
        assert_eq!(note.links.len(), 1);
        let loc = &note.links[0].location;
        assert_eq!(loc.line, 4);
        assert_eq!(loc.col_start, 0);
        assert_eq!(loc.col_end, 10);
    }
}