use std::collections::HashSet;
use std::sync::OnceLock;
use regex::Regex;
/// A single outgoing reference found in a note body, before any resolution
/// against actual files or tags.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawLink {
/// Destination exactly as written in the text (whitespace-trimmed):
/// a wikilink target, a markdown link path, or a tag name without its `#`.
pub target_raw: String,
/// Which syntax produced this link.
pub link_type: LinkType,
/// Human-readable label: the alias after `|` in a wikilink or the bracketed
/// text of a markdown link. `None` when the syntax carries no label.
pub display_text: Option<String>,
/// Heading/section anchor when the link names one (e.g. the part after `#`
/// in `[[Target#Heading]]`); `None` otherwise.
pub heading_anchor: Option<String>,
}
/// The syntactic form a [`RawLink`] was extracted from.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LinkType {
/// Double-bracket link: `[[Target]]`, `[[Target#Heading]]`, `[[Target|Alias]]`.
Wikilink,
/// Standard markdown link: `[text](path)`.
MarkdownLink,
/// Hash-prefixed tag written inline in the text: `#tag`.
InlineTag,
}
/// Returns the process-wide, lazily compiled pattern for wikilinks.
///
/// Capture groups:
/// 1. the target (anything up to `]`, `|` or `#`),
/// 2. the optional heading following `#`,
/// 3. the optional alias following `|`.
fn wikilink_re() -> &'static Regex {
    const PATTERN: &str = r"\[\[([^\]\|#]+)(?:#([^\]\|]+))?(?:\|([^\]]+))?\]\]";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    // The pattern is a compile-time constant, so a failure here is a programming error.
    COMPILED.get_or_init(|| Regex::new(PATTERN).expect("wikilink regex"))
}
/// Returns the process-wide, lazily compiled pattern for `[text](destination)`
/// markdown links.
///
/// Capture group 1 is the bracketed text, capture group 2 the parenthesised
/// destination.
fn md_link_re() -> &'static Regex {
    const PATTERN: &str = r"\[([^\]]+)\]\(([^)]+)\)";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| {
        // The pattern is a compile-time constant, so a failure here is a programming error.
        Regex::new(PATTERN).expect("md link regex")
    })
}
/// Returns the process-wide, lazily compiled pattern for inline `#tags`.
///
/// A tag must sit at the very start of the haystack or directly after a
/// whitespace character, begin with an ASCII letter, and continue with ASCII
/// letters, digits, `_`, `/` or `-`. Capture group 1 is the tag name without
/// the leading `#`.
fn inline_tag_re() -> &'static Regex {
    const PATTERN: &str = r"(?:^|[\s])#([a-zA-Z][a-zA-Z0-9_/-]*)";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    // The pattern is a compile-time constant, so a failure here is a programming error.
    COMPILED.get_or_init(|| Regex::new(PATTERN).expect("inline tag regex"))
}
/// Scans a note body and returns every distinct outgoing reference it contains.
///
/// Three syntaxes are recognised, line by line:
///
/// * wikilinks — `[[Target]]`, `[[Target#Heading]]`, `[[Target|Alias]]`;
/// * markdown links — `[text](path)` or `[text](path#heading)`, restricted to
///   destinations that look like notes (no file extension, or `.md`);
///   `http://` / `https://` destinations are ignored;
/// * inline tags — `#tag` at line start or after whitespace.
///
/// Lines between a pair of ``` fence lines (and the fence lines themselves)
/// are skipped. Results are returned in order of first appearance and are
/// de-duplicated on `(target, link type)`; the heading and display text of the
/// first occurrence win.
///
/// Links without a usable target — a whitespace-only wikilink target or a
/// markdown link that is only a same-document `#fragment` — are not reported.
pub fn extract_links(body: &str) -> Vec<RawLink> {
    let mut results = Vec::new();
    let mut seen: HashSet<(String, LinkType)> = HashSet::new();
    let mut in_code_block = false;

    for line in body.lines() {
        // A fence line toggles code-block state and is never scanned itself.
        if line.trim_start().starts_with("```") {
            in_code_block = !in_code_block;
            continue;
        }
        if in_code_block {
            continue;
        }

        for caps in wikilink_re().captures_iter(line) {
            let target = caps[1].trim().to_string();
            // The pattern accepts whitespace-only targets such as `[[ ]]`;
            // those cannot point anywhere, so drop them.
            if target.is_empty() {
                continue;
            }
            let heading = caps.get(2).map(|m| m.as_str().trim().to_string());
            let display = caps.get(3).map(|m| m.as_str().trim().to_string());
            if seen.insert((target.clone(), LinkType::Wikilink)) {
                results.push(RawLink {
                    target_raw: target,
                    link_type: LinkType::Wikilink,
                    display_text: display,
                    heading_anchor: heading,
                });
            }
        }

        for caps in md_link_re().captures_iter(line) {
            let display = caps[1].trim().to_string();
            let destination = caps[2].trim();
            if destination.starts_with("http://") || destination.starts_with("https://") {
                continue;
            }

            // Separate a `#heading` fragment from the path *before* the
            // extension check; otherwise `note.md#section` looks like a file
            // with a non-`.md` extension and is wrongly discarded.
            let (path, anchor) = match destination.split_once('#') {
                Some((path, fragment)) => {
                    let fragment = fragment.trim();
                    (path.trim(), (!fragment.is_empty()).then_some(fragment))
                }
                None => (destination, None),
            };
            // `[text](#section)` refers to the current document, not another
            // note (the wikilink pattern likewise rejects `[[#section]]`).
            if path.is_empty() {
                continue;
            }

            // Only the last path segment decides whether there is an extension,
            // so `./dir.v2/note` still counts as extension-less.
            let has_ext = path.rsplit('/').next().is_some_and(|f| f.contains('.'));
            let is_md = path.ends_with(".md");
            if has_ext && !is_md {
                continue;
            }

            if seen.insert((path.to_string(), LinkType::MarkdownLink)) {
                results.push(RawLink {
                    target_raw: path.to_string(),
                    link_type: LinkType::MarkdownLink,
                    display_text: Some(display),
                    heading_anchor: anchor.map(str::to_string),
                });
            }
        }

        for caps in inline_tag_re().captures_iter(line) {
            let tag = caps[1].to_string();
            if seen.insert((tag.clone(), LinkType::InlineTag)) {
                results.push(RawLink {
                    target_raw: tag,
                    link_type: LinkType::InlineTag,
                    display_text: None,
                    heading_anchor: None,
                });
            }
        }
    }

    results
}
/// Converts a user-written tag into its canonical form.
///
/// Surrounding whitespace is ignored, a single leading `#` is removed, runs of
/// interior whitespace collapse to one `-`, and the result is lowercased.
/// For example `"# My Tag"` becomes `"my-tag"`. An empty input or a bare `#`
/// yields an empty string.
pub fn normalize_tag(raw: &str) -> String {
    // Trim first: without this, input such as " #Rust" keeps its `#`, because
    // `strip_prefix` only matches at the very first character.
    let trimmed = raw.trim_start();
    let stripped = trimmed.strip_prefix('#').unwrap_or(trimmed);
    stripped
        .split_whitespace()
        .collect::<Vec<_>>()
        .join("-")
        .to_lowercase()
}
// Unit tests for `extract_links` and `normalize_tag`.
#[cfg(test)]
mod tests {
use super::*;
// A bare wikilink yields its target with no alias and no heading.
#[test]
fn extract_wikilinks_simple() {
let links = extract_links("Check out [[Target Note]] for details.");
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_raw, "Target Note");
assert_eq!(links[0].link_type, LinkType::Wikilink);
assert_eq!(links[0].display_text, None);
assert_eq!(links[0].heading_anchor, None);
}
// The text after `|` becomes the display text.
#[test]
fn extract_wikilinks_with_display() {
let links = extract_links("See [[Target|Display Text]] here.");
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_raw, "Target");
assert_eq!(links[0].display_text, Some("Display Text".to_string()));
}
// The text after `#` becomes the heading anchor.
#[test]
fn extract_wikilinks_with_heading() {
let links = extract_links("Jump to [[Target#Section One]].");
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_raw, "Target");
assert_eq!(links[0].heading_anchor, Some("Section One".to_string()));
}
// Heading and alias can be combined in one wikilink.
#[test]
fn extract_wikilinks_full() {
let links = extract_links("See [[Target#Heading|Alias]].");
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_raw, "Target");
assert_eq!(links[0].heading_anchor, Some("Heading".to_string()));
assert_eq!(links[0].display_text, Some("Alias".to_string()));
}
// A relative `.md` markdown link is reported with its path and label.
#[test]
fn extract_markdown_links() {
let links = extract_links("Read [the guide](./path.md) now.");
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_raw, "./path.md");
assert_eq!(links[0].link_type, LinkType::MarkdownLink);
assert_eq!(links[0].display_text, Some("the guide".to_string()));
}
// `https://` destinations are external and must not be reported.
#[test]
fn extract_markdown_links_skip_external() {
let links = extract_links("[docs](https://example.com/docs)");
let md_links: Vec<_> = links
.iter()
.filter(|l| l.link_type == LinkType::MarkdownLink)
.collect();
assert!(md_links.is_empty());
}
// A destination without any file extension is treated as a note link.
#[test]
fn extract_markdown_links_no_extension() {
let links = extract_links("[other](other-note)");
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_raw, "other-note");
assert_eq!(links[0].link_type, LinkType::MarkdownLink);
}
// Destinations with a non-`.md` extension (e.g. images) are ignored.
#[test]
fn extract_markdown_links_skip_images() {
let links = extract_links("[photo](assets/image.png)");
let md_links: Vec<_> = links
.iter()
.filter(|l| l.link_type == LinkType::MarkdownLink)
.collect();
assert!(md_links.is_empty(), "should skip non-.md file extensions");
}
// Multiple whitespace-separated tags on one line are all found, in order.
#[test]
fn extract_inline_tags() {
let links = extract_links("Topics: #rust #distributed-systems");
let tags: Vec<_> = links
.iter()
.filter(|l| l.link_type == LinkType::InlineTag)
.collect();
assert_eq!(tags.len(), 2);
assert_eq!(tags[0].target_raw, "rust");
assert_eq!(tags[1].target_raw, "distributed-systems");
}
// A tag must start with a letter, so `#123456` is not a tag.
#[test]
fn extract_inline_tags_skip_numeric_hex() {
let links = extract_links("color: #123456;");
let tags: Vec<_> = links
.iter()
.filter(|l| l.link_type == LinkType::InlineTag)
.collect();
assert!(tags.is_empty(), "numeric hex should not be a tag");
}
// A `#` glued to the preceding word is not a tag.
#[test]
fn extract_inline_tags_no_preceding_space() {
let links = extract_links("foo#bar");
let tags: Vec<_> = links
.iter()
.filter(|l| l.link_type == LinkType::InlineTag)
.collect();
assert!(tags.is_empty(), "no space before # means not a tag");
}
// Everything between ``` fences is ignored; content outside is still found.
#[test]
fn extract_code_block_skipped() {
let body = "\
[[Real Link]]
```
[[Not A Link]]
#not-a-tag
[fake](./fake.md)
```
#real-tag
";
let links = extract_links(body);
assert_eq!(links.len(), 2);
assert!(links.iter().any(|l| l.target_raw == "Real Link"));
assert!(links.iter().any(|l| l.target_raw == "real-tag"));
}
// All three link kinds can coexist in one body.
#[test]
fn extract_mixed_links() {
let body = "\
See [[Wiki Note]] and [guide](./guide.md).
Tags: #architecture #design
";
let links = extract_links(body);
assert_eq!(links.len(), 4);
let types: Vec<LinkType> = links.iter().map(|l| l.link_type).collect();
assert!(types.contains(&LinkType::Wikilink));
assert!(types.contains(&LinkType::MarkdownLink));
assert!(types.contains(&LinkType::InlineTag));
}
// The same wikilink target appearing twice is reported once.
#[test]
fn extract_deduplicates() {
let body = "See [[Target]] and also [[Target]] again.";
let links = extract_links(body);
let wiki: Vec<_> = links
.iter()
.filter(|l| l.link_type == LinkType::Wikilink)
.collect();
assert_eq!(wiki.len(), 1, "duplicate wikilink should be deduplicated");
}
// Covers `#` stripping, lowercasing, and whitespace-to-hyphen collapsing.
#[test]
fn normalize_tag_cases() {
assert_eq!(normalize_tag("#Rust"), "rust");
assert_eq!(normalize_tag("Rust"), "rust");
assert_eq!(normalize_tag("# My Tag"), "my-tag");
assert_eq!(normalize_tag(" spaces "), "spaces");
assert_eq!(normalize_tag("#distributed-systems"), "distributed-systems");
}
// A tag at the very beginning of the line needs no preceding whitespace.
#[test]
fn extract_tag_at_line_start() {
let links = extract_links("#startup");
let tags: Vec<_> = links
.iter()
.filter(|l| l.link_type == LinkType::InlineTag)
.collect();
assert_eq!(tags.len(), 1);
assert_eq!(tags[0].target_raw, "startup");
}
// Several distinct wikilinks on one line are each reported.
#[test]
fn extract_multiple_wikilinks_same_line() {
let links = extract_links("See [[Alpha]] and [[Beta]] and [[Gamma]].");
let wikis: Vec<_> = links
.iter()
.filter(|l| l.link_type == LinkType::Wikilink)
.collect();
assert_eq!(wikis.len(), 3);
}
}