use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use aho_corasick::{AhoCorasick, MatchKind};
use anyhow::{Context, Result};
use serde::Serialize;
use globset::{GlobBuilder, GlobSetBuilder};
use crate::discovery::{canonicalize_vault_dir, ensure_within_vault, match_globs};
use crate::fs_util::atomic_write;
use crate::index::{IndexEntry, VaultIndex};
use crate::links::extract_link_spans_with_original;
use crate::scanner::{
FenceTracker, MAX_FILE_SIZE, is_comment_fence, strip_inline_code, strip_inline_comments,
};
/// Caller-supplied settings for a single [`auto_link`] run.
///
/// Every slice and string is borrowed from the caller for `'a`; the struct owns nothing.
#[derive(Debug, Clone)]
pub struct AutoLinkOptions<'a> {
    /// When `true`, the matches that were found are written back to the files as
    /// `[[wikilinks]]`; when `false`, the run only reports them.
    pub apply: bool,
    /// Titles whose UTF-8 byte length is below this value are never used as link candidates.
    pub min_length: usize,
    /// Titles to ignore, compared ASCII-case-insensitively.
    pub exclude_titles: &'a [String],
    /// Keep only the first match per (file, link target) pair.
    pub first_only: bool,
    /// Glob patterns; index entries whose vault-relative path matches one of them are not
    /// offered as link targets.
    pub exclude_target_globs: &'a [String],
    /// Restrict scanning to this one vault-relative path. Must not be absolute or contain `..`.
    /// Only used for file selection when `glob_filter` is empty.
    pub file_filter: Option<&'a str>,
    /// Glob patterns selecting the files to scan. When non-empty it takes precedence over
    /// `file_filter` for file selection.
    pub glob_filter: &'a [String],
}
/// One plain-text mention of a known title that could be turned into a wikilink.
#[derive(Debug, Clone, Serialize)]
pub struct AutoLinkMatch {
/// Vault-relative path of the file in which the mention was found.
pub file: String,
/// 1-based line number of the mention.
pub line: usize,
/// 0-based byte offset of the mention within its line.
pub col: usize,
/// The mention exactly as it appears in the line (original casing).
pub matched_text: String,
/// Text placed between `[[` and `]]` when the match is applied.
pub link_target: String,
}
/// Summary returned by `auto_link`.
#[derive(Debug, Serialize)]
pub struct AutoLinkReport {
/// Number of files selected for scanning (0 when no usable titles exist).
/// Files skipped for exceeding the size limit are still counted.
pub scanned: usize,
/// Number of entries in `matches`.
pub total: usize,
/// All reported matches, after any `first_only` filtering.
pub matches: Vec<AutoLinkMatch>,
/// Titles or link targets that were dropped because more than one file claims them.
pub ambiguous_titles: Vec<String>,
/// Mirrors the `apply` option; `false` when the run returned early for lack of titles.
pub applied: bool,
}
/// What a title-inventory key resolves to.
#[derive(Debug)]
struct TitleEntry {
/// File stem used as the wikilink target.
link_target: String,
/// Vault-relative path of the file that contributed the key; used to detect
/// collisions between files and to skip self-links.
source_rel: String,
}
/// Builds the table of linkable titles for the vault.
///
/// Each index entry that is not matched by `exclude_target_globs` contributes up to three
/// kinds of keys, all pointing at the file stem as link target:
/// the stem itself, the `title` property (if it is a string), and every `aliases` value
/// (an array of strings or a single string). Keys are ASCII-lowercased.
///
/// A key is skipped when its UTF-8 byte length is below `min_length` or when it appears
/// (ASCII-case-insensitively) in `exclude_titles`.
///
/// Ambiguity is resolved conservatively:
/// * a key claimed by two different files is removed and reported;
/// * a link target (stem) shared by two different considered files is unusable, so every
///   key pointing at it is removed and the stem is reported. This holds even when only one
///   of those files contributed a surviving key, since `[[stem]]` would still be ambiguous.
///
/// Returns `(title_map, ambiguous)`. `ambiguous` lists key collisions in entry order,
/// followed by ambiguous stems in sorted order so the report is stable across runs.
///
/// # Errors
///
/// Fails when an `exclude_target_globs` pattern is invalid or the glob set cannot be built.
fn build_title_inventory(
    entries: &[IndexEntry],
    min_length: usize,
    exclude_titles: &[String],
    exclude_target_globs: &[String],
) -> Result<(HashMap<String, TitleEntry>, Vec<String>)> {
    let glob_set = if exclude_target_globs.is_empty() {
        None
    } else {
        let mut builder = GlobSetBuilder::new();
        for pat in exclude_target_globs {
            builder.add(
                GlobBuilder::new(pat)
                    .literal_separator(true)
                    .build()
                    .context("invalid --exclude-target-glob pattern")?,
            );
        }
        Some(
            builder
                .build()
                .context("failed to build --exclude-target-glob globset")?,
        )
    };

    let exclude_lower: HashSet<String> = exclude_titles
        .iter()
        .map(|s| s.to_ascii_lowercase())
        .collect();

    // `None` marks a key that was claimed by more than one file.
    let mut map: HashMap<String, Option<TitleEntry>> = HashMap::new();
    let mut ambiguous: Vec<String> = Vec::new();

    let mut try_insert = |title: &str, entry: TitleEntry| {
        if title.len() < min_length {
            return;
        }
        let key = title.to_ascii_lowercase();
        if exclude_lower.contains(&key) {
            return;
        }
        match map.get(&key) {
            None => {
                map.insert(key, Some(entry));
            }
            Some(Some(existing)) if existing.source_rel != entry.source_rel => {
                ambiguous.push(title.to_owned());
                map.insert(key, None);
            }
            // Same file claiming the key again, or key already marked ambiguous.
            Some(_) => {}
        }
    };

    // Which files own each stem, recorded for every considered entry regardless of whether
    // any of its keys survive the filters above.
    let mut stem_sources: HashMap<&str, HashSet<&str>> = HashMap::new();

    for entry in entries {
        let rel = &entry.rel_path;
        if glob_set.as_ref().is_some_and(|gs| gs.is_match(rel)) {
            continue;
        }

        let stem = stem_from_rel(rel);
        stem_sources.entry(stem).or_default().insert(rel.as_str());

        let make_target = || TitleEntry {
            link_target: stem.to_owned(),
            source_rel: rel.clone(),
        };

        try_insert(stem, make_target());

        if let Some(title_str) = entry.properties.get("title").and_then(|v| v.as_str()) {
            let title_str = title_str.trim();
            if !title_str.is_empty() {
                try_insert(title_str, make_target());
            }
        }

        if let Some(aliases_val) = entry.properties.get("aliases") {
            let aliases: Vec<&str> = if let Some(arr) = aliases_val.as_array() {
                arr.iter().filter_map(|v| v.as_str()).collect()
            } else if let Some(s) = aliases_val.as_str() {
                vec![s]
            } else {
                vec![]
            };
            for alias in aliases {
                let alias = alias.trim();
                if !alias.is_empty() {
                    try_insert(alias, make_target());
                }
            }
        }
    }

    let mut title_map: HashMap<String, TitleEntry> = map
        .into_iter()
        .filter_map(|(k, v)| v.map(|entry| (k, entry)))
        .collect();

    // Drop every key whose link target is shared by more than one file and remember which
    // targets actually caused a removal, so only those are reported.
    let mut dropped_targets: Vec<String> = Vec::new();
    title_map.retain(|_, entry| {
        let shared = stem_sources
            .get(entry.link_target.as_str())
            .is_some_and(|sources| sources.len() > 1);
        if shared {
            dropped_targets.push(entry.link_target.clone());
        }
        !shared
    });

    // Sorted so the report does not depend on hash-map iteration order.
    dropped_targets.sort_unstable();
    dropped_targets.dedup();
    for target in dropped_targets {
        if !ambiguous.iter().any(|a| a.eq_ignore_ascii_case(&target)) {
            ambiguous.push(target);
        }
    }

    Ok((title_map, ambiguous))
}
/// Returns the file name of a `/`-separated relative path with a trailing `.md`
/// (any ASCII casing) removed.
///
/// A file name that consists of nothing but the extension (e.g. `.md`) is returned unchanged,
/// as is any name without the extension.
fn stem_from_rel(rel: &str) -> &str {
    const EXT: &[u8] = b".md";

    let file_name = match rel.rfind('/') {
        Some(slash) => &rel[slash + 1..],
        None => rel,
    };

    // Require at least one character in front of the extension.
    if file_name.len() <= EXT.len() {
        return file_name;
    }

    let split_at = file_name.len() - EXT.len();
    // Compare as bytes so a multi-byte character near the end cannot cause a slicing panic.
    let tail = &file_name.as_bytes()[split_at..];
    if tail.eq_ignore_ascii_case(EXT) {
        // The tail is pure ASCII, so `split_at` is a character boundary.
        &file_name[..split_at]
    } else {
        file_name
    }
}
/// Reports whether the position `idx` in `s` counts as a word boundary, i.e. whether the
/// character occupying byte `idx` may sit directly next to a match.
///
/// * Past the end of the string: boundary.
/// * ASCII byte: boundary unless it is alphanumeric or `_`.
/// * Non-ASCII: the character containing byte `idx` is decoded (`idx` may point at any of
///   its bytes). Cased letters such as `ß`, `é` or Cyrillic letters are word characters, so
///   a title is not matched inside a longer word like `Großstadt`. Every other non-ASCII
///   character stays a boundary; this keeps punctuation (dashes, typographic quotes) and
///   uncased scripts that are written without word separators matching as before.
fn is_word_boundary_byte(s: &str, idx: usize) -> bool {
    let Some(&byte) = s.as_bytes().get(idx) else {
        return true;
    };
    if byte.is_ascii() {
        return !byte.is_ascii_alphanumeric() && byte != b'_';
    }

    // Walk back to the first byte of the character; index 0 is always a boundary, so the
    // loop terminates.
    let mut char_start = idx;
    while !s.is_char_boundary(char_start) {
        char_start -= 1;
    }

    match s[char_start..].chars().next() {
        Some(c) => !(c.is_lowercase() || c.is_uppercase()),
        None => true,
    }
}
/// Checks that the byte range `start..end` of `line` is delimited on both sides, as judged
/// by `is_word_boundary_byte`.
///
/// The left side is automatically fine when the range starts at the beginning of the line;
/// otherwise the byte at `start - 1` is examined. The right side examines the byte at `end`.
fn has_word_boundaries(line: &str, start: usize, end: usize) -> bool {
    let left_clear = match start.checked_sub(1) {
        Some(prev) => is_word_boundary_byte(line, prev),
        None => true,
    };
    left_clear && is_word_boundary_byte(line, end)
}
/// Returns `true` when the half-open range `match_start..match_end` intersects the
/// `full_start..full_end` range of at least one of the given link spans.
fn overlaps_any_link(
    spans: &[crate::links::LinkSpan],
    match_start: usize,
    match_end: usize,
) -> bool {
    spans.iter().any(|span| {
        // Two half-open ranges are disjoint when one ends at or before the other begins.
        let disjoint = span.full_end <= match_start || span.full_start >= match_end;
        !disjoint
    })
}
/// Scans vault files for plain-text mentions of known page titles and optionally rewrites
/// them as `[[wikilinks]]`.
///
/// Steps:
/// 1. Validate `opts.file_filter` (no `..` components, not absolute).
/// 2. Build the title inventory from the index; return an empty report when nothing is
///    linkable.
/// 3. Select the files to scan: `glob_filter` if non-empty, else the single `file_filter`
///    entry, else every index entry.
/// 4. Scan each file (files larger than `MAX_FILE_SIZE` are skipped with a warning).
/// 5. With `first_only`, keep only the first match per (file, link target).
/// 6. With `apply`, write the remaining matches back to disk.
///
/// # Errors
///
/// Fails on an invalid `file_filter`, invalid glob patterns, a file that cannot be read as
/// UTF-8 text, or an error while writing.
pub fn auto_link(
    index: &dyn VaultIndex,
    dir: &Path,
    opts: &AutoLinkOptions<'_>,
) -> Result<AutoLinkReport> {
    let entries = index.entries();

    if let Some(filter) = opts.file_filter {
        anyhow::ensure!(
            !crate::discovery::has_parent_traversal(filter),
            "--file path must not contain '..' components: {filter}"
        );
        anyhow::ensure!(
            !std::path::Path::new(filter).has_root(),
            "--file path must be vault-relative, not absolute: {filter}"
        );
    }

    let (title_map, ambiguous_titles) = build_title_inventory(
        entries,
        opts.min_length,
        opts.exclude_titles,
        opts.exclude_target_globs,
    )?;

    if title_map.is_empty() {
        return Ok(AutoLinkReport {
            scanned: 0,
            total: 0,
            matches: Vec::new(),
            ambiguous_titles,
            applied: false,
        });
    }

    // Sorted so that automaton pattern ids map to a stable slice index.
    let mut patterns_sorted: Vec<(&str, &TitleEntry)> =
        title_map.iter().map(|(k, v)| (k.as_str(), v)).collect();
    patterns_sorted.sort_by_key(|(k, _)| *k);

    let ac = AhoCorasick::builder()
        .match_kind(MatchKind::LeftmostLongest)
        .ascii_case_insensitive(true)
        .build(patterns_sorted.iter().map(|(k, _)| k))
        .context("failed to build Aho-Corasick automaton")?;

    let paths_to_scan: Vec<(PathBuf, String)> = if !opts.glob_filter.is_empty() {
        // Only the glob branch needs the absolute path of every entry, so build it here
        // instead of on every run.
        let all_paths: Vec<PathBuf> = entries.iter().map(|e| dir.join(&e.rel_path)).collect();
        match_globs(dir, &all_paths, opts.glob_filter)?
    } else if let Some(filter) = opts.file_filter {
        let normalised = filter.replace('\\', "/");
        entries
            .iter()
            .filter(|e| e.rel_path == normalised)
            .map(|e| (dir.join(&e.rel_path), e.rel_path.clone()))
            .collect()
    } else {
        entries
            .iter()
            .map(|e| (dir.join(&e.rel_path), e.rel_path.clone()))
            .collect()
    };

    let mut all_matches: Vec<AutoLinkMatch> = Vec::new();
    // Content of files with matches, kept so the apply step can detect concurrent edits.
    let mut scanned_content: HashMap<String, String> = HashMap::new();
    let scanned = paths_to_scan.len();

    for (abs_path, rel_path) in &paths_to_scan {
        // A metadata failure is not fatal here; the read below reports the real error.
        let oversized_len = std::fs::metadata(abs_path)
            .ok()
            .map(|meta| meta.len())
            .filter(|&len| len > MAX_FILE_SIZE);
        if let Some(len) = oversized_len {
            eprintln!(
                "warning: skipping {} ({} MiB exceeds {} MiB limit)",
                abs_path.display(),
                len / (1024 * 1024),
                MAX_FILE_SIZE / (1024 * 1024),
            );
            continue;
        }

        let content = std::fs::read_to_string(abs_path)
            .with_context(|| format!("failed to read {}", abs_path.display()))?;
        let file_matches = scan_file_for_matches(&content, rel_path, &ac, &patterns_sorted);
        let has_matches = !file_matches.is_empty();
        all_matches.extend(file_matches);
        if opts.apply && has_matches {
            scanned_content.insert(rel_path.clone(), content);
        }
    }

    if opts.first_only {
        // Intern file names and targets as small ids so the "seen" set does not have to
        // clone strings; the keep-mask is computed first because `retain` needs the vector
        // mutably.
        let mut file_ids: HashMap<&str, usize> = HashMap::new();
        let mut target_ids: HashMap<&str, usize> = HashMap::new();
        let mut seen: HashSet<(usize, usize)> = HashSet::new();
        let keep: Vec<bool> = all_matches
            .iter()
            .map(|m| {
                let n = file_ids.len();
                let fid = *file_ids.entry(&m.file).or_insert(n);
                let n = target_ids.len();
                let tid = *target_ids.entry(&m.link_target).or_insert(n);
                seen.insert((fid, tid))
            })
            .collect();
        let mut keep_iter = keep.into_iter();
        all_matches.retain(|_| keep_iter.next().unwrap_or(false));
    }

    if opts.apply {
        apply_matches(dir, &all_matches, &scanned_content)?;
    }

    let total = all_matches.len();
    Ok(AutoLinkReport {
        scanned,
        total,
        matches: all_matches,
        ambiguous_titles,
        applied: opts.apply,
    })
}
/// Finds all linkable title mentions in one file's text.
///
/// `patterns_sorted` must be the same slice, in the same order, that was used to build `ac`:
/// the automaton's pattern index is used directly as an index into it.
///
/// Lines are skipped when they belong to leading frontmatter, when the fence tracker or the
/// comment-fence toggle says so, or when they start (after leading whitespace) with `#`.
/// Within a remaining line a match is dropped when it points at the file itself, lacks word
/// boundaries, or overlaps an existing link span.
///
/// Returned matches carry a 1-based line number and a byte offset into the line.
fn scan_file_for_matches(
content: &str,
rel_path: &str,
ac: &AhoCorasick,
patterns_sorted: &[(&str, &TitleEntry)],
) -> Vec<AutoLinkMatch> {
let mut results = Vec::new();
let mut fence = FenceTracker::new();
let mut in_comment_fence = false;
let mut in_frontmatter = false;
let mut frontmatter_done = false;
let mut line_num = 0usize;
// Splitting on '\n' only: a trailing '\r' from CRLF input stays part of the line.
for line in content.split('\n') {
line_num += 1;
// Frontmatter is only recognised when the very first line is `---`; it ends at the next
// `---` line. If it is never closed, the rest of the file is skipped.
if !frontmatter_done {
if line_num == 1 && line.trim() == "---" {
in_frontmatter = true;
continue;
}
if in_frontmatter {
if line.trim() == "---" {
in_frontmatter = false;
frontmatter_done = true;
}
continue;
}
frontmatter_done = true;
}
// NOTE(review): presumably `process_line` returns true for fence delimiters and for lines
// inside a fenced code block — confirm against `scanner::FenceTracker`.
if fence.process_line(line) {
continue;
}
// A comment-fence line toggles comment mode and is itself skipped.
if !fence.in_fence() && is_comment_fence(line) {
in_comment_fence = !in_comment_fence;
continue;
}
if in_comment_fence {
continue;
}
// Skips headings; note this also skips any other line whose first non-blank character is `#`.
if line.trim_start().starts_with('#') {
continue;
}
// NOTE(review): the code below uses offsets found in the cleaned text to slice the original
// line, so it assumes both strip helpers preserve byte offsets — confirm in `scanner`.
let stripped_code = strip_inline_code(line);
let cleaned = strip_inline_comments(stripped_code.as_ref());
let cleaned_str: &str = cleaned.as_ref();
let link_spans = extract_link_spans_with_original(cleaned_str, line);
for mat in ac.find_iter(cleaned_str) {
let start = mat.start();
let end = mat.end();
let pat_idx = mat.pattern().as_usize();
let (_, entry) = patterns_sorted[pat_idx];
// Never link a page to itself.
if entry.source_rel == rel_path {
continue;
}
if !has_word_boundaries(cleaned_str, start, end) {
continue;
}
if overlaps_any_link(&link_spans, start, end) {
continue;
}
// Prefer the original line so the reported text keeps its casing; fall back to the cleaned
// text if the range is not valid in the original.
let matched_text = line.get(start..end).unwrap_or(&cleaned_str[start..end]);
results.push(AutoLinkMatch {
file: rel_path.to_owned(),
line: line_num,
col: start,
matched_text: matched_text.to_owned(),
link_target: entry.link_target.clone(),
});
}
}
results
}
/// Writes the given matches back to disk, replacing each matched text with
/// `[[link_target]]`.
///
/// Matches are grouped per file. For every file the function
/// 1. verifies that the path is inside the canonicalized vault (hard error otherwise),
/// 2. looks up the content captured during the scan and compares it with what is on disk
///    now — a missing cache entry, an unreadable file, or changed content only produces a
///    warning and skips the file,
/// 3. applies the replacements from the last match to the first so earlier byte offsets stay
///    valid, skipping any match whose text is no longer at the recorded position,
/// 4. writes the result with `atomic_write`.
///
/// # Errors
///
/// Fails when the vault directory cannot be canonicalized, a path cannot be verified or lies
/// outside the vault, or a write fails.
fn apply_matches(
    dir: &Path,
    matches: &[AutoLinkMatch],
    scanned_content: &HashMap<String, String>,
) -> Result<()> {
    let canonical_vault = canonicalize_vault_dir(dir)
        .context("failed to canonicalize vault directory for write safety check")?;

    let mut grouped: HashMap<&str, Vec<&AutoLinkMatch>> = HashMap::new();
    for found in matches {
        grouped.entry(found.file.as_str()).or_default().push(found);
    }

    for (rel_path, mut pending) in grouped {
        let abs_path = dir.join(rel_path);

        let within = ensure_within_vault(&canonical_vault, &abs_path)
            .with_context(|| format!("could not verify {} is within vault", abs_path.display()))?;
        anyhow::ensure!(
            within,
            "refusing to write outside vault: {}",
            abs_path.display()
        );

        let content = match scanned_content.get(rel_path) {
            Some(cached) => cached.as_str(),
            None => {
                eprintln!(
                    "warning: {rel_path} not in scan cache, skipping (possible internal bug)"
                );
                continue;
            }
        };

        // Re-read the file: offsets are only valid against the exact text that was scanned.
        match std::fs::read_to_string(&abs_path) {
            Ok(disk_content) if disk_content == content => {}
            Ok(_) => {
                eprintln!("warning: {rel_path} was modified after scan, skipping");
                continue;
            }
            Err(err) => {
                eprintln!("warning: could not verify {rel_path} after scan ({err}), skipping");
                continue;
            }
        }

        // Bottom-up, right-to-left: a replacement never shifts a position still to be edited.
        pending.sort_by_key(|m| std::cmp::Reverse((m.line, m.col)));

        let mut lines = split_lines_preserving_endings(content);
        for m in pending {
            let Some(line) = lines.get_mut(m.line.saturating_sub(1)) else {
                continue;
            };
            let span = m.col..m.col + m.matched_text.len();
            // `get` yields `None` for out-of-range or non-boundary spans, so this also
            // covers the length check.
            let text_still_there = line.get(span.clone()) == Some(m.matched_text.as_str());
            if text_still_there {
                line.replace_range(span, &format!("[[{}]]", m.link_target));
            }
        }

        let new_content = lines.concat();
        atomic_write(&abs_path, new_content.as_bytes())
            .with_context(|| format!("failed to write {}", abs_path.display()))?;
    }

    Ok(())
}
/// Splits `content` into lines, keeping each line's terminating `\n` (and any preceding
/// `\r`) attached, so that concatenating the result reproduces `content` exactly.
///
/// An empty input yields no lines; a final line without a trailing newline is returned as is.
fn split_lines_preserving_endings(content: &str) -> Vec<String> {
    content.split_inclusive('\n').map(str::to_owned).collect()
}
// Unit tests for the title inventory and for `auto_link` end to end, using an in-memory
// index and files written to a temporary directory.
#[cfg(test)]
mod tests {
use super::*;
use crate::index::{IndexEntry, VaultIndex};
use crate::link_graph::LinkGraph;
use indexmap::IndexMap;
use serde_json::Value;
use tempfile::TempDir;
// Minimal in-memory `VaultIndex` backed by a vector of entries and an empty link graph.
struct MockIndex {
entries: Vec<IndexEntry>,
graph: LinkGraph,
}
impl MockIndex {
fn new(entries: Vec<IndexEntry>) -> Self {
Self {
entries,
graph: LinkGraph::default(),
}
}
}
impl VaultIndex for MockIndex {
fn entries(&self) -> &[IndexEntry] {
&self.entries
}
fn get(&self, rel_path: &str) -> Option<&IndexEntry> {
self.entries.iter().find(|e| e.rel_path == rel_path)
}
fn link_graph(&self) -> &LinkGraph {
&self.graph
}
}
// Builds an index entry with the given path and properties; all other fields are empty.
fn make_entry(rel_path: &str, props: Vec<(&str, Value)>) -> IndexEntry {
let mut properties = IndexMap::new();
for (k, v) in props {
properties.insert(k.to_owned(), v);
}
IndexEntry {
rel_path: rel_path.to_owned(),
modified: String::new(),
properties,
tags: Vec::new(),
sections: Vec::new(),
tasks: Vec::new(),
links: Vec::new(),
bm25_tokens: None,
bm25_language: None,
}
}
// Writes `content` to `rel` inside the temp dir, creating parent directories as needed.
fn write_file(dir: &TempDir, rel: &str, content: &str) -> PathBuf {
let path = dir.path().join(rel);
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent).unwrap();
}
std::fs::write(&path, content).unwrap();
path
}
// Stem, title and alias of one entry all become lowercase keys pointing at the stem.
#[test]
fn test_title_inventory_basic() {
let entries = vec![make_entry(
"sprint-planning.md",
vec![
("title", Value::String("Sprint Planning".to_owned())),
(
"aliases",
Value::Array(vec![Value::String("SP".to_owned())]),
),
],
)];
let (map, ambiguous) = build_title_inventory(&entries, 2, &[], &[]).unwrap();
assert!(ambiguous.is_empty());
assert!(map.contains_key("sprint-planning"), "stem missing");
assert!(map.contains_key("sprint planning"), "title missing");
assert!(map.contains_key("sp"), "alias missing");
let title_entry = map.get("sprint planning").unwrap();
assert_eq!(title_entry.link_target, "sprint-planning");
assert_eq!(title_entry.source_rel, "sprint-planning.md");
}
// A title claimed by two different files is removed from the map and reported.
#[test]
fn test_title_inventory_ambiguous() {
let entries = vec![
make_entry(
"planning/sprint.md",
vec![("title", Value::String("Sprint".to_owned()))],
),
make_entry(
"notes/sprint.md",
vec![("title", Value::String("Sprint".to_owned()))],
),
];
let (map, ambiguous) = build_title_inventory(&entries, 3, &[], &[]).unwrap();
assert!(
!map.contains_key("sprint"),
"ambiguous title should be absent from map"
);
assert!(!ambiguous.is_empty(), "should have ambiguous entries");
}
// Titles shorter than `min_length` are not added.
#[test]
fn test_title_inventory_min_length() {
let entries = vec![make_entry(
"go.md",
vec![("title", Value::String("Go".to_owned()))],
)];
let (map, _) = build_title_inventory(&entries, 3, &[], &[]).unwrap();
assert!(!map.contains_key("go"), "short title should be filtered");
}
// Titles listed in `exclude_titles` are not added, regardless of casing.
#[test]
fn test_title_inventory_exclude() {
let entries = vec![make_entry(
"the.md",
vec![("title", Value::String("The".to_owned()))],
)];
let (map, _) =
build_title_inventory(&entries, 2, &["the".to_owned(), "The".to_owned()], &[]).unwrap();
assert!(!map.contains_key("the"), "excluded title should not appear");
}
// "Sprint" inside "Sprinting" must not match; the standalone word must.
#[test]
fn test_word_boundary() {
let entry = make_entry(
"sprint.md",
vec![("title", Value::String("Sprint".to_owned()))],
);
let other = make_entry("other.md", vec![]);
let tmp = TempDir::new().unwrap();
write_file(&tmp, "sprint.md", "---\ntitle: Sprint\n---\n");
write_file(&tmp, "other.md", "Sprinting is fun but Sprint is better.\n");
let index = MockIndex::new(vec![entry, other]);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("other.md"),
glob_filter: &[],
},
)
.unwrap();
let matches: Vec<_> = report
.matches
.iter()
.filter(|m| m.matched_text == "Sprint")
.collect();
assert_eq!(matches.len(), 1, "only standalone 'Sprint' should match");
assert_eq!(matches[0].matched_text, "Sprint");
}
// Mentions in heading lines are ignored; body mentions are reported.
#[test]
fn test_skip_headings() {
let page = make_entry(
"sprint-planning.md",
vec![("title", Value::String("Sprint Planning".to_owned()))],
);
let other = make_entry("notes.md", vec![]);
let tmp = TempDir::new().unwrap();
write_file(&tmp, "sprint-planning.md", "# Sprint Planning\n");
write_file(
&tmp,
"notes.md",
"## Sprint Planning\n\nSee Sprint Planning for details.\n",
);
let index = MockIndex::new(vec![page, other]);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("notes.md"),
glob_filter: &[],
},
)
.unwrap();
assert_eq!(
report.matches.len(),
1,
"only the body mention should match"
);
assert_eq!(report.matches[0].line, 3);
}
// Mentions inside fenced code blocks are ignored.
#[test]
fn test_skip_code_blocks() {
let page = make_entry("target.md", vec![]);
let other = make_entry("notes.md", vec![]);
let tmp = TempDir::new().unwrap();
write_file(&tmp, "target.md", "");
write_file(
&tmp,
"notes.md",
"```\ntarget text mentioning target\n```\n",
);
let index = MockIndex::new(vec![page, other]);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("notes.md"),
glob_filter: &[],
},
)
.unwrap();
assert!(
report.matches.is_empty(),
"code block content should not match"
);
}
// Mentions inside inline code spans are ignored.
#[test]
fn test_skip_inline_code() {
let page = make_entry("target.md", vec![]);
let other = make_entry("notes.md", vec![]);
let tmp = TempDir::new().unwrap();
write_file(&tmp, "target.md", "");
write_file(&tmp, "notes.md", "Use `target` sparingly.\n");
let index = MockIndex::new(vec![page, other]);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("notes.md"),
glob_filter: &[],
},
)
.unwrap();
assert!(
report.matches.is_empty(),
"inline code span should not match"
);
}
// Text that is already part of a wikilink or markdown link is not linked again.
#[test]
fn test_skip_existing_links() {
let page = make_entry("target.md", vec![]);
let other = make_entry("notes.md", vec![]);
let tmp = TempDir::new().unwrap();
write_file(&tmp, "target.md", "");
write_file(
&tmp,
"notes.md",
"See [[target]] and [target](target.md) for details.\n",
);
let index = MockIndex::new(vec![page, other]);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("notes.md"),
glob_filter: &[],
},
)
.unwrap();
assert!(
report.matches.is_empty(),
"matches overlapping existing links should be skipped"
);
}
// A page mentioning its own title does not produce a link to itself.
#[test]
fn test_skip_self_links() {
let page = make_entry(
"sprint.md",
vec![("title", Value::String("Sprint".to_owned()))],
);
let tmp = TempDir::new().unwrap();
write_file(
&tmp,
"sprint.md",
"---\ntitle: Sprint\n---\n\nThis is the Sprint page.\n",
);
let index = MockIndex::new(vec![page]);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("sprint.md"),
glob_filter: &[],
},
)
.unwrap();
assert!(
report.matches.is_empty(),
"self-links (file's own title in its own body) should be skipped"
);
}
// Matching ignores ASCII case.
#[test]
fn test_case_insensitive() {
let page = make_entry("target.md", vec![]);
let other = make_entry("notes.md", vec![]);
let tmp = TempDir::new().unwrap();
write_file(&tmp, "target.md", "");
write_file(&tmp, "notes.md", "See Target or TARGET or target here.\n");
let index = MockIndex::new(vec![page, other]);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("notes.md"),
glob_filter: &[],
},
)
.unwrap();
assert_eq!(report.matches.len(), 3, "all case variants should match");
}
// When one title is a prefix of another, the longer title wins.
#[test]
fn test_longest_match() {
let sprint = make_entry(
"sprint.md",
vec![("title", Value::String("Sprint".to_owned()))],
);
let sp = make_entry(
"sprint-planning.md",
vec![("title", Value::String("Sprint Planning".to_owned()))],
);
let other = make_entry("notes.md", vec![]);
let tmp = TempDir::new().unwrap();
write_file(&tmp, "sprint.md", "");
write_file(&tmp, "sprint-planning.md", "");
write_file(&tmp, "notes.md", "Sprint Planning kicks off tomorrow.\n");
let index = MockIndex::new(vec![sprint, sp, other]);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("notes.md"),
glob_filter: &[],
},
)
.unwrap();
assert_eq!(report.matches.len(), 1);
assert_eq!(report.matches[0].matched_text, "Sprint Planning");
assert_eq!(report.matches[0].link_target, "sprint-planning");
}
// Mentions inside the leading frontmatter block are ignored.
#[test]
fn test_skip_frontmatter() {
let page = make_entry("target.md", vec![]);
let other = make_entry("notes.md", vec![]);
let tmp = TempDir::new().unwrap();
write_file(&tmp, "target.md", "");
write_file(
&tmp,
"notes.md",
"---\ntitle: target mentions\n---\n\nNo mention in body.\n",
);
let index = MockIndex::new(vec![page, other]);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("notes.md"),
glob_filter: &[],
},
)
.unwrap();
assert!(
report.matches.is_empty(),
"frontmatter mentions should be skipped"
);
}
// Mentions between `%%` comment fences are ignored.
#[test]
fn test_skip_comment_fences() {
let page = make_entry("target.md", vec![]);
let other = make_entry("notes.md", vec![]);
let tmp = TempDir::new().unwrap();
write_file(&tmp, "target.md", "");
write_file(
&tmp,
"notes.md",
"%%\ntarget is mentioned here inside comment\n%%\n",
);
let index = MockIndex::new(vec![page, other]);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("notes.md"),
glob_filter: &[],
},
)
.unwrap();
assert!(
report.matches.is_empty(),
"comment fence blocks should be skipped"
);
}
// With `apply: true` the file on disk is rewritten to contain the wikilink.
#[test]
fn test_apply_writes_wikilinks() {
let page = make_entry("target.md", vec![]);
let other = make_entry("notes.md", vec![]);
let tmp = TempDir::new().unwrap();
write_file(&tmp, "target.md", "");
write_file(&tmp, "notes.md", "See target for details.\n");
let index = MockIndex::new(vec![page, other]);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: true,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("notes.md"),
glob_filter: &[],
},
)
.unwrap();
assert_eq!(report.matches.len(), 1);
assert!(report.applied);
let written = std::fs::read_to_string(tmp.path().join("notes.md")).unwrap();
assert!(
written.contains("[[target]]"),
"written content should contain wikilink: {written}"
);
}
// `first_only` reduces repeated mentions of one target in a file to a single match.
#[test]
fn test_first_only_dedup() {
let tmp = TempDir::new().unwrap();
let entries = vec![
make_entry("alice.md", vec![("title", Value::String("Alice".into()))]),
make_entry("notes.md", vec![("title", Value::String("Notes".into()))]),
];
write_file(&tmp, "alice.md", "---\ntitle: Alice\n---\nAlice bio.\n");
write_file(
&tmp,
"notes.md",
"---\ntitle: Notes\n---\nAlice went to the park. Later Alice came back.\n",
);
let index = MockIndex::new(entries);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: None,
glob_filter: &[],
},
)
.unwrap();
let alice_matches: Vec<_> = report
.matches
.iter()
.filter(|m| m.file == "notes.md" && m.link_target == "alice")
.collect();
assert_eq!(
alice_matches.len(),
2,
"without first_only, expected 2 Alice matches"
);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: true,
exclude_target_globs: &[],
file_filter: None,
glob_filter: &[],
},
)
.unwrap();
let alice_matches: Vec<_> = report
.matches
.iter()
.filter(|m| m.file == "notes.md" && m.link_target == "alice")
.collect();
assert_eq!(
alice_matches.len(),
1,
"with first_only, expected 1 Alice match"
);
}
// Files matched by an exclude-target glob are not offered as link targets.
#[test]
fn test_exclude_target_glob() {
let tmp = TempDir::new().unwrap();
let entries = vec![
make_entry(
"templates/start.md",
vec![("title", Value::String("Start".into()))],
),
make_entry(
"people/alice.md",
vec![("title", Value::String("Alice".into()))],
),
make_entry("notes.md", vec![("title", Value::String("Notes".into()))]),
];
write_file(
&tmp,
"templates/start.md",
"---\ntitle: Start\n---\nStart template.\n",
);
write_file(
&tmp,
"people/alice.md",
"---\ntitle: Alice\n---\nAlice bio.\n",
);
write_file(
&tmp,
"notes.md",
"---\ntitle: Notes\n---\nWe Start with Alice today.\n",
);
let index = MockIndex::new(entries);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: None,
glob_filter: &[],
},
)
.unwrap();
let has_start = report.matches.iter().any(|m| m.link_target == "start");
let has_alice = report.matches.iter().any(|m| m.link_target == "alice");
assert!(has_start, "without exclusion, Start should match");
assert!(has_alice, "without exclusion, Alice should match");
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &["templates/*".to_owned()],
file_filter: None,
glob_filter: &[],
},
)
.unwrap();
let has_start = report.matches.iter().any(|m| m.link_target == "start");
let has_alice = report.matches.iter().any(|m| m.link_target == "alice");
assert!(!has_start, "with exclusion, Start should NOT match");
assert!(has_alice, "with exclusion, Alice should still match");
}
// Several exclude-target globs are combined: a file matching any of them is excluded.
#[test]
fn test_exclude_target_glob_multiple() {
let tmp = TempDir::new().unwrap();
let entries = vec![
make_entry(
"templates/start.md",
vec![("title", Value::String("Start".into()))],
),
make_entry(
"archive/old.md",
vec![("title", Value::String("Old".into()))],
),
make_entry(
"people/alice.md",
vec![("title", Value::String("Alice".into()))],
),
make_entry("notes.md", vec![("title", Value::String("Notes".into()))]),
];
write_file(
&tmp,
"templates/start.md",
"---\ntitle: Start\n---\nStart.\n",
);
write_file(&tmp, "archive/old.md", "---\ntitle: Old\n---\nOld.\n");
write_file(&tmp, "people/alice.md", "---\ntitle: Alice\n---\nAlice.\n");
write_file(
&tmp,
"notes.md",
"---\ntitle: Notes\n---\nStart and Old and Alice today.\n",
);
let index = MockIndex::new(entries);
let report = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &["templates/*".to_owned(), "archive/*".to_owned()],
file_filter: None,
glob_filter: &[],
},
)
.unwrap();
let targets: Vec<&str> = report
.matches
.iter()
.map(|m| m.link_target.as_str())
.collect();
assert!(
!targets.contains(&"start"),
"templates/* should be excluded"
);
assert!(!targets.contains(&"old"), "archive/* should be excluded");
assert!(
targets.contains(&"alice"),
"people/alice should NOT be excluded"
);
}
// Excluding one of two colliding files makes the remaining title unambiguous again.
#[test]
fn test_exclude_target_glob_resolves_ambiguity() {
let entries = vec![
make_entry(
"templates/sprint.md",
vec![("title", Value::String("Sprint".into()))],
),
make_entry(
"planning/sprint.md",
vec![("title", Value::String("Sprint".into()))],
),
make_entry("notes.md", vec![("title", Value::String("Notes".into()))]),
];
let (map, ambiguous) = build_title_inventory(&entries, 3, &[], &[]).unwrap();
assert!(
!map.contains_key("sprint"),
"without exclusion, sprint should be ambiguous"
);
assert!(!ambiguous.is_empty());
let (map, ambiguous) =
build_title_inventory(&entries, 3, &[], &["templates/*".to_owned()]).unwrap();
assert!(
map.contains_key("sprint"),
"with templates/* excluded, sprint should be unambiguous and present"
);
let entry = map.get("sprint").unwrap();
assert_eq!(entry.source_rel, "planning/sprint.md");
assert!(
!ambiguous.iter().any(|a| a.eq_ignore_ascii_case("sprint")),
"sprint should not be in the ambiguous list"
);
}
// A `file_filter` containing `..` is rejected with an error.
#[test]
fn file_filter_rejects_parent_traversal() {
let tmp = TempDir::new().unwrap();
write_file(&tmp, "a.md", "---\ntitle: A\n---\n");
let index = MockIndex::new(vec![make_entry("a.md", vec![])]);
let err = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("../etc/passwd"),
glob_filter: &[],
},
)
.unwrap_err();
assert!(
format!("{err:?}").contains(".."),
"error should mention '..' component: {err:?}"
);
}
// An absolute `file_filter` is rejected with an error.
#[test]
fn file_filter_rejects_absolute_path() {
let tmp = TempDir::new().unwrap();
write_file(&tmp, "a.md", "---\ntitle: A\n---\n");
let index = MockIndex::new(vec![make_entry("a.md", vec![])]);
let err = auto_link(
&index,
tmp.path(),
&AutoLinkOptions {
apply: false,
min_length: 3,
exclude_titles: &[],
first_only: false,
exclude_target_globs: &[],
file_filter: Some("/etc/passwd"),
glob_filter: &[],
},
)
.unwrap_err();
assert!(
format!("{err:?}").contains("absolute"),
"error should mention 'absolute': {err:?}"
);
}
}