use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use crate::lint_context::LintContext;
/// Converts a single ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its numeric value.
///
/// Returns `None` for every byte that is not a hexadecimal digit.
fn hex_digit_to_value(c: u8) -> Option<u8> {
    // `to_digit(16)` yields at most 15, so narrowing back to `u8` is lossless.
    char::from(c).to_digit(16).map(|digit| digit as u8)
}
/// Decodes `%XX` percent-escapes in `s`.
///
/// A `%` that is not followed by two hexadecimal digits is copied through
/// unchanged. If the decoded bytes are not valid UTF-8, the original input is
/// returned as-is.
fn url_decode(s: &str) -> String {
    // Nothing to decode: avoid the byte-level pass entirely.
    if !s.contains('%') {
        return s.to_string();
    }

    let raw = s.as_bytes();
    let mut decoded = Vec::with_capacity(raw.len());
    let mut pos = 0;

    while let Some(&byte) = raw.get(pos) {
        if byte == b'%' {
            // The slice lookup only succeeds when both digits after `%` exist.
            if let Some([hi, lo]) = raw.get(pos + 1..pos + 3) {
                if let (Some(high), Some(low)) = (hex_digit_to_value(*hi), hex_digit_to_value(*lo)) {
                    decoded.push(high * 16 + low);
                    pos += 3;
                    continue;
                }
            }
        }
        decoded.push(byte);
        pos += 1;
    }

    String::from_utf8(decoded).unwrap_or_else(|_| s.to_string())
}
// Matches the bracketed text part of an inline link or image: an optional `!`,
// then `[`, any run of characters other than `]`, then `]`.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
// Matches `](<url>#fragment "title")`. Capture 1 is the text between `<` and `>`;
// capture 2 is the optional `#fragment` that directly follows the closing `>`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
// Matches `](url#fragment "title")`. Capture 1 is the URL up to the first
// whitespace, `)`, `>` or `#`; capture 2 is the optional `#fragment`.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r#"]\(\s*([^>)\s#]+)(#[^)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
// Matches a leading URI scheme (`scheme://` or `scheme:`) or a leading `www.`.
pub(crate) static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
/// Lowercase file extensions (including the leading dot) that identify a
/// Markdown document.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];

/// Reports whether `path` ends with one of [`MARKDOWN_EXTENSIONS`].
///
/// The comparison is case-insensitive: the path is lowercased first.
#[inline]
fn is_markdown_file(path: &str) -> bool {
    let lowered = path.to_lowercase();
    for extension in MARKDOWN_EXTENSIONS {
        if lowered.ends_with(*extension) {
            return true;
        }
    }
    false
}
/// Returns `url` truncated at the first `?` or `#`, whichever comes first.
///
/// The input is returned unchanged when it contains neither character.
fn strip_query_and_fragment(url: &str) -> &str {
    match url.find(|ch: char| ch == '?' || ch == '#') {
        Some(cut) => &url[..cut],
        None => url,
    }
}
/// Extracts inline links that point at other Markdown files.
///
/// Only lines that carry at least one entry in `ctx.links` are scanned, and
/// each such line is scanned once. For every `[text]` / `![text]` prefix that
/// does not start inside a code span, the nearest following `](destination)`
/// is examined. Destinations are skipped when they are empty, external
/// (scheme or `www.`), fragment-only, template expressions (`{{` / `{%`),
/// absolute (`/`), or start with `~` / `@`, or are wrapped in backticks.
/// Query strings and fragments are stripped from the stored path, and only
/// paths with a Markdown extension are returned.
///
/// Each returned entry holds the 1-based line number and the 1-based byte
/// offset of the destination within its line.
pub fn extract_cross_file_links(ctx: &LintContext) -> Vec<CrossFileLinkIndex> {
    let content = ctx.content;
    if content.is_empty() || !content.contains("](") {
        return Vec::new();
    }

    let mut links = Vec::new();
    let lines: Vec<&str> = content.lines().collect();
    let line_index = &ctx.line_index;
    let mut processed_lines = HashSet::new();

    for link in &ctx.links {
        // `link.line` is used as a 1-based line number; a value of 0 is skipped
        // rather than underflowing.
        let Some(line_idx) = link.line.checked_sub(1) else {
            continue;
        };
        let Some(&line) = lines.get(line_idx) else {
            continue;
        };
        // The regex scan below finds every link on the line, so each line is
        // processed only once no matter how many parsed links it carries.
        if !processed_lines.insert(line_idx) {
            continue;
        }
        if !line.contains("](") {
            continue;
        }

        // Loop-invariant for all matches on this line.
        let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
        // Start offset of the last destination consumed on this line, used to
        // avoid reporting one destination for several `[...]` prefixes.
        let mut last_url_start: Option<usize> = None;

        for link_match in LINK_START_REGEX.find_iter(line) {
            if ctx.is_in_code_span_byte(line_start_byte + link_match.start()) {
                continue;
            }

            // Search from the closing `]` of the link text. `captures_at` is not
            // anchored, so both destination syntaxes are tried and the match that
            // starts first wins; otherwise an angle-bracket link further along
            // the line could shadow the destination that belongs to this link.
            let search_from = link_match.end() - 1;
            let angle_caps = URL_EXTRACT_ANGLE_BRACKET_REGEX.captures_at(line, search_from);
            let plain_caps = URL_EXTRACT_REGEX.captures_at(line, search_from);
            let angle_start = angle_caps.as_ref().and_then(|c| c.get(0)).map(|m| m.start());
            let plain_start = plain_caps.as_ref().and_then(|c| c.get(0)).map(|m| m.start());
            let nearest = match (angle_start, plain_start) {
                (Some(angle), Some(plain)) if plain < angle => plain_caps,
                (None, _) => plain_caps,
                _ => angle_caps,
            };
            let Some(caps) = nearest else {
                continue;
            };
            let Some(url_group) = caps.get(1) else {
                continue;
            };

            // A bracketed span without its own destination (e.g. `[note]`) resolves
            // to the next link's destination; report that destination only once.
            if last_url_start == Some(url_group.start()) {
                continue;
            }
            last_url_start = Some(url_group.start());

            let file_path = url_group.as_str().trim();
            if file_path.is_empty()
                || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
                || file_path.starts_with("www.")
                || file_path.starts_with('#')
                || file_path.starts_with("{{")
                || file_path.starts_with("{%")
                || file_path.starts_with('/')
                || file_path.starts_with('~')
                || file_path.starts_with('@')
                || (file_path.starts_with('`') && file_path.ends_with('`'))
            {
                continue;
            }

            let file_path = strip_query_and_fragment(file_path);
            let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");

            if is_markdown_file(file_path) {
                links.push(CrossFileLinkIndex {
                    target_path: file_path.to_string(),
                    fragment: fragment.to_string(),
                    line: link.line,
                    column: url_group.start() + 1,
                });
            }
        }
    }

    links
}
// Four-byte magic written at the start of the cache file; `load_from_cache`
// discards any file that does not begin with it.
#[cfg(feature = "native")]
const CACHE_MAGIC: &[u8; 4] = b"RWSI";
// Cache format version, stored after the magic as a little-endian `u32`.
// A cache file with a different version is discarded on load.
#[cfg(feature = "native")]
const CACHE_FORMAT_VERSION: u32 = 5;
// Name of the cache file inside the cache directory.
#[cfg(feature = "native")]
const CACHE_FILE_NAME: &str = "workspace_index.bin";
/// Index of per-file data for a whole workspace, keyed by file path.
///
/// NOTE(review): this struct is serialized to the on-disk cache, so changing
/// its fields presumably requires bumping `CACHE_FORMAT_VERSION` — confirm.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct WorkspaceIndex {
/// Indexed data for each known file.
files: HashMap<PathBuf, FileIndex>,
/// Maps a resolved link target to the set of files that link to it.
/// Populated by `update_file`.
reverse_deps: HashMap<PathBuf, HashSet<PathBuf>>,
/// Change counter, incremented (wrapping) by `insert_file`, `update_file`,
/// `clear`, and by `remove_file` when a file was actually removed.
version: u64,
}
/// Indexed data extracted from a single file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FileIndex {
/// Headings in the order they were added via `add_heading`.
pub headings: Vec<HeadingIndex>,
/// Reference-style link usages recorded via `add_reference_link`.
pub reference_links: Vec<ReferenceLinkIndex>,
/// Links to other files recorded via `add_cross_file_link`.
pub cross_file_links: Vec<CrossFileLinkIndex>,
/// Reference identifiers defined in this file.
pub defined_references: HashSet<String>,
/// Hash of the file content, compared by `hash_matches` and `WorkspaceIndex::is_file_stale`.
pub content_hash: String,
/// Lowercased anchor (auto, custom, or alias) -> index into `headings`.
anchor_to_heading: HashMap<String, usize>,
/// Lowercased anchors registered via `add_html_anchor`.
html_anchors: HashSet<String>,
/// Lowercased anchors registered via `add_attribute_anchor`.
attribute_anchors: HashSet<String>,
/// Rules disabled for the whole file; `"*"` disables every rule.
pub file_disabled_rules: HashSet<String>,
/// `(line, disabled rules, enabled rules)` entries. `is_rule_disabled_at_line`
/// binary-searches on the line component, so entries are expected to be
/// sorted by line.
pub persistent_transitions: Vec<(usize, HashSet<String>, HashSet<String>)>,
/// Rules disabled for individual lines; `"*"` disables every rule on that line.
pub line_disabled_rules: HashMap<usize, HashSet<String>>,
}
/// A heading recorded in a `FileIndex`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingIndex {
/// Heading text.
pub text: String,
/// Automatically generated anchor for the heading (may be empty).
pub auto_anchor: String,
/// Explicitly assigned anchor, if the heading has one.
pub custom_anchor: Option<String>,
/// Line of the heading (presumably 1-based like the link indexes — confirm with the producer).
pub line: usize,
/// Whether the heading uses Setext syntax; defaults to `false` when absent
/// from deserialized data.
#[serde(default)]
pub is_setext: bool,
}
/// A reference-style link usage recorded in a `FileIndex`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceLinkIndex {
/// Identifier of the referenced definition.
pub reference_id: String,
/// Line of the usage (numbering convention is set by the producer — not visible here).
pub line: usize,
/// Column of the usage (numbering convention is set by the producer — not visible here).
pub column: usize,
}
/// A link from one file to another Markdown file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossFileLinkIndex {
/// Link destination as written, with any query string and fragment removed.
pub target_path: String,
/// Fragment without the leading `#`; empty when the link has none.
pub fragment: String,
/// 1-based line number of the link.
pub line: usize,
/// 1-based byte offset of the destination within its line, as computed by
/// `extract_cross_file_links`.
pub column: usize,
}
/// A heading that has no custom anchor and is therefore addressed only by its
/// auto-generated anchor; produced by `WorkspaceIndex::get_vulnerable_anchors`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnerableAnchor {
/// File containing the heading.
pub file: PathBuf,
/// Line of the heading, copied from `HeadingIndex::line`.
pub line: usize,
/// Heading text.
pub text: String,
}
impl WorkspaceIndex {
pub fn new() -> Self {
Self::default()
}
/// Returns the current change counter of the index.
pub fn version(&self) -> u64 {
    self.version
}
/// Returns the number of files currently stored in the index.
pub fn file_count(&self) -> usize {
    self.files.len()
}
/// Reports whether `path` has an entry in the index.
pub fn contains_file(&self, path: &Path) -> bool {
    self.files.contains_key(path)
}
/// Returns the indexed data for `path`, or `None` when the file is not indexed.
pub fn get_file(&self, path: &Path) -> Option<&FileIndex> {
    self.files.get(path)
}
/// Stores `index` under `path`, replacing any previous entry, and bumps the version.
///
/// Reverse dependencies are not touched here; `update_file` is the variant
/// that also records them.
pub fn insert_file(&mut self, path: PathBuf, index: FileIndex) {
    self.version = self.version.wrapping_add(1);
    self.files.insert(path, index);
}
/// Removes `path` from the index and returns its data, if it was present.
///
/// Reverse-dependency entries involving `path` are cleared in every case;
/// the version is bumped only when a file was actually removed.
pub fn remove_file(&mut self, path: &Path) -> Option<FileIndex> {
    self.clear_reverse_deps_for(path);
    let removed = self.files.remove(path)?;
    self.version = self.version.wrapping_add(1);
    Some(removed)
}
/// Collects every heading that has a non-empty auto-generated anchor and no
/// custom anchor, grouped by the lowercased auto anchor.
pub fn get_vulnerable_anchors(&self) -> HashMap<String, Vec<VulnerableAnchor>> {
    let mut vulnerable: HashMap<String, Vec<VulnerableAnchor>> = HashMap::new();
    for (file_path, file_index) in &self.files {
        let exposed = file_index
            .headings
            .iter()
            .filter(|heading| heading.custom_anchor.is_none() && !heading.auto_anchor.is_empty());
        for heading in exposed {
            let entry = VulnerableAnchor {
                file: file_path.clone(),
                line: heading.line,
                text: heading.text.clone(),
            };
            vulnerable
                .entry(heading.auto_anchor.to_lowercase())
                .or_default()
                .push(entry);
        }
    }
    vulnerable
}
/// Iterates over every heading in the index, paired with the path of its file.
pub fn all_headings(&self) -> impl Iterator<Item = (&Path, &HeadingIndex)> {
    self.files.iter().flat_map(|(path, index)| {
        let path = path.as_path();
        index.headings.iter().map(move |heading| (path, heading))
    })
}
/// Iterates over all indexed files as `(path, index)` pairs.
pub fn files(&self) -> impl Iterator<Item = (&Path, &FileIndex)> {
    self.files.iter().map(|(path, index)| (path.as_path(), index))
}
/// Removes every file and every reverse dependency, then bumps the version.
pub fn clear(&mut self) {
    self.reverse_deps.clear();
    self.files.clear();
    self.version = self.version.wrapping_add(1);
}
/// Stores `index` under `path` and rebuilds the reverse dependencies that
/// originate from `path`.
///
/// Old reverse-dependency entries with `path` as the source are dropped, one
/// entry is recorded per cross-file link in `index` (targets are resolved
/// relative to `path`), and the version is bumped.
pub fn update_file(&mut self, path: &Path, index: FileIndex) {
    self.clear_reverse_deps_as_source(path);

    let source = path.to_path_buf();
    for link in &index.cross_file_links {
        let target = self.resolve_target_path(path, &link.target_path);
        self.reverse_deps.entry(target).or_default().insert(source.clone());
    }

    self.files.insert(source, index);
    self.version = self.version.wrapping_add(1);
}
/// Returns the files recorded as linking to `path` (in no particular order),
/// or an empty vector when there are none.
pub fn get_dependents(&self, path: &Path) -> Vec<PathBuf> {
    match self.reverse_deps.get(path) {
        Some(sources) => sources.iter().cloned().collect(),
        None => Vec::new(),
    }
}
/// Reports whether the entry for `path` is missing or was indexed with a
/// content hash different from `current_hash`.
pub fn is_file_stale(&self, path: &Path, current_hash: &str) -> bool {
    match self.files.get(path) {
        Some(entry) => entry.content_hash != current_hash,
        None => true,
    }
}
pub fn retain_only(&mut self, current_files: &std::collections::HashSet<PathBuf>) -> usize {
let before_count = self.files.len();
let to_remove: Vec<PathBuf> = self
.files
.keys()
.filter(|path| !current_files.contains(*path))
.cloned()
.collect();
for path in &to_remove {
self.remove_file(path);
}
before_count - self.files.len()
}
/// Serializes the index and writes it to `workspace_index.bin` inside `cache_dir`.
///
/// The file layout is: 4-byte magic, little-endian `u32` format version,
/// then the postcard-encoded index. Data is written to a process-specific
/// temporary file, synced, and renamed over the final path so readers never
/// observe a partially written cache.
///
/// # Errors
///
/// Returns an I/O error if the directory cannot be created, serialization
/// fails (reported as `InvalidData`), or writing/renaming the file fails.
/// On a failed write or rename the temporary file is removed (best effort).
#[cfg(feature = "native")]
pub fn save_to_cache(&self, cache_dir: &Path) -> std::io::Result<()> {
    use std::fs;
    use std::io::Write;

    fs::create_dir_all(cache_dir)?;

    let encoded = postcard::to_allocvec(self)
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;

    let mut cache_data = Vec::with_capacity(8 + encoded.len());
    cache_data.extend_from_slice(CACHE_MAGIC);
    cache_data.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
    cache_data.extend_from_slice(&encoded);

    let final_path = cache_dir.join(CACHE_FILE_NAME);
    let temp_path = cache_dir.join(format!("{}.tmp.{}", CACHE_FILE_NAME, std::process::id()));

    let write_result = (|| -> std::io::Result<()> {
        {
            // Scope the handle so the file is closed before it is renamed.
            let mut file = fs::File::create(&temp_path)?;
            file.write_all(&cache_data)?;
            file.sync_all()?;
        }
        fs::rename(&temp_path, &final_path)
    })();

    if let Err(err) = write_result {
        // Do not leave a stale temporary file behind; the original error is
        // what the caller needs to see, so a cleanup failure is ignored.
        let _ = fs::remove_file(&temp_path);
        return Err(err);
    }

    log::debug!(
        "Saved workspace index to cache: {} files, {} bytes (format v{})",
        self.files.len(),
        cache_data.len(),
        CACHE_FORMAT_VERSION
    );
    Ok(())
}
/// Loads the index from `workspace_index.bin` inside `cache_dir`.
///
/// Returns `None` when the file cannot be read. A file that is shorter than
/// the 8-byte header, has the wrong magic, has a different format version,
/// or fails to deserialize is deleted (best effort) and `None` is returned.
#[cfg(feature = "native")]
pub fn load_from_cache(cache_dir: &Path) -> Option<Self> {
    use std::fs;

    let path = cache_dir.join(CACHE_FILE_NAME);
    let data = fs::read(&path).ok()?;

    // Best-effort removal of an unusable cache file; failures are ignored.
    let discard = || {
        let _ = fs::remove_file(&path);
    };

    if data.len() < 8 {
        log::warn!("Workspace index cache too small, discarding");
        discard();
        return None;
    }

    let (header, payload) = data.split_at(8);

    if &header[..4] != CACHE_MAGIC {
        log::warn!("Workspace index cache has invalid magic header, discarding");
        discard();
        return None;
    }

    let version = u32::from_le_bytes([header[4], header[5], header[6], header[7]]);
    if version != CACHE_FORMAT_VERSION {
        log::info!(
            "Workspace index cache format version mismatch (got {version}, expected {CACHE_FORMAT_VERSION}), rebuilding"
        );
        discard();
        return None;
    }

    match postcard::from_bytes::<Self>(payload) {
        Ok(index) => {
            log::debug!(
                "Loaded workspace index from cache: {} files (format v{})",
                index.files.len(),
                version
            );
            Some(index)
        }
        Err(e) => {
            log::warn!("Failed to deserialize workspace index cache: {e}");
            discard();
            None
        }
    }
}
/// Removes `path` from every dependent set and drops the sets that become empty.
fn clear_reverse_deps_as_source(&mut self, path: &Path) {
    self.reverse_deps.retain(|_, sources| {
        sources.remove(path);
        !sources.is_empty()
    });
}
/// Removes `path` from the reverse-dependency map both as a link source and
/// as a link target.
fn clear_reverse_deps_for(&mut self, path: &Path) {
    // As a source: drop `path` from every target's dependent set.
    self.clear_reverse_deps_as_source(path);
    // As a target: forget who depended on `path`.
    self.reverse_deps.remove(path);
}
/// Resolves `relative_target` against the directory containing `source_file`
/// and normalizes the result lexically (no filesystem access).
fn resolve_target_path(&self, source_file: &Path, relative_target: &str) -> PathBuf {
    let base_dir = match source_file.parent() {
        Some(dir) => dir,
        None => Path::new(""),
    };
    Self::normalize_path(&base_dir.join(relative_target))
}
/// Lexically normalizes `path` by removing `.` components and resolving `..`
/// against the preceding component, without touching the filesystem.
///
/// * `a/b/../c` becomes `a/c`.
/// * A `..` directly after a root or prefix is dropped (`/../a` becomes `/a`),
///   so an absolute path never loses its root.
/// * A `..` with nothing left to cancel is kept (`../a` stays `../a`), so a
///   relative path that escapes its base does not silently resolve to a
///   different file.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components: Vec<Component<'_>> = Vec::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match components.last() {
                // `dir/..` cancels out.
                Some(Component::Normal(_)) => {
                    components.pop();
                }
                // There is nothing above the root.
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Leading (or stacked) `..` cannot be resolved lexically; keep it.
                _ => components.push(component),
            },
            _ => components.push(component),
        }
    }
    components.iter().collect()
}
}
impl FileIndex {
pub fn new() -> Self {
Self::default()
}
pub fn with_hash(content_hash: String) -> Self {
Self {
content_hash,
..Default::default()
}
}
pub fn add_heading(&mut self, heading: HeadingIndex) {
let index = self.headings.len();
self.anchor_to_heading.insert(heading.auto_anchor.to_lowercase(), index);
if let Some(ref custom) = heading.custom_anchor {
self.anchor_to_heading.insert(custom.to_lowercase(), index);
}
self.headings.push(heading);
}
pub fn add_anchor_alias(&mut self, anchor: String, heading_index: usize) {
if heading_index < self.headings.len() {
self.anchor_to_heading.insert(anchor.to_lowercase(), heading_index);
}
}
pub fn has_anchor(&self, anchor: &str) -> bool {
let lower = anchor.to_lowercase();
if self.anchor_to_heading.contains_key(&lower)
|| self.html_anchors.contains(&lower)
|| self.attribute_anchors.contains(&lower)
{
return true;
}
if anchor.contains('%') {
let decoded = url_decode(anchor).to_lowercase();
if decoded != lower {
return self.anchor_to_heading.contains_key(&decoded)
|| self.html_anchors.contains(&decoded)
|| self.attribute_anchors.contains(&decoded);
}
}
false
}
pub fn add_html_anchor(&mut self, anchor: String) {
if !anchor.is_empty() {
self.html_anchors.insert(anchor.to_lowercase());
}
}
pub fn add_attribute_anchor(&mut self, anchor: String) {
if !anchor.is_empty() {
self.attribute_anchors.insert(anchor.to_lowercase());
}
}
pub fn get_heading_by_anchor(&self, anchor: &str) -> Option<&HeadingIndex> {
self.anchor_to_heading
.get(&anchor.to_lowercase())
.and_then(|&idx| self.headings.get(idx))
}
pub fn add_reference_link(&mut self, link: ReferenceLinkIndex) {
self.reference_links.push(link);
}
pub fn is_rule_disabled_at_line(&self, rule_name: &str, line: usize) -> bool {
if self.file_disabled_rules.contains("*") || self.file_disabled_rules.contains(rule_name) {
return true;
}
if let Some(rules) = self.line_disabled_rules.get(&line)
&& (rules.contains("*") || rules.contains(rule_name))
{
return true;
}
if !self.persistent_transitions.is_empty() {
let idx = match self.persistent_transitions.binary_search_by_key(&line, |t| t.0) {
Ok(i) => Some(i),
Err(i) => {
if i > 0 {
Some(i - 1)
} else {
None
}
}
};
if let Some(i) = idx {
let (_, ref disabled, ref enabled) = self.persistent_transitions[i];
if disabled.contains("*") {
return !enabled.contains(rule_name);
}
return disabled.contains(rule_name);
}
}
false
}
pub fn add_cross_file_link(&mut self, link: CrossFileLinkIndex) {
let is_duplicate = self.cross_file_links.iter().any(|existing| {
existing.target_path == link.target_path && existing.fragment == link.fragment && existing.line == link.line
});
if !is_duplicate {
self.cross_file_links.push(link);
}
}
pub fn add_defined_reference(&mut self, ref_id: String) {
self.defined_references.insert(ref_id);
}
pub fn has_defined_reference(&self, ref_id: &str) -> bool {
self.defined_references.contains(ref_id)
}
pub fn hash_matches(&self, hash: &str) -> bool {
self.content_hash == hash
}
pub fn heading_count(&self) -> usize {
self.headings.len()
}
pub fn reference_link_count(&self) -> usize {
self.reference_links.len()
}
}
// Unit tests for the workspace index, the per-file index, the on-disk cache
// (behind the `native` feature), and cross-file link extraction.
#[cfg(test)]
mod tests {
use super::*;
// Inserting a file updates the file count, the version, and membership checks.
#[test]
fn test_workspace_index_basic() {
let mut index = WorkspaceIndex::new();
assert_eq!(index.file_count(), 0);
assert_eq!(index.version(), 0);
let mut file_index = FileIndex::with_hash("abc123".to_string());
file_index.add_heading(HeadingIndex {
text: "Installation".to_string(),
auto_anchor: "installation".to_string(),
custom_anchor: None,
line: 1,
is_setext: false,
});
index.insert_file(PathBuf::from("docs/install.md"), file_index);
assert_eq!(index.file_count(), 1);
assert_eq!(index.version(), 1);
assert!(index.contains_file(Path::new("docs/install.md")));
assert!(!index.contains_file(Path::new("docs/other.md")));
}
// Only headings without a custom anchor are reported as vulnerable.
#[test]
fn test_vulnerable_anchors() {
let mut index = WorkspaceIndex::new();
let mut file1 = FileIndex::new();
file1.add_heading(HeadingIndex {
text: "Getting Started".to_string(),
auto_anchor: "getting-started".to_string(),
custom_anchor: None,
line: 1,
is_setext: false,
});
index.insert_file(PathBuf::from("docs/guide.md"), file1);
let mut file2 = FileIndex::new();
file2.add_heading(HeadingIndex {
text: "Installation".to_string(),
auto_anchor: "installation".to_string(),
custom_anchor: Some("install".to_string()),
line: 1,
is_setext: false,
});
index.insert_file(PathBuf::from("docs/install.md"), file2);
let vulnerable = index.get_vulnerable_anchors();
assert_eq!(vulnerable.len(), 1);
assert!(vulnerable.contains_key("getting-started"));
assert!(!vulnerable.contains_key("installation"));
let anchors = vulnerable.get("getting-started").unwrap();
assert_eq!(anchors.len(), 1);
assert_eq!(anchors[0].file, PathBuf::from("docs/guide.md"));
assert_eq!(anchors[0].text, "Getting Started");
}
// The same auto anchor in several files is grouped under one key; the file
// whose heading has a custom anchor is excluded.
#[test]
fn test_vulnerable_anchors_multiple_files_same_anchor() {
let mut index = WorkspaceIndex::new();
let mut file1 = FileIndex::new();
file1.add_heading(HeadingIndex {
text: "Installation".to_string(),
auto_anchor: "installation".to_string(),
custom_anchor: None,
line: 1,
is_setext: false,
});
index.insert_file(PathBuf::from("docs/en/guide.md"), file1);
let mut file2 = FileIndex::new();
file2.add_heading(HeadingIndex {
text: "Installation".to_string(),
auto_anchor: "installation".to_string(),
custom_anchor: None,
line: 5,
is_setext: false,
});
index.insert_file(PathBuf::from("docs/fr/guide.md"), file2);
let mut file3 = FileIndex::new();
file3.add_heading(HeadingIndex {
text: "Installation".to_string(),
auto_anchor: "installation".to_string(),
custom_anchor: Some("install".to_string()),
line: 10,
is_setext: false,
});
index.insert_file(PathBuf::from("docs/de/guide.md"), file3);
let vulnerable = index.get_vulnerable_anchors();
assert_eq!(vulnerable.len(), 1); assert!(vulnerable.contains_key("installation"));
let anchors = vulnerable.get("installation").unwrap();
assert_eq!(anchors.len(), 2, "Should collect both vulnerable anchors");
let files: std::collections::HashSet<_> = anchors.iter().map(|a| &a.file).collect();
assert!(files.contains(&PathBuf::from("docs/en/guide.md")));
assert!(files.contains(&PathBuf::from("docs/fr/guide.md")));
}
// `hash_matches` compares against the hash given at construction.
#[test]
fn test_file_index_hash() {
let index = FileIndex::with_hash("hash123".to_string());
assert!(index.hash_matches("hash123"));
assert!(!index.hash_matches("other"));
}
// The version grows on insert and on successful removal, but not when the
// removed path was never indexed.
#[test]
fn test_version_increment() {
let mut index = WorkspaceIndex::new();
assert_eq!(index.version(), 0);
index.insert_file(PathBuf::from("a.md"), FileIndex::new());
assert_eq!(index.version(), 1);
index.insert_file(PathBuf::from("b.md"), FileIndex::new());
assert_eq!(index.version(), 2);
index.remove_file(Path::new("a.md"));
assert_eq!(index.version(), 3);
index.remove_file(Path::new("nonexistent.md"));
assert_eq!(index.version(), 3);
}
// A link from a.md to b.md makes a.md a dependent of b.md, not the reverse.
#[test]
fn test_reverse_deps_basic() {
let mut index = WorkspaceIndex::new();
let mut file_a = FileIndex::new();
file_a.add_cross_file_link(CrossFileLinkIndex {
target_path: "b.md".to_string(),
fragment: "section".to_string(),
line: 10,
column: 5,
});
index.update_file(Path::new("docs/a.md"), file_a);
let dependents = index.get_dependents(Path::new("docs/b.md"));
assert_eq!(dependents.len(), 1);
assert_eq!(dependents[0], PathBuf::from("docs/a.md"));
let a_dependents = index.get_dependents(Path::new("docs/a.md"));
assert!(a_dependents.is_empty());
}
// Links from different directories (including `../`) resolve to the same target.
#[test]
fn test_reverse_deps_multiple() {
let mut index = WorkspaceIndex::new();
let mut file_a = FileIndex::new();
file_a.add_cross_file_link(CrossFileLinkIndex {
target_path: "../b.md".to_string(),
fragment: "".to_string(),
line: 1,
column: 1,
});
index.update_file(Path::new("docs/sub/a.md"), file_a);
let mut file_c = FileIndex::new();
file_c.add_cross_file_link(CrossFileLinkIndex {
target_path: "b.md".to_string(),
fragment: "".to_string(),
line: 1,
column: 1,
});
index.update_file(Path::new("docs/c.md"), file_c);
let dependents = index.get_dependents(Path::new("docs/b.md"));
assert_eq!(dependents.len(), 2);
assert!(dependents.contains(&PathBuf::from("docs/sub/a.md")));
assert!(dependents.contains(&PathBuf::from("docs/c.md")));
}
// Re-indexing a file replaces the reverse dependencies it contributed before.
#[test]
fn test_reverse_deps_update_clears_old() {
let mut index = WorkspaceIndex::new();
let mut file_a = FileIndex::new();
file_a.add_cross_file_link(CrossFileLinkIndex {
target_path: "b.md".to_string(),
fragment: "".to_string(),
line: 1,
column: 1,
});
index.update_file(Path::new("docs/a.md"), file_a);
assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
let mut file_a_updated = FileIndex::new();
file_a_updated.add_cross_file_link(CrossFileLinkIndex {
target_path: "c.md".to_string(),
fragment: "".to_string(),
line: 1,
column: 1,
});
index.update_file(Path::new("docs/a.md"), file_a_updated);
assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
let c_deps = index.get_dependents(Path::new("docs/c.md"));
assert_eq!(c_deps.len(), 1);
assert_eq!(c_deps[0], PathBuf::from("docs/a.md"));
}
// Removing a file removes it from the dependents of its link targets.
#[test]
fn test_reverse_deps_remove_file() {
let mut index = WorkspaceIndex::new();
let mut file_a = FileIndex::new();
file_a.add_cross_file_link(CrossFileLinkIndex {
target_path: "b.md".to_string(),
fragment: "".to_string(),
line: 1,
column: 1,
});
index.update_file(Path::new("docs/a.md"), file_a);
assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
index.remove_file(Path::new("docs/a.md"));
assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
}
// `..` and `.` components are resolved lexically.
#[test]
fn test_normalize_path() {
let path = Path::new("docs/sub/../other.md");
let normalized = WorkspaceIndex::normalize_path(path);
assert_eq!(normalized, PathBuf::from("docs/other.md"));
let path2 = Path::new("docs/./other.md");
let normalized2 = WorkspaceIndex::normalize_path(path2);
assert_eq!(normalized2, PathBuf::from("docs/other.md"));
let path3 = Path::new("a/b/c/../../d.md");
let normalized3 = WorkspaceIndex::normalize_path(path3);
assert_eq!(normalized3, PathBuf::from("a/d.md"));
}
// `clear` empties both the file map and the reverse dependencies.
#[test]
fn test_clear_clears_reverse_deps() {
let mut index = WorkspaceIndex::new();
let mut file_a = FileIndex::new();
file_a.add_cross_file_link(CrossFileLinkIndex {
target_path: "b.md".to_string(),
fragment: "".to_string(),
line: 1,
column: 1,
});
index.update_file(Path::new("docs/a.md"), file_a);
assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
index.clear();
assert_eq!(index.file_count(), 0);
assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
}
// A file is stale when it is unknown or its stored hash differs.
#[test]
fn test_is_file_stale() {
let mut index = WorkspaceIndex::new();
assert!(index.is_file_stale(Path::new("nonexistent.md"), "hash123"));
let file_index = FileIndex::with_hash("hash123".to_string());
index.insert_file(PathBuf::from("docs/test.md"), file_index);
assert!(!index.is_file_stale(Path::new("docs/test.md"), "hash123"));
assert!(index.is_file_stale(Path::new("docs/test.md"), "different_hash"));
}
// Saving and loading the cache preserves files, headings, links, and
// reverse dependencies.
#[cfg(feature = "native")]
#[test]
fn test_cache_roundtrip() {
use std::fs;
let temp_dir = std::env::temp_dir().join("rumdl_test_cache_roundtrip");
let _ = fs::remove_dir_all(&temp_dir);
fs::create_dir_all(&temp_dir).unwrap();
let mut index = WorkspaceIndex::new();
let mut file1 = FileIndex::with_hash("abc123".to_string());
file1.add_heading(HeadingIndex {
text: "Test Heading".to_string(),
auto_anchor: "test-heading".to_string(),
custom_anchor: Some("test".to_string()),
line: 1,
is_setext: false,
});
file1.add_cross_file_link(CrossFileLinkIndex {
target_path: "./other.md".to_string(),
fragment: "section".to_string(),
line: 5,
column: 3,
});
index.update_file(Path::new("docs/file1.md"), file1);
let mut file2 = FileIndex::with_hash("def456".to_string());
file2.add_heading(HeadingIndex {
text: "Another Heading".to_string(),
auto_anchor: "another-heading".to_string(),
custom_anchor: None,
line: 1,
is_setext: false,
});
index.update_file(Path::new("docs/other.md"), file2);
index.save_to_cache(&temp_dir).expect("Failed to save cache");
assert!(temp_dir.join("workspace_index.bin").exists());
let loaded = WorkspaceIndex::load_from_cache(&temp_dir).expect("Failed to load cache");
assert_eq!(loaded.file_count(), 2);
assert!(loaded.contains_file(Path::new("docs/file1.md")));
assert!(loaded.contains_file(Path::new("docs/other.md")));
let file1_loaded = loaded.get_file(Path::new("docs/file1.md")).unwrap();
assert_eq!(file1_loaded.content_hash, "abc123");
assert_eq!(file1_loaded.headings.len(), 1);
assert_eq!(file1_loaded.headings[0].text, "Test Heading");
assert_eq!(file1_loaded.headings[0].custom_anchor, Some("test".to_string()));
assert_eq!(file1_loaded.cross_file_links.len(), 1);
assert_eq!(file1_loaded.cross_file_links[0].target_path, "./other.md");
let dependents = loaded.get_dependents(Path::new("docs/other.md"));
assert_eq!(dependents.len(), 1);
assert_eq!(dependents[0], PathBuf::from("docs/file1.md"));
let _ = fs::remove_dir_all(&temp_dir);
}
// Loading from a directory without a cache file yields `None`.
#[cfg(feature = "native")]
#[test]
fn test_cache_missing_file() {
let temp_dir = std::env::temp_dir().join("rumdl_test_cache_missing");
let _ = std::fs::remove_dir_all(&temp_dir);
let result = WorkspaceIndex::load_from_cache(&temp_dir);
assert!(result.is_none());
}
// A cache file shorter than the header is rejected and deleted.
#[cfg(feature = "native")]
#[test]
fn test_cache_corrupted_file() {
use std::fs;
let temp_dir = std::env::temp_dir().join("rumdl_test_cache_corrupted");
let _ = fs::remove_dir_all(&temp_dir);
fs::create_dir_all(&temp_dir).unwrap();
fs::write(temp_dir.join("workspace_index.bin"), b"bad").unwrap();
let result = WorkspaceIndex::load_from_cache(&temp_dir);
assert!(result.is_none());
assert!(!temp_dir.join("workspace_index.bin").exists());
let _ = fs::remove_dir_all(&temp_dir);
}
// A cache file with the wrong magic bytes is rejected and deleted.
#[cfg(feature = "native")]
#[test]
fn test_cache_invalid_magic() {
use std::fs;
let temp_dir = std::env::temp_dir().join("rumdl_test_cache_invalid_magic");
let _ = fs::remove_dir_all(&temp_dir);
fs::create_dir_all(&temp_dir).unwrap();
let mut data = Vec::new();
data.extend_from_slice(b"XXXX"); data.extend_from_slice(&1u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
let result = WorkspaceIndex::load_from_cache(&temp_dir);
assert!(result.is_none());
assert!(!temp_dir.join("workspace_index.bin").exists());
let _ = fs::remove_dir_all(&temp_dir);
}
// A cache file with a different format version is rejected and deleted.
#[cfg(feature = "native")]
#[test]
fn test_cache_version_mismatch() {
use std::fs;
let temp_dir = std::env::temp_dir().join("rumdl_test_cache_version_mismatch");
let _ = fs::remove_dir_all(&temp_dir);
fs::create_dir_all(&temp_dir).unwrap();
let mut data = Vec::new();
data.extend_from_slice(b"RWSI"); data.extend_from_slice(&999u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
let result = WorkspaceIndex::load_from_cache(&temp_dir);
assert!(result.is_none());
assert!(!temp_dir.join("workspace_index.bin").exists());
let _ = fs::remove_dir_all(&temp_dir);
}
// After a successful save only the final cache file remains (no temp file).
#[cfg(feature = "native")]
#[test]
fn test_cache_atomic_write() {
use std::fs;
let temp_dir = std::env::temp_dir().join("rumdl_test_cache_atomic");
let _ = fs::remove_dir_all(&temp_dir);
fs::create_dir_all(&temp_dir).unwrap();
let index = WorkspaceIndex::new();
index.save_to_cache(&temp_dir).expect("Failed to save");
let entries: Vec<_> = fs::read_dir(&temp_dir).unwrap().collect();
assert_eq!(entries.len(), 1);
assert!(temp_dir.join("workspace_index.bin").exists());
let _ = fs::remove_dir_all(&temp_dir);
}
// Auto-generated anchors are matched case-insensitively.
#[test]
fn test_has_anchor_auto_generated() {
let mut file_index = FileIndex::new();
file_index.add_heading(HeadingIndex {
text: "Installation Guide".to_string(),
auto_anchor: "installation-guide".to_string(),
custom_anchor: None,
line: 1,
is_setext: false,
});
assert!(file_index.has_anchor("installation-guide"));
assert!(file_index.has_anchor("Installation-Guide"));
assert!(file_index.has_anchor("INSTALLATION-GUIDE"));
assert!(!file_index.has_anchor("nonexistent"));
}
// Both the auto anchor and the custom anchor of a heading are recognized.
#[test]
fn test_has_anchor_custom() {
let mut file_index = FileIndex::new();
file_index.add_heading(HeadingIndex {
text: "Installation Guide".to_string(),
auto_anchor: "installation-guide".to_string(),
custom_anchor: Some("install".to_string()),
line: 1,
is_setext: false,
});
assert!(file_index.has_anchor("installation-guide"));
assert!(file_index.has_anchor("install"));
assert!(file_index.has_anchor("Install"));
assert!(!file_index.has_anchor("nonexistent"));
}
// Headings can be looked up by auto anchor or custom anchor.
#[test]
fn test_get_heading_by_anchor() {
let mut file_index = FileIndex::new();
file_index.add_heading(HeadingIndex {
text: "Installation Guide".to_string(),
auto_anchor: "installation-guide".to_string(),
custom_anchor: Some("install".to_string()),
line: 10,
is_setext: false,
});
file_index.add_heading(HeadingIndex {
text: "Configuration".to_string(),
auto_anchor: "configuration".to_string(),
custom_anchor: None,
line: 20,
is_setext: false,
});
let heading = file_index.get_heading_by_anchor("installation-guide");
assert!(heading.is_some());
assert_eq!(heading.unwrap().text, "Installation Guide");
assert_eq!(heading.unwrap().line, 10);
let heading = file_index.get_heading_by_anchor("install");
assert!(heading.is_some());
assert_eq!(heading.unwrap().text, "Installation Guide");
let heading = file_index.get_heading_by_anchor("configuration");
assert!(heading.is_some());
assert_eq!(heading.unwrap().text, "Configuration");
assert_eq!(heading.unwrap().line, 20);
assert!(file_index.get_heading_by_anchor("nonexistent").is_none());
}
// Anchor lookup keeps working with many headings in one file.
#[test]
fn test_anchor_lookup_many_headings() {
let mut file_index = FileIndex::new();
for i in 0..100 {
file_index.add_heading(HeadingIndex {
text: format!("Heading {i}"),
auto_anchor: format!("heading-{i}"),
custom_anchor: Some(format!("h{i}")),
line: i + 1,
is_setext: false,
});
}
for i in 0..100 {
assert!(file_index.has_anchor(&format!("heading-{i}")));
assert!(file_index.has_anchor(&format!("h{i}")));
let heading = file_index.get_heading_by_anchor(&format!("heading-{i}"));
assert!(heading.is_some());
assert_eq!(heading.unwrap().line, i + 1);
}
}
// A plain relative Markdown link is extracted with its line and column.
#[test]
fn test_extract_cross_file_links_basic() {
use crate::config::MarkdownFlavor;
let content = "# Test\n\nSee [link](./other.md) for info.\n";
let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
let links = extract_cross_file_links(&ctx);
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_path, "./other.md");
assert_eq!(links[0].fragment, "");
assert_eq!(links[0].line, 3);
assert_eq!(links[0].column, 12);
}
// The fragment is split from the path and stored without `#`.
#[test]
fn test_extract_cross_file_links_with_fragment() {
use crate::config::MarkdownFlavor;
let content = "Check [guide](./guide.md#install) here.\n";
let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
let links = extract_cross_file_links(&ctx);
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_path, "./guide.md");
assert_eq!(links[0].fragment, "install");
assert_eq!(links[0].line, 1);
assert_eq!(links[0].column, 15);
}
// Several links on one line are all extracted, each with its own column.
#[test]
fn test_extract_cross_file_links_multiple_on_same_line() {
use crate::config::MarkdownFlavor;
let content = "See [a](a.md) and [b](b.md) here.\n";
let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
let links = extract_cross_file_links(&ctx);
assert_eq!(links.len(), 2);
assert_eq!(links[0].target_path, "a.md");
assert_eq!(links[0].line, 1);
assert_eq!(links[0].column, 9);
assert_eq!(links[1].target_path, "b.md");
assert_eq!(links[1].line, 1);
assert_eq!(links[1].column, 23);
}
// Angle-bracket destinations may contain spaces and parentheses.
#[test]
fn test_extract_cross_file_links_angle_brackets() {
use crate::config::MarkdownFlavor;
let content = "See [link](<path/with (parens).md>) here.\n";
let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
let links = extract_cross_file_links(&ctx);
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_path, "path/with (parens).md");
assert_eq!(links[0].line, 1);
assert_eq!(links[0].column, 13);
}
// External, mailto, fragment-only, and absolute links are not extracted.
#[test]
fn test_extract_cross_file_links_skips_external() {
use crate::config::MarkdownFlavor;
let content = r#"
[external](https://example.com)
[mailto](mailto:test@example.com)
[local](./local.md)
[fragment](#section)
[absolute](/docs/page.md)
"#;
let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
let links = extract_cross_file_links(&ctx);
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_path, "./local.md");
}
// Links to non-Markdown files are not extracted.
#[test]
fn test_extract_cross_file_links_skips_non_markdown() {
use crate::config::MarkdownFlavor;
let content = r#"
[image](./photo.png)
[doc](./readme.md)
[pdf](./document.pdf)
"#;
let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
let links = extract_cross_file_links(&ctx);
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_path, "./readme.md");
}
// Link syntax inside a code span is ignored.
#[test]
fn test_extract_cross_file_links_skips_code_spans() {
use crate::config::MarkdownFlavor;
let content = "Normal [link](./file.md) and `[code](./ignored.md)` here.\n";
let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
let links = extract_cross_file_links(&ctx);
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_path, "./file.md");
}
// Query parameters are stripped from the stored target path.
#[test]
fn test_extract_cross_file_links_with_query_params() {
use crate::config::MarkdownFlavor;
let content = "See [doc](./file.md?raw=true) here.\n";
let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
let links = extract_cross_file_links(&ctx);
assert_eq!(links.len(), 1);
assert_eq!(links[0].target_path, "./file.md");
}
// Empty content yields no links.
#[test]
fn test_extract_cross_file_links_empty_content() {
use crate::config::MarkdownFlavor;
let content = "";
let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
let links = extract_cross_file_links(&ctx);
assert!(links.is_empty());
}
// Content without any link syntax yields no links.
#[test]
fn test_extract_cross_file_links_no_links() {
use crate::config::MarkdownFlavor;
let content = "# Just a heading\n\nSome text without links.\n";
let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
let links = extract_cross_file_links(&ctx);
assert!(links.is_empty());
}
// Regression test (issue 234): reported line and column must point at the
// link destination.
#[test]
fn test_extract_cross_file_links_position_accuracy_issue_234() {
use crate::config::MarkdownFlavor;
let content = r#"# Test Document
Here is a [broken link](nonexistent-file.md) that should trigger MD057.
And another [link](also-missing.md) on this line.
"#;
let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
let links = extract_cross_file_links(&ctx);
assert_eq!(links.len(), 2);
assert_eq!(links[0].target_path, "nonexistent-file.md");
assert_eq!(links[0].line, 3);
assert_eq!(links[0].column, 25);
assert_eq!(links[1].target_path, "also-missing.md");
assert_eq!(links[1].line, 5);
assert_eq!(links[1].column, 20);
}
}