rumdl_lib/
workspace_index.rs

1//! Workspace-wide index for cross-file analysis
2//!
3//! This module provides infrastructure for rules that need to validate
4//! references across multiple files, such as MD051 which validates that
5//! cross-file link fragments point to valid headings.
6//!
7//! The index is built in parallel and designed for minimal memory overhead.
8//!
9//! ## Cache Format
10//!
11//! The workspace index can be persisted to disk for faster startup on
12//! repeated runs. The cache format includes a version header to detect
13//! incompatible format changes:
14//!
15//! ```text
16//! [4 bytes: magic "RWSI" - Rumdl Workspace Index]
17//! [4 bytes: format version (u32 little-endian)]
18//! [N bytes: postcard-serialized WorkspaceIndex]
19//! ```
20
21use regex::Regex;
22use serde::{Deserialize, Serialize};
23use std::collections::{HashMap, HashSet};
24use std::path::{Path, PathBuf};
25use std::sync::LazyLock;
26
27use crate::lint_context::LintContext;
28use crate::utils::element_cache::ElementCache;
29
30// =============================================================================
31// Shared cross-file link extraction utilities
32//
33// These regexes and helpers are the canonical implementation for extracting
34// cross-file links. Both MD057 and LSP use this shared code path for correct
35// position tracking.
36// =============================================================================
37
/// Regex to match the start of a link or image: an optional leading `!`
/// followed by a bracketed text part `[...]` that contains no `]`.
///
/// Only the text part is matched here; the `(...)` destination that may follow
/// is extracted separately by the URL regexes.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
40
/// Regex to extract the URL from an angle-bracketed markdown link
/// Format: `](<URL>)` or `](<URL> "title")`
///
/// Capture group 1 is the text between `<` and `>`. Optional capture group 2
/// is a `#fragment` (including the `#`) that directly follows the closing `>`.
/// Only double-quoted titles are recognized.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
45
/// Regex to extract the URL from a normal markdown link (without angle brackets)
/// Format: `](URL)` or `](URL "title")`
///
/// Capture group 1 is the destination up to the first whitespace, `#`, `)` or
/// `>`. Optional capture group 2 is the `#fragment` (including the `#`).
/// Only double-quoted titles are recognized.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"]\(\s*([^>)\s#]+)(#[^)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
50
/// Regex to detect URLs with explicit schemes
///
/// Matches, anchored at the start of the string, any of:
/// - a `scheme://` prefix,
/// - a bare `scheme:` prefix (no slashes required, e.g. `mailto:`),
/// - a leading `www.`.
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
54
/// Supported markdown file extensions
///
/// Every entry is lowercase and includes the leading dot, so it can be
/// compared directly against the suffix of a lowercased path.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
67
68/// Check if a path has a markdown extension (case-insensitive)
69#[inline]
70fn is_markdown_file(path: &str) -> bool {
71    let path_lower = path.to_lowercase();
72    MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
73}
74
/// Drop any query string and/or fragment from a URL path.
///
/// Returns the slice of `url` that precedes the first `?` or `#`, whichever
/// comes first. When neither character is present the input is returned
/// unchanged.
fn strip_query_and_fragment(url: &str) -> &str {
    // The earliest of the two delimiters marks the end of the path portion.
    match url.find(['?', '#']) {
        Some(cut) => &url[..cut],
        None => url,
    }
}
88
89/// Extract cross-file links from content using correct regex-based position tracking.
90///
91/// This is the canonical implementation used by both MD057 and LSP to ensure
92/// consistent and correct column positions for diagnostic reporting.
93///
94/// Returns a vector of `CrossFileLinkIndex` entries, one for each markdown file
95/// link found in the content.
96pub fn extract_cross_file_links(ctx: &LintContext) -> Vec<CrossFileLinkIndex> {
97    let content = ctx.content;
98
99    // Early returns for performance
100    if content.is_empty() || !content.contains("](") {
101        return Vec::new();
102    }
103
104    let mut links = Vec::new();
105    let lines: Vec<&str> = content.lines().collect();
106    let element_cache = ElementCache::new(content);
107    let line_index = &ctx.line_index;
108
109    // Track which lines we've already processed to avoid duplicates
110    // (ctx.links may have multiple entries for the same line)
111    let mut processed_lines = HashSet::new();
112
113    for link in &ctx.links {
114        let line_idx = link.line - 1;
115        if line_idx >= lines.len() {
116            continue;
117        }
118
119        // Skip if we've already processed this line
120        if !processed_lines.insert(line_idx) {
121            continue;
122        }
123
124        let line = lines[line_idx];
125        if !line.contains("](") {
126            continue;
127        }
128
129        // Find all links in this line
130        for link_match in LINK_START_REGEX.find_iter(line) {
131            let start_pos = link_match.start();
132            let end_pos = link_match.end();
133
134            // Calculate absolute position for code span detection
135            let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
136            let absolute_start_pos = line_start_byte + start_pos;
137
138            // Skip if in code span
139            if element_cache.is_in_code_span(absolute_start_pos) {
140                continue;
141            }
142
143            // Extract the URL (group 1) and fragment (group 2)
144            // Try angle-bracket regex first (handles URLs with parens)
145            let caps_result = URL_EXTRACT_ANGLE_BRACKET_REGEX
146                .captures_at(line, end_pos - 1)
147                .or_else(|| URL_EXTRACT_REGEX.captures_at(line, end_pos - 1));
148
149            if let Some(caps) = caps_result
150                && let Some(url_group) = caps.get(1)
151            {
152                let file_path = url_group.as_str().trim();
153
154                // Skip empty, external, template variables, absolute URL paths,
155                // framework aliases, or fragment-only URLs
156                if file_path.is_empty()
157                    || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
158                    || file_path.starts_with("www.")
159                    || file_path.starts_with('#')
160                    || file_path.starts_with("{{")
161                    || file_path.starts_with("{%")
162                    || file_path.starts_with('/')
163                    || file_path.starts_with('~')
164                    || file_path.starts_with('@')
165                {
166                    continue;
167                }
168
169                // Strip query parameters before indexing
170                let file_path = strip_query_and_fragment(file_path);
171
172                // Get fragment from capture group 2 (includes # prefix)
173                let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
174
175                // Only index markdown file links for cross-file validation
176                if is_markdown_file(file_path) {
177                    links.push(CrossFileLinkIndex {
178                        target_path: file_path.to_string(),
179                        fragment: fragment.to_string(),
180                        line: link.line,
181                        column: url_group.start() + 1,
182                    });
183                }
184            }
185        }
186    }
187
188    links
189}
190
/// Magic bytes identifying a workspace index cache file
///
/// Written as the first four bytes of the cache file and compared on load;
/// a file that starts with anything else is discarded.
#[cfg(feature = "native")]
const CACHE_MAGIC: &[u8; 4] = b"RWSI";
194
/// Cache format version - increment when WorkspaceIndex serialization changes
///
/// Stored as a little-endian `u32` right after the magic bytes. A cache file
/// carrying any other version is discarded on load.
#[cfg(feature = "native")]
const CACHE_FORMAT_VERSION: u32 = 5;
198
/// Cache file name within the version directory
///
/// Joined onto the cache directory passed to the save/load functions; the
/// temporary file used while saving derives its name from this one.
#[cfg(feature = "native")]
const CACHE_FILE_NAME: &str = "workspace_index.bin";
202
/// Workspace-wide index for cross-file analysis
///
/// Contains pre-extracted information from all markdown files in the workspace,
/// enabling rules to validate cross-file references efficiently.
///
/// All three fields are serialized, so the reverse dependency graph and the
/// version counter are part of the persisted cache.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct WorkspaceIndex {
    /// Map from file path to its extracted data
    files: HashMap<PathBuf, FileIndex>,
    /// Reverse dependency graph: target file → files that link to it
    /// Used to efficiently re-lint dependent files when a target changes.
    /// Keys are link targets resolved relative to the linking file and then
    /// lexically normalized; values are the paths of the linking files.
    reverse_deps: HashMap<PathBuf, HashSet<PathBuf>>,
    /// Version counter for cache invalidation (incremented with wrapping
    /// arithmetic by the mutating methods)
    version: u64,
}
217
/// Index data extracted from a single file
///
/// The three anchor collections are private and populated through the
/// `add_*` methods, which lowercase anchors before storing them.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FileIndex {
    /// Headings in this file with their anchors
    pub headings: Vec<HeadingIndex>,
    /// Reference links in this file (for cross-file analysis)
    pub reference_links: Vec<ReferenceLinkIndex>,
    /// Cross-file links in this file (for MD051 cross-file validation)
    pub cross_file_links: Vec<CrossFileLinkIndex>,
    /// Defined reference IDs (e.g., from [ref]: url definitions)
    /// Used to filter out reference links that have explicit definitions
    pub defined_references: HashSet<String>,
    /// Content hash for change detection (compared as an opaque string)
    pub content_hash: String,
    /// O(1) anchor lookup: lowercased anchor → position in `headings`
    /// Includes both auto-generated and custom anchors
    anchor_to_heading: HashMap<String, usize>,
    /// HTML anchors defined via <a id="..."> or <element id="..."> tags
    /// Stored lowercase for case-insensitive matching
    html_anchors: HashSet<String>,
    /// Attribute anchors defined via { #id } syntax (kramdown/MkDocs attr_list)
    /// Can appear on any element, not just headings
    /// Stored lowercase for case-insensitive matching
    attribute_anchors: HashSet<String>,
    /// Rules disabled for the entire file (from inline comments)
    /// Used by cross-file rules to respect inline disable directives.
    /// The entry `"*"` disables every rule.
    pub file_disabled_rules: HashSet<String>,
    /// Rules disabled at specific lines (line number -> set of rule names)
    /// Merges both persistent disables and line-specific disables.
    /// The entry `"*"` disables every rule on that line.
    pub line_disabled_rules: HashMap<usize, HashSet<String>>,
}
249
/// Information about a heading for cross-file lookup
///
/// Anchors are stored as given; lowercasing for case-insensitive lookup is
/// done by the code that indexes or queries them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingIndex {
    /// The heading text (e.g., "Installation Guide")
    pub text: String,
    /// Auto-generated anchor (e.g., "installation-guide")
    pub auto_anchor: String,
    /// Custom anchor if present (e.g., "install"); `None` when the heading
    /// has no explicit ID
    pub custom_anchor: Option<String>,
    /// Line number (1-indexed)
    pub line: usize,
}
262
/// Information about a reference link for cross-file analysis
///
/// Records where a `[text][ref]` style link occurs and which reference ID
/// it uses.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceLinkIndex {
    /// The reference ID (the part in [text][ref])
    pub reference_id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Column number (1-indexed)
    pub column: usize,
}
273
/// Information about a cross-file link for validation
///
/// `target_path` is kept exactly as written in the link; resolving it against
/// the linking file's directory is left to the consumer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossFileLinkIndex {
    /// The target file path (relative, as it appears in the link)
    pub target_path: String,
    /// The fragment/anchor being linked to (without #); empty when the link
    /// has no fragment
    pub fragment: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Column number (1-indexed). Which part of the link it points at depends
    /// on the producer (link start vs. URL start).
    pub column: usize,
}
286
/// Information about a vulnerable anchor (heading without custom ID)
///
/// Such a heading is reachable only through its auto-generated anchor, which
/// is derived from the heading text.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnerableAnchor {
    /// File path where the heading is located
    pub file: PathBuf,
    /// Line number of the heading (copied from the heading's index entry)
    pub line: usize,
    /// The heading text
    pub text: String,
}
297
298impl WorkspaceIndex {
299    /// Create a new empty workspace index
300    pub fn new() -> Self {
301        Self::default()
302    }
303
304    /// Get the current version (for cache invalidation)
305    pub fn version(&self) -> u64 {
306        self.version
307    }
308
309    /// Get the number of indexed files
310    pub fn file_count(&self) -> usize {
311        self.files.len()
312    }
313
314    /// Check if a file is in the index
315    pub fn contains_file(&self, path: &Path) -> bool {
316        self.files.contains_key(path)
317    }
318
319    /// Get the index data for a specific file
320    pub fn get_file(&self, path: &Path) -> Option<&FileIndex> {
321        self.files.get(path)
322    }
323
324    /// Insert or update a file's index data
325    pub fn insert_file(&mut self, path: PathBuf, index: FileIndex) {
326        self.files.insert(path, index);
327        self.version = self.version.wrapping_add(1);
328    }
329
330    /// Remove a file from the index
331    pub fn remove_file(&mut self, path: &Path) -> Option<FileIndex> {
332        // Clean up reverse deps for this file
333        self.clear_reverse_deps_for(path);
334
335        let result = self.files.remove(path);
336        if result.is_some() {
337            self.version = self.version.wrapping_add(1);
338        }
339        result
340    }
341
342    /// Build a map of all "vulnerable" anchors across the workspace
343    ///
344    /// A vulnerable anchor is an auto-generated anchor for a heading that
345    /// does NOT have a custom anchor defined. These are problematic for
346    /// translated content because the anchor changes when the heading is translated.
347    ///
348    /// Returns: Map from lowercase anchor → Vec of VulnerableAnchor info
349    /// Multiple files can have headings with the same auto-generated anchor,
350    /// so we collect all occurrences.
351    pub fn get_vulnerable_anchors(&self) -> HashMap<String, Vec<VulnerableAnchor>> {
352        let mut vulnerable: HashMap<String, Vec<VulnerableAnchor>> = HashMap::new();
353
354        for (file_path, file_index) in &self.files {
355            for heading in &file_index.headings {
356                // Only include headings WITHOUT custom anchors
357                if heading.custom_anchor.is_none() && !heading.auto_anchor.is_empty() {
358                    let anchor_key = heading.auto_anchor.to_lowercase();
359                    vulnerable.entry(anchor_key).or_default().push(VulnerableAnchor {
360                        file: file_path.clone(),
361                        line: heading.line,
362                        text: heading.text.clone(),
363                    });
364                }
365            }
366        }
367
368        vulnerable
369    }
370
371    /// Get all headings across the workspace (for debugging/testing)
372    pub fn all_headings(&self) -> impl Iterator<Item = (&Path, &HeadingIndex)> {
373        self.files
374            .iter()
375            .flat_map(|(path, index)| index.headings.iter().map(move |h| (path.as_path(), h)))
376    }
377
378    /// Iterate over all files in the index
379    pub fn files(&self) -> impl Iterator<Item = (&Path, &FileIndex)> {
380        self.files.iter().map(|(p, i)| (p.as_path(), i))
381    }
382
383    /// Clear the entire index
384    pub fn clear(&mut self) {
385        self.files.clear();
386        self.reverse_deps.clear();
387        self.version = self.version.wrapping_add(1);
388    }
389
390    /// Update a file's index and maintain reverse dependencies
391    ///
392    /// This method:
393    /// 1. Removes this file as a source (dependent) from all reverse deps
394    /// 2. Inserts the new file index
395    /// 3. Builds new reverse deps from cross_file_links
396    pub fn update_file(&mut self, path: &Path, index: FileIndex) {
397        // Remove this file as a source (dependent) from all target entries
398        // Note: We don't remove it as a target - other files may still link to it
399        self.clear_reverse_deps_as_source(path);
400
401        // Build new reverse deps from cross_file_links
402        for link in &index.cross_file_links {
403            let target = self.resolve_target_path(path, &link.target_path);
404            self.reverse_deps.entry(target).or_default().insert(path.to_path_buf());
405        }
406
407        self.files.insert(path.to_path_buf(), index);
408        self.version = self.version.wrapping_add(1);
409    }
410
411    /// Get files that depend on (link to) the given file
412    ///
413    /// Returns a list of file paths that contain links targeting this file.
414    /// Used to re-lint dependent files when a target file changes.
415    pub fn get_dependents(&self, path: &Path) -> Vec<PathBuf> {
416        self.reverse_deps
417            .get(path)
418            .map(|set| set.iter().cloned().collect())
419            .unwrap_or_default()
420    }
421
422    /// Check if a file needs re-indexing based on its content hash
423    ///
424    /// Returns `true` if the file is not in the index or has a different hash.
425    pub fn is_file_stale(&self, path: &Path, current_hash: &str) -> bool {
426        self.files
427            .get(path)
428            .map(|f| f.content_hash != current_hash)
429            .unwrap_or(true)
430    }
431
432    /// Retain only files that exist in the given set, removing deleted files
433    ///
434    /// This prunes stale entries from the cache for files that no longer exist.
435    /// Returns the number of files removed.
436    pub fn retain_only(&mut self, current_files: &std::collections::HashSet<PathBuf>) -> usize {
437        let before_count = self.files.len();
438
439        // Collect files to remove
440        let to_remove: Vec<PathBuf> = self
441            .files
442            .keys()
443            .filter(|path| !current_files.contains(*path))
444            .cloned()
445            .collect();
446
447        // Remove each file properly (clears reverse deps)
448        for path in &to_remove {
449            self.remove_file(path);
450        }
451
452        before_count - self.files.len()
453    }
454
455    /// Save the workspace index to a cache file
456    ///
457    /// Uses postcard for efficient binary serialization with:
458    /// - Magic header for file type validation
459    /// - Format version for compatibility detection
460    /// - Atomic writes (temp file + rename) to prevent corruption
461    #[cfg(feature = "native")]
462    pub fn save_to_cache(&self, cache_dir: &Path) -> std::io::Result<()> {
463        use std::fs;
464        use std::io::Write;
465
466        // Ensure cache directory exists
467        fs::create_dir_all(cache_dir)?;
468
469        // Serialize the index data using postcard
470        let encoded = postcard::to_allocvec(self)
471            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
472
473        // Build versioned cache file: [magic][version][data]
474        let mut cache_data = Vec::with_capacity(8 + encoded.len());
475        cache_data.extend_from_slice(CACHE_MAGIC);
476        cache_data.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
477        cache_data.extend_from_slice(&encoded);
478
479        // Write atomically: write to temp file then rename
480        let final_path = cache_dir.join(CACHE_FILE_NAME);
481        let temp_path = cache_dir.join(format!("{}.tmp.{}", CACHE_FILE_NAME, std::process::id()));
482
483        // Write to temp file
484        {
485            let mut file = fs::File::create(&temp_path)?;
486            file.write_all(&cache_data)?;
487            file.sync_all()?;
488        }
489
490        // Atomic rename
491        fs::rename(&temp_path, &final_path)?;
492
493        log::debug!(
494            "Saved workspace index to cache: {} files, {} bytes (format v{})",
495            self.files.len(),
496            cache_data.len(),
497            CACHE_FORMAT_VERSION
498        );
499
500        Ok(())
501    }
502
503    /// Load the workspace index from a cache file
504    ///
505    /// Returns `None` if:
506    /// - Cache file doesn't exist
507    /// - Magic header doesn't match
508    /// - Format version is incompatible
509    /// - Data is corrupted
510    #[cfg(feature = "native")]
511    pub fn load_from_cache(cache_dir: &Path) -> Option<Self> {
512        use std::fs;
513
514        let path = cache_dir.join(CACHE_FILE_NAME);
515        let data = fs::read(&path).ok()?;
516
517        // Validate header: need at least 8 bytes for magic + version
518        if data.len() < 8 {
519            log::warn!("Workspace index cache too small, discarding");
520            let _ = fs::remove_file(&path);
521            return None;
522        }
523
524        // Check magic header
525        if &data[0..4] != CACHE_MAGIC {
526            log::warn!("Workspace index cache has invalid magic header, discarding");
527            let _ = fs::remove_file(&path);
528            return None;
529        }
530
531        // Check format version
532        let version = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
533        if version != CACHE_FORMAT_VERSION {
534            log::info!(
535                "Workspace index cache format version mismatch (got {version}, expected {CACHE_FORMAT_VERSION}), rebuilding"
536            );
537            let _ = fs::remove_file(&path);
538            return None;
539        }
540
541        // Deserialize the index data using postcard
542        match postcard::from_bytes::<Self>(&data[8..]) {
543            Ok(index) => {
544                log::debug!(
545                    "Loaded workspace index from cache: {} files (format v{})",
546                    index.files.len(),
547                    version
548                );
549                Some(index)
550            }
551            Err(e) => {
552                log::warn!("Failed to deserialize workspace index cache: {e}");
553                let _ = fs::remove_file(&path);
554                None
555            }
556        }
557    }
558
559    /// Remove a file as a source from all reverse dependency entries
560    ///
561    /// This removes the file from being listed as a dependent in all target entries.
562    /// Used when updating a file (we need to remove old outgoing links before adding new ones).
563    fn clear_reverse_deps_as_source(&mut self, path: &Path) {
564        for deps in self.reverse_deps.values_mut() {
565            deps.remove(path);
566        }
567        // Clean up empty entries
568        self.reverse_deps.retain(|_, deps| !deps.is_empty());
569    }
570
571    /// Remove a file completely from reverse dependency tracking
572    ///
573    /// Removes the file as both a source (dependent) and as a target.
574    /// Used when deleting a file from the index.
575    fn clear_reverse_deps_for(&mut self, path: &Path) {
576        // Remove as source (dependent)
577        self.clear_reverse_deps_as_source(path);
578
579        // Also remove as target
580        self.reverse_deps.remove(path);
581    }
582
583    /// Resolve a relative path from a source file to an absolute target path
584    fn resolve_target_path(&self, source_file: &Path, relative_target: &str) -> PathBuf {
585        // Get the directory containing the source file
586        let source_dir = source_file.parent().unwrap_or(Path::new(""));
587
588        // Join with the relative target and normalize
589        let target = source_dir.join(relative_target);
590
591        // Normalize the path (handle .., ., etc.)
592        Self::normalize_path(&target)
593    }
594
595    /// Normalize a path by resolving . and .. components
596    fn normalize_path(path: &Path) -> PathBuf {
597        let mut components = Vec::new();
598
599        for component in path.components() {
600            match component {
601                std::path::Component::ParentDir => {
602                    // Go up one level if possible
603                    if !components.is_empty() {
604                        components.pop();
605                    }
606                }
607                std::path::Component::CurDir => {
608                    // Skip current directory markers
609                }
610                _ => {
611                    components.push(component);
612                }
613            }
614        }
615
616        components.iter().collect()
617    }
618}
619
620impl FileIndex {
621    /// Create a new empty file index
622    pub fn new() -> Self {
623        Self::default()
624    }
625
626    /// Create a file index with the given content hash
627    pub fn with_hash(content_hash: String) -> Self {
628        Self {
629            content_hash,
630            ..Default::default()
631        }
632    }
633
634    /// Add a heading to the index
635    ///
636    /// Also updates the anchor lookup map for O(1) anchor queries
637    pub fn add_heading(&mut self, heading: HeadingIndex) {
638        let index = self.headings.len();
639
640        // Add auto-generated anchor to lookup map (lowercased for case-insensitive matching)
641        self.anchor_to_heading.insert(heading.auto_anchor.to_lowercase(), index);
642
643        // Add custom anchor if present
644        if let Some(ref custom) = heading.custom_anchor {
645            self.anchor_to_heading.insert(custom.to_lowercase(), index);
646        }
647
648        self.headings.push(heading);
649    }
650
651    /// Check if an anchor exists in this file (O(1) lookup)
652    ///
653    /// Returns true if the anchor matches any of:
654    /// - Auto-generated heading anchors
655    /// - Custom heading anchors (from {#id} syntax on headings)
656    /// - HTML anchors (from <a id="..."> or <element id="...">)
657    /// - Attribute anchors (from { #id } syntax on non-heading elements)
658    ///
659    /// Matching is case-insensitive.
660    pub fn has_anchor(&self, anchor: &str) -> bool {
661        let lower = anchor.to_lowercase();
662        self.anchor_to_heading.contains_key(&lower)
663            || self.html_anchors.contains(&lower)
664            || self.attribute_anchors.contains(&lower)
665    }
666
667    /// Add an HTML anchor (from <a id="..."> or <element id="..."> tags)
668    pub fn add_html_anchor(&mut self, anchor: String) {
669        if !anchor.is_empty() {
670            self.html_anchors.insert(anchor.to_lowercase());
671        }
672    }
673
674    /// Add an attribute anchor (from { #id } syntax on non-heading elements)
675    pub fn add_attribute_anchor(&mut self, anchor: String) {
676        if !anchor.is_empty() {
677            self.attribute_anchors.insert(anchor.to_lowercase());
678        }
679    }
680
681    /// Get the heading index for an anchor (O(1) lookup)
682    ///
683    /// Returns the index into `self.headings` if found.
684    pub fn get_heading_by_anchor(&self, anchor: &str) -> Option<&HeadingIndex> {
685        self.anchor_to_heading
686            .get(&anchor.to_lowercase())
687            .and_then(|&idx| self.headings.get(idx))
688    }
689
690    /// Add a reference link to the index
691    pub fn add_reference_link(&mut self, link: ReferenceLinkIndex) {
692        self.reference_links.push(link);
693    }
694
695    /// Check if a rule is disabled at a specific line
696    ///
697    /// Used by cross-file rules to respect inline disable directives.
698    /// Checks both file-wide disables and line-specific disables.
699    pub fn is_rule_disabled_at_line(&self, rule_name: &str, line: usize) -> bool {
700        // Check file-wide disables (highest priority)
701        if self.file_disabled_rules.contains("*") || self.file_disabled_rules.contains(rule_name) {
702            return true;
703        }
704
705        // Check line-specific disables
706        if let Some(rules) = self.line_disabled_rules.get(&line) {
707            return rules.contains("*") || rules.contains(rule_name);
708        }
709
710        false
711    }
712
713    /// Add a cross-file link to the index (deduplicates by target_path, fragment, line)
714    pub fn add_cross_file_link(&mut self, link: CrossFileLinkIndex) {
715        // Deduplicate: multiple rules may contribute the same link with different columns
716        // (e.g., MD051 uses link start, MD057 uses URL start)
717        let is_duplicate = self.cross_file_links.iter().any(|existing| {
718            existing.target_path == link.target_path && existing.fragment == link.fragment && existing.line == link.line
719        });
720        if !is_duplicate {
721            self.cross_file_links.push(link);
722        }
723    }
724
725    /// Add a defined reference ID (e.g., from [ref]: url)
726    pub fn add_defined_reference(&mut self, ref_id: String) {
727        self.defined_references.insert(ref_id);
728    }
729
730    /// Check if a reference ID has an explicit definition
731    pub fn has_defined_reference(&self, ref_id: &str) -> bool {
732        self.defined_references.contains(ref_id)
733    }
734
735    /// Check if the content hash matches
736    pub fn hash_matches(&self, hash: &str) -> bool {
737        self.content_hash == hash
738    }
739
740    /// Get the number of headings
741    pub fn heading_count(&self) -> usize {
742        self.headings.len()
743    }
744
745    /// Get the number of reference links
746    pub fn reference_link_count(&self) -> usize {
747        self.reference_links.len()
748    }
749}
750
751#[cfg(test)]
752mod tests {
753    use super::*;
754
755    #[test]
756    fn test_workspace_index_basic() {
757        let mut index = WorkspaceIndex::new();
758        assert_eq!(index.file_count(), 0);
759        assert_eq!(index.version(), 0);
760
761        let mut file_index = FileIndex::with_hash("abc123".to_string());
762        file_index.add_heading(HeadingIndex {
763            text: "Installation".to_string(),
764            auto_anchor: "installation".to_string(),
765            custom_anchor: None,
766            line: 1,
767        });
768
769        index.insert_file(PathBuf::from("docs/install.md"), file_index);
770        assert_eq!(index.file_count(), 1);
771        assert_eq!(index.version(), 1);
772
773        assert!(index.contains_file(Path::new("docs/install.md")));
774        assert!(!index.contains_file(Path::new("docs/other.md")));
775    }
776
777    #[test]
778    fn test_vulnerable_anchors() {
779        let mut index = WorkspaceIndex::new();
780
781        // File 1: heading without custom anchor (vulnerable)
782        let mut file1 = FileIndex::new();
783        file1.add_heading(HeadingIndex {
784            text: "Getting Started".to_string(),
785            auto_anchor: "getting-started".to_string(),
786            custom_anchor: None,
787            line: 1,
788        });
789        index.insert_file(PathBuf::from("docs/guide.md"), file1);
790
791        // File 2: heading with custom anchor (not vulnerable)
792        let mut file2 = FileIndex::new();
793        file2.add_heading(HeadingIndex {
794            text: "Installation".to_string(),
795            auto_anchor: "installation".to_string(),
796            custom_anchor: Some("install".to_string()),
797            line: 1,
798        });
799        index.insert_file(PathBuf::from("docs/install.md"), file2);
800
801        let vulnerable = index.get_vulnerable_anchors();
802        assert_eq!(vulnerable.len(), 1);
803        assert!(vulnerable.contains_key("getting-started"));
804        assert!(!vulnerable.contains_key("installation"));
805
806        let anchors = vulnerable.get("getting-started").unwrap();
807        assert_eq!(anchors.len(), 1);
808        assert_eq!(anchors[0].file, PathBuf::from("docs/guide.md"));
809        assert_eq!(anchors[0].text, "Getting Started");
810    }
811
812    #[test]
813    fn test_vulnerable_anchors_multiple_files_same_anchor() {
814        // Multiple files can have headings with the same auto-generated anchor
815        // get_vulnerable_anchors() should collect all of them
816        let mut index = WorkspaceIndex::new();
817
818        // File 1: has "Installation" heading (vulnerable)
819        let mut file1 = FileIndex::new();
820        file1.add_heading(HeadingIndex {
821            text: "Installation".to_string(),
822            auto_anchor: "installation".to_string(),
823            custom_anchor: None,
824            line: 1,
825        });
826        index.insert_file(PathBuf::from("docs/en/guide.md"), file1);
827
828        // File 2: also has "Installation" heading with same anchor (vulnerable)
829        let mut file2 = FileIndex::new();
830        file2.add_heading(HeadingIndex {
831            text: "Installation".to_string(),
832            auto_anchor: "installation".to_string(),
833            custom_anchor: None,
834            line: 5,
835        });
836        index.insert_file(PathBuf::from("docs/fr/guide.md"), file2);
837
838        // File 3: has "Installation" but WITH custom anchor (not vulnerable)
839        let mut file3 = FileIndex::new();
840        file3.add_heading(HeadingIndex {
841            text: "Installation".to_string(),
842            auto_anchor: "installation".to_string(),
843            custom_anchor: Some("install".to_string()),
844            line: 10,
845        });
846        index.insert_file(PathBuf::from("docs/de/guide.md"), file3);
847
848        let vulnerable = index.get_vulnerable_anchors();
849        assert_eq!(vulnerable.len(), 1); // One unique anchor
850        assert!(vulnerable.contains_key("installation"));
851
852        let anchors = vulnerable.get("installation").unwrap();
853        // Should have 2 entries (en and fr), NOT 3 (de has custom anchor)
854        assert_eq!(anchors.len(), 2, "Should collect both vulnerable anchors");
855
856        // Verify both files are represented
857        let files: std::collections::HashSet<_> = anchors.iter().map(|a| &a.file).collect();
858        assert!(files.contains(&PathBuf::from("docs/en/guide.md")));
859        assert!(files.contains(&PathBuf::from("docs/fr/guide.md")));
860    }
861
862    #[test]
863    fn test_file_index_hash() {
864        let index = FileIndex::with_hash("hash123".to_string());
865        assert!(index.hash_matches("hash123"));
866        assert!(!index.hash_matches("other"));
867    }
868
869    #[test]
870    fn test_version_increment() {
871        let mut index = WorkspaceIndex::new();
872        assert_eq!(index.version(), 0);
873
874        index.insert_file(PathBuf::from("a.md"), FileIndex::new());
875        assert_eq!(index.version(), 1);
876
877        index.insert_file(PathBuf::from("b.md"), FileIndex::new());
878        assert_eq!(index.version(), 2);
879
880        index.remove_file(Path::new("a.md"));
881        assert_eq!(index.version(), 3);
882
883        // Removing non-existent file doesn't increment
884        index.remove_file(Path::new("nonexistent.md"));
885        assert_eq!(index.version(), 3);
886    }
887
888    #[test]
889    fn test_reverse_deps_basic() {
890        let mut index = WorkspaceIndex::new();
891
892        // File A links to file B
893        let mut file_a = FileIndex::new();
894        file_a.add_cross_file_link(CrossFileLinkIndex {
895            target_path: "b.md".to_string(),
896            fragment: "section".to_string(),
897            line: 10,
898            column: 5,
899        });
900        index.update_file(Path::new("docs/a.md"), file_a);
901
902        // Check that B has A as a dependent
903        let dependents = index.get_dependents(Path::new("docs/b.md"));
904        assert_eq!(dependents.len(), 1);
905        assert_eq!(dependents[0], PathBuf::from("docs/a.md"));
906
907        // A has no dependents
908        let a_dependents = index.get_dependents(Path::new("docs/a.md"));
909        assert!(a_dependents.is_empty());
910    }
911
912    #[test]
913    fn test_reverse_deps_multiple() {
914        let mut index = WorkspaceIndex::new();
915
916        // Files A and C both link to B
917        let mut file_a = FileIndex::new();
918        file_a.add_cross_file_link(CrossFileLinkIndex {
919            target_path: "../b.md".to_string(),
920            fragment: "".to_string(),
921            line: 1,
922            column: 1,
923        });
924        index.update_file(Path::new("docs/sub/a.md"), file_a);
925
926        let mut file_c = FileIndex::new();
927        file_c.add_cross_file_link(CrossFileLinkIndex {
928            target_path: "b.md".to_string(),
929            fragment: "".to_string(),
930            line: 1,
931            column: 1,
932        });
933        index.update_file(Path::new("docs/c.md"), file_c);
934
935        // B should have both A and C as dependents
936        let dependents = index.get_dependents(Path::new("docs/b.md"));
937        assert_eq!(dependents.len(), 2);
938        assert!(dependents.contains(&PathBuf::from("docs/sub/a.md")));
939        assert!(dependents.contains(&PathBuf::from("docs/c.md")));
940    }
941
942    #[test]
943    fn test_reverse_deps_update_clears_old() {
944        let mut index = WorkspaceIndex::new();
945
946        // File A initially links to B
947        let mut file_a = FileIndex::new();
948        file_a.add_cross_file_link(CrossFileLinkIndex {
949            target_path: "b.md".to_string(),
950            fragment: "".to_string(),
951            line: 1,
952            column: 1,
953        });
954        index.update_file(Path::new("docs/a.md"), file_a);
955
956        // Verify B has A as dependent
957        assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
958
959        // Update A to link to C instead of B
960        let mut file_a_updated = FileIndex::new();
961        file_a_updated.add_cross_file_link(CrossFileLinkIndex {
962            target_path: "c.md".to_string(),
963            fragment: "".to_string(),
964            line: 1,
965            column: 1,
966        });
967        index.update_file(Path::new("docs/a.md"), file_a_updated);
968
969        // B should no longer have A as dependent
970        assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
971
972        // C should now have A as dependent
973        let c_deps = index.get_dependents(Path::new("docs/c.md"));
974        assert_eq!(c_deps.len(), 1);
975        assert_eq!(c_deps[0], PathBuf::from("docs/a.md"));
976    }
977
978    #[test]
979    fn test_reverse_deps_remove_file() {
980        let mut index = WorkspaceIndex::new();
981
982        // File A links to B
983        let mut file_a = FileIndex::new();
984        file_a.add_cross_file_link(CrossFileLinkIndex {
985            target_path: "b.md".to_string(),
986            fragment: "".to_string(),
987            line: 1,
988            column: 1,
989        });
990        index.update_file(Path::new("docs/a.md"), file_a);
991
992        // Verify B has A as dependent
993        assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
994
995        // Remove file A
996        index.remove_file(Path::new("docs/a.md"));
997
998        // B should no longer have any dependents
999        assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1000    }
1001
1002    #[test]
1003    fn test_normalize_path() {
1004        // Test .. handling
1005        let path = Path::new("docs/sub/../other.md");
1006        let normalized = WorkspaceIndex::normalize_path(path);
1007        assert_eq!(normalized, PathBuf::from("docs/other.md"));
1008
1009        // Test . handling
1010        let path2 = Path::new("docs/./other.md");
1011        let normalized2 = WorkspaceIndex::normalize_path(path2);
1012        assert_eq!(normalized2, PathBuf::from("docs/other.md"));
1013
1014        // Test multiple ..
1015        let path3 = Path::new("a/b/c/../../d.md");
1016        let normalized3 = WorkspaceIndex::normalize_path(path3);
1017        assert_eq!(normalized3, PathBuf::from("a/d.md"));
1018    }
1019
1020    #[test]
1021    fn test_clear_clears_reverse_deps() {
1022        let mut index = WorkspaceIndex::new();
1023
1024        // File A links to B
1025        let mut file_a = FileIndex::new();
1026        file_a.add_cross_file_link(CrossFileLinkIndex {
1027            target_path: "b.md".to_string(),
1028            fragment: "".to_string(),
1029            line: 1,
1030            column: 1,
1031        });
1032        index.update_file(Path::new("docs/a.md"), file_a);
1033
1034        // Verify B has A as dependent
1035        assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1036
1037        // Clear the index
1038        index.clear();
1039
1040        // Both files and reverse deps should be cleared
1041        assert_eq!(index.file_count(), 0);
1042        assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1043    }
1044
1045    #[test]
1046    fn test_is_file_stale() {
1047        let mut index = WorkspaceIndex::new();
1048
1049        // Non-existent file is always stale
1050        assert!(index.is_file_stale(Path::new("nonexistent.md"), "hash123"));
1051
1052        // Add a file with known hash
1053        let file_index = FileIndex::with_hash("hash123".to_string());
1054        index.insert_file(PathBuf::from("docs/test.md"), file_index);
1055
1056        // Same hash means not stale
1057        assert!(!index.is_file_stale(Path::new("docs/test.md"), "hash123"));
1058
1059        // Different hash means stale
1060        assert!(index.is_file_stale(Path::new("docs/test.md"), "different_hash"));
1061    }
1062
1063    #[cfg(feature = "native")]
1064    #[test]
1065    fn test_cache_roundtrip() {
1066        use std::fs;
1067
1068        // Create a temp directory
1069        let temp_dir = std::env::temp_dir().join("rumdl_test_cache_roundtrip");
1070        let _ = fs::remove_dir_all(&temp_dir);
1071        fs::create_dir_all(&temp_dir).unwrap();
1072
1073        // Create an index with some data
1074        let mut index = WorkspaceIndex::new();
1075
1076        let mut file1 = FileIndex::with_hash("abc123".to_string());
1077        file1.add_heading(HeadingIndex {
1078            text: "Test Heading".to_string(),
1079            auto_anchor: "test-heading".to_string(),
1080            custom_anchor: Some("test".to_string()),
1081            line: 1,
1082        });
1083        file1.add_cross_file_link(CrossFileLinkIndex {
1084            target_path: "./other.md".to_string(),
1085            fragment: "section".to_string(),
1086            line: 5,
1087            column: 3,
1088        });
1089        index.update_file(Path::new("docs/file1.md"), file1);
1090
1091        let mut file2 = FileIndex::with_hash("def456".to_string());
1092        file2.add_heading(HeadingIndex {
1093            text: "Another Heading".to_string(),
1094            auto_anchor: "another-heading".to_string(),
1095            custom_anchor: None,
1096            line: 1,
1097        });
1098        index.update_file(Path::new("docs/other.md"), file2);
1099
1100        // Save to cache
1101        index.save_to_cache(&temp_dir).expect("Failed to save cache");
1102
1103        // Verify cache file exists
1104        assert!(temp_dir.join("workspace_index.bin").exists());
1105
1106        // Load from cache
1107        let loaded = WorkspaceIndex::load_from_cache(&temp_dir).expect("Failed to load cache");
1108
1109        // Verify data matches
1110        assert_eq!(loaded.file_count(), 2);
1111        assert!(loaded.contains_file(Path::new("docs/file1.md")));
1112        assert!(loaded.contains_file(Path::new("docs/other.md")));
1113
1114        // Check file1 details
1115        let file1_loaded = loaded.get_file(Path::new("docs/file1.md")).unwrap();
1116        assert_eq!(file1_loaded.content_hash, "abc123");
1117        assert_eq!(file1_loaded.headings.len(), 1);
1118        assert_eq!(file1_loaded.headings[0].text, "Test Heading");
1119        assert_eq!(file1_loaded.headings[0].custom_anchor, Some("test".to_string()));
1120        assert_eq!(file1_loaded.cross_file_links.len(), 1);
1121        assert_eq!(file1_loaded.cross_file_links[0].target_path, "./other.md");
1122
1123        // Check reverse deps were serialized correctly
1124        let dependents = loaded.get_dependents(Path::new("docs/other.md"));
1125        assert_eq!(dependents.len(), 1);
1126        assert_eq!(dependents[0], PathBuf::from("docs/file1.md"));
1127
1128        // Clean up
1129        let _ = fs::remove_dir_all(&temp_dir);
1130    }
1131
1132    #[cfg(feature = "native")]
1133    #[test]
1134    fn test_cache_missing_file() {
1135        let temp_dir = std::env::temp_dir().join("rumdl_test_cache_missing");
1136        let _ = std::fs::remove_dir_all(&temp_dir);
1137
1138        // Should return None for non-existent cache
1139        let result = WorkspaceIndex::load_from_cache(&temp_dir);
1140        assert!(result.is_none());
1141    }
1142
1143    #[cfg(feature = "native")]
1144    #[test]
1145    fn test_cache_corrupted_file() {
1146        use std::fs;
1147
1148        let temp_dir = std::env::temp_dir().join("rumdl_test_cache_corrupted");
1149        let _ = fs::remove_dir_all(&temp_dir);
1150        fs::create_dir_all(&temp_dir).unwrap();
1151
1152        // Write corrupted data (too small for header)
1153        fs::write(temp_dir.join("workspace_index.bin"), b"bad").unwrap();
1154
1155        // Should return None for corrupted cache (and remove the file)
1156        let result = WorkspaceIndex::load_from_cache(&temp_dir);
1157        assert!(result.is_none());
1158
1159        // Corrupted file should be removed
1160        assert!(!temp_dir.join("workspace_index.bin").exists());
1161
1162        // Clean up
1163        let _ = fs::remove_dir_all(&temp_dir);
1164    }
1165
1166    #[cfg(feature = "native")]
1167    #[test]
1168    fn test_cache_invalid_magic() {
1169        use std::fs;
1170
1171        let temp_dir = std::env::temp_dir().join("rumdl_test_cache_invalid_magic");
1172        let _ = fs::remove_dir_all(&temp_dir);
1173        fs::create_dir_all(&temp_dir).unwrap();
1174
1175        // Write data with wrong magic header
1176        let mut data = Vec::new();
1177        data.extend_from_slice(b"XXXX"); // Wrong magic
1178        data.extend_from_slice(&1u32.to_le_bytes()); // Version 1
1179        data.extend_from_slice(&[0; 100]); // Some garbage data
1180        fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1181
1182        // Should return None for invalid magic
1183        let result = WorkspaceIndex::load_from_cache(&temp_dir);
1184        assert!(result.is_none());
1185
1186        // File should be removed
1187        assert!(!temp_dir.join("workspace_index.bin").exists());
1188
1189        // Clean up
1190        let _ = fs::remove_dir_all(&temp_dir);
1191    }
1192
1193    #[cfg(feature = "native")]
1194    #[test]
1195    fn test_cache_version_mismatch() {
1196        use std::fs;
1197
1198        let temp_dir = std::env::temp_dir().join("rumdl_test_cache_version_mismatch");
1199        let _ = fs::remove_dir_all(&temp_dir);
1200        fs::create_dir_all(&temp_dir).unwrap();
1201
1202        // Write data with correct magic but wrong version
1203        let mut data = Vec::new();
1204        data.extend_from_slice(b"RWSI"); // Correct magic
1205        data.extend_from_slice(&999u32.to_le_bytes()); // Future version
1206        data.extend_from_slice(&[0; 100]); // Some garbage data
1207        fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1208
1209        // Should return None for version mismatch
1210        let result = WorkspaceIndex::load_from_cache(&temp_dir);
1211        assert!(result.is_none());
1212
1213        // File should be removed to trigger rebuild
1214        assert!(!temp_dir.join("workspace_index.bin").exists());
1215
1216        // Clean up
1217        let _ = fs::remove_dir_all(&temp_dir);
1218    }
1219
1220    #[cfg(feature = "native")]
1221    #[test]
1222    fn test_cache_atomic_write() {
1223        use std::fs;
1224
1225        // Test that atomic writes work (no temp files left behind)
1226        let temp_dir = std::env::temp_dir().join("rumdl_test_cache_atomic");
1227        let _ = fs::remove_dir_all(&temp_dir);
1228        fs::create_dir_all(&temp_dir).unwrap();
1229
1230        let index = WorkspaceIndex::new();
1231        index.save_to_cache(&temp_dir).expect("Failed to save");
1232
1233        // Only the final cache file should exist, no temp files
1234        let entries: Vec<_> = fs::read_dir(&temp_dir).unwrap().collect();
1235        assert_eq!(entries.len(), 1);
1236        assert!(temp_dir.join("workspace_index.bin").exists());
1237
1238        // Clean up
1239        let _ = fs::remove_dir_all(&temp_dir);
1240    }
1241
1242    #[test]
1243    fn test_has_anchor_auto_generated() {
1244        let mut file_index = FileIndex::new();
1245        file_index.add_heading(HeadingIndex {
1246            text: "Installation Guide".to_string(),
1247            auto_anchor: "installation-guide".to_string(),
1248            custom_anchor: None,
1249            line: 1,
1250        });
1251
1252        // Should find by auto-generated anchor
1253        assert!(file_index.has_anchor("installation-guide"));
1254
1255        // Case-insensitive matching
1256        assert!(file_index.has_anchor("Installation-Guide"));
1257        assert!(file_index.has_anchor("INSTALLATION-GUIDE"));
1258
1259        // Should not find non-existent anchor
1260        assert!(!file_index.has_anchor("nonexistent"));
1261    }
1262
1263    #[test]
1264    fn test_has_anchor_custom() {
1265        let mut file_index = FileIndex::new();
1266        file_index.add_heading(HeadingIndex {
1267            text: "Installation Guide".to_string(),
1268            auto_anchor: "installation-guide".to_string(),
1269            custom_anchor: Some("install".to_string()),
1270            line: 1,
1271        });
1272
1273        // Should find by auto-generated anchor
1274        assert!(file_index.has_anchor("installation-guide"));
1275
1276        // Should also find by custom anchor
1277        assert!(file_index.has_anchor("install"));
1278        assert!(file_index.has_anchor("Install")); // case-insensitive
1279
1280        // Should not find non-existent anchor
1281        assert!(!file_index.has_anchor("nonexistent"));
1282    }
1283
1284    #[test]
1285    fn test_get_heading_by_anchor() {
1286        let mut file_index = FileIndex::new();
1287        file_index.add_heading(HeadingIndex {
1288            text: "Installation Guide".to_string(),
1289            auto_anchor: "installation-guide".to_string(),
1290            custom_anchor: Some("install".to_string()),
1291            line: 10,
1292        });
1293        file_index.add_heading(HeadingIndex {
1294            text: "Configuration".to_string(),
1295            auto_anchor: "configuration".to_string(),
1296            custom_anchor: None,
1297            line: 20,
1298        });
1299
1300        // Get by auto anchor
1301        let heading = file_index.get_heading_by_anchor("installation-guide");
1302        assert!(heading.is_some());
1303        assert_eq!(heading.unwrap().text, "Installation Guide");
1304        assert_eq!(heading.unwrap().line, 10);
1305
1306        // Get by custom anchor
1307        let heading = file_index.get_heading_by_anchor("install");
1308        assert!(heading.is_some());
1309        assert_eq!(heading.unwrap().text, "Installation Guide");
1310
1311        // Get second heading
1312        let heading = file_index.get_heading_by_anchor("configuration");
1313        assert!(heading.is_some());
1314        assert_eq!(heading.unwrap().text, "Configuration");
1315        assert_eq!(heading.unwrap().line, 20);
1316
1317        // Non-existent
1318        assert!(file_index.get_heading_by_anchor("nonexistent").is_none());
1319    }
1320
1321    #[test]
1322    fn test_anchor_lookup_many_headings() {
1323        // Test that O(1) lookup works with many headings
1324        let mut file_index = FileIndex::new();
1325
1326        // Add 100 headings
1327        for i in 0..100 {
1328            file_index.add_heading(HeadingIndex {
1329                text: format!("Heading {i}"),
1330                auto_anchor: format!("heading-{i}"),
1331                custom_anchor: Some(format!("h{i}")),
1332                line: i + 1,
1333            });
1334        }
1335
1336        // Verify all can be found
1337        for i in 0..100 {
1338            assert!(file_index.has_anchor(&format!("heading-{i}")));
1339            assert!(file_index.has_anchor(&format!("h{i}")));
1340
1341            let heading = file_index.get_heading_by_anchor(&format!("heading-{i}"));
1342            assert!(heading.is_some());
1343            assert_eq!(heading.unwrap().line, i + 1);
1344        }
1345    }
1346
1347    // =============================================================================
1348    // Tests for extract_cross_file_links utility
1349    // =============================================================================
1350
1351    #[test]
1352    fn test_extract_cross_file_links_basic() {
1353        use crate::config::MarkdownFlavor;
1354
1355        let content = "# Test\n\nSee [link](./other.md) for info.\n";
1356        let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1357        let links = extract_cross_file_links(&ctx);
1358
1359        assert_eq!(links.len(), 1);
1360        assert_eq!(links[0].target_path, "./other.md");
1361        assert_eq!(links[0].fragment, "");
1362        assert_eq!(links[0].line, 3);
1363        // "See [link](" = 11 chars, so column 12 is where "./other.md" starts
1364        assert_eq!(links[0].column, 12);
1365    }
1366
1367    #[test]
1368    fn test_extract_cross_file_links_with_fragment() {
1369        use crate::config::MarkdownFlavor;
1370
1371        let content = "Check [guide](./guide.md#install) here.\n";
1372        let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1373        let links = extract_cross_file_links(&ctx);
1374
1375        assert_eq!(links.len(), 1);
1376        assert_eq!(links[0].target_path, "./guide.md");
1377        assert_eq!(links[0].fragment, "install");
1378        assert_eq!(links[0].line, 1);
1379        // "Check [guide](" = 14 chars, so column 15 is where "./guide.md" starts
1380        assert_eq!(links[0].column, 15);
1381    }
1382
1383    #[test]
1384    fn test_extract_cross_file_links_multiple_on_same_line() {
1385        use crate::config::MarkdownFlavor;
1386
1387        let content = "See [a](a.md) and [b](b.md) here.\n";
1388        let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1389        let links = extract_cross_file_links(&ctx);
1390
1391        assert_eq!(links.len(), 2);
1392
1393        assert_eq!(links[0].target_path, "a.md");
1394        assert_eq!(links[0].line, 1);
1395        // "See [a](" = 8 chars, so column 9
1396        assert_eq!(links[0].column, 9);
1397
1398        assert_eq!(links[1].target_path, "b.md");
1399        assert_eq!(links[1].line, 1);
1400        // "See [a](a.md) and [b](" = 22 chars, so column 23
1401        assert_eq!(links[1].column, 23);
1402    }
1403
1404    #[test]
1405    fn test_extract_cross_file_links_angle_brackets() {
1406        use crate::config::MarkdownFlavor;
1407
1408        let content = "See [link](<path/with (parens).md>) here.\n";
1409        let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1410        let links = extract_cross_file_links(&ctx);
1411
1412        assert_eq!(links.len(), 1);
1413        assert_eq!(links[0].target_path, "path/with (parens).md");
1414        assert_eq!(links[0].line, 1);
1415        // "See [link](<" = 12 chars, so column 13
1416        assert_eq!(links[0].column, 13);
1417    }
1418
1419    #[test]
1420    fn test_extract_cross_file_links_skips_external() {
1421        use crate::config::MarkdownFlavor;
1422
1423        let content = r#"
1424[external](https://example.com)
1425[mailto](mailto:test@example.com)
1426[local](./local.md)
1427[fragment](#section)
1428[absolute](/docs/page.md)
1429"#;
1430        let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1431        let links = extract_cross_file_links(&ctx);
1432
1433        // Only the local markdown link should be extracted
1434        assert_eq!(links.len(), 1);
1435        assert_eq!(links[0].target_path, "./local.md");
1436    }
1437
1438    #[test]
1439    fn test_extract_cross_file_links_skips_non_markdown() {
1440        use crate::config::MarkdownFlavor;
1441
1442        let content = r#"
1443[image](./photo.png)
1444[doc](./readme.md)
1445[pdf](./document.pdf)
1446"#;
1447        let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1448        let links = extract_cross_file_links(&ctx);
1449
1450        // Only markdown files are indexed for cross-file validation
1451        assert_eq!(links.len(), 1);
1452        assert_eq!(links[0].target_path, "./readme.md");
1453    }
1454
1455    #[test]
1456    fn test_extract_cross_file_links_skips_code_spans() {
1457        use crate::config::MarkdownFlavor;
1458
1459        let content = "Normal [link](./file.md) and `[code](./ignored.md)` here.\n";
1460        let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1461        let links = extract_cross_file_links(&ctx);
1462
1463        // Only the link outside code span should be extracted
1464        assert_eq!(links.len(), 1);
1465        assert_eq!(links[0].target_path, "./file.md");
1466    }
1467
1468    #[test]
1469    fn test_extract_cross_file_links_with_query_params() {
1470        use crate::config::MarkdownFlavor;
1471
1472        let content = "See [doc](./file.md?raw=true) here.\n";
1473        let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1474        let links = extract_cross_file_links(&ctx);
1475
1476        assert_eq!(links.len(), 1);
1477        // Query params should be stripped
1478        assert_eq!(links[0].target_path, "./file.md");
1479    }
1480
1481    #[test]
1482    fn test_extract_cross_file_links_empty_content() {
1483        use crate::config::MarkdownFlavor;
1484
1485        let content = "";
1486        let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1487        let links = extract_cross_file_links(&ctx);
1488
1489        assert!(links.is_empty());
1490    }
1491
1492    #[test]
1493    fn test_extract_cross_file_links_no_links() {
1494        use crate::config::MarkdownFlavor;
1495
1496        let content = "# Just a heading\n\nSome text without links.\n";
1497        let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1498        let links = extract_cross_file_links(&ctx);
1499
1500        assert!(links.is_empty());
1501    }
1502
1503    #[test]
1504    fn test_extract_cross_file_links_position_accuracy_issue_234() {
1505        // This test verifies the fix for GitHub issue #234
1506        // The LSP was reporting incorrect column positions for MD057 diagnostics
1507        use crate::config::MarkdownFlavor;
1508
1509        let content = r#"# Test Document
1510
1511Here is a [broken link](nonexistent-file.md) that should trigger MD057.
1512
1513And another [link](also-missing.md) on this line.
1514"#;
1515        let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1516        let links = extract_cross_file_links(&ctx);
1517
1518        assert_eq!(links.len(), 2);
1519
1520        // First link: "Here is a [broken link](" = 24 chars, column 25
1521        assert_eq!(links[0].target_path, "nonexistent-file.md");
1522        assert_eq!(links[0].line, 3);
1523        assert_eq!(links[0].column, 25);
1524
1525        // Second link: "And another [link](" = 19 chars, column 20
1526        assert_eq!(links[1].target_path, "also-missing.md");
1527        assert_eq!(links[1].line, 5);
1528        assert_eq!(links[1].column, 20);
1529    }
1530}