mdbook_lint_core/rules/
mdbook006.rs

1//! MDBOOK006: Validate internal cross-reference links between chapters
2//!
3//! This rule validates anchor fragments in internal links, ensuring they point to valid headings
4//! in target files. It complements MDBOOK002 by focusing on the anchor validation that MDBOOK002 skips.
5
6use crate::rule::{AstRule, RuleCategory, RuleMetadata};
7use crate::{
8    Document,
9    violation::{Severity, Violation},
10};
11use comrak::nodes::{AstNode, NodeValue};
12use std::collections::HashMap;
13use std::path::{Path, PathBuf};
14use std::sync::{Arc, RwLock};
15use std::{fs, io};
16
/// MDBOOK006: Validate internal cross-reference links between chapters
///
/// This rule checks that internal links carrying an anchor fragment, such as
/// `[text](file.md#heading)`, point at a heading that exists in the target file.
///
/// The rule:
/// - Only processes internal links that have both a file part and an anchor
///   fragment (e.g., `file.md#section`)
/// - Skips external links and same-document anchors (`#section`)
/// - Resolves target files relative to the current document
/// - Skips links whose target file is missing or cannot be read
/// - Scans target files line by line for ATX headings (`#` through `######`);
///   Setext (underlined) headings are not recognized yet
/// - Validates that the anchor fragment matches one of the extracted anchors
/// - Caches extracted anchors per target file to avoid re-reading large books
///
/// Anchor ID generation:
/// - Converts heading text to lowercase
/// - Replaces every run of non-alphanumeric characters (as defined by
///   `char::is_alphanumeric`, so Unicode letters and digits are kept) with a
///   single hyphen
/// - Removes leading and trailing hyphens
#[derive(Debug, Default)]
pub struct MDBOOK006 {
    /// Cache of extracted heading anchors, keyed by target file path, so each
    /// target file is read and scanned at most once per rule instance.
    anchor_cache: Arc<RwLock<HashMap<PathBuf, Vec<String>>>>,
}
41
42impl AstRule for MDBOOK006 {
43    fn id(&self) -> &'static str {
44        "MDBOOK006"
45    }
46
47    fn name(&self) -> &'static str {
48        "internal-cross-references"
49    }
50
51    fn description(&self) -> &'static str {
52        "Internal cross-reference links must point to valid headings in target files"
53    }
54
55    fn metadata(&self) -> RuleMetadata {
56        RuleMetadata::stable(RuleCategory::MdBook).introduced_in("mdbook-lint v0.2.0")
57    }
58
59    fn check_ast<'a>(
60        &self,
61        document: &Document,
62        ast: &'a AstNode<'a>,
63    ) -> crate::error::Result<Vec<Violation>> {
64        let mut violations = Vec::new();
65
66        // Walk through all nodes in the AST
67        for node in ast.descendants() {
68            if let NodeValue::Link(link) = &node.data.borrow().value {
69                let url = &link.url;
70
71                // Skip external links
72                if is_external_link(url) {
73                    continue;
74                }
75
76                // Only process links with anchor fragments
77                if !url.contains('#') {
78                    continue;
79                }
80
81                // Skip same-document anchors (start with #)
82                if url.starts_with('#') {
83                    continue;
84                }
85
86                // Validate the cross-reference link
87                if let Some(violation) = self.validate_cross_reference(document, node, url)? {
88                    violations.push(violation);
89                }
90            }
91        }
92
93        Ok(violations)
94    }
95}
96
97impl MDBOOK006 {
98    /// Validate a cross-reference link with anchor fragment
99    fn validate_cross_reference<'a>(
100        &self,
101        document: &Document,
102        node: &'a AstNode<'a>,
103        url: &str,
104    ) -> crate::error::Result<Option<Violation>> {
105        // Split URL into file path and anchor
106        let parts: Vec<&str> = url.splitn(2, '#').collect();
107        if parts.len() != 2 {
108            return Ok(None); // No anchor fragment
109        }
110
111        let file_path = parts[0];
112        let anchor = parts[1];
113
114        // Skip empty file paths or anchors
115        if file_path.is_empty() || anchor.is_empty() {
116            return Ok(None);
117        }
118
119        // Resolve the target file path relative to current document
120        let target_path = self.resolve_target_path(&document.path, file_path);
121
122        // Check if target file exists
123        if !target_path.exists() {
124            // File doesn't exist - this should be caught by MDBOOK002, so we skip it
125            return Ok(None);
126        }
127
128        // Get anchors from the target file
129        let anchors = match self.get_file_anchors(&target_path)? {
130            Some(anchors) => anchors,
131            None => return Ok(None), // Couldn't parse file
132        };
133
134        // Check if the anchor exists in the target file
135        if !anchors.contains(&anchor.to_string()) {
136            let (line, column) = document.node_position(node).unwrap_or((1, 1));
137
138            // Create helpful suggestion
139            let suggestion = self.suggest_similar_anchor(anchor, &anchors);
140            let message = if let Some(suggestion) = suggestion {
141                format!(
142                    "Cross-reference anchor '{anchor}' not found in '{file_path}'. Did you mean '{suggestion}'?"
143                )
144            } else {
145                format!(
146                    "Cross-reference anchor '{}' not found in '{}'. Available anchors: {}",
147                    anchor,
148                    file_path,
149                    if anchors.is_empty() {
150                        "none".to_string()
151                    } else {
152                        anchors
153                            .iter()
154                            .take(5)
155                            .map(|s| format!("'{s}'"))
156                            .collect::<Vec<_>>()
157                            .join(", ")
158                    }
159                )
160            };
161
162            return Ok(Some(self.create_violation(
163                message,
164                line,
165                column,
166                Severity::Error,
167            )));
168        }
169
170        Ok(None)
171    }
172
173    /// Resolve target file path relative to current document
174    fn resolve_target_path(&self, current_doc_path: &Path, link_path: &str) -> PathBuf {
175        let current_dir = current_doc_path.parent().unwrap_or(Path::new("."));
176
177        if let Some(stripped) = link_path.strip_prefix("./") {
178            // Explicit relative path: ./file.md
179            current_dir.join(stripped)
180        } else if link_path.starts_with("../") {
181            // Parent directory path: ../file.md
182            current_dir.join(link_path)
183        } else if let Some(stripped) = link_path.strip_prefix('/') {
184            // Absolute path (relative to project root)
185            PathBuf::from(stripped)
186        } else {
187            // Implicit relative path: file.md
188            current_dir.join(link_path)
189        }
190    }
191
192    /// Get all heading anchors from a markdown file (with caching)
193    fn get_file_anchors(&self, file_path: &Path) -> io::Result<Option<Vec<String>>> {
194        let canonical_path = match file_path.canonicalize() {
195            Ok(path) => path,
196            Err(_) => file_path.to_path_buf(),
197        };
198
199        // Check cache first
200        {
201            if let Ok(cache) = self.anchor_cache.read()
202                && let Some(anchors) = cache.get(&canonical_path)
203            {
204                return Ok(Some(anchors.clone()));
205            }
206        }
207
208        // Read and parse the file
209        let content = match fs::read_to_string(file_path) {
210            Ok(content) => content,
211            Err(_) => return Ok(None), // File couldn't be read
212        };
213
214        let anchors = self.extract_heading_anchors(&content);
215
216        // Cache the result
217        {
218            if let Ok(mut cache) = self.anchor_cache.write() {
219                cache.insert(canonical_path, anchors.clone());
220            }
221        }
222
223        Ok(Some(anchors))
224    }
225
226    /// Extract heading anchors from markdown content
227    fn extract_heading_anchors(&self, content: &str) -> Vec<String> {
228        let mut anchors = Vec::new();
229
230        for line in content.lines() {
231            let line = line.trim();
232
233            // Match ATX headings (# ## ### etc)
234            if let Some(heading_text) = self.extract_atx_heading(line) {
235                let anchor = self.generate_anchor_id(&heading_text);
236                if !anchor.is_empty() {
237                    anchors.push(anchor);
238                }
239            }
240        }
241
242        // TODO: Handle Setext headings (underlined with = or -)
243        // This is less common in mdBook but could be added for completeness
244
245        anchors
246    }
247
248    /// Extract heading text from ATX heading line
249    fn extract_atx_heading(&self, line: &str) -> Option<String> {
250        if !line.starts_with('#') {
251            return None;
252        }
253
254        // Count leading hashes
255        let hash_count = line.chars().take_while(|&c| c == '#').count();
256        if hash_count == 0 || hash_count > 6 {
257            return None; // Invalid heading level
258        }
259
260        // Extract text after hashes
261        let rest = &line[hash_count..];
262        let text = if let Some(stripped) = rest.strip_prefix(' ') {
263            stripped
264        } else {
265            rest
266        };
267
268        // Remove trailing hashes if present (closed ATX style)
269        let text = text.trim_end_matches(['#', ' ']);
270
271        if text.is_empty() {
272            return None;
273        }
274
275        Some(text.to_string())
276    }
277
278    /// Generate anchor ID from heading text (following common markdown conventions)
279    fn generate_anchor_id(&self, heading_text: &str) -> String {
280        heading_text
281            .to_lowercase()
282            // Replace whitespace and non-alphanumeric with hyphens
283            .chars()
284            .map(|c| if c.is_alphanumeric() { c } else { '-' })
285            .collect::<String>()
286            // Remove consecutive hyphens
287            .split('-')
288            .filter(|part| !part.is_empty())
289            .collect::<Vec<_>>()
290            .join("-")
291    }
292
293    /// Suggest similar anchor that might be what the user intended
294    fn suggest_similar_anchor(&self, target: &str, available: &[String]) -> Option<String> {
295        if available.is_empty() {
296            return None;
297        }
298
299        // Simple similarity: find anchor that contains target or vice versa
300        for anchor in available {
301            if anchor.contains(target) || target.contains(anchor) {
302                return Some(anchor.clone());
303            }
304        }
305
306        // If no substring match, return the first available anchor as a suggestion
307        Some(available[0].clone())
308    }
309}
310
/// Check if a URL is an external link.
///
/// A URL is external when it starts with one of the `http://`, `https://`,
/// `mailto:`, `ftp://` or `tel:` prefixes, or when it is protocol-relative
/// (`//host/path`). Scheme matching ignores ASCII case because URI schemes
/// are case-insensitive, so `HTTPS://example.com` is external too.
fn is_external_link(url: &str) -> bool {
    const EXTERNAL_PREFIXES: [&str; 5] = ["http://", "https://", "mailto:", "ftp://", "tel:"];

    url.starts_with("//")
        || EXTERNAL_PREFIXES.iter().any(|prefix| {
            // `get` yields `None` when the URL is shorter than the prefix or
            // the cut would split a multi-byte character, so this never panics.
            url.get(..prefix.len())
                .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
        })
}
319
320#[cfg(test)]
321mod tests {
322    use super::*;
323    use crate::rule::Rule;
324    use std::fs;
325    use tempfile::TempDir;
326
327    fn create_test_document(content: &str, file_path: &Path) -> crate::error::Result<Document> {
328        if let Some(parent) = file_path.parent() {
329            fs::create_dir_all(parent)?;
330        }
331        fs::write(file_path, content)?;
332        Document::new(content.to_string(), file_path.to_path_buf())
333    }
334
335    #[test]
336    fn test_mdbook006_valid_cross_references() -> crate::error::Result<()> {
337        let temp_dir = TempDir::new()?;
338        let root = temp_dir.path();
339
340        // Create target file with headings
341        let target_content = r#"# Chapter 2
342
343## Overview
344
345Some content here.
346
347### Implementation Details
348
349More details.
350"#;
351        create_test_document(target_content, &root.join("chapter2.md"))?;
352
353        // Create source file with links to target
354        let source_content = r#"# Chapter 1
355
356See [Chapter 2](chapter2.md#chapter-2) for more info.
357
358Check out the [overview](chapter2.md#overview) section.
359
360The [implementation](chapter2.md#implementation-details) is complex.
361"#;
362        let source_path = root.join("chapter1.md");
363        let doc = create_test_document(source_content, &source_path)?;
364
365        let rule = MDBOOK006::default();
366        let violations = rule.check(&doc)?;
367
368        assert_eq!(
369            violations.len(),
370            0,
371            "Valid cross-references should have no violations"
372        );
373        Ok(())
374    }
375
376    #[test]
377    fn test_mdbook006_invalid_anchor() -> crate::error::Result<()> {
378        let temp_dir = TempDir::new()?;
379        let root = temp_dir.path();
380
381        // Create target file with headings
382        let target_content = r#"# Chapter 2
383
384## Overview
385
386Some content.
387"#;
388        create_test_document(target_content, &root.join("chapter2.md"))?;
389
390        // Create source file with invalid anchor
391        let source_content = r#"# Chapter 1
392
393See [nonexistent section](chapter2.md#nonexistent).
394"#;
395        let source_path = root.join("chapter1.md");
396        let doc = create_test_document(source_content, &source_path)?;
397
398        let rule = MDBOOK006::default();
399        let violations = rule.check(&doc)?;
400
401        assert_eq!(violations.len(), 1);
402        assert_eq!(violations[0].rule_id, "MDBOOK006");
403        assert!(
404            violations[0]
405                .message
406                .contains("anchor 'nonexistent' not found")
407        );
408        assert!(violations[0].message.contains("chapter2.md"));
409        Ok(())
410    }
411
412    #[test]
413    fn test_mdbook006_missing_target_file() -> crate::error::Result<()> {
414        let temp_dir = TempDir::new()?;
415        let root = temp_dir.path();
416
417        // Create source file linking to nonexistent file
418        let source_content = r#"# Chapter 1
419
420See [missing](nonexistent.md#section).
421"#;
422        let source_path = root.join("chapter1.md");
423        let doc = create_test_document(source_content, &source_path)?;
424
425        let rule = MDBOOK006::default();
426        let violations = rule.check(&doc)?;
427
428        // Should not report violations for missing files (MDBOOK002's job)
429        assert_eq!(violations.len(), 0);
430        Ok(())
431    }
432
433    #[test]
434    fn test_mdbook006_same_document_anchors() -> crate::error::Result<()> {
435        let temp_dir = TempDir::new()?;
436        let root = temp_dir.path();
437
438        // Create file with internal anchor link
439        let content = r#"# Chapter 1
440
441## Section A
442
443See [Section B](#section-b) below.
444
445## Section B
446
447Content here.
448"#;
449        let file_path = root.join("chapter1.md");
450        let doc = create_test_document(content, &file_path)?;
451
452        let rule = MDBOOK006::default();
453        let violations = rule.check(&doc)?;
454
455        // Should not process same-document anchors
456        assert_eq!(violations.len(), 0);
457        Ok(())
458    }
459
460    #[test]
461    fn test_mdbook006_external_links() -> crate::error::Result<()> {
462        let temp_dir = TempDir::new()?;
463        let root = temp_dir.path();
464
465        // Create file with external links
466        let content = r#"# Chapter 1
467
468See [external](https://example.com#section).
469"#;
470        let file_path = root.join("chapter1.md");
471        let doc = create_test_document(content, &file_path)?;
472
473        let rule = MDBOOK006::default();
474        let violations = rule.check(&doc)?;
475
476        // Should ignore external links
477        assert_eq!(violations.len(), 0);
478        Ok(())
479    }
480
481    #[test]
482    fn test_mdbook006_no_anchor_links() -> crate::error::Result<()> {
483        let temp_dir = TempDir::new()?;
484        let root = temp_dir.path();
485
486        // Create target file
487        create_test_document("# Target", &root.join("target.md"))?;
488
489        // Create file with links without anchors
490        let content = r#"# Chapter 1
491
492See [target](target.md) for more.
493"#;
494        let file_path = root.join("chapter1.md");
495        let doc = create_test_document(content, &file_path)?;
496
497        let rule = MDBOOK006::default();
498        let violations = rule.check(&doc)?;
499
500        // Should ignore links without anchors
501        assert_eq!(violations.len(), 0);
502        Ok(())
503    }
504
505    #[test]
506    fn test_extract_atx_heading() {
507        let rule = MDBOOK006::default();
508
509        assert_eq!(
510            rule.extract_atx_heading("# Heading"),
511            Some("Heading".to_string())
512        );
513        assert_eq!(
514            rule.extract_atx_heading("## Sub Heading"),
515            Some("Sub Heading".to_string())
516        );
517        assert_eq!(
518            rule.extract_atx_heading("### Deep Heading ###"),
519            Some("Deep Heading".to_string())
520        );
521        assert_eq!(
522            rule.extract_atx_heading("#No Space"),
523            Some("No Space".to_string())
524        );
525
526        // Invalid cases
527        assert_eq!(rule.extract_atx_heading("Not a heading"), None);
528        assert_eq!(rule.extract_atx_heading(""), None);
529        assert_eq!(rule.extract_atx_heading("#"), None);
530        assert_eq!(rule.extract_atx_heading("# "), None);
531    }
532
533    #[test]
534    fn test_generate_anchor_id() {
535        let rule = MDBOOK006::default();
536
537        assert_eq!(rule.generate_anchor_id("Simple Heading"), "simple-heading");
538        assert_eq!(
539            rule.generate_anchor_id("Complex: Heading with! Punctuation?"),
540            "complex-heading-with-punctuation"
541        );
542        assert_eq!(
543            rule.generate_anchor_id("Multiple   Spaces"),
544            "multiple-spaces"
545        );
546        assert_eq!(rule.generate_anchor_id("UPPER case"), "upper-case");
547        assert_eq!(rule.generate_anchor_id("123 Numbers"), "123-numbers");
548        assert_eq!(rule.generate_anchor_id(""), "");
549    }
550
551    #[test]
552    fn test_mdbook006_nested_directories() -> crate::error::Result<()> {
553        let temp_dir = TempDir::new()?;
554        let root = temp_dir.path();
555
556        // Create nested target file
557        let target_content = r#"# Deep Chapter
558
559## Nested Section
560
561Content here.
562"#;
563        create_test_document(target_content, &root.join("guide/deep.md"))?;
564
565        // Create source file with relative link
566        let source_content = r#"# Main Chapter
567
568See [nested section](guide/deep.md#nested-section).
569"#;
570        let source_path = root.join("chapter.md");
571        let doc = create_test_document(source_content, &source_path)?;
572
573        let rule = MDBOOK006::default();
574        let violations = rule.check(&doc)?;
575
576        assert_eq!(
577            violations.len(),
578            0,
579            "Nested directory cross-references should work"
580        );
581        Ok(())
582    }
583
584    #[test]
585    fn test_mdbook006_helpful_suggestions() -> crate::error::Result<()> {
586        let temp_dir = TempDir::new()?;
587        let root = temp_dir.path();
588
589        // Create target file with similar heading
590        let target_content = r#"# Target
591
592## Implementation Details
593
594Content here.
595"#;
596        create_test_document(target_content, &root.join("target.md"))?;
597
598        // Create source file with similar but wrong anchor
599        let source_content = r#"# Source
600
601See [details](target.md#implementation).
602"#;
603        let source_path = root.join("source.md");
604        let doc = create_test_document(source_content, &source_path)?;
605
606        let rule = MDBOOK006::default();
607        let violations = rule.check(&doc)?;
608
609        assert_eq!(violations.len(), 1);
610        assert!(violations[0].message.contains("Did you mean"));
611        assert!(violations[0].message.contains("implementation-details"));
612        Ok(())
613    }
614}