mdbook_lint_core/rules/
mdbook002.rs

1//! MDBOOK002: Internal link validation
2//!
3//! This rule validates that internal links (relative paths) resolve to existing files.
4
5use crate::rule::{AstRule, RuleCategory, RuleMetadata};
6use crate::{
7    Document,
8    violation::{Severity, Violation},
9};
10use comrak::nodes::NodeValue;
11use std::path::{Path, PathBuf};
12
/// Lint rule `MDBOOK002`: reports internal (relative-path) links whose
/// target does not exist on the filesystem.
///
/// The type carries no state; all behaviour lives in its trait and inherent
/// implementations.
pub struct MDBOOK002;
15
16impl AstRule for MDBOOK002 {
17    fn id(&self) -> &'static str {
18        "MDBOOK002"
19    }
20
21    fn name(&self) -> &'static str {
22        "internal-link-validation"
23    }
24
25    fn description(&self) -> &'static str {
26        "Internal links must resolve to existing files"
27    }
28
29    fn metadata(&self) -> RuleMetadata {
30        RuleMetadata::stable(RuleCategory::MdBook).introduced_in("mdbook-lint v0.1.0")
31    }
32
33    fn check_ast<'a>(
34        &self,
35        document: &Document,
36        ast: &'a comrak::nodes::AstNode<'a>,
37    ) -> crate::error::Result<Vec<Violation>> {
38        self.check_ast_nodes(document, ast)
39    }
40}
41
42impl MDBOOK002 {
43    /// Check AST nodes for internal link violations
44    fn check_ast_nodes<'a>(
45        &self,
46        document: &Document,
47        ast: &'a comrak::nodes::AstNode<'a>,
48    ) -> crate::error::Result<Vec<Violation>> {
49        let mut violations = Vec::new();
50
51        // Walk through all nodes in the AST
52        for node in ast.descendants() {
53            if let NodeValue::Link(link) = &node.data.borrow().value {
54                let url = &link.url;
55
56                // Skip external links (http/https/mailto/etc)
57                if is_external_link(url) {
58                    continue;
59                }
60
61                // Skip anchor-only links (same document)
62                if url.starts_with('#') {
63                    continue;
64                }
65
66                // Check if the internal link resolves
67                if let Some(violation) = validate_internal_link(document, node, url)? {
68                    violations.push(violation);
69                }
70            }
71        }
72
73        Ok(violations)
74    }
75}
76
/// Check if a URL is an external link
///
/// A URL is treated as external (and therefore not checked against the
/// filesystem) when either:
///
/// * it is a network-path reference such as `//cdn.example.com/lib.js`, or
/// * it begins with a URI scheme: an ASCII letter followed by any number of
///   ASCII letters, digits, `+`, `-` or `.`, terminated by `:`.
///
/// Schemes are recognised regardless of case, so `HTTPS://…` and `Mailto:…`
/// are external just like their lowercase forms. Any syntactically valid
/// scheme qualifies, not only `http`, `https`, `mailto`, `ftp` and `tel`.
///
/// Relative paths such as `./a:b.md` are not external: the text before the
/// first `:` contains characters that cannot appear in a scheme.
fn is_external_link(url: &str) -> bool {
    // Protocol-relative links inherit the scheme of the hosting page and
    // always point at another host.
    if url.starts_with("//") {
        return true;
    }

    let scheme = match url.split_once(':') {
        Some((candidate, _rest)) => candidate,
        None => return false,
    };

    // First character must be a letter; an empty candidate fails here.
    let mut chars = scheme.chars();
    let starts_with_letter = matches!(chars.next(), Some(c) if c.is_ascii_alphabetic());

    starts_with_letter
        && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
}
85
86/// Validate an internal link and return a violation if it doesn't resolve
87fn validate_internal_link<'a>(
88    document: &Document,
89    node: &'a comrak::nodes::AstNode<'a>,
90    url: &str,
91) -> crate::error::Result<Option<Violation>> {
92    // Remove anchor fragment if present (e.g., "file.md#section" -> "file.md")
93    let path_part = url.split('#').next().unwrap_or(url);
94
95    // Skip empty paths
96    if path_part.is_empty() {
97        return Ok(None);
98    }
99
100    // Resolve the target path relative to the current document
101    let target_path = resolve_link_path(&document.path, path_part);
102
103    // Check if the target file exists
104    if !target_path.exists() {
105        let (line, column) = document.node_position(node).unwrap_or((1, 1));
106
107        return Ok(Some(MDBOOK002.create_violation(
108            format!("Internal link '{url}' does not resolve to an existing file"),
109            line,
110            column,
111            Severity::Error,
112        )));
113    }
114
115    Ok(None)
116}
117
/// Resolve a link path relative to the current document
///
/// * A link beginning with `/` has that single leading slash removed and
///   the remainder is returned as-is; it is not joined to the document's
///   directory.
/// * Any other link is joined to the parent directory of
///   `current_doc_path` (or `.` when the path has no parent). A leading
///   `./` is dropped before joining; `../` segments are kept verbatim and
///   are not normalised.
fn resolve_link_path(current_doc_path: &Path, link_path: &str) -> PathBuf {
    // Root-anchored link: /file.md
    if let Some(from_root) = link_path.strip_prefix('/') {
        return PathBuf::from(from_root);
    }

    let base_dir = current_doc_path
        .parent()
        .unwrap_or_else(|| Path::new("."));

    // `./file.md`, `../file.md` and `file.md` are all joined to the
    // document's directory; only the explicit `./` marker is trimmed.
    let relative = link_path.strip_prefix("./").unwrap_or(link_path);
    base_dir.join(relative)
}
137
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rule::Rule;
    use std::fs;
    use tempfile::TempDir;

    /// Write `content` to `file_name` inside `temp_dir` and build a
    /// `Document` for that on-disk location.
    fn create_test_document(
        content: &str,
        file_name: &str,
        temp_dir: &TempDir,
    ) -> crate::error::Result<Document> {
        let location = temp_dir.path().join(file_name);
        fs::write(&location, content)?;
        Document::new(content.to_string(), location)
    }

    /// Links to files that exist, plus external and anchor links, produce
    /// no violations.
    #[test]
    fn test_mdbook002_valid_links() -> crate::error::Result<()> {
        let sandbox = TempDir::new()?;
        let root = sandbox.path();

        // Files the document links to
        fs::write(root.join("target.md"), "# Target")?;
        fs::create_dir_all(root.join("subdir"))?;
        fs::write(root.join("subdir/other.md"), "# Other")?;

        let markdown = r#"# Test Document

[Valid relative link](./target.md)
[Valid implicit link](target.md)
[Valid subdirectory link](subdir/other.md)
[Valid external link](https://example.com)
[Valid anchor link](#section)
"#;

        let doc = create_test_document(markdown, "test.md", &sandbox)?;
        let found = MDBOOK002.check(&doc)?;

        assert_eq!(found.len(), 0);
        Ok(())
    }

    /// Each link to a missing file is reported, in document order.
    #[test]
    fn test_mdbook002_invalid_links() -> crate::error::Result<()> {
        let sandbox = TempDir::new()?;

        let markdown = r#"# Test Document

[Invalid link](./nonexistent.md)
[Another invalid link](missing/file.md)
[Valid external link](https://example.com)
"#;

        let doc = create_test_document(markdown, "test.md", &sandbox)?;
        let found = MDBOOK002.check(&doc)?;

        assert_eq!(found.len(), 2);

        let first = &found[0];
        assert_eq!(first.rule_id, "MDBOOK002");
        assert!(first.message.contains("nonexistent.md"));

        let second = &found[1];
        assert_eq!(second.rule_id, "MDBOOK002");
        assert!(second.message.contains("missing/file.md"));
        Ok(())
    }

    /// The `#fragment` is ignored for the existence check but kept in the
    /// reported message.
    #[test]
    fn test_mdbook002_links_with_anchors() -> crate::error::Result<()> {
        let sandbox = TempDir::new()?;

        // Only this target exists
        fs::write(sandbox.path().join("target.md"), "# Target")?;

        let markdown = r#"# Test Document

[Valid link with anchor](./target.md#section)
[Invalid link with anchor](./nonexistent.md#section)
"#;

        let doc = create_test_document(markdown, "test.md", &sandbox)?;
        let found = MDBOOK002.check(&doc)?;

        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("nonexistent.md#section"));
        Ok(())
    }

    /// Scheme-prefixed URLs are external; paths and anchors are not.
    #[test]
    fn test_is_external_link() {
        let external = [
            "https://example.com",
            "http://example.com",
            "mailto:test@example.com",
            "ftp://files.example.com",
            "tel:+1234567890",
        ];
        for url in external.iter() {
            assert!(is_external_link(url));
        }

        let internal = ["./local.md", "../parent.md", "file.md", "#anchor"];
        for url in internal.iter() {
            assert!(!is_external_link(url));
        }
    }

    /// Relative links are joined to the document's directory without
    /// normalising `..` segments.
    #[test]
    fn test_resolve_link_path() {
        let current = PathBuf::from("/project/src/chapter.md");

        let cases = [
            ("./other.md", "/project/src/other.md"),
            ("../README.md", "/project/src/../README.md"),
            ("other.md", "/project/src/other.md"),
            ("subdir/file.md", "/project/src/subdir/file.md"),
        ];

        for &(link, expected) in cases.iter() {
            assert_eq!(resolve_link_path(&current, link), PathBuf::from(expected));
        }
    }
}