mdbook_lint_core/rules/
mdbook005.rs1use crate::rule::{Rule, RuleCategory, RuleMetadata};
7use crate::{
8 Document,
9 violation::{Severity, Violation},
10};
11use std::collections::HashSet;
12use std::path::{Path, PathBuf};
13use std::{fs, io};
14
/// MDBOOK005: reports markdown files that exist on disk but are never linked
/// from `SUMMARY.md`.
///
/// The rule keeps a set of file names that are exempt from the check. Names
/// are stored lower-cased so that lookups are case-insensitive.
#[derive(Debug, Clone)]
pub struct MDBOOK005 {
    /// Lower-cased file names (not paths) that are never reported as orphans.
    ignored_files: HashSet<String>,
}
35
36impl Default for MDBOOK005 {
37 fn default() -> Self {
38 let mut ignored_files = HashSet::new();
39 ignored_files.insert("readme.md".to_string());
41 ignored_files.insert("contributing.md".to_string());
42 ignored_files.insert("license.md".to_string());
43 ignored_files.insert("changelog.md".to_string());
44 ignored_files.insert("summary.md".to_string()); Self { ignored_files }
47 }
48}
49
50impl MDBOOK005 {
51 pub fn with_ignored_files(additional_ignored: Vec<String>) -> Self {
53 let mut instance = Self::default();
54 for file in additional_ignored {
55 instance.ignored_files.insert(file.to_lowercase());
56 }
57 instance
58 }
59
60 pub fn ignore_file(&mut self, filename: &str) {
62 self.ignored_files.insert(filename.to_lowercase());
63 }
64}
65
66impl Rule for MDBOOK005 {
67 fn id(&self) -> &'static str {
68 "MDBOOK005"
69 }
70
71 fn name(&self) -> &'static str {
72 "orphaned-files"
73 }
74
75 fn description(&self) -> &'static str {
76 "Detect orphaned markdown files not referenced in SUMMARY.md"
77 }
78
79 fn metadata(&self) -> RuleMetadata {
80 RuleMetadata::stable(RuleCategory::MdBook).introduced_in("mdbook-lint v0.2.0")
81 }
82
83 fn check_with_ast<'a>(
84 &self,
85 document: &Document,
86 _ast: Option<&'a comrak::nodes::AstNode<'a>>,
87 ) -> crate::error::Result<Vec<Violation>> {
88 let mut violations = Vec::new();
89
90 if !is_summary_file(document) {
92 return Ok(violations);
93 }
94
95 let project_root = if document.path.is_absolute() {
97 document.path.parent().unwrap_or(Path::new("."))
98 } else {
99 Path::new(".")
101 };
102
103 let referenced_files = match self.parse_referenced_files(document) {
105 Ok(files) => files,
106 Err(_) => {
107 return Ok(violations);
109 }
110 };
111
112 let all_markdown_files = match self.find_markdown_files(project_root) {
114 Ok(files) => files,
115 Err(_) => {
116 return Ok(violations);
118 }
119 };
120
121 let orphaned_files = self.find_orphaned_files(&referenced_files, &all_markdown_files);
123
124 for orphaned_file in orphaned_files {
126 let relative_path = orphaned_file
127 .strip_prefix(project_root)
128 .unwrap_or(orphaned_file.as_path())
129 .to_string_lossy()
130 .replace('\\', "/") .to_string();
132
133 violations.push(self.create_violation(
134 format!("Orphaned file '{relative_path}' is not referenced in SUMMARY.md"),
135 1, 1,
137 Severity::Warning,
138 ));
139 }
140
141 Ok(violations)
142 }
143}
144
145impl MDBOOK005 {
146 fn parse_referenced_files(
148 &self,
149 document: &Document,
150 ) -> Result<HashSet<PathBuf>, Box<dyn std::error::Error>> {
151 let mut referenced = HashSet::new();
152 let project_root = document.path.parent().unwrap_or(Path::new("."));
153
154 for line in &document.lines {
155 if let Some(path) = self.extract_file_path(line) {
156 let absolute_path = project_root.join(&path);
158 if let Ok(canonical) = absolute_path.canonicalize() {
159 referenced.insert(canonical);
160 } else {
161 referenced.insert(absolute_path);
163 }
164 }
165 }
166
167 Ok(referenced)
168 }
169
170 fn extract_file_path(&self, line: &str) -> Option<String> {
172 if let Some(start) = line.find("](") {
174 let after_bracket = &line[start + 2..];
175 if let Some(end) = after_bracket.find(')') {
176 let path = &after_bracket[..end];
177
178 if path.is_empty() || path.starts_with("http://") || path.starts_with("https://") {
180 return None;
181 }
182
183 let path_without_anchor = path.split('#').next().unwrap_or(path);
185
186 if path_without_anchor.ends_with(".md")
188 || path_without_anchor.ends_with(".markdown")
189 {
190 return Some(path_without_anchor.to_string());
191 }
192 }
193 }
194
195 None
196 }
197
198 fn find_markdown_files(&self, project_root: &Path) -> io::Result<HashSet<PathBuf>> {
200 let mut markdown_files = HashSet::new();
201 scan_directory_recursive(project_root, &mut markdown_files)?;
202 Ok(markdown_files)
203 }
204
205 fn find_orphaned_files(
207 &self,
208 referenced: &HashSet<PathBuf>,
209 all_files: &HashSet<PathBuf>,
210 ) -> Vec<PathBuf> {
211 all_files
212 .iter()
213 .filter(|&file| {
214 if referenced.contains(file) {
216 return false;
217 }
218
219 if let Some(filename) = file.file_name().and_then(|n| n.to_str())
221 && self.ignored_files.contains(&filename.to_lowercase())
222 {
223 return false;
224 }
225
226 true
227 })
228 .cloned()
229 .collect()
230 }
231}
232
233fn is_summary_file(document: &Document) -> bool {
235 document
236 .path
237 .file_name()
238 .and_then(|name| name.to_str())
239 .map(|name| name.eq_ignore_ascii_case("summary.md"))
240 .unwrap_or(false)
241}
242
/// Recursively collects every `.md` / `.markdown` file below `dir` into
/// `markdown_files`.
///
/// Directories named `target`, `node_modules`, `.git`, `.svn` or `.hg` are
/// skipped. Collected paths are canonicalized when possible; otherwise the
/// path as discovered is stored.
///
/// Symlinked directories are followed, but each real directory is visited at
/// most once, so a symlink cycle cannot cause unbounded recursion.
///
/// # Errors
///
/// Returns the first I/O error hit while reading a directory or its entries.
fn scan_directory_recursive(dir: &Path, markdown_files: &mut HashSet<PathBuf>) -> io::Result<()> {
    let mut visited_dirs = HashSet::new();
    scan_directory_tracked(dir, markdown_files, &mut visited_dirs)
}

/// Worker for [`scan_directory_recursive`]; `visited_dirs` records the
/// canonical path of every directory already scanned.
fn scan_directory_tracked(
    dir: &Path,
    markdown_files: &mut HashSet<PathBuf>,
    visited_dirs: &mut HashSet<PathBuf>,
) -> io::Result<()> {
    // Identify the directory by its canonical path so that a symlink pointing
    // back to an ancestor is recognised as already visited.
    let identity = dir.canonicalize().unwrap_or_else(|_| dir.to_path_buf());
    if !visited_dirs.insert(identity) {
        return Ok(());
    }

    for entry in fs::read_dir(dir)? {
        let path = entry?.path();

        if path.is_dir() {
            let skipped = path
                .file_name()
                .and_then(|name| name.to_str())
                .map_or(false, |name| {
                    matches!(name, "target" | "node_modules" | ".git" | ".svn" | ".hg")
                });
            if !skipped {
                scan_directory_tracked(&path, markdown_files, visited_dirs)?;
            }
            continue;
        }

        let is_markdown = path
            .extension()
            .and_then(|ext| ext.to_str())
            .map_or(false, |ext| matches!(ext, "md" | "markdown"));
        if is_markdown {
            let resolved = path.canonicalize().unwrap_or(path);
            markdown_files.insert(resolved);
        }
    }

    Ok(())
}
276
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `content` to `file_path` (creating parent directories first)
    /// and returns a `Document` built from the same content and path.
    fn create_test_document(content: &str, file_path: &Path) -> crate::error::Result<Document> {
        if let Some(parent) = file_path.parent() {
            fs::create_dir_all(parent)?;
        }
        fs::write(file_path, content)?;
        Document::new(content.to_string(), file_path.to_path_buf())
    }

    /// Writes `SUMMARY.md` and the given `(relative path, content)` files
    /// under `root`, then runs `rule` against the summary document.
    fn lint_book(
        rule: &MDBOOK005,
        root: &Path,
        summary: &str,
        files: &[(&str, &str)],
    ) -> crate::error::Result<Vec<Violation>> {
        let summary_doc = create_test_document(summary, &root.join("SUMMARY.md"))?;
        for (relative, content) in files {
            create_test_document(content, &root.join(relative))?;
        }
        let violations = rule.check(&summary_doc)?;
        Ok(violations)
    }

    #[test]
    fn test_mdbook005_no_orphans() -> crate::error::Result<()> {
        let book = TempDir::new()?;
        let summary = "# Summary\n\n[Introduction](intro.md)\n- [Chapter 1](chapter1.md)\n- [Chapter 2](chapter2.md)\n";

        let violations = lint_book(
            &MDBOOK005::default(),
            book.path(),
            summary,
            &[
                ("intro.md", "# Intro"),
                ("chapter1.md", "# Chapter 1"),
                ("chapter2.md", "# Chapter 2"),
            ],
        )?;

        assert_eq!(
            violations.len(),
            0,
            "Should have no violations when all files are referenced"
        );
        Ok(())
    }

    #[test]
    fn test_mdbook005_detect_orphans() -> crate::error::Result<()> {
        let book = TempDir::new()?;
        let summary = "# Summary\n\n[Introduction](intro.md)\n- [Chapter 1](chapter1.md)\n";

        let violations = lint_book(
            &MDBOOK005::default(),
            book.path(),
            summary,
            &[
                ("intro.md", "# Intro"),
                ("chapter1.md", "# Chapter 1"),
                // Neither of these is linked from the summary.
                ("orphan.md", "# Orphan"),
                ("another.md", "# Another"),
            ],
        )?;

        assert_eq!(violations.len(), 2, "Should detect 2 orphaned files");

        let mentions = |needle: &str| violations.iter().any(|v| v.message.contains(needle));
        assert!(mentions("orphan.md"));
        assert!(mentions("another.md"));

        Ok(())
    }

    #[test]
    fn test_mdbook005_ignore_common_files() -> crate::error::Result<()> {
        let book = TempDir::new()?;
        let summary = "# Summary\n\n- [Chapter 1](chapter1.md)\n";

        let violations = lint_book(
            &MDBOOK005::default(),
            book.path(),
            summary,
            &[
                ("chapter1.md", "# Chapter 1"),
                // Unreferenced, but on the default ignore list.
                ("README.md", "# README"),
                ("CONTRIBUTING.md", "# Contributing"),
                ("LICENSE.md", "# License"),
            ],
        )?;

        assert_eq!(
            violations.len(),
            0,
            "Should ignore common files like README.md"
        );
        Ok(())
    }

    #[test]
    fn test_mdbook005_nested_directories() -> crate::error::Result<()> {
        let book = TempDir::new()?;
        let summary = "# Summary\n\n- [Chapter 1](guide/chapter1.md)\n";

        let violations = lint_book(
            &MDBOOK005::default(),
            book.path(),
            summary,
            &[
                ("guide/chapter1.md", "# Chapter 1"),
                ("guide/orphan.md", "# Orphan"),
            ],
        )?;

        assert_eq!(
            violations.len(),
            1,
            "Should detect orphaned files in subdirectories"
        );
        assert!(violations[0].message.contains("guide/orphan.md"));
        Ok(())
    }

    #[test]
    fn test_mdbook005_draft_chapters() -> crate::error::Result<()> {
        let book = TempDir::new()?;
        // The second entry is a draft chapter: a link with an empty target.
        let summary = "# Summary\n\n- [Chapter 1](chapter1.md)\n- [Draft Chapter]()\n";

        let violations = lint_book(
            &MDBOOK005::default(),
            book.path(),
            summary,
            &[("chapter1.md", "# Chapter 1"), ("orphan.md", "# Orphan")],
        )?;

        assert_eq!(violations.len(), 1);
        assert!(violations[0].message.contains("orphan.md"));
        Ok(())
    }

    #[test]
    fn test_mdbook005_non_summary_files() -> crate::error::Result<()> {
        let book = TempDir::new()?;

        let readme = create_test_document("# Regular File", &book.path().join("README.md"))?;
        let violations = MDBOOK005::default().check(&readme)?;

        assert_eq!(
            violations.len(),
            0,
            "Should not run on non-SUMMARY.md files"
        );
        Ok(())
    }

    #[test]
    fn test_extract_file_path() {
        let rule = MDBOOK005::default();
        let found = |path: &str| Some(path.to_string());

        // Plain links, with and without list markers or indentation.
        assert_eq!(
            rule.extract_file_path("- [Chapter](chapter.md)"),
            found("chapter.md")
        );
        assert_eq!(
            rule.extract_file_path("[Intro](intro.md)"),
            found("intro.md")
        );
        assert_eq!(
            rule.extract_file_path("  - [Nested](sub/nested.md)"),
            found("sub/nested.md")
        );

        // The anchor is dropped.
        assert_eq!(
            rule.extract_file_path("- [Link](file.md#section)"),
            found("file.md")
        );

        // Inputs that must not yield a path.
        assert_eq!(rule.extract_file_path("- [Draft]()"), None);
        assert_eq!(
            rule.extract_file_path("- [External](https://example.com)"),
            None
        );
        assert_eq!(rule.extract_file_path("- [Non-MD](image.png)"), None);
        assert_eq!(rule.extract_file_path("Regular text"), None);
    }

    #[test]
    fn test_custom_ignored_files() -> crate::error::Result<()> {
        let book = TempDir::new()?;
        let summary = "# Summary\n\n- [Chapter 1](chapter1.md)\n";

        let rule = MDBOOK005::with_ignored_files(vec!["custom.md".to_string()]);
        let violations = lint_book(
            &rule,
            book.path(),
            summary,
            &[
                ("chapter1.md", "# Chapter 1"),
                ("custom.md", "# Custom"),
                ("orphan.md", "# Orphan"),
            ],
        )?;

        assert_eq!(violations.len(), 1);
        assert!(violations[0].message.contains("orphan.md"));
        assert!(!violations[0].message.contains("custom.md"));
        Ok(())
    }
}