1use crate::rule::{AstRule, RuleCategory, RuleMetadata};
7use crate::{
8 Document,
9 violation::{Severity, Violation},
10};
11use comrak::nodes::AstNode;
12use std::collections::HashMap;
13use std::path::{Path, PathBuf};
14use std::sync::{Arc, RwLock};
15use std::{fs, io};
16
/// Lint rule MDBOOK007: validates the `{{#include …}}` and `{{#rustdoc_include …}}`
/// directives found in a document.
#[derive(Default)]
pub struct MDBOOK007 {
    /// Contents of files that have already been looked up, keyed by the path
    /// `get_file_content` computed for them (canonicalized when that succeeds).
    /// `None` records that the file could not be read.
    file_cache: Arc<RwLock<HashMap<PathBuf, Option<String>>>>,
    /// Paths currently being processed. `check_ast` resets it to the document
    /// under check, and `check_circular_dependency` consults it.
    processing_stack: Arc<RwLock<Vec<PathBuf>>>,
}
44
45impl AstRule for MDBOOK007 {
46 fn id(&self) -> &'static str {
47 "MDBOOK007"
48 }
49
50 fn name(&self) -> &'static str {
51 "include-validation"
52 }
53
54 fn description(&self) -> &'static str {
55 "Include directives must point to existing files with valid syntax"
56 }
57
58 fn metadata(&self) -> RuleMetadata {
59 RuleMetadata::stable(RuleCategory::MdBook).introduced_in("mdbook-lint v0.2.0")
60 }
61
62 fn check_ast<'a>(
63 &self,
64 document: &Document,
65 _ast: &'a AstNode<'a>,
66 ) -> crate::error::Result<Vec<Violation>> {
67 let mut violations = Vec::new();
68
69 {
71 if let Ok(mut stack) = self.processing_stack.write() {
72 stack.clear();
73 stack.push(document.path.clone());
74 }
75 }
76
77 let include_directives = self.find_include_directives(&document.content);
79
80 for directive in include_directives {
81 if let Some(violation) = self.validate_include_directive(document, &directive)? {
82 violations.push(violation);
83 }
84 }
85
86 Ok(violations)
87 }
88}
89
/// One include directive located in a document, together with its position.
#[derive(Debug, Clone)]
struct IncludeDirective {
    /// Full directive text, from the opening `{{#` through the closing `}}`.
    // Nothing in this module reads the field; it is kept for `Debug` output.
    #[allow(dead_code)]
    full_match: String,
    /// Directive keyword: `include` or `rustdoc_include`.
    // Nothing in this module reads the field; it is kept for `Debug` output.
    #[allow(dead_code)]
    directive_type: String,
    /// Path portion of the directive argument (the text before the first `:`).
    file_path: String,
    /// Text after the first `:` of the argument, if any: a line range or an anchor name.
    range_or_anchor: Option<String>,
    /// 1-based line of the document on which the directive appears.
    line_number: usize,
    /// 1-based column reported for the directive's opening `{{#`.
    column: usize,
}
108
109impl MDBOOK007 {
110 fn find_include_directives(&self, content: &str) -> Vec<IncludeDirective> {
112 let mut directives = Vec::new();
113
114 for (line_number, line) in content.lines().enumerate() {
115 if let Some(directive) = self.parse_include_directive(line, line_number + 1) {
118 directives.push(directive);
119 }
120 }
121
122 directives
123 }
124
125 fn parse_include_directive(&self, line: &str, line_number: usize) -> Option<IncludeDirective> {
127 let trimmed = line.trim();
129
130 if let Some(start) = trimmed.find("{{#")
132 && let Some(end) = trimmed[start..].find("}}")
133 {
134 let directive_content = &trimmed[start + 3..start + end];
135 let parts: Vec<&str> = directive_content.split_whitespace().collect();
136
137 if parts.len() >= 2 {
138 let directive_type = parts[0];
139
140 if directive_type == "include" || directive_type == "rustdoc_include" {
142 let file_spec = parts[1];
143 let (file_path, range_or_anchor) = self.parse_file_spec(file_spec);
144
145 return Some(IncludeDirective {
146 full_match: trimmed[start..start + end + 2].to_string(),
147 directive_type: directive_type.to_string(),
148 file_path: file_path.to_string(),
149 range_or_anchor,
150 line_number,
151 column: start + 1,
152 });
153 }
154 }
155 }
156
157 None
158 }
159
160 fn parse_file_spec<'a>(&self, file_spec: &'a str) -> (&'a str, Option<String>) {
162 if let Some(colon_pos) = file_spec.find(':') {
169 let file_path = &file_spec[..colon_pos];
170 let range_spec = &file_spec[colon_pos + 1..];
171 (file_path, Some(range_spec.to_string()))
172 } else {
173 (file_spec, None)
174 }
175 }
176
177 fn validate_include_directive(
179 &self,
180 document: &Document,
181 directive: &IncludeDirective,
182 ) -> crate::error::Result<Option<Violation>> {
183 let target_path = self.resolve_include_path(&document.path, &directive.file_path);
185
186 match self.get_file_content(&target_path)? {
188 Some(content) => {
189 if let Some(range_or_anchor) = &directive.range_or_anchor
191 && let Some(violation) = self.validate_range_or_anchor(
192 directive,
193 &target_path,
194 &content,
195 range_or_anchor,
196 )?
197 {
198 return Ok(Some(violation));
199 }
200
201 if let Some(violation) = self.check_circular_dependency(&target_path, directive)? {
203 return Ok(Some(violation));
204 }
205
206 Ok(None)
207 }
208 None => {
209 let message = format!(
211 "Include file '{}' not found. Resolved path: {}",
212 directive.file_path,
213 target_path.display()
214 );
215
216 Ok(Some(self.create_violation(
217 message,
218 directive.line_number,
219 directive.column,
220 Severity::Error,
221 )))
222 }
223 }
224 }
225
226 fn resolve_include_path(&self, current_doc_path: &Path, include_path: &str) -> PathBuf {
228 let current_dir = current_doc_path.parent().unwrap_or(Path::new("."));
229
230 if let Some(stripped) = include_path.strip_prefix('/') {
231 PathBuf::from(stripped)
233 } else {
234 current_dir.join(include_path)
236 }
237 }
238
239 fn get_file_content(&self, file_path: &Path) -> io::Result<Option<String>> {
241 let canonical_path = match file_path.canonicalize() {
242 Ok(path) => path,
243 Err(_) => file_path.to_path_buf(),
244 };
245
246 {
248 if let Ok(cache) = self.file_cache.read()
249 && let Some(cached_content) = cache.get(&canonical_path)
250 {
251 return Ok(cached_content.clone());
252 }
253 }
254
255 let content = fs::read_to_string(file_path).ok();
257
258 {
260 if let Ok(mut cache) = self.file_cache.write() {
261 cache.insert(canonical_path, content.clone());
262 }
263 }
264
265 Ok(content)
266 }
267
268 fn validate_range_or_anchor(
270 &self,
271 directive: &IncludeDirective,
272 target_path: &Path,
273 content: &str,
274 range_or_anchor: &str,
275 ) -> crate::error::Result<Option<Violation>> {
276 if self.is_line_range(range_or_anchor) {
278 return self.validate_line_range(directive, target_path, content, range_or_anchor);
279 }
280
281 if self.looks_like_malformed_line_range(range_or_anchor) {
283 return Ok(Some(self.create_violation(
284 format!("Invalid line number format '{range_or_anchor}'. Expected number or number:number format."),
285 directive.line_number,
286 directive.column,
287 Severity::Error,
288 )));
289 }
290
291 self.validate_anchor(directive, target_path, content, range_or_anchor)
293 }
294
295 fn is_line_range(&self, spec: &str) -> bool {
297 spec.chars().all(|c| c.is_ascii_digit() || c == ':') && !spec.is_empty()
299 }
300
301 fn looks_like_malformed_line_range(&self, spec: &str) -> bool {
303 if spec.is_empty() {
306 return false;
307 }
308
309 let has_digits = spec.chars().any(|c| c.is_ascii_digit());
310 let has_colon = spec.contains(':');
311
312 if has_digits {
315 let has_letters = spec.chars().any(|c| c.is_ascii_alphabetic());
316 if has_letters {
317 return true;
318 }
319 }
320
321 if has_colon && (spec.starts_with(':') || spec.ends_with(':')) {
323 return true;
324 }
325
326 if spec.len() <= 3
330 && spec.chars().all(|c| c.is_ascii_alphabetic())
331 && !spec.contains('_')
332 && !spec.contains('-')
333 {
334 return true;
335 }
336
337 false
338 }
339
340 fn validate_line_range(
342 &self,
343 directive: &IncludeDirective,
344 _target_path: &Path,
345 content: &str,
346 range_spec: &str,
347 ) -> crate::error::Result<Option<Violation>> {
348 let line_count = content.lines().count();
349
350 let (start_line, end_line) = if let Some(colon_pos) = range_spec.find(':') {
351 let start_str = &range_spec[..colon_pos];
353 let end_str = &range_spec[colon_pos + 1..];
354
355 let start = match start_str.parse::<usize>() {
356 Ok(n) if n > 0 => n,
357 _ => {
358 return Ok(Some(self.create_violation(
359 format!("Invalid start line number '{start_str}' in range specification"),
360 directive.line_number,
361 directive.column,
362 Severity::Error,
363 )));
364 }
365 };
366
367 let end = match end_str.parse::<usize>() {
368 Ok(n) if n > 0 => n,
369 _ => {
370 return Ok(Some(self.create_violation(
371 format!("Invalid end line number '{end_str}' in range specification"),
372 directive.line_number,
373 directive.column,
374 Severity::Error,
375 )));
376 }
377 };
378
379 if start > end {
380 return Ok(Some(self.create_violation(
381 format!("Start line {start} cannot be greater than end line {end}"),
382 directive.line_number,
383 directive.column,
384 Severity::Error,
385 )));
386 }
387
388 (start, end)
389 } else {
390 let line_num = match range_spec.parse::<usize>() {
392 Ok(n) if n > 0 => n,
393 _ => {
394 return Ok(Some(self.create_violation(
395 format!("Invalid line number '{range_spec}'"),
396 directive.line_number,
397 directive.column,
398 Severity::Error,
399 )));
400 }
401 };
402 (line_num, line_num)
403 };
404
405 if start_line > line_count || end_line > line_count {
407 let message = if start_line == end_line {
408 format!("Line {start_line} does not exist in file (file has {line_count} lines)")
409 } else {
410 format!(
411 "Line range {start_line}:{end_line} exceeds file length (file has {line_count} lines)"
412 )
413 };
414
415 return Ok(Some(self.create_violation(
416 message,
417 directive.line_number,
418 directive.column,
419 Severity::Error,
420 )));
421 }
422
423 Ok(None)
424 }
425
426 fn validate_anchor(
428 &self,
429 directive: &IncludeDirective,
430 _target_path: &Path,
431 content: &str,
432 anchor: &str,
433 ) -> crate::error::Result<Option<Violation>> {
434 let anchor_patterns = [
437 format!("// ANCHOR: {anchor}"),
438 format!("# ANCHOR: {anchor}"),
439 format!("<!-- ANCHOR: {anchor} -->"),
440 format!("<!-- anchor: {anchor} -->"),
441 ];
442
443 let mut found = false;
444 for line in content.lines() {
445 for pattern in &anchor_patterns {
446 if line.contains(pattern) {
447 found = true;
448 break;
449 }
450 }
451 if found {
452 break;
453 }
454 }
455
456 if !found {
457 return Ok(Some(self.create_violation(
458 format!(
459 "Anchor '{}' not found in included file. Expected patterns: {}",
460 anchor,
461 anchor_patterns.join(", ")
462 ),
463 directive.line_number,
464 directive.column,
465 Severity::Error,
466 )));
467 }
468
469 Ok(None)
470 }
471
472 fn check_circular_dependency(
474 &self,
475 target_path: &Path,
476 directive: &IncludeDirective,
477 ) -> crate::error::Result<Option<Violation>> {
478 {
479 if let Ok(stack) = self.processing_stack.read()
480 && stack.contains(&target_path.to_path_buf())
481 {
482 return Ok(Some(self.create_violation(
483 format!(
484 "Circular include dependency detected: {} -> {}",
485 stack.last().unwrap().display(),
486 target_path.display()
487 ),
488 directive.line_number,
489 directive.column,
490 Severity::Error,
491 )));
492 }
493 }
494
495 Ok(None)
496 }
497}
498
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rule::Rule;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `content` to `file_path` (creating parent directories as needed)
    /// and returns a `Document` holding the same content and path.
    fn create_test_document(content: &str, file_path: &Path) -> crate::error::Result<Document> {
        if let Some(parent) = file_path.parent() {
            fs::create_dir_all(parent)?;
        }
        fs::write(file_path, content)?;
        Document::new(content.to_string(), file_path.to_path_buf())
    }

    /// Creates `chapter.md` under `root` whose body contains `directive` between
    /// a heading and a trailing paragraph.
    fn chapter_with(root: &Path, directive: &str) -> crate::error::Result<Document> {
        let source = format!("# Chapter 1\n\n{directive}\n\nMore content here.");
        create_test_document(&source, &root.join("chapter.md"))
    }

    #[test]
    fn test_mdbook007_valid_basic_include() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();
        create_test_document("Hello, included content!", &root.join("included.txt"))?;

        let doc = chapter_with(root, "{{#include included.txt}}")?;
        let violations = MDBOOK007::default().check(&doc)?;

        assert!(
            violations.is_empty(),
            "Valid include should have no violations"
        );
        Ok(())
    }

    #[test]
    fn test_mdbook007_missing_file() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();

        let doc = chapter_with(root, "{{#include nonexistent.txt}}")?;
        let violations = MDBOOK007::default().check(&doc)?;

        assert_eq!(violations.len(), 1);
        let reported = &violations[0];
        assert_eq!(reported.rule_id, "MDBOOK007");
        assert!(reported.message.contains("not found"));
        assert!(reported.message.contains("nonexistent.txt"));
        Ok(())
    }

    #[test]
    fn test_mdbook007_valid_line_range() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();
        create_test_document(
            "Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n",
            &root.join("lines.txt"),
        )?;

        let doc = chapter_with(root, "{{#include lines.txt:2:4}}")?;
        let violations = MDBOOK007::default().check(&doc)?;

        assert!(
            violations.is_empty(),
            "Valid line range should have no violations"
        );
        Ok(())
    }

    #[test]
    fn test_mdbook007_invalid_line_range() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();
        create_test_document("Line 1\nLine 2\nLine 3\n", &root.join("lines.txt"))?;

        let doc = chapter_with(root, "{{#include lines.txt:2:10}}")?;
        let violations = MDBOOK007::default().check(&doc)?;

        assert_eq!(violations.len(), 1);
        let reported = &violations[0];
        assert_eq!(reported.rule_id, "MDBOOK007");
        assert!(reported.message.contains("exceeds file length"));
        Ok(())
    }

    #[test]
    fn test_mdbook007_single_line_include() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();
        create_test_document("Line 1\nLine 2\nLine 3\n", &root.join("lines.txt"))?;

        let doc = chapter_with(root, "{{#include lines.txt:2}}")?;
        let violations = MDBOOK007::default().check(&doc)?;

        assert!(
            violations.is_empty(),
            "Valid single line include should have no violations"
        );
        Ok(())
    }

    #[test]
    fn test_mdbook007_valid_anchor() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();
        let target = "fn main() {\n    // ANCHOR: example\n    println!(\"Hello, world!\");\n    // ANCHOR_END: example\n}";
        create_test_document(target, &root.join("example.rs"))?;

        let doc = chapter_with(root, "{{#include example.rs:example}}")?;
        let violations = MDBOOK007::default().check(&doc)?;

        assert!(
            violations.is_empty(),
            "Valid anchor include should have no violations"
        );
        Ok(())
    }

    #[test]
    fn test_mdbook007_missing_anchor() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();
        let target = "fn main() {\n    println!(\"Hello, world!\");\n}";
        create_test_document(target, &root.join("example.rs"))?;

        let doc = chapter_with(root, "{{#include example.rs:missing_anchor}}")?;
        let violations = MDBOOK007::default().check(&doc)?;

        assert_eq!(violations.len(), 1);
        let reported = &violations[0];
        assert_eq!(reported.rule_id, "MDBOOK007");
        assert!(
            reported
                .message
                .contains("Anchor 'missing_anchor' not found")
        );
        Ok(())
    }

    #[test]
    fn test_mdbook007_rustdoc_include() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();
        create_test_document("fn example() {}", &root.join("lib.rs"))?;

        let doc = chapter_with(root, "{{#rustdoc_include lib.rs}}")?;
        let violations = MDBOOK007::default().check(&doc)?;

        assert!(
            violations.is_empty(),
            "Valid rustdoc_include should have no violations"
        );
        Ok(())
    }

    #[test]
    fn test_mdbook007_invalid_line_number_format() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();
        create_test_document("Line 1\nLine 2\n", &root.join("lines.txt"))?;

        let doc = chapter_with(root, "{{#include lines.txt:abc}}")?;
        let violations = MDBOOK007::default().check(&doc)?;

        assert_eq!(violations.len(), 1);
        let reported = &violations[0];
        assert_eq!(reported.rule_id, "MDBOOK007");
        assert!(reported.message.contains("Invalid line number format"));
        Ok(())
    }

    #[test]
    fn test_mdbook007_nested_includes() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();
        fs::create_dir_all(root.join("nested"))?;
        create_test_document("Nested content", &root.join("nested/file.txt"))?;

        let doc = chapter_with(root, "{{#include nested/file.txt}}")?;
        let violations = MDBOOK007::default().check(&doc)?;

        assert!(
            violations.is_empty(),
            "Nested include should have no violations"
        );
        Ok(())
    }

    #[test]
    fn test_parse_file_spec() {
        let rule = MDBOOK007::default();

        // (input, expected path, expected selector)
        let cases = [
            ("file.txt", "file.txt", None),
            ("file.rs:10:20", "file.rs", Some("10:20")),
            ("file.rs:anchor", "file.rs", Some("anchor")),
            ("path/to/file.txt:5", "path/to/file.txt", Some("5")),
        ];

        for (input, path, selector) in cases {
            assert_eq!(
                rule.parse_file_spec(input),
                (path, selector.map(str::to_string))
            );
        }
    }

    #[test]
    fn test_is_line_range() {
        let rule = MDBOOK007::default();

        for spec in ["10", "10:20", "1:1"] {
            assert!(rule.is_line_range(spec));
        }
        for spec in ["anchor_name", "10:anchor", "abc:123"] {
            assert!(!rule.is_line_range(spec));
        }
    }

    #[test]
    fn test_looks_like_malformed_line_range() {
        let rule = MDBOOK007::default();

        // Digit/letter mixes, dangling colons, and very short all-letter words.
        for spec in ["10abc", "abc10", ":10", "10:", "10:abc", "abc:123", "abc"] {
            assert!(rule.looks_like_malformed_line_range(spec));
        }

        // Plausible anchor names and the empty string are left alone.
        for spec in ["anchor_name", "valid-anchor", ""] {
            assert!(!rule.looks_like_malformed_line_range(spec));
        }
    }
}