1use crate::rule::{AstRule, RuleCategory, RuleMetadata};
7use crate::{
8 Document,
9 violation::{Severity, Violation},
10};
11use comrak::nodes::{AstNode, NodeValue};
12use std::collections::HashMap;
13use std::path::{Path, PathBuf};
14use std::sync::{Arc, RwLock};
15use std::{fs, io};
16
/// Lint rule MDBOOK006: checks links of the form `file#anchor` against the
/// headings found in the linked file.
///
/// `Debug` is derived so the rule can be logged or embedded in other
/// `Debug` types.
#[derive(Debug, Default)]
pub struct MDBOOK006 {
    /// Heading anchors already extracted from target files, keyed by the
    /// target's path (canonicalized when possible), so each file is read
    /// and scanned at most once per rule instance.
    anchor_cache: Arc<RwLock<HashMap<PathBuf, Vec<String>>>>,
}
41
42impl AstRule for MDBOOK006 {
43 fn id(&self) -> &'static str {
44 "MDBOOK006"
45 }
46
47 fn name(&self) -> &'static str {
48 "internal-cross-references"
49 }
50
51 fn description(&self) -> &'static str {
52 "Internal cross-reference links must point to valid headings in target files"
53 }
54
55 fn metadata(&self) -> RuleMetadata {
56 RuleMetadata::stable(RuleCategory::MdBook).introduced_in("mdbook-lint v0.2.0")
57 }
58
59 fn check_ast<'a>(
60 &self,
61 document: &Document,
62 ast: &'a AstNode<'a>,
63 ) -> crate::error::Result<Vec<Violation>> {
64 let mut violations = Vec::new();
65
66 for node in ast.descendants() {
68 if let NodeValue::Link(link) = &node.data.borrow().value {
69 let url = &link.url;
70
71 if is_external_link(url) {
73 continue;
74 }
75
76 if !url.contains('#') {
78 continue;
79 }
80
81 if url.starts_with('#') {
83 continue;
84 }
85
86 if let Some(violation) = self.validate_cross_reference(document, node, url)? {
88 violations.push(violation);
89 }
90 }
91 }
92
93 Ok(violations)
94 }
95}
96
97impl MDBOOK006 {
98 fn validate_cross_reference<'a>(
100 &self,
101 document: &Document,
102 node: &'a AstNode<'a>,
103 url: &str,
104 ) -> crate::error::Result<Option<Violation>> {
105 let parts: Vec<&str> = url.splitn(2, '#').collect();
107 if parts.len() != 2 {
108 return Ok(None); }
110
111 let file_path = parts[0];
112 let anchor = parts[1];
113
114 if file_path.is_empty() || anchor.is_empty() {
116 return Ok(None);
117 }
118
119 let target_path = self.resolve_target_path(&document.path, file_path);
121
122 if !target_path.exists() {
124 return Ok(None);
126 }
127
128 let anchors = match self.get_file_anchors(&target_path)? {
130 Some(anchors) => anchors,
131 None => return Ok(None), };
133
134 if !anchors.contains(&anchor.to_string()) {
136 let (line, column) = document.node_position(node).unwrap_or((1, 1));
137
138 let suggestion = self.suggest_similar_anchor(anchor, &anchors);
140 let message = if let Some(suggestion) = suggestion {
141 format!(
142 "Cross-reference anchor '{anchor}' not found in '{file_path}'. Did you mean '{suggestion}'?"
143 )
144 } else {
145 format!(
146 "Cross-reference anchor '{}' not found in '{}'. Available anchors: {}",
147 anchor,
148 file_path,
149 if anchors.is_empty() {
150 "none".to_string()
151 } else {
152 anchors
153 .iter()
154 .take(5)
155 .map(|s| format!("'{s}'"))
156 .collect::<Vec<_>>()
157 .join(", ")
158 }
159 )
160 };
161
162 return Ok(Some(self.create_violation(
163 message,
164 line,
165 column,
166 Severity::Error,
167 )));
168 }
169
170 Ok(None)
171 }
172
173 fn resolve_target_path(&self, current_doc_path: &Path, link_path: &str) -> PathBuf {
175 let current_dir = current_doc_path.parent().unwrap_or(Path::new("."));
176
177 if let Some(stripped) = link_path.strip_prefix("./") {
178 current_dir.join(stripped)
180 } else if link_path.starts_with("../") {
181 current_dir.join(link_path)
183 } else if let Some(stripped) = link_path.strip_prefix('/') {
184 PathBuf::from(stripped)
186 } else {
187 current_dir.join(link_path)
189 }
190 }
191
192 fn get_file_anchors(&self, file_path: &Path) -> io::Result<Option<Vec<String>>> {
194 let canonical_path = match file_path.canonicalize() {
195 Ok(path) => path,
196 Err(_) => file_path.to_path_buf(),
197 };
198
199 {
201 if let Ok(cache) = self.anchor_cache.read()
202 && let Some(anchors) = cache.get(&canonical_path)
203 {
204 return Ok(Some(anchors.clone()));
205 }
206 }
207
208 let content = match fs::read_to_string(file_path) {
210 Ok(content) => content,
211 Err(_) => return Ok(None), };
213
214 let anchors = self.extract_heading_anchors(&content);
215
216 {
218 if let Ok(mut cache) = self.anchor_cache.write() {
219 cache.insert(canonical_path, anchors.clone());
220 }
221 }
222
223 Ok(Some(anchors))
224 }
225
226 fn extract_heading_anchors(&self, content: &str) -> Vec<String> {
228 let mut anchors = Vec::new();
229
230 for line in content.lines() {
231 let line = line.trim();
232
233 if let Some(heading_text) = self.extract_atx_heading(line) {
235 let anchor = self.generate_anchor_id(&heading_text);
236 if !anchor.is_empty() {
237 anchors.push(anchor);
238 }
239 }
240 }
241
242 anchors
246 }
247
248 fn extract_atx_heading(&self, line: &str) -> Option<String> {
250 if !line.starts_with('#') {
251 return None;
252 }
253
254 let hash_count = line.chars().take_while(|&c| c == '#').count();
256 if hash_count == 0 || hash_count > 6 {
257 return None; }
259
260 let rest = &line[hash_count..];
262 let text = if let Some(stripped) = rest.strip_prefix(' ') {
263 stripped
264 } else {
265 rest
266 };
267
268 let text = text.trim_end_matches(['#', ' ']);
270
271 if text.is_empty() {
272 return None;
273 }
274
275 Some(text.to_string())
276 }
277
278 fn generate_anchor_id(&self, heading_text: &str) -> String {
280 heading_text
281 .to_lowercase()
282 .chars()
284 .map(|c| if c.is_alphanumeric() { c } else { '-' })
285 .collect::<String>()
286 .split('-')
288 .filter(|part| !part.is_empty())
289 .collect::<Vec<_>>()
290 .join("-")
291 }
292
293 fn suggest_similar_anchor(&self, target: &str, available: &[String]) -> Option<String> {
295 if available.is_empty() {
296 return None;
297 }
298
299 for anchor in available {
301 if anchor.contains(target) || target.contains(anchor) {
302 return Some(anchor.clone());
303 }
304 }
305
306 Some(available[0].clone())
308 }
309}
310
/// Returns `true` when `url` starts with one of the external prefixes
/// `http://`, `https://`, `mailto:`, `ftp://` or `tel:`.
///
/// The comparison ignores ASCII case because URI schemes are
/// case-insensitive, so `HTTPS://example.com` is external as well.
fn is_external_link(url: &str) -> bool {
    const EXTERNAL_PREFIXES: [&str; 5] = ["http://", "https://", "mailto:", "ftp://", "tel:"];

    // Compare bytes so slicing can never land inside a multi-byte character.
    let bytes = url.as_bytes();
    EXTERNAL_PREFIXES.iter().any(|prefix| {
        bytes.len() >= prefix.len()
            && bytes[..prefix.len()].eq_ignore_ascii_case(prefix.as_bytes())
    })
}
319
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rule::Rule;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `content` to `file_path`, creating parent directories as
    /// needed, and returns a `Document` built from the same content and path.
    fn create_test_document(content: &str, file_path: &Path) -> crate::error::Result<Document> {
        if let Some(dir) = file_path.parent() {
            fs::create_dir_all(dir)?;
        }
        fs::write(file_path, content)?;
        Document::new(content.to_string(), file_path.to_path_buf())
    }

    /// Writes `content` as `file_name` under `root` and runs a fresh
    /// `MDBOOK006` rule over the resulting document.
    fn lint_file(
        root: &Path,
        file_name: &str,
        content: &str,
    ) -> crate::error::Result<Vec<Violation>> {
        let doc = create_test_document(content, &root.join(file_name))?;
        Ok(MDBOOK006::default().check(&doc)?)
    }

    #[test]
    fn test_mdbook006_valid_cross_references() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();

        create_test_document(
            r#"# Chapter 2

## Overview

Some content here.

### Implementation Details

More details.
"#,
            &root.join("chapter2.md"),
        )?;

        let violations = lint_file(
            root,
            "chapter1.md",
            r#"# Chapter 1

See [Chapter 2](chapter2.md#chapter-2) for more info.

Check out the [overview](chapter2.md#overview) section.

The [implementation](chapter2.md#implementation-details) is complex.
"#,
        )?;

        assert!(
            violations.is_empty(),
            "Valid cross-references should have no violations"
        );
        Ok(())
    }

    #[test]
    fn test_mdbook006_invalid_anchor() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();

        create_test_document(
            r#"# Chapter 2

## Overview

Some content.
"#,
            &root.join("chapter2.md"),
        )?;

        let violations = lint_file(
            root,
            "chapter1.md",
            r#"# Chapter 1

See [nonexistent section](chapter2.md#nonexistent).
"#,
        )?;

        assert_eq!(violations.len(), 1);
        let reported = &violations[0];
        assert_eq!(reported.rule_id, "MDBOOK006");
        assert!(reported.message.contains("anchor 'nonexistent' not found"));
        assert!(reported.message.contains("chapter2.md"));
        Ok(())
    }

    #[test]
    fn test_mdbook006_missing_target_file() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;

        let violations = lint_file(
            temp_dir.path(),
            "chapter1.md",
            r#"# Chapter 1

See [missing](nonexistent.md#section).
"#,
        )?;

        // A link to a file that does not exist is not reported by this rule.
        assert!(violations.is_empty());
        Ok(())
    }

    #[test]
    fn test_mdbook006_same_document_anchors() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;

        let violations = lint_file(
            temp_dir.path(),
            "chapter1.md",
            r#"# Chapter 1

## Section A

See [Section B](#section-b) below.

## Section B

Content here.
"#,
        )?;

        // Links starting with `#` are skipped by this rule.
        assert!(violations.is_empty());
        Ok(())
    }

    #[test]
    fn test_mdbook006_external_links() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;

        let violations = lint_file(
            temp_dir.path(),
            "chapter1.md",
            r#"# Chapter 1

See [external](https://example.com#section).
"#,
        )?;

        assert!(violations.is_empty());
        Ok(())
    }

    #[test]
    fn test_mdbook006_no_anchor_links() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();

        create_test_document("# Target", &root.join("target.md"))?;

        let violations = lint_file(
            root,
            "chapter1.md",
            r#"# Chapter 1

See [target](target.md) for more.
"#,
        )?;

        // Links without a `#` fragment are skipped by this rule.
        assert!(violations.is_empty());
        Ok(())
    }

    #[test]
    fn test_extract_atx_heading() {
        let rule = MDBOOK006::default();

        let cases = [
            ("# Heading", Some("Heading")),
            ("## Sub Heading", Some("Sub Heading")),
            ("### Deep Heading ###", Some("Deep Heading")),
            ("#No Space", Some("No Space")),
            ("Not a heading", None),
            ("", None),
            ("#", None),
            ("# ", None),
        ];

        for (line, expected) in cases {
            assert_eq!(
                rule.extract_atx_heading(line).as_deref(),
                expected,
                "unexpected result for line {line:?}"
            );
        }
    }

    #[test]
    fn test_generate_anchor_id() {
        let rule = MDBOOK006::default();

        let cases = [
            ("Simple Heading", "simple-heading"),
            (
                "Complex: Heading with! Punctuation?",
                "complex-heading-with-punctuation",
            ),
            ("Multiple Spaces", "multiple-spaces"),
            ("UPPER case", "upper-case"),
            ("123 Numbers", "123-numbers"),
            ("", ""),
        ];

        for (heading, expected) in cases {
            assert_eq!(
                rule.generate_anchor_id(heading),
                expected,
                "unexpected anchor for heading {heading:?}"
            );
        }
    }

    #[test]
    fn test_mdbook006_nested_directories() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();

        create_test_document(
            r#"# Deep Chapter

## Nested Section

Content here.
"#,
            &root.join("guide/deep.md"),
        )?;

        let violations = lint_file(
            root,
            "chapter.md",
            r#"# Main Chapter

See [nested section](guide/deep.md#nested-section).
"#,
        )?;

        assert!(
            violations.is_empty(),
            "Nested directory cross-references should work"
        );
        Ok(())
    }

    #[test]
    fn test_mdbook006_helpful_suggestions() -> crate::error::Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();

        create_test_document(
            r#"# Target

## Implementation Details

Content here.
"#,
            &root.join("target.md"),
        )?;

        let violations = lint_file(
            root,
            "source.md",
            r#"# Source

See [details](target.md#implementation).
"#,
        )?;

        assert_eq!(violations.len(), 1);
        let message = &violations[0].message;
        assert!(message.contains("Did you mean"));
        assert!(message.contains("implementation-details"));
        Ok(())
    }
}