1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::range_utils::calculate_line_range;
3use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
4
/// One fenced code block found in the document, described by its opening fence.
struct FencedCodeBlock {
    /// The run of backtick or tilde characters recorded for the opening fence.
    fence_marker: String,
    /// First whitespace-separated word of the info string; empty when there is none.
    language: String,
    /// Zero-based index of the line holding the opening fence.
    line_idx: usize,
}
16
/// Rule MD040: fenced code blocks should have a language specified.
///
/// The rule carries no configuration or state; all behavior lives in its
/// `Rule` implementation.
#[derive(Debug, Default, Clone)]
pub struct MD040FencedCodeLanguage;
19
20impl Rule for MD040FencedCodeLanguage {
21 fn name(&self) -> &'static str {
22 "MD040"
23 }
24
25 fn description(&self) -> &'static str {
26 "Code blocks should have a language specified"
27 }
28
29 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
30 let content = ctx.content;
31 let mut warnings = Vec::new();
32
33 let fenced_blocks = detect_fenced_code_blocks(content, &ctx.line_offsets);
35
36 let disabled_ranges = compute_disabled_ranges(content, self.name());
38
39 for block in fenced_blocks {
40 if is_line_disabled(&disabled_ranges, block.line_idx) {
42 continue;
43 }
44
45 let line = content.lines().nth(block.line_idx).unwrap_or("");
47 let trimmed = line.trim();
48 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
49
50 let has_title_only =
52 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
53
54 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
56 && after_fence.starts_with('{')
57 && after_fence.contains('}');
58
59 if (block.language.is_empty() || has_title_only) && !has_quarto_syntax {
61 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
62
63 warnings.push(LintWarning {
64 rule_name: Some(self.name().to_string()),
65 line: start_line,
66 column: start_col,
67 end_line,
68 end_column: end_col,
69 message: "Code block (```) missing language".to_string(),
70 severity: Severity::Warning,
71 fix: Some(Fix {
72 range: {
73 let trimmed_start = line.len() - line.trim_start().len();
74 let fence_len = block.fence_marker.len();
75 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
76 let fence_start_byte = line_start_byte + trimmed_start;
77 let fence_end_byte = fence_start_byte + fence_len;
78 fence_start_byte..fence_end_byte
79 },
80 replacement: format!("{}text", block.fence_marker),
81 }),
82 });
83 }
84 }
85
86 Ok(warnings)
87 }
88
89 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
90 let content = ctx.content;
91
92 let fenced_blocks = detect_fenced_code_blocks(content, &ctx.line_offsets);
94
95 let disabled_ranges = compute_disabled_ranges(content, self.name());
97
98 let mut lines_to_fix: std::collections::HashMap<usize, (&str, bool)> = std::collections::HashMap::new();
100
101 for block in &fenced_blocks {
102 if is_line_disabled(&disabled_ranges, block.line_idx) {
103 continue;
104 }
105
106 let line = content.lines().nth(block.line_idx).unwrap_or("");
107 let trimmed = line.trim();
108 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
109
110 let has_title_only =
111 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
112
113 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
114 && after_fence.starts_with('{')
115 && after_fence.contains('}');
116
117 if (block.language.is_empty() || has_title_only) && !has_quarto_syntax {
118 lines_to_fix.insert(block.line_idx, (&block.fence_marker, has_title_only));
119 }
120 }
121
122 let mut result = String::new();
124 for (i, line) in content.lines().enumerate() {
125 if let Some(&(fence_marker, has_title_only)) = lines_to_fix.get(&i) {
126 let indent = &line[..line.len() - line.trim_start().len()];
127 let trimmed = line.trim();
128 let after_fence = trimmed.strip_prefix(fence_marker).unwrap_or("").trim();
129
130 if has_title_only {
131 result.push_str(&format!("{indent}{fence_marker}text {after_fence}\n"));
132 } else {
133 result.push_str(&format!("{indent}{fence_marker}text\n"));
134 }
135 } else {
136 result.push_str(line);
137 result.push('\n');
138 }
139 }
140
141 if !content.ends_with('\n') {
143 result.pop();
144 }
145
146 Ok(result)
147 }
148
149 fn category(&self) -> RuleCategory {
151 RuleCategory::CodeBlock
152 }
153
154 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
156 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
157 }
158
159 fn as_any(&self) -> &dyn std::any::Any {
160 self
161 }
162
163 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
164 where
165 Self: Sized,
166 {
167 Box::new(MD040FencedCodeLanguage)
168 }
169}
170
171fn detect_fenced_code_blocks(content: &str, line_offsets: &[usize]) -> Vec<FencedCodeBlock> {
173 let mut blocks = Vec::new();
174 let options = Options::all();
175 let parser = Parser::new_ext(content, options).into_offset_iter();
176
177 for (event, range) in parser {
178 if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) = event {
179 let line_idx = line_idx_from_offset(line_offsets, range.start);
181
182 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
184 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
185 let line = content.get(line_start..line_end).unwrap_or("");
186 let trimmed = line.trim();
187 let fence_marker = if trimmed.starts_with('`') {
188 let count = trimmed.chars().take_while(|&c| c == '`').count();
189 "`".repeat(count)
190 } else if trimmed.starts_with('~') {
191 let count = trimmed.chars().take_while(|&c| c == '~').count();
192 "~".repeat(count)
193 } else {
194 "```".to_string() };
196
197 let language = info.split_whitespace().next().unwrap_or("").to_string();
199
200 blocks.push(FencedCodeBlock {
201 line_idx,
202 language,
203 fence_marker,
204 });
205 }
206 }
207
208 blocks
209}
210
/// Maps a byte `offset` to the index of the line that contains it.
///
/// `line_offsets` must be sorted ascending and hold the starting byte offset of
/// each line. An offset that is exactly a line start maps to that line; any
/// other offset maps to the closest line start before it (index 0 at minimum).
#[inline]
fn line_idx_from_offset(line_offsets: &[usize], offset: usize) -> usize {
    line_offsets
        .binary_search(&offset)
        .unwrap_or_else(|insert_at| insert_at.saturating_sub(1))
}
218
219fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
221 let mut ranges = Vec::new();
222 let mut disabled_start: Option<usize> = None;
223
224 for (i, line) in content.lines().enumerate() {
225 let trimmed = line.trim();
226
227 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
228 && (rules.is_empty() || rules.contains(&rule_name))
229 && disabled_start.is_none()
230 {
231 disabled_start = Some(i);
232 }
233
234 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
235 && (rules.is_empty() || rules.contains(&rule_name))
236 && let Some(start) = disabled_start.take()
237 {
238 ranges.push((start, i));
239 }
240 }
241
242 if let Some(start) = disabled_start {
244 ranges.push((start, usize::MAX));
245 }
246
247 ranges
248}
249
/// Reports whether `line_idx` falls inside any `(start, end)` span in `ranges`.
///
/// Spans are half-open: `start` is included, `end` is not.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&line_idx) {
            return true;
        }
    }
    false
}
254
// Unit tests for MD040: detection (`check`), automatic fixing (`fix`),
// the skip heuristic, and flavor-specific fence syntax.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs `check` on `content` using the Standard Markdown flavor.
    fn run_check(content: &str) -> LintResult {
        let rule = MD040FencedCodeLanguage;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx)
    }

    /// Runs `fix` on `content` using the Standard Markdown flavor.
    fn run_fix(content: &str) -> Result<String, LintError> {
        let rule = MD040FencedCodeLanguage;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.fix(&ctx)
    }

    /// Fences that name a language produce no warnings.
    #[test]
    fn test_code_blocks_with_language_specified() {
        let content = r#"# Test

```python
print("Hello, world!")
```

```javascript
console.log("Hello!");
```
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }

    /// A bare fence yields one warning on the fence's (1-based) line.
    #[test]
    fn test_code_blocks_without_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }

    /// A fence with an empty info string is reported like a bare fence.
    #[test]
    fn test_code_blocks_with_empty_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
    }

    /// Content without fences is not subject to this rule.
    #[test]
    fn test_indented_code_blocks_should_be_ignored() {
        let content = r#"# Test

 This is an indented code block
 It should not trigger MD040
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Indented code blocks should be ignored");
    }

    /// Inline code spans are not fenced blocks and produce no warnings.
    #[test]
    fn test_inline_code_spans_should_be_ignored() {
        let content = r#"# Test

This is `inline code` and should not trigger warnings.

Use the `print()` function.
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Inline code spans should be ignored");
    }

    /// Tilde fences are treated the same as backtick fences.
    #[test]
    fn test_tildes_vs_backticks_for_fences() {
        let content_tildes_no_lang = r#"# Test

~~~
code here
~~~
"#;
        let result = run_check(content_tildes_no_lang).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");

        let content_tildes_with_lang = r#"# Test

~~~python
code here
~~~
"#;
        let result = run_check(content_tildes_with_lang).unwrap();
        assert!(result.is_empty());

        let content_mixed = r#"# Test

```python
code here
```

~~~javascript
more code
~~~

```
no language
```

~~~
also no language
~~~
"#;
        let result = run_check(content_mixed).unwrap();
        assert_eq!(result.len(), 2);
    }

    /// Extra attributes after the language do not hide the language.
    #[test]
    fn test_language_with_additional_parameters() {
        let content = r#"# Test

```python {highlight=[1,2]}
print("Line 1")
print("Line 2")
```

```javascript {.line-numbers startFrom="10"}
console.log("Hello");
```

```ruby {data-line="1,3-4"}
puts "Hello"
puts "World"
puts "!"
```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Code blocks with language and parameters should pass"
        );
    }

    /// Only the blocks lacking a language are reported, at their own lines.
    #[test]
    fn test_multiple_code_blocks_in_document() {
        let content = r#"# Test Document

First block without language:
```
code here
```

Second block with language:
```python
print("hello")
```

Third block without language:
```
more code
```

Fourth block with language:
```javascript
console.log("test");
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 4);
        assert_eq!(result[1].line, 14);
    }

    /// Fences placed under list items are checked too.
    #[test]
    fn test_nested_code_blocks_in_lists() {
        let content = r#"# Test

- Item 1
 ```python
 print("nested with language")
 ```

- Item 2
 ```
 nested without language
 ```

- Item 3
 - Nested item
 ```javascript
 console.log("deeply nested");
 ```

 - Another nested
 ```
 no language
 ```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 9);
        assert_eq!(result[1].line, 20);
    }

    /// Regression (issue 257): a fence on a list-marker line that has a language
    /// is neither reported nor modified, and its closing fence gains no `text`.
    #[test]
    fn test_issue_257_list_indented_code_block_with_language() {
        let content = r#"- Sample code:
- ```java
 List<Map<String,String>> inputs = new List<Map<String,String>>();
 ```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "List-indented code block with language should not trigger MD040. Got: {result:?}",
        );

        let fixed = run_fix(content).unwrap();
        assert_eq!(
            fixed, content,
            "Fix should not modify code blocks that already have a language"
        );
        assert!(
            !fixed.contains("```text"),
            "Fix should not add 'text' to closing fence of code block with language"
        );
    }

    /// Regression (issue 257): several list-nested blocks with languages are left alone.
    #[test]
    fn test_issue_257_multiple_list_indented_blocks() {
        let content = r#"# Document

1. Step one
 ```python
 print("hello")
 ```
2. Step two

- Item with nested code:
 ```bash
 echo "test"
 ```

- Another item:
 ```javascript
 console.log("test");
 ```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "All list-indented code blocks have languages. Got: {result:?}",
        );

        let fixed = run_fix(content).unwrap();
        assert_eq!(
            fixed, content,
            "Fix should not modify content when all blocks have languages"
        );
    }

    /// Fences inside blockquotes are checked; only the bare one is reported.
    #[test]
    fn test_code_blocks_in_blockquotes() {
        let content = r#"# Test

> This is a blockquote
> ```python
> print("with language")
> ```

> Another blockquote
> ```
> without language
> ```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
    }

    /// `fix` adds `text` to each bare fence and leaves labelled fences alone.
    #[test]
    fn test_fix_method_adds_text_language() {
        let content = r#"# Test

```
code without language
```

```python
already has language
```

```
another block without
```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);
    }

    /// `fix` keeps the leading whitespace of an indented fence and its content.
    #[test]
    fn test_fix_preserves_indentation() {
        let content = r#"# Test

- List item
 ```
 indented code block
 ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains(" ```text"));
        assert!(fixed.contains(" indented code block"));
    }

    /// Same as above under a numbered list; the fence must not move to column zero.
    #[test]
    fn test_fix_preserves_indentation_numbered_list() {
        let content = r#"1. Step 1

 ```
 foo
 bar
 ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains(" ```text"));
        assert!(fixed.contains(" foo"));
        assert!(fixed.contains(" bar"));
        assert!(!fixed.contains("\n```text\n"));
    }

    /// Indentation is preserved for top-level, list-nested, and space-indented fences alike.
    #[test]
    fn test_fix_preserves_all_indentation() {
        let content = r#"# Test

Top-level code block:
```
top level
```

1. List item

 ```
 nested in list
 ```

Indented by 2 spaces:
 ```
 content
 ```
"#;
        let fixed = run_fix(content).unwrap();

        assert!(
            fixed.contains("```text\ntop level"),
            "Top-level code block indentation preserved"
        );
        assert!(
            fixed.contains(" ```text\n nested in list"),
            "List item code block indentation preserved"
        );
        assert!(
            fixed.contains(" ```text\n content"),
            "2-space indented code block indentation preserved"
        );
    }

    /// `fix` keeps tilde fences as tildes.
    #[test]
    fn test_fix_with_tilde_fences() {
        let content = r#"# Test

~~~
code with tildes
~~~
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("~~~text"));
    }

    /// Fences longer than three characters are detected and fixed at full length.
    #[test]
    fn test_longer_fence_markers() {
        let content = r#"# Test

````
code with four backticks
````

`````python
code with five backticks and language
`````

~~~~~~
code with six tildes
~~~~~~
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);

        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("````text"));
        assert!(fixed.contains("~~~~~~text"));
        assert!(fixed.contains("`````python"));
    }

    /// A shorter fence inside a longer fenced block is content, not a new block.
    #[test]
    fn test_nested_code_blocks_different_markers() {
        let content = r#"# Test

````markdown
This is a markdown block

```python
# This is nested code
print("hello")
```

More markdown
````
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Nested code blocks with different markers should not trigger warnings"
        );
    }

    /// Blocks between disable and enable comments for MD040 are not reported.
    #[test]
    fn test_disable_enable_comments() {
        let content = r#"# Test

<!-- rumdl-disable MD040 -->
```
this should not trigger warning
```
<!-- rumdl-enable MD040 -->

```
this should trigger warning
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 9);
    }

    /// Text on what looks like a closing fence does not give the block a language.
    #[test]
    fn test_fence_with_language_only_on_closing() {
        let content = r#"# Test

```
code
```python
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
    }

    /// Unclosed blocks are judged by their opening fence alone.
    #[test]
    fn test_incomplete_code_blocks() {
        let content = r#"# Test

```python
this code block is not closed"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Unclosed code blocks with language should not trigger warnings"
        );

        let content_no_lang = r#"# Test

```
this code block is not closed"#;
        let result = run_check(content_no_lang).unwrap();
        assert_eq!(result.len(), 1);
    }

    /// `fix` neither adds nor removes the document's trailing newline.
    #[test]
    fn test_fix_preserves_original_formatting() {
        let content = r#"# Test

```
code
```

No newline at end"#;
        let fixed = run_fix(content).unwrap();
        assert!(!fixed.ends_with('\n'), "Fix should preserve lack of trailing newline");

        let content_with_newline = "# Test\n\n```\ncode\n```\n";
        let fixed = run_fix(content_with_newline).unwrap();
        assert!(fixed.ends_with('\n'), "Fix should preserve trailing newline");
    }

    /// Backticks appearing inside a block's content do not start new blocks.
    #[test]
    fn test_edge_case_backticks_in_content() {
        let content = r#"# Test

```javascript
console.log(`template string with backticks`);
// This line has ``` in a comment
```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Backticks inside code blocks should not affect parsing"
        );
    }

    /// An empty document yields no warnings.
    #[test]
    fn test_empty_document() {
        let content = "";
        let result = run_check(content).unwrap();
        assert!(result.is_empty());
    }

    /// `should_skip` is true for fence-free or empty input and false when
    /// backtick or tilde fences are present.
    #[test]
    fn test_should_skip_optimization() {
        let rule = MD040FencedCodeLanguage;

        let ctx = LintContext::new(
            "# Just a header\n\nSome text",
            crate::config::MarkdownFlavor::Standard,
            None,
        );
        assert!(rule.should_skip(&ctx));

        let ctx = LintContext::new("```\ncode\n```", crate::config::MarkdownFlavor::Standard, None);
        assert!(!rule.should_skip(&ctx));

        let ctx = LintContext::new("~~~\ncode\n~~~", crate::config::MarkdownFlavor::Standard, None);
        assert!(!rule.should_skip(&ctx));

        let ctx = LintContext::new("", crate::config::MarkdownFlavor::Standard, None);
        assert!(rule.should_skip(&ctx));
    }

    /// Quarto `{lang}` chunk syntax is accepted under the Quarto flavor, bare
    /// fences are still reported there, and the Standard flavor also accepts
    /// `{python}` because the info string is non-empty.
    #[test]
    fn test_quarto_code_chunk_syntax() {
        let rule = MD040FencedCodeLanguage;

        let content = r#"# Test

```{r}
x <- 1
```

```{python}
x = 1
```

```{r, echo=FALSE}
plot(x)
```
"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Quarto code chunks with {{language}} syntax should not trigger warnings"
        );

        let content_no_lang = r#"# Test

```
code without language
```
"#;
        let ctx = LintContext::new(content_no_lang, crate::config::MarkdownFlavor::Quarto, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1, "Quarto files without language should trigger warning");

        let content_standard = r#"# Test

```{python}
code
```
"#;
        let ctx = LintContext::new(content_standard, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Standard flavor should accept any non-empty after_fence content"
        );
    }
}