1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::range_utils::calculate_line_range;
3use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
4
/// An opening fence of a fenced code block, as located in the source document.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FencedCodeBlock {
    /// Zero-based index of the line on which the block starts.
    line_idx: usize,
    /// First whitespace-separated word of the info string; empty when none was given.
    language: String,
    /// The run of backticks or tildes that opens the block (e.g. "```" or "~~~~").
    fence_marker: String,
}
16
/// Lint rule MD040: code blocks should have a language specified.
///
/// The rule carries no configuration or state, hence the unit struct.
#[derive(Clone, Debug, Default)]
pub struct MD040FencedCodeLanguage;
19
20impl Rule for MD040FencedCodeLanguage {
21 fn name(&self) -> &'static str {
22 "MD040"
23 }
24
25 fn description(&self) -> &'static str {
26 "Code blocks should have a language specified"
27 }
28
29 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
30 let content = ctx.content;
31 let mut warnings = Vec::new();
32
33 let fenced_blocks = detect_fenced_code_blocks(content, &ctx.line_offsets);
35
36 let disabled_ranges = compute_disabled_ranges(content, self.name());
38
39 for block in fenced_blocks {
40 if is_line_disabled(&disabled_ranges, block.line_idx) {
42 continue;
43 }
44
45 let line = content.lines().nth(block.line_idx).unwrap_or("");
47 let trimmed = line.trim();
48 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
49
50 let has_title_only =
52 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
53
54 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
56 && after_fence.starts_with('{')
57 && after_fence.contains('}');
58
59 if (block.language.is_empty() || has_title_only) && !has_quarto_syntax {
61 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
62
63 warnings.push(LintWarning {
64 rule_name: Some(self.name().to_string()),
65 line: start_line,
66 column: start_col,
67 end_line,
68 end_column: end_col,
69 message: "Code block (```) missing language".to_string(),
70 severity: Severity::Warning,
71 fix: Some(Fix {
72 range: {
73 let trimmed_start = line.len() - line.trim_start().len();
74 let fence_len = block.fence_marker.len();
75 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
76 let fence_start_byte = line_start_byte + trimmed_start;
77 let fence_end_byte = fence_start_byte + fence_len;
78 fence_start_byte..fence_end_byte
79 },
80 replacement: format!("{}text", block.fence_marker),
81 }),
82 });
83 }
84 }
85
86 Ok(warnings)
87 }
88
89 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
90 let content = ctx.content;
91
92 let fenced_blocks = detect_fenced_code_blocks(content, &ctx.line_offsets);
94
95 let disabled_ranges = compute_disabled_ranges(content, self.name());
97
98 let mut lines_to_fix: std::collections::HashMap<usize, (&str, bool)> = std::collections::HashMap::new();
100
101 for block in &fenced_blocks {
102 if is_line_disabled(&disabled_ranges, block.line_idx) {
103 continue;
104 }
105
106 let line = content.lines().nth(block.line_idx).unwrap_or("");
107 let trimmed = line.trim();
108 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
109
110 let has_title_only =
111 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
112
113 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
114 && after_fence.starts_with('{')
115 && after_fence.contains('}');
116
117 if (block.language.is_empty() || has_title_only) && !has_quarto_syntax {
118 lines_to_fix.insert(block.line_idx, (&block.fence_marker, has_title_only));
119 }
120 }
121
122 let mut result = String::new();
124 for (i, line) in content.lines().enumerate() {
125 if let Some(&(fence_marker, has_title_only)) = lines_to_fix.get(&i) {
126 let indent = &line[..line.len() - line.trim_start().len()];
127 let trimmed = line.trim();
128 let after_fence = trimmed.strip_prefix(fence_marker).unwrap_or("").trim();
129
130 if has_title_only {
131 result.push_str(&format!("{indent}{fence_marker}text {after_fence}\n"));
132 } else {
133 result.push_str(&format!("{indent}{fence_marker}text\n"));
134 }
135 } else {
136 result.push_str(line);
137 result.push('\n');
138 }
139 }
140
141 if !content.ends_with('\n') {
143 result.pop();
144 }
145
146 Ok(result)
147 }
148
149 fn category(&self) -> RuleCategory {
151 RuleCategory::CodeBlock
152 }
153
154 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
156 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
157 }
158
159 fn as_any(&self) -> &dyn std::any::Any {
160 self
161 }
162
163 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
164 where
165 Self: Sized,
166 {
167 Box::new(MD040FencedCodeLanguage)
168 }
169}
170
171fn detect_fenced_code_blocks(content: &str, line_offsets: &[usize]) -> Vec<FencedCodeBlock> {
173 let mut blocks = Vec::new();
174 let options = Options::all();
175 let parser = Parser::new_ext(content, options).into_offset_iter();
176
177 for (event, range) in parser {
178 if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) = event {
179 let line_idx = line_offsets
181 .iter()
182 .enumerate()
183 .rev()
184 .find(|&(_, offset)| *offset <= range.start)
185 .map(|(idx, _)| idx)
186 .unwrap_or(0);
187
188 let line = content.lines().nth(line_idx).unwrap_or("");
190 let trimmed = line.trim();
191 let fence_marker = if trimmed.starts_with('`') {
192 let count = trimmed.chars().take_while(|&c| c == '`').count();
193 "`".repeat(count)
194 } else if trimmed.starts_with('~') {
195 let count = trimmed.chars().take_while(|&c| c == '~').count();
196 "~".repeat(count)
197 } else {
198 "```".to_string() };
200
201 let language = info.split_whitespace().next().unwrap_or("").to_string();
203
204 blocks.push(FencedCodeBlock {
205 line_idx,
206 language,
207 fence_marker,
208 });
209 }
210 }
211
212 blocks
213}
214
215fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
217 let mut ranges = Vec::new();
218 let mut disabled_start: Option<usize> = None;
219
220 for (i, line) in content.lines().enumerate() {
221 let trimmed = line.trim();
222
223 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
224 && (rules.is_empty() || rules.contains(&rule_name))
225 && disabled_start.is_none()
226 {
227 disabled_start = Some(i);
228 }
229
230 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
231 && (rules.is_empty() || rules.contains(&rule_name))
232 && let Some(start) = disabled_start.take()
233 {
234 ranges.push((start, i));
235 }
236 }
237
238 if let Some(start) = disabled_start {
240 ranges.push((start, usize::MAX));
241 }
242
243 ranges
244}
245
/// Tells whether `line_idx` lies inside any of the half-open `(start, end)` spans.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    for span in ranges {
        if (span.0..span.1).contains(&line_idx) {
            return true;
        }
    }
    false
}
250
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs the rule's `check` on `content` using the standard Markdown flavor.
    fn run_check(content: &str) -> LintResult {
        let rule = MD040FencedCodeLanguage;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx)
    }

    /// Runs the rule's `fix` on `content` using the standard Markdown flavor.
    fn run_fix(content: &str) -> Result<String, LintError> {
        let rule = MD040FencedCodeLanguage;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.fix(&ctx)
    }

    #[test]
    fn test_code_blocks_with_language_specified() {
        let content = r#"# Test

```python
print("Hello, world!")
```

```javascript
console.log("Hello!");
```
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }

    #[test]
    fn test_code_blocks_without_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }

    #[test]
    fn test_code_blocks_with_empty_language() {
        // The opening fence is followed by whitespace only. The fixture is written with
        // escapes so the trailing spaces are visible and survive editors.
        let content = "# Test\n\n```   \nprint(\"Hello, world!\")\n```\n";
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
    }

    #[test]
    fn test_indented_code_blocks_should_be_ignored() {
        // Four leading spaces make these lines an indented code block.
        let content = r#"# Test

    This is an indented code block
    It should not trigger MD040
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Indented code blocks should be ignored");
    }

    #[test]
    fn test_inline_code_spans_should_be_ignored() {
        let content = r#"# Test

This is `inline code` and should not trigger warnings.

Use the `print()` function.
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Inline code spans should be ignored");
    }

    #[test]
    fn test_tildes_vs_backticks_for_fences() {
        let content_tildes_no_lang = r#"# Test

~~~
code here
~~~
"#;
        let result = run_check(content_tildes_no_lang).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");

        let content_tildes_with_lang = r#"# Test

~~~python
code here
~~~
"#;
        let result = run_check(content_tildes_with_lang).unwrap();
        assert!(result.is_empty());

        let content_mixed = r#"# Test

```python
code here
```

~~~javascript
more code
~~~

```
no language
```

~~~
also no language
~~~
"#;
        let result = run_check(content_mixed).unwrap();
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_language_with_additional_parameters() {
        let content = r#"# Test

```python {highlight=[1,2]}
print("Line 1")
print("Line 2")
```

```javascript {.line-numbers startFrom="10"}
console.log("Hello");
```

```ruby {data-line="1,3-4"}
puts "Hello"
puts "World"
puts "!"
```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Code blocks with language and parameters should pass"
        );
    }

    #[test]
    fn test_multiple_code_blocks_in_document() {
        let content = r#"# Test Document

First block without language:
```
code here
```

Second block with language:
```python
print("hello")
```

Third block without language:
```
more code
```

Fourth block with language:
```javascript
console.log("test");
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 4);
        assert_eq!(result[1].line, 14);
    }

    #[test]
    fn test_nested_code_blocks_in_lists() {
        // Fences are indented to the content column of the list item that owns them.
        let content = r#"# Test

- Item 1
  ```python
  print("nested with language")
  ```

- Item 2
  ```
  nested without language
  ```

- Item 3
  - Nested item
    ```javascript
    console.log("deeply nested");
    ```

  - Another nested
    ```
    no language
    ```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 9);
        assert_eq!(result[1].line, 20);
    }

    #[test]
    fn test_issue_257_list_indented_code_block_with_language() {
        // The fence opens on the same line as the list marker. The block content and the
        // closing fence are indented to the list item's content column.
        let content = r#"- Sample code:
- ```java
  List<Map<String,String>> inputs = new List<Map<String,String>>();
  ```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "List-indented code block with language should not trigger MD040. Got: {result:?}",
        );

        let fixed = run_fix(content).unwrap();
        assert_eq!(
            fixed, content,
            "Fix should not modify code blocks that already have a language"
        );
        assert!(
            !fixed.contains("```text"),
            "Fix should not add 'text' to closing fence of code block with language"
        );
    }

    #[test]
    fn test_issue_257_multiple_list_indented_blocks() {
        let content = r#"# Document

1. Step one
   ```python
   print("hello")
   ```
2. Step two

- Item with nested code:
  ```bash
  echo "test"
  ```

- Another item:
  ```javascript
  console.log("test");
  ```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "All list-indented code blocks have languages. Got: {result:?}",
        );

        let fixed = run_fix(content).unwrap();
        assert_eq!(
            fixed, content,
            "Fix should not modify content when all blocks have languages"
        );
    }

    #[test]
    fn test_code_blocks_in_blockquotes() {
        let content = r#"# Test

> This is a blockquote
> ```python
> print("with language")
> ```

> Another blockquote
> ```
> without language
> ```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_fix_method_adds_text_language() {
        let content = r#"# Test

```
code without language
```

```python
already has language
```

```
another block without
```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);
    }

    #[test]
    fn test_fix_preserves_indentation() {
        let content = r#"# Test

- List item
  ```
  indented code block
  ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("  ```text"));
        assert!(fixed.contains("  indented code block"));
    }

    #[test]
    fn test_fix_preserves_indentation_numbered_list() {
        let content = r#"1. Step 1

   ```
   foo
   bar
   ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("   ```text"));
        assert!(fixed.contains("   foo"));
        assert!(fixed.contains("   bar"));
        // The fence must not have been moved to column zero.
        assert!(!fixed.contains("\n```text\n"));
    }

    #[test]
    fn test_fix_preserves_all_indentation() {
        let content = r#"# Test

Top-level code block:
```
top level
```

1. List item

   ```
   nested in list
   ```

Indented by 2 spaces:
  ```
  content
  ```
"#;
        let fixed = run_fix(content).unwrap();

        assert!(
            fixed.contains("```text\ntop level"),
            "Top-level code block indentation preserved"
        );
        assert!(
            fixed.contains("   ```text\n   nested in list"),
            "List item code block indentation preserved"
        );
        assert!(
            fixed.contains("  ```text\n  content"),
            "2-space indented code block indentation preserved"
        );
    }

    #[test]
    fn test_fix_with_tilde_fences() {
        let content = r#"# Test

~~~
code with tildes
~~~
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("~~~text"));
    }

    #[test]
    fn test_longer_fence_markers() {
        let content = r#"# Test

````
code with four backticks
````

`````python
code with five backticks and language
`````

~~~~~~
code with six tildes
~~~~~~
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);

        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("````text"));
        assert!(fixed.contains("~~~~~~text"));
        assert!(fixed.contains("`````python"));
    }

    #[test]
    fn test_nested_code_blocks_different_markers() {
        let content = r#"# Test

````markdown
This is a markdown block

```python
# This is nested code
print("hello")
```

More markdown
````
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Nested code blocks with different markers should not trigger warnings"
        );
    }

    #[test]
    fn test_disable_enable_comments() {
        let content = r#"# Test

<!-- rumdl-disable MD040 -->
```
this should not trigger warning
```
<!-- rumdl-enable MD040 -->

```
this should trigger warning
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 9);
    }

    #[test]
    fn test_fence_with_language_only_on_closing() {
        let content = r#"# Test

```
code
```python
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_incomplete_code_blocks() {
        let content = r#"# Test

```python
this code block is not closed"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Unclosed code blocks with language should not trigger warnings"
        );

        let content_no_lang = r#"# Test

```
this code block is not closed"#;
        let result = run_check(content_no_lang).unwrap();
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_fix_preserves_original_formatting() {
        let content = r#"# Test

```
code
```

No newline at end"#;
        let fixed = run_fix(content).unwrap();
        assert!(!fixed.ends_with('\n'), "Fix should preserve lack of trailing newline");

        let content_with_newline = "# Test\n\n```\ncode\n```\n";
        let fixed = run_fix(content_with_newline).unwrap();
        assert!(fixed.ends_with('\n'), "Fix should preserve trailing newline");
    }

    #[test]
    fn test_edge_case_backticks_in_content() {
        let content = r#"# Test

```javascript
console.log(`template string with backticks`);
// This line has ``` in a comment
```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Backticks inside code blocks should not affect parsing"
        );
    }

    #[test]
    fn test_empty_document() {
        let content = "";
        let result = run_check(content).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_should_skip_optimization() {
        let rule = MD040FencedCodeLanguage;

        // No code at all: the rule can be skipped.
        let ctx = LintContext::new(
            "# Just a header\n\nSome text",
            crate::config::MarkdownFlavor::Standard,
            None,
        );
        assert!(rule.should_skip(&ctx));

        // Backtick fence.
        let ctx = LintContext::new("```\ncode\n```", crate::config::MarkdownFlavor::Standard, None);
        assert!(!rule.should_skip(&ctx));

        // Tilde fence.
        let ctx = LintContext::new("~~~\ncode\n~~~", crate::config::MarkdownFlavor::Standard, None);
        assert!(!rule.should_skip(&ctx));

        // Empty document.
        let ctx = LintContext::new("", crate::config::MarkdownFlavor::Standard, None);
        assert!(rule.should_skip(&ctx));
    }

    #[test]
    fn test_quarto_code_chunk_syntax() {
        let rule = MD040FencedCodeLanguage;

        let content = r#"# Test

```{r}
x <- 1
```

```{python}
x = 1
```

```{r, echo=FALSE}
plot(x)
```
"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Quarto code chunks with {{language}} syntax should not trigger warnings"
        );

        let content_no_lang = r#"# Test

```
code without language
```
"#;
        let ctx = LintContext::new(content_no_lang, crate::config::MarkdownFlavor::Quarto, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1, "Quarto files without language should trigger warning");

        let content_standard = r#"# Test

```{python}
code
```
"#;
        let ctx = LintContext::new(content_standard, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Standard flavor should accept any non-empty after_fence content"
        );
    }
}