rumdl_lib/rules/
md040_fenced_code_language.rs1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::range_utils::calculate_line_range;
3
/// Lint rule MD040: fenced code blocks should have a language specified.
///
/// The rule carries no configuration or state; all behaviour lives in its
/// `Rule` implementation.
#[derive(Debug, Default, Clone)]
pub struct MD040FencedCodeLanguage;
10
11impl Rule for MD040FencedCodeLanguage {
12 fn name(&self) -> &'static str {
13 "MD040"
14 }
15
16 fn description(&self) -> &'static str {
17 "Code blocks should have a language specified"
18 }
19
20 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
21 let content = ctx.content;
22 let _line_index = &ctx.line_index;
23
24 let mut warnings = Vec::new();
25
26 let mut in_code_block = false;
27 let mut current_fence_marker: Option<String> = None;
28 let mut opening_fence_indent: usize = 0;
29
30 let mut is_disabled = false;
32
33 for (i, line) in content.lines().enumerate() {
34 let trimmed = line.trim();
35
36 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
38 && (rules.is_empty() || rules.contains(&self.name()))
39 {
40 is_disabled = true;
41 }
42 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
43 && (rules.is_empty() || rules.contains(&self.name()))
44 {
45 is_disabled = false;
46 }
47
48 if is_disabled {
50 continue;
51 }
52
53 let fence_marker = if trimmed.starts_with("```") {
55 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
56 if backtick_count >= 3 {
57 Some("`".repeat(backtick_count))
58 } else {
59 None
60 }
61 } else if trimmed.starts_with("~~~") {
62 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
63 if tilde_count >= 3 {
64 Some("~".repeat(tilde_count))
65 } else {
66 None
67 }
68 } else {
69 None
70 };
71
72 if let Some(fence_marker) = fence_marker {
73 if in_code_block {
74 if let Some(ref current_marker) = current_fence_marker {
76 let current_indent = line.len() - line.trim_start().len();
77 if fence_marker == *current_marker
80 && trimmed[current_marker.len()..].trim().is_empty()
81 && current_indent <= opening_fence_indent
82 {
83 in_code_block = false;
85 current_fence_marker = None;
86 opening_fence_indent = 0;
87 }
88 }
90 } else {
91 let after_fence = trimmed[fence_marker.len()..].trim();
94
95 let has_title_only =
99 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
100
101 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
104 && after_fence.starts_with('{')
105 && after_fence.contains('}');
106
107 if (after_fence.is_empty() || has_title_only) && !has_quarto_syntax {
108 let (start_line, start_col, end_line, end_col) = calculate_line_range(i + 1, line);
110
111 warnings.push(LintWarning {
112 rule_name: Some(self.name().to_string()),
113 line: start_line,
114 column: start_col,
115 end_line,
116 end_column: end_col,
117 message: "Code block (```) missing language".to_string(),
118 severity: Severity::Warning,
119 fix: Some(Fix {
120 range: {
121 let trimmed_start = line.len() - line.trim_start().len();
123 let fence_len = fence_marker.len();
124 let line_start_byte = ctx.line_offsets.get(i).copied().unwrap_or(0);
125 let fence_start_byte = line_start_byte + trimmed_start;
126 let fence_end_byte = fence_start_byte + fence_len;
127 fence_start_byte..fence_end_byte
128 },
129 replacement: format!("{fence_marker}text"),
130 }),
131 });
132 }
133
134 in_code_block = true;
135 current_fence_marker = Some(fence_marker);
136 opening_fence_indent = line.len() - line.trim_start().len();
137 }
138 }
139 }
141
142 Ok(warnings)
143 }
144
145 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
146 let content = ctx.content;
147 let _line_index = &ctx.line_index;
148
149 let mut result = String::new();
150 let mut in_code_block = false;
151 let mut current_fence_marker: Option<String> = None;
152 let mut fence_needs_language = false;
153 let mut original_indent = String::new();
154 let mut opening_fence_indent: usize = 0;
155
156 let lines: Vec<&str> = content.lines().collect();
157
158 let mut is_disabled = false;
160
161 for line in lines.iter() {
162 let trimmed = line.trim();
163
164 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
166 && (rules.is_empty() || rules.contains(&self.name()))
167 {
168 is_disabled = true;
169 }
170 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
171 && (rules.is_empty() || rules.contains(&self.name()))
172 {
173 is_disabled = false;
174 }
175
176 if is_disabled {
178 result.push_str(line);
179 result.push('\n');
180 continue;
181 }
182
183 let fence_marker = if trimmed.starts_with("```") {
185 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
186 if backtick_count >= 3 {
187 Some("`".repeat(backtick_count))
188 } else {
189 None
190 }
191 } else if trimmed.starts_with("~~~") {
192 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
193 if tilde_count >= 3 {
194 Some("~".repeat(tilde_count))
195 } else {
196 None
197 }
198 } else {
199 None
200 };
201
202 if let Some(fence_marker) = fence_marker {
203 if in_code_block {
204 if let Some(ref current_marker) = current_fence_marker {
206 let current_indent = line.len() - line.trim_start().len();
207 if fence_marker == *current_marker
208 && trimmed[current_marker.len()..].trim().is_empty()
209 && current_indent <= opening_fence_indent
210 {
211 if fence_needs_language {
213 result.push_str(&format!("{original_indent}{trimmed}\n"));
215 } else {
216 result.push_str(line);
218 result.push('\n');
219 }
220 in_code_block = false;
221 current_fence_marker = None;
222 fence_needs_language = false;
223 original_indent.clear();
224 opening_fence_indent = 0;
225 } else {
226 result.push_str(line);
228 result.push('\n');
229 }
230 } else {
231 result.push_str(line);
233 result.push('\n');
234 }
235 } else {
236 let line_indent = line[..line.len() - line.trim_start().len()].to_string();
239
240 let after_fence = trimmed[fence_marker.len()..].trim();
242
243 let has_title_only =
245 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
246
247 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
249 && after_fence.starts_with('{')
250 && after_fence.contains('}');
251
252 if (after_fence.is_empty() || has_title_only) && !has_quarto_syntax {
253 original_indent = line_indent;
255 if has_title_only {
256 result.push_str(&format!("{original_indent}{fence_marker}text {after_fence}\n"));
258 } else {
259 result.push_str(&format!("{original_indent}{fence_marker}text\n"));
260 }
261 fence_needs_language = true;
262 } else {
263 result.push_str(line);
265 result.push('\n');
266 fence_needs_language = false;
267 }
268
269 in_code_block = true;
270 current_fence_marker = Some(fence_marker);
271 opening_fence_indent = line.len() - line.trim_start().len();
272 }
273 } else if in_code_block {
274 result.push_str(line);
276 result.push('\n');
277 } else {
278 result.push_str(line);
280 result.push('\n');
281 }
282 }
283
284 if !content.ends_with('\n') {
286 result.pop();
287 }
288
289 Ok(result)
290 }
291
292 fn category(&self) -> RuleCategory {
294 RuleCategory::CodeBlock
295 }
296
297 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
299 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
300 }
301
302 fn as_any(&self) -> &dyn std::any::Any {
303 self
304 }
305
306 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
307 where
308 Self: Sized,
309 {
310 Box::new(MD040FencedCodeLanguage)
311 }
312}
313
// Unit tests for MD040. Helpers build a `LintContext` in the Standard flavor;
// flavor-specific tests construct their own context.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    // Runs `check` on `content` using the Standard markdown flavor.
    fn run_check(content: &str) -> LintResult {
        let rule = MD040FencedCodeLanguage;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx)
    }

    // Runs `fix` on `content` using the Standard markdown flavor.
    fn run_fix(content: &str) -> Result<String, LintError> {
        let rule = MD040FencedCodeLanguage;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx)
    }

    // Fences that name a language produce no warnings.
    #[test]
    fn test_code_blocks_with_language_specified() {
        let content = r#"# Test

```python
print("Hello, world!")
```

```javascript
console.log("Hello!");
```
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }

    // A bare fence is reported once, on the opening fence's line.
    #[test]
    fn test_code_blocks_without_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }

    // An opening fence with an empty info string is reported.
    #[test]
    fn test_code_blocks_with_empty_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
    }

    // Indented text without any fence produces no warnings.
    #[test]
    fn test_indented_code_blocks_should_be_ignored() {
        let content = r#"# Test

 This is an indented code block
 It should not trigger MD040
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Indented code blocks should be ignored");
    }

    // Inline code spans are not fences and produce no warnings.
    #[test]
    fn test_inline_code_spans_should_be_ignored() {
        let content = r#"# Test

This is `inline code` and should not trigger warnings.

Use the `print()` function.
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Inline code spans should be ignored");
    }

    // Tilde fences are handled like backtick fences, alone and mixed.
    #[test]
    fn test_tildes_vs_backticks_for_fences() {
        let content_tildes_no_lang = r#"# Test

~~~
code here
~~~
"#;
        let result = run_check(content_tildes_no_lang).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");

        let content_tildes_with_lang = r#"# Test

~~~python
code here
~~~
"#;
        let result = run_check(content_tildes_with_lang).unwrap();
        assert!(result.is_empty());

        let content_mixed = r#"# Test

```python
code here
```

~~~javascript
more code
~~~

```
no language
```

~~~
also no language
~~~
"#;
        let result = run_check(content_mixed).unwrap();
        assert_eq!(result.len(), 2);
    }

    // Extra attributes after the language do not trigger the rule.
    #[test]
    fn test_language_with_additional_parameters() {
        let content = r#"# Test

```python {highlight=[1,2]}
print("Line 1")
print("Line 2")
```

```javascript {.line-numbers startFrom="10"}
console.log("Hello");
```

```ruby {data-line="1,3-4"}
puts "Hello"
puts "World"
puts "!"
```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Code blocks with language and parameters should pass"
        );
    }

    // Only the blocks without a language are reported, at their own lines.
    #[test]
    fn test_multiple_code_blocks_in_document() {
        let content = r#"# Test Document

First block without language:
```
code here
```

Second block with language:
```python
print("hello")
```

Third block without language:
```
more code
```

Fourth block with language:
```javascript
console.log("test");
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 4);
        assert_eq!(result[1].line, 14);
    }

    // Fences inside list items are checked like top-level fences.
    #[test]
    fn test_nested_code_blocks_in_lists() {
        let content = r#"# Test

- Item 1
 ```python
 print("nested with language")
 ```

- Item 2
 ```
 nested without language
 ```

- Item 3
 - Nested item
 ```javascript
 console.log("deeply nested");
 ```

 - Another nested
 ```
 no language
 ```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 9);
        assert_eq!(result[1].line, 20);
    }

    // Fences prefixed with `>` are not recognised, so nothing is reported.
    #[test]
    fn test_code_blocks_in_blockquotes() {
        let content = r#"# Test

> This is a blockquote
> ```python
> print("with language")
> ```

> Another blockquote
> ```
> without language
> ```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 0);
    }

    // `fix` adds `text` to bare fences and leaves labelled fences alone.
    #[test]
    fn test_fix_method_adds_text_language() {
        let content = r#"# Test

```
code without language
```

```python
already has language
```

```
another block without
```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);
    }

    // `fix` keeps the indentation of a fence inside a list item.
    #[test]
    fn test_fix_preserves_indentation() {
        let content = r#"# Test

- List item
 ```
 indented code block
 ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains(" ```text"));
        assert!(fixed.contains(" indented code block"));
    }

    // `fix` keeps the indentation of a fence inside a numbered list item.
    #[test]
    fn test_fix_preserves_indentation_numbered_list() {
        let content = r#"1. Step 1

 ```
 foo
 bar
 ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains(" ```text"));
        assert!(fixed.contains(" foo"));
        assert!(fixed.contains(" bar"));
        assert!(!fixed.contains("\n```text\n"));
    }

    // `fix` keeps indentation for top-level, list and indented fences alike.
    #[test]
    fn test_fix_preserves_all_indentation() {
        let content = r#"# Test

Top-level code block:
```
top level
```

1. List item

 ```
 nested in list
 ```

Indented by 2 spaces:
 ```
 content
 ```
"#;
        let fixed = run_fix(content).unwrap();

        assert!(
            fixed.contains("```text\ntop level"),
            "Top-level code block indentation preserved"
        );
        assert!(
            fixed.contains(" ```text\n nested in list"),
            "List item code block indentation preserved"
        );
        assert!(
            fixed.contains(" ```text\n content"),
            "2-space indented code block indentation preserved"
        );
    }

    // `fix` also rewrites tilde fences.
    #[test]
    fn test_fix_with_tilde_fences() {
        let content = r#"# Test

~~~
code with tildes
~~~
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("~~~text"));
    }

    // Fences longer than three characters are checked and fixed at full length.
    #[test]
    fn test_longer_fence_markers() {
        let content = r#"# Test

````
code with four backticks
````

`````python
code with five backticks and language
`````

~~~~~~
code with six tildes
~~~~~~
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);

        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("````text"));
        assert!(fixed.contains("~~~~~~text"));
        assert!(fixed.contains("`````python"));
    }

    // A shorter fence inside a longer fenced block is content, not a new block.
    #[test]
    fn test_nested_code_blocks_different_markers() {
        let content = r#"# Test

````markdown
This is a markdown block

```python
# This is nested code
print("hello")
```

More markdown
````
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Nested code blocks with different markers should not trigger warnings"
        );
    }

    // Blocks between disable/enable comments are not reported.
    #[test]
    fn test_disable_enable_comments() {
        let content = r#"# Test

<!-- rumdl-disable MD040 -->
```
this should not trigger warning
```
<!-- rumdl-enable MD040 -->

```
this should trigger warning
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 9);
    }

    // A fence with an info string cannot close a block; only the opener is reported.
    #[test]
    fn test_fence_with_language_only_on_closing() {
        let content = r#"# Test

```
code
```python
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
    }

    // Unclosed blocks are still judged by their opening fence.
    #[test]
    fn test_incomplete_code_blocks() {
        let content = r#"# Test

```python
this code block is not closed"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Unclosed code blocks with language should not trigger warnings"
        );

        let content_no_lang = r#"# Test

```
this code block is not closed"#;
        let result = run_check(content_no_lang).unwrap();
        assert_eq!(result.len(), 1);
    }

    // `fix` keeps the presence or absence of a trailing newline.
    #[test]
    fn test_fix_preserves_original_formatting() {
        let content = r#"# Test

```
code
```

No newline at end"#;
        let fixed = run_fix(content).unwrap();
        assert!(!fixed.ends_with('\n'), "Fix should preserve lack of trailing newline");

        let content_with_newline = "# Test\n\n```\ncode\n```\n";
        let fixed = run_fix(content_with_newline).unwrap();
        assert!(fixed.ends_with('\n'), "Fix should preserve trailing newline");
    }

    // Backticks in the middle of a content line do not affect fence tracking.
    #[test]
    fn test_edge_case_backticks_in_content() {
        let content = r#"# Test

```javascript
console.log(`template string with backticks`);
// This line has ``` in a comment
```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Backticks inside code blocks should not affect parsing"
        );
    }

    // An empty document yields no warnings.
    #[test]
    fn test_empty_document() {
        let content = "";
        let result = run_check(content).unwrap();
        assert!(result.is_empty());
    }

    // `should_skip` is true for fence-free or empty input and false otherwise.
    #[test]
    fn test_should_skip_optimization() {
        let rule = MD040FencedCodeLanguage;

        let ctx = LintContext::new("# Just a header\n\nSome text", crate::config::MarkdownFlavor::Standard);
        assert!(rule.should_skip(&ctx));

        let ctx = LintContext::new("```\ncode\n```", crate::config::MarkdownFlavor::Standard);
        assert!(!rule.should_skip(&ctx));

        let ctx = LintContext::new("~~~\ncode\n~~~", crate::config::MarkdownFlavor::Standard);
        assert!(!rule.should_skip(&ctx));

        let ctx = LintContext::new("", crate::config::MarkdownFlavor::Standard);
        assert!(rule.should_skip(&ctx));
    }

    // Quarto `{lang}` chunk headers pass; bare fences are still reported in Quarto.
    #[test]
    fn test_quarto_code_chunk_syntax() {
        let rule = MD040FencedCodeLanguage;

        let content = r#"# Test

```{r}
x <- 1
```

```{python}
x = 1
```

```{r, echo=FALSE}
plot(x)
```
"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Quarto code chunks with {{language}} syntax should not trigger warnings"
        );

        let content_no_lang = r#"# Test

```
code without language
```
"#;
        let ctx = LintContext::new(content_no_lang, crate::config::MarkdownFlavor::Quarto);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1, "Quarto files without language should trigger warning");

        let content_standard = r#"# Test

```{python}
code
```
"#;
        let ctx = LintContext::new(content_standard, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Standard flavor should accept any non-empty after_fence content"
        );
    }
}