1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::range_utils::calculate_line_range;
3
/// Tracks whether a rule is currently switched off by inline disable/enable
/// comments encountered while scanning a document line by line.
struct DisableState {
    /// `true` after a matching disable comment, until a matching enable comment resets it.
    is_disabled: bool,
}
11
12impl DisableState {
13 fn new() -> Self {
14 Self { is_disabled: false }
15 }
16
17 fn update(&mut self, line: &str, rule_name: &str) {
19 if let Some(rules) = crate::rule::parse_disable_comment(line)
21 && (rules.is_empty() || rules.contains(&rule_name))
22 {
23 self.is_disabled = true;
24 }
25 if let Some(rules) = crate::rule::parse_enable_comment(line)
27 && (rules.is_empty() || rules.contains(&rule_name))
28 {
29 self.is_disabled = false;
30 }
31 }
32
33 fn is_disabled(&self) -> bool {
34 self.is_disabled
35 }
36}
37
/// Rule MD040: code blocks should have a language specified.
///
/// The rule carries no configuration state; it is a unit struct.
#[derive(Debug, Default, Clone)]
pub struct MD040FencedCodeLanguage;
40
41impl Rule for MD040FencedCodeLanguage {
42 fn name(&self) -> &'static str {
43 "MD040"
44 }
45
46 fn description(&self) -> &'static str {
47 "Code blocks should have a language specified"
48 }
49
50 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
51 let content = ctx.content;
52 let _line_index = &ctx.line_index;
53
54 let mut warnings = Vec::new();
55
56 let mut in_code_block = false;
57 let mut current_fence_marker: Option<String> = None;
58 let mut opening_fence_indent: usize = 0;
59
60 let mut is_disabled = false;
62
63 for (i, line) in content.lines().enumerate() {
64 let trimmed = line.trim();
65
66 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
68 && (rules.is_empty() || rules.contains(&self.name()))
69 {
70 is_disabled = true;
71 }
72 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
73 && (rules.is_empty() || rules.contains(&self.name()))
74 {
75 is_disabled = false;
76 }
77
78 if is_disabled {
80 continue;
81 }
82
83 let fence_marker = if trimmed.starts_with("```") {
85 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
86 if backtick_count >= 3 {
87 Some("`".repeat(backtick_count))
88 } else {
89 None
90 }
91 } else if trimmed.starts_with("~~~") {
92 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
93 if tilde_count >= 3 {
94 Some("~".repeat(tilde_count))
95 } else {
96 None
97 }
98 } else {
99 None
100 };
101
102 if let Some(fence_marker) = fence_marker {
103 if in_code_block {
104 if let Some(ref current_marker) = current_fence_marker {
106 let current_indent = line.len() - line.trim_start().len();
107 if fence_marker == *current_marker
110 && trimmed[current_marker.len()..].trim().is_empty()
111 && current_indent <= opening_fence_indent
112 {
113 in_code_block = false;
115 current_fence_marker = None;
116 opening_fence_indent = 0;
117 }
118 }
120 } else {
121 let after_fence = trimmed[fence_marker.len()..].trim();
124
125 let has_title_only =
129 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
130
131 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
134 && after_fence.starts_with('{')
135 && after_fence.contains('}');
136
137 if (after_fence.is_empty() || has_title_only) && !has_quarto_syntax {
138 let (start_line, start_col, end_line, end_col) = calculate_line_range(i + 1, line);
140
141 warnings.push(LintWarning {
142 rule_name: Some(self.name().to_string()),
143 line: start_line,
144 column: start_col,
145 end_line,
146 end_column: end_col,
147 message: "Code block (```) missing language".to_string(),
148 severity: Severity::Warning,
149 fix: Some(Fix {
150 range: {
151 let trimmed_start = line.len() - line.trim_start().len();
153 let fence_len = fence_marker.len();
154 let line_start_byte = ctx.line_offsets.get(i).copied().unwrap_or(0);
155 let fence_start_byte = line_start_byte + trimmed_start;
156 let fence_end_byte = fence_start_byte + fence_len;
157 fence_start_byte..fence_end_byte
158 },
159 replacement: format!("{fence_marker}text"),
160 }),
161 });
162 }
163
164 in_code_block = true;
165 current_fence_marker = Some(fence_marker);
166 opening_fence_indent = line.len() - line.trim_start().len();
167 }
168 }
169 }
171
172 Ok(warnings)
173 }
174
175 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
176 let content = ctx.content;
177 let _line_index = &ctx.line_index;
178
179 let mut result = String::new();
180 let mut in_code_block = false;
181 let mut current_fence_marker: Option<String> = None;
182 let mut fence_needs_language = false;
183 let mut original_indent = String::new();
184 let mut opening_fence_indent: usize = 0;
185
186 let lines: Vec<&str> = content.lines().collect();
187
188 let mut disable_state = DisableState::new();
190
191 for line in lines.iter() {
192 let trimmed = line.trim();
193
194 disable_state.update(trimmed, self.name());
196
197 if disable_state.is_disabled() {
199 result.push_str(line);
200 result.push('\n');
201 continue;
202 }
203
204 let fence_marker = if trimmed.starts_with("```") {
206 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
207 if backtick_count >= 3 {
208 Some("`".repeat(backtick_count))
209 } else {
210 None
211 }
212 } else if trimmed.starts_with("~~~") {
213 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
214 if tilde_count >= 3 {
215 Some("~".repeat(tilde_count))
216 } else {
217 None
218 }
219 } else {
220 None
221 };
222
223 if let Some(fence_marker) = fence_marker {
224 if in_code_block {
225 if let Some(ref current_marker) = current_fence_marker {
227 let current_indent = line.len() - line.trim_start().len();
228 if fence_marker == *current_marker
229 && trimmed[current_marker.len()..].trim().is_empty()
230 && current_indent <= opening_fence_indent
231 {
232 if fence_needs_language {
234 result.push_str(&format!("{original_indent}{trimmed}\n"));
236 } else {
237 result.push_str(line);
239 result.push('\n');
240 }
241 in_code_block = false;
242 current_fence_marker = None;
243 fence_needs_language = false;
244 original_indent.clear();
245 opening_fence_indent = 0;
246 } else {
247 result.push_str(line);
249 result.push('\n');
250 }
251 } else {
252 result.push_str(line);
254 result.push('\n');
255 }
256 } else {
257 let line_indent = line[..line.len() - line.trim_start().len()].to_string();
260
261 let after_fence = trimmed[fence_marker.len()..].trim();
263
264 let has_title_only =
266 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
267
268 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
270 && after_fence.starts_with('{')
271 && after_fence.contains('}');
272
273 if (after_fence.is_empty() || has_title_only) && !has_quarto_syntax {
274 original_indent = line_indent;
276 if has_title_only {
277 result.push_str(&format!("{original_indent}{fence_marker}text {after_fence}\n"));
279 } else {
280 result.push_str(&format!("{original_indent}{fence_marker}text\n"));
281 }
282 fence_needs_language = true;
283 } else {
284 result.push_str(line);
286 result.push('\n');
287 fence_needs_language = false;
288 }
289
290 in_code_block = true;
291 current_fence_marker = Some(fence_marker);
292 opening_fence_indent = line.len() - line.trim_start().len();
293 }
294 } else if in_code_block {
295 result.push_str(line);
297 result.push('\n');
298 } else {
299 result.push_str(line);
301 result.push('\n');
302 }
303 }
304
305 if !content.ends_with('\n') {
307 result.pop();
308 }
309
310 Ok(result)
311 }
312
313 fn category(&self) -> RuleCategory {
315 RuleCategory::CodeBlock
316 }
317
318 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
320 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
321 }
322
323 fn as_any(&self) -> &dyn std::any::Any {
324 self
325 }
326
327 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
328 where
329 Self: Sized,
330 {
331 Box::new(MD040FencedCodeLanguage)
332 }
333}
334
// Unit tests for MD040. Fixtures are raw strings; their exact text (including
// leading whitespace) is part of what is being tested.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    // Runs `check` on `content` using the Standard flavor.
    fn run_check(content: &str) -> LintResult {
        let rule = MD040FencedCodeLanguage;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx)
    }

    // Runs `fix` on `content` using the Standard flavor.
    fn run_fix(content: &str) -> Result<String, LintError> {
        let rule = MD040FencedCodeLanguage;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.fix(&ctx)
    }

    // Fences that name a language produce no warnings.
    #[test]
    fn test_code_blocks_with_language_specified() {
        let content = r#"# Test

```python
print("Hello, world!")
```

```javascript
console.log("Hello!");
```
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }

    // A bare opening fence is reported on its own line (line 3 of the fixture).
    #[test]
    fn test_code_blocks_without_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }

    // An opening fence with nothing after the marker is reported.
    #[test]
    fn test_code_blocks_with_empty_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
    }

    // Content without any fence markers produces no warnings.
    #[test]
    fn test_indented_code_blocks_should_be_ignored() {
        let content = r#"# Test

 This is an indented code block
 It should not trigger MD040
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Indented code blocks should be ignored");
    }

    // Single-backtick inline spans are not fences.
    #[test]
    fn test_inline_code_spans_should_be_ignored() {
        let content = r#"# Test

This is `inline code` and should not trigger warnings.

Use the `print()` function.
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Inline code spans should be ignored");
    }

    // Tilde fences are checked like backtick fences; note that the warning
    // message mentions backticks for both.
    #[test]
    fn test_tildes_vs_backticks_for_fences() {
        let content_tildes_no_lang = r#"# Test

~~~
code here
~~~
"#;
        let result = run_check(content_tildes_no_lang).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");

        let content_tildes_with_lang = r#"# Test

~~~python
code here
~~~
"#;
        let result = run_check(content_tildes_with_lang).unwrap();
        assert!(result.is_empty());

        // Two of the four blocks below lack a language.
        let content_mixed = r#"# Test

```python
code here
```

~~~javascript
more code
~~~

```
no language
```

~~~
also no language
~~~
"#;
        let result = run_check(content_mixed).unwrap();
        assert_eq!(result.len(), 2);
    }

    // Attributes after the language do not matter as long as a language is present.
    #[test]
    fn test_language_with_additional_parameters() {
        let content = r#"# Test

```python {highlight=[1,2]}
print("Line 1")
print("Line 2")
```

```javascript {.line-numbers startFrom="10"}
console.log("Hello");
```

```ruby {data-line="1,3-4"}
puts "Hello"
puts "World"
puts "!"
```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Code blocks with language and parameters should pass"
        );
    }

    // Only the blocks without a language are reported, at their opening lines.
    #[test]
    fn test_multiple_code_blocks_in_document() {
        let content = r#"# Test Document

First block without language:
```
code here
```

Second block with language:
```python
print("hello")
```

Third block without language:
```
more code
```

Fourth block with language:
```javascript
console.log("test");
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 4);
        assert_eq!(result[1].line, 14);
    }

    // Fences inside list items are detected after trimming leading whitespace.
    #[test]
    fn test_nested_code_blocks_in_lists() {
        let content = r#"# Test

- Item 1
 ```python
 print("nested with language")
 ```

- Item 2
 ```
 nested without language
 ```

- Item 3
 - Nested item
 ```javascript
 console.log("deeply nested");
 ```

 - Another nested
 ```
 no language
 ```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 9);
        assert_eq!(result[1].line, 20);
    }

    // Lines starting with `>` do not start with a fence marker after trimming,
    // so fences inside blockquotes are not reported.
    #[test]
    fn test_code_blocks_in_blockquotes() {
        let content = r#"# Test

> This is a blockquote
> ```python
> print("with language")
> ```

> Another blockquote
> ```
> without language
> ```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 0);
    }

    // `fix` adds `text` to each bare opening fence and leaves the others alone.
    #[test]
    fn test_fix_method_adds_text_language() {
        let content = r#"# Test

```
code without language
```

```python
already has language
```

```
another block without
```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);
    }

    // The rewritten opening fence keeps the indentation of the original line.
    #[test]
    fn test_fix_preserves_indentation() {
        let content = r#"# Test

- List item
 ```
 indented code block
 ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains(" ```text"));
        assert!(fixed.contains(" indented code block"));
    }

    // Same as above for a fence under a numbered list item; the fence must not
    // be moved to column zero.
    #[test]
    fn test_fix_preserves_indentation_numbered_list() {
        let content = r#"1. Step 1

 ```
 foo
 bar
 ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains(" ```text"));
        assert!(fixed.contains(" foo"));
        assert!(fixed.contains(" bar"));
        assert!(!fixed.contains("\n```text\n"));
    }

    // Indentation is preserved for top-level, list-nested and indented fences alike.
    #[test]
    fn test_fix_preserves_all_indentation() {
        let content = r#"# Test

Top-level code block:
```
top level
```

1. List item

 ```
 nested in list
 ```

Indented by 2 spaces:
 ```
 content
 ```
"#;
        let fixed = run_fix(content).unwrap();

        assert!(
            fixed.contains("```text\ntop level"),
            "Top-level code block indentation preserved"
        );
        assert!(
            fixed.contains(" ```text\n nested in list"),
            "List item code block indentation preserved"
        );
        assert!(
            fixed.contains(" ```text\n content"),
            "2-space indented code block indentation preserved"
        );
    }

    // `fix` keeps the tilde marker when adding the language.
    #[test]
    fn test_fix_with_tilde_fences() {
        let content = r#"# Test

~~~
code with tildes
~~~
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("~~~text"));
    }

    // Markers longer than three characters are recognized and preserved in full.
    #[test]
    fn test_longer_fence_markers() {
        let content = r#"# Test

````
code with four backticks
````

`````python
code with five backticks and language
`````

~~~~~~
code with six tildes
~~~~~~
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);

        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("````text"));
        assert!(fixed.contains("~~~~~~text"));
        assert!(fixed.contains("`````python"));
    }

    // A shorter fence inside a longer one is block content, not a new block.
    #[test]
    fn test_nested_code_blocks_different_markers() {
        let content = r#"# Test

````markdown
This is a markdown block

```python
# This is nested code
print("hello")
```

More markdown
````
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Nested code blocks with different markers should not trigger warnings"
        );
    }

    // Fences between disable and enable comments are skipped; the warning is
    // for the fence on line 9, after the enable comment.
    #[test]
    fn test_disable_enable_comments() {
        let content = r#"# Test

<!-- rumdl-disable MD040 -->
```
this should not trigger warning
```
<!-- rumdl-enable MD040 -->

```
this should trigger warning
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 9);
    }

    // A fence followed by text is not a closing fence, so only the bare
    // opening fence is reported.
    #[test]
    fn test_fence_with_language_only_on_closing() {
        let content = r#"# Test

```
code
```python
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
    }

    // Unclosed blocks are judged by their opening fence alone.
    #[test]
    fn test_incomplete_code_blocks() {
        let content = r#"# Test

```python
this code block is not closed"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Unclosed code blocks with language should not trigger warnings"
        );

        let content_no_lang = r#"# Test

```
this code block is not closed"#;
        let result = run_check(content_no_lang).unwrap();
        assert_eq!(result.len(), 1);
    }

    // The presence or absence of a trailing newline survives `fix`.
    #[test]
    fn test_fix_preserves_original_formatting() {
        let content = r#"# Test

```
code
```

No newline at end"#;
        let fixed = run_fix(content).unwrap();
        assert!(!fixed.ends_with('\n'), "Fix should preserve lack of trailing newline");

        let content_with_newline = "# Test\n\n```\ncode\n```\n";
        let fixed = run_fix(content_with_newline).unwrap();
        assert!(fixed.ends_with('\n'), "Fix should preserve trailing newline");
    }

    // Backticks that do not start a (trimmed) line are never treated as fences.
    #[test]
    fn test_edge_case_backticks_in_content() {
        let content = r#"# Test

```javascript
console.log(`template string with backticks`);
// This line has ``` in a comment
```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Backticks inside code blocks should not affect parsing"
        );
    }

    // An empty document yields no warnings.
    #[test]
    fn test_empty_document() {
        let content = "";
        let result = run_check(content).unwrap();
        assert!(result.is_empty());
    }

    // `should_skip` is true for empty or fence-free content and false when
    // backtick or tilde fences are present.
    #[test]
    fn test_should_skip_optimization() {
        let rule = MD040FencedCodeLanguage;

        let ctx = LintContext::new(
            "# Just a header\n\nSome text",
            crate::config::MarkdownFlavor::Standard,
            None,
        );
        assert!(rule.should_skip(&ctx));

        let ctx = LintContext::new("```\ncode\n```", crate::config::MarkdownFlavor::Standard, None);
        assert!(!rule.should_skip(&ctx));

        let ctx = LintContext::new("~~~\ncode\n~~~", crate::config::MarkdownFlavor::Standard, None);
        assert!(!rule.should_skip(&ctx));

        let ctx = LintContext::new("", crate::config::MarkdownFlavor::Standard, None);
        assert!(rule.should_skip(&ctx));
    }

    // Quarto `{lang}` chunks count as having a language; a bare fence is still
    // reported under the Quarto flavor.
    #[test]
    fn test_quarto_code_chunk_syntax() {
        let rule = MD040FencedCodeLanguage;

        let content = r#"# Test

```{r}
x <- 1
```

```{python}
x = 1
```

```{r, echo=FALSE}
plot(x)
```
"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Quarto code chunks with {{language}} syntax should not trigger warnings"
        );

        let content_no_lang = r#"# Test

```
code without language
```
"#;
        let ctx = LintContext::new(content_no_lang, crate::config::MarkdownFlavor::Quarto, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1, "Quarto files without language should trigger warning");

        // Under the Standard flavor any non-empty text after the fence is accepted.
        let content_standard = r#"# Test

```{python}
code
```
"#;
        let ctx = LintContext::new(content_standard, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Standard flavor should accept any non-empty after_fence content"
        );
    }
}