rumdl_lib/rules/
md040_fenced_code_language.rs1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
3use crate::utils::range_utils::{LineIndex, calculate_line_range};
4
/// Rule MD040: fenced code blocks should declare a language.
///
/// The rule carries no configuration, so it is modelled as a unit struct.
#[derive(Clone, Debug, Default)]
pub struct MD040FencedCodeLanguage;
11
12impl Rule for MD040FencedCodeLanguage {
13 fn name(&self) -> &'static str {
14 "MD040"
15 }
16
17 fn description(&self) -> &'static str {
18 "Code blocks should have a language specified"
19 }
20
21 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
22 let content = ctx.content;
23 let _line_index = LineIndex::new(content.to_string());
24
25 let mut warnings = Vec::new();
26
27 let mut in_code_block = false;
28 let mut current_fence_marker: Option<String> = None;
29 let mut opening_fence_indent: usize = 0;
30
31 let mut is_disabled = false;
33
34 for (i, line) in content.lines().enumerate() {
35 let trimmed = line.trim();
36
37 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
39 && (rules.is_empty() || rules.contains(&self.name()))
40 {
41 is_disabled = true;
42 }
43 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
44 && (rules.is_empty() || rules.contains(&self.name()))
45 {
46 is_disabled = false;
47 }
48
49 if is_disabled {
51 continue;
52 }
53
54 let fence_marker = if trimmed.starts_with("```") {
56 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
57 if backtick_count >= 3 {
58 Some("`".repeat(backtick_count))
59 } else {
60 None
61 }
62 } else if trimmed.starts_with("~~~") {
63 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
64 if tilde_count >= 3 {
65 Some("~".repeat(tilde_count))
66 } else {
67 None
68 }
69 } else {
70 None
71 };
72
73 if let Some(fence_marker) = fence_marker {
74 if in_code_block {
75 if let Some(ref current_marker) = current_fence_marker {
77 let current_indent = line.len() - line.trim_start().len();
78 if fence_marker == *current_marker
81 && trimmed[current_marker.len()..].trim().is_empty()
82 && current_indent <= opening_fence_indent
83 {
84 in_code_block = false;
86 current_fence_marker = None;
87 opening_fence_indent = 0;
88 }
89 }
91 } else {
92 let after_fence = trimmed[fence_marker.len()..].trim();
95 if after_fence.is_empty() {
96 let (start_line, start_col, end_line, end_col) = calculate_line_range(i + 1, line);
98
99 warnings.push(LintWarning {
100 rule_name: Some(self.name()),
101 line: start_line,
102 column: start_col,
103 end_line,
104 end_column: end_col,
105 message: "Code block (```) missing language".to_string(),
106 severity: Severity::Warning,
107 fix: Some(Fix {
108 range: {
109 let trimmed_start = line.len() - line.trim_start().len();
111 let fence_len = fence_marker.len();
112 let line_start_byte = ctx.line_offsets.get(i).copied().unwrap_or(0);
113 let fence_start_byte = line_start_byte + trimmed_start;
114 let fence_end_byte = fence_start_byte + fence_len;
115 fence_start_byte..fence_end_byte
116 },
117 replacement: format!("{fence_marker}text"),
118 }),
119 });
120 }
121
122 in_code_block = true;
123 current_fence_marker = Some(fence_marker);
124 opening_fence_indent = line.len() - line.trim_start().len();
125 }
126 }
127 }
129
130 Ok(warnings)
131 }
132
133 fn check_with_structure(
135 &self,
136 ctx: &crate::lint_context::LintContext,
137 _doc_structure: &DocumentStructure,
138 ) -> LintResult {
139 self.check(ctx)
142 }
143
144 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
145 let content = ctx.content;
146 let _line_index = LineIndex::new(content.to_string());
147
148 let mut result = String::new();
149 let mut in_code_block = false;
150 let mut current_fence_marker: Option<String> = None;
151 let mut fence_needs_language = false;
152 let mut original_indent = String::new();
153 let mut opening_fence_indent: usize = 0;
154
155 let lines: Vec<&str> = content.lines().collect();
156
157 let is_in_nested_context = |line_idx: usize| -> bool {
159 for i in (0..line_idx).rev() {
161 let line = lines.get(i).unwrap_or(&"");
162 let trimmed = line.trim();
163
164 if trimmed.is_empty() {
166 continue;
167 }
168
169 if line.trim_start().starts_with('>') {
171 return true;
172 }
173
174 if line.len() - line.trim_start().len() >= 2 {
176 let after_indent = line.trim_start();
177 if after_indent.starts_with("- ")
178 || after_indent.starts_with("* ")
179 || after_indent.starts_with("+ ")
180 || (after_indent.len() > 2
181 && after_indent.chars().nth(0).unwrap_or(' ').is_ascii_digit()
182 && after_indent.chars().nth(1).unwrap_or(' ') == '.'
183 && after_indent.chars().nth(2).unwrap_or(' ') == ' ')
184 {
185 return true;
186 }
187 }
188
189 if line.starts_with(|c: char| !c.is_whitespace()) {
191 break;
192 }
193 }
194 false
195 };
196
197 let mut is_disabled = false;
199
200 for (i, line) in lines.iter().enumerate() {
201 let trimmed = line.trim();
202
203 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
205 && (rules.is_empty() || rules.contains(&self.name()))
206 {
207 is_disabled = true;
208 }
209 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
210 && (rules.is_empty() || rules.contains(&self.name()))
211 {
212 is_disabled = false;
213 }
214
215 if is_disabled {
217 result.push_str(line);
218 result.push('\n');
219 continue;
220 }
221
222 let fence_marker = if trimmed.starts_with("```") {
224 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
225 if backtick_count >= 3 {
226 Some("`".repeat(backtick_count))
227 } else {
228 None
229 }
230 } else if trimmed.starts_with("~~~") {
231 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
232 if tilde_count >= 3 {
233 Some("~".repeat(tilde_count))
234 } else {
235 None
236 }
237 } else {
238 None
239 };
240
241 if let Some(fence_marker) = fence_marker {
242 if in_code_block {
243 if let Some(ref current_marker) = current_fence_marker {
245 let current_indent = line.len() - line.trim_start().len();
246 if fence_marker == *current_marker
247 && trimmed[current_marker.len()..].trim().is_empty()
248 && current_indent <= opening_fence_indent
249 {
250 if fence_needs_language {
252 result.push_str(&format!("{original_indent}{trimmed}\n"));
254 } else {
255 result.push_str(line);
257 result.push('\n');
258 }
259 in_code_block = false;
260 current_fence_marker = None;
261 fence_needs_language = false;
262 original_indent.clear();
263 opening_fence_indent = 0;
264 } else {
265 result.push_str(line);
267 result.push('\n');
268 }
269 } else {
270 result.push_str(line);
272 result.push('\n');
273 }
274 } else {
275 let line_indent = line[..line.len() - line.trim_start().len()].to_string();
278
279 let after_fence = trimmed[fence_marker.len()..].trim();
281 if after_fence.is_empty() {
282 let should_preserve_indent = is_in_nested_context(i);
284
285 if should_preserve_indent {
286 original_indent = line_indent;
288 result.push_str(&format!("{original_indent}{fence_marker}text\n"));
289 } else {
290 original_indent = String::new();
292 result.push_str(&format!("{fence_marker}text\n"));
293 }
294 fence_needs_language = true;
295 } else {
296 result.push_str(line);
298 result.push('\n');
299 fence_needs_language = false;
300 }
301
302 in_code_block = true;
303 current_fence_marker = Some(fence_marker);
304 opening_fence_indent = line.len() - line.trim_start().len();
305 }
306 } else if in_code_block {
307 result.push_str(line);
309 result.push('\n');
310 } else {
311 result.push_str(line);
313 result.push('\n');
314 }
315 }
316
317 if !content.ends_with('\n') {
319 result.pop();
320 }
321
322 Ok(result)
323 }
324
325 fn category(&self) -> RuleCategory {
327 RuleCategory::CodeBlock
328 }
329
330 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
332 let content = ctx.content;
333 content.is_empty() || (!content.contains("```") && !content.contains("~~~"))
334 }
335
336 fn as_any(&self) -> &dyn std::any::Any {
337 self
338 }
339
340 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
341 where
342 Self: Sized,
343 {
344 Box::new(MD040FencedCodeLanguage)
345 }
346}
347
348impl DocumentStructureExtensions for MD040FencedCodeLanguage {
349 fn has_relevant_elements(
350 &self,
351 ctx: &crate::lint_context::LintContext,
352 _doc_structure: &DocumentStructure,
353 ) -> bool {
354 let content = ctx.content;
355 content.contains("```") || content.contains("~~~")
357 }
358}
359
// Unit tests for the MD040 rule. Each test feeds a small Markdown snippet through
// `check` or `fix` and asserts on the resulting warnings or rewritten text.
360#[cfg(test)]
361mod tests {
362 use super::*;
363 use crate::lint_context::LintContext;
364
 // Builds a `LintContext` with the Standard flavor for `content` and runs `check` on it.
365 fn run_check(content: &str) -> LintResult {
366 let rule = MD040FencedCodeLanguage;
367 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
368 rule.check(&ctx)
369 }
370
 // Builds a `LintContext` with the Standard flavor for `content` and runs `fix` on it.
371 fn run_fix(content: &str) -> Result<String, LintError> {
372 let rule = MD040FencedCodeLanguage;
373 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
374 rule.fix(&ctx)
375 }
376
 // Fences that carry an info string must not produce warnings.
377 #[test]
378 fn test_code_blocks_with_language_specified() {
379 let content = r#"# Test
381
382```python
383print("Hello, world!")
384```
385
386```javascript
387console.log("Hello!");
388```
389"#;
390 let result = run_check(content).unwrap();
391 assert!(result.is_empty(), "No warnings expected for code blocks with language");
392 }
393
 // A bare opening fence yields exactly one warning, reported on the fence's line.
394 #[test]
395 fn test_code_blocks_without_language() {
396 let content = r#"# Test
397
398```
399print("Hello, world!")
400```
401"#;
402 let result = run_check(content).unwrap();
403 assert_eq!(result.len(), 1);
404 assert_eq!(result[0].message, "Code block (```) missing language");
405 assert_eq!(result[0].line, 3);
406 }
407
 // Expects one warning for a fence with an empty info string.
 // NOTE(review): as visible here the input equals the previous test's input;
 // presumably the fence originally had trailing whitespace - confirm.
408 #[test]
409 fn test_code_blocks_with_empty_language() {
410 let content = r#"# Test
412
413```
414print("Hello, world!")
415```
416"#;
417 let result = run_check(content).unwrap();
418 assert_eq!(result.len(), 1);
419 assert_eq!(result[0].message, "Code block (```) missing language");
420 }
421
 // Content without any fence marker produces no warnings.
422 #[test]
423 fn test_indented_code_blocks_should_be_ignored() {
424 let content = r#"# Test
426
427 This is an indented code block
428 It should not trigger MD040
429"#;
430 let result = run_check(content).unwrap();
431 assert!(result.is_empty(), "Indented code blocks should be ignored");
432 }
433
 // Single-backtick inline code is not a fence and produces no warnings.
434 #[test]
435 fn test_inline_code_spans_should_be_ignored() {
436 let content = r#"# Test
437
438This is `inline code` and should not trigger warnings.
439
440Use the `print()` function.
441"#;
442 let result = run_check(content).unwrap();
443 assert!(result.is_empty(), "Inline code spans should be ignored");
444 }
445
 // Tilde fences are handled like backtick fences; the warning message is the
 // same backtick-flavoured text for both fence kinds.
446 #[test]
447 fn test_tildes_vs_backticks_for_fences() {
448 let content_tildes_no_lang = r#"# Test
450
451~~~
452code here
453~~~
454"#;
455 let result = run_check(content_tildes_no_lang).unwrap();
456 assert_eq!(result.len(), 1);
457 assert_eq!(result[0].message, "Code block (```) missing language");
458
459 let content_tildes_with_lang = r#"# Test
461
462~~~python
463code here
464~~~
465"#;
466 let result = run_check(content_tildes_with_lang).unwrap();
467 assert!(result.is_empty());
468
469 let content_mixed = r#"# Test
471
472```python
473code here
474```
475
476~~~javascript
477more code
478~~~
479
480```
481no language
482```
483
484~~~
485also no language
486~~~
487"#;
488 let result = run_check(content_mixed).unwrap();
489 assert_eq!(result.len(), 2);
490 }
491
 // Any non-empty text after the fence (language plus attributes) counts as a language.
492 #[test]
493 fn test_language_with_additional_parameters() {
494 let content = r#"# Test
495
496```python {highlight=[1,2]}
497print("Line 1")
498print("Line 2")
499```
500
501```javascript {.line-numbers startFrom="10"}
502console.log("Hello");
503```
504
505```ruby {data-line="1,3-4"}
506puts "Hello"
507puts "World"
508puts "!"
509```
510"#;
511 let result = run_check(content).unwrap();
512 assert!(
513 result.is_empty(),
514 "Code blocks with language and parameters should pass"
515 );
516 }
517
 // Only the two blocks without a language are reported, at their opening-fence lines.
518 #[test]
519 fn test_multiple_code_blocks_in_document() {
520 let content = r#"# Test Document
521
522First block without language:
523```
524code here
525```
526
527Second block with language:
528```python
529print("hello")
530```
531
532Third block without language:
533```
534more code
535```
536
537Fourth block with language:
538```javascript
539console.log("test");
540```
541"#;
542 let result = run_check(content).unwrap();
543 assert_eq!(result.len(), 2);
544 assert_eq!(result[0].line, 4);
545 assert_eq!(result[1].line, 14);
546 }
547
 // Fences nested under list items are still checked; two of the four lack a language.
548 #[test]
549 fn test_nested_code_blocks_in_lists() {
550 let content = r#"# Test
551
552- Item 1
553 ```python
554 print("nested with language")
555 ```
556
557- Item 2
558 ```
559 nested without language
560 ```
561
562- Item 3
563 - Nested item
564 ```javascript
565 console.log("deeply nested");
566 ```
567
568 - Another nested
569 ```
570 no language
571 ```
572"#;
573 let result = run_check(content).unwrap();
574 assert_eq!(result.len(), 2);
575 assert_eq!(result[0].line, 9);
577 assert_eq!(result[1].line, 20);
578 }
579
 // Lines that begin with `>` do not start with a fence marker after trimming,
 // so fences inside blockquotes are not reported (zero warnings asserted).
580 #[test]
581 fn test_code_blocks_in_blockquotes() {
582 let content = r#"# Test
583
584> This is a blockquote
585> ```python
586> print("with language")
587> ```
588
589> Another blockquote
590> ```
591> without language
592> ```
593"#;
594 let result = run_check(content).unwrap();
595 assert_eq!(result.len(), 0);
598 }
599
 // `fix` appends `text` to both bare fences and leaves the `python` fence alone.
600 #[test]
601 fn test_fix_method_adds_text_language() {
602 let content = r#"# Test
603
604```
605code without language
606```
607
608```python
609already has language
610```
611
612```
613another block without
614```
615"#;
616 let fixed = run_fix(content).unwrap();
617 assert!(fixed.contains("```text"));
618 assert!(fixed.contains("```python"));
619 assert_eq!(fixed.matches("```text").count(), 2);
620 }
621
 // After fixing a fence under a list item, the output contains the rewritten
 // fence and the code line is still present.
622 #[test]
623 fn test_fix_preserves_indentation() {
624 let content = r#"# Test
625
626- List item
627 ```
628 indented code block
629 ```
630"#;
631 let fixed = run_fix(content).unwrap();
632 assert!(fixed.contains("```text"));
636 assert!(fixed.contains(" indented code block"));
637 }
638
 // `fix` keeps the tilde fence style when adding the language.
639 #[test]
640 fn test_fix_with_tilde_fences() {
641 let content = r#"# Test
642
643~~~
644code with tildes
645~~~
646"#;
647 let fixed = run_fix(content).unwrap();
648 assert!(fixed.contains("~~~text"));
649 }
650
 // Fences longer than three characters are detected, and `fix` preserves their length.
651 #[test]
652 fn test_longer_fence_markers() {
653 let content = r#"# Test
654
655````
656code with four backticks
657````
658
659`````python
660code with five backticks and language
661`````
662
663~~~~~~
664code with six tildes
665~~~~~~
666"#;
667 let result = run_check(content).unwrap();
668 assert_eq!(result.len(), 2);
669
670 let fixed = run_fix(content).unwrap();
671 assert!(fixed.contains("````text"));
672 assert!(fixed.contains("~~~~~~text"));
673 assert!(fixed.contains("`````python"));
674 }
675
 // A three-backtick fence inside a four-backtick block is content, not a new block.
676 #[test]
677 fn test_nested_code_blocks_different_markers() {
678 let content = r#"# Test
679
680````markdown
681This is a markdown block
682
683```python
684# This is nested code
685print("hello")
686```
687
688More markdown
689````
690"#;
691 let result = run_check(content).unwrap();
692 assert!(
693 result.is_empty(),
694 "Nested code blocks with different markers should not trigger warnings"
695 );
696 }
697
 // Fences between the disable and enable comments are skipped; only the later one is reported.
698 #[test]
699 fn test_disable_enable_comments() {
700 let content = r#"# Test
701
702<!-- rumdl-disable MD040 -->
703```
704this should not trigger warning
705```
706<!-- rumdl-enable MD040 -->
707
708```
709this should trigger warning
710```
711"#;
712 let result = run_check(content).unwrap();
713 assert_eq!(result.len(), 1);
714 assert_eq!(result[0].line, 9);
715 }
716
 // A fence followed by text cannot close a block, so only the bare opening fence is reported.
717 #[test]
718 fn test_fence_with_language_only_on_closing() {
719 let content = r#"# Test
721
722```
723code
724```python
725"#;
726 let result = run_check(content).unwrap();
727 assert_eq!(result.len(), 1);
728 }
729
 // Unclosed blocks are judged by their opening fence alone.
730 #[test]
731 fn test_incomplete_code_blocks() {
732 let content = r#"# Test
734
735```python
736this code block is not closed"#;
737 let result = run_check(content).unwrap();
738 assert!(
739 result.is_empty(),
740 "Unclosed code blocks with language should not trigger warnings"
741 );
742
743 let content_no_lang = r#"# Test
745
746```
747this code block is not closed"#;
748 let result = run_check(content_no_lang).unwrap();
749 assert_eq!(result.len(), 1);
750 }
751
 // `fix` keeps the presence or absence of a trailing newline as found in the input.
752 #[test]
753 fn test_fix_preserves_original_formatting() {
754 let content = r#"# Test
755
756```
757code
758```
759
760No newline at end"#;
761 let fixed = run_fix(content).unwrap();
762 assert!(!fixed.ends_with('\n'), "Fix should preserve lack of trailing newline");
763
764 let content_with_newline = "# Test\n\n```\ncode\n```\n";
765 let fixed = run_fix(content_with_newline).unwrap();
766 assert!(fixed.ends_with('\n'), "Fix should preserve trailing newline");
767 }
768
 // Backticks that appear mid-line inside a code block do not affect fence tracking.
769 #[test]
770 fn test_edge_case_backticks_in_content() {
771 let content = r#"# Test
772
773```javascript
774console.log(`template string with backticks`);
775// This line has ``` in a comment
776```
777"#;
778 let result = run_check(content).unwrap();
779 assert!(
780 result.is_empty(),
781 "Backticks inside code blocks should not affect parsing"
782 );
783 }
784
 // Empty input produces no warnings.
785 #[test]
786 fn test_empty_document() {
787 let content = "";
788 let result = run_check(content).unwrap();
789 assert!(result.is_empty());
790 }
791
 // `should_skip` is true for empty or fence-free content and false when a fence is present.
792 #[test]
793 fn test_should_skip_optimization() {
794 let rule = MD040FencedCodeLanguage;
795
796 let ctx = LintContext::new("# Just a header\n\nSome text", crate::config::MarkdownFlavor::Standard);
798 assert!(rule.should_skip(&ctx));
799
800 let ctx = LintContext::new("```\ncode\n```", crate::config::MarkdownFlavor::Standard);
802 assert!(!rule.should_skip(&ctx));
803
804 let ctx = LintContext::new("~~~\ncode\n~~~", crate::config::MarkdownFlavor::Standard);
806 assert!(!rule.should_skip(&ctx));
807
808 let ctx = LintContext::new("", crate::config::MarkdownFlavor::Standard);
810 assert!(rule.should_skip(&ctx));
811 }
812}