rumdl_lib/rules/
md040_fenced_code_language.rs1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::range_utils::{LineIndex, calculate_line_range};
3
/// Lint rule MD040: fenced code blocks should have a language specified.
///
/// The rule holds no state or configuration, so it is a unit struct.
#[derive(Clone, Debug, Default)]
pub struct MD040FencedCodeLanguage;
10
11impl Rule for MD040FencedCodeLanguage {
12 fn name(&self) -> &'static str {
13 "MD040"
14 }
15
16 fn description(&self) -> &'static str {
17 "Code blocks should have a language specified"
18 }
19
20 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
21 let content = ctx.content;
22 let _line_index = LineIndex::new(content.to_string());
23
24 let mut warnings = Vec::new();
25
26 let mut in_code_block = false;
27 let mut current_fence_marker: Option<String> = None;
28 let mut opening_fence_indent: usize = 0;
29
30 let mut is_disabled = false;
32
33 for (i, line) in content.lines().enumerate() {
34 let trimmed = line.trim();
35
36 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
38 && (rules.is_empty() || rules.contains(&self.name()))
39 {
40 is_disabled = true;
41 }
42 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
43 && (rules.is_empty() || rules.contains(&self.name()))
44 {
45 is_disabled = false;
46 }
47
48 if is_disabled {
50 continue;
51 }
52
53 let fence_marker = if trimmed.starts_with("```") {
55 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
56 if backtick_count >= 3 {
57 Some("`".repeat(backtick_count))
58 } else {
59 None
60 }
61 } else if trimmed.starts_with("~~~") {
62 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
63 if tilde_count >= 3 {
64 Some("~".repeat(tilde_count))
65 } else {
66 None
67 }
68 } else {
69 None
70 };
71
72 if let Some(fence_marker) = fence_marker {
73 if in_code_block {
74 if let Some(ref current_marker) = current_fence_marker {
76 let current_indent = line.len() - line.trim_start().len();
77 if fence_marker == *current_marker
80 && trimmed[current_marker.len()..].trim().is_empty()
81 && current_indent <= opening_fence_indent
82 {
83 in_code_block = false;
85 current_fence_marker = None;
86 opening_fence_indent = 0;
87 }
88 }
90 } else {
91 let after_fence = trimmed[fence_marker.len()..].trim();
94
95 let has_title_only =
99 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
100
101 if after_fence.is_empty() || has_title_only {
102 let (start_line, start_col, end_line, end_col) = calculate_line_range(i + 1, line);
104
105 warnings.push(LintWarning {
106 rule_name: Some(self.name()),
107 line: start_line,
108 column: start_col,
109 end_line,
110 end_column: end_col,
111 message: "Code block (```) missing language".to_string(),
112 severity: Severity::Warning,
113 fix: Some(Fix {
114 range: {
115 let trimmed_start = line.len() - line.trim_start().len();
117 let fence_len = fence_marker.len();
118 let line_start_byte = ctx.line_offsets.get(i).copied().unwrap_or(0);
119 let fence_start_byte = line_start_byte + trimmed_start;
120 let fence_end_byte = fence_start_byte + fence_len;
121 fence_start_byte..fence_end_byte
122 },
123 replacement: format!("{fence_marker}text"),
124 }),
125 });
126 }
127
128 in_code_block = true;
129 current_fence_marker = Some(fence_marker);
130 opening_fence_indent = line.len() - line.trim_start().len();
131 }
132 }
133 }
135
136 Ok(warnings)
137 }
138
139 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
140 let content = ctx.content;
141 let _line_index = LineIndex::new(content.to_string());
142
143 let mut result = String::new();
144 let mut in_code_block = false;
145 let mut current_fence_marker: Option<String> = None;
146 let mut fence_needs_language = false;
147 let mut original_indent = String::new();
148 let mut opening_fence_indent: usize = 0;
149
150 let lines: Vec<&str> = content.lines().collect();
151
152 let is_in_nested_context = |line_idx: usize| -> bool {
154 for i in (0..line_idx).rev() {
156 let line = lines.get(i).unwrap_or(&"");
157 let trimmed = line.trim();
158
159 if trimmed.is_empty() {
161 continue;
162 }
163
164 if line.trim_start().starts_with('>') {
166 return true;
167 }
168
169 if line.len() - line.trim_start().len() >= 2 {
171 let after_indent = line.trim_start();
172 if after_indent.starts_with("- ")
173 || after_indent.starts_with("* ")
174 || after_indent.starts_with("+ ")
175 || (after_indent.len() > 2
176 && after_indent.as_bytes().first().is_some_and(|&b| b.is_ascii_digit())
177 && after_indent.as_bytes().get(1) == Some(&b'.')
178 && after_indent.as_bytes().get(2) == Some(&b' '))
179 {
180 return true;
181 }
182 }
183
184 if line.starts_with(|c: char| !c.is_whitespace()) {
186 break;
187 }
188 }
189 false
190 };
191
192 let mut is_disabled = false;
194
195 for (i, line) in lines.iter().enumerate() {
196 let trimmed = line.trim();
197
198 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
200 && (rules.is_empty() || rules.contains(&self.name()))
201 {
202 is_disabled = true;
203 }
204 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
205 && (rules.is_empty() || rules.contains(&self.name()))
206 {
207 is_disabled = false;
208 }
209
210 if is_disabled {
212 result.push_str(line);
213 result.push('\n');
214 continue;
215 }
216
217 let fence_marker = if trimmed.starts_with("```") {
219 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
220 if backtick_count >= 3 {
221 Some("`".repeat(backtick_count))
222 } else {
223 None
224 }
225 } else if trimmed.starts_with("~~~") {
226 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
227 if tilde_count >= 3 {
228 Some("~".repeat(tilde_count))
229 } else {
230 None
231 }
232 } else {
233 None
234 };
235
236 if let Some(fence_marker) = fence_marker {
237 if in_code_block {
238 if let Some(ref current_marker) = current_fence_marker {
240 let current_indent = line.len() - line.trim_start().len();
241 if fence_marker == *current_marker
242 && trimmed[current_marker.len()..].trim().is_empty()
243 && current_indent <= opening_fence_indent
244 {
245 if fence_needs_language {
247 result.push_str(&format!("{original_indent}{trimmed}\n"));
249 } else {
250 result.push_str(line);
252 result.push('\n');
253 }
254 in_code_block = false;
255 current_fence_marker = None;
256 fence_needs_language = false;
257 original_indent.clear();
258 opening_fence_indent = 0;
259 } else {
260 result.push_str(line);
262 result.push('\n');
263 }
264 } else {
265 result.push_str(line);
267 result.push('\n');
268 }
269 } else {
270 let line_indent = line[..line.len() - line.trim_start().len()].to_string();
273
274 let after_fence = trimmed[fence_marker.len()..].trim();
276
277 let has_title_only =
279 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
280
281 if after_fence.is_empty() || has_title_only {
282 let should_preserve_indent = is_in_nested_context(i);
284
285 if should_preserve_indent {
286 original_indent = line_indent;
288 if has_title_only {
289 result.push_str(&format!("{original_indent}{fence_marker}text {after_fence}\n"));
291 } else {
292 result.push_str(&format!("{original_indent}{fence_marker}text\n"));
293 }
294 } else {
295 original_indent = String::new();
297 if has_title_only {
298 result.push_str(&format!("{fence_marker}text {after_fence}\n"));
300 } else {
301 result.push_str(&format!("{fence_marker}text\n"));
302 }
303 }
304 fence_needs_language = true;
305 } else {
306 result.push_str(line);
308 result.push('\n');
309 fence_needs_language = false;
310 }
311
312 in_code_block = true;
313 current_fence_marker = Some(fence_marker);
314 opening_fence_indent = line.len() - line.trim_start().len();
315 }
316 } else if in_code_block {
317 result.push_str(line);
319 result.push('\n');
320 } else {
321 result.push_str(line);
323 result.push('\n');
324 }
325 }
326
327 if !content.ends_with('\n') {
329 result.pop();
330 }
331
332 Ok(result)
333 }
334
335 fn category(&self) -> RuleCategory {
337 RuleCategory::CodeBlock
338 }
339
340 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
342 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
343 }
344
345 fn as_any(&self) -> &dyn std::any::Any {
346 self
347 }
348
349 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
350 where
351 Self: Sized,
352 {
353 Box::new(MD040FencedCodeLanguage)
354 }
355}
356
// Unit tests for MD040. Each test builds a `LintContext` with the Standard
// Markdown flavor and exercises `check`, `fix`, or `should_skip`.
357#[cfg(test)]
358mod tests {
359 use super::*;
360 use crate::lint_context::LintContext;
361
    // Runs `check` on `content` using a Standard-flavor context.
362 fn run_check(content: &str) -> LintResult {
363 let rule = MD040FencedCodeLanguage;
364 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
365 rule.check(&ctx)
366 }
367
    // Runs `fix` on `content` using a Standard-flavor context.
368 fn run_fix(content: &str) -> Result<String, LintError> {
369 let rule = MD040FencedCodeLanguage;
370 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
371 rule.fix(&ctx)
372 }
373
    // Fences that carry a language produce no warnings.
374 #[test]
375 fn test_code_blocks_with_language_specified() {
376 let content = r#"# Test
378
379```python
380print("Hello, world!")
381```
382
383```javascript
384console.log("Hello!");
385```
386"#;
387 let result = run_check(content).unwrap();
388 assert!(result.is_empty(), "No warnings expected for code blocks with language");
389 }
390
    // A bare fence yields exactly one warning, reported on the fence's own line (3).
391 #[test]
392 fn test_code_blocks_without_language() {
393 let content = r#"# Test
394
395```
396print("Hello, world!")
397```
398"#;
399 let result = run_check(content).unwrap();
400 assert_eq!(result.len(), 1);
401 assert_eq!(result[0].message, "Code block (```) missing language");
402 assert_eq!(result[0].line, 3);
403 }
404
    // A fence with nothing usable after the marker yields one warning with the standard message.
405 #[test]
406 fn test_code_blocks_with_empty_language() {
407 let content = r#"# Test
409
410```
411print("Hello, world!")
412```
413"#;
414 let result = run_check(content).unwrap();
415 assert_eq!(result.len(), 1);
416 assert_eq!(result[0].message, "Code block (```) missing language");
417 }
418
    // Content without any fence markers produces no warnings.
419 #[test]
420 fn test_indented_code_blocks_should_be_ignored() {
421 let content = r#"# Test
423
424 This is an indented code block
425 It should not trigger MD040
426"#;
427 let result = run_check(content).unwrap();
428 assert!(result.is_empty(), "Indented code blocks should be ignored");
429 }
430
    // Single-backtick inline spans are not fences and produce no warnings.
431 #[test]
432 fn test_inline_code_spans_should_be_ignored() {
433 let content = r#"# Test
434
435This is `inline code` and should not trigger warnings.
436
437Use the `print()` function.
438"#;
439 let result = run_check(content).unwrap();
440 assert!(result.is_empty(), "Inline code spans should be ignored");
441 }
442
    // Tilde fences are checked like backtick fences. Note that the warning
    // message is the same text (it mentions ```) for both fence styles.
443 #[test]
444 fn test_tildes_vs_backticks_for_fences() {
445 let content_tildes_no_lang = r#"# Test
447
448~~~
449code here
450~~~
451"#;
452 let result = run_check(content_tildes_no_lang).unwrap();
453 assert_eq!(result.len(), 1);
454 assert_eq!(result[0].message, "Code block (```) missing language");
455
456 let content_tildes_with_lang = r#"# Test
458
459~~~python
460code here
461~~~
462"#;
463 let result = run_check(content_tildes_with_lang).unwrap();
464 assert!(result.is_empty());
465
466 let content_mixed = r#"# Test
468
469```python
470code here
471```
472
473~~~javascript
474more code
475~~~
476
477```
478no language
479```
480
481~~~
482also no language
483~~~
484"#;
485 let result = run_check(content_mixed).unwrap();
486 assert_eq!(result.len(), 2);
487 }
488
    // An info string that starts with a language and continues with extra
    // attributes counts as having a language.
489 #[test]
490 fn test_language_with_additional_parameters() {
491 let content = r#"# Test
492
493```python {highlight=[1,2]}
494print("Line 1")
495print("Line 2")
496```
497
498```javascript {.line-numbers startFrom="10"}
499console.log("Hello");
500```
501
502```ruby {data-line="1,3-4"}
503puts "Hello"
504puts "World"
505puts "!"
506```
507"#;
508 let result = run_check(content).unwrap();
509 assert!(
510 result.is_empty(),
511 "Code blocks with language and parameters should pass"
512 );
513 }
514
    // Of four blocks, only the two bare fences (lines 4 and 14) are reported.
515 #[test]
516 fn test_multiple_code_blocks_in_document() {
517 let content = r#"# Test Document
518
519First block without language:
520```
521code here
522```
523
524Second block with language:
525```python
526print("hello")
527```
528
529Third block without language:
530```
531more code
532```
533
534Fourth block with language:
535```javascript
536console.log("test");
537```
538"#;
539 let result = run_check(content).unwrap();
540 assert_eq!(result.len(), 2);
541 assert_eq!(result[0].line, 4);
542 assert_eq!(result[1].line, 14);
543 }
544
    // Fences under list items are still checked; the two bare fences are
    // expected on lines 9 and 20.
545 #[test]
546 fn test_nested_code_blocks_in_lists() {
547 let content = r#"# Test
548
549- Item 1
550 ```python
551 print("nested with language")
552 ```
553
554- Item 2
555 ```
556 nested without language
557 ```
558
559- Item 3
560 - Nested item
561 ```javascript
562 console.log("deeply nested");
563 ```
564
565 - Another nested
566 ```
567 no language
568 ```
569"#;
570 let result = run_check(content).unwrap();
571 assert_eq!(result.len(), 2);
572 assert_eq!(result[0].line, 9);
574 assert_eq!(result[1].line, 20);
575 }
576
    // Fence lines prefixed with `>` are expected to produce no warnings, even
    // the bare one: after trimming they start with `>`, not with a fence.
577 #[test]
578 fn test_code_blocks_in_blockquotes() {
579 let content = r#"# Test
580
581> This is a blockquote
582> ```python
583> print("with language")
584> ```
585
586> Another blockquote
587> ```
588> without language
589> ```
590"#;
591 let result = run_check(content).unwrap();
592 assert_eq!(result.len(), 0);
595 }
596
    // `fix` adds `text` to each of the two bare opening fences and leaves the
    // `python` fence as it is.
597 #[test]
598 fn test_fix_method_adds_text_language() {
599 let content = r#"# Test
600
601```
602code without language
603```
604
605```python
606already has language
607```
608
609```
610another block without
611```
612"#;
613 let fixed = run_fix(content).unwrap();
614 assert!(fixed.contains("```text"));
615 assert!(fixed.contains("```python"));
616 assert_eq!(fixed.matches("```text").count(), 2);
617 }
618
    // Only asserts that the language is added and that the block's content
    // line survives the fix; the fence's own indentation is not asserted.
619 #[test]
620 fn test_fix_preserves_indentation() {
621 let content = r#"# Test
622
623- List item
624 ```
625 indented code block
626 ```
627"#;
628 let fixed = run_fix(content).unwrap();
629 assert!(fixed.contains("```text"));
633 assert!(fixed.contains(" indented code block"));
634 }
635
    // `fix` keeps the tilde fence style when adding the language.
636 #[test]
637 fn test_fix_with_tilde_fences() {
638 let content = r#"# Test
639
640~~~
641code with tildes
642~~~
643"#;
644 let fixed = run_fix(content).unwrap();
645 assert!(fixed.contains("~~~text"));
646 }
647
    // Fences longer than three characters are recognised, and `fix` keeps
    // their full length when adding the language.
648 #[test]
649 fn test_longer_fence_markers() {
650 let content = r#"# Test
651
652````
653code with four backticks
654````
655
656`````python
657code with five backticks and language
658`````
659
660~~~~~~
661code with six tildes
662~~~~~~
663"#;
664 let result = run_check(content).unwrap();
665 assert_eq!(result.len(), 2);
666
667 let fixed = run_fix(content).unwrap();
668 assert!(fixed.contains("````text"));
669 assert!(fixed.contains("~~~~~~text"));
670 assert!(fixed.contains("`````python"));
671 }
672
    // A three-backtick fence inside a four-backtick block is block content,
    // not a new block, so nothing is reported.
673 #[test]
674 fn test_nested_code_blocks_different_markers() {
675 let content = r#"# Test
676
677````markdown
678This is a markdown block
679
680```python
681# This is nested code
682print("hello")
683```
684
685More markdown
686````
687"#;
688 let result = run_check(content).unwrap();
689 assert!(
690 result.is_empty(),
691 "Nested code blocks with different markers should not trigger warnings"
692 );
693 }
694
    // Fences between the rumdl-disable and rumdl-enable comments for MD040 are
    // not reported; the bare fence after re-enabling (line 9) is.
695 #[test]
696 fn test_disable_enable_comments() {
697 let content = r#"# Test
698
699<!-- rumdl-disable MD040 -->
700```
701this should not trigger warning
702```
703<!-- rumdl-enable MD040 -->
704
705```
706this should trigger warning
707```
708"#;
709 let result = run_check(content).unwrap();
710 assert_eq!(result.len(), 1);
711 assert_eq!(result[0].line, 9);
712 }
713
    // Only the bare opening fence is reported; the later fence line that
    // carries `python` does not add a second warning.
714 #[test]
715 fn test_fence_with_language_only_on_closing() {
716 let content = r#"# Test
718
719```
720code
721```python
722"#;
723 let result = run_check(content).unwrap();
724 assert_eq!(result.len(), 1);
725 }
726
    // Unclosed blocks are judged by their opening fence alone: no warning
    // with a language, one warning without.
727 #[test]
728 fn test_incomplete_code_blocks() {
729 let content = r#"# Test
731
732```python
733this code block is not closed"#;
734 let result = run_check(content).unwrap();
735 assert!(
736 result.is_empty(),
737 "Unclosed code blocks with language should not trigger warnings"
738 );
739
740 let content_no_lang = r#"# Test
742
743```
744this code block is not closed"#;
745 let result = run_check(content_no_lang).unwrap();
746 assert_eq!(result.len(), 1);
747 }
748
    // `fix` keeps the presence or absence of a trailing newline.
749 #[test]
750 fn test_fix_preserves_original_formatting() {
751 let content = r#"# Test
752
753```
754code
755```
756
757No newline at end"#;
758 let fixed = run_fix(content).unwrap();
759 assert!(!fixed.ends_with('\n'), "Fix should preserve lack of trailing newline");
760
761 let content_with_newline = "# Test\n\n```\ncode\n```\n";
762 let fixed = run_fix(content_with_newline).unwrap();
763 assert!(fixed.ends_with('\n'), "Fix should preserve trailing newline");
764 }
765
    // Backticks that appear inside a block's content, including a mid-line
    // triple backtick, do not disturb fence tracking.
766 #[test]
767 fn test_edge_case_backticks_in_content() {
768 let content = r#"# Test
769
770```javascript
771console.log(`template string with backticks`);
772// This line has ``` in a comment
773```
774"#;
775 let result = run_check(content).unwrap();
776 assert!(
777 result.is_empty(),
778 "Backticks inside code blocks should not affect parsing"
779 );
780 }
781
    // Empty input produces no warnings.
782 #[test]
783 fn test_empty_document() {
784 let content = "";
785 let result = run_check(content).unwrap();
786 assert!(result.is_empty());
787 }
788
    // `should_skip` is true for fence-free text and for empty content, and
    // false when the content contains backtick or tilde fences.
789 #[test]
790 fn test_should_skip_optimization() {
791 let rule = MD040FencedCodeLanguage;
792
793 let ctx = LintContext::new("# Just a header\n\nSome text", crate::config::MarkdownFlavor::Standard);
795 assert!(rule.should_skip(&ctx));
796
797 let ctx = LintContext::new("```\ncode\n```", crate::config::MarkdownFlavor::Standard);
799 assert!(!rule.should_skip(&ctx));
800
801 let ctx = LintContext::new("~~~\ncode\n~~~", crate::config::MarkdownFlavor::Standard);
803 assert!(!rule.should_skip(&ctx));
804
805 let ctx = LintContext::new("", crate::config::MarkdownFlavor::Standard);
807 assert!(rule.should_skip(&ctx));
808 }
809}