rumdl_lib/rules/
md040_fenced_code_language.rs1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::range_utils::{LineIndex, calculate_line_range};
3
/// Lint rule MD040: fenced code blocks should have a language specified.
///
/// The rule carries no configuration or state, so it is a unit struct.
#[derive(Clone, Debug, Default)]
pub struct MD040FencedCodeLanguage;
10
11impl Rule for MD040FencedCodeLanguage {
12 fn name(&self) -> &'static str {
13 "MD040"
14 }
15
16 fn description(&self) -> &'static str {
17 "Code blocks should have a language specified"
18 }
19
20 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
21 let content = ctx.content;
22 let _line_index = LineIndex::new(content.to_string());
23
24 let mut warnings = Vec::new();
25
26 let mut in_code_block = false;
27 let mut current_fence_marker: Option<String> = None;
28 let mut opening_fence_indent: usize = 0;
29
30 let mut is_disabled = false;
32
33 for (i, line) in content.lines().enumerate() {
34 let trimmed = line.trim();
35
36 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
38 && (rules.is_empty() || rules.contains(&self.name()))
39 {
40 is_disabled = true;
41 }
42 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
43 && (rules.is_empty() || rules.contains(&self.name()))
44 {
45 is_disabled = false;
46 }
47
48 if is_disabled {
50 continue;
51 }
52
53 let fence_marker = if trimmed.starts_with("```") {
55 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
56 if backtick_count >= 3 {
57 Some("`".repeat(backtick_count))
58 } else {
59 None
60 }
61 } else if trimmed.starts_with("~~~") {
62 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
63 if tilde_count >= 3 {
64 Some("~".repeat(tilde_count))
65 } else {
66 None
67 }
68 } else {
69 None
70 };
71
72 if let Some(fence_marker) = fence_marker {
73 if in_code_block {
74 if let Some(ref current_marker) = current_fence_marker {
76 let current_indent = line.len() - line.trim_start().len();
77 if fence_marker == *current_marker
80 && trimmed[current_marker.len()..].trim().is_empty()
81 && current_indent <= opening_fence_indent
82 {
83 in_code_block = false;
85 current_fence_marker = None;
86 opening_fence_indent = 0;
87 }
88 }
90 } else {
91 let after_fence = trimmed[fence_marker.len()..].trim();
94
95 let has_title_only =
99 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
100
101 if after_fence.is_empty() || has_title_only {
102 let (start_line, start_col, end_line, end_col) = calculate_line_range(i + 1, line);
104
105 warnings.push(LintWarning {
106 rule_name: Some(self.name()),
107 line: start_line,
108 column: start_col,
109 end_line,
110 end_column: end_col,
111 message: "Code block (```) missing language".to_string(),
112 severity: Severity::Warning,
113 fix: Some(Fix {
114 range: {
115 let trimmed_start = line.len() - line.trim_start().len();
117 let fence_len = fence_marker.len();
118 let line_start_byte = ctx.line_offsets.get(i).copied().unwrap_or(0);
119 let fence_start_byte = line_start_byte + trimmed_start;
120 let fence_end_byte = fence_start_byte + fence_len;
121 fence_start_byte..fence_end_byte
122 },
123 replacement: format!("{fence_marker}text"),
124 }),
125 });
126 }
127
128 in_code_block = true;
129 current_fence_marker = Some(fence_marker);
130 opening_fence_indent = line.len() - line.trim_start().len();
131 }
132 }
133 }
135
136 Ok(warnings)
137 }
138
139 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
140 let content = ctx.content;
141 let _line_index = LineIndex::new(content.to_string());
142
143 let mut result = String::new();
144 let mut in_code_block = false;
145 let mut current_fence_marker: Option<String> = None;
146 let mut fence_needs_language = false;
147 let mut original_indent = String::new();
148 let mut opening_fence_indent: usize = 0;
149
150 let lines: Vec<&str> = content.lines().collect();
151
152 let is_in_nested_context = |line_idx: usize| -> bool {
154 for i in (0..line_idx).rev() {
156 let line = lines.get(i).unwrap_or(&"");
157 let trimmed = line.trim();
158
159 if trimmed.is_empty() {
161 continue;
162 }
163
164 if line.trim_start().starts_with('>') {
166 return true;
167 }
168
169 if line.len() - line.trim_start().len() >= 2 {
171 let after_indent = line.trim_start();
172 if after_indent.starts_with("- ")
173 || after_indent.starts_with("* ")
174 || after_indent.starts_with("+ ")
175 || (after_indent.len() > 2
176 && after_indent.as_bytes().first().is_some_and(|&b| b.is_ascii_digit())
177 && after_indent.as_bytes().get(1) == Some(&b'.')
178 && after_indent.as_bytes().get(2) == Some(&b' '))
179 {
180 return true;
181 }
182 }
183
184 if line.starts_with(|c: char| !c.is_whitespace()) {
186 break;
187 }
188 }
189 false
190 };
191
192 let mut is_disabled = false;
194
195 for (i, line) in lines.iter().enumerate() {
196 let trimmed = line.trim();
197
198 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
200 && (rules.is_empty() || rules.contains(&self.name()))
201 {
202 is_disabled = true;
203 }
204 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
205 && (rules.is_empty() || rules.contains(&self.name()))
206 {
207 is_disabled = false;
208 }
209
210 if is_disabled {
212 result.push_str(line);
213 result.push('\n');
214 continue;
215 }
216
217 let fence_marker = if trimmed.starts_with("```") {
219 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
220 if backtick_count >= 3 {
221 Some("`".repeat(backtick_count))
222 } else {
223 None
224 }
225 } else if trimmed.starts_with("~~~") {
226 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
227 if tilde_count >= 3 {
228 Some("~".repeat(tilde_count))
229 } else {
230 None
231 }
232 } else {
233 None
234 };
235
236 if let Some(fence_marker) = fence_marker {
237 if in_code_block {
238 if let Some(ref current_marker) = current_fence_marker {
240 let current_indent = line.len() - line.trim_start().len();
241 if fence_marker == *current_marker
242 && trimmed[current_marker.len()..].trim().is_empty()
243 && current_indent <= opening_fence_indent
244 {
245 if fence_needs_language {
247 result.push_str(&format!("{original_indent}{trimmed}\n"));
249 } else {
250 result.push_str(line);
252 result.push('\n');
253 }
254 in_code_block = false;
255 current_fence_marker = None;
256 fence_needs_language = false;
257 original_indent.clear();
258 opening_fence_indent = 0;
259 } else {
260 result.push_str(line);
262 result.push('\n');
263 }
264 } else {
265 result.push_str(line);
267 result.push('\n');
268 }
269 } else {
270 let line_indent = line[..line.len() - line.trim_start().len()].to_string();
273
274 let after_fence = trimmed[fence_marker.len()..].trim();
276
277 let has_title_only =
279 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
280
281 if after_fence.is_empty() || has_title_only {
282 let should_preserve_indent = is_in_nested_context(i);
284
285 if should_preserve_indent {
286 original_indent = line_indent;
288 if has_title_only {
289 result.push_str(&format!("{original_indent}{fence_marker}text {after_fence}\n"));
291 } else {
292 result.push_str(&format!("{original_indent}{fence_marker}text\n"));
293 }
294 } else {
295 original_indent = String::new();
297 if has_title_only {
298 result.push_str(&format!("{fence_marker}text {after_fence}\n"));
300 } else {
301 result.push_str(&format!("{fence_marker}text\n"));
302 }
303 }
304 fence_needs_language = true;
305 } else {
306 result.push_str(line);
308 result.push('\n');
309 fence_needs_language = false;
310 }
311
312 in_code_block = true;
313 current_fence_marker = Some(fence_marker);
314 opening_fence_indent = line.len() - line.trim_start().len();
315 }
316 } else if in_code_block {
317 result.push_str(line);
319 result.push('\n');
320 } else {
321 result.push_str(line);
323 result.push('\n');
324 }
325 }
326
327 if !content.ends_with('\n') {
329 result.pop();
330 }
331
332 Ok(result)
333 }
334
335 fn category(&self) -> RuleCategory {
337 RuleCategory::CodeBlock
338 }
339
340 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
342 let content = ctx.content;
343 content.is_empty() || (!content.contains("```") && !content.contains("~~~"))
344 }
345
346 fn as_any(&self) -> &dyn std::any::Any {
347 self
348 }
349
350 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
351 where
352 Self: Sized,
353 {
354 Box::new(MD040FencedCodeLanguage)
355 }
356}
357
// Unit tests for MD040: `check` warning counts and positions, `fix` output, and `should_skip`.
358#[cfg(test)]
359mod tests {
360 use super::*;
361 use crate::lint_context::LintContext;
362
    // Runs `check` on `content` using the Standard Markdown flavor.
363 fn run_check(content: &str) -> LintResult {
364 let rule = MD040FencedCodeLanguage;
365 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
366 rule.check(&ctx)
367 }
368
    // Runs `fix` on `content` using the Standard Markdown flavor.
369 fn run_fix(content: &str) -> Result<String, LintError> {
370 let rule = MD040FencedCodeLanguage;
371 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
372 rule.fix(&ctx)
373 }
374
    // Fences that carry a language produce no warnings.
375 #[test]
376 fn test_code_blocks_with_language_specified() {
377 let content = r#"# Test
379
380```python
381print("Hello, world!")
382```
383
384```javascript
385console.log("Hello!");
386```
387"#;
388 let result = run_check(content).unwrap();
389 assert!(result.is_empty(), "No warnings expected for code blocks with language");
390 }
391
    // A bare backtick fence yields exactly one warning, reported on the opening fence's line.
392 #[test]
393 fn test_code_blocks_without_language() {
394 let content = r#"# Test
395
396```
397print("Hello, world!")
398```
399"#;
400 let result = run_check(content).unwrap();
401 assert_eq!(result.len(), 1);
402 assert_eq!(result[0].message, "Code block (```) missing language");
403 assert_eq!(result[0].line, 3);
404 }
405
    // A fence with nothing after the marker is reported once.
    // NOTE(review): as shown, this fixture matches test_code_blocks_without_language;
    // confirm whether the opening fence was meant to carry trailing whitespace.
406 #[test]
407 fn test_code_blocks_with_empty_language() {
408 let content = r#"# Test
410
411```
412print("Hello, world!")
413```
414"#;
415 let result = run_check(content).unwrap();
416 assert_eq!(result.len(), 1);
417 assert_eq!(result[0].message, "Code block (```) missing language");
418 }
419
    // Content without any fence line produces no warnings.
420 #[test]
421 fn test_indented_code_blocks_should_be_ignored() {
422 let content = r#"# Test
424
425 This is an indented code block
426 It should not trigger MD040
427"#;
428 let result = run_check(content).unwrap();
429 assert!(result.is_empty(), "Indented code blocks should be ignored");
430 }
431
    // Single-backtick inline code is not a fence and is not reported.
432 #[test]
433 fn test_inline_code_spans_should_be_ignored() {
434 let content = r#"# Test
435
436This is `inline code` and should not trigger warnings.
437
438Use the `print()` function.
439"#;
440 let result = run_check(content).unwrap();
441 assert!(result.is_empty(), "Inline code spans should be ignored");
442 }
443
    // Tilde fences follow the same rule as backtick fences; the warning message is the
    // same backtick-worded string for both.
444 #[test]
445 fn test_tildes_vs_backticks_for_fences() {
446 let content_tildes_no_lang = r#"# Test
448
449~~~
450code here
451~~~
452"#;
453 let result = run_check(content_tildes_no_lang).unwrap();
454 assert_eq!(result.len(), 1);
455 assert_eq!(result[0].message, "Code block (```) missing language");
456
457 let content_tildes_with_lang = r#"# Test
459
460~~~python
461code here
462~~~
463"#;
464 let result = run_check(content_tildes_with_lang).unwrap();
465 assert!(result.is_empty());
466
467 let content_mixed = r#"# Test
469
470```python
471code here
472```
473
474~~~javascript
475more code
476~~~
477
478```
479no language
480```
481
482~~~
483also no language
484~~~
485"#;
486 let result = run_check(content_mixed).unwrap();
487 assert_eq!(result.len(), 2);
488 }
489
    // Anything after the fence marker counts as a language, including extra attributes.
490 #[test]
491 fn test_language_with_additional_parameters() {
492 let content = r#"# Test
493
494```python {highlight=[1,2]}
495print("Line 1")
496print("Line 2")
497```
498
499```javascript {.line-numbers startFrom="10"}
500console.log("Hello");
501```
502
503```ruby {data-line="1,3-4"}
504puts "Hello"
505puts "World"
506puts "!"
507```
508"#;
509 let result = run_check(content).unwrap();
510 assert!(
511 result.is_empty(),
512 "Code blocks with language and parameters should pass"
513 );
514 }
515
    // Only the two language-less blocks are reported, at their opening-fence lines.
516 #[test]
517 fn test_multiple_code_blocks_in_document() {
518 let content = r#"# Test Document
519
520First block without language:
521```
522code here
523```
524
525Second block with language:
526```python
527print("hello")
528```
529
530Third block without language:
531```
532more code
533```
534
535Fourth block with language:
536```javascript
537console.log("test");
538```
539"#;
540 let result = run_check(content).unwrap();
541 assert_eq!(result.len(), 2);
542 assert_eq!(result[0].line, 4);
543 assert_eq!(result[1].line, 14);
544 }
545
    // Fences placed under list items are checked like any other fence.
546 #[test]
547 fn test_nested_code_blocks_in_lists() {
548 let content = r#"# Test
549
550- Item 1
551 ```python
552 print("nested with language")
553 ```
554
555- Item 2
556 ```
557 nested without language
558 ```
559
560- Item 3
561 - Nested item
562 ```javascript
563 console.log("deeply nested");
564 ```
565
566 - Another nested
567 ```
568 no language
569 ```
570"#;
571 let result = run_check(content).unwrap();
572 assert_eq!(result.len(), 2);
573 assert_eq!(result[0].line, 9);
575 assert_eq!(result[1].line, 20);
576 }
577
    // Pins current behavior: fence lines that start with `>` are not recognised as
    // fences, so no warning is produced even for the language-less block.
578 #[test]
579 fn test_code_blocks_in_blockquotes() {
580 let content = r#"# Test
581
582> This is a blockquote
583> ```python
584> print("with language")
585> ```
586
587> Another blockquote
588> ```
589> without language
590> ```
591"#;
592 let result = run_check(content).unwrap();
593 assert_eq!(result.len(), 0);
596 }
597
    // `fix` adds `text` to both language-less blocks and leaves the python block alone.
598 #[test]
599 fn test_fix_method_adds_text_language() {
600 let content = r#"# Test
601
602```
603code without language
604```
605
606```python
607already has language
608```
609
610```
611another block without
612```
613"#;
614 let fixed = run_fix(content).unwrap();
615 assert!(fixed.contains("```text"));
616 assert!(fixed.contains("```python"));
617 assert_eq!(fixed.matches("```text").count(), 2);
618 }
619
    // `fix` on a fence under a list item: the language is added and the block's
    // content line is still present in the output.
620 #[test]
621 fn test_fix_preserves_indentation() {
622 let content = r#"# Test
623
624- List item
625 ```
626 indented code block
627 ```
628"#;
629 let fixed = run_fix(content).unwrap();
630 assert!(fixed.contains("```text"));
634 assert!(fixed.contains(" indented code block"));
635 }
636
    // `fix` keeps the tilde marker when adding the language.
637 #[test]
638 fn test_fix_with_tilde_fences() {
639 let content = r#"# Test
640
641~~~
642code with tildes
643~~~
644"#;
645 let fixed = run_fix(content).unwrap();
646 assert!(fixed.contains("~~~text"));
647 }
648
    // Fences longer than three characters are handled, and `fix` keeps their length.
649 #[test]
650 fn test_longer_fence_markers() {
651 let content = r#"# Test
652
653````
654code with four backticks
655````
656
657`````python
658code with five backticks and language
659`````
660
661~~~~~~
662code with six tildes
663~~~~~~
664"#;
665 let result = run_check(content).unwrap();
666 assert_eq!(result.len(), 2);
667
668 let fixed = run_fix(content).unwrap();
669 assert!(fixed.contains("````text"));
670 assert!(fixed.contains("~~~~~~text"));
671 assert!(fixed.contains("`````python"));
672 }
673
    // A shorter fence inside a four-backtick block is content, not a new block.
674 #[test]
675 fn test_nested_code_blocks_different_markers() {
676 let content = r#"# Test
677
678````markdown
679This is a markdown block
680
681```python
682# This is nested code
683print("hello")
684```
685
686More markdown
687````
688"#;
689 let result = run_check(content).unwrap();
690 assert!(
691 result.is_empty(),
692 "Nested code blocks with different markers should not trigger warnings"
693 );
694 }
695
    // A block between disable/enable comments is not reported; the one after it is.
696 #[test]
697 fn test_disable_enable_comments() {
698 let content = r#"# Test
699
700<!-- rumdl-disable MD040 -->
701```
702this should not trigger warning
703```
704<!-- rumdl-enable MD040 -->
705
706```
707this should trigger warning
708```
709"#;
710 let result = run_check(content).unwrap();
711 assert_eq!(result.len(), 1);
712 assert_eq!(result[0].line, 9);
713 }
714
    // A fence with trailing text cannot close a block, so only the bare opening fence is reported.
715 #[test]
716 fn test_fence_with_language_only_on_closing() {
717 let content = r#"# Test
719
720```
721code
722```python
723"#;
724 let result = run_check(content).unwrap();
725 assert_eq!(result.len(), 1);
726 }
727
    // Unclosed blocks are judged by their opening fence alone.
728 #[test]
729 fn test_incomplete_code_blocks() {
730 let content = r#"# Test
732
733```python
734this code block is not closed"#;
735 let result = run_check(content).unwrap();
736 assert!(
737 result.is_empty(),
738 "Unclosed code blocks with language should not trigger warnings"
739 );
740
741 let content_no_lang = r#"# Test
743
744```
745this code block is not closed"#;
746 let result = run_check(content_no_lang).unwrap();
747 assert_eq!(result.len(), 1);
748 }
749
    // `fix` emits a trailing newline only when the input had one.
750 #[test]
751 fn test_fix_preserves_original_formatting() {
752 let content = r#"# Test
753
754```
755code
756```
757
758No newline at end"#;
759 let fixed = run_fix(content).unwrap();
760 assert!(!fixed.ends_with('\n'), "Fix should preserve lack of trailing newline");
761
762 let content_with_newline = "# Test\n\n```\ncode\n```\n";
763 let fixed = run_fix(content_with_newline).unwrap();
764 assert!(fixed.ends_with('\n'), "Fix should preserve trailing newline");
765 }
766
    // Backticks that do not start a line are ignored while inside a block.
767 #[test]
768 fn test_edge_case_backticks_in_content() {
769 let content = r#"# Test
770
771```javascript
772console.log(`template string with backticks`);
773// This line has ``` in a comment
774```
775"#;
776 let result = run_check(content).unwrap();
777 assert!(
778 result.is_empty(),
779 "Backticks inside code blocks should not affect parsing"
780 );
781 }
782
    // Empty input yields no warnings.
783 #[test]
784 fn test_empty_document() {
785 let content = "";
786 let result = run_check(content).unwrap();
787 assert!(result.is_empty());
788 }
789
    // `should_skip` is true for empty or fence-free documents and false once a
    // backtick or tilde fence is present.
790 #[test]
791 fn test_should_skip_optimization() {
792 let rule = MD040FencedCodeLanguage;
793
794 let ctx = LintContext::new("# Just a header\n\nSome text", crate::config::MarkdownFlavor::Standard);
796 assert!(rule.should_skip(&ctx));
797
798 let ctx = LintContext::new("```\ncode\n```", crate::config::MarkdownFlavor::Standard);
800 assert!(!rule.should_skip(&ctx));
801
802 let ctx = LintContext::new("~~~\ncode\n~~~", crate::config::MarkdownFlavor::Standard);
804 assert!(!rule.should_skip(&ctx));
805
806 let ctx = LintContext::new("", crate::config::MarkdownFlavor::Standard);
808 assert!(rule.should_skip(&ctx));
809 }
810}