rumdl_lib/rules/
md040_fenced_code_language.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::range_utils::{LineIndex, calculate_line_range};
3
/// Rule MD040: Fenced code blocks should have a language
///
/// Flags every fenced code block (backtick or tilde fence) whose opening fence
/// carries no language, and offers a fix that inserts `text` as the language.
/// The rule is a stateless unit struct, so a single value can be shared freely.
///
/// See [docs/md040.md](../../docs/md040.md) for full documentation, configuration, and examples.
#[derive(Debug, Default, Clone)]
pub struct MD040FencedCodeLanguage;
10
11impl Rule for MD040FencedCodeLanguage {
12    fn name(&self) -> &'static str {
13        "MD040"
14    }
15
16    fn description(&self) -> &'static str {
17        "Code blocks should have a language specified"
18    }
19
20    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
21        let content = ctx.content;
22        let _line_index = LineIndex::new(content.to_string());
23
24        let mut warnings = Vec::new();
25
26        let mut in_code_block = false;
27        let mut current_fence_marker: Option<String> = None;
28        let mut opening_fence_indent: usize = 0;
29
30        // Pre-compute disabled state to avoid O(n²) complexity
31        let mut is_disabled = false;
32
33        for (i, line) in content.lines().enumerate() {
34            let trimmed = line.trim();
35
36            // Update disabled state incrementally
37            if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
38                && (rules.is_empty() || rules.contains(&self.name()))
39            {
40                is_disabled = true;
41            }
42            if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
43                && (rules.is_empty() || rules.contains(&self.name()))
44            {
45                is_disabled = false;
46            }
47
48            // Skip processing if rule is disabled
49            if is_disabled {
50                continue;
51            }
52
53            // Determine fence marker if this is a fence line
54            let fence_marker = if trimmed.starts_with("```") {
55                let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
56                if backtick_count >= 3 {
57                    Some("`".repeat(backtick_count))
58                } else {
59                    None
60                }
61            } else if trimmed.starts_with("~~~") {
62                let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
63                if tilde_count >= 3 {
64                    Some("~".repeat(tilde_count))
65                } else {
66                    None
67                }
68            } else {
69                None
70            };
71
72            if let Some(fence_marker) = fence_marker {
73                if in_code_block {
74                    // We're inside a code block, check if this closes it
75                    if let Some(ref current_marker) = current_fence_marker {
76                        let current_indent = line.len() - line.trim_start().len();
77                        // Only close if the fence marker exactly matches the opening marker AND has no content after
78                        // AND the indentation is not greater than the opening fence
79                        if fence_marker == *current_marker
80                            && trimmed[current_marker.len()..].trim().is_empty()
81                            && current_indent <= opening_fence_indent
82                        {
83                            // This closes the current code block
84                            in_code_block = false;
85                            current_fence_marker = None;
86                            opening_fence_indent = 0;
87                        }
88                        // else: This is content inside a code block, ignore completely
89                    }
90                } else {
91                    // We're outside a code block, this opens one
92                    // Check if language is specified
93                    let after_fence = trimmed[fence_marker.len()..].trim();
94
95                    // Check if it has MkDocs title attribute but no language
96                    // Pattern: ``` title="Title" (missing language)
97                    // Valid: ```python title="Title" or ```py title="Title"
98                    let has_title_only =
99                        ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
100
101                    if after_fence.is_empty() || has_title_only {
102                        // Calculate precise character range for the entire fence line that needs a language
103                        let (start_line, start_col, end_line, end_col) = calculate_line_range(i + 1, line);
104
105                        warnings.push(LintWarning {
106                            rule_name: Some(self.name().to_string()),
107                            line: start_line,
108                            column: start_col,
109                            end_line,
110                            end_column: end_col,
111                            message: "Code block (```) missing language".to_string(),
112                            severity: Severity::Warning,
113                            fix: Some(Fix {
114                                range: {
115                                    // Replace just the fence marker with fence+language
116                                    let trimmed_start = line.len() - line.trim_start().len();
117                                    let fence_len = fence_marker.len();
118                                    let line_start_byte = ctx.line_offsets.get(i).copied().unwrap_or(0);
119                                    let fence_start_byte = line_start_byte + trimmed_start;
120                                    let fence_end_byte = fence_start_byte + fence_len;
121                                    fence_start_byte..fence_end_byte
122                                },
123                                replacement: format!("{fence_marker}text"),
124                            }),
125                        });
126                    }
127
128                    in_code_block = true;
129                    current_fence_marker = Some(fence_marker);
130                    opening_fence_indent = line.len() - line.trim_start().len();
131                }
132            }
133            // If we're inside a code block and this line is not a fence, ignore it
134        }
135
136        Ok(warnings)
137    }
138
139    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
140        let content = ctx.content;
141        let _line_index = LineIndex::new(content.to_string());
142
143        let mut result = String::new();
144        let mut in_code_block = false;
145        let mut current_fence_marker: Option<String> = None;
146        let mut fence_needs_language = false;
147        let mut original_indent = String::new();
148        let mut opening_fence_indent: usize = 0;
149
150        let lines: Vec<&str> = content.lines().collect();
151
152        // Pre-compute disabled state to avoid O(n²) complexity
153        let mut is_disabled = false;
154
155        for line in lines.iter() {
156            let trimmed = line.trim();
157
158            // Update disabled state incrementally
159            if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
160                && (rules.is_empty() || rules.contains(&self.name()))
161            {
162                is_disabled = true;
163            }
164            if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
165                && (rules.is_empty() || rules.contains(&self.name()))
166            {
167                is_disabled = false;
168            }
169
170            // Skip processing if rule is disabled, preserve the line as-is
171            if is_disabled {
172                result.push_str(line);
173                result.push('\n');
174                continue;
175            }
176
177            // Determine fence marker if this is a fence line
178            let fence_marker = if trimmed.starts_with("```") {
179                let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
180                if backtick_count >= 3 {
181                    Some("`".repeat(backtick_count))
182                } else {
183                    None
184                }
185            } else if trimmed.starts_with("~~~") {
186                let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
187                if tilde_count >= 3 {
188                    Some("~".repeat(tilde_count))
189                } else {
190                    None
191                }
192            } else {
193                None
194            };
195
196            if let Some(fence_marker) = fence_marker {
197                if in_code_block {
198                    // We're inside a code block, check if this closes it
199                    if let Some(ref current_marker) = current_fence_marker {
200                        let current_indent = line.len() - line.trim_start().len();
201                        if fence_marker == *current_marker
202                            && trimmed[current_marker.len()..].trim().is_empty()
203                            && current_indent <= opening_fence_indent
204                        {
205                            // This closes the current code block
206                            if fence_needs_language {
207                                // Use the same indentation as the opening fence
208                                result.push_str(&format!("{original_indent}{trimmed}\n"));
209                            } else {
210                                // Preserve original line as-is
211                                result.push_str(line);
212                                result.push('\n');
213                            }
214                            in_code_block = false;
215                            current_fence_marker = None;
216                            fence_needs_language = false;
217                            original_indent.clear();
218                            opening_fence_indent = 0;
219                        } else {
220                            // This is content inside a code block (different fence marker) - preserve exactly as-is
221                            result.push_str(line);
222                            result.push('\n');
223                        }
224                    } else {
225                        // This shouldn't happen, but preserve as content
226                        result.push_str(line);
227                        result.push('\n');
228                    }
229                } else {
230                    // We're outside a code block, this opens one
231                    // Capture the original indentation
232                    let line_indent = line[..line.len() - line.trim_start().len()].to_string();
233
234                    // Add 'text' as default language for opening fence if no language specified
235                    let after_fence = trimmed[fence_marker.len()..].trim();
236
237                    // Check if it has MkDocs title attribute but no language
238                    let has_title_only =
239                        ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
240
241                    if after_fence.is_empty() || has_title_only {
242                        // Always preserve the original indentation - adding a language tag should not change indentation
243                        original_indent = line_indent;
244                        if has_title_only {
245                            // Insert language before title attribute
246                            result.push_str(&format!("{original_indent}{fence_marker}text {after_fence}\n"));
247                        } else {
248                            result.push_str(&format!("{original_indent}{fence_marker}text\n"));
249                        }
250                        fence_needs_language = true;
251                    } else {
252                        // Keep original line as-is since it already has a language
253                        result.push_str(line);
254                        result.push('\n');
255                        fence_needs_language = false;
256                    }
257
258                    in_code_block = true;
259                    current_fence_marker = Some(fence_marker);
260                    opening_fence_indent = line.len() - line.trim_start().len();
261                }
262            } else if in_code_block {
263                // We're inside a code block and this is not a fence line - preserve exactly as-is
264                result.push_str(line);
265                result.push('\n');
266            } else {
267                // We're outside code blocks and this is not a fence line - preserve as-is
268                result.push_str(line);
269                result.push('\n');
270            }
271        }
272
273        // Remove trailing newline if the original content didn't have one
274        if !content.ends_with('\n') {
275            result.pop();
276        }
277
278        Ok(result)
279    }
280
281    /// Get the category of this rule for selective processing
282    fn category(&self) -> RuleCategory {
283        RuleCategory::CodeBlock
284    }
285
286    /// Check if this rule should be skipped
287    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
288        ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
289    }
290
291    fn as_any(&self) -> &dyn std::any::Any {
292        self
293    }
294
295    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
296    where
297        Self: Sized,
298    {
299        Box::new(MD040FencedCodeLanguage)
300    }
301}
302
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs `check` on `content` using the standard Markdown flavor.
    fn run_check(content: &str) -> LintResult {
        let rule = MD040FencedCodeLanguage;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx)
    }

    /// Runs `fix` on `content` using the standard Markdown flavor.
    fn run_fix(content: &str) -> Result<String, LintError> {
        let rule = MD040FencedCodeLanguage;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx)
    }

    #[test]
    fn test_code_blocks_with_language_specified() {
        // Basic test with language
        let content = r#"# Test

```python
print("Hello, world!")
```

```javascript
console.log("Hello!");
```
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }

    #[test]
    fn test_code_blocks_without_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }

    #[test]
    fn test_code_blocks_with_empty_language() {
        // Test with spaces after the fence. The fixture is an escaped string
        // (not a raw string) so the trailing spaces cannot be stripped silently
        // by an editor or formatter.
        let content = "# Test\n\n```   \nprint(\"Hello, world!\")\n```\n";
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }

    #[test]
    fn test_indented_code_blocks_should_be_ignored() {
        // Indented code blocks (4 spaces) should not trigger the rule
        let content = r#"# Test

    This is an indented code block
    It should not trigger MD040
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Indented code blocks should be ignored");
    }

    #[test]
    fn test_inline_code_spans_should_be_ignored() {
        let content = r#"# Test

This is `inline code` and should not trigger warnings.

Use the `print()` function.
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Inline code spans should be ignored");
    }

    #[test]
    fn test_tildes_vs_backticks_for_fences() {
        // Test tilde fences without language
        let content_tildes_no_lang = r#"# Test

~~~
code here
~~~
"#;
        let result = run_check(content_tildes_no_lang).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");

        // Test tilde fences with language
        let content_tildes_with_lang = r#"# Test

~~~python
code here
~~~
"#;
        let result = run_check(content_tildes_with_lang).unwrap();
        assert!(result.is_empty());

        // Mixed fences
        let content_mixed = r#"# Test

```python
code here
```

~~~javascript
more code
~~~

```
no language
```

~~~
also no language
~~~
"#;
        let result = run_check(content_mixed).unwrap();
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_language_with_additional_parameters() {
        let content = r#"# Test

```python {highlight=[1,2]}
print("Line 1")
print("Line 2")
```

```javascript {.line-numbers startFrom="10"}
console.log("Hello");
```

```ruby {data-line="1,3-4"}
puts "Hello"
puts "World"
puts "!"
```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Code blocks with language and parameters should pass"
        );
    }

    #[test]
    fn test_multiple_code_blocks_in_document() {
        let content = r#"# Test Document

First block without language:
```
code here
```

Second block with language:
```python
print("hello")
```

Third block without language:
```
more code
```

Fourth block with language:
```javascript
console.log("test");
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 4);
        assert_eq!(result[1].line, 14);
    }

    #[test]
    fn test_nested_code_blocks_in_lists() {
        let content = r#"# Test

- Item 1
  ```python
  print("nested with language")
  ```

- Item 2
  ```
  nested without language
  ```

- Item 3
  - Nested item
    ```javascript
    console.log("deeply nested");
    ```

  - Another nested
    ```
    no language
    ```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);
        // Check that it detects the blocks without language
        assert_eq!(result[0].line, 9);
        assert_eq!(result[1].line, 20);
    }

    #[test]
    fn test_code_blocks_in_blockquotes() {
        let content = r#"# Test

> This is a blockquote
> ```python
> print("with language")
> ```

> Another blockquote
> ```
> without language
> ```
"#;
        let result = run_check(content).unwrap();
        // The implementation doesn't detect code blocks inside blockquotes
        // This is by design to avoid complexity with nested structures
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_fix_method_adds_text_language() {
        let content = r#"# Test

```
code without language
```

```python
already has language
```

```
another block without
```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);
    }

    #[test]
    fn test_fix_preserves_indentation() {
        let content = r#"# Test

- List item
  ```
  indented code block
  ```
"#;
        let fixed = run_fix(content).unwrap();
        // Should preserve indentation for list items
        assert!(fixed.contains("  ```text"));
        assert!(fixed.contains("  indented code block"));
    }

    #[test]
    fn test_fix_preserves_indentation_numbered_list() {
        // Test case from issue #122
        let content = r#"1. Step 1

    ```
    foo
    bar
    ```
"#;
        let fixed = run_fix(content).unwrap();
        // Should preserve 4-space indentation for numbered list content
        assert!(fixed.contains("    ```text"));
        assert!(fixed.contains("    foo"));
        assert!(fixed.contains("    bar"));
        // Should not remove indentation
        assert!(!fixed.contains("\n```text\n"));
    }

    #[test]
    fn test_fix_preserves_all_indentation() {
        let content = r#"# Test

Top-level code block:
```
top level
```

1. List item

    ```
    nested in list
    ```

Indented by 2 spaces:
  ```
  content
  ```
"#;
        let fixed = run_fix(content).unwrap();

        // All indentation should be preserved exactly as-is
        assert!(
            fixed.contains("```text\ntop level"),
            "Top-level code block indentation preserved"
        );
        assert!(
            fixed.contains("    ```text\n    nested in list"),
            "List item code block indentation preserved"
        );
        assert!(
            fixed.contains("  ```text\n  content"),
            "2-space indented code block indentation preserved"
        );
    }

    #[test]
    fn test_fix_with_tilde_fences() {
        let content = r#"# Test

~~~
code with tildes
~~~
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("~~~text"));
    }

    #[test]
    fn test_longer_fence_markers() {
        let content = r#"# Test

````
code with four backticks
````

`````python
code with five backticks and language
`````

~~~~~~
code with six tildes
~~~~~~
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);

        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("````text"));
        assert!(fixed.contains("~~~~~~text"));
        assert!(fixed.contains("`````python"));
    }

    #[test]
    fn test_nested_code_blocks_different_markers() {
        let content = r#"# Test

````markdown
This is a markdown block

```python
# This is nested code
print("hello")
```

More markdown
````
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Nested code blocks with different markers should not trigger warnings"
        );
    }

    #[test]
    fn test_disable_enable_comments() {
        let content = r#"# Test

<!-- rumdl-disable MD040 -->
```
this should not trigger warning
```
<!-- rumdl-enable MD040 -->

```
this should trigger warning
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 9);
    }

    #[test]
    fn test_fence_with_language_only_on_closing() {
        // Edge case: language on closing fence should not be interpreted
        let content = r#"# Test

```
code
```python
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_incomplete_code_blocks() {
        // Test unclosed code block
        let content = r#"# Test

```python
this code block is not closed"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Unclosed code blocks with language should not trigger warnings"
        );

        // Test unclosed code block without language
        let content_no_lang = r#"# Test

```
this code block is not closed"#;
        let result = run_check(content_no_lang).unwrap();
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_fix_preserves_original_formatting() {
        let content = r#"# Test

```
code
```

No newline at end"#;
        let fixed = run_fix(content).unwrap();
        assert!(!fixed.ends_with('\n'), "Fix should preserve lack of trailing newline");

        let content_with_newline = "# Test\n\n```\ncode\n```\n";
        let fixed = run_fix(content_with_newline).unwrap();
        assert!(fixed.ends_with('\n'), "Fix should preserve trailing newline");
    }

    #[test]
    fn test_edge_case_backticks_in_content() {
        let content = r#"# Test

```javascript
console.log(`template string with backticks`);
// This line has ``` in a comment
```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Backticks inside code blocks should not affect parsing"
        );
    }

    #[test]
    fn test_empty_document() {
        let content = "";
        let result = run_check(content).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_should_skip_optimization() {
        let rule = MD040FencedCodeLanguage;

        // Document without code fences should skip
        let ctx = LintContext::new("# Just a header\n\nSome text", crate::config::MarkdownFlavor::Standard);
        assert!(rule.should_skip(&ctx));

        // Document with backtick fences should not skip
        let ctx = LintContext::new("```\ncode\n```", crate::config::MarkdownFlavor::Standard);
        assert!(!rule.should_skip(&ctx));

        // Document with tilde fences should not skip
        let ctx = LintContext::new("~~~\ncode\n~~~", crate::config::MarkdownFlavor::Standard);
        assert!(!rule.should_skip(&ctx));

        // Empty document should skip
        let ctx = LintContext::new("", crate::config::MarkdownFlavor::Standard);
        assert!(rule.should_skip(&ctx));
    }
}