// rumdl_lib/rules/md040_fenced_code_language.rs
use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
use crate::utils::range_utils::{LineIndex, calculate_line_range};
3
/// Rule MD040: fenced code blocks should have a language specified.
///
/// The rule carries no configuration or state, so it is a unit struct.
#[derive(Debug, Default, Clone)]
pub struct MD040FencedCodeLanguage;
10
11impl Rule for MD040FencedCodeLanguage {
12 fn name(&self) -> &'static str {
13 "MD040"
14 }
15
16 fn description(&self) -> &'static str {
17 "Code blocks should have a language specified"
18 }
19
20 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
21 let content = ctx.content;
22 let _line_index = LineIndex::new(content.to_string());
23
24 let mut warnings = Vec::new();
25
26 let mut in_code_block = false;
27 let mut current_fence_marker: Option<String> = None;
28 let mut opening_fence_indent: usize = 0;
29
30 let mut is_disabled = false;
32
33 for (i, line) in content.lines().enumerate() {
34 let trimmed = line.trim();
35
36 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
38 && (rules.is_empty() || rules.contains(&self.name()))
39 {
40 is_disabled = true;
41 }
42 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
43 && (rules.is_empty() || rules.contains(&self.name()))
44 {
45 is_disabled = false;
46 }
47
48 if is_disabled {
50 continue;
51 }
52
53 let fence_marker = if trimmed.starts_with("```") {
55 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
56 if backtick_count >= 3 {
57 Some("`".repeat(backtick_count))
58 } else {
59 None
60 }
61 } else if trimmed.starts_with("~~~") {
62 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
63 if tilde_count >= 3 {
64 Some("~".repeat(tilde_count))
65 } else {
66 None
67 }
68 } else {
69 None
70 };
71
72 if let Some(fence_marker) = fence_marker {
73 if in_code_block {
74 if let Some(ref current_marker) = current_fence_marker {
76 let current_indent = line.len() - line.trim_start().len();
77 if fence_marker == *current_marker
80 && trimmed[current_marker.len()..].trim().is_empty()
81 && current_indent <= opening_fence_indent
82 {
83 in_code_block = false;
85 current_fence_marker = None;
86 opening_fence_indent = 0;
87 }
88 }
90 } else {
91 let after_fence = trimmed[fence_marker.len()..].trim();
94
95 let has_title_only =
99 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
100
101 if after_fence.is_empty() || has_title_only {
102 let (start_line, start_col, end_line, end_col) = calculate_line_range(i + 1, line);
104
105 warnings.push(LintWarning {
106 rule_name: Some(self.name().to_string()),
107 line: start_line,
108 column: start_col,
109 end_line,
110 end_column: end_col,
111 message: "Code block (```) missing language".to_string(),
112 severity: Severity::Warning,
113 fix: Some(Fix {
114 range: {
115 let trimmed_start = line.len() - line.trim_start().len();
117 let fence_len = fence_marker.len();
118 let line_start_byte = ctx.line_offsets.get(i).copied().unwrap_or(0);
119 let fence_start_byte = line_start_byte + trimmed_start;
120 let fence_end_byte = fence_start_byte + fence_len;
121 fence_start_byte..fence_end_byte
122 },
123 replacement: format!("{fence_marker}text"),
124 }),
125 });
126 }
127
128 in_code_block = true;
129 current_fence_marker = Some(fence_marker);
130 opening_fence_indent = line.len() - line.trim_start().len();
131 }
132 }
133 }
135
136 Ok(warnings)
137 }
138
139 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
140 let content = ctx.content;
141 let _line_index = LineIndex::new(content.to_string());
142
143 let mut result = String::new();
144 let mut in_code_block = false;
145 let mut current_fence_marker: Option<String> = None;
146 let mut fence_needs_language = false;
147 let mut original_indent = String::new();
148 let mut opening_fence_indent: usize = 0;
149
150 let lines: Vec<&str> = content.lines().collect();
151
152 let mut is_disabled = false;
154
155 for line in lines.iter() {
156 let trimmed = line.trim();
157
158 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
160 && (rules.is_empty() || rules.contains(&self.name()))
161 {
162 is_disabled = true;
163 }
164 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
165 && (rules.is_empty() || rules.contains(&self.name()))
166 {
167 is_disabled = false;
168 }
169
170 if is_disabled {
172 result.push_str(line);
173 result.push('\n');
174 continue;
175 }
176
177 let fence_marker = if trimmed.starts_with("```") {
179 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
180 if backtick_count >= 3 {
181 Some("`".repeat(backtick_count))
182 } else {
183 None
184 }
185 } else if trimmed.starts_with("~~~") {
186 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
187 if tilde_count >= 3 {
188 Some("~".repeat(tilde_count))
189 } else {
190 None
191 }
192 } else {
193 None
194 };
195
196 if let Some(fence_marker) = fence_marker {
197 if in_code_block {
198 if let Some(ref current_marker) = current_fence_marker {
200 let current_indent = line.len() - line.trim_start().len();
201 if fence_marker == *current_marker
202 && trimmed[current_marker.len()..].trim().is_empty()
203 && current_indent <= opening_fence_indent
204 {
205 if fence_needs_language {
207 result.push_str(&format!("{original_indent}{trimmed}\n"));
209 } else {
210 result.push_str(line);
212 result.push('\n');
213 }
214 in_code_block = false;
215 current_fence_marker = None;
216 fence_needs_language = false;
217 original_indent.clear();
218 opening_fence_indent = 0;
219 } else {
220 result.push_str(line);
222 result.push('\n');
223 }
224 } else {
225 result.push_str(line);
227 result.push('\n');
228 }
229 } else {
230 let line_indent = line[..line.len() - line.trim_start().len()].to_string();
233
234 let after_fence = trimmed[fence_marker.len()..].trim();
236
237 let has_title_only =
239 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
240
241 if after_fence.is_empty() || has_title_only {
242 original_indent = line_indent;
244 if has_title_only {
245 result.push_str(&format!("{original_indent}{fence_marker}text {after_fence}\n"));
247 } else {
248 result.push_str(&format!("{original_indent}{fence_marker}text\n"));
249 }
250 fence_needs_language = true;
251 } else {
252 result.push_str(line);
254 result.push('\n');
255 fence_needs_language = false;
256 }
257
258 in_code_block = true;
259 current_fence_marker = Some(fence_marker);
260 opening_fence_indent = line.len() - line.trim_start().len();
261 }
262 } else if in_code_block {
263 result.push_str(line);
265 result.push('\n');
266 } else {
267 result.push_str(line);
269 result.push('\n');
270 }
271 }
272
273 if !content.ends_with('\n') {
275 result.pop();
276 }
277
278 Ok(result)
279 }
280
281 fn category(&self) -> RuleCategory {
283 RuleCategory::CodeBlock
284 }
285
286 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
288 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
289 }
290
291 fn as_any(&self) -> &dyn std::any::Any {
292 self
293 }
294
295 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
296 where
297 Self: Sized,
298 {
299 Box::new(MD040FencedCodeLanguage)
300 }
301}
302
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs `check` on `content` using the standard Markdown flavor.
    fn run_check(content: &str) -> LintResult {
        let rule = MD040FencedCodeLanguage;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx)
    }

    /// Runs `fix` on `content` using the standard Markdown flavor.
    fn run_fix(content: &str) -> Result<String, LintError> {
        let rule = MD040FencedCodeLanguage;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx)
    }

    #[test]
    fn test_code_blocks_with_language_specified() {
        let content = r#"# Test

```python
print("Hello, world!")
```

```javascript
console.log("Hello!");
```
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }

    #[test]
    fn test_code_blocks_without_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }

    #[test]
    fn test_code_blocks_with_empty_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
    }

    #[test]
    fn test_indented_code_blocks_should_be_ignored() {
        let content = r#"# Test

    This is an indented code block
    It should not trigger MD040
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Indented code blocks should be ignored");
    }

    #[test]
    fn test_inline_code_spans_should_be_ignored() {
        let content = r#"# Test

This is `inline code` and should not trigger warnings.

Use the `print()` function.
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Inline code spans should be ignored");
    }

    #[test]
    fn test_tildes_vs_backticks_for_fences() {
        let content_tildes_no_lang = r#"# Test

~~~
code here
~~~
"#;
        let result = run_check(content_tildes_no_lang).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");

        let content_tildes_with_lang = r#"# Test

~~~python
code here
~~~
"#;
        let result = run_check(content_tildes_with_lang).unwrap();
        assert!(result.is_empty());

        let content_mixed = r#"# Test

```python
code here
```

~~~javascript
more code
~~~

```
no language
```

~~~
also no language
~~~
"#;
        let result = run_check(content_mixed).unwrap();
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_language_with_additional_parameters() {
        let content = r#"# Test

```python {highlight=[1,2]}
print("Line 1")
print("Line 2")
```

```javascript {.line-numbers startFrom="10"}
console.log("Hello");
```

```ruby {data-line="1,3-4"}
puts "Hello"
puts "World"
puts "!"
```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Code blocks with language and parameters should pass"
        );
    }

    #[test]
    fn test_multiple_code_blocks_in_document() {
        let content = r#"# Test Document

First block without language:
```
code here
```

Second block with language:
```python
print("hello")
```

Third block without language:
```
more code
```

Fourth block with language:
```javascript
console.log("test");
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 4);
        assert_eq!(result[1].line, 14);
    }

    #[test]
    fn test_nested_code_blocks_in_lists() {
        let content = r#"# Test

- Item 1
  ```python
  print("nested with language")
  ```

- Item 2
  ```
  nested without language
  ```

- Item 3
  - Nested item
    ```javascript
    console.log("deeply nested");
    ```

  - Another nested
    ```
    no language
    ```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 9);
        assert_eq!(result[1].line, 20);
    }

    #[test]
    fn test_code_blocks_in_blockquotes() {
        let content = r#"# Test

> This is a blockquote
> ```python
> print("with language")
> ```

> Another blockquote
> ```
> without language
> ```
"#;
        // Fences behind a `>` prefix do not start with a fence character, so they are not seen.
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_fix_method_adds_text_language() {
        let content = r#"# Test

```
code without language
```

```python
already has language
```

```
another block without
```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);
    }

    #[test]
    fn test_fix_preserves_indentation() {
        let content = r#"# Test

- List item
  ```
  indented code block
  ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("  ```text"));
        assert!(fixed.contains("  indented code block"));
    }

    #[test]
    fn test_fix_preserves_indentation_numbered_list() {
        let content = r#"1. Step 1

   ```
   foo
   bar
   ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("   ```text"));
        assert!(fixed.contains("   foo"));
        assert!(fixed.contains("   bar"));
        // The fence must not have been moved to column 0.
        assert!(!fixed.contains("\n```text\n"));
    }

    #[test]
    fn test_fix_preserves_all_indentation() {
        let content = r#"# Test

Top-level code block:
```
top level
```

1. List item

   ```
   nested in list
   ```

Indented by 2 spaces:
  ```
  content
  ```
"#;
        let fixed = run_fix(content).unwrap();

        assert!(
            fixed.contains("```text\ntop level"),
            "Top-level code block indentation preserved"
        );
        assert!(
            fixed.contains("   ```text\n   nested in list"),
            "List item code block indentation preserved"
        );
        assert!(
            fixed.contains("  ```text\n  content"),
            "2-space indented code block indentation preserved"
        );
    }

    #[test]
    fn test_fix_with_tilde_fences() {
        let content = r#"# Test

~~~
code with tildes
~~~
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("~~~text"));
    }

    #[test]
    fn test_longer_fence_markers() {
        let content = r#"# Test

````
code with four backticks
````

`````python
code with five backticks and language
`````

~~~~~~
code with six tildes
~~~~~~
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 2);

        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("````text"));
        assert!(fixed.contains("~~~~~~text"));
        assert!(fixed.contains("`````python"));
    }

    #[test]
    fn test_nested_code_blocks_different_markers() {
        let content = r#"# Test

````markdown
This is a markdown block

```python
# This is nested code
print("hello")
```

More markdown
````
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Nested code blocks with different markers should not trigger warnings"
        );
    }

    #[test]
    fn test_disable_enable_comments() {
        let content = r#"# Test

<!-- rumdl-disable MD040 -->
```
this should not trigger warning
```
<!-- rumdl-enable MD040 -->

```
this should trigger warning
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 9);
    }

    #[test]
    fn test_fence_with_language_only_on_closing() {
        let content = r#"# Test

```
code
```python
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_incomplete_code_blocks() {
        let content = r#"# Test

```python
this code block is not closed"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Unclosed code blocks with language should not trigger warnings"
        );

        let content_no_lang = r#"# Test

```
this code block is not closed"#;
        let result = run_check(content_no_lang).unwrap();
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_fix_preserves_original_formatting() {
        let content = r#"# Test

```
code
```

No newline at end"#;
        let fixed = run_fix(content).unwrap();
        assert!(!fixed.ends_with('\n'), "Fix should preserve lack of trailing newline");

        let content_with_newline = "# Test\n\n```\ncode\n```\n";
        let fixed = run_fix(content_with_newline).unwrap();
        assert!(fixed.ends_with('\n'), "Fix should preserve trailing newline");
    }

    #[test]
    fn test_edge_case_backticks_in_content() {
        let content = r#"# Test

```javascript
console.log(`template string with backticks`);
// This line has ``` in a comment
```
"#;
        let result = run_check(content).unwrap();
        assert!(
            result.is_empty(),
            "Backticks inside code blocks should not affect parsing"
        );
    }

    #[test]
    fn test_empty_document() {
        let content = "";
        let result = run_check(content).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_should_skip_optimization() {
        let rule = MD040FencedCodeLanguage;

        // No fences at all: the rule can be skipped.
        let ctx = LintContext::new("# Just a header\n\nSome text", crate::config::MarkdownFlavor::Standard);
        assert!(rule.should_skip(&ctx));

        // Backtick fences must be checked.
        let ctx = LintContext::new("```\ncode\n```", crate::config::MarkdownFlavor::Standard);
        assert!(!rule.should_skip(&ctx));

        // Tilde fences must be checked as well.
        let ctx = LintContext::new("~~~\ncode\n~~~", crate::config::MarkdownFlavor::Standard);
        assert!(!rule.should_skip(&ctx));

        // Empty content is always skipped.
        let ctx = LintContext::new("", crate::config::MarkdownFlavor::Standard);
        assert!(rule.should_skip(&ctx));
    }
}