rumdl_lib/rules/
md040_fenced_code_language.rs1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
3use crate::utils::range_utils::{LineIndex, calculate_line_range};
4
/// Rule MD040: fenced code blocks should have a language specified.
///
/// The rule carries no configuration, so it is a unit struct.
#[derive(Clone, Debug, Default)]
pub struct MD040FencedCodeLanguage;
11
12impl Rule for MD040FencedCodeLanguage {
13 fn name(&self) -> &'static str {
14 "MD040"
15 }
16
17 fn description(&self) -> &'static str {
18 "Code blocks should have a language specified"
19 }
20
21 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
22 let content = ctx.content;
23 let _line_index = LineIndex::new(content.to_string());
24
25 let mut warnings = Vec::new();
26
27 let mut in_code_block = false;
28 let mut current_fence_marker: Option<String> = None;
29 let mut opening_fence_indent: usize = 0;
30
31 let mut is_disabled = false;
33
34 for (i, line) in content.lines().enumerate() {
35 let trimmed = line.trim();
36
37 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
39 && (rules.is_empty() || rules.contains(&self.name()))
40 {
41 is_disabled = true;
42 }
43 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
44 && (rules.is_empty() || rules.contains(&self.name()))
45 {
46 is_disabled = false;
47 }
48
49 if is_disabled {
51 continue;
52 }
53
54 let fence_marker = if trimmed.starts_with("```") {
56 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
57 if backtick_count >= 3 {
58 Some("`".repeat(backtick_count))
59 } else {
60 None
61 }
62 } else if trimmed.starts_with("~~~") {
63 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
64 if tilde_count >= 3 {
65 Some("~".repeat(tilde_count))
66 } else {
67 None
68 }
69 } else {
70 None
71 };
72
73 if let Some(fence_marker) = fence_marker {
74 if in_code_block {
75 if let Some(ref current_marker) = current_fence_marker {
77 let current_indent = line.len() - line.trim_start().len();
78 if fence_marker == *current_marker
81 && trimmed[current_marker.len()..].trim().is_empty()
82 && current_indent <= opening_fence_indent
83 {
84 in_code_block = false;
86 current_fence_marker = None;
87 opening_fence_indent = 0;
88 }
89 }
91 } else {
92 let after_fence = trimmed[fence_marker.len()..].trim();
95
96 let has_title_only =
100 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
101
102 if after_fence.is_empty() || has_title_only {
103 let (start_line, start_col, end_line, end_col) = calculate_line_range(i + 1, line);
105
106 warnings.push(LintWarning {
107 rule_name: Some(self.name()),
108 line: start_line,
109 column: start_col,
110 end_line,
111 end_column: end_col,
112 message: "Code block (```) missing language".to_string(),
113 severity: Severity::Warning,
114 fix: Some(Fix {
115 range: {
116 let trimmed_start = line.len() - line.trim_start().len();
118 let fence_len = fence_marker.len();
119 let line_start_byte = ctx.line_offsets.get(i).copied().unwrap_or(0);
120 let fence_start_byte = line_start_byte + trimmed_start;
121 let fence_end_byte = fence_start_byte + fence_len;
122 fence_start_byte..fence_end_byte
123 },
124 replacement: format!("{fence_marker}text"),
125 }),
126 });
127 }
128
129 in_code_block = true;
130 current_fence_marker = Some(fence_marker);
131 opening_fence_indent = line.len() - line.trim_start().len();
132 }
133 }
134 }
136
137 Ok(warnings)
138 }
139
140 fn check_with_structure(
142 &self,
143 ctx: &crate::lint_context::LintContext,
144 _doc_structure: &DocumentStructure,
145 ) -> LintResult {
146 self.check(ctx)
149 }
150
151 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
152 let content = ctx.content;
153 let _line_index = LineIndex::new(content.to_string());
154
155 let mut result = String::new();
156 let mut in_code_block = false;
157 let mut current_fence_marker: Option<String> = None;
158 let mut fence_needs_language = false;
159 let mut original_indent = String::new();
160 let mut opening_fence_indent: usize = 0;
161
162 let lines: Vec<&str> = content.lines().collect();
163
164 let is_in_nested_context = |line_idx: usize| -> bool {
166 for i in (0..line_idx).rev() {
168 let line = lines.get(i).unwrap_or(&"");
169 let trimmed = line.trim();
170
171 if trimmed.is_empty() {
173 continue;
174 }
175
176 if line.trim_start().starts_with('>') {
178 return true;
179 }
180
181 if line.len() - line.trim_start().len() >= 2 {
183 let after_indent = line.trim_start();
184 if after_indent.starts_with("- ")
185 || after_indent.starts_with("* ")
186 || after_indent.starts_with("+ ")
187 || (after_indent.len() > 2
188 && after_indent.as_bytes().first().is_some_and(|&b| b.is_ascii_digit())
189 && after_indent.as_bytes().get(1) == Some(&b'.')
190 && after_indent.as_bytes().get(2) == Some(&b' '))
191 {
192 return true;
193 }
194 }
195
196 if line.starts_with(|c: char| !c.is_whitespace()) {
198 break;
199 }
200 }
201 false
202 };
203
204 let mut is_disabled = false;
206
207 for (i, line) in lines.iter().enumerate() {
208 let trimmed = line.trim();
209
210 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
212 && (rules.is_empty() || rules.contains(&self.name()))
213 {
214 is_disabled = true;
215 }
216 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
217 && (rules.is_empty() || rules.contains(&self.name()))
218 {
219 is_disabled = false;
220 }
221
222 if is_disabled {
224 result.push_str(line);
225 result.push('\n');
226 continue;
227 }
228
229 let fence_marker = if trimmed.starts_with("```") {
231 let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
232 if backtick_count >= 3 {
233 Some("`".repeat(backtick_count))
234 } else {
235 None
236 }
237 } else if trimmed.starts_with("~~~") {
238 let tilde_count = trimmed.chars().take_while(|&c| c == '~').count();
239 if tilde_count >= 3 {
240 Some("~".repeat(tilde_count))
241 } else {
242 None
243 }
244 } else {
245 None
246 };
247
248 if let Some(fence_marker) = fence_marker {
249 if in_code_block {
250 if let Some(ref current_marker) = current_fence_marker {
252 let current_indent = line.len() - line.trim_start().len();
253 if fence_marker == *current_marker
254 && trimmed[current_marker.len()..].trim().is_empty()
255 && current_indent <= opening_fence_indent
256 {
257 if fence_needs_language {
259 result.push_str(&format!("{original_indent}{trimmed}\n"));
261 } else {
262 result.push_str(line);
264 result.push('\n');
265 }
266 in_code_block = false;
267 current_fence_marker = None;
268 fence_needs_language = false;
269 original_indent.clear();
270 opening_fence_indent = 0;
271 } else {
272 result.push_str(line);
274 result.push('\n');
275 }
276 } else {
277 result.push_str(line);
279 result.push('\n');
280 }
281 } else {
282 let line_indent = line[..line.len() - line.trim_start().len()].to_string();
285
286 let after_fence = trimmed[fence_marker.len()..].trim();
288
289 let has_title_only =
291 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
292
293 if after_fence.is_empty() || has_title_only {
294 let should_preserve_indent = is_in_nested_context(i);
296
297 if should_preserve_indent {
298 original_indent = line_indent;
300 if has_title_only {
301 result.push_str(&format!("{original_indent}{fence_marker}text {after_fence}\n"));
303 } else {
304 result.push_str(&format!("{original_indent}{fence_marker}text\n"));
305 }
306 } else {
307 original_indent = String::new();
309 if has_title_only {
310 result.push_str(&format!("{fence_marker}text {after_fence}\n"));
312 } else {
313 result.push_str(&format!("{fence_marker}text\n"));
314 }
315 }
316 fence_needs_language = true;
317 } else {
318 result.push_str(line);
320 result.push('\n');
321 fence_needs_language = false;
322 }
323
324 in_code_block = true;
325 current_fence_marker = Some(fence_marker);
326 opening_fence_indent = line.len() - line.trim_start().len();
327 }
328 } else if in_code_block {
329 result.push_str(line);
331 result.push('\n');
332 } else {
333 result.push_str(line);
335 result.push('\n');
336 }
337 }
338
339 if !content.ends_with('\n') {
341 result.pop();
342 }
343
344 Ok(result)
345 }
346
347 fn category(&self) -> RuleCategory {
349 RuleCategory::CodeBlock
350 }
351
352 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
354 let content = ctx.content;
355 content.is_empty() || (!content.contains("```") && !content.contains("~~~"))
356 }
357
358 fn as_any(&self) -> &dyn std::any::Any {
359 self
360 }
361
362 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
363 where
364 Self: Sized,
365 {
366 Box::new(MD040FencedCodeLanguage)
367 }
368}
369
370impl DocumentStructureExtensions for MD040FencedCodeLanguage {
371 fn has_relevant_elements(
372 &self,
373 ctx: &crate::lint_context::LintContext,
374 _doc_structure: &DocumentStructure,
375 ) -> bool {
376 let content = ctx.content;
377 content.contains("```") || content.contains("~~~")
379 }
380}
381
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// The warning text emitted for every fence without a language.
    const MISSING_LANGUAGE: &str = "Code block (```) missing language";

    /// Builds a document from `lines`, terminating every line with `\n`.
    ///
    /// Fixtures are written as explicit line lists so that leading and trailing
    /// whitespace, which matters to this rule, is visible in the test source.
    fn doc(lines: &[&str]) -> String {
        lines.iter().map(|line| format!("{line}\n")).collect()
    }

    /// Runs `check` on `content` using the standard Markdown flavor.
    fn run_check(content: &str) -> LintResult {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD040FencedCodeLanguage.check(&ctx)
    }

    /// Runs `fix` on `content` using the standard Markdown flavor.
    fn run_fix(content: &str) -> Result<String, LintError> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD040FencedCodeLanguage.fix(&ctx)
    }

    #[test]
    fn test_code_blocks_with_language_specified() {
        let content = doc(&[
            "# Test",
            "",
            "```python",
            "print(\"Hello, world!\")",
            "```",
            "",
            "```javascript",
            "console.log(\"Hello!\");",
            "```",
        ]);
        let result = run_check(&content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }

    #[test]
    fn test_code_blocks_without_language() {
        let content = doc(&["# Test", "", "```", "print(\"Hello, world!\")", "```"]);
        let result = run_check(&content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, MISSING_LANGUAGE);
        assert_eq!(result[0].line, 3);
    }

    #[test]
    fn test_code_blocks_with_empty_language() {
        // The opening fence is followed only by spaces: a whitespace-only info string
        // must be treated the same as no info string at all.
        let content = doc(&["# Test", "", "```   ", "print(\"Hello, world!\")", "```"]);
        let result = run_check(&content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, MISSING_LANGUAGE);
    }

    #[test]
    fn test_indented_code_blocks_should_be_ignored() {
        let content = doc(&[
            "# Test",
            "",
            "    This is an indented code block",
            "    It should not trigger MD040",
        ]);
        let result = run_check(&content).unwrap();
        assert!(result.is_empty(), "Indented code blocks should be ignored");
    }

    #[test]
    fn test_inline_code_spans_should_be_ignored() {
        let content = doc(&[
            "# Test",
            "",
            "This is `inline code` and should not trigger warnings.",
            "",
            "Use the `print()` function.",
        ]);
        let result = run_check(&content).unwrap();
        assert!(result.is_empty(), "Inline code spans should be ignored");
    }

    #[test]
    fn test_tildes_vs_backticks_for_fences() {
        let tildes_no_lang = doc(&["# Test", "", "~~~", "code here", "~~~"]);
        let result = run_check(&tildes_no_lang).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, MISSING_LANGUAGE);

        let tildes_with_lang = doc(&["# Test", "", "~~~python", "code here", "~~~"]);
        let result = run_check(&tildes_with_lang).unwrap();
        assert!(result.is_empty());

        let mixed = doc(&[
            "# Test",
            "",
            "```python",
            "code here",
            "```",
            "",
            "~~~javascript",
            "more code",
            "~~~",
            "",
            "```",
            "no language",
            "```",
            "",
            "~~~",
            "also no language",
            "~~~",
        ]);
        let result = run_check(&mixed).unwrap();
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_language_with_additional_parameters() {
        let content = doc(&[
            "# Test",
            "",
            "```python {highlight=[1,2]}",
            "print(\"Line 1\")",
            "print(\"Line 2\")",
            "```",
            "",
            "```javascript {.line-numbers startFrom=\"10\"}",
            "console.log(\"Hello\");",
            "```",
            "",
            "```ruby {data-line=\"1,3-4\"}",
            "puts \"Hello\"",
            "puts \"World\"",
            "puts \"!\"",
            "```",
        ]);
        let result = run_check(&content).unwrap();
        assert!(
            result.is_empty(),
            "Code blocks with language and parameters should pass"
        );
    }

    #[test]
    fn test_multiple_code_blocks_in_document() {
        let content = doc(&[
            "# Test Document",
            "",
            "First block without language:",
            "```",
            "code here",
            "```",
            "",
            "Second block with language:",
            "```python",
            "print(\"hello\")",
            "```",
            "",
            "Third block without language:",
            "```",
            "more code",
            "```",
            "",
            "Fourth block with language:",
            "```javascript",
            "console.log(\"test\");",
            "```",
        ]);
        let result = run_check(&content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 4);
        assert_eq!(result[1].line, 14);
    }

    #[test]
    fn test_nested_code_blocks_in_lists() {
        let content = doc(&[
            "# Test",
            "",
            "- Item 1",
            "  ```python",
            "  print(\"nested with language\")",
            "  ```",
            "",
            "- Item 2",
            "  ```",
            "  nested without language",
            "  ```",
            "",
            "- Item 3",
            "  - Nested item",
            "    ```javascript",
            "    console.log(\"deeply nested\");",
            "    ```",
            "",
            "  - Another nested",
            "    ```",
            "    no language",
            "    ```",
        ]);
        let result = run_check(&content).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 9);
        assert_eq!(result[1].line, 20);
    }

    #[test]
    fn test_code_blocks_in_blockquotes() {
        let content = doc(&[
            "# Test",
            "",
            "> This is a blockquote",
            "> ```python",
            "> print(\"with language\")",
            "> ```",
            "",
            "> Another blockquote",
            "> ```",
            "> without language",
            "> ```",
        ]);
        let result = run_check(&content).unwrap();
        // Nothing is reported for fences inside blockquotes, not even for the one
        // without a language.
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_fix_method_adds_text_language() {
        let content = doc(&[
            "# Test",
            "",
            "```",
            "code without language",
            "```",
            "",
            "```python",
            "already has language",
            "```",
            "",
            "```",
            "another block without",
            "```",
        ]);
        let fixed = run_fix(&content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);

        let expected = doc(&[
            "# Test",
            "",
            "```text",
            "code without language",
            "```",
            "",
            "```python",
            "already has language",
            "```",
            "",
            "```text",
            "another block without",
            "```",
        ]);
        assert_eq!(fixed, expected);
    }

    #[test]
    fn test_fix_preserves_indentation() {
        let content = doc(&["# Test", "", "- List item", "  ```", "  indented code block", "  ```"]);
        let fixed = run_fix(&content).unwrap();
        // Only the inserted language and the untouched block body are asserted here;
        // the indentation of the fence lines themselves is not.
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("  indented code block"));
    }

    #[test]
    fn test_fix_with_tilde_fences() {
        let content = doc(&["# Test", "", "~~~", "code with tildes", "~~~"]);
        let fixed = run_fix(&content).unwrap();
        assert!(fixed.contains("~~~text"));
        assert_eq!(fixed, doc(&["# Test", "", "~~~text", "code with tildes", "~~~"]));
    }

    #[test]
    fn test_longer_fence_markers() {
        let content = doc(&[
            "# Test",
            "",
            "````",
            "code with four backticks",
            "````",
            "",
            "`````python",
            "code with five backticks and language",
            "`````",
            "",
            "~~~~~~",
            "code with six tildes",
            "~~~~~~",
        ]);
        let result = run_check(&content).unwrap();
        assert_eq!(result.len(), 2);

        let fixed = run_fix(&content).unwrap();
        assert!(fixed.contains("````text"));
        assert!(fixed.contains("~~~~~~text"));
        assert!(fixed.contains("`````python"));
    }

    #[test]
    fn test_nested_code_blocks_different_markers() {
        let content = doc(&[
            "# Test",
            "",
            "````markdown",
            "This is a markdown block",
            "",
            "```python",
            "# This is nested code",
            "print(\"hello\")",
            "```",
            "",
            "More markdown",
            "````",
        ]);
        let result = run_check(&content).unwrap();
        assert!(
            result.is_empty(),
            "Nested code blocks with different markers should not trigger warnings"
        );
    }

    #[test]
    fn test_disable_enable_comments() {
        let content = doc(&[
            "# Test",
            "",
            "<!-- rumdl-disable MD040 -->",
            "```",
            "this should not trigger warning",
            "```",
            "<!-- rumdl-enable MD040 -->",
            "",
            "```",
            "this should trigger warning",
            "```",
        ]);
        let result = run_check(&content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 9);
    }

    #[test]
    fn test_fence_with_language_only_on_closing() {
        // A fence followed by text cannot close a block, so only the opener is reported.
        let content = doc(&["# Test", "", "```", "code", "```python"]);
        let result = run_check(&content).unwrap();
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_incomplete_code_blocks() {
        let with_lang = ["# Test", "", "```python", "this code block is not closed"].join("\n");
        let result = run_check(&with_lang).unwrap();
        assert!(
            result.is_empty(),
            "Unclosed code blocks with language should not trigger warnings"
        );

        let no_lang = ["# Test", "", "```", "this code block is not closed"].join("\n");
        let result = run_check(&no_lang).unwrap();
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_fix_preserves_original_formatting() {
        let without_newline = ["# Test", "", "```", "code", "```", "", "No newline at end"].join("\n");
        let fixed = run_fix(&without_newline).unwrap();
        assert!(!fixed.ends_with('\n'), "Fix should preserve lack of trailing newline");

        let content_with_newline = "# Test\n\n```\ncode\n```\n";
        let fixed = run_fix(content_with_newline).unwrap();
        assert!(fixed.ends_with('\n'), "Fix should preserve trailing newline");
    }

    #[test]
    fn test_edge_case_backticks_in_content() {
        let content = doc(&[
            "# Test",
            "",
            "```javascript",
            "console.log(`template string with backticks`);",
            "// This line has ``` in a comment",
            "```",
        ]);
        let result = run_check(&content).unwrap();
        assert!(
            result.is_empty(),
            "Backticks inside code blocks should not affect parsing"
        );
    }

    #[test]
    fn test_empty_document() {
        let result = run_check("").unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_should_skip_optimization() {
        let rule = MD040FencedCodeLanguage;
        let skipped = |content: &str| {
            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
            rule.should_skip(&ctx)
        };

        // No fence characters at all: nothing to do.
        assert!(skipped("# Just a header\n\nSome text"));

        // Either fence style makes the document relevant.
        assert!(!skipped("```\ncode\n```"));
        assert!(!skipped("~~~\ncode\n~~~"));

        // Empty documents are always skipped.
        assert!(skipped(""));
    }
}