1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use std::collections::HashMap;
6
7pub mod md040_config;
11
/// Prefixes that mark the text after a fence as a MkDocs superfences attribute
/// (for example `title="..."`, a `.class` or an `#id`) instead of a language name.
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &[
    "title=",
    "hl_lines=",
    "linenums=",
    ".",
    "#",
];

/// Returns `true` when `s` begins with any of the known superfences attribute prefixes.
///
/// The comparison is a plain, case-sensitive prefix test; `s` is not trimmed first.
#[inline]
fn is_superfences_attribute(s: &str) -> bool {
    for &prefix in MKDOCS_SUPERFENCES_ATTR_PREFIXES {
        if s.starts_with(prefix) {
            return true;
        }
    }
    false
}
34use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
35
/// The opening line of one fenced code block, reduced to what this rule needs.
struct FencedCodeBlock {
    /// Zero-based index of the line holding the opening fence (reported as `line_idx + 1`).
    line_idx: usize,
    /// First whitespace-delimited token of the info string; empty when there is none.
    language: String,
    /// The run of backticks or tildes that opens the block.
    fence_marker: String,
}
44
/// Rule MD040: code blocks should have a language specified.
#[derive(Debug, Clone, Default)]
pub struct MD040FencedCodeLanguage {
    /// Rule settings: label style, preferred aliases, allow/deny lists and the
    /// action taken for unknown languages.
    config: MD040Config,
}
49
50impl MD040FencedCodeLanguage {
/// Creates the rule with an explicit configuration.
pub fn with_config(config: MD040Config) -> Self {
    Self { config }
}
54
55 fn validate_config(&self) -> Vec<String> {
57 let mut errors = Vec::new();
58
59 for (canonical, alias) in &self.config.preferred_aliases {
61 if let Some(actual_canonical) = resolve_canonical(canonical) {
63 if !is_valid_alias(actual_canonical, alias)
64 && let Some(valid_aliases) = get_aliases(actual_canonical)
65 {
66 let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
67 let valid_str = valid_list
68 .iter()
69 .map(|s| format!("'{s}'"))
70 .collect::<Vec<_>>()
71 .join(", ");
72 let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
73 errors.push(format!(
74 "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
75 ));
76 }
77 } else {
78 errors.push(format!(
79 "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
80 ));
81 }
82 }
83
84 errors
85 }
86
87 fn compute_preferred_labels(
89 &self,
90 blocks: &[FencedCodeBlock],
91 disabled_ranges: &[(usize, usize)],
92 ) -> HashMap<String, String> {
93 let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
95
96 for block in blocks {
97 if is_line_disabled(disabled_ranges, block.line_idx) {
98 continue;
99 }
100 if block.language.is_empty() {
101 continue;
102 }
103 if let Some(canonical) = resolve_canonical(&block.language) {
104 by_canonical
105 .entry(canonical.to_string())
106 .or_default()
107 .push(&block.language);
108 }
109 }
110
111 let mut result = HashMap::new();
113
114 for (canonical, labels) in by_canonical {
115 let winner = if let Some(preferred) = self
117 .config
118 .preferred_aliases
119 .iter()
120 .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
121 .map(|(_, v)| v.clone())
122 {
123 preferred
124 } else {
125 let mut counts: HashMap<&str, usize> = HashMap::new();
127 for label in &labels {
128 *counts.entry(*label).or_default() += 1;
129 }
130
131 let max_count = counts.values().max().copied().unwrap_or(0);
132 let winners: Vec<_> = counts
133 .iter()
134 .filter(|(_, c)| **c == max_count)
135 .map(|(l, _)| *l)
136 .collect();
137
138 if winners.len() == 1 {
139 winners[0].to_string()
140 } else {
141 default_alias(&canonical)
143 .filter(|default| winners.contains(default))
144 .map_or_else(
145 || winners.into_iter().min().unwrap().to_string(),
146 std::string::ToString::to_string,
147 )
148 }
149 };
150
151 result.insert(canonical, winner);
152 }
153
154 result
155 }
156
157 fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
159 if !self.config.allowed_languages.is_empty() {
161 let allowed = self.config.allowed_languages.join(", ");
162 let Some(canonical) = canonical else {
163 return Some(format!(
164 "Language '{original_label}' is not in the allowed list: {allowed}"
165 ));
166 };
167 if !self
168 .config
169 .allowed_languages
170 .iter()
171 .any(|a| a.eq_ignore_ascii_case(canonical))
172 {
173 return Some(format!(
174 "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
175 ));
176 }
177 } else if !self.config.disallowed_languages.is_empty()
178 && canonical.is_some_and(|canonical| {
179 self.config
180 .disallowed_languages
181 .iter()
182 .any(|d| d.eq_ignore_ascii_case(canonical))
183 })
184 {
185 let canonical = canonical.unwrap_or("unknown");
186 return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
187 }
188 None
189 }
190
191 fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
193 if resolve_canonical(label).is_some() {
194 return None;
195 }
196
197 match self.config.unknown_language_action {
198 UnknownLanguageAction::Ignore => None,
199 UnknownLanguageAction::Warn => Some((
200 format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
201 Severity::Warning,
202 )),
203 UnknownLanguageAction::Error => Some((
204 format!("Unknown language '{label}' (not in GitHub Linguist)"),
205 Severity::Error,
206 )),
207 }
208 }
209}
210
211impl Rule for MD040FencedCodeLanguage {
/// Returns the rule identifier, `"MD040"`.
fn name(&self) -> &'static str {
    "MD040"
}
215
/// Returns the one-line, human-readable summary of what the rule enforces.
fn description(&self) -> &'static str {
    "Code blocks should have a language specified"
}
219
220 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
221 let content = ctx.content;
222 let mut warnings = Vec::new();
223
224 for error in self.validate_config() {
226 warnings.push(LintWarning {
227 rule_name: Some(self.name().to_string()),
228 line: 1,
229 column: 1,
230 end_line: 1,
231 end_column: 1,
232 message: format!("[config error] {error}"),
233 severity: Severity::Error,
234 fix: None,
235 });
236 }
237
238 let fenced_blocks = derive_fenced_code_blocks(ctx);
240
241 let disabled_ranges = compute_disabled_ranges(content, self.name());
243
244 let preferred_labels = if self.config.style == LanguageStyle::Consistent {
246 self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
247 } else {
248 HashMap::new()
249 };
250
251 let lines = ctx.raw_lines();
252
253 for block in &fenced_blocks {
254 if is_line_disabled(&disabled_ranges, block.line_idx) {
256 continue;
257 }
258
259 let line = lines.get(block.line_idx).unwrap_or(&"");
261 let trimmed = line.trim();
262 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
263
264 let has_mkdocs_attrs_only =
266 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
267
268 let is_myst_directive =
271 ctx.flavor.supports_myst_directives() && after_fence.starts_with('{') && after_fence.contains('}') && {
272 let name = after_fence.trim_start_matches('{').split('}').next().unwrap_or("");
273 !name.is_empty() && name.chars().next().is_some_and(|c| c.is_alphabetic() || c == '_')
274 };
275
276 let is_pandoc_raw =
287 ctx.flavor.is_pandoc_compatible() && crate::utils::pandoc::is_pandoc_raw_block_lang(after_fence);
288 let is_pandoc_class_attr =
289 ctx.flavor.is_pandoc_compatible() && crate::utils::pandoc::is_pandoc_code_class_attr(after_fence);
290 let is_quarto_exec = ctx.flavor == crate::config::MarkdownFlavor::Quarto
291 && after_fence.starts_with('{')
292 && after_fence.ends_with('}')
293 && !is_pandoc_raw
294 && !is_pandoc_class_attr;
295 let has_pandoc_or_quarto_syntax = is_pandoc_raw || is_pandoc_class_attr || is_quarto_exec;
296 let is_unrecognized_brace_syntax = after_fence.starts_with('{')
297 && after_fence.ends_with('}')
298 && !has_pandoc_or_quarto_syntax
299 && !is_myst_directive;
300
301 let needs_language = !has_mkdocs_attrs_only
302 && !is_myst_directive
303 && (block.language.is_empty()
304 || is_superfences_attribute(&block.language)
305 || is_unrecognized_brace_syntax);
306
307 if needs_language && !has_pandoc_or_quarto_syntax {
308 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
309
310 warnings.push(LintWarning {
311 rule_name: Some(self.name().to_string()),
312 line: start_line,
313 column: start_col,
314 end_line,
315 end_column: end_col,
316 message: "Code block (```) missing language".to_string(),
317 severity: Severity::Warning,
318 fix: Some(Fix::new(
319 {
320 let trimmed = line.trim_start();
321 let trimmed_start = line.len() - trimmed.len();
322 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
323 let fence_end_byte = line_start_byte + trimmed_start + block.fence_marker.len();
324 let line_end_byte = line_start_byte + line.len();
328 fence_end_byte..line_end_byte
329 },
330 {
331 let trimmed = line.trim_start();
332 let after_fence = &trimmed[block.fence_marker.len()..];
333 let after_fence_trimmed = after_fence.trim();
334 if after_fence_trimmed.is_empty() {
335 "text".to_string()
336 } else {
337 format!("text {after_fence_trimmed}")
338 }
339 },
340 )),
341 });
342 continue;
343 }
344
345 if has_pandoc_or_quarto_syntax {
347 continue;
348 }
349
350 let canonical = resolve_canonical(&block.language);
351
352 if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
354 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
355
356 warnings.push(LintWarning {
357 rule_name: Some(self.name().to_string()),
358 line: start_line,
359 column: start_col,
360 end_line,
361 end_column: end_col,
362 message: msg,
363 severity: Severity::Warning,
364 fix: None,
365 });
366 continue;
367 }
368
369 if canonical.is_none() {
371 if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
372 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
373
374 warnings.push(LintWarning {
375 rule_name: Some(self.name().to_string()),
376 line: start_line,
377 column: start_col,
378 end_line,
379 end_column: end_col,
380 message: msg,
381 severity,
382 fix: None,
383 });
384 }
385 continue;
386 }
387
388 if self.config.style == LanguageStyle::Consistent
390 && let Some(preferred) = preferred_labels.get(canonical.unwrap())
391 && &block.language != preferred
392 {
393 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
394
395 let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
396 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
397 Fix::new(
398 (line_start_byte + label_start)..(line_start_byte + label_end),
399 preferred.clone(),
400 )
401 });
402 let lang = &block.language;
403 let canonical = canonical.unwrap();
404
405 warnings.push(LintWarning {
406 rule_name: Some(self.name().to_string()),
407 line: start_line,
408 column: start_col,
409 end_line,
410 end_column: end_col,
411 message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
412 severity: Severity::Warning,
413 fix,
414 });
415 }
416 }
417
418 Ok(warnings)
419 }
420
421 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
422 if self.should_skip(ctx) {
423 return Ok(ctx.content.to_string());
424 }
425 let warnings = self.check(ctx)?;
426 if warnings.is_empty() {
427 return Ok(ctx.content.to_string());
428 }
429 let warnings =
430 crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
431 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
432 }
433
/// Returns the category this rule is grouped under: code blocks.
fn category(&self) -> RuleCategory {
    RuleCategory::CodeBlock
}
438
439 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
441 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
442 }
443
/// Exposes the rule as `&dyn Any`, which permits downcasting to the concrete type.
fn as_any(&self) -> &dyn std::any::Any {
    self
}
447
448 fn default_config_section(&self) -> Option<(String, toml::Value)> {
449 let default_config = MD040Config::default();
450 let json_value = serde_json::to_value(&default_config).ok()?;
451 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
452
453 if let toml::Value::Table(table) = toml_value {
454 if !table.is_empty() {
455 Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
456 } else {
457 None
458 }
459 } else {
460 None
461 }
462 }
463
464 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
465 where
466 Self: Sized,
467 {
468 let rule_config: MD040Config = load_rule_config(config);
469 Box::new(MD040FencedCodeLanguage::with_config(rule_config))
470 }
471}
472
473fn derive_fenced_code_blocks(ctx: &crate::lint_context::LintContext) -> Vec<FencedCodeBlock> {
475 let content = ctx.content;
476 let line_offsets = &ctx.line_offsets;
477
478 ctx.code_block_details
479 .iter()
480 .filter(|d| d.is_fenced)
481 .map(|detail| {
482 let line_idx = match line_offsets.binary_search(&detail.start) {
483 Ok(idx) => idx,
484 Err(idx) => idx.saturating_sub(1),
485 };
486
487 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
489 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
490 let line = content.get(line_start..line_end).unwrap_or("");
491 let trimmed = line.trim();
492 let fence_marker = if trimmed.starts_with('`') {
493 let count = trimmed.chars().take_while(|&c| c == '`').count();
494 "`".repeat(count)
495 } else if trimmed.starts_with('~') {
496 let count = trimmed.chars().take_while(|&c| c == '~').count();
497 "~".repeat(count)
498 } else {
499 "```".to_string()
500 };
501
502 let language = detail.info_string.split_whitespace().next().unwrap_or("").to_string();
503
504 FencedCodeBlock {
505 line_idx,
506 language,
507 fence_marker,
508 }
509 })
510 .collect()
511}
512
513fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
515 let mut ranges = Vec::new();
516 let mut disabled_start: Option<usize> = None;
517
518 for (i, line) in content.lines().enumerate() {
519 let trimmed = line.trim();
520
521 if let Some(rules) = crate::inline_config::parse_disable_comment(trimmed)
522 && (rules.is_empty() || rules.contains(&rule_name))
523 && disabled_start.is_none()
524 {
525 disabled_start = Some(i);
526 }
527
528 if let Some(rules) = crate::inline_config::parse_enable_comment(trimmed)
529 && (rules.is_empty() || rules.contains(&rule_name))
530 && let Some(start) = disabled_start.take()
531 {
532 ranges.push((start, i));
533 }
534 }
535
536 if let Some(start) = disabled_start {
538 ranges.push((start, usize::MAX));
539 }
540
541 ranges
542}
543
/// Returns `true` when `line_idx` lies inside any of the half-open
/// `[start, end)` ranges.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    ranges
        .iter()
        .any(|(start, end)| (*start..*end).contains(&line_idx))
}
548
/// Locates the language label on a fence line.
///
/// `line` must, after its leading whitespace, begin with `fence_marker`.
/// The label is the first run of non-whitespace characters following the
/// marker. Returns its `(start, end)` byte offsets within `line` (end
/// exclusive), or `None` when the line does not start with the marker or
/// nothing but whitespace follows it.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let indent = line.len() - line.trim_start().len();
    let info = line[indent..].strip_prefix(fence_marker)?;
    // Offset of `info` within `line`.
    let info_offset = indent + fence_marker.len();

    let label_offset = info.find(|ch: char| !ch.is_whitespace())?;
    let label_len = info[label_offset..]
        .find(char::is_whitespace)
        .unwrap_or(info.len() - label_offset);

    let start = info_offset + label_offset;
    Some((start, start + label_len))
}
573
574#[cfg(test)]
575mod tests {
576 use super::*;
577 use crate::lint_context::LintContext;
578
579 fn run_check(content: &str) -> LintResult {
580 let rule = MD040FencedCodeLanguage::default();
581 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
582 rule.check(&ctx)
583 }
584
585 fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
586 let rule = MD040FencedCodeLanguage::with_config(config);
587 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
588 rule.check(&ctx)
589 }
590
591 fn run_fix(content: &str) -> Result<String, LintError> {
592 let rule = MD040FencedCodeLanguage::default();
593 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
594 rule.fix(&ctx)
595 }
596
597 fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
598 let rule = MD040FencedCodeLanguage::with_config(config);
599 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
600 rule.fix(&ctx)
601 }
602
603 fn run_check_mkdocs(content: &str) -> LintResult {
604 let rule = MD040FencedCodeLanguage::default();
605 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
606 rule.check(&ctx)
607 }
608
609 #[test]
614 fn test_code_blocks_with_language_specified() {
615 let content = r#"# Test
616
617```python
618print("Hello, world!")
619```
620
621```javascript
622console.log("Hello!");
623```
624"#;
625 let result = run_check(content).unwrap();
626 assert!(result.is_empty(), "No warnings expected for code blocks with language");
627 }
628
629 #[test]
630 fn test_code_blocks_without_language() {
631 let content = r#"# Test
632
633```
634print("Hello, world!")
635```
636"#;
637 let result = run_check(content).unwrap();
638 assert_eq!(result.len(), 1);
639 assert_eq!(result[0].message, "Code block (```) missing language");
640 assert_eq!(result[0].line, 3);
641 }
642
643 #[test]
644 fn test_fix_method_adds_text_language() {
645 let content = r#"# Test
646
647```
648code without language
649```
650
651```python
652already has language
653```
654
655```
656another block without
657```
658"#;
659 let fixed = run_fix(content).unwrap();
660 assert!(fixed.contains("```text"));
661 assert!(fixed.contains("```python"));
662 assert_eq!(fixed.matches("```text").count(), 2);
663 }
664
665 #[test]
666 fn test_fix_preserves_indentation() {
667 let content = r#"# Test
668
669- List item
670 ```
671 indented code block
672 ```
673"#;
674 let fixed = run_fix(content).unwrap();
675 assert!(fixed.contains(" ```text"));
676 }
677
678 #[test]
683 fn test_consistent_mode_detects_inconsistency() {
684 let content = r#"```bash
685echo hi
686```
687
688```sh
689echo there
690```
691
692```bash
693echo again
694```
695"#;
696 let config = MD040Config {
697 style: LanguageStyle::Consistent,
698 ..Default::default()
699 };
700 let result = run_check_with_config(content, config).unwrap();
701 assert_eq!(result.len(), 1);
702 assert!(result[0].message.contains("Inconsistent"));
703 assert!(result[0].message.contains("sh"));
704 assert!(result[0].message.contains("bash"));
705 }
706
707 #[test]
708 fn test_consistent_mode_fix_normalizes() {
709 let content = r#"```bash
710echo hi
711```
712
713```sh
714echo there
715```
716
717```bash
718echo again
719```
720"#;
721 let config = MD040Config {
722 style: LanguageStyle::Consistent,
723 ..Default::default()
724 };
725 let fixed = run_fix_with_config(content, config).unwrap();
726 assert_eq!(fixed.matches("```bash").count(), 3);
727 assert_eq!(fixed.matches("```sh").count(), 0);
728 }
729
730 #[test]
731 fn test_consistent_mode_tie_break_uses_curated_default() {
732 let content = r#"```bash
734echo hi
735```
736
737```sh
738echo there
739```
740"#;
741 let config = MD040Config {
742 style: LanguageStyle::Consistent,
743 ..Default::default()
744 };
745 let fixed = run_fix_with_config(content, config).unwrap();
746 assert_eq!(fixed.matches("```bash").count(), 2);
748 }
749
750 #[test]
751 fn test_consistent_mode_with_preferred_alias() {
752 let content = r#"```bash
753echo hi
754```
755
756```sh
757echo there
758```
759"#;
760 let mut preferred = HashMap::new();
761 preferred.insert("Shell".to_string(), "sh".to_string());
762
763 let config = MD040Config {
764 style: LanguageStyle::Consistent,
765 preferred_aliases: preferred,
766 ..Default::default()
767 };
768 let fixed = run_fix_with_config(content, config).unwrap();
769 assert_eq!(fixed.matches("```sh").count(), 2);
770 assert_eq!(fixed.matches("```bash").count(), 0);
771 }
772
773 #[test]
774 fn test_consistent_mode_ignores_disabled_blocks() {
775 let content = r#"```bash
776echo hi
777```
778<!-- rumdl-disable MD040 -->
779```sh
780echo there
781```
782```sh
783echo again
784```
785<!-- rumdl-enable MD040 -->
786"#;
787 let config = MD040Config {
788 style: LanguageStyle::Consistent,
789 ..Default::default()
790 };
791 let result = run_check_with_config(content, config).unwrap();
792 assert!(result.is_empty(), "Disabled blocks should not affect consistency");
793 }
794
795 #[test]
796 fn test_fix_preserves_attributes() {
797 let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
798 let config = MD040Config {
799 style: LanguageStyle::Consistent,
800 ..Default::default()
801 };
802 let fixed = run_fix_with_config(content, config).unwrap();
803 assert!(fixed.contains("```bash {.highlight}"));
804 }
805
806 #[test]
807 fn test_fix_preserves_spacing_before_label() {
808 let content = "```bash\ncode\n```\n\n``` sh {.highlight}\ncode\n```";
809 let config = MD040Config {
810 style: LanguageStyle::Consistent,
811 ..Default::default()
812 };
813 let fixed = run_fix_with_config(content, config).unwrap();
814 assert!(fixed.contains("``` bash {.highlight}"));
815 assert!(!fixed.contains("``` sh {.highlight}"));
816 }
817
818 #[test]
823 fn test_allowlist_blocks_unlisted() {
824 let content = "```java\ncode\n```";
825 let config = MD040Config {
826 allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
827 ..Default::default()
828 };
829 let result = run_check_with_config(content, config).unwrap();
830 assert_eq!(result.len(), 1);
831 assert!(result[0].message.contains("not in the allowed list"));
832 }
833
834 #[test]
835 fn test_allowlist_allows_listed() {
836 let content = "```python\ncode\n```";
837 let config = MD040Config {
838 allowed_languages: vec!["Python".to_string()],
839 ..Default::default()
840 };
841 let result = run_check_with_config(content, config).unwrap();
842 assert!(result.is_empty());
843 }
844
845 #[test]
846 fn test_allowlist_blocks_unknown_language() {
847 let content = "```mysterylang\ncode\n```";
848 let config = MD040Config {
849 allowed_languages: vec!["Python".to_string()],
850 ..Default::default()
851 };
852 let result = run_check_with_config(content, config).unwrap();
853 assert_eq!(result.len(), 1);
854 assert!(result[0].message.contains("allowed list"));
855 }
856
857 #[test]
858 fn test_allowlist_case_insensitive() {
859 let content = "```python\ncode\n```";
860 let config = MD040Config {
861 allowed_languages: vec!["PYTHON".to_string()],
862 ..Default::default()
863 };
864 let result = run_check_with_config(content, config).unwrap();
865 assert!(result.is_empty());
866 }
867
868 #[test]
869 fn test_denylist_blocks_listed() {
870 let content = "```java\ncode\n```";
871 let config = MD040Config {
872 disallowed_languages: vec!["Java".to_string()],
873 ..Default::default()
874 };
875 let result = run_check_with_config(content, config).unwrap();
876 assert_eq!(result.len(), 1);
877 assert!(result[0].message.contains("disallowed"));
878 }
879
880 #[test]
881 fn test_denylist_allows_unlisted() {
882 let content = "```python\ncode\n```";
883 let config = MD040Config {
884 disallowed_languages: vec!["Java".to_string()],
885 ..Default::default()
886 };
887 let result = run_check_with_config(content, config).unwrap();
888 assert!(result.is_empty());
889 }
890
891 #[test]
892 fn test_allowlist_takes_precedence_over_denylist() {
893 let content = "```python\ncode\n```";
894 let config = MD040Config {
895 allowed_languages: vec!["Python".to_string()],
896 disallowed_languages: vec!["Python".to_string()], ..Default::default()
898 };
899 let result = run_check_with_config(content, config).unwrap();
900 assert!(result.is_empty());
901 }
902
903 #[test]
908 fn test_unknown_language_ignore_default() {
909 let content = "```mycustomlang\ncode\n```";
910 let result = run_check(content).unwrap();
911 assert!(result.is_empty(), "Unknown languages ignored by default");
912 }
913
914 #[test]
915 fn test_unknown_language_warn() {
916 let content = "```mycustomlang\ncode\n```";
917 let config = MD040Config {
918 unknown_language_action: UnknownLanguageAction::Warn,
919 ..Default::default()
920 };
921 let result = run_check_with_config(content, config).unwrap();
922 assert_eq!(result.len(), 1);
923 assert!(result[0].message.contains("Unknown language"));
924 assert!(result[0].message.contains("mycustomlang"));
925 assert_eq!(result[0].severity, Severity::Warning);
926 }
927
928 #[test]
929 fn test_unknown_language_error() {
930 let content = "```mycustomlang\ncode\n```";
931 let config = MD040Config {
932 unknown_language_action: UnknownLanguageAction::Error,
933 ..Default::default()
934 };
935 let result = run_check_with_config(content, config).unwrap();
936 assert_eq!(result.len(), 1);
937 assert!(result[0].message.contains("Unknown language"));
938 assert_eq!(result[0].severity, Severity::Error);
939 }
940
941 #[test]
946 fn test_invalid_preferred_alias_detected() {
947 let mut preferred = HashMap::new();
948 preferred.insert("Shell".to_string(), "invalid_alias".to_string());
949
950 let config = MD040Config {
951 style: LanguageStyle::Consistent,
952 preferred_aliases: preferred,
953 ..Default::default()
954 };
955 let rule = MD040FencedCodeLanguage::with_config(config);
956 let errors = rule.validate_config();
957 assert_eq!(errors.len(), 1);
958 assert!(errors[0].contains("Invalid alias"));
959 assert!(errors[0].contains("invalid_alias"));
960 }
961
962 #[test]
963 fn test_unknown_language_in_preferred_aliases_detected() {
964 let mut preferred = HashMap::new();
965 preferred.insert("NotARealLanguage".to_string(), "nope".to_string());
966
967 let config = MD040Config {
968 style: LanguageStyle::Consistent,
969 preferred_aliases: preferred,
970 ..Default::default()
971 };
972 let rule = MD040FencedCodeLanguage::with_config(config);
973 let errors = rule.validate_config();
974 assert_eq!(errors.len(), 1);
975 assert!(errors[0].contains("Unknown language"));
976 }
977
978 #[test]
979 fn test_valid_preferred_alias_accepted() {
980 let mut preferred = HashMap::new();
981 preferred.insert("Shell".to_string(), "bash".to_string());
982 preferred.insert("JavaScript".to_string(), "js".to_string());
983
984 let config = MD040Config {
985 style: LanguageStyle::Consistent,
986 preferred_aliases: preferred,
987 ..Default::default()
988 };
989 let rule = MD040FencedCodeLanguage::with_config(config);
990 let errors = rule.validate_config();
991 assert!(errors.is_empty());
992 }
993
994 #[test]
995 fn test_config_error_uses_valid_line_column() {
996 let config = md040_config::MD040Config {
997 preferred_aliases: {
998 let mut map = std::collections::HashMap::new();
999 map.insert("Shell".to_string(), "invalid_alias".to_string());
1000 map
1001 },
1002 ..Default::default()
1003 };
1004 let rule = MD040FencedCodeLanguage::with_config(config);
1005
1006 let content = "```shell\necho hello\n```";
1007 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1008 let result = rule.check(&ctx).unwrap();
1009
1010 let config_error = result.iter().find(|w| w.message.contains("[config error]"));
1012 assert!(config_error.is_some(), "Should have a config error warning");
1013
1014 let warning = config_error.unwrap();
1015 assert!(
1017 warning.line >= 1,
1018 "Config error line should be >= 1, got {}",
1019 warning.line
1020 );
1021 assert!(
1022 warning.column >= 1,
1023 "Config error column should be >= 1, got {}",
1024 warning.column
1025 );
1026 }
1027
1028 #[test]
1033 fn test_linguist_resolution() {
1034 assert_eq!(resolve_canonical("bash"), Some("Shell"));
1035 assert_eq!(resolve_canonical("sh"), Some("Shell"));
1036 assert_eq!(resolve_canonical("zsh"), Some("Shell"));
1037 assert_eq!(resolve_canonical("js"), Some("JavaScript"));
1038 assert_eq!(resolve_canonical("python"), Some("Python"));
1039 assert_eq!(resolve_canonical("unknown_lang"), None);
1040 }
1041
1042 #[test]
1043 fn test_linguist_resolution_case_insensitive() {
1044 assert_eq!(resolve_canonical("BASH"), Some("Shell"));
1045 assert_eq!(resolve_canonical("Bash"), Some("Shell"));
1046 assert_eq!(resolve_canonical("Python"), Some("Python"));
1047 assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
1048 }
1049
1050 #[test]
1051 fn test_alias_validation() {
1052 assert!(is_valid_alias("Shell", "bash"));
1053 assert!(is_valid_alias("Shell", "sh"));
1054 assert!(is_valid_alias("Shell", "zsh"));
1055 assert!(!is_valid_alias("Shell", "python"));
1056 assert!(!is_valid_alias("Shell", "invalid"));
1057 }
1058
1059 #[test]
1060 fn test_default_alias() {
1061 assert_eq!(default_alias("Shell"), Some("bash"));
1062 assert_eq!(default_alias("JavaScript"), Some("js"));
1063 assert_eq!(default_alias("Python"), Some("python"));
1064 }
1065
1066 #[test]
1071 fn test_mixed_case_labels_normalized() {
1072 let content = r#"```BASH
1073echo hi
1074```
1075
1076```Bash
1077echo there
1078```
1079
1080```bash
1081echo again
1082```
1083"#;
1084 let config = MD040Config {
1085 style: LanguageStyle::Consistent,
1086 ..Default::default()
1087 };
1088 let result = run_check_with_config(content, config).unwrap();
1090 assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
1094 }
1095
1096 #[test]
1097 fn test_multiple_languages_independent() {
1098 let content = r#"```bash
1099shell code
1100```
1101
1102```python
1103python code
1104```
1105
1106```sh
1107more shell
1108```
1109
1110```python3
1111more python
1112```
1113"#;
1114 let config = MD040Config {
1115 style: LanguageStyle::Consistent,
1116 ..Default::default()
1117 };
1118 let result = run_check_with_config(content, config).unwrap();
1119 assert_eq!(result.len(), 2);
1121 }
1122
1123 #[test]
1124 fn test_tilde_fences() {
1125 let content = r#"~~~bash
1126echo hi
1127~~~
1128
1129~~~sh
1130echo there
1131~~~
1132"#;
1133 let config = MD040Config {
1134 style: LanguageStyle::Consistent,
1135 ..Default::default()
1136 };
1137 let result = run_check_with_config(content, config.clone()).unwrap();
1138 assert_eq!(result.len(), 1);
1139
1140 let fixed = run_fix_with_config(content, config).unwrap();
1141 assert!(fixed.contains("~~~bash"));
1142 assert!(!fixed.contains("~~~sh"));
1143 }
1144
1145 #[test]
1146 fn test_longer_fence_markers_preserved() {
1147 let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
1148 let config = MD040Config {
1149 style: LanguageStyle::Consistent,
1150 ..Default::default()
1151 };
1152 let fixed = run_fix_with_config(content, config).unwrap();
1153 assert!(fixed.contains("````bash"));
1154 assert!(fixed.contains("```bash"));
1155 }
1156
1157 #[test]
1158 fn test_empty_document() {
1159 let result = run_check("").unwrap();
1160 assert!(result.is_empty());
1161 }
1162
1163 #[test]
1164 fn test_no_code_blocks() {
1165 let content = "# Just a heading\n\nSome text.";
1166 let result = run_check(content).unwrap();
1167 assert!(result.is_empty());
1168 }
1169
1170 #[test]
1171 fn test_single_code_block_no_inconsistency() {
1172 let content = "```bash\necho hi\n```";
1173 let config = MD040Config {
1174 style: LanguageStyle::Consistent,
1175 ..Default::default()
1176 };
1177 let result = run_check_with_config(content, config).unwrap();
1178 assert!(result.is_empty(), "Single block has no inconsistency");
1179 }
1180
1181 #[test]
1182 fn test_idempotent_fix() {
1183 let content = r#"```bash
1184echo hi
1185```
1186
1187```sh
1188echo there
1189```
1190"#;
1191 let config = MD040Config {
1192 style: LanguageStyle::Consistent,
1193 ..Default::default()
1194 };
1195 let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
1196 let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
1197 assert_eq!(fixed1, fixed2, "Fix should be idempotent");
1198 }
1199
1200 #[test]
1205 fn test_mkdocs_superfences_title_only() {
1206 let content = r#"```title="Example"
1208echo hi
1209```
1210"#;
1211 let result = run_check_mkdocs(content).unwrap();
1212 assert!(
1213 result.is_empty(),
1214 "MkDocs superfences with title= should not require language"
1215 );
1216 }
1217
1218 #[test]
1219 fn test_mkdocs_superfences_hl_lines() {
1220 let content = r#"```hl_lines="1 2"
1222line 1
1223line 2
1224```
1225"#;
1226 let result = run_check_mkdocs(content).unwrap();
1227 assert!(
1228 result.is_empty(),
1229 "MkDocs superfences with hl_lines= should not require language"
1230 );
1231 }
1232
1233 #[test]
1234 fn test_mkdocs_superfences_linenums() {
1235 let content = r#"```linenums="1"
1237line 1
1238line 2
1239```
1240"#;
1241 let result = run_check_mkdocs(content).unwrap();
1242 assert!(
1243 result.is_empty(),
1244 "MkDocs superfences with linenums= should not require language"
1245 );
1246 }
1247
1248 #[test]
1249 fn test_mkdocs_superfences_class() {
1250 let content = r#"```.my-class
1252some text
1253```
1254"#;
1255 let result = run_check_mkdocs(content).unwrap();
1256 assert!(
1257 result.is_empty(),
1258 "MkDocs superfences with .class should not require language"
1259 );
1260 }
1261
1262 #[test]
1263 fn test_mkdocs_superfences_id() {
1264 let content = r#"```#my-id
1266some text
1267```
1268"#;
1269 let result = run_check_mkdocs(content).unwrap();
1270 assert!(
1271 result.is_empty(),
1272 "MkDocs superfences with #id should not require language"
1273 );
1274 }
1275
1276 #[test]
1277 fn test_mkdocs_superfences_with_language() {
1278 let content = r#"```python title="Example" hl_lines="1"
1280print("hello")
1281```
1282"#;
1283 let result = run_check_mkdocs(content).unwrap();
1284 assert!(result.is_empty(), "Code block with language and attrs should pass");
1285 }
1286
1287 #[test]
1288 fn test_standard_flavor_no_special_handling() {
1289 let content = r#"```title="Example"
1291echo hi
1292```
1293"#;
1294 let result = run_check(content).unwrap();
1295 assert_eq!(
1296 result.len(),
1297 1,
1298 "Standard flavor should warn about title= without language"
1299 );
1300 }
1301
1302 #[test]
1303 fn test_pandoc_raw_block_skipped_under_pandoc_flavor() {
1304 let rule = MD040FencedCodeLanguage::default();
1307 let content = "```{=html}\n<div>raw html</div>\n```\n";
1308 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Pandoc, None);
1309 let result = rule.check(&ctx).unwrap();
1310 assert!(
1311 result.is_empty(),
1312 "MD040 should skip Pandoc raw blocks ({{=html}}) under Pandoc flavor: {result:?}"
1313 );
1314 }
1315
1316 #[test]
1317 fn test_pandoc_raw_block_skipped_under_quarto_flavor() {
1318 let rule = MD040FencedCodeLanguage::default();
1320 let content = "```{=html}\n<div>raw html</div>\n```\n";
1321 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
1322 let result = rule.check(&ctx).unwrap();
1323 assert!(
1324 result.is_empty(),
1325 "MD040 should skip Pandoc raw blocks ({{=html}}) under Quarto flavor: {result:?}"
1326 );
1327 }
1328
1329 #[test]
1332 fn test_pandoc_accepts_raw_html_block() {
1333 use crate::config::MarkdownFlavor;
1334 let rule = MD040FencedCodeLanguage::default();
1335 let content = "```{=html}\n<div>raw</div>\n```\n";
1336 let ctx = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1337 let result = rule.check(&ctx).unwrap();
1338 assert!(result.is_empty(), "MD040 should accept ```{{=html}}```: {result:?}");
1339 }
1340
1341 #[test]
1344 fn test_pandoc_rejects_quarto_exec_blocks() {
1345 use crate::config::MarkdownFlavor;
1346 let rule = MD040FencedCodeLanguage::default();
1347 let content = "```{r}\nsummary(data)\n```\n";
1348 let ctx = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1349 let result = rule.check(&ctx).unwrap();
1350 assert!(
1351 !result.is_empty(),
1352 "MD040 under Pandoc should flag `{{r}}` (Quarto-only)"
1353 );
1354 }
1355
1356 #[test]
1358 fn test_quarto_still_accepts_exec_block() {
1359 use crate::config::MarkdownFlavor;
1360 let rule = MD040FencedCodeLanguage::default();
1361 let content = "```{r}\nsummary(data)\n```\n";
1362 let ctx = LintContext::new(content, MarkdownFlavor::Quarto, None);
1363 let result = rule.check(&ctx).unwrap();
1364 assert!(
1365 result.is_empty(),
1366 "MD040 under Quarto should accept `{{r}}`: {result:?}"
1367 );
1368 }
1369
1370 #[test]
1371 fn test_quarto_exec_block_skipped_under_quarto_only() {
1372 let rule = MD040FencedCodeLanguage::default();
1376 let content = "```{r}\n1 + 1\n```\n";
1377
1378 let ctx_quarto = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
1379 let result_quarto = rule.check(&ctx_quarto).unwrap();
1380 assert!(
1381 result_quarto.is_empty(),
1382 "MD040 should skip Quarto exec chunks under Quarto flavor: {result_quarto:?}"
1383 );
1384
1385 let ctx_pandoc = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Pandoc, None);
1388 let result_pandoc = rule.check(&ctx_pandoc).unwrap();
1389 assert!(
1390 !result_pandoc.is_empty(),
1391 "MD040 should flag `{{r}}` under Pandoc as missing a real language"
1392 );
1393 }
1394
1395 #[test]
1398 fn test_pandoc_class_attr_accepted_as_language() {
1399 use crate::config::MarkdownFlavor;
1400 let rule = MD040FencedCodeLanguage::default();
1401 let content = "```{.python}\nprint(\"hi\")\n```\n";
1402
1403 let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1404 let result_pandoc = rule.check(&ctx_pandoc).unwrap();
1405 assert!(
1406 result_pandoc.is_empty(),
1407 "MD040 under Pandoc should accept ```{{.python}}``` as language declaration: {result_pandoc:?}"
1408 );
1409
1410 let ctx_quarto = LintContext::new(content, MarkdownFlavor::Quarto, None);
1411 let result_quarto = rule.check(&ctx_quarto).unwrap();
1412 assert!(
1413 result_quarto.is_empty(),
1414 "MD040 under Quarto should accept ```{{.python}}``` as language declaration: {result_quarto:?}"
1415 );
1416 }
1417
1418 #[test]
1421 fn test_pandoc_class_attr_with_extra_attributes_accepted() {
1422 use crate::config::MarkdownFlavor;
1423 let rule = MD040FencedCodeLanguage::default();
1424 let content = "```{.haskell .numberLines}\nmain = putStrLn \"hi\"\n```\n";
1425
1426 let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1427 let result_pandoc = rule.check(&ctx_pandoc).unwrap();
1428 assert!(
1429 result_pandoc.is_empty(),
1430 "MD040 under Pandoc should accept ```{{.haskell .numberLines}}```: {result_pandoc:?}"
1431 );
1432
1433 let ctx_quarto = LintContext::new(content, MarkdownFlavor::Quarto, None);
1434 let result_quarto = rule.check(&ctx_quarto).unwrap();
1435 assert!(
1436 result_quarto.is_empty(),
1437 "MD040 under Quarto should accept ```{{.haskell .numberLines}}```: {result_quarto:?}"
1438 );
1439 }
1440
1441 #[test]
1444 fn test_pandoc_class_attr_with_id_and_keyvalue_accepted() {
1445 use crate::config::MarkdownFlavor;
1446 let rule = MD040FencedCodeLanguage::default();
1447 let content = "```{#snippet .python startFrom=\"10\"}\nprint(1)\n```\n";
1448
1449 let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1450 let result_pandoc = rule.check(&ctx_pandoc).unwrap();
1451 assert!(
1452 result_pandoc.is_empty(),
1453 "MD040 under Pandoc should accept ```{{#snippet .python …}}```: {result_pandoc:?}"
1454 );
1455 }
1456
1457 #[test]
1460 fn test_standard_still_flags_pandoc_class_attr() {
1461 use crate::config::MarkdownFlavor;
1462 let rule = MD040FencedCodeLanguage::default();
1463 let content = "```{.python}\nprint(\"hi\")\n```\n";
1464
1465 let ctx_standard = LintContext::new(content, MarkdownFlavor::Standard, None);
1466 let result_standard = rule.check(&ctx_standard).unwrap();
1467 assert!(
1468 !result_standard.is_empty(),
1469 "MD040 under Standard should still flag ```{{.python}}``` (no Pandoc support)"
1470 );
1471 }
1472
1473 #[test]
1476 fn test_pandoc_id_only_attr_still_flagged() {
1477 use crate::config::MarkdownFlavor;
1478 let rule = MD040FencedCodeLanguage::default();
1479 let content = "```{#myid}\ncode here\n```\n";
1480
1481 let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1482 let result_pandoc = rule.check(&ctx_pandoc).unwrap();
1483 assert!(
1484 !result_pandoc.is_empty(),
1485 "MD040 under Pandoc should flag ```{{#myid}}``` — id without class declares no language"
1486 );
1487 }
1488
1489 #[test]
1491 fn test_pandoc_empty_braces_still_flagged() {
1492 use crate::config::MarkdownFlavor;
1493 let rule = MD040FencedCodeLanguage::default();
1494 let content = "```{}\ncode here\n```\n";
1495
1496 let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1497 let result_pandoc = rule.check(&ctx_pandoc).unwrap();
1498 assert!(
1499 !result_pandoc.is_empty(),
1500 "MD040 under Pandoc should flag ```{{}}``` (no language declared)"
1501 );
1502 }
1503}