1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use std::collections::HashMap;
6
7pub mod md040_config;
11
/// Prefixes that mark the text after a fence as a MkDocs superfences
/// attribute (`title=`, `hl_lines=`, `linenums=`, a `.class`, or an `#id`)
/// rather than a language label.
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &["title=", "hl_lines=", "linenums=", ".", "#"];

/// Returns `true` when `s` begins with any of the superfences attribute prefixes.
#[inline]
fn is_superfences_attribute(s: &str) -> bool {
    for prefix in MKDOCS_SUPERFENCES_ATTR_PREFIXES {
        if s.starts_with(*prefix) {
            return true;
        }
    }
    false
}
34use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
35
/// The opening fence of one fenced code block, as extracted by
/// `derive_fenced_code_blocks`.
struct FencedCodeBlock {
    /// Index of the opening fence line; used 0-based to index the raw lines
    /// and converted with `+ 1` when reporting positions.
    line_idx: usize,
    /// First whitespace-separated token of the block's info string, or an
    /// empty string when the info string has no tokens.
    language: String,
    /// The leading run of backticks or tildes on the fence line; falls back
    /// to "```" when the trimmed line starts with neither character.
    fence_marker: String,
}
44
/// Rule MD040: fenced code blocks should have a language specified.
#[derive(Debug, Clone, Default)]
pub struct MD040FencedCodeLanguage {
    /// Rule settings: label style, preferred aliases, allowed/disallowed
    /// languages, and the action taken for unknown languages.
    config: MD040Config,
}
49
50impl MD040FencedCodeLanguage {
    /// Creates the rule with an explicit configuration.
    pub fn with_config(config: MD040Config) -> Self {
        Self { config }
    }
54
55 fn validate_config(&self) -> Vec<String> {
57 let mut errors = Vec::new();
58
59 for (canonical, alias) in &self.config.preferred_aliases {
61 if let Some(actual_canonical) = resolve_canonical(canonical) {
63 if !is_valid_alias(actual_canonical, alias)
64 && let Some(valid_aliases) = get_aliases(actual_canonical)
65 {
66 let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
67 let valid_str = valid_list
68 .iter()
69 .map(|s| format!("'{s}'"))
70 .collect::<Vec<_>>()
71 .join(", ");
72 let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
73 errors.push(format!(
74 "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
75 ));
76 }
77 } else {
78 errors.push(format!(
79 "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
80 ));
81 }
82 }
83
84 errors
85 }
86
87 fn compute_preferred_labels(
89 &self,
90 blocks: &[FencedCodeBlock],
91 disabled_ranges: &[(usize, usize)],
92 ) -> HashMap<String, String> {
93 let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
95
96 for block in blocks {
97 if is_line_disabled(disabled_ranges, block.line_idx) {
98 continue;
99 }
100 if block.language.is_empty() {
101 continue;
102 }
103 if let Some(canonical) = resolve_canonical(&block.language) {
104 by_canonical
105 .entry(canonical.to_string())
106 .or_default()
107 .push(&block.language);
108 }
109 }
110
111 let mut result = HashMap::new();
113
114 for (canonical, labels) in by_canonical {
115 let winner = if let Some(preferred) = self
117 .config
118 .preferred_aliases
119 .iter()
120 .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
121 .map(|(_, v)| v.clone())
122 {
123 preferred
124 } else {
125 let mut counts: HashMap<&str, usize> = HashMap::new();
127 for label in &labels {
128 *counts.entry(*label).or_default() += 1;
129 }
130
131 let max_count = counts.values().max().copied().unwrap_or(0);
132 let winners: Vec<_> = counts
133 .iter()
134 .filter(|(_, c)| **c == max_count)
135 .map(|(l, _)| *l)
136 .collect();
137
138 if winners.len() == 1 {
139 winners[0].to_string()
140 } else {
141 default_alias(&canonical)
143 .filter(|default| winners.contains(default))
144 .map_or_else(
145 || winners.into_iter().min().unwrap().to_string(),
146 std::string::ToString::to_string,
147 )
148 }
149 };
150
151 result.insert(canonical, winner);
152 }
153
154 result
155 }
156
157 fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
159 if !self.config.allowed_languages.is_empty() {
161 let allowed = self.config.allowed_languages.join(", ");
162 let Some(canonical) = canonical else {
163 return Some(format!(
164 "Language '{original_label}' is not in the allowed list: {allowed}"
165 ));
166 };
167 if !self
168 .config
169 .allowed_languages
170 .iter()
171 .any(|a| a.eq_ignore_ascii_case(canonical))
172 {
173 return Some(format!(
174 "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
175 ));
176 }
177 } else if !self.config.disallowed_languages.is_empty()
178 && canonical.is_some_and(|canonical| {
179 self.config
180 .disallowed_languages
181 .iter()
182 .any(|d| d.eq_ignore_ascii_case(canonical))
183 })
184 {
185 let canonical = canonical.unwrap_or("unknown");
186 return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
187 }
188 None
189 }
190
191 fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
193 if resolve_canonical(label).is_some() {
194 return None;
195 }
196
197 match self.config.unknown_language_action {
198 UnknownLanguageAction::Ignore => None,
199 UnknownLanguageAction::Warn => Some((
200 format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
201 Severity::Warning,
202 )),
203 UnknownLanguageAction::Error => Some((
204 format!("Unknown language '{label}' (not in GitHub Linguist)"),
205 Severity::Error,
206 )),
207 }
208 }
209}
210
211impl Rule for MD040FencedCodeLanguage {
    /// Identifier of this rule.
    fn name(&self) -> &'static str {
        "MD040"
    }
215
    /// One-line summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Code blocks should have a language specified"
    }
219
220 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
221 let content = ctx.content;
222 let mut warnings = Vec::new();
223
224 for error in self.validate_config() {
226 warnings.push(LintWarning {
227 rule_name: Some(self.name().to_string()),
228 line: 1,
229 column: 1,
230 end_line: 1,
231 end_column: 1,
232 message: format!("[config error] {error}"),
233 severity: Severity::Error,
234 fix: None,
235 });
236 }
237
238 let fenced_blocks = derive_fenced_code_blocks(ctx);
240
241 let disabled_ranges = compute_disabled_ranges(content, self.name());
243
244 let preferred_labels = if self.config.style == LanguageStyle::Consistent {
246 self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
247 } else {
248 HashMap::new()
249 };
250
251 let lines = ctx.raw_lines();
252
253 for block in &fenced_blocks {
254 if is_line_disabled(&disabled_ranges, block.line_idx) {
256 continue;
257 }
258
259 let line = lines.get(block.line_idx).unwrap_or(&"");
261 let trimmed = line.trim();
262 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
263
264 let has_mkdocs_attrs_only =
266 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
267
268 let is_pandoc_raw =
279 ctx.flavor.is_pandoc_compatible() && crate::utils::pandoc::is_pandoc_raw_block_lang(after_fence);
280 let is_pandoc_class_attr =
281 ctx.flavor.is_pandoc_compatible() && crate::utils::pandoc::is_pandoc_code_class_attr(after_fence);
282 let is_quarto_exec = ctx.flavor == crate::config::MarkdownFlavor::Quarto
283 && after_fence.starts_with('{')
284 && after_fence.ends_with('}')
285 && !is_pandoc_raw
286 && !is_pandoc_class_attr;
287 let has_pandoc_or_quarto_syntax = is_pandoc_raw || is_pandoc_class_attr || is_quarto_exec;
288 let is_unrecognized_brace_syntax =
289 after_fence.starts_with('{') && after_fence.ends_with('}') && !has_pandoc_or_quarto_syntax;
290
291 let needs_language = !has_mkdocs_attrs_only
292 && (block.language.is_empty()
293 || is_superfences_attribute(&block.language)
294 || is_unrecognized_brace_syntax);
295
296 if needs_language && !has_pandoc_or_quarto_syntax {
297 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
298
299 warnings.push(LintWarning {
300 rule_name: Some(self.name().to_string()),
301 line: start_line,
302 column: start_col,
303 end_line,
304 end_column: end_col,
305 message: "Code block (```) missing language".to_string(),
306 severity: Severity::Warning,
307 fix: Some(Fix::new(
308 {
309 let trimmed = line.trim_start();
310 let trimmed_start = line.len() - trimmed.len();
311 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
312 let fence_end_byte = line_start_byte + trimmed_start + block.fence_marker.len();
313 let line_end_byte = line_start_byte + line.len();
317 fence_end_byte..line_end_byte
318 },
319 {
320 let trimmed = line.trim_start();
321 let after_fence = &trimmed[block.fence_marker.len()..];
322 let after_fence_trimmed = after_fence.trim();
323 if after_fence_trimmed.is_empty() {
324 "text".to_string()
325 } else {
326 format!("text {after_fence_trimmed}")
327 }
328 },
329 )),
330 });
331 continue;
332 }
333
334 if has_pandoc_or_quarto_syntax {
336 continue;
337 }
338
339 let canonical = resolve_canonical(&block.language);
340
341 if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
343 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
344
345 warnings.push(LintWarning {
346 rule_name: Some(self.name().to_string()),
347 line: start_line,
348 column: start_col,
349 end_line,
350 end_column: end_col,
351 message: msg,
352 severity: Severity::Warning,
353 fix: None,
354 });
355 continue;
356 }
357
358 if canonical.is_none() {
360 if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
361 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
362
363 warnings.push(LintWarning {
364 rule_name: Some(self.name().to_string()),
365 line: start_line,
366 column: start_col,
367 end_line,
368 end_column: end_col,
369 message: msg,
370 severity,
371 fix: None,
372 });
373 }
374 continue;
375 }
376
377 if self.config.style == LanguageStyle::Consistent
379 && let Some(preferred) = preferred_labels.get(canonical.unwrap())
380 && &block.language != preferred
381 {
382 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
383
384 let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
385 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
386 Fix::new(
387 (line_start_byte + label_start)..(line_start_byte + label_end),
388 preferred.clone(),
389 )
390 });
391 let lang = &block.language;
392 let canonical = canonical.unwrap();
393
394 warnings.push(LintWarning {
395 rule_name: Some(self.name().to_string()),
396 line: start_line,
397 column: start_col,
398 end_line,
399 end_column: end_col,
400 message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
401 severity: Severity::Warning,
402 fix,
403 });
404 }
405 }
406
407 Ok(warnings)
408 }
409
410 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
411 if self.should_skip(ctx) {
412 return Ok(ctx.content.to_string());
413 }
414 let warnings = self.check(ctx)?;
415 if warnings.is_empty() {
416 return Ok(ctx.content.to_string());
417 }
418 let warnings =
419 crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
420 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
421 }
422
    /// This rule belongs to the code-block category.
    fn category(&self) -> RuleCategory {
        RuleCategory::CodeBlock
    }
427
428 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
430 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
431 }
432
    /// Exposes the rule as `Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
436
437 fn default_config_section(&self) -> Option<(String, toml::Value)> {
438 let default_config = MD040Config::default();
439 let json_value = serde_json::to_value(&default_config).ok()?;
440 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
441
442 if let toml::Value::Table(table) = toml_value {
443 if !table.is_empty() {
444 Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
445 } else {
446 None
447 }
448 } else {
449 None
450 }
451 }
452
453 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
454 where
455 Self: Sized,
456 {
457 let rule_config: MD040Config = load_rule_config(config);
458 Box::new(MD040FencedCodeLanguage::with_config(rule_config))
459 }
460}
461
462fn derive_fenced_code_blocks(ctx: &crate::lint_context::LintContext) -> Vec<FencedCodeBlock> {
464 let content = ctx.content;
465 let line_offsets = &ctx.line_offsets;
466
467 ctx.code_block_details
468 .iter()
469 .filter(|d| d.is_fenced)
470 .map(|detail| {
471 let line_idx = match line_offsets.binary_search(&detail.start) {
472 Ok(idx) => idx,
473 Err(idx) => idx.saturating_sub(1),
474 };
475
476 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
478 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
479 let line = content.get(line_start..line_end).unwrap_or("");
480 let trimmed = line.trim();
481 let fence_marker = if trimmed.starts_with('`') {
482 let count = trimmed.chars().take_while(|&c| c == '`').count();
483 "`".repeat(count)
484 } else if trimmed.starts_with('~') {
485 let count = trimmed.chars().take_while(|&c| c == '~').count();
486 "~".repeat(count)
487 } else {
488 "```".to_string()
489 };
490
491 let language = detail.info_string.split_whitespace().next().unwrap_or("").to_string();
492
493 FencedCodeBlock {
494 line_idx,
495 language,
496 fence_marker,
497 }
498 })
499 .collect()
500}
501
502fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
504 let mut ranges = Vec::new();
505 let mut disabled_start: Option<usize> = None;
506
507 for (i, line) in content.lines().enumerate() {
508 let trimmed = line.trim();
509
510 if let Some(rules) = crate::inline_config::parse_disable_comment(trimmed)
511 && (rules.is_empty() || rules.contains(&rule_name))
512 && disabled_start.is_none()
513 {
514 disabled_start = Some(i);
515 }
516
517 if let Some(rules) = crate::inline_config::parse_enable_comment(trimmed)
518 && (rules.is_empty() || rules.contains(&rule_name))
519 && let Some(start) = disabled_start.take()
520 {
521 ranges.push((start, i));
522 }
523 }
524
525 if let Some(start) = disabled_start {
527 ranges.push((start, usize::MAX));
528 }
529
530 ranges
531}
532
/// Returns `true` when `line_idx` lies inside any of the half-open
/// `(start, end)` ranges, i.e. `start <= line_idx < end`.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&line_idx) {
            return true;
        }
    }
    false
}
537
/// Locates the language label on an opening fence line.
///
/// Returns the `(start, end)` byte offsets, relative to `line`, of the first
/// whitespace-delimited token that follows `fence_marker`. Returns `None`
/// when the line (after its leading whitespace) does not start with
/// `fence_marker`, or when only whitespace follows the fence.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let unindented = line.trim_start();
    let indent = line.len() - unindented.len();

    let rest = unindented.strip_prefix(fence_marker)?;

    // Whitespace between the fence and the label is skipped, not replaced.
    let gap = rest.find(|c: char| !c.is_whitespace())?;
    let label_len = rest[gap..]
        .find(char::is_whitespace)
        .unwrap_or(rest.len() - gap);

    let label_start = indent + fence_marker.len() + gap;
    Some((label_start, label_start + label_len))
}
562
563#[cfg(test)]
564mod tests {
565 use super::*;
566 use crate::lint_context::LintContext;
567
568 fn run_check(content: &str) -> LintResult {
569 let rule = MD040FencedCodeLanguage::default();
570 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
571 rule.check(&ctx)
572 }
573
574 fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
575 let rule = MD040FencedCodeLanguage::with_config(config);
576 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
577 rule.check(&ctx)
578 }
579
580 fn run_fix(content: &str) -> Result<String, LintError> {
581 let rule = MD040FencedCodeLanguage::default();
582 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
583 rule.fix(&ctx)
584 }
585
586 fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
587 let rule = MD040FencedCodeLanguage::with_config(config);
588 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
589 rule.fix(&ctx)
590 }
591
592 fn run_check_mkdocs(content: &str) -> LintResult {
593 let rule = MD040FencedCodeLanguage::default();
594 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
595 rule.check(&ctx)
596 }
597
598 #[test]
603 fn test_code_blocks_with_language_specified() {
604 let content = r#"# Test
605
606```python
607print("Hello, world!")
608```
609
610```javascript
611console.log("Hello!");
612```
613"#;
614 let result = run_check(content).unwrap();
615 assert!(result.is_empty(), "No warnings expected for code blocks with language");
616 }
617
618 #[test]
619 fn test_code_blocks_without_language() {
620 let content = r#"# Test
621
622```
623print("Hello, world!")
624```
625"#;
626 let result = run_check(content).unwrap();
627 assert_eq!(result.len(), 1);
628 assert_eq!(result[0].message, "Code block (```) missing language");
629 assert_eq!(result[0].line, 3);
630 }
631
632 #[test]
633 fn test_fix_method_adds_text_language() {
634 let content = r#"# Test
635
636```
637code without language
638```
639
640```python
641already has language
642```
643
644```
645another block without
646```
647"#;
648 let fixed = run_fix(content).unwrap();
649 assert!(fixed.contains("```text"));
650 assert!(fixed.contains("```python"));
651 assert_eq!(fixed.matches("```text").count(), 2);
652 }
653
654 #[test]
655 fn test_fix_preserves_indentation() {
656 let content = r#"# Test
657
658- List item
659 ```
660 indented code block
661 ```
662"#;
663 let fixed = run_fix(content).unwrap();
664 assert!(fixed.contains(" ```text"));
665 }
666
667 #[test]
672 fn test_consistent_mode_detects_inconsistency() {
673 let content = r#"```bash
674echo hi
675```
676
677```sh
678echo there
679```
680
681```bash
682echo again
683```
684"#;
685 let config = MD040Config {
686 style: LanguageStyle::Consistent,
687 ..Default::default()
688 };
689 let result = run_check_with_config(content, config).unwrap();
690 assert_eq!(result.len(), 1);
691 assert!(result[0].message.contains("Inconsistent"));
692 assert!(result[0].message.contains("sh"));
693 assert!(result[0].message.contains("bash"));
694 }
695
696 #[test]
697 fn test_consistent_mode_fix_normalizes() {
698 let content = r#"```bash
699echo hi
700```
701
702```sh
703echo there
704```
705
706```bash
707echo again
708```
709"#;
710 let config = MD040Config {
711 style: LanguageStyle::Consistent,
712 ..Default::default()
713 };
714 let fixed = run_fix_with_config(content, config).unwrap();
715 assert_eq!(fixed.matches("```bash").count(), 3);
716 assert_eq!(fixed.matches("```sh").count(), 0);
717 }
718
719 #[test]
720 fn test_consistent_mode_tie_break_uses_curated_default() {
721 let content = r#"```bash
723echo hi
724```
725
726```sh
727echo there
728```
729"#;
730 let config = MD040Config {
731 style: LanguageStyle::Consistent,
732 ..Default::default()
733 };
734 let fixed = run_fix_with_config(content, config).unwrap();
735 assert_eq!(fixed.matches("```bash").count(), 2);
737 }
738
739 #[test]
740 fn test_consistent_mode_with_preferred_alias() {
741 let content = r#"```bash
742echo hi
743```
744
745```sh
746echo there
747```
748"#;
749 let mut preferred = HashMap::new();
750 preferred.insert("Shell".to_string(), "sh".to_string());
751
752 let config = MD040Config {
753 style: LanguageStyle::Consistent,
754 preferred_aliases: preferred,
755 ..Default::default()
756 };
757 let fixed = run_fix_with_config(content, config).unwrap();
758 assert_eq!(fixed.matches("```sh").count(), 2);
759 assert_eq!(fixed.matches("```bash").count(), 0);
760 }
761
762 #[test]
763 fn test_consistent_mode_ignores_disabled_blocks() {
764 let content = r#"```bash
765echo hi
766```
767<!-- rumdl-disable MD040 -->
768```sh
769echo there
770```
771```sh
772echo again
773```
774<!-- rumdl-enable MD040 -->
775"#;
776 let config = MD040Config {
777 style: LanguageStyle::Consistent,
778 ..Default::default()
779 };
780 let result = run_check_with_config(content, config).unwrap();
781 assert!(result.is_empty(), "Disabled blocks should not affect consistency");
782 }
783
784 #[test]
785 fn test_fix_preserves_attributes() {
786 let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
787 let config = MD040Config {
788 style: LanguageStyle::Consistent,
789 ..Default::default()
790 };
791 let fixed = run_fix_with_config(content, config).unwrap();
792 assert!(fixed.contains("```bash {.highlight}"));
793 }
794
795 #[test]
796 fn test_fix_preserves_spacing_before_label() {
797 let content = "```bash\ncode\n```\n\n``` sh {.highlight}\ncode\n```";
798 let config = MD040Config {
799 style: LanguageStyle::Consistent,
800 ..Default::default()
801 };
802 let fixed = run_fix_with_config(content, config).unwrap();
803 assert!(fixed.contains("``` bash {.highlight}"));
804 assert!(!fixed.contains("``` sh {.highlight}"));
805 }
806
807 #[test]
812 fn test_allowlist_blocks_unlisted() {
813 let content = "```java\ncode\n```";
814 let config = MD040Config {
815 allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
816 ..Default::default()
817 };
818 let result = run_check_with_config(content, config).unwrap();
819 assert_eq!(result.len(), 1);
820 assert!(result[0].message.contains("not in the allowed list"));
821 }
822
823 #[test]
824 fn test_allowlist_allows_listed() {
825 let content = "```python\ncode\n```";
826 let config = MD040Config {
827 allowed_languages: vec!["Python".to_string()],
828 ..Default::default()
829 };
830 let result = run_check_with_config(content, config).unwrap();
831 assert!(result.is_empty());
832 }
833
834 #[test]
835 fn test_allowlist_blocks_unknown_language() {
836 let content = "```mysterylang\ncode\n```";
837 let config = MD040Config {
838 allowed_languages: vec!["Python".to_string()],
839 ..Default::default()
840 };
841 let result = run_check_with_config(content, config).unwrap();
842 assert_eq!(result.len(), 1);
843 assert!(result[0].message.contains("allowed list"));
844 }
845
846 #[test]
847 fn test_allowlist_case_insensitive() {
848 let content = "```python\ncode\n```";
849 let config = MD040Config {
850 allowed_languages: vec!["PYTHON".to_string()],
851 ..Default::default()
852 };
853 let result = run_check_with_config(content, config).unwrap();
854 assert!(result.is_empty());
855 }
856
857 #[test]
858 fn test_denylist_blocks_listed() {
859 let content = "```java\ncode\n```";
860 let config = MD040Config {
861 disallowed_languages: vec!["Java".to_string()],
862 ..Default::default()
863 };
864 let result = run_check_with_config(content, config).unwrap();
865 assert_eq!(result.len(), 1);
866 assert!(result[0].message.contains("disallowed"));
867 }
868
869 #[test]
870 fn test_denylist_allows_unlisted() {
871 let content = "```python\ncode\n```";
872 let config = MD040Config {
873 disallowed_languages: vec!["Java".to_string()],
874 ..Default::default()
875 };
876 let result = run_check_with_config(content, config).unwrap();
877 assert!(result.is_empty());
878 }
879
880 #[test]
881 fn test_allowlist_takes_precedence_over_denylist() {
882 let content = "```python\ncode\n```";
883 let config = MD040Config {
884 allowed_languages: vec!["Python".to_string()],
885 disallowed_languages: vec!["Python".to_string()], ..Default::default()
887 };
888 let result = run_check_with_config(content, config).unwrap();
889 assert!(result.is_empty());
890 }
891
892 #[test]
897 fn test_unknown_language_ignore_default() {
898 let content = "```mycustomlang\ncode\n```";
899 let result = run_check(content).unwrap();
900 assert!(result.is_empty(), "Unknown languages ignored by default");
901 }
902
903 #[test]
904 fn test_unknown_language_warn() {
905 let content = "```mycustomlang\ncode\n```";
906 let config = MD040Config {
907 unknown_language_action: UnknownLanguageAction::Warn,
908 ..Default::default()
909 };
910 let result = run_check_with_config(content, config).unwrap();
911 assert_eq!(result.len(), 1);
912 assert!(result[0].message.contains("Unknown language"));
913 assert!(result[0].message.contains("mycustomlang"));
914 assert_eq!(result[0].severity, Severity::Warning);
915 }
916
917 #[test]
918 fn test_unknown_language_error() {
919 let content = "```mycustomlang\ncode\n```";
920 let config = MD040Config {
921 unknown_language_action: UnknownLanguageAction::Error,
922 ..Default::default()
923 };
924 let result = run_check_with_config(content, config).unwrap();
925 assert_eq!(result.len(), 1);
926 assert!(result[0].message.contains("Unknown language"));
927 assert_eq!(result[0].severity, Severity::Error);
928 }
929
930 #[test]
935 fn test_invalid_preferred_alias_detected() {
936 let mut preferred = HashMap::new();
937 preferred.insert("Shell".to_string(), "invalid_alias".to_string());
938
939 let config = MD040Config {
940 style: LanguageStyle::Consistent,
941 preferred_aliases: preferred,
942 ..Default::default()
943 };
944 let rule = MD040FencedCodeLanguage::with_config(config);
945 let errors = rule.validate_config();
946 assert_eq!(errors.len(), 1);
947 assert!(errors[0].contains("Invalid alias"));
948 assert!(errors[0].contains("invalid_alias"));
949 }
950
951 #[test]
952 fn test_unknown_language_in_preferred_aliases_detected() {
953 let mut preferred = HashMap::new();
954 preferred.insert("NotARealLanguage".to_string(), "nope".to_string());
955
956 let config = MD040Config {
957 style: LanguageStyle::Consistent,
958 preferred_aliases: preferred,
959 ..Default::default()
960 };
961 let rule = MD040FencedCodeLanguage::with_config(config);
962 let errors = rule.validate_config();
963 assert_eq!(errors.len(), 1);
964 assert!(errors[0].contains("Unknown language"));
965 }
966
967 #[test]
968 fn test_valid_preferred_alias_accepted() {
969 let mut preferred = HashMap::new();
970 preferred.insert("Shell".to_string(), "bash".to_string());
971 preferred.insert("JavaScript".to_string(), "js".to_string());
972
973 let config = MD040Config {
974 style: LanguageStyle::Consistent,
975 preferred_aliases: preferred,
976 ..Default::default()
977 };
978 let rule = MD040FencedCodeLanguage::with_config(config);
979 let errors = rule.validate_config();
980 assert!(errors.is_empty());
981 }
982
983 #[test]
984 fn test_config_error_uses_valid_line_column() {
985 let config = md040_config::MD040Config {
986 preferred_aliases: {
987 let mut map = std::collections::HashMap::new();
988 map.insert("Shell".to_string(), "invalid_alias".to_string());
989 map
990 },
991 ..Default::default()
992 };
993 let rule = MD040FencedCodeLanguage::with_config(config);
994
995 let content = "```shell\necho hello\n```";
996 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
997 let result = rule.check(&ctx).unwrap();
998
999 let config_error = result.iter().find(|w| w.message.contains("[config error]"));
1001 assert!(config_error.is_some(), "Should have a config error warning");
1002
1003 let warning = config_error.unwrap();
1004 assert!(
1006 warning.line >= 1,
1007 "Config error line should be >= 1, got {}",
1008 warning.line
1009 );
1010 assert!(
1011 warning.column >= 1,
1012 "Config error column should be >= 1, got {}",
1013 warning.column
1014 );
1015 }
1016
1017 #[test]
1022 fn test_linguist_resolution() {
1023 assert_eq!(resolve_canonical("bash"), Some("Shell"));
1024 assert_eq!(resolve_canonical("sh"), Some("Shell"));
1025 assert_eq!(resolve_canonical("zsh"), Some("Shell"));
1026 assert_eq!(resolve_canonical("js"), Some("JavaScript"));
1027 assert_eq!(resolve_canonical("python"), Some("Python"));
1028 assert_eq!(resolve_canonical("unknown_lang"), None);
1029 }
1030
1031 #[test]
1032 fn test_linguist_resolution_case_insensitive() {
1033 assert_eq!(resolve_canonical("BASH"), Some("Shell"));
1034 assert_eq!(resolve_canonical("Bash"), Some("Shell"));
1035 assert_eq!(resolve_canonical("Python"), Some("Python"));
1036 assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
1037 }
1038
1039 #[test]
1040 fn test_alias_validation() {
1041 assert!(is_valid_alias("Shell", "bash"));
1042 assert!(is_valid_alias("Shell", "sh"));
1043 assert!(is_valid_alias("Shell", "zsh"));
1044 assert!(!is_valid_alias("Shell", "python"));
1045 assert!(!is_valid_alias("Shell", "invalid"));
1046 }
1047
1048 #[test]
1049 fn test_default_alias() {
1050 assert_eq!(default_alias("Shell"), Some("bash"));
1051 assert_eq!(default_alias("JavaScript"), Some("js"));
1052 assert_eq!(default_alias("Python"), Some("python"));
1053 }
1054
1055 #[test]
1060 fn test_mixed_case_labels_normalized() {
1061 let content = r#"```BASH
1062echo hi
1063```
1064
1065```Bash
1066echo there
1067```
1068
1069```bash
1070echo again
1071```
1072"#;
1073 let config = MD040Config {
1074 style: LanguageStyle::Consistent,
1075 ..Default::default()
1076 };
1077 let result = run_check_with_config(content, config).unwrap();
1079 assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
1083 }
1084
1085 #[test]
1086 fn test_multiple_languages_independent() {
1087 let content = r#"```bash
1088shell code
1089```
1090
1091```python
1092python code
1093```
1094
1095```sh
1096more shell
1097```
1098
1099```python3
1100more python
1101```
1102"#;
1103 let config = MD040Config {
1104 style: LanguageStyle::Consistent,
1105 ..Default::default()
1106 };
1107 let result = run_check_with_config(content, config).unwrap();
1108 assert_eq!(result.len(), 2);
1110 }
1111
1112 #[test]
1113 fn test_tilde_fences() {
1114 let content = r#"~~~bash
1115echo hi
1116~~~
1117
1118~~~sh
1119echo there
1120~~~
1121"#;
1122 let config = MD040Config {
1123 style: LanguageStyle::Consistent,
1124 ..Default::default()
1125 };
1126 let result = run_check_with_config(content, config.clone()).unwrap();
1127 assert_eq!(result.len(), 1);
1128
1129 let fixed = run_fix_with_config(content, config).unwrap();
1130 assert!(fixed.contains("~~~bash"));
1131 assert!(!fixed.contains("~~~sh"));
1132 }
1133
1134 #[test]
1135 fn test_longer_fence_markers_preserved() {
1136 let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
1137 let config = MD040Config {
1138 style: LanguageStyle::Consistent,
1139 ..Default::default()
1140 };
1141 let fixed = run_fix_with_config(content, config).unwrap();
1142 assert!(fixed.contains("````bash"));
1143 assert!(fixed.contains("```bash"));
1144 }
1145
1146 #[test]
1147 fn test_empty_document() {
1148 let result = run_check("").unwrap();
1149 assert!(result.is_empty());
1150 }
1151
1152 #[test]
1153 fn test_no_code_blocks() {
1154 let content = "# Just a heading\n\nSome text.";
1155 let result = run_check(content).unwrap();
1156 assert!(result.is_empty());
1157 }
1158
1159 #[test]
1160 fn test_single_code_block_no_inconsistency() {
1161 let content = "```bash\necho hi\n```";
1162 let config = MD040Config {
1163 style: LanguageStyle::Consistent,
1164 ..Default::default()
1165 };
1166 let result = run_check_with_config(content, config).unwrap();
1167 assert!(result.is_empty(), "Single block has no inconsistency");
1168 }
1169
1170 #[test]
1171 fn test_idempotent_fix() {
1172 let content = r#"```bash
1173echo hi
1174```
1175
1176```sh
1177echo there
1178```
1179"#;
1180 let config = MD040Config {
1181 style: LanguageStyle::Consistent,
1182 ..Default::default()
1183 };
1184 let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
1185 let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
1186 assert_eq!(fixed1, fixed2, "Fix should be idempotent");
1187 }
1188
1189 #[test]
1194 fn test_mkdocs_superfences_title_only() {
1195 let content = r#"```title="Example"
1197echo hi
1198```
1199"#;
1200 let result = run_check_mkdocs(content).unwrap();
1201 assert!(
1202 result.is_empty(),
1203 "MkDocs superfences with title= should not require language"
1204 );
1205 }
1206
/// Checked through `run_check_mkdocs`, a fence whose info string is only
/// `hl_lines="1 2"` is expected to produce no warnings.
#[test]
fn test_mkdocs_superfences_hl_lines() {
    let markdown = r#"```hl_lines="1 2"
line 1
line 2
```
"#;
    let warnings = run_check_mkdocs(markdown).unwrap();
    assert!(
        warnings.is_empty(),
        "MkDocs superfences with hl_lines= should not require language"
    );
}
1221
/// Checked through `run_check_mkdocs`, a fence whose info string is only
/// `linenums="1"` is expected to produce no warnings.
#[test]
fn test_mkdocs_superfences_linenums() {
    let markdown = r#"```linenums="1"
line 1
line 2
```
"#;
    let warnings = run_check_mkdocs(markdown).unwrap();
    assert!(
        warnings.is_empty(),
        "MkDocs superfences with linenums= should not require language"
    );
}
1236
/// Checked through `run_check_mkdocs`, a fence whose info string is only a
/// `.my-class` shorthand is expected to produce no warnings.
#[test]
fn test_mkdocs_superfences_class() {
    let markdown = r#"```.my-class
some text
```
"#;
    let warnings = run_check_mkdocs(markdown).unwrap();
    assert!(
        warnings.is_empty(),
        "MkDocs superfences with .class should not require language"
    );
}
1250
/// Checked through `run_check_mkdocs`, a fence whose info string is only a
/// `#my-id` shorthand is expected to produce no warnings.
#[test]
fn test_mkdocs_superfences_id() {
    let markdown = r#"```#my-id
some text
```
"#;
    let warnings = run_check_mkdocs(markdown).unwrap();
    assert!(
        warnings.is_empty(),
        "MkDocs superfences with #id should not require language"
    );
}
1264
/// Checked through `run_check_mkdocs`, a fence that names `python` and then
/// adds `title=` / `hl_lines=` attributes is expected to produce no warnings.
#[test]
fn test_mkdocs_superfences_with_language() {
    let markdown = r#"```python title="Example" hl_lines="1"
print("hello")
```
"#;
    let warnings = run_check_mkdocs(markdown).unwrap();
    assert!(warnings.is_empty(), "Code block with language and attrs should pass");
}
1275
/// Checked through plain `run_check`, the same `title="Example"` fence that
/// the MkDocs tests accept is expected to produce exactly one warning.
#[test]
fn test_standard_flavor_no_special_handling() {
    let markdown = r#"```title="Example"
echo hi
```
"#;
    let warnings = run_check(markdown).unwrap();
    assert_eq!(
        warnings.len(),
        1,
        "Standard flavor should warn about title= without language"
    );
}
1290
/// A `{=html}` raw block linted with `MarkdownFlavor::Pandoc` is expected to
/// produce no warnings.
#[test]
fn test_pandoc_raw_block_skipped_under_pandoc_flavor() {
    use crate::{config::MarkdownFlavor, lint_context::LintContext};

    let md040 = MD040FencedCodeLanguage::default();
    let raw_block = "```{=html}\n<div>raw html</div>\n```\n";
    let pandoc_ctx = LintContext::new(raw_block, MarkdownFlavor::Pandoc, None);
    let result = md040.check(&pandoc_ctx).unwrap();
    assert!(
        result.is_empty(),
        "MD040 should skip Pandoc raw blocks ({{=html}}) under Pandoc flavor: {result:?}"
    );
}
1304
/// A `{=html}` raw block linted with `MarkdownFlavor::Quarto` is expected to
/// produce no warnings.
#[test]
fn test_pandoc_raw_block_skipped_under_quarto_flavor() {
    use crate::{config::MarkdownFlavor, lint_context::LintContext};

    let md040 = MD040FencedCodeLanguage::default();
    let raw_block = "```{=html}\n<div>raw html</div>\n```\n";
    let quarto_ctx = LintContext::new(raw_block, MarkdownFlavor::Quarto, None);
    let result = md040.check(&quarto_ctx).unwrap();
    assert!(
        result.is_empty(),
        "MD040 should skip Pandoc raw blocks ({{=html}}) under Quarto flavor: {result:?}"
    );
}
1317
/// Under `MarkdownFlavor::Pandoc`, a fence opened with `{=html}` is expected
/// to be accepted without warnings.
#[test]
fn test_pandoc_accepts_raw_html_block() {
    let md040 = MD040FencedCodeLanguage::default();
    let raw_block = "```{=html}\n<div>raw</div>\n```\n";
    let pandoc_ctx = LintContext::new(raw_block, crate::config::MarkdownFlavor::Pandoc, None);
    let result = md040.check(&pandoc_ctx).unwrap();
    assert!(result.is_empty(), "MD040 should accept ```{{=html}}```: {result:?}");
}
1329
/// Under `MarkdownFlavor::Pandoc`, a fence opened with `{r}` is expected to
/// produce at least one warning.
#[test]
fn test_pandoc_rejects_quarto_exec_blocks() {
    let md040 = MD040FencedCodeLanguage::default();
    let exec_chunk = "```{r}\nsummary(data)\n```\n";
    let pandoc_ctx = LintContext::new(exec_chunk, crate::config::MarkdownFlavor::Pandoc, None);
    let warnings = md040.check(&pandoc_ctx).unwrap();
    assert!(
        !warnings.is_empty(),
        "MD040 under Pandoc should flag `{{r}}` (Quarto-only)"
    );
}
1344
/// Under `MarkdownFlavor::Quarto`, a fence opened with `{r}` is expected to
/// produce no warnings.
#[test]
fn test_quarto_still_accepts_exec_block() {
    let md040 = MD040FencedCodeLanguage::default();
    let exec_chunk = "```{r}\nsummary(data)\n```\n";
    let quarto_ctx = LintContext::new(exec_chunk, crate::config::MarkdownFlavor::Quarto, None);
    let result = md040.check(&quarto_ctx).unwrap();
    assert!(
        result.is_empty(),
        "MD040 under Quarto should accept `{{r}}`: {result:?}"
    );
}
1358
/// Lints the same `{r}` fence under two flavors: Quarto is expected to
/// produce no warnings, Pandoc at least one.
#[test]
fn test_quarto_exec_block_skipped_under_quarto_only() {
    use crate::{config::MarkdownFlavor, lint_context::LintContext};

    let md040 = MD040FencedCodeLanguage::default();
    let exec_chunk = "```{r}\n1 + 1\n```\n";

    // Quarto flavor: no warnings expected for the chunk.
    {
        let quarto_ctx = LintContext::new(exec_chunk, MarkdownFlavor::Quarto, None);
        let result_quarto = md040.check(&quarto_ctx).unwrap();
        assert!(
            result_quarto.is_empty(),
            "MD040 should skip Quarto exec chunks under Quarto flavor: {result_quarto:?}"
        );
    }

    // Pandoc flavor: the identical input is expected to be reported.
    {
        let pandoc_ctx = LintContext::new(exec_chunk, MarkdownFlavor::Pandoc, None);
        let pandoc_warnings = md040.check(&pandoc_ctx).unwrap();
        assert!(
            !pandoc_warnings.is_empty(),
            "MD040 should flag `{{r}}` under Pandoc as missing a real language"
        );
    }
}
1383
/// A fence opened with `{.python}` is expected to produce no warnings under
/// both `MarkdownFlavor::Pandoc` and `MarkdownFlavor::Quarto`.
#[test]
fn test_pandoc_class_attr_accepted_as_language() {
    use crate::config::MarkdownFlavor;

    let md040 = MD040FencedCodeLanguage::default();
    let class_fence = "```{.python}\nprint(\"hi\")\n```\n";

    let pandoc_ctx = LintContext::new(class_fence, MarkdownFlavor::Pandoc, None);
    let result_pandoc = md040.check(&pandoc_ctx).unwrap();
    assert!(
        result_pandoc.is_empty(),
        "MD040 under Pandoc should accept ```{{.python}}``` as language declaration: {result_pandoc:?}"
    );

    let quarto_ctx = LintContext::new(class_fence, MarkdownFlavor::Quarto, None);
    let result_quarto = md040.check(&quarto_ctx).unwrap();
    assert!(
        result_quarto.is_empty(),
        "MD040 under Quarto should accept ```{{.python}}``` as language declaration: {result_quarto:?}"
    );
}
1406
/// A fence opened with `{.haskell .numberLines}` is expected to produce no
/// warnings under both `MarkdownFlavor::Pandoc` and `MarkdownFlavor::Quarto`.
#[test]
fn test_pandoc_class_attr_with_extra_attributes_accepted() {
    use crate::config::MarkdownFlavor;

    let md040 = MD040FencedCodeLanguage::default();
    let class_fence = "```{.haskell .numberLines}\nmain = putStrLn \"hi\"\n```\n";

    let pandoc_ctx = LintContext::new(class_fence, MarkdownFlavor::Pandoc, None);
    let result_pandoc = md040.check(&pandoc_ctx).unwrap();
    assert!(
        result_pandoc.is_empty(),
        "MD040 under Pandoc should accept ```{{.haskell .numberLines}}```: {result_pandoc:?}"
    );

    let quarto_ctx = LintContext::new(class_fence, MarkdownFlavor::Quarto, None);
    let result_quarto = md040.check(&quarto_ctx).unwrap();
    assert!(
        result_quarto.is_empty(),
        "MD040 under Quarto should accept ```{{.haskell .numberLines}}```: {result_quarto:?}"
    );
}
1429
/// Under `MarkdownFlavor::Pandoc`, a fence whose attribute block combines an
/// id, a `.python` class and a key/value pair is expected to produce no
/// warnings.
#[test]
fn test_pandoc_class_attr_with_id_and_keyvalue_accepted() {
    use crate::config::MarkdownFlavor;

    let md040 = MD040FencedCodeLanguage::default();
    let attr_fence = "```{#snippet .python startFrom=\"10\"}\nprint(1)\n```\n";

    let pandoc_ctx = LintContext::new(attr_fence, MarkdownFlavor::Pandoc, None);
    let result_pandoc = md040.check(&pandoc_ctx).unwrap();
    assert!(
        result_pandoc.is_empty(),
        "MD040 under Pandoc should accept ```{{#snippet .python …}}```: {result_pandoc:?}"
    );
}
1445
/// Under `MarkdownFlavor::Standard`, a fence opened with `{.python}` is
/// expected to produce at least one warning.
#[test]
fn test_standard_still_flags_pandoc_class_attr() {
    let md040 = MD040FencedCodeLanguage::default();
    let class_fence = "```{.python}\nprint(\"hi\")\n```\n";

    let standard_ctx = LintContext::new(class_fence, crate::config::MarkdownFlavor::Standard, None);
    let standard_warnings = md040.check(&standard_ctx).unwrap();
    assert!(
        !standard_warnings.is_empty(),
        "MD040 under Standard should still flag ```{{.python}}``` (no Pandoc support)"
    );
}
1461
/// Under `MarkdownFlavor::Pandoc`, a fence whose attribute block holds only
/// an id (`{#myid}`) is expected to produce at least one warning.
#[test]
fn test_pandoc_id_only_attr_still_flagged() {
    let md040 = MD040FencedCodeLanguage::default();
    let id_only_fence = "```{#myid}\ncode here\n```\n";

    let pandoc_ctx = LintContext::new(id_only_fence, crate::config::MarkdownFlavor::Pandoc, None);
    let pandoc_warnings = md040.check(&pandoc_ctx).unwrap();
    assert!(
        !pandoc_warnings.is_empty(),
        "MD040 under Pandoc should flag ```{{#myid}}``` — id without class declares no language"
    );
}
1477
/// Under `MarkdownFlavor::Pandoc`, a fence opened with an empty attribute
/// block (`{}`) is expected to produce at least one warning.
#[test]
fn test_pandoc_empty_braces_still_flagged() {
    let md040 = MD040FencedCodeLanguage::default();
    let empty_attr_fence = "```{}\ncode here\n```\n";

    let pandoc_ctx = LintContext::new(empty_attr_fence, crate::config::MarkdownFlavor::Pandoc, None);
    let pandoc_warnings = md040.check(&pandoc_ctx).unwrap();
    assert!(
        !pandoc_warnings.is_empty(),
        "MD040 under Pandoc should flag ```{{}}``` (no language declared)"
    );
}
1492}