1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use std::collections::HashMap;
6
7pub mod md040_config;
11
/// Prefixes that identify a fence-line token as a MkDocs superfences
/// attribute (`title=`, `hl_lines=`, `linenums=`, `.class`, `#id`).
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &["title=", "hl_lines=", "linenums=", ".", "#"];

/// Returns `true` when `s` starts with any entry of
/// `MKDOCS_SUPERFENCES_ATTR_PREFIXES`.
#[inline]
fn is_superfences_attribute(s: &str) -> bool {
    for candidate in MKDOCS_SUPERFENCES_ATTR_PREFIXES {
        if s.starts_with(*candidate) {
            return true;
        }
    }
    false
}
34use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
35
/// The opening line of a fenced code block, reduced to what this rule needs.
struct FencedCodeBlock {
    /// Zero-based index of the opening fence line; used to index the
    /// context's line table and converted to a 1-based line when reporting.
    line_idx: usize,
    /// First whitespace-delimited token of the block's info string, or an
    /// empty string when the info string has no tokens.
    language: String,
    /// The run of backticks or tildes that opens the block. Falls back to
    /// "```" when the line does not start with either character.
    fence_marker: String,
}
44
/// Rule MD040: code blocks should have a language specified.
///
/// Depending on its configuration the rule can also enforce consistent
/// labels per language, allow/deny lists, and a policy for unknown languages.
#[derive(Debug, Clone, Default)]
pub struct MD040FencedCodeLanguage {
    /// Settings that drive every check performed by this rule.
    config: MD040Config,
}
49
50impl MD040FencedCodeLanguage {
51 pub fn new() -> Self {
52 Self::default()
53 }
54
    /// Creates the rule from an explicit configuration.
    pub fn with_config(config: MD040Config) -> Self {
        Self { config }
    }
58
59 fn validate_config(&self) -> Vec<String> {
61 let mut errors = Vec::new();
62
63 for (canonical, alias) in &self.config.preferred_aliases {
65 if let Some(actual_canonical) = resolve_canonical(canonical) {
67 if !is_valid_alias(actual_canonical, alias)
68 && let Some(valid_aliases) = get_aliases(actual_canonical)
69 {
70 let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
71 let valid_str = valid_list
72 .iter()
73 .map(|s| format!("'{s}'"))
74 .collect::<Vec<_>>()
75 .join(", ");
76 let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
77 errors.push(format!(
78 "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
79 ));
80 }
81 } else {
82 errors.push(format!(
83 "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
84 ));
85 }
86 }
87
88 errors
89 }
90
91 fn compute_preferred_labels(
93 &self,
94 blocks: &[FencedCodeBlock],
95 disabled_ranges: &[(usize, usize)],
96 ) -> HashMap<String, String> {
97 let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
99
100 for block in blocks {
101 if is_line_disabled(disabled_ranges, block.line_idx) {
102 continue;
103 }
104 if block.language.is_empty() {
105 continue;
106 }
107 if let Some(canonical) = resolve_canonical(&block.language) {
108 by_canonical
109 .entry(canonical.to_string())
110 .or_default()
111 .push(&block.language);
112 }
113 }
114
115 let mut result = HashMap::new();
117
118 for (canonical, labels) in by_canonical {
119 let winner = if let Some(preferred) = self
121 .config
122 .preferred_aliases
123 .iter()
124 .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
125 .map(|(_, v)| v.clone())
126 {
127 preferred
128 } else {
129 let mut counts: HashMap<&str, usize> = HashMap::new();
131 for label in &labels {
132 *counts.entry(*label).or_default() += 1;
133 }
134
135 let max_count = counts.values().max().copied().unwrap_or(0);
136 let winners: Vec<_> = counts
137 .iter()
138 .filter(|(_, c)| **c == max_count)
139 .map(|(l, _)| *l)
140 .collect();
141
142 if winners.len() == 1 {
143 winners[0].to_string()
144 } else {
145 default_alias(&canonical)
147 .filter(|default| winners.contains(default))
148 .map(|s| s.to_string())
149 .unwrap_or_else(|| winners.into_iter().min().unwrap().to_string())
150 }
151 };
152
153 result.insert(canonical, winner);
154 }
155
156 result
157 }
158
159 fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
161 if !self.config.allowed_languages.is_empty() {
163 let allowed = self.config.allowed_languages.join(", ");
164 let Some(canonical) = canonical else {
165 return Some(format!(
166 "Language '{original_label}' is not in the allowed list: {allowed}"
167 ));
168 };
169 if !self
170 .config
171 .allowed_languages
172 .iter()
173 .any(|a| a.eq_ignore_ascii_case(canonical))
174 {
175 return Some(format!(
176 "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
177 ));
178 }
179 } else if !self.config.disallowed_languages.is_empty()
180 && canonical.is_some_and(|canonical| {
181 self.config
182 .disallowed_languages
183 .iter()
184 .any(|d| d.eq_ignore_ascii_case(canonical))
185 })
186 {
187 let canonical = canonical.unwrap_or("unknown");
188 return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
189 }
190 None
191 }
192
193 fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
195 if resolve_canonical(label).is_some() {
196 return None;
197 }
198
199 match self.config.unknown_language_action {
200 UnknownLanguageAction::Ignore => None,
201 UnknownLanguageAction::Warn => Some((
202 format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
203 Severity::Warning,
204 )),
205 UnknownLanguageAction::Error => Some((
206 format!("Unknown language '{label}' (not in GitHub Linguist)"),
207 Severity::Error,
208 )),
209 }
210 }
211}
212
213impl Rule for MD040FencedCodeLanguage {
    /// Returns the rule identifier, `"MD040"`.
    fn name(&self) -> &'static str {
        "MD040"
    }
217
    /// Returns the one-line, human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Code blocks should have a language specified"
    }
221
222 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
223 let content = ctx.content;
224 let mut warnings = Vec::new();
225
226 for error in self.validate_config() {
228 warnings.push(LintWarning {
229 rule_name: Some(self.name().to_string()),
230 line: 1,
231 column: 1,
232 end_line: 1,
233 end_column: 1,
234 message: format!("[config error] {error}"),
235 severity: Severity::Error,
236 fix: None,
237 });
238 }
239
240 let fenced_blocks = derive_fenced_code_blocks(ctx);
242
243 let disabled_ranges = compute_disabled_ranges(content, self.name());
245
246 let preferred_labels = if self.config.style == LanguageStyle::Consistent {
248 self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
249 } else {
250 HashMap::new()
251 };
252
253 let lines = ctx.raw_lines();
254
255 for block in &fenced_blocks {
256 if is_line_disabled(&disabled_ranges, block.line_idx) {
258 continue;
259 }
260
261 let line = lines.get(block.line_idx).unwrap_or(&"");
263 let trimmed = line.trim();
264 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
265
266 let has_mkdocs_attrs_only =
268 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
269
270 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
272 && after_fence.starts_with('{')
273 && after_fence.contains('}');
274
275 let needs_language =
278 !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
279
280 if needs_language && !has_quarto_syntax {
281 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
282
283 warnings.push(LintWarning {
284 rule_name: Some(self.name().to_string()),
285 line: start_line,
286 column: start_col,
287 end_line,
288 end_column: end_col,
289 message: "Code block (```) missing language".to_string(),
290 severity: Severity::Warning,
291 fix: Some(Fix {
292 range: {
293 let trimmed = line.trim_start();
294 let trimmed_start = line.len() - trimmed.len();
295 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
296 let fence_end_byte = line_start_byte + trimmed_start + block.fence_marker.len();
297 let line_end_byte = line_start_byte + line.len();
301 fence_end_byte..line_end_byte
302 },
303 replacement: {
304 let trimmed = line.trim_start();
305 let after_fence = &trimmed[block.fence_marker.len()..];
306 let after_fence_trimmed = after_fence.trim();
307 if after_fence_trimmed.is_empty() {
308 "text".to_string()
309 } else {
310 format!("text {after_fence_trimmed}")
311 }
312 },
313 }),
314 });
315 continue;
316 }
317
318 if has_quarto_syntax {
320 continue;
321 }
322
323 let canonical = resolve_canonical(&block.language);
324
325 if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
327 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
328
329 warnings.push(LintWarning {
330 rule_name: Some(self.name().to_string()),
331 line: start_line,
332 column: start_col,
333 end_line,
334 end_column: end_col,
335 message: msg,
336 severity: Severity::Warning,
337 fix: None,
338 });
339 continue;
340 }
341
342 if canonical.is_none() {
344 if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
345 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
346
347 warnings.push(LintWarning {
348 rule_name: Some(self.name().to_string()),
349 line: start_line,
350 column: start_col,
351 end_line,
352 end_column: end_col,
353 message: msg,
354 severity,
355 fix: None,
356 });
357 }
358 continue;
359 }
360
361 if self.config.style == LanguageStyle::Consistent
363 && let Some(preferred) = preferred_labels.get(canonical.unwrap())
364 && &block.language != preferred
365 {
366 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
367
368 let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
369 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
370 Fix {
371 range: (line_start_byte + label_start)..(line_start_byte + label_end),
372 replacement: preferred.clone(),
373 }
374 });
375 let lang = &block.language;
376 let canonical = canonical.unwrap();
377
378 warnings.push(LintWarning {
379 rule_name: Some(self.name().to_string()),
380 line: start_line,
381 column: start_col,
382 end_line,
383 end_column: end_col,
384 message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
385 severity: Severity::Warning,
386 fix,
387 });
388 }
389 }
390
391 Ok(warnings)
392 }
393
394 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
395 if self.should_skip(ctx) {
396 return Ok(ctx.content.to_string());
397 }
398 let warnings = self.check(ctx)?;
399 if warnings.is_empty() {
400 return Ok(ctx.content.to_string());
401 }
402 let warnings =
403 crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
404 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
405 }
406
    /// Returns the category this rule is grouped under (`CodeBlock`).
    fn category(&self) -> RuleCategory {
        RuleCategory::CodeBlock
    }
411
412 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
414 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
415 }
416
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
420
421 fn default_config_section(&self) -> Option<(String, toml::Value)> {
422 let default_config = MD040Config::default();
423 let json_value = serde_json::to_value(&default_config).ok()?;
424 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
425
426 if let toml::Value::Table(table) = toml_value {
427 if !table.is_empty() {
428 Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
429 } else {
430 None
431 }
432 } else {
433 None
434 }
435 }
436
437 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
438 where
439 Self: Sized,
440 {
441 let rule_config: MD040Config = load_rule_config(config);
442 Box::new(MD040FencedCodeLanguage::with_config(rule_config))
443 }
444}
445
446fn derive_fenced_code_blocks(ctx: &crate::lint_context::LintContext) -> Vec<FencedCodeBlock> {
448 let content = ctx.content;
449 let line_offsets = &ctx.line_offsets;
450
451 ctx.code_block_details
452 .iter()
453 .filter(|d| d.is_fenced)
454 .map(|detail| {
455 let line_idx = match line_offsets.binary_search(&detail.start) {
456 Ok(idx) => idx,
457 Err(idx) => idx.saturating_sub(1),
458 };
459
460 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
462 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
463 let line = content.get(line_start..line_end).unwrap_or("");
464 let trimmed = line.trim();
465 let fence_marker = if trimmed.starts_with('`') {
466 let count = trimmed.chars().take_while(|&c| c == '`').count();
467 "`".repeat(count)
468 } else if trimmed.starts_with('~') {
469 let count = trimmed.chars().take_while(|&c| c == '~').count();
470 "~".repeat(count)
471 } else {
472 "```".to_string()
473 };
474
475 let language = detail.info_string.split_whitespace().next().unwrap_or("").to_string();
476
477 FencedCodeBlock {
478 line_idx,
479 language,
480 fence_marker,
481 }
482 })
483 .collect()
484}
485
486fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
488 let mut ranges = Vec::new();
489 let mut disabled_start: Option<usize> = None;
490
491 for (i, line) in content.lines().enumerate() {
492 let trimmed = line.trim();
493
494 if let Some(rules) = crate::inline_config::parse_disable_comment(trimmed)
495 && (rules.is_empty() || rules.contains(&rule_name))
496 && disabled_start.is_none()
497 {
498 disabled_start = Some(i);
499 }
500
501 if let Some(rules) = crate::inline_config::parse_enable_comment(trimmed)
502 && (rules.is_empty() || rules.contains(&rule_name))
503 && let Some(start) = disabled_start.take()
504 {
505 ranges.push((start, i));
506 }
507 }
508
509 if let Some(start) = disabled_start {
511 ranges.push((start, usize::MAX));
512 }
513
514 ranges
515}
516
/// Returns `true` when `line_idx` lies inside any of the half-open
/// `(start, end)` ranges, i.e. `start <= line_idx < end`.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&line_idx) {
            return true;
        }
    }
    false
}
521
/// Locates the language label on a fence line.
///
/// `line` is the full opening line and `fence_marker` the fence characters
/// expected after its leading whitespace. Returns the byte offsets
/// `(start, end)` of the first whitespace-delimited token after the marker,
/// relative to the start of `line`. Returns `None` when the line does not
/// start with the marker (after indentation) or nothing but whitespace
/// follows it.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let indent_len = line.len() - line.trim_start().len();
    let after_fence = line[indent_len..].strip_prefix(fence_marker)?;

    let label_start = after_fence.find(|ch: char| !ch.is_whitespace())?;
    let label_end = after_fence[label_start..]
        .find(char::is_whitespace)
        .map_or(after_fence.len(), |offset| label_start + offset);

    // Offsets so far are relative to the text after the marker.
    let base = indent_len + fence_marker.len();
    Some((base + label_start, base + label_end))
}
547
548#[cfg(test)]
549mod tests {
550 use super::*;
551 use crate::lint_context::LintContext;
552
553 fn run_check(content: &str) -> LintResult {
554 let rule = MD040FencedCodeLanguage::default();
555 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
556 rule.check(&ctx)
557 }
558
559 fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
560 let rule = MD040FencedCodeLanguage::with_config(config);
561 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
562 rule.check(&ctx)
563 }
564
565 fn run_fix(content: &str) -> Result<String, LintError> {
566 let rule = MD040FencedCodeLanguage::default();
567 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
568 rule.fix(&ctx)
569 }
570
571 fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
572 let rule = MD040FencedCodeLanguage::with_config(config);
573 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
574 rule.fix(&ctx)
575 }
576
577 fn run_check_mkdocs(content: &str) -> LintResult {
578 let rule = MD040FencedCodeLanguage::default();
579 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
580 rule.check(&ctx)
581 }
582
    /// Fences that carry a language label produce no warnings.
    #[test]
    fn test_code_blocks_with_language_specified() {
        let content = r#"# Test

```python
print("Hello, world!")
```

```javascript
console.log("Hello!");
```
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }
602
    /// A bare fence yields exactly one "missing language" warning on the fence line.
    #[test]
    fn test_code_blocks_without_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }
616
    /// The fix labels every bare fence as `text` and leaves labelled fences alone.
    #[test]
    fn test_fix_method_adds_text_language() {
        let content = r#"# Test

```
code without language
```

```python
already has language
```

```
another block without
```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);
    }
638
    /// The fix keeps the whitespace that precedes an indented fence.
    #[test]
    fn test_fix_preserves_indentation() {
        let content = r#"# Test

- List item
 ```
 indented code block
 ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains(" ```text"));
    }
651
    /// In consistent mode the minority label (`sh` against two `bash`) is flagged once.
    #[test]
    fn test_consistent_mode_detects_inconsistency() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Inconsistent"));
        assert!(result[0].message.contains("sh"));
        assert!(result[0].message.contains("bash"));
    }
680
    /// The consistent-mode fix rewrites the minority label to the majority one.
    #[test]
    fn test_consistent_mode_fix_normalizes() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```bash").count(), 3);
        assert_eq!(fixed.matches("```sh").count(), 0);
    }
703
    /// With one `bash` and one `sh` block (a tie), the fix settles on `bash`.
    #[test]
    fn test_consistent_mode_tie_break_uses_curated_default() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```bash").count(), 2);
    }
723
    /// A configured preferred alias (`Shell` -> `sh`) overrides the tie-break.
    #[test]
    fn test_consistent_mode_with_preferred_alias() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "sh".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```sh").count(), 2);
        assert_eq!(fixed.matches("```bash").count(), 0);
    }
746
    /// Blocks inside a disable/enable region neither get flagged nor count
    /// towards the preferred label.
    #[test]
    fn test_consistent_mode_ignores_disabled_blocks() {
        let content = r#"```bash
echo hi
```
<!-- rumdl-disable MD040 -->
```sh
echo there
```
```sh
echo again
```
<!-- rumdl-enable MD040 -->
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty(), "Disabled blocks should not affect consistency");
    }
768
    /// Replacing a label keeps the attributes that follow it on the fence line.
    #[test]
    fn test_fix_preserves_attributes() {
        let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("```bash {.highlight}"));
    }
779
    /// Replacing a label keeps the space between the fence and the label.
    #[test]
    fn test_fix_preserves_spacing_before_label() {
        let content = "```bash\ncode\n```\n\n``` sh {.highlight}\ncode\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("``` bash {.highlight}"));
        assert!(!fixed.contains("``` sh {.highlight}"));
    }
791
    /// A known language missing from the allow list is reported.
    #[test]
    fn test_allowlist_blocks_unlisted() {
        let content = "```java\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("not in the allowed list"));
    }
807
    /// A language present in the allow list passes.
    #[test]
    fn test_allowlist_allows_listed() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }
818
    /// With an allow list configured, an unrecognized label is reported too.
    #[test]
    fn test_allowlist_blocks_unknown_language() {
        let content = "```mysterylang\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("allowed list"));
    }
830
    /// Allow-list entries match regardless of ASCII case.
    #[test]
    fn test_allowlist_case_insensitive() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["PYTHON".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }
841
    /// A language named in the deny list is reported as disallowed.
    #[test]
    fn test_denylist_blocks_listed() {
        let content = "```java\ncode\n```";
        let config = MD040Config {
            disallowed_languages: vec!["Java".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("disallowed"));
    }
853
    /// A language absent from the deny list passes.
    #[test]
    fn test_denylist_allows_unlisted() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            disallowed_languages: vec!["Java".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }
864
    /// When both lists name the same language, the allow list wins and the block passes.
    #[test]
    fn test_allowlist_takes_precedence_over_denylist() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            disallowed_languages: vec!["Python".to_string()], ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }
876
    /// With the default configuration an unrecognized label is not reported.
    #[test]
    fn test_unknown_language_ignore_default() {
        let content = "```mycustomlang\ncode\n```";
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Unknown languages ignored by default");
    }
887
    /// `UnknownLanguageAction::Warn` reports an unrecognized label with warning severity.
    #[test]
    fn test_unknown_language_warn() {
        let content = "```mycustomlang\ncode\n```";
        let config = MD040Config {
            unknown_language_action: UnknownLanguageAction::Warn,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Unknown language"));
        assert!(result[0].message.contains("mycustomlang"));
        assert_eq!(result[0].severity, Severity::Warning);
    }
901
    /// `UnknownLanguageAction::Error` reports an unrecognized label with error severity.
    #[test]
    fn test_unknown_language_error() {
        let content = "```mycustomlang\ncode\n```";
        let config = MD040Config {
            unknown_language_action: UnknownLanguageAction::Error,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Unknown language"));
        assert_eq!(result[0].severity, Severity::Error);
    }
914
    /// `validate_config` rejects a preferred alias that is not valid for its language.
    #[test]
    fn test_invalid_preferred_alias_detected() {
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "invalid_alias".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);
        let errors = rule.validate_config();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("Invalid alias"));
        assert!(errors[0].contains("invalid_alias"));
    }
935
    /// `validate_config` rejects a `preferred_aliases` key that is not a known language.
    #[test]
    fn test_unknown_language_in_preferred_aliases_detected() {
        let mut preferred = HashMap::new();
        preferred.insert("NotARealLanguage".to_string(), "nope".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);
        let errors = rule.validate_config();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("Unknown language"));
    }
951
    /// `validate_config` returns no errors for valid language/alias pairs.
    #[test]
    fn test_valid_preferred_alias_accepted() {
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "bash".to_string());
        preferred.insert("JavaScript".to_string(), "js".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);
        let errors = rule.validate_config();
        assert!(errors.is_empty());
    }
967
    /// A config-error warning carries a 1-based line and column (never 0).
    #[test]
    fn test_config_error_uses_valid_line_column() {
        let config = md040_config::MD040Config {
            preferred_aliases: {
                let mut map = std::collections::HashMap::new();
                map.insert("Shell".to_string(), "invalid_alias".to_string());
                map
            },
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);

        let content = "```shell\necho hello\n```";
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        // The config error is identified by its message prefix.
        let config_error = result.iter().find(|w| w.message.contains("[config error]"));
        assert!(config_error.is_some(), "Should have a config error warning");

        let warning = config_error.unwrap();
        assert!(
            warning.line >= 1,
            "Config error line should be >= 1, got {}",
            warning.line
        );
        assert!(
            warning.column >= 1,
            "Config error column should be >= 1, got {}",
            warning.column
        );
    }
1001
    /// Aliases resolve to their canonical language; unknown labels resolve to `None`.
    #[test]
    fn test_linguist_resolution() {
        assert_eq!(resolve_canonical("bash"), Some("Shell"));
        assert_eq!(resolve_canonical("sh"), Some("Shell"));
        assert_eq!(resolve_canonical("zsh"), Some("Shell"));
        assert_eq!(resolve_canonical("js"), Some("JavaScript"));
        assert_eq!(resolve_canonical("python"), Some("Python"));
        assert_eq!(resolve_canonical("unknown_lang"), None);
    }
1015
    /// Resolution ignores the case of the label.
    #[test]
    fn test_linguist_resolution_case_insensitive() {
        assert_eq!(resolve_canonical("BASH"), Some("Shell"));
        assert_eq!(resolve_canonical("Bash"), Some("Shell"));
        assert_eq!(resolve_canonical("Python"), Some("Python"));
        assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
    }
1023
    /// `is_valid_alias` accepts aliases of the language and rejects everything else.
    #[test]
    fn test_alias_validation() {
        assert!(is_valid_alias("Shell", "bash"));
        assert!(is_valid_alias("Shell", "sh"));
        assert!(is_valid_alias("Shell", "zsh"));
        assert!(!is_valid_alias("Shell", "python"));
        assert!(!is_valid_alias("Shell", "invalid"));
    }
1032
    /// Pins the default alias used for Shell, JavaScript and Python.
    #[test]
    fn test_default_alias() {
        assert_eq!(default_alias("Shell"), Some("bash"));
        assert_eq!(default_alias("JavaScript"), Some("js"));
        assert_eq!(default_alias("Python"), Some("python"));
    }
1039
    /// Labels that differ only by case count as different spellings of the
    /// same language, so at least two of the three blocks are flagged.
    #[test]
    fn test_mixed_case_labels_normalized() {
        let content = r#"```BASH
echo hi
```

```Bash
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
    }
1069
    /// Consistency is judged per language: one Shell and one Python label are flagged.
    #[test]
    fn test_multiple_languages_independent() {
        let content = r#"```bash
shell code
```

```python
python code
```

```sh
more shell
```

```python3
more python
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 2);
    }
1096
    /// Tilde fences are checked and fixed the same way as backtick fences.
    #[test]
    fn test_tilde_fences() {
        let content = r#"~~~bash
echo hi
~~~

~~~sh
echo there
~~~
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config.clone()).unwrap();
        assert_eq!(result.len(), 1);

        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("~~~bash"));
        assert!(!fixed.contains("~~~sh"));
    }
1118
    /// Replacing a label keeps a four-backtick fence at its original length.
    #[test]
    fn test_longer_fence_markers_preserved() {
        let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("````bash"));
        assert!(fixed.contains("```bash"));
    }
1130
    /// An empty document produces no warnings.
    #[test]
    fn test_empty_document() {
        let result = run_check("").unwrap();
        assert!(result.is_empty());
    }
1136
    /// A document without code blocks produces no warnings.
    #[test]
    fn test_no_code_blocks() {
        let content = "# Just a heading\n\nSome text.";
        let result = run_check(content).unwrap();
        assert!(result.is_empty());
    }
1143
    /// A single labelled block cannot be inconsistent with anything.
    #[test]
    fn test_single_code_block_no_inconsistency() {
        let content = "```bash\necho hi\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty(), "Single block has no inconsistency");
    }
1154
    /// Running the fix on already-fixed output changes nothing.
    #[test]
    fn test_idempotent_fix() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
        let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
        assert_eq!(fixed1, fixed2, "Fix should be idempotent");
    }
1173
    /// MkDocs flavor: a fence carrying only `title=` needs no language.
    #[test]
    fn test_mkdocs_superfences_title_only() {
        let content = r#"```title="Example"
echo hi
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with title= should not require language"
        );
    }
1191
    /// MkDocs flavor: a fence carrying only `hl_lines=` needs no language.
    #[test]
    fn test_mkdocs_superfences_hl_lines() {
        let content = r#"```hl_lines="1 2"
line 1
line 2
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with hl_lines= should not require language"
        );
    }
1206
    /// MkDocs flavor: a fence carrying only `linenums=` needs no language.
    #[test]
    fn test_mkdocs_superfences_linenums() {
        let content = r#"```linenums="1"
line 1
line 2
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with linenums= should not require language"
        );
    }
1221
    /// MkDocs flavor: a fence carrying only a `.class` attribute needs no language.
    #[test]
    fn test_mkdocs_superfences_class() {
        let content = r#"```.my-class
some text
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with .class should not require language"
        );
    }
1235
    /// MkDocs flavor: a fence carrying only an `#id` attribute needs no language.
    #[test]
    fn test_mkdocs_superfences_id() {
        let content = r#"```#my-id
some text
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with #id should not require language"
        );
    }
1249
    /// MkDocs flavor: a language followed by superfences attributes passes.
    #[test]
    fn test_mkdocs_superfences_with_language() {
        let content = r#"```python title="Example" hl_lines="1"
print("hello")
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(result.is_empty(), "Code block with language and attrs should pass");
    }
1260
    /// Standard flavor: `title=` without a language is still reported.
    #[test]
    fn test_standard_flavor_no_special_handling() {
        let content = r#"```title="Example"
echo hi
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(
            result.len(),
            1,
            "Standard flavor should warn about title= without language"
        );
    }
1275}