1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use std::collections::HashMap;
6
7pub mod md040_config;
11
/// Prefixes that mark the first info-string token as a superfences-style
/// attribute rather than a language name.
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &[
    "title=",
    "hl_lines=",
    "linenums=",
    ".",
    "#",
];

/// Returns `true` when `s` begins with any entry of
/// [`MKDOCS_SUPERFENCES_ATTR_PREFIXES`].
///
/// An empty string never matches because every prefix is non-empty.
#[inline]
fn is_superfences_attribute(s: &str) -> bool {
    MKDOCS_SUPERFENCES_ATTR_PREFIXES
        .iter()
        .copied()
        .any(|candidate| s.starts_with(candidate))
}
34use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
35
/// Opening-fence details for one fenced code block, as consumed by the MD040 checks.
struct FencedCodeBlock {
    /// 0-based index of the line holding the opening fence.
    line_idx: usize,
    /// First whitespace-delimited token of the info string; empty when there is none.
    language: String,
    /// The run of backticks or tildes that opens the block ("```" when the line
    /// starts with neither character).
    fence_marker: String,
}
44
/// Rule MD040: code blocks should have a language specified.
#[derive(Debug, Clone, Default)]
pub struct MD040FencedCodeLanguage {
    /// Rule options (label style, preferred aliases, allow/deny lists,
    /// unknown-language action).
    config: MD040Config,
}
49
50impl MD040FencedCodeLanguage {
51 pub fn with_config(config: MD040Config) -> Self {
52 Self { config }
53 }
54
55 fn validate_config(&self) -> Vec<String> {
57 let mut errors = Vec::new();
58
59 for (canonical, alias) in &self.config.preferred_aliases {
61 if let Some(actual_canonical) = resolve_canonical(canonical) {
63 if !is_valid_alias(actual_canonical, alias)
64 && let Some(valid_aliases) = get_aliases(actual_canonical)
65 {
66 let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
67 let valid_str = valid_list
68 .iter()
69 .map(|s| format!("'{s}'"))
70 .collect::<Vec<_>>()
71 .join(", ");
72 let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
73 errors.push(format!(
74 "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
75 ));
76 }
77 } else {
78 errors.push(format!(
79 "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
80 ));
81 }
82 }
83
84 errors
85 }
86
87 fn compute_preferred_labels(
89 &self,
90 blocks: &[FencedCodeBlock],
91 disabled_ranges: &[(usize, usize)],
92 ) -> HashMap<String, String> {
93 let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
95
96 for block in blocks {
97 if is_line_disabled(disabled_ranges, block.line_idx) {
98 continue;
99 }
100 if block.language.is_empty() {
101 continue;
102 }
103 if let Some(canonical) = resolve_canonical(&block.language) {
104 by_canonical
105 .entry(canonical.to_string())
106 .or_default()
107 .push(&block.language);
108 }
109 }
110
111 let mut result = HashMap::new();
113
114 for (canonical, labels) in by_canonical {
115 let winner = if let Some(preferred) = self
117 .config
118 .preferred_aliases
119 .iter()
120 .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
121 .map(|(_, v)| v.clone())
122 {
123 preferred
124 } else {
125 let mut counts: HashMap<&str, usize> = HashMap::new();
127 for label in &labels {
128 *counts.entry(*label).or_default() += 1;
129 }
130
131 let max_count = counts.values().max().copied().unwrap_or(0);
132 let winners: Vec<_> = counts
133 .iter()
134 .filter(|(_, c)| **c == max_count)
135 .map(|(l, _)| *l)
136 .collect();
137
138 if winners.len() == 1 {
139 winners[0].to_string()
140 } else {
141 default_alias(&canonical)
143 .filter(|default| winners.contains(default))
144 .map_or_else(
145 || winners.into_iter().min().unwrap().to_string(),
146 std::string::ToString::to_string,
147 )
148 }
149 };
150
151 result.insert(canonical, winner);
152 }
153
154 result
155 }
156
157 fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
159 if !self.config.allowed_languages.is_empty() {
161 let allowed = self.config.allowed_languages.join(", ");
162 let Some(canonical) = canonical else {
163 return Some(format!(
164 "Language '{original_label}' is not in the allowed list: {allowed}"
165 ));
166 };
167 if !self
168 .config
169 .allowed_languages
170 .iter()
171 .any(|a| a.eq_ignore_ascii_case(canonical))
172 {
173 return Some(format!(
174 "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
175 ));
176 }
177 } else if !self.config.disallowed_languages.is_empty()
178 && canonical.is_some_and(|canonical| {
179 self.config
180 .disallowed_languages
181 .iter()
182 .any(|d| d.eq_ignore_ascii_case(canonical))
183 })
184 {
185 let canonical = canonical.unwrap_or("unknown");
186 return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
187 }
188 None
189 }
190
191 fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
193 if resolve_canonical(label).is_some() {
194 return None;
195 }
196
197 match self.config.unknown_language_action {
198 UnknownLanguageAction::Ignore => None,
199 UnknownLanguageAction::Warn => Some((
200 format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
201 Severity::Warning,
202 )),
203 UnknownLanguageAction::Error => Some((
204 format!("Unknown language '{label}' (not in GitHub Linguist)"),
205 Severity::Error,
206 )),
207 }
208 }
209}
210
211impl Rule for MD040FencedCodeLanguage {
212 fn name(&self) -> &'static str {
213 "MD040"
214 }
215
216 fn description(&self) -> &'static str {
217 "Code blocks should have a language specified"
218 }
219
220 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
221 let content = ctx.content;
222 let mut warnings = Vec::new();
223
224 for error in self.validate_config() {
226 warnings.push(LintWarning {
227 rule_name: Some(self.name().to_string()),
228 line: 1,
229 column: 1,
230 end_line: 1,
231 end_column: 1,
232 message: format!("[config error] {error}"),
233 severity: Severity::Error,
234 fix: None,
235 });
236 }
237
238 let fenced_blocks = derive_fenced_code_blocks(ctx);
240
241 let disabled_ranges = compute_disabled_ranges(content, self.name());
243
244 let preferred_labels = if self.config.style == LanguageStyle::Consistent {
246 self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
247 } else {
248 HashMap::new()
249 };
250
251 let lines = ctx.raw_lines();
252
253 for block in &fenced_blocks {
254 if is_line_disabled(&disabled_ranges, block.line_idx) {
256 continue;
257 }
258
259 let line = lines.get(block.line_idx).unwrap_or(&"");
261 let trimmed = line.trim();
262 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
263
264 let has_mkdocs_attrs_only =
266 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
267
268 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
270 && after_fence.starts_with('{')
271 && after_fence.contains('}');
272
273 let needs_language =
276 !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
277
278 if needs_language && !has_quarto_syntax {
279 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
280
281 warnings.push(LintWarning {
282 rule_name: Some(self.name().to_string()),
283 line: start_line,
284 column: start_col,
285 end_line,
286 end_column: end_col,
287 message: "Code block (```) missing language".to_string(),
288 severity: Severity::Warning,
289 fix: Some(Fix {
290 range: {
291 let trimmed = line.trim_start();
292 let trimmed_start = line.len() - trimmed.len();
293 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
294 let fence_end_byte = line_start_byte + trimmed_start + block.fence_marker.len();
295 let line_end_byte = line_start_byte + line.len();
299 fence_end_byte..line_end_byte
300 },
301 replacement: {
302 let trimmed = line.trim_start();
303 let after_fence = &trimmed[block.fence_marker.len()..];
304 let after_fence_trimmed = after_fence.trim();
305 if after_fence_trimmed.is_empty() {
306 "text".to_string()
307 } else {
308 format!("text {after_fence_trimmed}")
309 }
310 },
311 }),
312 });
313 continue;
314 }
315
316 if has_quarto_syntax {
318 continue;
319 }
320
321 let canonical = resolve_canonical(&block.language);
322
323 if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
325 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
326
327 warnings.push(LintWarning {
328 rule_name: Some(self.name().to_string()),
329 line: start_line,
330 column: start_col,
331 end_line,
332 end_column: end_col,
333 message: msg,
334 severity: Severity::Warning,
335 fix: None,
336 });
337 continue;
338 }
339
340 if canonical.is_none() {
342 if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
343 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
344
345 warnings.push(LintWarning {
346 rule_name: Some(self.name().to_string()),
347 line: start_line,
348 column: start_col,
349 end_line,
350 end_column: end_col,
351 message: msg,
352 severity,
353 fix: None,
354 });
355 }
356 continue;
357 }
358
359 if self.config.style == LanguageStyle::Consistent
361 && let Some(preferred) = preferred_labels.get(canonical.unwrap())
362 && &block.language != preferred
363 {
364 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
365
366 let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
367 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
368 Fix {
369 range: (line_start_byte + label_start)..(line_start_byte + label_end),
370 replacement: preferred.clone(),
371 }
372 });
373 let lang = &block.language;
374 let canonical = canonical.unwrap();
375
376 warnings.push(LintWarning {
377 rule_name: Some(self.name().to_string()),
378 line: start_line,
379 column: start_col,
380 end_line,
381 end_column: end_col,
382 message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
383 severity: Severity::Warning,
384 fix,
385 });
386 }
387 }
388
389 Ok(warnings)
390 }
391
392 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
393 if self.should_skip(ctx) {
394 return Ok(ctx.content.to_string());
395 }
396 let warnings = self.check(ctx)?;
397 if warnings.is_empty() {
398 return Ok(ctx.content.to_string());
399 }
400 let warnings =
401 crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
402 crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
403 }
404
405 fn category(&self) -> RuleCategory {
407 RuleCategory::CodeBlock
408 }
409
410 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
412 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
413 }
414
415 fn as_any(&self) -> &dyn std::any::Any {
416 self
417 }
418
419 fn default_config_section(&self) -> Option<(String, toml::Value)> {
420 let default_config = MD040Config::default();
421 let json_value = serde_json::to_value(&default_config).ok()?;
422 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
423
424 if let toml::Value::Table(table) = toml_value {
425 if !table.is_empty() {
426 Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
427 } else {
428 None
429 }
430 } else {
431 None
432 }
433 }
434
435 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
436 where
437 Self: Sized,
438 {
439 let rule_config: MD040Config = load_rule_config(config);
440 Box::new(MD040FencedCodeLanguage::with_config(rule_config))
441 }
442}
443
444fn derive_fenced_code_blocks(ctx: &crate::lint_context::LintContext) -> Vec<FencedCodeBlock> {
446 let content = ctx.content;
447 let line_offsets = &ctx.line_offsets;
448
449 ctx.code_block_details
450 .iter()
451 .filter(|d| d.is_fenced)
452 .map(|detail| {
453 let line_idx = match line_offsets.binary_search(&detail.start) {
454 Ok(idx) => idx,
455 Err(idx) => idx.saturating_sub(1),
456 };
457
458 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
460 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
461 let line = content.get(line_start..line_end).unwrap_or("");
462 let trimmed = line.trim();
463 let fence_marker = if trimmed.starts_with('`') {
464 let count = trimmed.chars().take_while(|&c| c == '`').count();
465 "`".repeat(count)
466 } else if trimmed.starts_with('~') {
467 let count = trimmed.chars().take_while(|&c| c == '~').count();
468 "~".repeat(count)
469 } else {
470 "```".to_string()
471 };
472
473 let language = detail.info_string.split_whitespace().next().unwrap_or("").to_string();
474
475 FencedCodeBlock {
476 line_idx,
477 language,
478 fence_marker,
479 }
480 })
481 .collect()
482}
483
484fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
486 let mut ranges = Vec::new();
487 let mut disabled_start: Option<usize> = None;
488
489 for (i, line) in content.lines().enumerate() {
490 let trimmed = line.trim();
491
492 if let Some(rules) = crate::inline_config::parse_disable_comment(trimmed)
493 && (rules.is_empty() || rules.contains(&rule_name))
494 && disabled_start.is_none()
495 {
496 disabled_start = Some(i);
497 }
498
499 if let Some(rules) = crate::inline_config::parse_enable_comment(trimmed)
500 && (rules.is_empty() || rules.contains(&rule_name))
501 && let Some(start) = disabled_start.take()
502 {
503 ranges.push((start, i));
504 }
505 }
506
507 if let Some(start) = disabled_start {
509 ranges.push((start, usize::MAX));
510 }
511
512 ranges
513}
514
/// Returns `true` when `line_idx` falls inside any `(start, end)` range.
///
/// `start` is inclusive and `end` is exclusive.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    ranges
        .iter()
        .any(|&(start, end)| (start..end).contains(&line_idx))
}
519
/// Locates the language label on a fence-opening line.
///
/// `line` must start (after leading whitespace) with `fence_marker`. The label is
/// the first run of non-whitespace characters after the marker.
///
/// Returns the label's `(start, end)` byte offsets relative to the start of
/// `line`, or `None` when the line does not start with the marker or has nothing
/// but whitespace after it.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let unindented = line.trim_start();
    let indent_len = line.len() - unindented.len();

    let info = unindented.strip_prefix(fence_marker)?;

    // Byte offset of the first non-whitespace character after the marker.
    let label_offset = info.find(|c: char| !c.is_whitespace())?;
    // The label runs to the next whitespace, or to the end of the line.
    let label_len = info[label_offset..]
        .find(char::is_whitespace)
        .unwrap_or(info.len() - label_offset);

    let label_start = indent_len + fence_marker.len() + label_offset;
    Some((label_start, label_start + label_len))
}
544
/// Unit tests for MD040: missing-language detection and fixing, consistent
/// mode, allow/deny lists, unknown-language handling, config validation, the
/// Linguist lookup helpers, and MkDocs superfences attributes.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs `check` with the default configuration and the Standard flavor.
    fn run_check(content: &str) -> LintResult {
        let rule = MD040FencedCodeLanguage::default();
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx)
    }

    /// Runs `check` with a custom configuration and the Standard flavor.
    fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
        let rule = MD040FencedCodeLanguage::with_config(config);
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx)
    }

    /// Runs `fix` with the default configuration and the Standard flavor.
    fn run_fix(content: &str) -> Result<String, LintError> {
        let rule = MD040FencedCodeLanguage::default();
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.fix(&ctx)
    }

    /// Runs `fix` with a custom configuration and the Standard flavor.
    fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
        let rule = MD040FencedCodeLanguage::with_config(config);
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.fix(&ctx)
    }

    /// Runs `check` with the default configuration and the MkDocs flavor.
    fn run_check_mkdocs(content: &str) -> LintResult {
        let rule = MD040FencedCodeLanguage::default();
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
        rule.check(&ctx)
    }

    // ---- Baseline: missing-language detection and fix ----

    /// Blocks that carry a language produce no warnings.
    #[test]
    fn test_code_blocks_with_language_specified() {
        let content = r#"# Test

```python
print("Hello, world!")
```

```javascript
console.log("Hello!");
```
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }

    /// A bare fence is reported on the (1-based) line of the opening fence.
    #[test]
    fn test_code_blocks_without_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }

    /// The fix inserts `text` on bare fences and leaves labelled fences alone.
    #[test]
    fn test_fix_method_adds_text_language() {
        let content = r#"# Test

```
code without language
```

```python
already has language
```

```
another block without
```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);
    }

    /// The fix keeps the whitespace in front of an indented fence.
    #[test]
    fn test_fix_preserves_indentation() {
        let content = r#"# Test

- List item
 ```
 indented code block
 ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains(" ```text"));
    }

    // ---- Consistent mode ----

    /// The minority label (`sh`) is flagged against the majority (`bash`).
    #[test]
    fn test_consistent_mode_detects_inconsistency() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Inconsistent"));
        assert!(result[0].message.contains("sh"));
        assert!(result[0].message.contains("bash"));
    }

    /// The fix rewrites the minority label to the majority label.
    #[test]
    fn test_consistent_mode_fix_normalizes() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```bash").count(), 3);
        assert_eq!(fixed.matches("```sh").count(), 0);
    }

    /// With one `bash` and one `sh` block, the result is `bash` for both.
    #[test]
    fn test_consistent_mode_tie_break_uses_curated_default() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```bash").count(), 2);
    }

    /// A configured preferred alias overrides the vote.
    #[test]
    fn test_consistent_mode_with_preferred_alias() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "sh".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```sh").count(), 2);
        assert_eq!(fixed.matches("```bash").count(), 0);
    }

    /// Blocks inside a disabled region neither vote nor get flagged.
    #[test]
    fn test_consistent_mode_ignores_disabled_blocks() {
        let content = r#"```bash
echo hi
```
<!-- rumdl-disable MD040 -->
```sh
echo there
```
```sh
echo again
```
<!-- rumdl-enable MD040 -->
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty(), "Disabled blocks should not affect consistency");
    }

    /// Only the label is replaced; trailing attributes stay in place.
    #[test]
    fn test_fix_preserves_attributes() {
        let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("```bash {.highlight}"));
    }

    /// Whitespace between the fence and the label survives the fix.
    #[test]
    fn test_fix_preserves_spacing_before_label() {
        let content = "```bash\ncode\n```\n\n``` sh {.highlight}\ncode\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("``` bash {.highlight}"));
        assert!(!fixed.contains("``` sh {.highlight}"));
    }

    // ---- Allow-list / deny-list ----

    /// A language missing from the allow-list is reported.
    #[test]
    fn test_allowlist_blocks_unlisted() {
        let content = "```java\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("not in the allowed list"));
    }

    /// A language on the allow-list passes.
    #[test]
    fn test_allowlist_allows_listed() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }

    /// With an allow-list in place, unknown labels are rejected too.
    #[test]
    fn test_allowlist_blocks_unknown_language() {
        let content = "```mysterylang\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("allowed list"));
    }

    /// Allow-list entries are matched case-insensitively.
    #[test]
    fn test_allowlist_case_insensitive() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["PYTHON".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }

    /// A language on the deny-list is reported.
    #[test]
    fn test_denylist_blocks_listed() {
        let content = "```java\ncode\n```";
        let config = MD040Config {
            disallowed_languages: vec!["Java".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("disallowed"));
    }

    /// A language not on the deny-list passes.
    #[test]
    fn test_denylist_allows_unlisted() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            disallowed_languages: vec!["Java".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }

    /// When both lists name the same language, the allow-list wins.
    #[test]
    fn test_allowlist_takes_precedence_over_denylist() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            disallowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }

    // ---- Unknown-language handling ----

    /// The default configuration does not report unknown labels.
    #[test]
    fn test_unknown_language_ignore_default() {
        let content = "```mycustomlang\ncode\n```";
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Unknown languages ignored by default");
    }

    /// `Warn` reports unknown labels with warning severity.
    #[test]
    fn test_unknown_language_warn() {
        let content = "```mycustomlang\ncode\n```";
        let config = MD040Config {
            unknown_language_action: UnknownLanguageAction::Warn,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Unknown language"));
        assert!(result[0].message.contains("mycustomlang"));
        assert_eq!(result[0].severity, Severity::Warning);
    }

    /// `Error` reports unknown labels with error severity.
    #[test]
    fn test_unknown_language_error() {
        let content = "```mycustomlang\ncode\n```";
        let config = MD040Config {
            unknown_language_action: UnknownLanguageAction::Error,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Unknown language"));
        assert_eq!(result[0].severity, Severity::Error);
    }

    // ---- Configuration validation ----

    /// An alias that does not belong to the keyed language is rejected.
    #[test]
    fn test_invalid_preferred_alias_detected() {
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "invalid_alias".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);
        let errors = rule.validate_config();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("Invalid alias"));
        assert!(errors[0].contains("invalid_alias"));
    }

    /// A key that is not a known language is rejected.
    #[test]
    fn test_unknown_language_in_preferred_aliases_detected() {
        let mut preferred = HashMap::new();
        preferred.insert("NotARealLanguage".to_string(), "nope".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);
        let errors = rule.validate_config();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("Unknown language"));
    }

    /// Valid language/alias pairs produce no validation errors.
    #[test]
    fn test_valid_preferred_alias_accepted() {
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "bash".to_string());
        preferred.insert("JavaScript".to_string(), "js".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);
        let errors = rule.validate_config();
        assert!(errors.is_empty());
    }

    /// Config-error warnings carry 1-based line and column values.
    #[test]
    fn test_config_error_uses_valid_line_column() {
        let config = md040_config::MD040Config {
            preferred_aliases: {
                let mut map = std::collections::HashMap::new();
                map.insert("Shell".to_string(), "invalid_alias".to_string());
                map
            },
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);

        let content = "```shell\necho hello\n```";
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        let config_error = result.iter().find(|w| w.message.contains("[config error]"));
        assert!(config_error.is_some(), "Should have a config error warning");

        let warning = config_error.unwrap();
        assert!(
            warning.line >= 1,
            "Config error line should be >= 1, got {}",
            warning.line
        );
        assert!(
            warning.column >= 1,
            "Config error column should be >= 1, got {}",
            warning.column
        );
    }

    // ---- Linguist lookup helpers ----

    /// Aliases resolve to their canonical language; unknown labels to `None`.
    #[test]
    fn test_linguist_resolution() {
        assert_eq!(resolve_canonical("bash"), Some("Shell"));
        assert_eq!(resolve_canonical("sh"), Some("Shell"));
        assert_eq!(resolve_canonical("zsh"), Some("Shell"));
        assert_eq!(resolve_canonical("js"), Some("JavaScript"));
        assert_eq!(resolve_canonical("python"), Some("Python"));
        assert_eq!(resolve_canonical("unknown_lang"), None);
    }

    /// Resolution ignores ASCII case.
    #[test]
    fn test_linguist_resolution_case_insensitive() {
        assert_eq!(resolve_canonical("BASH"), Some("Shell"));
        assert_eq!(resolve_canonical("Bash"), Some("Shell"));
        assert_eq!(resolve_canonical("Python"), Some("Python"));
        assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
    }

    /// `is_valid_alias` accepts aliases of the language and rejects others.
    #[test]
    fn test_alias_validation() {
        assert!(is_valid_alias("Shell", "bash"));
        assert!(is_valid_alias("Shell", "sh"));
        assert!(is_valid_alias("Shell", "zsh"));
        assert!(!is_valid_alias("Shell", "python"));
        assert!(!is_valid_alias("Shell", "invalid"));
    }

    /// `default_alias` returns the expected default label per language.
    #[test]
    fn test_default_alias() {
        assert_eq!(default_alias("Shell"), Some("bash"));
        assert_eq!(default_alias("JavaScript"), Some("js"));
        assert_eq!(default_alias("Python"), Some("python"));
    }

    // ---- Edge cases ----

    /// Labels differing only in case count as distinct labels in consistent mode.
    #[test]
    fn test_mixed_case_labels_normalized() {
        let content = r#"```BASH
echo hi
```

```Bash
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
    }

    /// Each canonical language is evaluated independently of the others.
    #[test]
    fn test_multiple_languages_independent() {
        let content = r#"```bash
shell code
```

```python
python code
```

```sh
more shell
```

```python3
more python
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 2);
    }

    /// Tilde fences are checked and fixed like backtick fences.
    #[test]
    fn test_tilde_fences() {
        let content = r#"~~~bash
echo hi
~~~

~~~sh
echo there
~~~
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config.clone()).unwrap();
        assert_eq!(result.len(), 1);

        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("~~~bash"));
        assert!(!fixed.contains("~~~sh"));
    }

    /// Fences longer than three characters keep their length after a fix.
    #[test]
    fn test_longer_fence_markers_preserved() {
        let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("````bash"));
        assert!(fixed.contains("```bash"));
    }

    /// An empty document yields no warnings.
    #[test]
    fn test_empty_document() {
        let result = run_check("").unwrap();
        assert!(result.is_empty());
    }

    /// A document without code blocks yields no warnings.
    #[test]
    fn test_no_code_blocks() {
        let content = "# Just a heading\n\nSome text.";
        let result = run_check(content).unwrap();
        assert!(result.is_empty());
    }

    /// A single block cannot be inconsistent with itself.
    #[test]
    fn test_single_code_block_no_inconsistency() {
        let content = "```bash\necho hi\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty(), "Single block has no inconsistency");
    }

    /// Running the fix on already-fixed output changes nothing.
    #[test]
    fn test_idempotent_fix() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
        let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
        assert_eq!(fixed1, fixed2, "Fix should be idempotent");
    }

    // ---- MkDocs superfences attributes ----

    /// MkDocs: a `title=` attribute alone does not require a language.
    #[test]
    fn test_mkdocs_superfences_title_only() {
        let content = r#"```title="Example"
echo hi
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with title= should not require language"
        );
    }

    /// MkDocs: an `hl_lines=` attribute alone does not require a language.
    #[test]
    fn test_mkdocs_superfences_hl_lines() {
        let content = r#"```hl_lines="1 2"
line 1
line 2
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with hl_lines= should not require language"
        );
    }

    /// MkDocs: a `linenums=` attribute alone does not require a language.
    #[test]
    fn test_mkdocs_superfences_linenums() {
        let content = r#"```linenums="1"
line 1
line 2
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with linenums= should not require language"
        );
    }

    /// MkDocs: a `.class` attribute alone does not require a language.
    #[test]
    fn test_mkdocs_superfences_class() {
        let content = r#"```.my-class
some text
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with .class should not require language"
        );
    }

    /// MkDocs: an `#id` attribute alone does not require a language.
    #[test]
    fn test_mkdocs_superfences_id() {
        let content = r#"```#my-id
some text
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with #id should not require language"
        );
    }

    /// MkDocs: a language followed by attributes passes.
    #[test]
    fn test_mkdocs_superfences_with_language() {
        let content = r#"```python title="Example" hl_lines="1"
print("hello")
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(result.is_empty(), "Code block with language and attrs should pass");
    }

    /// Standard flavor: an attribute in place of a language is still reported.
    #[test]
    fn test_standard_flavor_no_special_handling() {
        let content = r#"```title="Example"
echo hi
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(
            result.len(),
            1,
            "Standard flavor should warn about title= without language"
        );
    }
}