1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
6use std::collections::HashMap;
7
8pub mod md040_config;
12use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
13
/// A fenced code block located in the document.
#[derive(Debug, Clone)]
struct FencedCodeBlock {
    /// 0-indexed line on which the opening fence sits.
    line_idx: usize,
    /// First whitespace-delimited token of the info string; empty when the
    /// fence carries no info string at all.
    language: String,
    /// The run of backticks or tildes that opens the block. Falls back to
    /// three backticks when the opening line does not begin with a fence
    /// character.
    fence_marker: String,
}
22
23#[derive(Debug, Clone, Default)]
24pub struct MD040FencedCodeLanguage {
25 config: MD040Config,
26}
27
28impl MD040FencedCodeLanguage {
29 pub fn new() -> Self {
30 Self::default()
31 }
32
33 pub fn with_config(config: MD040Config) -> Self {
34 Self { config }
35 }
36
37 fn validate_config(&self) -> Vec<String> {
39 let mut errors = Vec::new();
40
41 for (canonical, alias) in &self.config.preferred_aliases {
43 if let Some(actual_canonical) = resolve_canonical(canonical) {
45 if !is_valid_alias(actual_canonical, alias)
46 && let Some(valid_aliases) = get_aliases(actual_canonical)
47 {
48 let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
49 let valid_str = valid_list
50 .iter()
51 .map(|s| format!("'{s}'"))
52 .collect::<Vec<_>>()
53 .join(", ");
54 let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
55 errors.push(format!(
56 "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
57 ));
58 }
59 } else {
60 errors.push(format!(
61 "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
62 ));
63 }
64 }
65
66 errors
67 }
68
69 fn compute_preferred_labels(
71 &self,
72 blocks: &[FencedCodeBlock],
73 disabled_ranges: &[(usize, usize)],
74 ) -> HashMap<String, String> {
75 let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
77
78 for block in blocks {
79 if is_line_disabled(disabled_ranges, block.line_idx) {
80 continue;
81 }
82 if block.language.is_empty() {
83 continue;
84 }
85 if let Some(canonical) = resolve_canonical(&block.language) {
86 by_canonical
87 .entry(canonical.to_string())
88 .or_default()
89 .push(&block.language);
90 }
91 }
92
93 let mut result = HashMap::new();
95
96 for (canonical, labels) in by_canonical {
97 let winner = if let Some(preferred) = self
99 .config
100 .preferred_aliases
101 .iter()
102 .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
103 .map(|(_, v)| v.clone())
104 {
105 preferred
106 } else {
107 let mut counts: HashMap<&str, usize> = HashMap::new();
109 for label in &labels {
110 *counts.entry(*label).or_default() += 1;
111 }
112
113 let max_count = counts.values().max().copied().unwrap_or(0);
114 let winners: Vec<_> = counts
115 .iter()
116 .filter(|(_, c)| **c == max_count)
117 .map(|(l, _)| *l)
118 .collect();
119
120 if winners.len() == 1 {
121 winners[0].to_string()
122 } else {
123 default_alias(&canonical)
125 .filter(|default| winners.contains(default))
126 .map(|s| s.to_string())
127 .unwrap_or_else(|| winners.into_iter().min().unwrap().to_string())
128 }
129 };
130
131 result.insert(canonical, winner);
132 }
133
134 result
135 }
136
137 fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
139 if !self.config.allowed_languages.is_empty() {
141 let allowed = self.config.allowed_languages.join(", ");
142 let Some(canonical) = canonical else {
143 return Some(format!(
144 "Language '{original_label}' is not in the allowed list: {allowed}"
145 ));
146 };
147 if !self
148 .config
149 .allowed_languages
150 .iter()
151 .any(|a| a.eq_ignore_ascii_case(canonical))
152 {
153 return Some(format!(
154 "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
155 ));
156 }
157 } else if !self.config.disallowed_languages.is_empty()
158 && canonical.is_some_and(|canonical| {
159 self.config
160 .disallowed_languages
161 .iter()
162 .any(|d| d.eq_ignore_ascii_case(canonical))
163 })
164 {
165 let canonical = canonical.unwrap_or("unknown");
166 return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
167 }
168 None
169 }
170
171 fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
173 if resolve_canonical(label).is_some() {
174 return None;
175 }
176
177 match self.config.unknown_language_action {
178 UnknownLanguageAction::Ignore => None,
179 UnknownLanguageAction::Warn => Some((
180 format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
181 Severity::Warning,
182 )),
183 UnknownLanguageAction::Error => Some((
184 format!("Unknown language '{label}' (not in GitHub Linguist)"),
185 Severity::Error,
186 )),
187 }
188 }
189}
190
191impl Rule for MD040FencedCodeLanguage {
192 fn name(&self) -> &'static str {
193 "MD040"
194 }
195
196 fn description(&self) -> &'static str {
197 "Code blocks should have a language specified"
198 }
199
200 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
201 let content = ctx.content;
202 let mut warnings = Vec::new();
203
204 for error in self.validate_config() {
206 warnings.push(LintWarning {
207 rule_name: Some(self.name().to_string()),
208 line: 0,
209 column: 0,
210 end_line: 0,
211 end_column: 0,
212 message: format!("[config error] {error}"),
213 severity: Severity::Error,
214 fix: None,
215 });
216 }
217
218 let fenced_blocks = detect_fenced_code_blocks(content, &ctx.line_offsets);
220
221 let disabled_ranges = compute_disabled_ranges(content, self.name());
223
224 let preferred_labels = if self.config.style == LanguageStyle::Consistent {
226 self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
227 } else {
228 HashMap::new()
229 };
230
231 for block in &fenced_blocks {
232 if is_line_disabled(&disabled_ranges, block.line_idx) {
234 continue;
235 }
236
237 let line = content.lines().nth(block.line_idx).unwrap_or("");
239 let trimmed = line.trim();
240 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
241
242 let has_title_only =
244 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
245
246 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
248 && after_fence.starts_with('{')
249 && after_fence.contains('}');
250
251 if (block.language.is_empty() || has_title_only) && !has_quarto_syntax {
253 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
254
255 warnings.push(LintWarning {
256 rule_name: Some(self.name().to_string()),
257 line: start_line,
258 column: start_col,
259 end_line,
260 end_column: end_col,
261 message: "Code block (```) missing language".to_string(),
262 severity: Severity::Warning,
263 fix: Some(Fix {
264 range: {
265 let trimmed_start = line.len() - line.trim_start().len();
266 let fence_len = block.fence_marker.len();
267 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
268 let fence_start_byte = line_start_byte + trimmed_start;
269 let fence_end_byte = fence_start_byte + fence_len;
270 fence_start_byte..fence_end_byte
271 },
272 replacement: format!("{}text", block.fence_marker),
273 }),
274 });
275 continue;
276 }
277
278 if has_quarto_syntax {
280 continue;
281 }
282
283 let canonical = resolve_canonical(&block.language);
284
285 if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
287 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
288
289 warnings.push(LintWarning {
290 rule_name: Some(self.name().to_string()),
291 line: start_line,
292 column: start_col,
293 end_line,
294 end_column: end_col,
295 message: msg,
296 severity: Severity::Warning,
297 fix: None,
298 });
299 continue;
300 }
301
302 if canonical.is_none() {
304 if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
305 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
306
307 warnings.push(LintWarning {
308 rule_name: Some(self.name().to_string()),
309 line: start_line,
310 column: start_col,
311 end_line,
312 end_column: end_col,
313 message: msg,
314 severity,
315 fix: None,
316 });
317 }
318 continue;
319 }
320
321 if self.config.style == LanguageStyle::Consistent
323 && let Some(preferred) = preferred_labels.get(canonical.unwrap())
324 && &block.language != preferred
325 {
326 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
327
328 let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
329 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
330 Fix {
331 range: (line_start_byte + label_start)..(line_start_byte + label_end),
332 replacement: preferred.clone(),
333 }
334 });
335 let lang = &block.language;
336 let canonical = canonical.unwrap();
337
338 warnings.push(LintWarning {
339 rule_name: Some(self.name().to_string()),
340 line: start_line,
341 column: start_col,
342 end_line,
343 end_column: end_col,
344 message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
345 severity: Severity::Warning,
346 fix,
347 });
348 }
349 }
350
351 Ok(warnings)
352 }
353
354 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
355 let content = ctx.content;
356
357 let fenced_blocks = detect_fenced_code_blocks(content, &ctx.line_offsets);
359
360 let disabled_ranges = compute_disabled_ranges(content, self.name());
362
363 let preferred_labels = if self.config.style == LanguageStyle::Consistent {
365 self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
366 } else {
367 HashMap::new()
368 };
369
370 let mut lines_to_fix: std::collections::HashMap<usize, FixAction> = std::collections::HashMap::new();
372
373 for block in &fenced_blocks {
374 if is_line_disabled(&disabled_ranges, block.line_idx) {
375 continue;
376 }
377
378 let line = content.lines().nth(block.line_idx).unwrap_or("");
379 let trimmed = line.trim();
380 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
381
382 let has_title_only =
383 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && after_fence.starts_with("title=");
384
385 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
386 && after_fence.starts_with('{')
387 && after_fence.contains('}');
388
389 if (block.language.is_empty() || has_title_only) && !has_quarto_syntax {
390 lines_to_fix.insert(
391 block.line_idx,
392 FixAction::AddLanguage {
393 fence_marker: block.fence_marker.clone(),
394 has_title_only,
395 },
396 );
397 } else if !has_quarto_syntax
398 && self.config.style == LanguageStyle::Consistent
399 && let Some(canonical) = resolve_canonical(&block.language)
400 && let Some(preferred) = preferred_labels.get(canonical)
401 && &block.language != preferred
402 {
403 lines_to_fix.insert(
404 block.line_idx,
405 FixAction::NormalizeLabel {
406 fence_marker: block.fence_marker.clone(),
407 new_label: preferred.clone(),
408 },
409 );
410 }
411 }
412
413 let mut result = String::new();
415 for (i, line) in content.lines().enumerate() {
416 if let Some(action) = lines_to_fix.get(&i) {
417 match action {
418 FixAction::AddLanguage {
419 fence_marker,
420 has_title_only,
421 } => {
422 let indent = &line[..line.len() - line.trim_start().len()];
423 let trimmed = line.trim();
424 let after_fence = trimmed.strip_prefix(fence_marker).unwrap_or("").trim();
425
426 if *has_title_only {
427 result.push_str(&format!("{indent}{fence_marker}text {after_fence}\n"));
428 } else {
429 result.push_str(&format!("{indent}{fence_marker}text\n"));
430 }
431 }
432 FixAction::NormalizeLabel {
433 fence_marker,
434 new_label,
435 } => {
436 if let Some((label_start, label_end)) = find_label_span(line, fence_marker) {
437 result.push_str(&line[..label_start]);
438 result.push_str(new_label);
439 result.push_str(&line[label_end..]);
440 result.push('\n');
441 } else {
442 result.push_str(line);
443 result.push('\n');
444 }
445 }
446 }
447 } else {
448 result.push_str(line);
449 result.push('\n');
450 }
451 }
452
453 if !content.ends_with('\n') {
455 result.pop();
456 }
457
458 Ok(result)
459 }
460
461 fn category(&self) -> RuleCategory {
463 RuleCategory::CodeBlock
464 }
465
466 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
468 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
469 }
470
471 fn as_any(&self) -> &dyn std::any::Any {
472 self
473 }
474
475 fn default_config_section(&self) -> Option<(String, toml::Value)> {
476 let default_config = MD040Config::default();
477 let json_value = serde_json::to_value(&default_config).ok()?;
478 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
479
480 if let toml::Value::Table(table) = toml_value {
481 if !table.is_empty() {
482 Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
483 } else {
484 None
485 }
486 } else {
487 None
488 }
489 }
490
491 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
492 where
493 Self: Sized,
494 {
495 let rule_config: MD040Config = load_rule_config(config);
496 Box::new(MD040FencedCodeLanguage::with_config(rule_config))
497 }
498}
499
/// Edit to apply to the opening line of a fenced code block.
#[derive(Clone, Debug)]
enum FixAction {
    /// The fence has no language: add `text` after the fence marker.
    AddLanguage {
        /// Fence marker of the block.
        fence_marker: String,
        /// Whether the rest of the info string is kept after the added
        /// language.
        has_title_only: bool,
    },
    /// Replace the existing language label with `new_label`.
    NormalizeLabel {
        /// Fence marker of the block.
        fence_marker: String,
        /// Label to write in place of the current one.
        new_label: String,
    },
}
505
506fn detect_fenced_code_blocks(content: &str, line_offsets: &[usize]) -> Vec<FencedCodeBlock> {
508 let mut blocks = Vec::new();
509 let options = Options::all();
510 let parser = Parser::new_ext(content, options).into_offset_iter();
511
512 for (event, range) in parser {
513 if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) = event {
514 let line_idx = line_idx_from_offset(line_offsets, range.start);
516
517 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
519 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
520 let line = content.get(line_start..line_end).unwrap_or("");
521 let trimmed = line.trim();
522 let fence_marker = if trimmed.starts_with('`') {
523 let count = trimmed.chars().take_while(|&c| c == '`').count();
524 "`".repeat(count)
525 } else if trimmed.starts_with('~') {
526 let count = trimmed.chars().take_while(|&c| c == '~').count();
527 "~".repeat(count)
528 } else {
529 "```".to_string() };
531
532 let language = info.split_whitespace().next().unwrap_or("").to_string();
534
535 blocks.push(FencedCodeBlock {
536 line_idx,
537 language,
538 fence_marker,
539 });
540 }
541 }
542
543 blocks
544}
545
/// Maps a byte offset to the index of the line containing it.
///
/// `line_offsets` holds the starting byte offset of each line and is searched
/// with a binary search. An offset that is not itself a line start belongs to
/// the preceding entry; the result never goes below 0.
#[inline]
fn line_idx_from_offset(line_offsets: &[usize], offset: usize) -> usize {
    line_offsets
        .binary_search(&offset)
        .unwrap_or_else(|insertion_point| insertion_point.saturating_sub(1))
}
553
554fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
556 let mut ranges = Vec::new();
557 let mut disabled_start: Option<usize> = None;
558
559 for (i, line) in content.lines().enumerate() {
560 let trimmed = line.trim();
561
562 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
563 && (rules.is_empty() || rules.contains(&rule_name))
564 && disabled_start.is_none()
565 {
566 disabled_start = Some(i);
567 }
568
569 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
570 && (rules.is_empty() || rules.contains(&rule_name))
571 && let Some(start) = disabled_start.take()
572 {
573 ranges.push((start, i));
574 }
575 }
576
577 if let Some(start) = disabled_start {
579 ranges.push((start, usize::MAX));
580 }
581
582 ranges
583}
584
/// Returns `true` when `line_idx` lies in any of the half-open
/// `[start, end)` ranges.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    ranges
        .iter()
        .any(|&(start, end)| (start..end).contains(&line_idx))
}
589
/// Locates the language label on an opening fence line.
///
/// Returns the byte range `(start, end)` of the label within `line`: the
/// first run of non-whitespace characters after `fence_marker`. Returns
/// `None` when the line, after its leading whitespace, does not start with
/// `fence_marker`, or when only whitespace follows the marker.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let info = line.trim_start().strip_prefix(fence_marker)?;

    let label_onwards = info.trim_start();
    if label_onwards.is_empty() {
        return None;
    }

    // `label_onwards` is a suffix of `line`, so its length gives the start.
    let label_start = line.len() - label_onwards.len();
    let label_len = label_onwards
        .find(char::is_whitespace)
        .unwrap_or(label_onwards.len());

    Some((label_start, label_start + label_len))
}
615
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Two shell blocks, labelled `bash` and `sh`.
    const BASH_THEN_SH: &str = "```bash\necho hi\n```\n\n```sh\necho there\n```\n";

    /// Three shell blocks, labelled `bash`, `sh` and `bash`.
    const BASH_SH_BASH: &str = "```bash\necho hi\n```\n\n```sh\necho there\n```\n\n```bash\necho again\n```\n";

    /// Default configuration with the `consistent` style switched on.
    fn consistent_config() -> MD040Config {
        MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        }
    }

    fn run_check(content: &str) -> LintResult {
        run_check_with_config(content, MD040Config::default())
    }

    fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        MD040FencedCodeLanguage::with_config(config).check(&ctx)
    }

    fn run_fix(content: &str) -> Result<String, LintError> {
        run_fix_with_config(content, MD040Config::default())
    }

    fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        MD040FencedCodeLanguage::with_config(config).fix(&ctx)
    }

    // ---- missing language -------------------------------------------------

    #[test]
    fn test_code_blocks_with_language_specified() {
        let content = r#"# Test

```python
print("Hello, world!")
```

```javascript
console.log("Hello!");
```
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }

    #[test]
    fn test_code_blocks_without_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }

    #[test]
    fn test_fix_method_adds_text_language() {
        let content = r#"# Test

```
code without language
```

```python
already has language
```

```
another block without
```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);
    }

    #[test]
    fn test_fix_preserves_indentation() {
        let content = r#"# Test

- List item
  ```
  indented code block
  ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("  ```text"));
    }

    // ---- consistent mode --------------------------------------------------

    #[test]
    fn test_consistent_mode_detects_inconsistency() {
        let result = run_check_with_config(BASH_SH_BASH, consistent_config()).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Inconsistent"));
        assert!(result[0].message.contains("sh"));
        assert!(result[0].message.contains("bash"));
    }

    #[test]
    fn test_consistent_mode_fix_normalizes() {
        let fixed = run_fix_with_config(BASH_SH_BASH, consistent_config()).unwrap();
        assert_eq!(fixed.matches("```bash").count(), 3);
        assert_eq!(fixed.matches("```sh").count(), 0);
    }

    #[test]
    fn test_consistent_mode_tie_break_uses_curated_default() {
        let fixed = run_fix_with_config(BASH_THEN_SH, consistent_config()).unwrap();
        assert_eq!(fixed.matches("```bash").count(), 2);
    }

    #[test]
    fn test_consistent_mode_with_preferred_alias() {
        let config = MD040Config {
            preferred_aliases: HashMap::from([("Shell".to_string(), "sh".to_string())]),
            ..consistent_config()
        };
        let fixed = run_fix_with_config(BASH_THEN_SH, config).unwrap();
        assert_eq!(fixed.matches("```sh").count(), 2);
        assert_eq!(fixed.matches("```bash").count(), 0);
    }

    #[test]
    fn test_consistent_mode_ignores_disabled_blocks() {
        let content = r#"```bash
echo hi
```
<!-- rumdl-disable MD040 -->
```sh
echo there
```
```sh
echo again
```
<!-- rumdl-enable MD040 -->
"#;
        let result = run_check_with_config(content, consistent_config()).unwrap();
        assert!(result.is_empty(), "Disabled blocks should not affect consistency");
    }

    #[test]
    fn test_fix_preserves_attributes() {
        let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
        let fixed = run_fix_with_config(content, consistent_config()).unwrap();
        assert!(fixed.contains("```bash {.highlight}"));
    }

    #[test]
    fn test_fix_preserves_spacing_before_label() {
        let content = "```bash\ncode\n```\n\n``` sh {.highlight}\ncode\n```";
        let fixed = run_fix_with_config(content, consistent_config()).unwrap();
        assert!(fixed.contains("``` bash {.highlight}"));
        assert!(!fixed.contains("``` sh {.highlight}"));
    }

    // ---- allow-list / deny-list -------------------------------------------

    #[test]
    fn test_allowlist_blocks_unlisted() {
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config("```java\ncode\n```", config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("not in the allowed list"));
    }

    #[test]
    fn test_allowlist_allows_listed() {
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config("```python\ncode\n```", config).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_allowlist_blocks_unknown_language() {
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config("```mysterylang\ncode\n```", config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("allowed list"));
    }

    #[test]
    fn test_allowlist_case_insensitive() {
        let config = MD040Config {
            allowed_languages: vec!["PYTHON".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config("```python\ncode\n```", config).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_denylist_blocks_listed() {
        let config = MD040Config {
            disallowed_languages: vec!["Java".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config("```java\ncode\n```", config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("disallowed"));
    }

    #[test]
    fn test_denylist_allows_unlisted() {
        let config = MD040Config {
            disallowed_languages: vec!["Java".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config("```python\ncode\n```", config).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_allowlist_takes_precedence_over_denylist() {
        // The deny-list is not consulted once an allow-list is configured.
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            disallowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config("```python\ncode\n```", config).unwrap();
        assert!(result.is_empty());
    }

    // ---- unknown languages ------------------------------------------------

    #[test]
    fn test_unknown_language_ignore_default() {
        let result = run_check("```mycustomlang\ncode\n```").unwrap();
        assert!(result.is_empty(), "Unknown languages ignored by default");
    }

    #[test]
    fn test_unknown_language_warn() {
        let config = MD040Config {
            unknown_language_action: UnknownLanguageAction::Warn,
            ..Default::default()
        };
        let result = run_check_with_config("```mycustomlang\ncode\n```", config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Unknown language"));
        assert!(result[0].message.contains("mycustomlang"));
        assert_eq!(result[0].severity, Severity::Warning);
    }

    #[test]
    fn test_unknown_language_error() {
        let config = MD040Config {
            unknown_language_action: UnknownLanguageAction::Error,
            ..Default::default()
        };
        let result = run_check_with_config("```mycustomlang\ncode\n```", config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Unknown language"));
        assert_eq!(result[0].severity, Severity::Error);
    }

    // ---- configuration validation -----------------------------------------

    #[test]
    fn test_invalid_preferred_alias_detected() {
        let config = MD040Config {
            preferred_aliases: HashMap::from([("Shell".to_string(), "invalid_alias".to_string())]),
            ..consistent_config()
        };
        let errors = MD040FencedCodeLanguage::with_config(config).validate_config();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("Invalid alias"));
        assert!(errors[0].contains("invalid_alias"));
    }

    #[test]
    fn test_unknown_language_in_preferred_aliases_detected() {
        let config = MD040Config {
            preferred_aliases: HashMap::from([("NotARealLanguage".to_string(), "nope".to_string())]),
            ..consistent_config()
        };
        let errors = MD040FencedCodeLanguage::with_config(config).validate_config();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("Unknown language"));
    }

    #[test]
    fn test_valid_preferred_alias_accepted() {
        let config = MD040Config {
            preferred_aliases: HashMap::from([
                ("Shell".to_string(), "bash".to_string()),
                ("JavaScript".to_string(), "js".to_string()),
            ]),
            ..consistent_config()
        };
        let errors = MD040FencedCodeLanguage::with_config(config).validate_config();
        assert!(errors.is_empty());
    }

    // ---- language data ----------------------------------------------------

    #[test]
    fn test_linguist_resolution() {
        assert_eq!(resolve_canonical("bash"), Some("Shell"));
        assert_eq!(resolve_canonical("sh"), Some("Shell"));
        assert_eq!(resolve_canonical("zsh"), Some("Shell"));
        assert_eq!(resolve_canonical("js"), Some("JavaScript"));
        assert_eq!(resolve_canonical("python"), Some("Python"));
        assert_eq!(resolve_canonical("unknown_lang"), None);
    }

    #[test]
    fn test_linguist_resolution_case_insensitive() {
        assert_eq!(resolve_canonical("BASH"), Some("Shell"));
        assert_eq!(resolve_canonical("Bash"), Some("Shell"));
        assert_eq!(resolve_canonical("Python"), Some("Python"));
        assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
    }

    #[test]
    fn test_alias_validation() {
        assert!(is_valid_alias("Shell", "bash"));
        assert!(is_valid_alias("Shell", "sh"));
        assert!(is_valid_alias("Shell", "zsh"));
        assert!(!is_valid_alias("Shell", "python"));
        assert!(!is_valid_alias("Shell", "invalid"));
    }

    #[test]
    fn test_default_alias() {
        assert_eq!(default_alias("Shell"), Some("bash"));
        assert_eq!(default_alias("JavaScript"), Some("js"));
        assert_eq!(default_alias("Python"), Some("python"));
    }

    // ---- edge cases -------------------------------------------------------

    #[test]
    fn test_mixed_case_labels_normalized() {
        let content = r#"```BASH
echo hi
```

```Bash
echo there
```

```bash
echo again
```
"#;
        let result = run_check_with_config(content, consistent_config()).unwrap();
        assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
    }

    #[test]
    fn test_multiple_languages_independent() {
        let content = r#"```bash
shell code
```

```python
python code
```

```sh
more shell
```

```python3
more python
```
"#;
        let result = run_check_with_config(content, consistent_config()).unwrap();
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_tilde_fences() {
        let content = r#"~~~bash
echo hi
~~~

~~~sh
echo there
~~~
"#;
        let result = run_check_with_config(content, consistent_config()).unwrap();
        assert_eq!(result.len(), 1);

        let fixed = run_fix_with_config(content, consistent_config()).unwrap();
        assert!(fixed.contains("~~~bash"));
        assert!(!fixed.contains("~~~sh"));
    }

    #[test]
    fn test_longer_fence_markers_preserved() {
        let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
        let fixed = run_fix_with_config(content, consistent_config()).unwrap();
        assert!(fixed.contains("````bash"));
        assert!(fixed.contains("```bash"));
    }

    #[test]
    fn test_empty_document() {
        let result = run_check("").unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_no_code_blocks() {
        let result = run_check("# Just a heading\n\nSome text.").unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_single_code_block_no_inconsistency() {
        let result = run_check_with_config("```bash\necho hi\n```", consistent_config()).unwrap();
        assert!(result.is_empty(), "Single block has no inconsistency");
    }

    #[test]
    fn test_idempotent_fix() {
        let fixed1 = run_fix_with_config(BASH_THEN_SH, consistent_config()).unwrap();
        let fixed2 = run_fix_with_config(&fixed1, consistent_config()).unwrap();
        assert_eq!(fixed1, fixed2, "Fix should be idempotent");
    }
}