1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
6use std::collections::HashMap;
7
8pub mod md040_config;
12
/// Prefixes that `is_superfences_attribute` matches against the start of a
/// fence info string to recognise an attribute (`title=`, `hl_lines=`,
/// `linenums=`, a `.class` or a `#id`) rather than a language name.
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &[
    "title=", "hl_lines=", "linenums=", ".", "#", ];
27
28#[inline]
30fn is_superfences_attribute(s: &str) -> bool {
31 MKDOCS_SUPERFENCES_ATTR_PREFIXES
32 .iter()
33 .any(|prefix| s.starts_with(prefix))
34}
35use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
36
/// One fenced code block found by `detect_fenced_code_blocks`.
struct FencedCodeBlock {
    /// Zero-based index of the line holding the opening fence.
    line_idx: usize,
    /// First whitespace-delimited token of the info string; empty when the
    /// fence has no info string.
    language: String,
    /// The run of backticks or tildes that opens the block.
    fence_marker: String,
}
45
/// Rule MD040: fenced code blocks should have a language specified.
///
/// Depending on `config`, the rule also checks allow/deny lists, unknown
/// languages, and label consistency.
#[derive(Debug, Clone, Default)]
pub struct MD040FencedCodeLanguage {
    /// Rule configuration (style, preferred aliases, allow/deny lists, ...).
    config: MD040Config,
}
50
51impl MD040FencedCodeLanguage {
52 pub fn new() -> Self {
53 Self::default()
54 }
55
    /// Creates the rule with an explicit configuration.
    pub fn with_config(config: MD040Config) -> Self {
        Self { config }
    }
59
60 fn validate_config(&self) -> Vec<String> {
62 let mut errors = Vec::new();
63
64 for (canonical, alias) in &self.config.preferred_aliases {
66 if let Some(actual_canonical) = resolve_canonical(canonical) {
68 if !is_valid_alias(actual_canonical, alias)
69 && let Some(valid_aliases) = get_aliases(actual_canonical)
70 {
71 let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
72 let valid_str = valid_list
73 .iter()
74 .map(|s| format!("'{s}'"))
75 .collect::<Vec<_>>()
76 .join(", ");
77 let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
78 errors.push(format!(
79 "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
80 ));
81 }
82 } else {
83 errors.push(format!(
84 "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
85 ));
86 }
87 }
88
89 errors
90 }
91
92 fn compute_preferred_labels(
94 &self,
95 blocks: &[FencedCodeBlock],
96 disabled_ranges: &[(usize, usize)],
97 ) -> HashMap<String, String> {
98 let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
100
101 for block in blocks {
102 if is_line_disabled(disabled_ranges, block.line_idx) {
103 continue;
104 }
105 if block.language.is_empty() {
106 continue;
107 }
108 if let Some(canonical) = resolve_canonical(&block.language) {
109 by_canonical
110 .entry(canonical.to_string())
111 .or_default()
112 .push(&block.language);
113 }
114 }
115
116 let mut result = HashMap::new();
118
119 for (canonical, labels) in by_canonical {
120 let winner = if let Some(preferred) = self
122 .config
123 .preferred_aliases
124 .iter()
125 .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
126 .map(|(_, v)| v.clone())
127 {
128 preferred
129 } else {
130 let mut counts: HashMap<&str, usize> = HashMap::new();
132 for label in &labels {
133 *counts.entry(*label).or_default() += 1;
134 }
135
136 let max_count = counts.values().max().copied().unwrap_or(0);
137 let winners: Vec<_> = counts
138 .iter()
139 .filter(|(_, c)| **c == max_count)
140 .map(|(l, _)| *l)
141 .collect();
142
143 if winners.len() == 1 {
144 winners[0].to_string()
145 } else {
146 default_alias(&canonical)
148 .filter(|default| winners.contains(default))
149 .map(|s| s.to_string())
150 .unwrap_or_else(|| winners.into_iter().min().unwrap().to_string())
151 }
152 };
153
154 result.insert(canonical, winner);
155 }
156
157 result
158 }
159
160 fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
162 if !self.config.allowed_languages.is_empty() {
164 let allowed = self.config.allowed_languages.join(", ");
165 let Some(canonical) = canonical else {
166 return Some(format!(
167 "Language '{original_label}' is not in the allowed list: {allowed}"
168 ));
169 };
170 if !self
171 .config
172 .allowed_languages
173 .iter()
174 .any(|a| a.eq_ignore_ascii_case(canonical))
175 {
176 return Some(format!(
177 "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
178 ));
179 }
180 } else if !self.config.disallowed_languages.is_empty()
181 && canonical.is_some_and(|canonical| {
182 self.config
183 .disallowed_languages
184 .iter()
185 .any(|d| d.eq_ignore_ascii_case(canonical))
186 })
187 {
188 let canonical = canonical.unwrap_or("unknown");
189 return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
190 }
191 None
192 }
193
194 fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
196 if resolve_canonical(label).is_some() {
197 return None;
198 }
199
200 match self.config.unknown_language_action {
201 UnknownLanguageAction::Ignore => None,
202 UnknownLanguageAction::Warn => Some((
203 format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
204 Severity::Warning,
205 )),
206 UnknownLanguageAction::Error => Some((
207 format!("Unknown language '{label}' (not in GitHub Linguist)"),
208 Severity::Error,
209 )),
210 }
211 }
212}
213
214impl Rule for MD040FencedCodeLanguage {
    /// Rule identifier; also used to match disable/enable comments.
    fn name(&self) -> &'static str {
        "MD040"
    }
218
    /// One-line, human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Code blocks should have a language specified"
    }
222
223 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
224 let content = ctx.content;
225 let mut warnings = Vec::new();
226
227 for error in self.validate_config() {
229 warnings.push(LintWarning {
230 rule_name: Some(self.name().to_string()),
231 line: 0,
232 column: 0,
233 end_line: 0,
234 end_column: 0,
235 message: format!("[config error] {error}"),
236 severity: Severity::Error,
237 fix: None,
238 });
239 }
240
241 let fenced_blocks = detect_fenced_code_blocks(content, &ctx.line_offsets);
243
244 let disabled_ranges = compute_disabled_ranges(content, self.name());
246
247 let preferred_labels = if self.config.style == LanguageStyle::Consistent {
249 self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
250 } else {
251 HashMap::new()
252 };
253
254 for block in &fenced_blocks {
255 if is_line_disabled(&disabled_ranges, block.line_idx) {
257 continue;
258 }
259
260 let line = content.lines().nth(block.line_idx).unwrap_or("");
262 let trimmed = line.trim();
263 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
264
265 let has_mkdocs_attrs_only =
267 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
268
269 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
271 && after_fence.starts_with('{')
272 && after_fence.contains('}');
273
274 let needs_language =
277 !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
278
279 if needs_language && !has_quarto_syntax {
280 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
281
282 warnings.push(LintWarning {
283 rule_name: Some(self.name().to_string()),
284 line: start_line,
285 column: start_col,
286 end_line,
287 end_column: end_col,
288 message: "Code block (```) missing language".to_string(),
289 severity: Severity::Warning,
290 fix: Some(Fix {
291 range: {
292 let trimmed_start = line.len() - line.trim_start().len();
293 let fence_len = block.fence_marker.len();
294 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
295 let fence_start_byte = line_start_byte + trimmed_start;
296 let fence_end_byte = fence_start_byte + fence_len;
297 fence_start_byte..fence_end_byte
298 },
299 replacement: format!("{}text", block.fence_marker),
300 }),
301 });
302 continue;
303 }
304
305 if has_quarto_syntax {
307 continue;
308 }
309
310 let canonical = resolve_canonical(&block.language);
311
312 if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
314 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
315
316 warnings.push(LintWarning {
317 rule_name: Some(self.name().to_string()),
318 line: start_line,
319 column: start_col,
320 end_line,
321 end_column: end_col,
322 message: msg,
323 severity: Severity::Warning,
324 fix: None,
325 });
326 continue;
327 }
328
329 if canonical.is_none() {
331 if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
332 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
333
334 warnings.push(LintWarning {
335 rule_name: Some(self.name().to_string()),
336 line: start_line,
337 column: start_col,
338 end_line,
339 end_column: end_col,
340 message: msg,
341 severity,
342 fix: None,
343 });
344 }
345 continue;
346 }
347
348 if self.config.style == LanguageStyle::Consistent
350 && let Some(preferred) = preferred_labels.get(canonical.unwrap())
351 && &block.language != preferred
352 {
353 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
354
355 let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
356 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
357 Fix {
358 range: (line_start_byte + label_start)..(line_start_byte + label_end),
359 replacement: preferred.clone(),
360 }
361 });
362 let lang = &block.language;
363 let canonical = canonical.unwrap();
364
365 warnings.push(LintWarning {
366 rule_name: Some(self.name().to_string()),
367 line: start_line,
368 column: start_col,
369 end_line,
370 end_column: end_col,
371 message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
372 severity: Severity::Warning,
373 fix,
374 });
375 }
376 }
377
378 Ok(warnings)
379 }
380
381 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
382 let content = ctx.content;
383
384 let fenced_blocks = detect_fenced_code_blocks(content, &ctx.line_offsets);
386
387 let disabled_ranges = compute_disabled_ranges(content, self.name());
389
390 let preferred_labels = if self.config.style == LanguageStyle::Consistent {
392 self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
393 } else {
394 HashMap::new()
395 };
396
397 let mut lines_to_fix: std::collections::HashMap<usize, FixAction> = std::collections::HashMap::new();
399
400 for block in &fenced_blocks {
401 if is_line_disabled(&disabled_ranges, block.line_idx) {
402 continue;
403 }
404
405 let line = content.lines().nth(block.line_idx).unwrap_or("");
406 let trimmed = line.trim();
407 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
408
409 let has_mkdocs_attrs_only =
410 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
411
412 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
413 && after_fence.starts_with('{')
414 && after_fence.contains('}');
415
416 let needs_language =
417 !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
418
419 if needs_language && !has_quarto_syntax {
420 lines_to_fix.insert(
421 block.line_idx,
422 FixAction::AddLanguage {
423 fence_marker: block.fence_marker.clone(),
424 has_mkdocs_attrs_only,
425 },
426 );
427 } else if !has_quarto_syntax
428 && self.config.style == LanguageStyle::Consistent
429 && let Some(canonical) = resolve_canonical(&block.language)
430 && let Some(preferred) = preferred_labels.get(canonical)
431 && &block.language != preferred
432 {
433 lines_to_fix.insert(
434 block.line_idx,
435 FixAction::NormalizeLabel {
436 fence_marker: block.fence_marker.clone(),
437 new_label: preferred.clone(),
438 },
439 );
440 }
441 }
442
443 let mut result = String::new();
445 for (i, line) in content.lines().enumerate() {
446 if let Some(action) = lines_to_fix.get(&i) {
447 match action {
448 FixAction::AddLanguage {
449 fence_marker,
450 has_mkdocs_attrs_only,
451 } => {
452 let indent = &line[..line.len() - line.trim_start().len()];
453 let trimmed = line.trim();
454 let after_fence = trimmed.strip_prefix(fence_marker).unwrap_or("").trim();
455
456 if *has_mkdocs_attrs_only {
457 result.push_str(&format!("{indent}{fence_marker}text {after_fence}\n"));
458 } else {
459 result.push_str(&format!("{indent}{fence_marker}text\n"));
460 }
461 }
462 FixAction::NormalizeLabel {
463 fence_marker,
464 new_label,
465 } => {
466 if let Some((label_start, label_end)) = find_label_span(line, fence_marker) {
467 result.push_str(&line[..label_start]);
468 result.push_str(new_label);
469 result.push_str(&line[label_end..]);
470 result.push('\n');
471 } else {
472 result.push_str(line);
473 result.push('\n');
474 }
475 }
476 }
477 } else {
478 result.push_str(line);
479 result.push('\n');
480 }
481 }
482
483 if !content.ends_with('\n') {
485 result.pop();
486 }
487
488 Ok(result)
489 }
490
    /// This rule belongs to the code-block category.
    fn category(&self) -> RuleCategory {
        RuleCategory::CodeBlock
    }
495
496 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
498 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
499 }
500
    /// Exposes the rule as `Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
504
505 fn default_config_section(&self) -> Option<(String, toml::Value)> {
506 let default_config = MD040Config::default();
507 let json_value = serde_json::to_value(&default_config).ok()?;
508 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
509
510 if let toml::Value::Table(table) = toml_value {
511 if !table.is_empty() {
512 Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
513 } else {
514 None
515 }
516 } else {
517 None
518 }
519 }
520
    /// Builds the rule from the global configuration by loading its
    /// `MD040Config` section via `load_rule_config`.
    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        let rule_config: MD040Config = load_rule_config(config);
        Box::new(MD040FencedCodeLanguage::with_config(rule_config))
    }
528}
529
/// Edit that `fix` applies to the opening-fence line of a code block.
#[derive(Debug, Clone)]
enum FixAction {
    /// Insert the `text` language after the fence.
    AddLanguage {
        /// Fence characters that open the block.
        fence_marker: String,
        /// Whether the text after the fence was recognised as MkDocs
        /// superfences attributes only; when set, that text is kept after
        /// the inserted language.
        has_mkdocs_attrs_only: bool,
    },
    /// Replace the existing label with `new_label`.
    NormalizeLabel {
        /// Fence characters that open the block.
        fence_marker: String,
        /// Label to write in place of the current one.
        new_label: String,
    },
}
541
542fn detect_fenced_code_blocks(content: &str, line_offsets: &[usize]) -> Vec<FencedCodeBlock> {
544 let mut blocks = Vec::new();
545 let options = Options::all();
546 let parser = Parser::new_ext(content, options).into_offset_iter();
547
548 for (event, range) in parser {
549 if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) = event {
550 let line_idx = line_idx_from_offset(line_offsets, range.start);
552
553 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
555 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
556 let line = content.get(line_start..line_end).unwrap_or("");
557 let trimmed = line.trim();
558 let fence_marker = if trimmed.starts_with('`') {
559 let count = trimmed.chars().take_while(|&c| c == '`').count();
560 "`".repeat(count)
561 } else if trimmed.starts_with('~') {
562 let count = trimmed.chars().take_while(|&c| c == '~').count();
563 "~".repeat(count)
564 } else {
565 "```".to_string() };
567
568 let language = info.split_whitespace().next().unwrap_or("").to_string();
570
571 blocks.push(FencedCodeBlock {
572 line_idx,
573 language,
574 fence_marker,
575 });
576 }
577 }
578
579 blocks
580}
581
/// Maps a byte `offset` to the index of the line containing it.
///
/// `line_offsets` must be sorted ascending (the start offset of each line).
/// An exact hit returns that line; otherwise the line starting before the
/// offset is returned, clamped to 0.
#[inline]
fn line_idx_from_offset(line_offsets: &[usize], offset: usize) -> usize {
    line_offsets
        .binary_search(&offset)
        .unwrap_or_else(|insertion_point| insertion_point.saturating_sub(1))
}
589
590fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
592 let mut ranges = Vec::new();
593 let mut disabled_start: Option<usize> = None;
594
595 for (i, line) in content.lines().enumerate() {
596 let trimmed = line.trim();
597
598 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
599 && (rules.is_empty() || rules.contains(&rule_name))
600 && disabled_start.is_none()
601 {
602 disabled_start = Some(i);
603 }
604
605 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
606 && (rules.is_empty() || rules.contains(&rule_name))
607 && let Some(start) = disabled_start.take()
608 {
609 ranges.push((start, i));
610 }
611 }
612
613 if let Some(start) = disabled_start {
615 ranges.push((start, usize::MAX));
616 }
617
618 ranges
619}
620
/// Returns `true` when `line_idx` lies inside any half-open `[start, end)`
/// range of `ranges`.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&line_idx) {
            return true;
        }
    }
    false
}
625
/// Locates the language label on an opening-fence line.
///
/// Returns the byte range `(start, end)` within `line` of the first
/// whitespace-delimited token after `fence_marker`, or `None` when the line
/// (after leading whitespace) does not start with `fence_marker` or nothing
/// but whitespace follows it.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let indent_len = line.len() - line.trim_start().len();
    let after_fence = line[indent_len..].strip_prefix(fence_marker)?;

    // Skip any whitespace between the fence and the label.
    let label_offset = after_fence.find(|c: char| !c.is_whitespace())?;
    let label_len = after_fence[label_offset..]
        .find(char::is_whitespace)
        .unwrap_or(after_fence.len() - label_offset);

    let label_start = indent_len + fence_marker.len() + label_offset;
    Some((label_start, label_start + label_len))
}
651
652#[cfg(test)]
653mod tests {
654 use super::*;
655 use crate::lint_context::LintContext;
656
657 fn run_check(content: &str) -> LintResult {
658 let rule = MD040FencedCodeLanguage::default();
659 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
660 rule.check(&ctx)
661 }
662
663 fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
664 let rule = MD040FencedCodeLanguage::with_config(config);
665 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
666 rule.check(&ctx)
667 }
668
669 fn run_fix(content: &str) -> Result<String, LintError> {
670 let rule = MD040FencedCodeLanguage::default();
671 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
672 rule.fix(&ctx)
673 }
674
675 fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
676 let rule = MD040FencedCodeLanguage::with_config(config);
677 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
678 rule.fix(&ctx)
679 }
680
681 fn run_check_mkdocs(content: &str) -> LintResult {
682 let rule = MD040FencedCodeLanguage::default();
683 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
684 rule.check(&ctx)
685 }
686
    /// Blocks that already carry a language produce no warnings.
    #[test]
    fn test_code_blocks_with_language_specified() {
        let content = r#"# Test

```python
print("Hello, world!")
```

```javascript
console.log("Hello!");
```
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }
706
    /// A bare fence is reported once, on the line of the opening fence.
    #[test]
    fn test_code_blocks_without_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }
720
    /// `fix` adds `text` to each bare fence and leaves labelled fences alone.
    #[test]
    fn test_fix_method_adds_text_language() {
        let content = r#"# Test

```
code without language
```

```python
already has language
```

```
another block without
```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);
    }
742
    /// `fix` keeps the leading indentation of an indented fence.
    #[test]
    fn test_fix_preserves_indentation() {
        let content = r#"# Test

- List item
 ```
 indented code block
 ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains(" ```text"));
    }
755
    /// In `Consistent` style the minority label (`sh`) is flagged in favour
    /// of the majority label (`bash`).
    #[test]
    fn test_consistent_mode_detects_inconsistency() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Inconsistent"));
        assert!(result[0].message.contains("sh"));
        assert!(result[0].message.contains("bash"));
    }
784
    /// `fix` in `Consistent` style rewrites the minority label to the majority one.
    #[test]
    fn test_consistent_mode_fix_normalizes() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```bash").count(), 3);
        assert_eq!(fixed.matches("```sh").count(), 0);
    }
807
    /// With one `bash` and one `sh` block (a tie), both end up as `bash`.
    #[test]
    fn test_consistent_mode_tie_break_uses_curated_default() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```bash").count(), 2);
    }
827
    /// A configured preferred alias (`Shell` -> `sh`) overrides the default choice.
    #[test]
    fn test_consistent_mode_with_preferred_alias() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "sh".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```sh").count(), 2);
        assert_eq!(fixed.matches("```bash").count(), 0);
    }
850
    /// Blocks inside a disable/enable comment pair are neither reported nor
    /// counted when choosing the preferred label.
    #[test]
    fn test_consistent_mode_ignores_disabled_blocks() {
        let content = r#"```bash
echo hi
```
<!-- rumdl-disable MD040 -->
```sh
echo there
```
```sh
echo again
```
<!-- rumdl-enable MD040 -->
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty(), "Disabled blocks should not affect consistency");
    }
872
    /// Normalising a label keeps the attributes that follow it.
    #[test]
    fn test_fix_preserves_attributes() {
        let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("```bash {.highlight}"));
    }
883
    /// Normalising a label keeps the whitespace between fence and label.
    #[test]
    fn test_fix_preserves_spacing_before_label() {
        let content = "```bash\ncode\n```\n\n``` sh {.highlight}\ncode\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("``` bash {.highlight}"));
        assert!(!fixed.contains("``` sh {.highlight}"));
    }
895
    /// A language missing from a non-empty allow list is reported.
    #[test]
    fn test_allowlist_blocks_unlisted() {
        let content = "```java\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("not in the allowed list"));
    }
911
    /// A language on the allow list passes.
    #[test]
    fn test_allowlist_allows_listed() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }
922
    /// An unknown language is rejected when an allow list is configured.
    #[test]
    fn test_allowlist_blocks_unknown_language() {
        let content = "```mysterylang\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("allowed list"));
    }
934
    /// Allow-list entries are compared case-insensitively.
    #[test]
    fn test_allowlist_case_insensitive() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["PYTHON".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }
945
    /// A language on the deny list is reported.
    #[test]
    fn test_denylist_blocks_listed() {
        let content = "```java\ncode\n```";
        let config = MD040Config {
            disallowed_languages: vec!["Java".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("disallowed"));
    }
957
    /// A language not on the deny list passes.
    #[test]
    fn test_denylist_allows_unlisted() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            disallowed_languages: vec!["Java".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }
968
    /// When both lists name the same language, the allow list wins and the
    /// deny list is not consulted.
    #[test]
    fn test_allowlist_takes_precedence_over_denylist() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            disallowed_languages: vec!["Python".to_string()], ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }
980
    /// With the default configuration an unknown language is not reported.
    #[test]
    fn test_unknown_language_ignore_default() {
        let content = "```mycustomlang\ncode\n```";
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Unknown languages ignored by default");
    }
991
    /// `UnknownLanguageAction::Warn` reports unknown languages as warnings.
    #[test]
    fn test_unknown_language_warn() {
        let content = "```mycustomlang\ncode\n```";
        let config = MD040Config {
            unknown_language_action: UnknownLanguageAction::Warn,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Unknown language"));
        assert!(result[0].message.contains("mycustomlang"));
        assert_eq!(result[0].severity, Severity::Warning);
    }
1005
    /// `UnknownLanguageAction::Error` reports unknown languages as errors.
    #[test]
    fn test_unknown_language_error() {
        let content = "```mycustomlang\ncode\n```";
        let config = MD040Config {
            unknown_language_action: UnknownLanguageAction::Error,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Unknown language"));
        assert_eq!(result[0].severity, Severity::Error);
    }
1018
    /// `validate_config` rejects an alias that is not valid for its language.
    #[test]
    fn test_invalid_preferred_alias_detected() {
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "invalid_alias".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);
        let errors = rule.validate_config();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("Invalid alias"));
        assert!(errors[0].contains("invalid_alias"));
    }
1039
    /// `validate_config` rejects a `preferred_aliases` key naming an unknown language.
    #[test]
    fn test_unknown_language_in_preferred_aliases_detected() {
        let mut preferred = HashMap::new();
        preferred.insert("NotARealLanguage".to_string(), "nope".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);
        let errors = rule.validate_config();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("Unknown language"));
    }
1055
    /// `validate_config` accepts valid language/alias pairs.
    #[test]
    fn test_valid_preferred_alias_accepted() {
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "bash".to_string());
        preferred.insert("JavaScript".to_string(), "js".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);
        let errors = rule.validate_config();
        assert!(errors.is_empty());
    }
1071
    /// `resolve_canonical` maps aliases to canonical names and unknown labels to `None`.
    #[test]
    fn test_linguist_resolution() {
        assert_eq!(resolve_canonical("bash"), Some("Shell"));
        assert_eq!(resolve_canonical("sh"), Some("Shell"));
        assert_eq!(resolve_canonical("zsh"), Some("Shell"));
        assert_eq!(resolve_canonical("js"), Some("JavaScript"));
        assert_eq!(resolve_canonical("python"), Some("Python"));
        assert_eq!(resolve_canonical("unknown_lang"), None);
    }
1085
    /// `resolve_canonical` ignores the case of its input.
    #[test]
    fn test_linguist_resolution_case_insensitive() {
        assert_eq!(resolve_canonical("BASH"), Some("Shell"));
        assert_eq!(resolve_canonical("Bash"), Some("Shell"));
        assert_eq!(resolve_canonical("Python"), Some("Python"));
        assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
    }
1093
    /// `is_valid_alias` accepts a language's own aliases and rejects others.
    #[test]
    fn test_alias_validation() {
        assert!(is_valid_alias("Shell", "bash"));
        assert!(is_valid_alias("Shell", "sh"));
        assert!(is_valid_alias("Shell", "zsh"));
        assert!(!is_valid_alias("Shell", "python"));
        assert!(!is_valid_alias("Shell", "invalid"));
    }
1102
    /// `default_alias` returns the expected default label per language.
    #[test]
    fn test_default_alias() {
        assert_eq!(default_alias("Shell"), Some("bash"));
        assert_eq!(default_alias("JavaScript"), Some("js"));
        assert_eq!(default_alias("Python"), Some("python"));
    }
1109
    /// Labels differing only in case count as different labels; at least two
    /// of the three blocks must be flagged.
    #[test]
    fn test_mixed_case_labels_normalized() {
        let content = r#"```BASH
echo hi
```

```Bash
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
    }
1139
    /// Consistency is judged per language: one shell block and one Python
    /// block are flagged.
    #[test]
    fn test_multiple_languages_independent() {
        let content = r#"```bash
shell code
```

```python
python code
```

```sh
more shell
```

```python3
more python
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 2);
    }
1166
    /// Tilde fences are checked and fixed like backtick fences.
    #[test]
    fn test_tilde_fences() {
        let content = r#"~~~bash
echo hi
~~~

~~~sh
echo there
~~~
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config.clone()).unwrap();
        assert_eq!(result.len(), 1);

        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("~~~bash"));
        assert!(!fixed.contains("~~~sh"));
    }
1188
    /// Fences longer than three characters keep their length when fixed.
    #[test]
    fn test_longer_fence_markers_preserved() {
        let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("````bash"));
        assert!(fixed.contains("```bash"));
    }
1200
    /// An empty document yields no warnings.
    #[test]
    fn test_empty_document() {
        let result = run_check("").unwrap();
        assert!(result.is_empty());
    }
1206
    /// A document without code blocks yields no warnings.
    #[test]
    fn test_no_code_blocks() {
        let content = "# Just a heading\n\nSome text.";
        let result = run_check(content).unwrap();
        assert!(result.is_empty());
    }
1213
    /// A single block cannot be inconsistent with itself.
    #[test]
    fn test_single_code_block_no_inconsistency() {
        let content = "```bash\necho hi\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty(), "Single block has no inconsistency");
    }
1224
    /// Applying `fix` to its own output changes nothing.
    #[test]
    fn test_idempotent_fix() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
        let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
        assert_eq!(fixed1, fixed2, "Fix should be idempotent");
    }
1243
    /// MkDocs flavor: a fence carrying only `title=` needs no language.
    #[test]
    fn test_mkdocs_superfences_title_only() {
        let content = r#"```title="Example"
echo hi
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with title= should not require language"
        );
    }
1261
    /// MkDocs flavor: a fence carrying only `hl_lines=` needs no language.
    #[test]
    fn test_mkdocs_superfences_hl_lines() {
        let content = r#"```hl_lines="1 2"
line 1
line 2
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with hl_lines= should not require language"
        );
    }
1276
    /// MkDocs flavor: a fence carrying only `linenums=` needs no language.
    #[test]
    fn test_mkdocs_superfences_linenums() {
        let content = r#"```linenums="1"
line 1
line 2
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with linenums= should not require language"
        );
    }
1291
    /// MkDocs flavor: a fence carrying only a `.class` needs no language.
    #[test]
    fn test_mkdocs_superfences_class() {
        let content = r#"```.my-class
some text
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with .class should not require language"
        );
    }
1305
    /// MkDocs flavor: a fence carrying only an `#id` needs no language.
    #[test]
    fn test_mkdocs_superfences_id() {
        let content = r#"```#my-id
some text
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with #id should not require language"
        );
    }
1319
    /// MkDocs flavor: a language followed by attributes passes.
    #[test]
    fn test_mkdocs_superfences_with_language() {
        let content = r#"```python title="Example" hl_lines="1"
print("hello")
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(result.is_empty(), "Code block with language and attrs should pass");
    }
1330
    /// Standard flavor: `title=` with no language is still reported.
    #[test]
    fn test_standard_flavor_no_special_handling() {
        let content = r#"```title="Example"
echo hi
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(
            result.len(),
            1,
            "Standard flavor should warn about title= without language"
        );
    }
1345}