1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use std::collections::HashMap;
6
7pub mod md040_config;
11
/// Prefixes that mark the text after a fence as a superfences-style attribute
/// (`title=`, `hl_lines=`, `linenums=`, a `.class`, or a `#id`) rather than a
/// language name.
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &["title=", "hl_lines=", "linenums=", ".", "#"];

/// Returns `true` when `s` begins with one of [`MKDOCS_SUPERFENCES_ATTR_PREFIXES`].
///
/// Only the start of `s` is inspected; an empty string never matches.
#[inline]
fn is_superfences_attribute(s: &str) -> bool {
    for prefix in MKDOCS_SUPERFENCES_ATTR_PREFIXES {
        if s.starts_with(*prefix) {
            return true;
        }
    }
    false
}
34use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
35
/// One fenced code block, reduced to what this rule needs about its opening line.
struct FencedCodeBlock {
    /// 0-based index of the line holding the opening fence
    /// (callers add 1 when reporting a line number).
    line_idx: usize,
    /// First whitespace-delimited token of the info string; empty when there is none.
    language: String,
    /// The run of backticks or tildes that opens the block, or "```" when the
    /// line does not start with either character.
    fence_marker: String,
}
44
/// Rule MD040: fenced code blocks should have a language specified.
///
/// Depending on its configuration the rule can also report disallowed or
/// unknown languages and inconsistent labels for the same language.
#[derive(Debug, Clone, Default)]
pub struct MD040FencedCodeLanguage {
    /// Rule configuration (style, preferred aliases, allow/deny lists, unknown-language action).
    config: MD040Config,
}
49
50impl MD040FencedCodeLanguage {
51 pub fn new() -> Self {
52 Self::default()
53 }
54
55 pub fn with_config(config: MD040Config) -> Self {
56 Self { config }
57 }
58
59 fn validate_config(&self) -> Vec<String> {
61 let mut errors = Vec::new();
62
63 for (canonical, alias) in &self.config.preferred_aliases {
65 if let Some(actual_canonical) = resolve_canonical(canonical) {
67 if !is_valid_alias(actual_canonical, alias)
68 && let Some(valid_aliases) = get_aliases(actual_canonical)
69 {
70 let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
71 let valid_str = valid_list
72 .iter()
73 .map(|s| format!("'{s}'"))
74 .collect::<Vec<_>>()
75 .join(", ");
76 let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
77 errors.push(format!(
78 "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
79 ));
80 }
81 } else {
82 errors.push(format!(
83 "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
84 ));
85 }
86 }
87
88 errors
89 }
90
91 fn compute_preferred_labels(
93 &self,
94 blocks: &[FencedCodeBlock],
95 disabled_ranges: &[(usize, usize)],
96 ) -> HashMap<String, String> {
97 let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
99
100 for block in blocks {
101 if is_line_disabled(disabled_ranges, block.line_idx) {
102 continue;
103 }
104 if block.language.is_empty() {
105 continue;
106 }
107 if let Some(canonical) = resolve_canonical(&block.language) {
108 by_canonical
109 .entry(canonical.to_string())
110 .or_default()
111 .push(&block.language);
112 }
113 }
114
115 let mut result = HashMap::new();
117
118 for (canonical, labels) in by_canonical {
119 let winner = if let Some(preferred) = self
121 .config
122 .preferred_aliases
123 .iter()
124 .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
125 .map(|(_, v)| v.clone())
126 {
127 preferred
128 } else {
129 let mut counts: HashMap<&str, usize> = HashMap::new();
131 for label in &labels {
132 *counts.entry(*label).or_default() += 1;
133 }
134
135 let max_count = counts.values().max().copied().unwrap_or(0);
136 let winners: Vec<_> = counts
137 .iter()
138 .filter(|(_, c)| **c == max_count)
139 .map(|(l, _)| *l)
140 .collect();
141
142 if winners.len() == 1 {
143 winners[0].to_string()
144 } else {
145 default_alias(&canonical)
147 .filter(|default| winners.contains(default))
148 .map(|s| s.to_string())
149 .unwrap_or_else(|| winners.into_iter().min().unwrap().to_string())
150 }
151 };
152
153 result.insert(canonical, winner);
154 }
155
156 result
157 }
158
159 fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
161 if !self.config.allowed_languages.is_empty() {
163 let allowed = self.config.allowed_languages.join(", ");
164 let Some(canonical) = canonical else {
165 return Some(format!(
166 "Language '{original_label}' is not in the allowed list: {allowed}"
167 ));
168 };
169 if !self
170 .config
171 .allowed_languages
172 .iter()
173 .any(|a| a.eq_ignore_ascii_case(canonical))
174 {
175 return Some(format!(
176 "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
177 ));
178 }
179 } else if !self.config.disallowed_languages.is_empty()
180 && canonical.is_some_and(|canonical| {
181 self.config
182 .disallowed_languages
183 .iter()
184 .any(|d| d.eq_ignore_ascii_case(canonical))
185 })
186 {
187 let canonical = canonical.unwrap_or("unknown");
188 return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
189 }
190 None
191 }
192
193 fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
195 if resolve_canonical(label).is_some() {
196 return None;
197 }
198
199 match self.config.unknown_language_action {
200 UnknownLanguageAction::Ignore => None,
201 UnknownLanguageAction::Warn => Some((
202 format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
203 Severity::Warning,
204 )),
205 UnknownLanguageAction::Error => Some((
206 format!("Unknown language '{label}' (not in GitHub Linguist)"),
207 Severity::Error,
208 )),
209 }
210 }
211}
212
213impl Rule for MD040FencedCodeLanguage {
214 fn name(&self) -> &'static str {
215 "MD040"
216 }
217
218 fn description(&self) -> &'static str {
219 "Code blocks should have a language specified"
220 }
221
222 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
223 let content = ctx.content;
224 let mut warnings = Vec::new();
225
226 for error in self.validate_config() {
228 warnings.push(LintWarning {
229 rule_name: Some(self.name().to_string()),
230 line: 1,
231 column: 1,
232 end_line: 1,
233 end_column: 1,
234 message: format!("[config error] {error}"),
235 severity: Severity::Error,
236 fix: None,
237 });
238 }
239
240 let fenced_blocks = derive_fenced_code_blocks(ctx);
242
243 let disabled_ranges = compute_disabled_ranges(content, self.name());
245
246 let preferred_labels = if self.config.style == LanguageStyle::Consistent {
248 self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
249 } else {
250 HashMap::new()
251 };
252
253 let lines = ctx.raw_lines();
254
255 for block in &fenced_blocks {
256 if is_line_disabled(&disabled_ranges, block.line_idx) {
258 continue;
259 }
260
261 let line = lines.get(block.line_idx).unwrap_or(&"");
263 let trimmed = line.trim();
264 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
265
266 let has_mkdocs_attrs_only =
268 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
269
270 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
272 && after_fence.starts_with('{')
273 && after_fence.contains('}');
274
275 let needs_language =
278 !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
279
280 if needs_language && !has_quarto_syntax {
281 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
282
283 warnings.push(LintWarning {
284 rule_name: Some(self.name().to_string()),
285 line: start_line,
286 column: start_col,
287 end_line,
288 end_column: end_col,
289 message: "Code block (```) missing language".to_string(),
290 severity: Severity::Warning,
291 fix: Some(Fix {
292 range: {
293 let trimmed_start = line.len() - line.trim_start().len();
294 let fence_len = block.fence_marker.len();
295 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
296 let fence_start_byte = line_start_byte + trimmed_start;
297 let fence_end_byte = fence_start_byte + fence_len;
298 fence_start_byte..fence_end_byte
299 },
300 replacement: format!("{}text", block.fence_marker),
301 }),
302 });
303 continue;
304 }
305
306 if has_quarto_syntax {
308 continue;
309 }
310
311 let canonical = resolve_canonical(&block.language);
312
313 if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
315 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
316
317 warnings.push(LintWarning {
318 rule_name: Some(self.name().to_string()),
319 line: start_line,
320 column: start_col,
321 end_line,
322 end_column: end_col,
323 message: msg,
324 severity: Severity::Warning,
325 fix: None,
326 });
327 continue;
328 }
329
330 if canonical.is_none() {
332 if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
333 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
334
335 warnings.push(LintWarning {
336 rule_name: Some(self.name().to_string()),
337 line: start_line,
338 column: start_col,
339 end_line,
340 end_column: end_col,
341 message: msg,
342 severity,
343 fix: None,
344 });
345 }
346 continue;
347 }
348
349 if self.config.style == LanguageStyle::Consistent
351 && let Some(preferred) = preferred_labels.get(canonical.unwrap())
352 && &block.language != preferred
353 {
354 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
355
356 let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
357 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
358 Fix {
359 range: (line_start_byte + label_start)..(line_start_byte + label_end),
360 replacement: preferred.clone(),
361 }
362 });
363 let lang = &block.language;
364 let canonical = canonical.unwrap();
365
366 warnings.push(LintWarning {
367 rule_name: Some(self.name().to_string()),
368 line: start_line,
369 column: start_col,
370 end_line,
371 end_column: end_col,
372 message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
373 severity: Severity::Warning,
374 fix,
375 });
376 }
377 }
378
379 Ok(warnings)
380 }
381
382 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
383 let content = ctx.content;
384
385 let fenced_blocks = derive_fenced_code_blocks(ctx);
387
388 let disabled_ranges = compute_disabled_ranges(content, self.name());
390
391 let preferred_labels = if self.config.style == LanguageStyle::Consistent {
393 self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
394 } else {
395 HashMap::new()
396 };
397
398 let mut lines_to_fix: std::collections::HashMap<usize, FixAction> = std::collections::HashMap::new();
400
401 for block in &fenced_blocks {
402 if is_line_disabled(&disabled_ranges, block.line_idx) {
403 continue;
404 }
405
406 if ctx.inline_config().is_rule_disabled(self.name(), block.line_idx + 1) {
408 continue;
409 }
410
411 let fix_lines = ctx.raw_lines();
412 let line = fix_lines.get(block.line_idx).unwrap_or(&"");
413 let trimmed = line.trim();
414 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
415
416 let has_mkdocs_attrs_only =
417 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
418
419 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
420 && after_fence.starts_with('{')
421 && after_fence.contains('}');
422
423 let needs_language =
424 !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
425
426 if needs_language && !has_quarto_syntax {
427 lines_to_fix.insert(
428 block.line_idx,
429 FixAction::AddLanguage {
430 fence_marker: block.fence_marker.clone(),
431 has_mkdocs_attrs_only,
432 },
433 );
434 } else if !has_quarto_syntax
435 && self.config.style == LanguageStyle::Consistent
436 && let Some(canonical) = resolve_canonical(&block.language)
437 && let Some(preferred) = preferred_labels.get(canonical)
438 && &block.language != preferred
439 {
440 lines_to_fix.insert(
441 block.line_idx,
442 FixAction::NormalizeLabel {
443 fence_marker: block.fence_marker.clone(),
444 new_label: preferred.clone(),
445 },
446 );
447 }
448 }
449
450 let mut result = String::new();
452 for (i, line) in content.lines().enumerate() {
453 if let Some(action) = lines_to_fix.get(&i) {
454 match action {
455 FixAction::AddLanguage {
456 fence_marker,
457 has_mkdocs_attrs_only,
458 } => {
459 let indent = &line[..line.len() - line.trim_start().len()];
460 let trimmed = line.trim();
461 let after_fence = trimmed.strip_prefix(fence_marker).unwrap_or("").trim();
462
463 if *has_mkdocs_attrs_only {
464 result.push_str(&format!("{indent}{fence_marker}text {after_fence}\n"));
465 } else {
466 result.push_str(&format!("{indent}{fence_marker}text\n"));
467 }
468 }
469 FixAction::NormalizeLabel {
470 fence_marker,
471 new_label,
472 } => {
473 if let Some((label_start, label_end)) = find_label_span(line, fence_marker) {
474 result.push_str(&line[..label_start]);
475 result.push_str(new_label);
476 result.push_str(&line[label_end..]);
477 result.push('\n');
478 } else {
479 result.push_str(line);
480 result.push('\n');
481 }
482 }
483 }
484 } else {
485 result.push_str(line);
486 result.push('\n');
487 }
488 }
489
490 if !content.ends_with('\n') {
492 result.pop();
493 }
494
495 Ok(result)
496 }
497
498 fn category(&self) -> RuleCategory {
500 RuleCategory::CodeBlock
501 }
502
503 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
505 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
506 }
507
508 fn as_any(&self) -> &dyn std::any::Any {
509 self
510 }
511
512 fn default_config_section(&self) -> Option<(String, toml::Value)> {
513 let default_config = MD040Config::default();
514 let json_value = serde_json::to_value(&default_config).ok()?;
515 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
516
517 if let toml::Value::Table(table) = toml_value {
518 if !table.is_empty() {
519 Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
520 } else {
521 None
522 }
523 } else {
524 None
525 }
526 }
527
528 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
529 where
530 Self: Sized,
531 {
532 let rule_config: MD040Config = load_rule_config(config);
533 Box::new(MD040FencedCodeLanguage::with_config(rule_config))
534 }
535}
536
/// What `fix` should do with one opening-fence line.
#[derive(Debug, Clone)]
enum FixAction {
    /// Append the `text` label to a fence that has no language.
    AddLanguage {
        /// Fence marker recorded for the block.
        fence_marker: String,
        /// When `true`, the text after the fence is kept behind the inserted label.
        has_mkdocs_attrs_only: bool,
    },
    /// Replace the existing label with `new_label`.
    NormalizeLabel {
        /// Fence marker recorded for the block; used to locate the label on the line.
        fence_marker: String,
        /// Label to write in place of the current one.
        new_label: String,
    },
}
548
549fn derive_fenced_code_blocks(ctx: &crate::lint_context::LintContext) -> Vec<FencedCodeBlock> {
551 let content = ctx.content;
552 let line_offsets = &ctx.line_offsets;
553
554 ctx.code_block_details
555 .iter()
556 .filter(|d| d.is_fenced)
557 .map(|detail| {
558 let line_idx = match line_offsets.binary_search(&detail.start) {
559 Ok(idx) => idx,
560 Err(idx) => idx.saturating_sub(1),
561 };
562
563 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
565 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
566 let line = content.get(line_start..line_end).unwrap_or("");
567 let trimmed = line.trim();
568 let fence_marker = if trimmed.starts_with('`') {
569 let count = trimmed.chars().take_while(|&c| c == '`').count();
570 "`".repeat(count)
571 } else if trimmed.starts_with('~') {
572 let count = trimmed.chars().take_while(|&c| c == '~').count();
573 "~".repeat(count)
574 } else {
575 "```".to_string()
576 };
577
578 let language = detail.info_string.split_whitespace().next().unwrap_or("").to_string();
579
580 FencedCodeBlock {
581 line_idx,
582 language,
583 fence_marker,
584 }
585 })
586 .collect()
587}
588
589fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
591 let mut ranges = Vec::new();
592 let mut disabled_start: Option<usize> = None;
593
594 for (i, line) in content.lines().enumerate() {
595 let trimmed = line.trim();
596
597 if let Some(rules) = crate::inline_config::parse_disable_comment(trimmed)
598 && (rules.is_empty() || rules.contains(&rule_name))
599 && disabled_start.is_none()
600 {
601 disabled_start = Some(i);
602 }
603
604 if let Some(rules) = crate::inline_config::parse_enable_comment(trimmed)
605 && (rules.is_empty() || rules.contains(&rule_name))
606 && let Some(start) = disabled_start.take()
607 {
608 ranges.push((start, i));
609 }
610 }
611
612 if let Some(start) = disabled_start {
614 ranges.push((start, usize::MAX));
615 }
616
617 ranges
618}
619
/// Returns `true` when `line_idx` lies inside any `(start, end)` range,
/// where `start` is inclusive and `end` is exclusive.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&line_idx) {
            return true;
        }
    }
    false
}
624
/// Locates the language label on an opening fence line.
///
/// The line must consist of optional leading whitespace followed directly by
/// `fence_marker`. The label is the first run of non-whitespace characters
/// after the marker.
///
/// Returns the label's `(start, end)` byte offsets within `line` (end
/// exclusive), or `None` when the line does not start with the marker or
/// nothing but whitespace follows it.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let unindented = line.trim_start();
    let indent_len = line.len() - unindented.len();

    let info = unindented.strip_prefix(fence_marker)?;
    let info_offset = indent_len + fence_marker.len();

    let label_start = info.find(|ch: char| !ch.is_whitespace())?;
    let label_len = info[label_start..]
        .find(char::is_whitespace)
        .unwrap_or(info.len() - label_start);

    let start = info_offset + label_start;
    Some((start, start + label_len))
}
650
// Unit tests for MD040: missing-language detection and fixing, consistent-label
// mode, allow/deny lists, unknown-language handling, configuration validation,
// the linguist lookup helpers, and MkDocs superfences attributes.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    // ---- helpers -------------------------------------------------------

    /// Runs `check` with the default configuration in the Standard flavor.
    fn run_check(content: &str) -> LintResult {
        let rule = MD040FencedCodeLanguage::default();
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx)
    }

    /// Runs `check` with `config` in the Standard flavor.
    fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
        let rule = MD040FencedCodeLanguage::with_config(config);
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx)
    }

    /// Runs `fix` with the default configuration in the Standard flavor.
    fn run_fix(content: &str) -> Result<String, LintError> {
        let rule = MD040FencedCodeLanguage::default();
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.fix(&ctx)
    }

    /// Runs `fix` with `config` in the Standard flavor.
    fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
        let rule = MD040FencedCodeLanguage::with_config(config);
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.fix(&ctx)
    }

    /// Runs `check` with the default configuration in the MkDocs flavor.
    fn run_check_mkdocs(content: &str) -> LintResult {
        let rule = MD040FencedCodeLanguage::default();
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
        rule.check(&ctx)
    }

    // ---- basic detection and fixing ------------------------------------

    // Blocks that already carry a language produce no warnings.
    #[test]
    fn test_code_blocks_with_language_specified() {
        let content = r#"# Test

```python
print("Hello, world!")
```

```javascript
console.log("Hello!");
```
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }

    // A bare fence is reported once, on the line of the opening fence.
    #[test]
    fn test_code_blocks_without_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }

    // `fix` labels both bare fences as `text` and leaves the labelled block alone.
    #[test]
    fn test_fix_method_adds_text_language() {
        let content = r#"# Test

```
code without language
```

```python
already has language
```

```
another block without
```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);
    }

    // The indentation in front of the fence survives the fix.
    #[test]
    fn test_fix_preserves_indentation() {
        let content = r#"# Test

- List item
 ```
 indented code block
 ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains(" ```text"));
    }

    // ---- consistent mode -----------------------------------------------

    // With two `bash` blocks and one `sh` block, only the `sh` block is flagged.
    #[test]
    fn test_consistent_mode_detects_inconsistency() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Inconsistent"));
        assert!(result[0].message.contains("sh"));
        assert!(result[0].message.contains("bash"));
    }

    // `fix` rewrites the minority label to the majority label.
    #[test]
    fn test_consistent_mode_fix_normalizes() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```bash").count(), 3);
        assert_eq!(fixed.matches("```sh").count(), 0);
    }

    // One `bash` and one `sh` block tie; the result uses `bash` for both.
    #[test]
    fn test_consistent_mode_tie_break_uses_curated_default() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```bash").count(), 2);
    }

    // A configured preferred alias overrides the vote.
    #[test]
    fn test_consistent_mode_with_preferred_alias() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "sh".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```sh").count(), 2);
        assert_eq!(fixed.matches("```bash").count(), 0);
    }

    // Blocks between disable/enable comments neither vote nor get flagged.
    #[test]
    fn test_consistent_mode_ignores_disabled_blocks() {
        let content = r#"```bash
echo hi
```
<!-- rumdl-disable MD040 -->
```sh
echo there
```
```sh
echo again
```
<!-- rumdl-enable MD040 -->
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty(), "Disabled blocks should not affect consistency");
    }

    // Only the label is replaced; attributes after it are kept.
    #[test]
    fn test_fix_preserves_attributes() {
        let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("```bash {.highlight}"));
    }

    // Whitespace between the fence and the label is kept as well.
    #[test]
    fn test_fix_preserves_spacing_before_label() {
        let content = "```bash\ncode\n```\n\n``` sh {.highlight}\ncode\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("``` bash {.highlight}"));
        assert!(!fixed.contains("``` sh {.highlight}"));
    }

    // ---- allow list / deny list ----------------------------------------

    // A language missing from the allow list is reported.
    #[test]
    fn test_allowlist_blocks_unlisted() {
        let content = "```java\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("not in the allowed list"));
    }

    // A listed language passes.
    #[test]
    fn test_allowlist_allows_listed() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }

    // A label that resolves to no known language is rejected by an allow list.
    #[test]
    fn test_allowlist_blocks_unknown_language() {
        let content = "```mysterylang\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("allowed list"));
    }

    // Allow-list entries are compared case-insensitively.
    #[test]
    fn test_allowlist_case_insensitive() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["PYTHON".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }

    // A language on the deny list is reported.
    #[test]
    fn test_denylist_blocks_listed() {
        let content = "```java\ncode\n```";
        let config = MD040Config {
            disallowed_languages: vec!["Java".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("disallowed"));
    }

    // A language not on the deny list passes.
    #[test]
    fn test_denylist_allows_unlisted() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            disallowed_languages: vec!["Java".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }

    // When both lists are set, only the allow list is consulted.
    #[test]
    fn test_allowlist_takes_precedence_over_denylist() {
        let content = "```python\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string()],
            disallowed_languages: vec!["Python".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty());
    }

    // ---- unknown languages ---------------------------------------------

    // The default configuration does not report unknown languages.
    #[test]
    fn test_unknown_language_ignore_default() {
        let content = "```mycustomlang\ncode\n```";
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "Unknown languages ignored by default");
    }

    // `Warn` reports the unknown label with warning severity.
    #[test]
    fn test_unknown_language_warn() {
        let content = "```mycustomlang\ncode\n```";
        let config = MD040Config {
            unknown_language_action: UnknownLanguageAction::Warn,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Unknown language"));
        assert!(result[0].message.contains("mycustomlang"));
        assert_eq!(result[0].severity, Severity::Warning);
    }

    // `Error` reports the unknown label with error severity.
    #[test]
    fn test_unknown_language_error() {
        let content = "```mycustomlang\ncode\n```";
        let config = MD040Config {
            unknown_language_action: UnknownLanguageAction::Error,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Unknown language"));
        assert_eq!(result[0].severity, Severity::Error);
    }

    // ---- configuration validation --------------------------------------

    // An alias that does not belong to the language is reported by name.
    #[test]
    fn test_invalid_preferred_alias_detected() {
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "invalid_alias".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);
        let errors = rule.validate_config();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("Invalid alias"));
        assert!(errors[0].contains("invalid_alias"));
    }

    // A key that resolves to no known language is reported.
    #[test]
    fn test_unknown_language_in_preferred_aliases_detected() {
        let mut preferred = HashMap::new();
        preferred.insert("NotARealLanguage".to_string(), "nope".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);
        let errors = rule.validate_config();
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("Unknown language"));
    }

    // Valid language/alias pairs produce no errors.
    #[test]
    fn test_valid_preferred_alias_accepted() {
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "bash".to_string());
        preferred.insert("JavaScript".to_string(), "js".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);
        let errors = rule.validate_config();
        assert!(errors.is_empty());
    }

    // Config errors surfaced through `check` carry 1-based line and column values.
    #[test]
    fn test_config_error_uses_valid_line_column() {
        let config = md040_config::MD040Config {
            preferred_aliases: {
                let mut map = std::collections::HashMap::new();
                map.insert("Shell".to_string(), "invalid_alias".to_string());
                map
            },
            ..Default::default()
        };
        let rule = MD040FencedCodeLanguage::with_config(config);

        let content = "```shell\necho hello\n```";
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        let config_error = result.iter().find(|w| w.message.contains("[config error]"));
        assert!(config_error.is_some(), "Should have a config error warning");

        let warning = config_error.unwrap();
        assert!(
            warning.line >= 1,
            "Config error line should be >= 1, got {}",
            warning.line
        );
        assert!(
            warning.column >= 1,
            "Config error column should be >= 1, got {}",
            warning.column
        );
    }

    // ---- linguist helpers ----------------------------------------------

    // Aliases resolve to their canonical language name; unknown labels resolve to `None`.
    #[test]
    fn test_linguist_resolution() {
        assert_eq!(resolve_canonical("bash"), Some("Shell"));
        assert_eq!(resolve_canonical("sh"), Some("Shell"));
        assert_eq!(resolve_canonical("zsh"), Some("Shell"));
        assert_eq!(resolve_canonical("js"), Some("JavaScript"));
        assert_eq!(resolve_canonical("python"), Some("Python"));
        assert_eq!(resolve_canonical("unknown_lang"), None);
    }

    // Resolution ignores the case of the label.
    #[test]
    fn test_linguist_resolution_case_insensitive() {
        assert_eq!(resolve_canonical("BASH"), Some("Shell"));
        assert_eq!(resolve_canonical("Bash"), Some("Shell"));
        assert_eq!(resolve_canonical("Python"), Some("Python"));
        assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
    }

    // `is_valid_alias` accepts aliases of the language and rejects everything else.
    #[test]
    fn test_alias_validation() {
        assert!(is_valid_alias("Shell", "bash"));
        assert!(is_valid_alias("Shell", "sh"));
        assert!(is_valid_alias("Shell", "zsh"));
        assert!(!is_valid_alias("Shell", "python"));
        assert!(!is_valid_alias("Shell", "invalid"));
    }

    // Default aliases for a few common languages.
    #[test]
    fn test_default_alias() {
        assert_eq!(default_alias("Shell"), Some("bash"));
        assert_eq!(default_alias("JavaScript"), Some("js"));
        assert_eq!(default_alias("Python"), Some("python"));
    }

    // ---- edge cases ----------------------------------------------------

    // Labels differing only in case count as different labels of one language.
    #[test]
    fn test_mixed_case_labels_normalized() {
        let content = r#"```BASH
echo hi
```

```Bash
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
    }

    // Consistency is evaluated per language: one Shell and one Python label are flagged.
    #[test]
    fn test_multiple_languages_independent() {
        let content = r#"```bash
shell code
```

```python
python code
```

```sh
more shell
```

```python3
more python
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 2);
    }

    // Tilde fences are checked and fixed like backtick fences.
    #[test]
    fn test_tilde_fences() {
        let content = r#"~~~bash
echo hi
~~~

~~~sh
echo there
~~~
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config.clone()).unwrap();
        assert_eq!(result.len(), 1);

        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("~~~bash"));
        assert!(!fixed.contains("~~~sh"));
    }

    // A four-backtick fence keeps its length when its label is rewritten.
    #[test]
    fn test_longer_fence_markers_preserved() {
        let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("````bash"));
        assert!(fixed.contains("```bash"));
    }

    // An empty document yields no warnings.
    #[test]
    fn test_empty_document() {
        let result = run_check("").unwrap();
        assert!(result.is_empty());
    }

    // A document without code blocks yields no warnings.
    #[test]
    fn test_no_code_blocks() {
        let content = "# Just a heading\n\nSome text.";
        let result = run_check(content).unwrap();
        assert!(result.is_empty());
    }

    // A single block cannot be inconsistent with anything.
    #[test]
    fn test_single_code_block_no_inconsistency() {
        let content = "```bash\necho hi\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty(), "Single block has no inconsistency");
    }

    // Fixing already-fixed output changes nothing.
    #[test]
    fn test_idempotent_fix() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
        let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
        assert_eq!(fixed1, fixed2, "Fix should be idempotent");
    }

    // ---- MkDocs superfences --------------------------------------------

    // In MkDocs flavor a fence carrying only `title=` needs no language.
    #[test]
    fn test_mkdocs_superfences_title_only() {
        let content = r#"```title="Example"
echo hi
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with title= should not require language"
        );
    }

    // Same for `hl_lines=`.
    #[test]
    fn test_mkdocs_superfences_hl_lines() {
        let content = r#"```hl_lines="1 2"
line 1
line 2
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with hl_lines= should not require language"
        );
    }

    // Same for `linenums=`.
    #[test]
    fn test_mkdocs_superfences_linenums() {
        let content = r#"```linenums="1"
line 1
line 2
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with linenums= should not require language"
        );
    }

    // Same for a `.class` attribute.
    #[test]
    fn test_mkdocs_superfences_class() {
        let content = r#"```.my-class
some text
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with .class should not require language"
        );
    }

    // Same for a `#id` attribute.
    #[test]
    fn test_mkdocs_superfences_id() {
        let content = r#"```#my-id
some text
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(
            result.is_empty(),
            "MkDocs superfences with #id should not require language"
        );
    }

    // A language followed by attributes passes in MkDocs flavor.
    #[test]
    fn test_mkdocs_superfences_with_language() {
        let content = r#"```python title="Example" hl_lines="1"
print("hello")
```
"#;
        let result = run_check_mkdocs(content).unwrap();
        assert!(result.is_empty(), "Code block with language and attrs should pass");
    }

    // Outside MkDocs flavor, `title=` alone still counts as a missing language.
    #[test]
    fn test_standard_flavor_no_special_handling() {
        let content = r#"```title="Example"
echo hi
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(
            result.len(),
            1,
            "Standard flavor should warn about title= without language"
        );
    }
}