1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
6use std::collections::HashMap;
7
8pub mod md040_config;
12
/// Prefixes that mark a fence info-string token as an attribute rather than a
/// language name (for example `title="..."`, `.class`, or `#id`).
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &["title=", "hl_lines=", "linenums=", ".", "#"];

/// Returns `true` when `s` starts with any of the known attribute prefixes.
#[inline]
fn is_superfences_attribute(s: &str) -> bool {
    for prefix in MKDOCS_SUPERFENCES_ATTR_PREFIXES {
        if s.starts_with(*prefix) {
            return true;
        }
    }
    false
}
35use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
36
/// The opening line of a fenced code block, as found by
/// `detect_fenced_code_blocks`.
struct FencedCodeBlock {
    /// Zero-based index of the line that holds the opening fence.
    line_idx: usize,
    /// First whitespace-delimited token of the fence info string; empty when
    /// the info string has no tokens.
    language: String,
    /// The leading run of backticks or tildes on the fence line (falls back to
    /// "```" when the trimmed line starts with neither character).
    fence_marker: String,
}
45
/// Rule MD040: fenced code blocks should have a language specified.
///
/// Depending on the configuration, the rule also checks allow/deny lists,
/// unknown languages, and consistency of labels that name the same language.
#[derive(Debug, Clone, Default)]
pub struct MD040FencedCodeLanguage {
    /// Rule configuration (style, preferred aliases, allow/deny lists,
    /// unknown-language handling).
    config: MD040Config,
}
50
51impl MD040FencedCodeLanguage {
52 pub fn new() -> Self {
53 Self::default()
54 }
55
56 pub fn with_config(config: MD040Config) -> Self {
57 Self { config }
58 }
59
60 fn validate_config(&self) -> Vec<String> {
62 let mut errors = Vec::new();
63
64 for (canonical, alias) in &self.config.preferred_aliases {
66 if let Some(actual_canonical) = resolve_canonical(canonical) {
68 if !is_valid_alias(actual_canonical, alias)
69 && let Some(valid_aliases) = get_aliases(actual_canonical)
70 {
71 let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
72 let valid_str = valid_list
73 .iter()
74 .map(|s| format!("'{s}'"))
75 .collect::<Vec<_>>()
76 .join(", ");
77 let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
78 errors.push(format!(
79 "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
80 ));
81 }
82 } else {
83 errors.push(format!(
84 "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
85 ));
86 }
87 }
88
89 errors
90 }
91
92 fn compute_preferred_labels(
94 &self,
95 blocks: &[FencedCodeBlock],
96 disabled_ranges: &[(usize, usize)],
97 ) -> HashMap<String, String> {
98 let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
100
101 for block in blocks {
102 if is_line_disabled(disabled_ranges, block.line_idx) {
103 continue;
104 }
105 if block.language.is_empty() {
106 continue;
107 }
108 if let Some(canonical) = resolve_canonical(&block.language) {
109 by_canonical
110 .entry(canonical.to_string())
111 .or_default()
112 .push(&block.language);
113 }
114 }
115
116 let mut result = HashMap::new();
118
119 for (canonical, labels) in by_canonical {
120 let winner = if let Some(preferred) = self
122 .config
123 .preferred_aliases
124 .iter()
125 .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
126 .map(|(_, v)| v.clone())
127 {
128 preferred
129 } else {
130 let mut counts: HashMap<&str, usize> = HashMap::new();
132 for label in &labels {
133 *counts.entry(*label).or_default() += 1;
134 }
135
136 let max_count = counts.values().max().copied().unwrap_or(0);
137 let winners: Vec<_> = counts
138 .iter()
139 .filter(|(_, c)| **c == max_count)
140 .map(|(l, _)| *l)
141 .collect();
142
143 if winners.len() == 1 {
144 winners[0].to_string()
145 } else {
146 default_alias(&canonical)
148 .filter(|default| winners.contains(default))
149 .map(|s| s.to_string())
150 .unwrap_or_else(|| winners.into_iter().min().unwrap().to_string())
151 }
152 };
153
154 result.insert(canonical, winner);
155 }
156
157 result
158 }
159
160 fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
162 if !self.config.allowed_languages.is_empty() {
164 let allowed = self.config.allowed_languages.join(", ");
165 let Some(canonical) = canonical else {
166 return Some(format!(
167 "Language '{original_label}' is not in the allowed list: {allowed}"
168 ));
169 };
170 if !self
171 .config
172 .allowed_languages
173 .iter()
174 .any(|a| a.eq_ignore_ascii_case(canonical))
175 {
176 return Some(format!(
177 "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
178 ));
179 }
180 } else if !self.config.disallowed_languages.is_empty()
181 && canonical.is_some_and(|canonical| {
182 self.config
183 .disallowed_languages
184 .iter()
185 .any(|d| d.eq_ignore_ascii_case(canonical))
186 })
187 {
188 let canonical = canonical.unwrap_or("unknown");
189 return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
190 }
191 None
192 }
193
194 fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
196 if resolve_canonical(label).is_some() {
197 return None;
198 }
199
200 match self.config.unknown_language_action {
201 UnknownLanguageAction::Ignore => None,
202 UnknownLanguageAction::Warn => Some((
203 format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
204 Severity::Warning,
205 )),
206 UnknownLanguageAction::Error => Some((
207 format!("Unknown language '{label}' (not in GitHub Linguist)"),
208 Severity::Error,
209 )),
210 }
211 }
212}
213
214impl Rule for MD040FencedCodeLanguage {
215 fn name(&self) -> &'static str {
216 "MD040"
217 }
218
219 fn description(&self) -> &'static str {
220 "Code blocks should have a language specified"
221 }
222
223 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
224 let content = ctx.content;
225 let mut warnings = Vec::new();
226
227 for error in self.validate_config() {
229 warnings.push(LintWarning {
230 rule_name: Some(self.name().to_string()),
231 line: 1,
232 column: 1,
233 end_line: 1,
234 end_column: 1,
235 message: format!("[config error] {error}"),
236 severity: Severity::Error,
237 fix: None,
238 });
239 }
240
241 let fenced_blocks = detect_fenced_code_blocks(content, &ctx.line_offsets);
243
244 let disabled_ranges = compute_disabled_ranges(content, self.name());
246
247 let preferred_labels = if self.config.style == LanguageStyle::Consistent {
249 self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
250 } else {
251 HashMap::new()
252 };
253
254 for block in &fenced_blocks {
255 if is_line_disabled(&disabled_ranges, block.line_idx) {
257 continue;
258 }
259
260 let line = content.lines().nth(block.line_idx).unwrap_or("");
262 let trimmed = line.trim();
263 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
264
265 let has_mkdocs_attrs_only =
267 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
268
269 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
271 && after_fence.starts_with('{')
272 && after_fence.contains('}');
273
274 let needs_language =
277 !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
278
279 if needs_language && !has_quarto_syntax {
280 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
281
282 warnings.push(LintWarning {
283 rule_name: Some(self.name().to_string()),
284 line: start_line,
285 column: start_col,
286 end_line,
287 end_column: end_col,
288 message: "Code block (```) missing language".to_string(),
289 severity: Severity::Warning,
290 fix: Some(Fix {
291 range: {
292 let trimmed_start = line.len() - line.trim_start().len();
293 let fence_len = block.fence_marker.len();
294 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
295 let fence_start_byte = line_start_byte + trimmed_start;
296 let fence_end_byte = fence_start_byte + fence_len;
297 fence_start_byte..fence_end_byte
298 },
299 replacement: format!("{}text", block.fence_marker),
300 }),
301 });
302 continue;
303 }
304
305 if has_quarto_syntax {
307 continue;
308 }
309
310 let canonical = resolve_canonical(&block.language);
311
312 if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
314 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
315
316 warnings.push(LintWarning {
317 rule_name: Some(self.name().to_string()),
318 line: start_line,
319 column: start_col,
320 end_line,
321 end_column: end_col,
322 message: msg,
323 severity: Severity::Warning,
324 fix: None,
325 });
326 continue;
327 }
328
329 if canonical.is_none() {
331 if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
332 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
333
334 warnings.push(LintWarning {
335 rule_name: Some(self.name().to_string()),
336 line: start_line,
337 column: start_col,
338 end_line,
339 end_column: end_col,
340 message: msg,
341 severity,
342 fix: None,
343 });
344 }
345 continue;
346 }
347
348 if self.config.style == LanguageStyle::Consistent
350 && let Some(preferred) = preferred_labels.get(canonical.unwrap())
351 && &block.language != preferred
352 {
353 let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
354
355 let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
356 let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
357 Fix {
358 range: (line_start_byte + label_start)..(line_start_byte + label_end),
359 replacement: preferred.clone(),
360 }
361 });
362 let lang = &block.language;
363 let canonical = canonical.unwrap();
364
365 warnings.push(LintWarning {
366 rule_name: Some(self.name().to_string()),
367 line: start_line,
368 column: start_col,
369 end_line,
370 end_column: end_col,
371 message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
372 severity: Severity::Warning,
373 fix,
374 });
375 }
376 }
377
378 Ok(warnings)
379 }
380
381 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
382 let content = ctx.content;
383
384 let fenced_blocks = detect_fenced_code_blocks(content, &ctx.line_offsets);
386
387 let disabled_ranges = compute_disabled_ranges(content, self.name());
389
390 let preferred_labels = if self.config.style == LanguageStyle::Consistent {
392 self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
393 } else {
394 HashMap::new()
395 };
396
397 let mut lines_to_fix: std::collections::HashMap<usize, FixAction> = std::collections::HashMap::new();
399
400 for block in &fenced_blocks {
401 if is_line_disabled(&disabled_ranges, block.line_idx) {
402 continue;
403 }
404
405 if ctx.inline_config().is_rule_disabled(self.name(), block.line_idx + 1) {
407 continue;
408 }
409
410 let line = content.lines().nth(block.line_idx).unwrap_or("");
411 let trimmed = line.trim();
412 let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
413
414 let has_mkdocs_attrs_only =
415 ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
416
417 let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
418 && after_fence.starts_with('{')
419 && after_fence.contains('}');
420
421 let needs_language =
422 !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
423
424 if needs_language && !has_quarto_syntax {
425 lines_to_fix.insert(
426 block.line_idx,
427 FixAction::AddLanguage {
428 fence_marker: block.fence_marker.clone(),
429 has_mkdocs_attrs_only,
430 },
431 );
432 } else if !has_quarto_syntax
433 && self.config.style == LanguageStyle::Consistent
434 && let Some(canonical) = resolve_canonical(&block.language)
435 && let Some(preferred) = preferred_labels.get(canonical)
436 && &block.language != preferred
437 {
438 lines_to_fix.insert(
439 block.line_idx,
440 FixAction::NormalizeLabel {
441 fence_marker: block.fence_marker.clone(),
442 new_label: preferred.clone(),
443 },
444 );
445 }
446 }
447
448 let mut result = String::new();
450 for (i, line) in content.lines().enumerate() {
451 if let Some(action) = lines_to_fix.get(&i) {
452 match action {
453 FixAction::AddLanguage {
454 fence_marker,
455 has_mkdocs_attrs_only,
456 } => {
457 let indent = &line[..line.len() - line.trim_start().len()];
458 let trimmed = line.trim();
459 let after_fence = trimmed.strip_prefix(fence_marker).unwrap_or("").trim();
460
461 if *has_mkdocs_attrs_only {
462 result.push_str(&format!("{indent}{fence_marker}text {after_fence}\n"));
463 } else {
464 result.push_str(&format!("{indent}{fence_marker}text\n"));
465 }
466 }
467 FixAction::NormalizeLabel {
468 fence_marker,
469 new_label,
470 } => {
471 if let Some((label_start, label_end)) = find_label_span(line, fence_marker) {
472 result.push_str(&line[..label_start]);
473 result.push_str(new_label);
474 result.push_str(&line[label_end..]);
475 result.push('\n');
476 } else {
477 result.push_str(line);
478 result.push('\n');
479 }
480 }
481 }
482 } else {
483 result.push_str(line);
484 result.push('\n');
485 }
486 }
487
488 if !content.ends_with('\n') {
490 result.pop();
491 }
492
493 Ok(result)
494 }
495
496 fn category(&self) -> RuleCategory {
498 RuleCategory::CodeBlock
499 }
500
501 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
503 ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
504 }
505
506 fn as_any(&self) -> &dyn std::any::Any {
507 self
508 }
509
510 fn default_config_section(&self) -> Option<(String, toml::Value)> {
511 let default_config = MD040Config::default();
512 let json_value = serde_json::to_value(&default_config).ok()?;
513 let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
514
515 if let toml::Value::Table(table) = toml_value {
516 if !table.is_empty() {
517 Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
518 } else {
519 None
520 }
521 } else {
522 None
523 }
524 }
525
526 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
527 where
528 Self: Sized,
529 {
530 let rule_config: MD040Config = load_rule_config(config);
531 Box::new(MD040FencedCodeLanguage::with_config(rule_config))
532 }
533}
534
/// A planned rewrite of a single fence line, applied by `fix`.
#[derive(Debug, Clone)]
enum FixAction {
    /// Append `text` as the language of a fence that has none.
    AddLanguage {
        /// Fence marker of the block being fixed.
        fence_marker: String,
        /// When true, the text following the fence is kept after `text`.
        has_mkdocs_attrs_only: bool,
    },
    /// Replace the existing label with `new_label`.
    NormalizeLabel {
        /// Fence marker of the block being fixed.
        fence_marker: String,
        /// Label to write in place of the current one.
        new_label: String,
    },
}
546
547fn detect_fenced_code_blocks(content: &str, line_offsets: &[usize]) -> Vec<FencedCodeBlock> {
549 let mut blocks = Vec::new();
550 let options = Options::all();
551 let parser = Parser::new_ext(content, options).into_offset_iter();
552
553 for (event, range) in parser {
554 if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) = event {
555 let line_idx = line_idx_from_offset(line_offsets, range.start);
557
558 let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
560 let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
561 let line = content.get(line_start..line_end).unwrap_or("");
562 let trimmed = line.trim();
563 let fence_marker = if trimmed.starts_with('`') {
564 let count = trimmed.chars().take_while(|&c| c == '`').count();
565 "`".repeat(count)
566 } else if trimmed.starts_with('~') {
567 let count = trimmed.chars().take_while(|&c| c == '~').count();
568 "~".repeat(count)
569 } else {
570 "```".to_string() };
572
573 let language = info.split_whitespace().next().unwrap_or("").to_string();
575
576 blocks.push(FencedCodeBlock {
577 line_idx,
578 language,
579 fence_marker,
580 });
581 }
582 }
583
584 blocks
585}
586
/// Maps a byte `offset` to the index of the line containing it, given the
/// sorted start offsets of all lines.
///
/// An exact hit returns that line; otherwise the line that starts just before
/// the offset is returned (clamped to 0).
#[inline]
fn line_idx_from_offset(line_offsets: &[usize], offset: usize) -> usize {
    line_offsets
        .binary_search(&offset)
        .unwrap_or_else(|insert_at| insert_at.saturating_sub(1))
}
594
595fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
597 let mut ranges = Vec::new();
598 let mut disabled_start: Option<usize> = None;
599
600 for (i, line) in content.lines().enumerate() {
601 let trimmed = line.trim();
602
603 if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
604 && (rules.is_empty() || rules.contains(&rule_name))
605 && disabled_start.is_none()
606 {
607 disabled_start = Some(i);
608 }
609
610 if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
611 && (rules.is_empty() || rules.contains(&rule_name))
612 && let Some(start) = disabled_start.take()
613 {
614 ranges.push((start, i));
615 }
616 }
617
618 if let Some(start) = disabled_start {
620 ranges.push((start, usize::MAX));
621 }
622
623 ranges
624}
625
/// Returns `true` when `line_idx` falls inside any half-open range
/// `[start, end)` in `ranges`.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    ranges
        .iter()
        .any(|&(start, end)| (start..end).contains(&line_idx))
}
630
/// Locates the language label on a fence line.
///
/// Returns the byte range `(start, end)` of the first non-whitespace token
/// after `fence_marker`, relative to the start of `line`. Returns `None` when
/// the line (after leading whitespace) does not begin with `fence_marker` or
/// when nothing but whitespace follows the marker.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let indent_len = line.len() - line.trim_start().len();
    let rest = line[indent_len..].strip_prefix(fence_marker)?;
    let fence_end = indent_len + fence_marker.len();

    // Skip any spacing between the fence and the label.
    let label_offset = rest.find(|ch: char| !ch.is_whitespace())?;

    // The label runs up to the next whitespace, or to the end of the line.
    let label_len = rest[label_offset..]
        .find(char::is_whitespace)
        .unwrap_or(rest.len() - label_offset);

    let label_start = fence_end + label_offset;
    Some((label_start, label_start + label_len))
}
656
657#[cfg(test)]
658mod tests {
659 use super::*;
660 use crate::lint_context::LintContext;
661
662 fn run_check(content: &str) -> LintResult {
663 let rule = MD040FencedCodeLanguage::default();
664 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
665 rule.check(&ctx)
666 }
667
668 fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
669 let rule = MD040FencedCodeLanguage::with_config(config);
670 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
671 rule.check(&ctx)
672 }
673
674 fn run_fix(content: &str) -> Result<String, LintError> {
675 let rule = MD040FencedCodeLanguage::default();
676 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
677 rule.fix(&ctx)
678 }
679
680 fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
681 let rule = MD040FencedCodeLanguage::with_config(config);
682 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
683 rule.fix(&ctx)
684 }
685
686 fn run_check_mkdocs(content: &str) -> LintResult {
687 let rule = MD040FencedCodeLanguage::default();
688 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
689 rule.check(&ctx)
690 }
691
692 #[test]
697 fn test_code_blocks_with_language_specified() {
698 let content = r#"# Test
699
700```python
701print("Hello, world!")
702```
703
704```javascript
705console.log("Hello!");
706```
707"#;
708 let result = run_check(content).unwrap();
709 assert!(result.is_empty(), "No warnings expected for code blocks with language");
710 }
711
712 #[test]
713 fn test_code_blocks_without_language() {
714 let content = r#"# Test
715
716```
717print("Hello, world!")
718```
719"#;
720 let result = run_check(content).unwrap();
721 assert_eq!(result.len(), 1);
722 assert_eq!(result[0].message, "Code block (```) missing language");
723 assert_eq!(result[0].line, 3);
724 }
725
726 #[test]
727 fn test_fix_method_adds_text_language() {
728 let content = r#"# Test
729
730```
731code without language
732```
733
734```python
735already has language
736```
737
738```
739another block without
740```
741"#;
742 let fixed = run_fix(content).unwrap();
743 assert!(fixed.contains("```text"));
744 assert!(fixed.contains("```python"));
745 assert_eq!(fixed.matches("```text").count(), 2);
746 }
747
748 #[test]
749 fn test_fix_preserves_indentation() {
750 let content = r#"# Test
751
752- List item
753 ```
754 indented code block
755 ```
756"#;
757 let fixed = run_fix(content).unwrap();
758 assert!(fixed.contains(" ```text"));
759 }
760
761 #[test]
766 fn test_consistent_mode_detects_inconsistency() {
767 let content = r#"```bash
768echo hi
769```
770
771```sh
772echo there
773```
774
775```bash
776echo again
777```
778"#;
779 let config = MD040Config {
780 style: LanguageStyle::Consistent,
781 ..Default::default()
782 };
783 let result = run_check_with_config(content, config).unwrap();
784 assert_eq!(result.len(), 1);
785 assert!(result[0].message.contains("Inconsistent"));
786 assert!(result[0].message.contains("sh"));
787 assert!(result[0].message.contains("bash"));
788 }
789
790 #[test]
791 fn test_consistent_mode_fix_normalizes() {
792 let content = r#"```bash
793echo hi
794```
795
796```sh
797echo there
798```
799
800```bash
801echo again
802```
803"#;
804 let config = MD040Config {
805 style: LanguageStyle::Consistent,
806 ..Default::default()
807 };
808 let fixed = run_fix_with_config(content, config).unwrap();
809 assert_eq!(fixed.matches("```bash").count(), 3);
810 assert_eq!(fixed.matches("```sh").count(), 0);
811 }
812
813 #[test]
814 fn test_consistent_mode_tie_break_uses_curated_default() {
815 let content = r#"```bash
817echo hi
818```
819
820```sh
821echo there
822```
823"#;
824 let config = MD040Config {
825 style: LanguageStyle::Consistent,
826 ..Default::default()
827 };
828 let fixed = run_fix_with_config(content, config).unwrap();
829 assert_eq!(fixed.matches("```bash").count(), 2);
831 }
832
833 #[test]
834 fn test_consistent_mode_with_preferred_alias() {
835 let content = r#"```bash
836echo hi
837```
838
839```sh
840echo there
841```
842"#;
843 let mut preferred = HashMap::new();
844 preferred.insert("Shell".to_string(), "sh".to_string());
845
846 let config = MD040Config {
847 style: LanguageStyle::Consistent,
848 preferred_aliases: preferred,
849 ..Default::default()
850 };
851 let fixed = run_fix_with_config(content, config).unwrap();
852 assert_eq!(fixed.matches("```sh").count(), 2);
853 assert_eq!(fixed.matches("```bash").count(), 0);
854 }
855
856 #[test]
857 fn test_consistent_mode_ignores_disabled_blocks() {
858 let content = r#"```bash
859echo hi
860```
861<!-- rumdl-disable MD040 -->
862```sh
863echo there
864```
865```sh
866echo again
867```
868<!-- rumdl-enable MD040 -->
869"#;
870 let config = MD040Config {
871 style: LanguageStyle::Consistent,
872 ..Default::default()
873 };
874 let result = run_check_with_config(content, config).unwrap();
875 assert!(result.is_empty(), "Disabled blocks should not affect consistency");
876 }
877
878 #[test]
879 fn test_fix_preserves_attributes() {
880 let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
881 let config = MD040Config {
882 style: LanguageStyle::Consistent,
883 ..Default::default()
884 };
885 let fixed = run_fix_with_config(content, config).unwrap();
886 assert!(fixed.contains("```bash {.highlight}"));
887 }
888
889 #[test]
890 fn test_fix_preserves_spacing_before_label() {
891 let content = "```bash\ncode\n```\n\n``` sh {.highlight}\ncode\n```";
892 let config = MD040Config {
893 style: LanguageStyle::Consistent,
894 ..Default::default()
895 };
896 let fixed = run_fix_with_config(content, config).unwrap();
897 assert!(fixed.contains("``` bash {.highlight}"));
898 assert!(!fixed.contains("``` sh {.highlight}"));
899 }
900
901 #[test]
906 fn test_allowlist_blocks_unlisted() {
907 let content = "```java\ncode\n```";
908 let config = MD040Config {
909 allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
910 ..Default::default()
911 };
912 let result = run_check_with_config(content, config).unwrap();
913 assert_eq!(result.len(), 1);
914 assert!(result[0].message.contains("not in the allowed list"));
915 }
916
917 #[test]
918 fn test_allowlist_allows_listed() {
919 let content = "```python\ncode\n```";
920 let config = MD040Config {
921 allowed_languages: vec!["Python".to_string()],
922 ..Default::default()
923 };
924 let result = run_check_with_config(content, config).unwrap();
925 assert!(result.is_empty());
926 }
927
928 #[test]
929 fn test_allowlist_blocks_unknown_language() {
930 let content = "```mysterylang\ncode\n```";
931 let config = MD040Config {
932 allowed_languages: vec!["Python".to_string()],
933 ..Default::default()
934 };
935 let result = run_check_with_config(content, config).unwrap();
936 assert_eq!(result.len(), 1);
937 assert!(result[0].message.contains("allowed list"));
938 }
939
940 #[test]
941 fn test_allowlist_case_insensitive() {
942 let content = "```python\ncode\n```";
943 let config = MD040Config {
944 allowed_languages: vec!["PYTHON".to_string()],
945 ..Default::default()
946 };
947 let result = run_check_with_config(content, config).unwrap();
948 assert!(result.is_empty());
949 }
950
951 #[test]
952 fn test_denylist_blocks_listed() {
953 let content = "```java\ncode\n```";
954 let config = MD040Config {
955 disallowed_languages: vec!["Java".to_string()],
956 ..Default::default()
957 };
958 let result = run_check_with_config(content, config).unwrap();
959 assert_eq!(result.len(), 1);
960 assert!(result[0].message.contains("disallowed"));
961 }
962
963 #[test]
964 fn test_denylist_allows_unlisted() {
965 let content = "```python\ncode\n```";
966 let config = MD040Config {
967 disallowed_languages: vec!["Java".to_string()],
968 ..Default::default()
969 };
970 let result = run_check_with_config(content, config).unwrap();
971 assert!(result.is_empty());
972 }
973
974 #[test]
975 fn test_allowlist_takes_precedence_over_denylist() {
976 let content = "```python\ncode\n```";
977 let config = MD040Config {
978 allowed_languages: vec!["Python".to_string()],
979 disallowed_languages: vec!["Python".to_string()], ..Default::default()
981 };
982 let result = run_check_with_config(content, config).unwrap();
983 assert!(result.is_empty());
984 }
985
986 #[test]
991 fn test_unknown_language_ignore_default() {
992 let content = "```mycustomlang\ncode\n```";
993 let result = run_check(content).unwrap();
994 assert!(result.is_empty(), "Unknown languages ignored by default");
995 }
996
997 #[test]
998 fn test_unknown_language_warn() {
999 let content = "```mycustomlang\ncode\n```";
1000 let config = MD040Config {
1001 unknown_language_action: UnknownLanguageAction::Warn,
1002 ..Default::default()
1003 };
1004 let result = run_check_with_config(content, config).unwrap();
1005 assert_eq!(result.len(), 1);
1006 assert!(result[0].message.contains("Unknown language"));
1007 assert!(result[0].message.contains("mycustomlang"));
1008 assert_eq!(result[0].severity, Severity::Warning);
1009 }
1010
1011 #[test]
1012 fn test_unknown_language_error() {
1013 let content = "```mycustomlang\ncode\n```";
1014 let config = MD040Config {
1015 unknown_language_action: UnknownLanguageAction::Error,
1016 ..Default::default()
1017 };
1018 let result = run_check_with_config(content, config).unwrap();
1019 assert_eq!(result.len(), 1);
1020 assert!(result[0].message.contains("Unknown language"));
1021 assert_eq!(result[0].severity, Severity::Error);
1022 }
1023
1024 #[test]
1029 fn test_invalid_preferred_alias_detected() {
1030 let mut preferred = HashMap::new();
1031 preferred.insert("Shell".to_string(), "invalid_alias".to_string());
1032
1033 let config = MD040Config {
1034 style: LanguageStyle::Consistent,
1035 preferred_aliases: preferred,
1036 ..Default::default()
1037 };
1038 let rule = MD040FencedCodeLanguage::with_config(config);
1039 let errors = rule.validate_config();
1040 assert_eq!(errors.len(), 1);
1041 assert!(errors[0].contains("Invalid alias"));
1042 assert!(errors[0].contains("invalid_alias"));
1043 }
1044
1045 #[test]
1046 fn test_unknown_language_in_preferred_aliases_detected() {
1047 let mut preferred = HashMap::new();
1048 preferred.insert("NotARealLanguage".to_string(), "nope".to_string());
1049
1050 let config = MD040Config {
1051 style: LanguageStyle::Consistent,
1052 preferred_aliases: preferred,
1053 ..Default::default()
1054 };
1055 let rule = MD040FencedCodeLanguage::with_config(config);
1056 let errors = rule.validate_config();
1057 assert_eq!(errors.len(), 1);
1058 assert!(errors[0].contains("Unknown language"));
1059 }
1060
1061 #[test]
1062 fn test_valid_preferred_alias_accepted() {
1063 let mut preferred = HashMap::new();
1064 preferred.insert("Shell".to_string(), "bash".to_string());
1065 preferred.insert("JavaScript".to_string(), "js".to_string());
1066
1067 let config = MD040Config {
1068 style: LanguageStyle::Consistent,
1069 preferred_aliases: preferred,
1070 ..Default::default()
1071 };
1072 let rule = MD040FencedCodeLanguage::with_config(config);
1073 let errors = rule.validate_config();
1074 assert!(errors.is_empty());
1075 }
1076
1077 #[test]
1078 fn test_config_error_uses_valid_line_column() {
1079 let config = md040_config::MD040Config {
1080 preferred_aliases: {
1081 let mut map = std::collections::HashMap::new();
1082 map.insert("Shell".to_string(), "invalid_alias".to_string());
1083 map
1084 },
1085 ..Default::default()
1086 };
1087 let rule = MD040FencedCodeLanguage::with_config(config);
1088
1089 let content = "```shell\necho hello\n```";
1090 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1091 let result = rule.check(&ctx).unwrap();
1092
1093 let config_error = result.iter().find(|w| w.message.contains("[config error]"));
1095 assert!(config_error.is_some(), "Should have a config error warning");
1096
1097 let warning = config_error.unwrap();
1098 assert!(
1100 warning.line >= 1,
1101 "Config error line should be >= 1, got {}",
1102 warning.line
1103 );
1104 assert!(
1105 warning.column >= 1,
1106 "Config error column should be >= 1, got {}",
1107 warning.column
1108 );
1109 }
1110
1111 #[test]
1116 fn test_linguist_resolution() {
1117 assert_eq!(resolve_canonical("bash"), Some("Shell"));
1118 assert_eq!(resolve_canonical("sh"), Some("Shell"));
1119 assert_eq!(resolve_canonical("zsh"), Some("Shell"));
1120 assert_eq!(resolve_canonical("js"), Some("JavaScript"));
1121 assert_eq!(resolve_canonical("python"), Some("Python"));
1122 assert_eq!(resolve_canonical("unknown_lang"), None);
1123 }
1124
1125 #[test]
1126 fn test_linguist_resolution_case_insensitive() {
1127 assert_eq!(resolve_canonical("BASH"), Some("Shell"));
1128 assert_eq!(resolve_canonical("Bash"), Some("Shell"));
1129 assert_eq!(resolve_canonical("Python"), Some("Python"));
1130 assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
1131 }
1132
1133 #[test]
1134 fn test_alias_validation() {
1135 assert!(is_valid_alias("Shell", "bash"));
1136 assert!(is_valid_alias("Shell", "sh"));
1137 assert!(is_valid_alias("Shell", "zsh"));
1138 assert!(!is_valid_alias("Shell", "python"));
1139 assert!(!is_valid_alias("Shell", "invalid"));
1140 }
1141
1142 #[test]
1143 fn test_default_alias() {
1144 assert_eq!(default_alias("Shell"), Some("bash"));
1145 assert_eq!(default_alias("JavaScript"), Some("js"));
1146 assert_eq!(default_alias("Python"), Some("python"));
1147 }
1148
1149 #[test]
1154 fn test_mixed_case_labels_normalized() {
1155 let content = r#"```BASH
1156echo hi
1157```
1158
1159```Bash
1160echo there
1161```
1162
1163```bash
1164echo again
1165```
1166"#;
1167 let config = MD040Config {
1168 style: LanguageStyle::Consistent,
1169 ..Default::default()
1170 };
1171 let result = run_check_with_config(content, config).unwrap();
1173 assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
1177 }
1178
1179 #[test]
1180 fn test_multiple_languages_independent() {
1181 let content = r#"```bash
1182shell code
1183```
1184
1185```python
1186python code
1187```
1188
1189```sh
1190more shell
1191```
1192
1193```python3
1194more python
1195```
1196"#;
1197 let config = MD040Config {
1198 style: LanguageStyle::Consistent,
1199 ..Default::default()
1200 };
1201 let result = run_check_with_config(content, config).unwrap();
1202 assert_eq!(result.len(), 2);
1204 }
1205
/// Tilde-fenced blocks are checked and fixed like backtick fences: the
/// `bash`/`sh` pair gives one warning, and the fix leaves only `~~~bash`.
#[test]
fn test_tilde_fences() {
    let markdown = r#"~~~bash
echo hi
~~~

~~~sh
echo there
~~~
"#;
    // Build a fresh, identical config for each run.
    let consistent = || MD040Config {
        style: LanguageStyle::Consistent,
        ..Default::default()
    };

    let warnings = run_check_with_config(markdown, consistent()).unwrap();
    assert_eq!(warnings.len(), 1);

    let output = run_fix_with_config(markdown, consistent()).unwrap();
    let has_bash_fence = output.contains("~~~bash");
    let has_sh_fence = output.contains("~~~sh");
    assert!(has_bash_fence);
    assert!(!has_sh_fence);
}
1227
/// Fixing an inconsistent label must leave the fence marker untouched: the
/// four-backtick block keeps four backticks and the three-backtick block
/// keeps three, with both ending up labelled `bash`.
#[test]
fn test_longer_fence_markers_preserved() {
    let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
    let config = MD040Config {
        style: LanguageStyle::Consistent,
        ..Default::default()
    };
    let fixed = run_fix_with_config(content, config).unwrap();

    // Compare whole lines rather than substrings: "```bash" is itself a
    // substring of "````bash", so a `contains` check on the shorter fence
    // would pass even if the three-backtick fence were missing or altered.
    let has_line = |expected: &str| fixed.lines().any(|line| line == expected);
    assert!(
        has_line("````bash"),
        "four-backtick fence should be relabelled to bash with its marker intact"
    );
    assert!(
        has_line("```bash"),
        "three-backtick fence should keep its marker and bash label"
    );
}
1239
/// An empty document produces no warnings.
#[test]
fn test_empty_document() {
    assert!(run_check("").unwrap().is_empty());
}
1245
/// A document containing only a heading and a paragraph (no fenced code
/// blocks) produces no warnings.
#[test]
fn test_no_code_blocks() {
    let warnings = run_check("# Just a heading\n\nSome text.").unwrap();
    assert!(warnings.is_empty());
}
1252
/// A lone `bash` block cannot be inconsistent with anything, so the
/// `Consistent` style reports nothing.
#[test]
fn test_single_code_block_no_inconsistency() {
    let markdown = "```bash\necho hi\n```";
    let warnings = run_check_with_config(
        markdown,
        MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        },
    )
    .unwrap();
    assert!(warnings.is_empty(), "Single block has no inconsistency");
}
1263
/// Running the fix twice must give the same text as running it once.
///
/// The input mixes `bash` and `sh` under the `Consistent` style, so the
/// first pass has something to rewrite.
#[test]
fn test_idempotent_fix() {
    let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
    let config = MD040Config {
        style: LanguageStyle::Consistent,
        ..Default::default()
    };
    let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
    // Guard against a vacuous pass: if the first run changed nothing, the
    // idempotence comparison below would hold trivially.
    assert_ne!(
        fixed1, content,
        "First fix pass should rewrite the inconsistent label"
    );
    let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
    assert_eq!(fixed1, fixed2, "Fix should be idempotent");
}
1282
/// When checked through `run_check_mkdocs`, a fence whose info string is only
/// `title="..."` must not be reported as missing a language.
#[test]
fn test_mkdocs_superfences_title_only() {
    let markdown = concat!(
        "```title=\"Example\"\n",
        "echo hi\n",
        "```\n",
    );
    let warnings = run_check_mkdocs(markdown).unwrap();
    assert!(
        warnings.is_empty(),
        "MkDocs superfences with title= should not require language"
    );
}
1300
/// When checked through `run_check_mkdocs`, a fence whose info string is only
/// `hl_lines="..."` must not be reported as missing a language.
#[test]
fn test_mkdocs_superfences_hl_lines() {
    let markdown = concat!(
        "```hl_lines=\"1 2\"\n",
        "line 1\n",
        "line 2\n",
        "```\n",
    );
    let warnings = run_check_mkdocs(markdown).unwrap();
    assert!(
        warnings.is_empty(),
        "MkDocs superfences with hl_lines= should not require language"
    );
}
1315
/// When checked through `run_check_mkdocs`, a fence whose info string is only
/// `linenums="..."` must not be reported as missing a language.
#[test]
fn test_mkdocs_superfences_linenums() {
    let markdown = concat!(
        "```linenums=\"1\"\n",
        "line 1\n",
        "line 2\n",
        "```\n",
    );
    let warnings = run_check_mkdocs(markdown).unwrap();
    assert!(
        warnings.is_empty(),
        "MkDocs superfences with linenums= should not require language"
    );
}
1330
/// When checked through `run_check_mkdocs`, a fence whose info string is a
/// `.class` shorthand must not be reported as missing a language.
#[test]
fn test_mkdocs_superfences_class() {
    let markdown = concat!(
        "```.my-class\n",
        "some text\n",
        "```\n",
    );
    let warnings = run_check_mkdocs(markdown).unwrap();
    assert!(
        warnings.is_empty(),
        "MkDocs superfences with .class should not require language"
    );
}
1344
/// When checked through `run_check_mkdocs`, a fence whose info string is an
/// `#id` shorthand must not be reported as missing a language.
#[test]
fn test_mkdocs_superfences_id() {
    let markdown = concat!(
        "```#my-id\n",
        "some text\n",
        "```\n",
    );
    let warnings = run_check_mkdocs(markdown).unwrap();
    assert!(
        warnings.is_empty(),
        "MkDocs superfences with #id should not require language"
    );
}
1358
/// A fence that names a language and also carries `title=` / `hl_lines=`
/// attributes passes the `run_check_mkdocs` check without warnings.
#[test]
fn test_mkdocs_superfences_with_language() {
    let markdown = concat!(
        "```python title=\"Example\" hl_lines=\"1\"\n",
        "print(\"hello\")\n",
        "```\n",
    );
    let warnings = run_check_mkdocs(markdown).unwrap();
    assert!(warnings.is_empty(), "Code block with language and attrs should pass");
}
1369
/// Through the plain `run_check` entry point, a fence whose info string is
/// only `title="..."` is reported once as lacking a language.
#[test]
fn test_standard_flavor_no_special_handling() {
    let markdown = concat!(
        "```title=\"Example\"\n",
        "echo hi\n",
        "```\n",
    );
    let warning_count = run_check(markdown).unwrap().len();
    assert_eq!(
        warning_count, 1,
        "Standard flavor should warn about title= without language"
    );
}
1384}