use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
use crate::rule_config_serde::{RuleConfig, load_rule_config};
use crate::utils::range_utils::calculate_line_range;
use std::collections::HashMap;
pub mod md040_config;
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &[
"title=", "hl_lines=", "linenums=", ".", "#", ];
#[inline]
fn is_superfences_attribute(s: &str) -> bool {
MKDOCS_SUPERFENCES_ATTR_PREFIXES
.iter()
.any(|prefix| s.starts_with(prefix))
}
use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
struct FencedCodeBlock {
line_idx: usize,
language: String,
fence_marker: String,
}
#[derive(Debug, Clone, Default)]
pub struct MD040FencedCodeLanguage {
config: MD040Config,
}
impl MD040FencedCodeLanguage {
pub fn new() -> Self {
Self::default()
}
pub fn with_config(config: MD040Config) -> Self {
Self { config }
}
fn validate_config(&self) -> Vec<String> {
let mut errors = Vec::new();
for (canonical, alias) in &self.config.preferred_aliases {
if let Some(actual_canonical) = resolve_canonical(canonical) {
if !is_valid_alias(actual_canonical, alias)
&& let Some(valid_aliases) = get_aliases(actual_canonical)
{
let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
let valid_str = valid_list
.iter()
.map(|s| format!("'{s}'"))
.collect::<Vec<_>>()
.join(", ");
let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
errors.push(format!(
"Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
));
}
} else {
errors.push(format!(
"Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
));
}
}
errors
}
fn compute_preferred_labels(
&self,
blocks: &[FencedCodeBlock],
disabled_ranges: &[(usize, usize)],
) -> HashMap<String, String> {
let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
for block in blocks {
if is_line_disabled(disabled_ranges, block.line_idx) {
continue;
}
if block.language.is_empty() {
continue;
}
if let Some(canonical) = resolve_canonical(&block.language) {
by_canonical
.entry(canonical.to_string())
.or_default()
.push(&block.language);
}
}
let mut result = HashMap::new();
for (canonical, labels) in by_canonical {
let winner = if let Some(preferred) = self
.config
.preferred_aliases
.iter()
.find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
.map(|(_, v)| v.clone())
{
preferred
} else {
let mut counts: HashMap<&str, usize> = HashMap::new();
for label in &labels {
*counts.entry(*label).or_default() += 1;
}
let max_count = counts.values().max().copied().unwrap_or(0);
let winners: Vec<_> = counts
.iter()
.filter(|(_, c)| **c == max_count)
.map(|(l, _)| *l)
.collect();
if winners.len() == 1 {
winners[0].to_string()
} else {
default_alias(&canonical)
.filter(|default| winners.contains(default))
.map(|s| s.to_string())
.unwrap_or_else(|| winners.into_iter().min().unwrap().to_string())
}
};
result.insert(canonical, winner);
}
result
}
fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
if !self.config.allowed_languages.is_empty() {
let allowed = self.config.allowed_languages.join(", ");
let Some(canonical) = canonical else {
return Some(format!(
"Language '{original_label}' is not in the allowed list: {allowed}"
));
};
if !self
.config
.allowed_languages
.iter()
.any(|a| a.eq_ignore_ascii_case(canonical))
{
return Some(format!(
"Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
));
}
} else if !self.config.disallowed_languages.is_empty()
&& canonical.is_some_and(|canonical| {
self.config
.disallowed_languages
.iter()
.any(|d| d.eq_ignore_ascii_case(canonical))
})
{
let canonical = canonical.unwrap_or("unknown");
return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
}
None
}
fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
if resolve_canonical(label).is_some() {
return None;
}
match self.config.unknown_language_action {
UnknownLanguageAction::Ignore => None,
UnknownLanguageAction::Warn => Some((
format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
Severity::Warning,
)),
UnknownLanguageAction::Error => Some((
format!("Unknown language '{label}' (not in GitHub Linguist)"),
Severity::Error,
)),
}
}
}
impl Rule for MD040FencedCodeLanguage {
fn name(&self) -> &'static str {
"MD040"
}
fn description(&self) -> &'static str {
"Code blocks should have a language specified"
}
fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
let content = ctx.content;
let mut warnings = Vec::new();
for error in self.validate_config() {
warnings.push(LintWarning {
rule_name: Some(self.name().to_string()),
line: 1,
column: 1,
end_line: 1,
end_column: 1,
message: format!("[config error] {error}"),
severity: Severity::Error,
fix: None,
});
}
let fenced_blocks = derive_fenced_code_blocks(ctx);
let disabled_ranges = compute_disabled_ranges(content, self.name());
let preferred_labels = if self.config.style == LanguageStyle::Consistent {
self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
} else {
HashMap::new()
};
let lines = ctx.raw_lines();
for block in &fenced_blocks {
if is_line_disabled(&disabled_ranges, block.line_idx) {
continue;
}
let line = lines.get(block.line_idx).unwrap_or(&"");
let trimmed = line.trim();
let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
let has_mkdocs_attrs_only =
ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
&& after_fence.starts_with('{')
&& after_fence.contains('}');
let needs_language =
!has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
if needs_language && !has_quarto_syntax {
let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
warnings.push(LintWarning {
rule_name: Some(self.name().to_string()),
line: start_line,
column: start_col,
end_line,
end_column: end_col,
message: "Code block (```) missing language".to_string(),
severity: Severity::Warning,
fix: Some(Fix {
range: {
let trimmed_start = line.len() - line.trim_start().len();
let fence_len = block.fence_marker.len();
let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
let fence_start_byte = line_start_byte + trimmed_start;
let fence_end_byte = fence_start_byte + fence_len;
fence_start_byte..fence_end_byte
},
replacement: format!("{}text", block.fence_marker),
}),
});
continue;
}
if has_quarto_syntax {
continue;
}
let canonical = resolve_canonical(&block.language);
if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
warnings.push(LintWarning {
rule_name: Some(self.name().to_string()),
line: start_line,
column: start_col,
end_line,
end_column: end_col,
message: msg,
severity: Severity::Warning,
fix: None,
});
continue;
}
if canonical.is_none() {
if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
warnings.push(LintWarning {
rule_name: Some(self.name().to_string()),
line: start_line,
column: start_col,
end_line,
end_column: end_col,
message: msg,
severity,
fix: None,
});
}
continue;
}
if self.config.style == LanguageStyle::Consistent
&& let Some(preferred) = preferred_labels.get(canonical.unwrap())
&& &block.language != preferred
{
let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
Fix {
range: (line_start_byte + label_start)..(line_start_byte + label_end),
replacement: preferred.clone(),
}
});
let lang = &block.language;
let canonical = canonical.unwrap();
warnings.push(LintWarning {
rule_name: Some(self.name().to_string()),
line: start_line,
column: start_col,
end_line,
end_column: end_col,
message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
severity: Severity::Warning,
fix,
});
}
}
Ok(warnings)
}
fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
let content = ctx.content;
let fenced_blocks = derive_fenced_code_blocks(ctx);
let disabled_ranges = compute_disabled_ranges(content, self.name());
let preferred_labels = if self.config.style == LanguageStyle::Consistent {
self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
} else {
HashMap::new()
};
let mut lines_to_fix: std::collections::HashMap<usize, FixAction> = std::collections::HashMap::new();
for block in &fenced_blocks {
if is_line_disabled(&disabled_ranges, block.line_idx) {
continue;
}
if ctx.inline_config().is_rule_disabled(self.name(), block.line_idx + 1) {
continue;
}
let fix_lines = ctx.raw_lines();
let line = fix_lines.get(block.line_idx).unwrap_or(&"");
let trimmed = line.trim();
let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
let has_mkdocs_attrs_only =
ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
&& after_fence.starts_with('{')
&& after_fence.contains('}');
let needs_language =
!has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
if needs_language && !has_quarto_syntax {
lines_to_fix.insert(
block.line_idx,
FixAction::AddLanguage {
fence_marker: block.fence_marker.clone(),
has_mkdocs_attrs_only,
},
);
} else if !has_quarto_syntax
&& self.config.style == LanguageStyle::Consistent
&& let Some(canonical) = resolve_canonical(&block.language)
&& let Some(preferred) = preferred_labels.get(canonical)
&& &block.language != preferred
{
lines_to_fix.insert(
block.line_idx,
FixAction::NormalizeLabel {
fence_marker: block.fence_marker.clone(),
new_label: preferred.clone(),
},
);
}
}
let mut result = String::new();
for (i, line) in content.lines().enumerate() {
if let Some(action) = lines_to_fix.get(&i) {
match action {
FixAction::AddLanguage {
fence_marker,
has_mkdocs_attrs_only,
} => {
let indent = &line[..line.len() - line.trim_start().len()];
let trimmed = line.trim();
let after_fence = trimmed.strip_prefix(fence_marker).unwrap_or("").trim();
if *has_mkdocs_attrs_only {
result.push_str(&format!("{indent}{fence_marker}text {after_fence}\n"));
} else {
result.push_str(&format!("{indent}{fence_marker}text\n"));
}
}
FixAction::NormalizeLabel {
fence_marker,
new_label,
} => {
if let Some((label_start, label_end)) = find_label_span(line, fence_marker) {
result.push_str(&line[..label_start]);
result.push_str(new_label);
result.push_str(&line[label_end..]);
result.push('\n');
} else {
result.push_str(line);
result.push('\n');
}
}
}
} else {
result.push_str(line);
result.push('\n');
}
}
if !content.ends_with('\n') {
result.pop();
}
Ok(result)
}
fn category(&self) -> RuleCategory {
RuleCategory::CodeBlock
}
fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
}
fn as_any(&self) -> &dyn std::any::Any {
self
}
fn default_config_section(&self) -> Option<(String, toml::Value)> {
let default_config = MD040Config::default();
let json_value = serde_json::to_value(&default_config).ok()?;
let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
if let toml::Value::Table(table) = toml_value {
if !table.is_empty() {
Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
} else {
None
}
} else {
None
}
}
fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
where
Self: Sized,
{
let rule_config: MD040Config = load_rule_config(config);
Box::new(MD040FencedCodeLanguage::with_config(rule_config))
}
}
#[derive(Debug, Clone)]
enum FixAction {
AddLanguage {
fence_marker: String,
has_mkdocs_attrs_only: bool,
},
NormalizeLabel {
fence_marker: String,
new_label: String,
},
}
fn derive_fenced_code_blocks(ctx: &crate::lint_context::LintContext) -> Vec<FencedCodeBlock> {
let content = ctx.content;
let line_offsets = &ctx.line_offsets;
ctx.code_block_details
.iter()
.filter(|d| d.is_fenced)
.map(|detail| {
let line_idx = match line_offsets.binary_search(&detail.start) {
Ok(idx) => idx,
Err(idx) => idx.saturating_sub(1),
};
let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
let line = content.get(line_start..line_end).unwrap_or("");
let trimmed = line.trim();
let fence_marker = if trimmed.starts_with('`') {
let count = trimmed.chars().take_while(|&c| c == '`').count();
"`".repeat(count)
} else if trimmed.starts_with('~') {
let count = trimmed.chars().take_while(|&c| c == '~').count();
"~".repeat(count)
} else {
"```".to_string()
};
let language = detail.info_string.split_whitespace().next().unwrap_or("").to_string();
FencedCodeBlock {
line_idx,
language,
fence_marker,
}
})
.collect()
}
fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
let mut ranges = Vec::new();
let mut disabled_start: Option<usize> = None;
for (i, line) in content.lines().enumerate() {
let trimmed = line.trim();
if let Some(rules) = crate::inline_config::parse_disable_comment(trimmed)
&& (rules.is_empty() || rules.contains(&rule_name))
&& disabled_start.is_none()
{
disabled_start = Some(i);
}
if let Some(rules) = crate::inline_config::parse_enable_comment(trimmed)
&& (rules.is_empty() || rules.contains(&rule_name))
&& let Some(start) = disabled_start.take()
{
ranges.push((start, i));
}
}
if let Some(start) = disabled_start {
ranges.push((start, usize::MAX));
}
ranges
}
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
ranges.iter().any(|&(start, end)| line_idx >= start && line_idx < end)
}
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
let trimmed_start = line.len() - line.trim_start().len();
let after_indent = &line[trimmed_start..];
if !after_indent.starts_with(fence_marker) {
return None;
}
let after_fence = &after_indent[fence_marker.len()..];
let label_start_rel = after_fence
.char_indices()
.find(|&(_, ch)| !ch.is_whitespace())
.map(|(idx, _)| idx)?;
let after_label = &after_fence[label_start_rel..];
let label_end_rel = after_label
.char_indices()
.find(|&(_, ch)| ch.is_whitespace())
.map(|(idx, _)| label_start_rel + idx)
.unwrap_or(after_fence.len());
Some((
trimmed_start + fence_marker.len() + label_start_rel,
trimmed_start + fence_marker.len() + label_end_rel,
))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::lint_context::LintContext;
fn run_check(content: &str) -> LintResult {
let rule = MD040FencedCodeLanguage::default();
let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
rule.check(&ctx)
}
fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
let rule = MD040FencedCodeLanguage::with_config(config);
let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
rule.check(&ctx)
}
fn run_fix(content: &str) -> Result<String, LintError> {
let rule = MD040FencedCodeLanguage::default();
let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
rule.fix(&ctx)
}
fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
let rule = MD040FencedCodeLanguage::with_config(config);
let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
rule.fix(&ctx)
}
fn run_check_mkdocs(content: &str) -> LintResult {
let rule = MD040FencedCodeLanguage::default();
let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
rule.check(&ctx)
}
#[test]
fn test_code_blocks_with_language_specified() {
let content = r#"# Test
```python
print("Hello, world!")
```
```javascript
console.log("Hello!");
```
"#;
let result = run_check(content).unwrap();
assert!(result.is_empty(), "No warnings expected for code blocks with language");
}
#[test]
fn test_code_blocks_without_language() {
let content = r#"# Test
```
print("Hello, world!")
```
"#;
let result = run_check(content).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].message, "Code block (```) missing language");
assert_eq!(result[0].line, 3);
}
#[test]
fn test_fix_method_adds_text_language() {
let content = r#"# Test
```
code without language
```
```python
already has language
```
```
another block without
```
"#;
let fixed = run_fix(content).unwrap();
assert!(fixed.contains("```text"));
assert!(fixed.contains("```python"));
assert_eq!(fixed.matches("```text").count(), 2);
}
#[test]
fn test_fix_preserves_indentation() {
let content = r#"# Test
- List item
```
indented code block
```
"#;
let fixed = run_fix(content).unwrap();
assert!(fixed.contains(" ```text"));
}
#[test]
fn test_consistent_mode_detects_inconsistency() {
let content = r#"```bash
echo hi
```
```sh
echo there
```
```bash
echo again
```
"#;
let config = MD040Config {
style: LanguageStyle::Consistent,
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].message.contains("Inconsistent"));
assert!(result[0].message.contains("sh"));
assert!(result[0].message.contains("bash"));
}
#[test]
fn test_consistent_mode_fix_normalizes() {
let content = r#"```bash
echo hi
```
```sh
echo there
```
```bash
echo again
```
"#;
let config = MD040Config {
style: LanguageStyle::Consistent,
..Default::default()
};
let fixed = run_fix_with_config(content, config).unwrap();
assert_eq!(fixed.matches("```bash").count(), 3);
assert_eq!(fixed.matches("```sh").count(), 0);
}
#[test]
fn test_consistent_mode_tie_break_uses_curated_default() {
let content = r#"```bash
echo hi
```
```sh
echo there
```
"#;
let config = MD040Config {
style: LanguageStyle::Consistent,
..Default::default()
};
let fixed = run_fix_with_config(content, config).unwrap();
assert_eq!(fixed.matches("```bash").count(), 2);
}
#[test]
fn test_consistent_mode_with_preferred_alias() {
let content = r#"```bash
echo hi
```
```sh
echo there
```
"#;
let mut preferred = HashMap::new();
preferred.insert("Shell".to_string(), "sh".to_string());
let config = MD040Config {
style: LanguageStyle::Consistent,
preferred_aliases: preferred,
..Default::default()
};
let fixed = run_fix_with_config(content, config).unwrap();
assert_eq!(fixed.matches("```sh").count(), 2);
assert_eq!(fixed.matches("```bash").count(), 0);
}
#[test]
fn test_consistent_mode_ignores_disabled_blocks() {
let content = r#"```bash
echo hi
```
<!-- rumdl-disable MD040 -->
```sh
echo there
```
```sh
echo again
```
<!-- rumdl-enable MD040 -->
"#;
let config = MD040Config {
style: LanguageStyle::Consistent,
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert!(result.is_empty(), "Disabled blocks should not affect consistency");
}
#[test]
fn test_fix_preserves_attributes() {
let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
let config = MD040Config {
style: LanguageStyle::Consistent,
..Default::default()
};
let fixed = run_fix_with_config(content, config).unwrap();
assert!(fixed.contains("```bash {.highlight}"));
}
#[test]
fn test_fix_preserves_spacing_before_label() {
let content = "```bash\ncode\n```\n\n``` sh {.highlight}\ncode\n```";
let config = MD040Config {
style: LanguageStyle::Consistent,
..Default::default()
};
let fixed = run_fix_with_config(content, config).unwrap();
assert!(fixed.contains("``` bash {.highlight}"));
assert!(!fixed.contains("``` sh {.highlight}"));
}
#[test]
fn test_allowlist_blocks_unlisted() {
let content = "```java\ncode\n```";
let config = MD040Config {
allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].message.contains("not in the allowed list"));
}
#[test]
fn test_allowlist_allows_listed() {
let content = "```python\ncode\n```";
let config = MD040Config {
allowed_languages: vec!["Python".to_string()],
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert!(result.is_empty());
}
#[test]
fn test_allowlist_blocks_unknown_language() {
let content = "```mysterylang\ncode\n```";
let config = MD040Config {
allowed_languages: vec!["Python".to_string()],
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].message.contains("allowed list"));
}
#[test]
fn test_allowlist_case_insensitive() {
let content = "```python\ncode\n```";
let config = MD040Config {
allowed_languages: vec!["PYTHON".to_string()],
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert!(result.is_empty());
}
#[test]
fn test_denylist_blocks_listed() {
let content = "```java\ncode\n```";
let config = MD040Config {
disallowed_languages: vec!["Java".to_string()],
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].message.contains("disallowed"));
}
#[test]
fn test_denylist_allows_unlisted() {
let content = "```python\ncode\n```";
let config = MD040Config {
disallowed_languages: vec!["Java".to_string()],
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert!(result.is_empty());
}
#[test]
fn test_allowlist_takes_precedence_over_denylist() {
let content = "```python\ncode\n```";
let config = MD040Config {
allowed_languages: vec!["Python".to_string()],
disallowed_languages: vec!["Python".to_string()], ..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert!(result.is_empty());
}
#[test]
fn test_unknown_language_ignore_default() {
let content = "```mycustomlang\ncode\n```";
let result = run_check(content).unwrap();
assert!(result.is_empty(), "Unknown languages ignored by default");
}
#[test]
fn test_unknown_language_warn() {
let content = "```mycustomlang\ncode\n```";
let config = MD040Config {
unknown_language_action: UnknownLanguageAction::Warn,
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].message.contains("Unknown language"));
assert!(result[0].message.contains("mycustomlang"));
assert_eq!(result[0].severity, Severity::Warning);
}
#[test]
fn test_unknown_language_error() {
let content = "```mycustomlang\ncode\n```";
let config = MD040Config {
unknown_language_action: UnknownLanguageAction::Error,
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].message.contains("Unknown language"));
assert_eq!(result[0].severity, Severity::Error);
}
#[test]
fn test_invalid_preferred_alias_detected() {
let mut preferred = HashMap::new();
preferred.insert("Shell".to_string(), "invalid_alias".to_string());
let config = MD040Config {
style: LanguageStyle::Consistent,
preferred_aliases: preferred,
..Default::default()
};
let rule = MD040FencedCodeLanguage::with_config(config);
let errors = rule.validate_config();
assert_eq!(errors.len(), 1);
assert!(errors[0].contains("Invalid alias"));
assert!(errors[0].contains("invalid_alias"));
}
#[test]
fn test_unknown_language_in_preferred_aliases_detected() {
let mut preferred = HashMap::new();
preferred.insert("NotARealLanguage".to_string(), "nope".to_string());
let config = MD040Config {
style: LanguageStyle::Consistent,
preferred_aliases: preferred,
..Default::default()
};
let rule = MD040FencedCodeLanguage::with_config(config);
let errors = rule.validate_config();
assert_eq!(errors.len(), 1);
assert!(errors[0].contains("Unknown language"));
}
#[test]
fn test_valid_preferred_alias_accepted() {
let mut preferred = HashMap::new();
preferred.insert("Shell".to_string(), "bash".to_string());
preferred.insert("JavaScript".to_string(), "js".to_string());
let config = MD040Config {
style: LanguageStyle::Consistent,
preferred_aliases: preferred,
..Default::default()
};
let rule = MD040FencedCodeLanguage::with_config(config);
let errors = rule.validate_config();
assert!(errors.is_empty());
}
#[test]
fn test_config_error_uses_valid_line_column() {
let config = md040_config::MD040Config {
preferred_aliases: {
let mut map = std::collections::HashMap::new();
map.insert("Shell".to_string(), "invalid_alias".to_string());
map
},
..Default::default()
};
let rule = MD040FencedCodeLanguage::with_config(config);
let content = "```shell\necho hello\n```";
let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
let result = rule.check(&ctx).unwrap();
let config_error = result.iter().find(|w| w.message.contains("[config error]"));
assert!(config_error.is_some(), "Should have a config error warning");
let warning = config_error.unwrap();
assert!(
warning.line >= 1,
"Config error line should be >= 1, got {}",
warning.line
);
assert!(
warning.column >= 1,
"Config error column should be >= 1, got {}",
warning.column
);
}
#[test]
fn test_linguist_resolution() {
assert_eq!(resolve_canonical("bash"), Some("Shell"));
assert_eq!(resolve_canonical("sh"), Some("Shell"));
assert_eq!(resolve_canonical("zsh"), Some("Shell"));
assert_eq!(resolve_canonical("js"), Some("JavaScript"));
assert_eq!(resolve_canonical("python"), Some("Python"));
assert_eq!(resolve_canonical("unknown_lang"), None);
}
#[test]
fn test_linguist_resolution_case_insensitive() {
assert_eq!(resolve_canonical("BASH"), Some("Shell"));
assert_eq!(resolve_canonical("Bash"), Some("Shell"));
assert_eq!(resolve_canonical("Python"), Some("Python"));
assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
}
#[test]
fn test_alias_validation() {
assert!(is_valid_alias("Shell", "bash"));
assert!(is_valid_alias("Shell", "sh"));
assert!(is_valid_alias("Shell", "zsh"));
assert!(!is_valid_alias("Shell", "python"));
assert!(!is_valid_alias("Shell", "invalid"));
}
#[test]
fn test_default_alias() {
assert_eq!(default_alias("Shell"), Some("bash"));
assert_eq!(default_alias("JavaScript"), Some("js"));
assert_eq!(default_alias("Python"), Some("python"));
}
#[test]
fn test_mixed_case_labels_normalized() {
let content = r#"```BASH
echo hi
```
```Bash
echo there
```
```bash
echo again
```
"#;
let config = MD040Config {
style: LanguageStyle::Consistent,
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
}
#[test]
fn test_multiple_languages_independent() {
let content = r#"```bash
shell code
```
```python
python code
```
```sh
more shell
```
```python3
more python
```
"#;
let config = MD040Config {
style: LanguageStyle::Consistent,
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert_eq!(result.len(), 2);
}
#[test]
fn test_tilde_fences() {
let content = r#"~~~bash
echo hi
~~~
~~~sh
echo there
~~~
"#;
let config = MD040Config {
style: LanguageStyle::Consistent,
..Default::default()
};
let result = run_check_with_config(content, config.clone()).unwrap();
assert_eq!(result.len(), 1);
let fixed = run_fix_with_config(content, config).unwrap();
assert!(fixed.contains("~~~bash"));
assert!(!fixed.contains("~~~sh"));
}
#[test]
fn test_longer_fence_markers_preserved() {
let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
let config = MD040Config {
style: LanguageStyle::Consistent,
..Default::default()
};
let fixed = run_fix_with_config(content, config).unwrap();
assert!(fixed.contains("````bash"));
assert!(fixed.contains("```bash"));
}
#[test]
fn test_empty_document() {
let result = run_check("").unwrap();
assert!(result.is_empty());
}
#[test]
fn test_no_code_blocks() {
let content = "# Just a heading\n\nSome text.";
let result = run_check(content).unwrap();
assert!(result.is_empty());
}
#[test]
fn test_single_code_block_no_inconsistency() {
let content = "```bash\necho hi\n```";
let config = MD040Config {
style: LanguageStyle::Consistent,
..Default::default()
};
let result = run_check_with_config(content, config).unwrap();
assert!(result.is_empty(), "Single block has no inconsistency");
}
#[test]
fn test_idempotent_fix() {
let content = r#"```bash
echo hi
```
```sh
echo there
```
"#;
let config = MD040Config {
style: LanguageStyle::Consistent,
..Default::default()
};
let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
assert_eq!(fixed1, fixed2, "Fix should be idempotent");
}
#[test]
fn test_mkdocs_superfences_title_only() {
let content = r#"```title="Example"
echo hi
```
"#;
let result = run_check_mkdocs(content).unwrap();
assert!(
result.is_empty(),
"MkDocs superfences with title= should not require language"
);
}
#[test]
fn test_mkdocs_superfences_hl_lines() {
let content = r#"```hl_lines="1 2"
line 1
line 2
```
"#;
let result = run_check_mkdocs(content).unwrap();
assert!(
result.is_empty(),
"MkDocs superfences with hl_lines= should not require language"
);
}
#[test]
fn test_mkdocs_superfences_linenums() {
let content = r#"```linenums="1"
line 1
line 2
```
"#;
let result = run_check_mkdocs(content).unwrap();
assert!(
result.is_empty(),
"MkDocs superfences with linenums= should not require language"
);
}
#[test]
fn test_mkdocs_superfences_class() {
let content = r#"```.my-class
some text
```
"#;
let result = run_check_mkdocs(content).unwrap();
assert!(
result.is_empty(),
"MkDocs superfences with .class should not require language"
);
}
#[test]
fn test_mkdocs_superfences_id() {
let content = r#"```#my-id
some text
```
"#;
let result = run_check_mkdocs(content).unwrap();
assert!(
result.is_empty(),
"MkDocs superfences with #id should not require language"
);
}
#[test]
fn test_mkdocs_superfences_with_language() {
let content = r#"```python title="Example" hl_lines="1"
print("hello")
```
"#;
let result = run_check_mkdocs(content).unwrap();
assert!(result.is_empty(), "Code block with language and attrs should pass");
}
#[test]
fn test_standard_flavor_no_special_handling() {
let content = r#"```title="Example"
echo hi
```
"#;
let result = run_check(content).unwrap();
assert_eq!(
result.len(),
1,
"Standard flavor should warn about title= without language"
);
}
}