Skip to main content

rumdl_lib/rules/
md040_fenced_code_language.rs

1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use std::collections::HashMap;
6
7/// Rule MD040: Fenced code blocks should have a language
8///
9/// See [docs/md040.md](../../docs/md040.md) for full documentation, configuration, and examples.
10pub mod md040_config;
11
12// ============================================================================
13// MkDocs Superfences Attribute Detection
14// ============================================================================
15
/// Leading tokens that mark a MkDocs superfences attribute instead of a
/// language identifier. In MkDocs flavor a fence may carry these without
/// naming a language.
/// See: https://facelessuser.github.io/pymdown-extensions/extensions/superfences/
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &[
    // Block title
    "title=",
    // Highlighted lines
    "hl_lines=",
    // Line numbers
    "linenums=",
    // CSS class (e.g., .annotate)
    ".",
    // CSS id
    "#",
];

/// Returns `true` when `s` begins with any of the MkDocs superfences
/// attribute prefixes listed in `MKDOCS_SUPERFENCES_ATTR_PREFIXES`.
#[inline]
fn is_superfences_attribute(s: &str) -> bool {
    for prefix in MKDOCS_SUPERFENCES_ATTR_PREFIXES {
        if s.starts_with(*prefix) {
            return true;
        }
    }
    false
}
34use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
35
/// The opening line of one fenced code block, reduced to the pieces this
/// rule inspects. Built by `derive_fenced_code_blocks`.
struct FencedCodeBlock {
    /// 0-indexed line number where the code block starts
    line_idx: usize,
    /// First whitespace-separated token of the info string
    /// (empty if no language specified)
    language: String,
    /// The fence marker used: the leading run of backticks or tildes on the
    /// opening line, or "```" when the line starts with neither
    fence_marker: String,
}
44
/// Rule MD040: fenced code blocks should have a language.
///
/// Holds the rule's configuration; `Default` yields the rule with
/// `MD040Config::default()`.
#[derive(Debug, Clone, Default)]
pub struct MD040FencedCodeLanguage {
    /// Settings consulted by the checks (style, preferred aliases,
    /// allowed/disallowed languages, unknown-language action).
    config: MD040Config,
}
49
50impl MD040FencedCodeLanguage {
    /// Creates the rule using the supplied configuration.
    pub fn with_config(config: MD040Config) -> Self {
        Self { config }
    }
54
55    /// Validate the configuration and return any errors
56    fn validate_config(&self) -> Vec<String> {
57        let mut errors = Vec::new();
58
59        // Validate preferred-aliases: check that each alias is valid for its language
60        for (canonical, alias) in &self.config.preferred_aliases {
61            // Find the actual canonical name (case-insensitive)
62            if let Some(actual_canonical) = resolve_canonical(canonical) {
63                if !is_valid_alias(actual_canonical, alias)
64                    && let Some(valid_aliases) = get_aliases(actual_canonical)
65                {
66                    let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
67                    let valid_str = valid_list
68                        .iter()
69                        .map(|s| format!("'{s}'"))
70                        .collect::<Vec<_>>()
71                        .join(", ");
72                    let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
73                    errors.push(format!(
74                        "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
75                    ));
76                }
77            } else {
78                errors.push(format!(
79                    "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
80                ));
81            }
82        }
83
84        errors
85    }
86
87    /// Determine the preferred label for each canonical language in the document
88    fn compute_preferred_labels(
89        &self,
90        blocks: &[FencedCodeBlock],
91        disabled_ranges: &[(usize, usize)],
92    ) -> HashMap<String, String> {
93        // Group labels by canonical language
94        let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
95
96        for block in blocks {
97            if is_line_disabled(disabled_ranges, block.line_idx) {
98                continue;
99            }
100            if block.language.is_empty() {
101                continue;
102            }
103            if let Some(canonical) = resolve_canonical(&block.language) {
104                by_canonical
105                    .entry(canonical.to_string())
106                    .or_default()
107                    .push(&block.language);
108            }
109        }
110
111        // Determine winning label for each canonical language
112        let mut result = HashMap::new();
113
114        for (canonical, labels) in by_canonical {
115            // Check for user override first (case-insensitive lookup)
116            let winner = if let Some(preferred) = self
117                .config
118                .preferred_aliases
119                .iter()
120                .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
121                .map(|(_, v)| v.clone())
122            {
123                preferred
124            } else {
125                // Find most prevalent label
126                let mut counts: HashMap<&str, usize> = HashMap::new();
127                for label in &labels {
128                    *counts.entry(*label).or_default() += 1;
129                }
130
131                let max_count = counts.values().max().copied().unwrap_or(0);
132                let winners: Vec<_> = counts
133                    .iter()
134                    .filter(|(_, c)| **c == max_count)
135                    .map(|(l, _)| *l)
136                    .collect();
137
138                if winners.len() == 1 {
139                    winners[0].to_string()
140                } else {
141                    // Tie-break: use curated default if available, otherwise alphabetically first
142                    default_alias(&canonical)
143                        .filter(|default| winners.contains(default))
144                        .map_or_else(
145                            || winners.into_iter().min().unwrap().to_string(),
146                            std::string::ToString::to_string,
147                        )
148                }
149            };
150
151            result.insert(canonical, winner);
152        }
153
154        result
155    }
156
157    /// Check if a language is allowed based on config
158    fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
159        // Allowlist takes precedence
160        if !self.config.allowed_languages.is_empty() {
161            let allowed = self.config.allowed_languages.join(", ");
162            let Some(canonical) = canonical else {
163                return Some(format!(
164                    "Language '{original_label}' is not in the allowed list: {allowed}"
165                ));
166            };
167            if !self
168                .config
169                .allowed_languages
170                .iter()
171                .any(|a| a.eq_ignore_ascii_case(canonical))
172            {
173                return Some(format!(
174                    "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
175                ));
176            }
177        } else if !self.config.disallowed_languages.is_empty()
178            && canonical.is_some_and(|canonical| {
179                self.config
180                    .disallowed_languages
181                    .iter()
182                    .any(|d| d.eq_ignore_ascii_case(canonical))
183            })
184        {
185            let canonical = canonical.unwrap_or("unknown");
186            return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
187        }
188        None
189    }
190
191    /// Check for unknown language based on config
192    fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
193        if resolve_canonical(label).is_some() {
194            return None;
195        }
196
197        match self.config.unknown_language_action {
198            UnknownLanguageAction::Ignore => None,
199            UnknownLanguageAction::Warn => Some((
200                format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
201                Severity::Warning,
202            )),
203            UnknownLanguageAction::Error => Some((
204                format!("Unknown language '{label}' (not in GitHub Linguist)"),
205                Severity::Error,
206            )),
207        }
208    }
209}
210
211impl Rule for MD040FencedCodeLanguage {
    /// Returns the rule identifier, `"MD040"`.
    fn name(&self) -> &'static str {
        "MD040"
    }
215
    /// Returns the one-line, human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Code blocks should have a language specified"
    }
219
220    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
221        let content = ctx.content;
222        let mut warnings = Vec::new();
223
224        // Validate config and emit warnings for invalid configuration
225        for error in self.validate_config() {
226            warnings.push(LintWarning {
227                rule_name: Some(self.name().to_string()),
228                line: 1,
229                column: 1,
230                end_line: 1,
231                end_column: 1,
232                message: format!("[config error] {error}"),
233                severity: Severity::Error,
234                fix: None,
235            });
236        }
237
238        // Derive fenced code blocks from pre-computed context
239        let fenced_blocks = derive_fenced_code_blocks(ctx);
240
241        // Pre-compute disabled ranges for efficient lookup
242        let disabled_ranges = compute_disabled_ranges(content, self.name());
243
244        // Compute preferred labels for consistent mode
245        let preferred_labels = if self.config.style == LanguageStyle::Consistent {
246            self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
247        } else {
248            HashMap::new()
249        };
250
251        let lines = ctx.raw_lines();
252
253        for block in &fenced_blocks {
254            // Skip if this line is in a disabled range
255            if is_line_disabled(&disabled_ranges, block.line_idx) {
256                continue;
257            }
258
259            // Get the actual line content for additional checks
260            let line = lines.get(block.line_idx).unwrap_or(&"");
261            let trimmed = line.trim();
262            let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
263
264            // Check if fence has MkDocs superfences attributes but no language
265            let has_mkdocs_attrs_only =
266                ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
267
268            // Check for Quarto/RMarkdown code chunk syntax: {language} or {language, options}
269            let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
270                && after_fence.starts_with('{')
271                && after_fence.contains('}');
272
273            // Determine if this block needs a language specification
274            // In MkDocs flavor, superfences attributes without language are acceptable
275            let needs_language =
276                !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
277
278            if needs_language && !has_quarto_syntax {
279                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
280
281                warnings.push(LintWarning {
282                    rule_name: Some(self.name().to_string()),
283                    line: start_line,
284                    column: start_col,
285                    end_line,
286                    end_column: end_col,
287                    message: "Code block (```) missing language".to_string(),
288                    severity: Severity::Warning,
289                    fix: Some(Fix {
290                        range: {
291                            let trimmed = line.trim_start();
292                            let trimmed_start = line.len() - trimmed.len();
293                            let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
294                            let fence_end_byte = line_start_byte + trimmed_start + block.fence_marker.len();
295                            // Replace from after fence marker to end of line content,
296                            // so trailing whitespace is cleaned up while any existing
297                            // info string / attributes are preserved via the replacement.
298                            let line_end_byte = line_start_byte + line.len();
299                            fence_end_byte..line_end_byte
300                        },
301                        replacement: {
302                            let trimmed = line.trim_start();
303                            let after_fence = &trimmed[block.fence_marker.len()..];
304                            let after_fence_trimmed = after_fence.trim();
305                            if after_fence_trimmed.is_empty() {
306                                "text".to_string()
307                            } else {
308                                format!("text {after_fence_trimmed}")
309                            }
310                        },
311                    }),
312                });
313                continue;
314            }
315
316            // Skip further checks for special syntax
317            if has_quarto_syntax {
318                continue;
319            }
320
321            let canonical = resolve_canonical(&block.language);
322
323            // Check language restrictions (allowlist/denylist)
324            if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
325                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
326
327                warnings.push(LintWarning {
328                    rule_name: Some(self.name().to_string()),
329                    line: start_line,
330                    column: start_col,
331                    end_line,
332                    end_column: end_col,
333                    message: msg,
334                    severity: Severity::Warning,
335                    fix: None,
336                });
337                continue;
338            }
339
340            // Check for unknown language (only if not handled by allowlist)
341            if canonical.is_none() {
342                if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
343                    let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
344
345                    warnings.push(LintWarning {
346                        rule_name: Some(self.name().to_string()),
347                        line: start_line,
348                        column: start_col,
349                        end_line,
350                        end_column: end_col,
351                        message: msg,
352                        severity,
353                        fix: None,
354                    });
355                }
356                continue;
357            }
358
359            // Check consistency
360            if self.config.style == LanguageStyle::Consistent
361                && let Some(preferred) = preferred_labels.get(canonical.unwrap())
362                && &block.language != preferred
363            {
364                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
365
366                let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
367                    let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
368                    Fix {
369                        range: (line_start_byte + label_start)..(line_start_byte + label_end),
370                        replacement: preferred.clone(),
371                    }
372                });
373                let lang = &block.language;
374                let canonical = canonical.unwrap();
375
376                warnings.push(LintWarning {
377                    rule_name: Some(self.name().to_string()),
378                    line: start_line,
379                    column: start_col,
380                    end_line,
381                    end_column: end_col,
382                    message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
383                    severity: Severity::Warning,
384                    fix,
385                });
386            }
387        }
388
389        Ok(warnings)
390    }
391
392    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
393        if self.should_skip(ctx) {
394            return Ok(ctx.content.to_string());
395        }
396        let warnings = self.check(ctx)?;
397        if warnings.is_empty() {
398            return Ok(ctx.content.to_string());
399        }
400        let warnings =
401            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
402        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
403    }
404
    /// Get the category of this rule for selective processing.
    ///
    /// Always `RuleCategory::CodeBlock`.
    fn category(&self) -> RuleCategory {
        RuleCategory::CodeBlock
    }
409
410    /// Check if this rule should be skipped
411    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
412        ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
413    }
414
    /// Exposes the rule as `&dyn Any`
    /// (presumably so callers can downcast to the concrete rule type).
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
418
419    fn default_config_section(&self) -> Option<(String, toml::Value)> {
420        let default_config = MD040Config::default();
421        let json_value = serde_json::to_value(&default_config).ok()?;
422        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
423
424        if let toml::Value::Table(table) = toml_value {
425            if !table.is_empty() {
426                Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
427            } else {
428                None
429            }
430        } else {
431            None
432        }
433    }
434
435    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
436    where
437        Self: Sized,
438    {
439        let rule_config: MD040Config = load_rule_config(config);
440        Box::new(MD040FencedCodeLanguage::with_config(rule_config))
441    }
442}
443
444/// Derive fenced code blocks from pre-computed CodeBlockDetail data
445fn derive_fenced_code_blocks(ctx: &crate::lint_context::LintContext) -> Vec<FencedCodeBlock> {
446    let content = ctx.content;
447    let line_offsets = &ctx.line_offsets;
448
449    ctx.code_block_details
450        .iter()
451        .filter(|d| d.is_fenced)
452        .map(|detail| {
453            let line_idx = match line_offsets.binary_search(&detail.start) {
454                Ok(idx) => idx,
455                Err(idx) => idx.saturating_sub(1),
456            };
457
458            // Determine fence marker from the actual line content
459            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
460            let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
461            let line = content.get(line_start..line_end).unwrap_or("");
462            let trimmed = line.trim();
463            let fence_marker = if trimmed.starts_with('`') {
464                let count = trimmed.chars().take_while(|&c| c == '`').count();
465                "`".repeat(count)
466            } else if trimmed.starts_with('~') {
467                let count = trimmed.chars().take_while(|&c| c == '~').count();
468                "~".repeat(count)
469            } else {
470                "```".to_string()
471            };
472
473            let language = detail.info_string.split_whitespace().next().unwrap_or("").to_string();
474
475            FencedCodeBlock {
476                line_idx,
477                language,
478                fence_marker,
479            }
480        })
481        .collect()
482}
483
484/// Compute disabled line ranges from disable/enable comments
485fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
486    let mut ranges = Vec::new();
487    let mut disabled_start: Option<usize> = None;
488
489    for (i, line) in content.lines().enumerate() {
490        let trimmed = line.trim();
491
492        if let Some(rules) = crate::inline_config::parse_disable_comment(trimmed)
493            && (rules.is_empty() || rules.contains(&rule_name))
494            && disabled_start.is_none()
495        {
496            disabled_start = Some(i);
497        }
498
499        if let Some(rules) = crate::inline_config::parse_enable_comment(trimmed)
500            && (rules.is_empty() || rules.contains(&rule_name))
501            && let Some(start) = disabled_start.take()
502        {
503            ranges.push((start, i));
504        }
505    }
506
507    // Handle unclosed disable
508    if let Some(start) = disabled_start {
509        ranges.push((start, usize::MAX));
510    }
511
512    ranges
513}
514
/// Returns `true` when `line_idx` lies inside any of the half-open
/// `[start, end)` ranges.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&line_idx) {
            return true;
        }
    }
    false
}
519
/// Locates the language label on a fence line.
///
/// Returns the `(start, end)` byte offsets of the label within `line`, where
/// the label is the first run of non-whitespace characters after the fence
/// marker. Returns `None` when the line (after leading whitespace) does not
/// begin with `fence_marker`, or when nothing but whitespace follows it.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let unindented = line.trim_start();
    let indent_len = line.len() - unindented.len();
    let rest = unindented.strip_prefix(fence_marker)?;

    // Offsets below are relative to `rest`, i.e. to the text after the marker.
    let label_offset = rest.find(|ch: char| !ch.is_whitespace())?;
    let label_len = rest[label_offset..]
        .find(char::is_whitespace)
        .unwrap_or(rest.len() - label_offset);

    let base = indent_len + fence_marker.len();
    Some((base + label_offset, base + label_offset + label_len))
}
544
545#[cfg(test)]
546mod tests {
547    use super::*;
548    use crate::lint_context::LintContext;
549
550    fn run_check(content: &str) -> LintResult {
551        let rule = MD040FencedCodeLanguage::default();
552        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
553        rule.check(&ctx)
554    }
555
556    fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
557        let rule = MD040FencedCodeLanguage::with_config(config);
558        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
559        rule.check(&ctx)
560    }
561
562    fn run_fix(content: &str) -> Result<String, LintError> {
563        let rule = MD040FencedCodeLanguage::default();
564        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
565        rule.fix(&ctx)
566    }
567
568    fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
569        let rule = MD040FencedCodeLanguage::with_config(config);
570        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
571        rule.fix(&ctx)
572    }
573
574    fn run_check_mkdocs(content: &str) -> LintResult {
575        let rule = MD040FencedCodeLanguage::default();
576        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
577        rule.check(&ctx)
578    }
579
580    // =========================================================================
581    // Basic functionality tests
582    // =========================================================================
583
584    #[test]
585    fn test_code_blocks_with_language_specified() {
586        let content = r#"# Test
587
588```python
589print("Hello, world!")
590```
591
592```javascript
593console.log("Hello!");
594```
595"#;
596        let result = run_check(content).unwrap();
597        assert!(result.is_empty(), "No warnings expected for code blocks with language");
598    }
599
600    #[test]
601    fn test_code_blocks_without_language() {
602        let content = r#"# Test
603
604```
605print("Hello, world!")
606```
607"#;
608        let result = run_check(content).unwrap();
609        assert_eq!(result.len(), 1);
610        assert_eq!(result[0].message, "Code block (```) missing language");
611        assert_eq!(result[0].line, 3);
612    }
613
614    #[test]
615    fn test_fix_method_adds_text_language() {
616        let content = r#"# Test
617
618```
619code without language
620```
621
622```python
623already has language
624```
625
626```
627another block without
628```
629"#;
630        let fixed = run_fix(content).unwrap();
631        assert!(fixed.contains("```text"));
632        assert!(fixed.contains("```python"));
633        assert_eq!(fixed.matches("```text").count(), 2);
634    }
635
636    #[test]
637    fn test_fix_preserves_indentation() {
638        let content = r#"# Test
639
640- List item
641  ```
642  indented code block
643  ```
644"#;
645        let fixed = run_fix(content).unwrap();
646        assert!(fixed.contains("  ```text"));
647    }
648
649    // =========================================================================
650    // Consistent mode tests
651    // =========================================================================
652
653    #[test]
654    fn test_consistent_mode_detects_inconsistency() {
655        let content = r#"```bash
656echo hi
657```
658
659```sh
660echo there
661```
662
663```bash
664echo again
665```
666"#;
667        let config = MD040Config {
668            style: LanguageStyle::Consistent,
669            ..Default::default()
670        };
671        let result = run_check_with_config(content, config).unwrap();
672        assert_eq!(result.len(), 1);
673        assert!(result[0].message.contains("Inconsistent"));
674        assert!(result[0].message.contains("sh"));
675        assert!(result[0].message.contains("bash"));
676    }
677
678    #[test]
679    fn test_consistent_mode_fix_normalizes() {
680        let content = r#"```bash
681echo hi
682```
683
684```sh
685echo there
686```
687
688```bash
689echo again
690```
691"#;
692        let config = MD040Config {
693            style: LanguageStyle::Consistent,
694            ..Default::default()
695        };
696        let fixed = run_fix_with_config(content, config).unwrap();
697        assert_eq!(fixed.matches("```bash").count(), 3);
698        assert_eq!(fixed.matches("```sh").count(), 0);
699    }
700
701    #[test]
702    fn test_consistent_mode_tie_break_uses_curated_default() {
703        // When there's a tie (1 bash, 1 sh), should use curated default (bash)
704        let content = r#"```bash
705echo hi
706```
707
708```sh
709echo there
710```
711"#;
712        let config = MD040Config {
713            style: LanguageStyle::Consistent,
714            ..Default::default()
715        };
716        let fixed = run_fix_with_config(content, config).unwrap();
717        // bash is the curated default for Shell
718        assert_eq!(fixed.matches("```bash").count(), 2);
719    }
720
721    #[test]
722    fn test_consistent_mode_with_preferred_alias() {
723        let content = r#"```bash
724echo hi
725```
726
727```sh
728echo there
729```
730"#;
731        let mut preferred = HashMap::new();
732        preferred.insert("Shell".to_string(), "sh".to_string());
733
734        let config = MD040Config {
735            style: LanguageStyle::Consistent,
736            preferred_aliases: preferred,
737            ..Default::default()
738        };
739        let fixed = run_fix_with_config(content, config).unwrap();
740        assert_eq!(fixed.matches("```sh").count(), 2);
741        assert_eq!(fixed.matches("```bash").count(), 0);
742    }
743
744    #[test]
745    fn test_consistent_mode_ignores_disabled_blocks() {
746        let content = r#"```bash
747echo hi
748```
749<!-- rumdl-disable MD040 -->
750```sh
751echo there
752```
753```sh
754echo again
755```
756<!-- rumdl-enable MD040 -->
757"#;
758        let config = MD040Config {
759            style: LanguageStyle::Consistent,
760            ..Default::default()
761        };
762        let result = run_check_with_config(content, config).unwrap();
763        assert!(result.is_empty(), "Disabled blocks should not affect consistency");
764    }
765
766    #[test]
767    fn test_fix_preserves_attributes() {
768        let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
769        let config = MD040Config {
770            style: LanguageStyle::Consistent,
771            ..Default::default()
772        };
773        let fixed = run_fix_with_config(content, config).unwrap();
774        assert!(fixed.contains("```bash {.highlight}"));
775    }
776
777    #[test]
778    fn test_fix_preserves_spacing_before_label() {
779        let content = "```bash\ncode\n```\n\n```  sh {.highlight}\ncode\n```";
780        let config = MD040Config {
781            style: LanguageStyle::Consistent,
782            ..Default::default()
783        };
784        let fixed = run_fix_with_config(content, config).unwrap();
785        assert!(fixed.contains("```  bash {.highlight}"));
786        assert!(!fixed.contains("```  sh {.highlight}"));
787    }
788
789    // =========================================================================
790    // Allowlist/denylist tests
791    // =========================================================================
792
793    #[test]
794    fn test_allowlist_blocks_unlisted() {
795        let content = "```java\ncode\n```";
796        let config = MD040Config {
797            allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
798            ..Default::default()
799        };
800        let result = run_check_with_config(content, config).unwrap();
801        assert_eq!(result.len(), 1);
802        assert!(result[0].message.contains("not in the allowed list"));
803    }
804
805    #[test]
806    fn test_allowlist_allows_listed() {
807        let content = "```python\ncode\n```";
808        let config = MD040Config {
809            allowed_languages: vec!["Python".to_string()],
810            ..Default::default()
811        };
812        let result = run_check_with_config(content, config).unwrap();
813        assert!(result.is_empty());
814    }
815
816    #[test]
817    fn test_allowlist_blocks_unknown_language() {
818        let content = "```mysterylang\ncode\n```";
819        let config = MD040Config {
820            allowed_languages: vec!["Python".to_string()],
821            ..Default::default()
822        };
823        let result = run_check_with_config(content, config).unwrap();
824        assert_eq!(result.len(), 1);
825        assert!(result[0].message.contains("allowed list"));
826    }
827
828    #[test]
829    fn test_allowlist_case_insensitive() {
830        let content = "```python\ncode\n```";
831        let config = MD040Config {
832            allowed_languages: vec!["PYTHON".to_string()],
833            ..Default::default()
834        };
835        let result = run_check_with_config(content, config).unwrap();
836        assert!(result.is_empty());
837    }
838
839    #[test]
840    fn test_denylist_blocks_listed() {
841        let content = "```java\ncode\n```";
842        let config = MD040Config {
843            disallowed_languages: vec!["Java".to_string()],
844            ..Default::default()
845        };
846        let result = run_check_with_config(content, config).unwrap();
847        assert_eq!(result.len(), 1);
848        assert!(result[0].message.contains("disallowed"));
849    }
850
851    #[test]
852    fn test_denylist_allows_unlisted() {
853        let content = "```python\ncode\n```";
854        let config = MD040Config {
855            disallowed_languages: vec!["Java".to_string()],
856            ..Default::default()
857        };
858        let result = run_check_with_config(content, config).unwrap();
859        assert!(result.is_empty());
860    }
861
862    #[test]
863    fn test_allowlist_takes_precedence_over_denylist() {
864        let content = "```python\ncode\n```";
865        let config = MD040Config {
866            allowed_languages: vec!["Python".to_string()],
867            disallowed_languages: vec!["Python".to_string()], // Should be ignored
868            ..Default::default()
869        };
870        let result = run_check_with_config(content, config).unwrap();
871        assert!(result.is_empty());
872    }
873
874    // =========================================================================
875    // Unknown language tests
876    // =========================================================================
877
878    #[test]
879    fn test_unknown_language_ignore_default() {
880        let content = "```mycustomlang\ncode\n```";
881        let result = run_check(content).unwrap();
882        assert!(result.is_empty(), "Unknown languages ignored by default");
883    }
884
885    #[test]
886    fn test_unknown_language_warn() {
887        let content = "```mycustomlang\ncode\n```";
888        let config = MD040Config {
889            unknown_language_action: UnknownLanguageAction::Warn,
890            ..Default::default()
891        };
892        let result = run_check_with_config(content, config).unwrap();
893        assert_eq!(result.len(), 1);
894        assert!(result[0].message.contains("Unknown language"));
895        assert!(result[0].message.contains("mycustomlang"));
896        assert_eq!(result[0].severity, Severity::Warning);
897    }
898
899    #[test]
900    fn test_unknown_language_error() {
901        let content = "```mycustomlang\ncode\n```";
902        let config = MD040Config {
903            unknown_language_action: UnknownLanguageAction::Error,
904            ..Default::default()
905        };
906        let result = run_check_with_config(content, config).unwrap();
907        assert_eq!(result.len(), 1);
908        assert!(result[0].message.contains("Unknown language"));
909        assert_eq!(result[0].severity, Severity::Error);
910    }
911
912    // =========================================================================
913    // Config validation tests
914    // =========================================================================
915
916    #[test]
917    fn test_invalid_preferred_alias_detected() {
918        let mut preferred = HashMap::new();
919        preferred.insert("Shell".to_string(), "invalid_alias".to_string());
920
921        let config = MD040Config {
922            style: LanguageStyle::Consistent,
923            preferred_aliases: preferred,
924            ..Default::default()
925        };
926        let rule = MD040FencedCodeLanguage::with_config(config);
927        let errors = rule.validate_config();
928        assert_eq!(errors.len(), 1);
929        assert!(errors[0].contains("Invalid alias"));
930        assert!(errors[0].contains("invalid_alias"));
931    }
932
933    #[test]
934    fn test_unknown_language_in_preferred_aliases_detected() {
935        let mut preferred = HashMap::new();
936        preferred.insert("NotARealLanguage".to_string(), "nope".to_string());
937
938        let config = MD040Config {
939            style: LanguageStyle::Consistent,
940            preferred_aliases: preferred,
941            ..Default::default()
942        };
943        let rule = MD040FencedCodeLanguage::with_config(config);
944        let errors = rule.validate_config();
945        assert_eq!(errors.len(), 1);
946        assert!(errors[0].contains("Unknown language"));
947    }
948
949    #[test]
950    fn test_valid_preferred_alias_accepted() {
951        let mut preferred = HashMap::new();
952        preferred.insert("Shell".to_string(), "bash".to_string());
953        preferred.insert("JavaScript".to_string(), "js".to_string());
954
955        let config = MD040Config {
956            style: LanguageStyle::Consistent,
957            preferred_aliases: preferred,
958            ..Default::default()
959        };
960        let rule = MD040FencedCodeLanguage::with_config(config);
961        let errors = rule.validate_config();
962        assert!(errors.is_empty());
963    }
964
965    #[test]
966    fn test_config_error_uses_valid_line_column() {
967        let config = md040_config::MD040Config {
968            preferred_aliases: {
969                let mut map = std::collections::HashMap::new();
970                map.insert("Shell".to_string(), "invalid_alias".to_string());
971                map
972            },
973            ..Default::default()
974        };
975        let rule = MD040FencedCodeLanguage::with_config(config);
976
977        let content = "```shell\necho hello\n```";
978        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
979        let result = rule.check(&ctx).unwrap();
980
981        // Find the config error warning
982        let config_error = result.iter().find(|w| w.message.contains("[config error]"));
983        assert!(config_error.is_some(), "Should have a config error warning");
984
985        let warning = config_error.unwrap();
986        // Line and column should be 1-indexed (not 0)
987        assert!(
988            warning.line >= 1,
989            "Config error line should be >= 1, got {}",
990            warning.line
991        );
992        assert!(
993            warning.column >= 1,
994            "Config error column should be >= 1, got {}",
995            warning.column
996        );
997    }
998
999    // =========================================================================
1000    // Linguist resolution tests
1001    // =========================================================================
1002
1003    #[test]
1004    fn test_linguist_resolution() {
1005        assert_eq!(resolve_canonical("bash"), Some("Shell"));
1006        assert_eq!(resolve_canonical("sh"), Some("Shell"));
1007        assert_eq!(resolve_canonical("zsh"), Some("Shell"));
1008        assert_eq!(resolve_canonical("js"), Some("JavaScript"));
1009        assert_eq!(resolve_canonical("python"), Some("Python"));
1010        assert_eq!(resolve_canonical("unknown_lang"), None);
1011    }
1012
1013    #[test]
1014    fn test_linguist_resolution_case_insensitive() {
1015        assert_eq!(resolve_canonical("BASH"), Some("Shell"));
1016        assert_eq!(resolve_canonical("Bash"), Some("Shell"));
1017        assert_eq!(resolve_canonical("Python"), Some("Python"));
1018        assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
1019    }
1020
1021    #[test]
1022    fn test_alias_validation() {
1023        assert!(is_valid_alias("Shell", "bash"));
1024        assert!(is_valid_alias("Shell", "sh"));
1025        assert!(is_valid_alias("Shell", "zsh"));
1026        assert!(!is_valid_alias("Shell", "python"));
1027        assert!(!is_valid_alias("Shell", "invalid"));
1028    }
1029
1030    #[test]
1031    fn test_default_alias() {
1032        assert_eq!(default_alias("Shell"), Some("bash"));
1033        assert_eq!(default_alias("JavaScript"), Some("js"));
1034        assert_eq!(default_alias("Python"), Some("python"));
1035    }
1036
1037    // =========================================================================
1038    // Edge case tests
1039    // =========================================================================
1040
1041    #[test]
1042    fn test_mixed_case_labels_normalized() {
1043        let content = r#"```BASH
1044echo hi
1045```
1046
1047```Bash
1048echo there
1049```
1050
1051```bash
1052echo again
1053```
1054"#;
1055        let config = MD040Config {
1056            style: LanguageStyle::Consistent,
1057            ..Default::default()
1058        };
1059        // All should resolve to Shell, most prevalent should win
1060        let result = run_check_with_config(content, config).unwrap();
1061        // "bash" appears 1x, "Bash" appears 1x, "BASH" appears 1x
1062        // All are different strings, so there's a 3-way tie
1063        // Should pick curated default "bash" or alphabetically first
1064        assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
1065    }
1066
1067    #[test]
1068    fn test_multiple_languages_independent() {
1069        let content = r#"```bash
1070shell code
1071```
1072
1073```python
1074python code
1075```
1076
1077```sh
1078more shell
1079```
1080
1081```python3
1082more python
1083```
1084"#;
1085        let config = MD040Config {
1086            style: LanguageStyle::Consistent,
1087            ..Default::default()
1088        };
1089        let result = run_check_with_config(content, config).unwrap();
1090        // Should have 2 warnings: one for sh (inconsistent with bash) and one for python3 (inconsistent with python)
1091        assert_eq!(result.len(), 2);
1092    }
1093
1094    #[test]
1095    fn test_tilde_fences() {
1096        let content = r#"~~~bash
1097echo hi
1098~~~
1099
1100~~~sh
1101echo there
1102~~~
1103"#;
1104        let config = MD040Config {
1105            style: LanguageStyle::Consistent,
1106            ..Default::default()
1107        };
1108        let result = run_check_with_config(content, config.clone()).unwrap();
1109        assert_eq!(result.len(), 1);
1110
1111        let fixed = run_fix_with_config(content, config).unwrap();
1112        assert!(fixed.contains("~~~bash"));
1113        assert!(!fixed.contains("~~~sh"));
1114    }
1115
1116    #[test]
1117    fn test_longer_fence_markers_preserved() {
1118        let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
1119        let config = MD040Config {
1120            style: LanguageStyle::Consistent,
1121            ..Default::default()
1122        };
1123        let fixed = run_fix_with_config(content, config).unwrap();
1124        assert!(fixed.contains("````bash"));
1125        assert!(fixed.contains("```bash"));
1126    }
1127
1128    #[test]
1129    fn test_empty_document() {
1130        let result = run_check("").unwrap();
1131        assert!(result.is_empty());
1132    }
1133
1134    #[test]
1135    fn test_no_code_blocks() {
1136        let content = "# Just a heading\n\nSome text.";
1137        let result = run_check(content).unwrap();
1138        assert!(result.is_empty());
1139    }
1140
1141    #[test]
1142    fn test_single_code_block_no_inconsistency() {
1143        let content = "```bash\necho hi\n```";
1144        let config = MD040Config {
1145            style: LanguageStyle::Consistent,
1146            ..Default::default()
1147        };
1148        let result = run_check_with_config(content, config).unwrap();
1149        assert!(result.is_empty(), "Single block has no inconsistency");
1150    }
1151
1152    #[test]
1153    fn test_idempotent_fix() {
1154        let content = r#"```bash
1155echo hi
1156```
1157
1158```sh
1159echo there
1160```
1161"#;
1162        let config = MD040Config {
1163            style: LanguageStyle::Consistent,
1164            ..Default::default()
1165        };
1166        let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
1167        let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
1168        assert_eq!(fixed1, fixed2, "Fix should be idempotent");
1169    }
1170
1171    // =========================================================================
1172    // MkDocs superfences tests
1173    // =========================================================================
1174
1175    #[test]
1176    fn test_mkdocs_superfences_title_only() {
1177        // title= attribute without language should not warn in MkDocs flavor
1178        let content = r#"```title="Example"
1179echo hi
1180```
1181"#;
1182        let result = run_check_mkdocs(content).unwrap();
1183        assert!(
1184            result.is_empty(),
1185            "MkDocs superfences with title= should not require language"
1186        );
1187    }
1188
1189    #[test]
1190    fn test_mkdocs_superfences_hl_lines() {
1191        // hl_lines= attribute without language should not warn
1192        let content = r#"```hl_lines="1 2"
1193line 1
1194line 2
1195```
1196"#;
1197        let result = run_check_mkdocs(content).unwrap();
1198        assert!(
1199            result.is_empty(),
1200            "MkDocs superfences with hl_lines= should not require language"
1201        );
1202    }
1203
1204    #[test]
1205    fn test_mkdocs_superfences_linenums() {
1206        // linenums= attribute without language should not warn
1207        let content = r#"```linenums="1"
1208line 1
1209line 2
1210```
1211"#;
1212        let result = run_check_mkdocs(content).unwrap();
1213        assert!(
1214            result.is_empty(),
1215            "MkDocs superfences with linenums= should not require language"
1216        );
1217    }
1218
1219    #[test]
1220    fn test_mkdocs_superfences_class() {
1221        // Custom class (starting with .) should not warn
1222        let content = r#"```.my-class
1223some text
1224```
1225"#;
1226        let result = run_check_mkdocs(content).unwrap();
1227        assert!(
1228            result.is_empty(),
1229            "MkDocs superfences with .class should not require language"
1230        );
1231    }
1232
1233    #[test]
1234    fn test_mkdocs_superfences_id() {
1235        // Custom ID (starting with #) should not warn
1236        let content = r#"```#my-id
1237some text
1238```
1239"#;
1240        let result = run_check_mkdocs(content).unwrap();
1241        assert!(
1242            result.is_empty(),
1243            "MkDocs superfences with #id should not require language"
1244        );
1245    }
1246
1247    #[test]
1248    fn test_mkdocs_superfences_with_language() {
1249        // Language with superfences attributes should work fine
1250        let content = r#"```python title="Example" hl_lines="1"
1251print("hello")
1252```
1253"#;
1254        let result = run_check_mkdocs(content).unwrap();
1255        assert!(result.is_empty(), "Code block with language and attrs should pass");
1256    }
1257
1258    #[test]
1259    fn test_standard_flavor_no_special_handling() {
1260        // In Standard flavor, title= should still warn
1261        let content = r#"```title="Example"
1262echo hi
1263```
1264"#;
1265        let result = run_check(content).unwrap();
1266        assert_eq!(
1267            result.len(),
1268            1,
1269            "Standard flavor should warn about title= without language"
1270        );
1271    }
1272}