// rumdl_lib/rules/md040_fenced_code_language.rs

1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use std::collections::HashMap;
6
7/// Rule MD040: Fenced code blocks should have a language
8///
9/// See [docs/md040.md](../../docs/md040.md) for full documentation, configuration, and examples.
10pub mod md040_config;
11
12// ============================================================================
13// MkDocs Superfences Attribute Detection
14// ============================================================================
15
/// Info-string prefixes that mark a MkDocs superfences attribute instead of a
/// language identifier. Under the MkDocs flavor a fence carrying only such
/// attributes is accepted without a language.
/// See: https://facelessuser.github.io/pymdown-extensions/extensions/superfences/
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &[
    "title=",    // Block title
    "hl_lines=", // Highlighted lines
    "linenums=", // Line numbers
    ".",         // CSS class (e.g., .annotate)
    "#",         // CSS id
];

/// Returns `true` when `s` begins with any of the prefixes listed in
/// `MKDOCS_SUPERFENCES_ATTR_PREFIXES`.
#[inline]
fn is_superfences_attribute(s: &str) -> bool {
    for candidate in MKDOCS_SUPERFENCES_ATTR_PREFIXES {
        if s.starts_with(*candidate) {
            return true;
        }
    }
    false
}
34use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
35
/// One fenced code block opener, reduced to the pieces this rule inspects.
struct FencedCodeBlock {
    /// Zero-based index of the line holding the opening fence.
    line_idx: usize,
    /// First whitespace-delimited token of the info string; empty when the
    /// fence carries no info string at all.
    language: String,
    /// The run of fence characters that opens the block (backticks or tildes).
    fence_marker: String,
}
44
45#[derive(Debug, Clone, Default)]
46pub struct MD040FencedCodeLanguage {
47    config: MD040Config,
48}
49
50impl MD040FencedCodeLanguage {
51    pub fn with_config(config: MD040Config) -> Self {
52        Self { config }
53    }
54
55    /// Validate the configuration and return any errors
56    fn validate_config(&self) -> Vec<String> {
57        let mut errors = Vec::new();
58
59        // Validate preferred-aliases: check that each alias is valid for its language
60        for (canonical, alias) in &self.config.preferred_aliases {
61            // Find the actual canonical name (case-insensitive)
62            if let Some(actual_canonical) = resolve_canonical(canonical) {
63                if !is_valid_alias(actual_canonical, alias)
64                    && let Some(valid_aliases) = get_aliases(actual_canonical)
65                {
66                    let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
67                    let valid_str = valid_list
68                        .iter()
69                        .map(|s| format!("'{s}'"))
70                        .collect::<Vec<_>>()
71                        .join(", ");
72                    let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
73                    errors.push(format!(
74                        "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
75                    ));
76                }
77            } else {
78                errors.push(format!(
79                    "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
80                ));
81            }
82        }
83
84        errors
85    }
86
87    /// Determine the preferred label for each canonical language in the document
88    fn compute_preferred_labels(
89        &self,
90        blocks: &[FencedCodeBlock],
91        disabled_ranges: &[(usize, usize)],
92    ) -> HashMap<String, String> {
93        // Group labels by canonical language
94        let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
95
96        for block in blocks {
97            if is_line_disabled(disabled_ranges, block.line_idx) {
98                continue;
99            }
100            if block.language.is_empty() {
101                continue;
102            }
103            if let Some(canonical) = resolve_canonical(&block.language) {
104                by_canonical
105                    .entry(canonical.to_string())
106                    .or_default()
107                    .push(&block.language);
108            }
109        }
110
111        // Determine winning label for each canonical language
112        let mut result = HashMap::new();
113
114        for (canonical, labels) in by_canonical {
115            // Check for user override first (case-insensitive lookup)
116            let winner = if let Some(preferred) = self
117                .config
118                .preferred_aliases
119                .iter()
120                .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
121                .map(|(_, v)| v.clone())
122            {
123                preferred
124            } else {
125                // Find most prevalent label
126                let mut counts: HashMap<&str, usize> = HashMap::new();
127                for label in &labels {
128                    *counts.entry(*label).or_default() += 1;
129                }
130
131                let max_count = counts.values().max().copied().unwrap_or(0);
132                let winners: Vec<_> = counts
133                    .iter()
134                    .filter(|(_, c)| **c == max_count)
135                    .map(|(l, _)| *l)
136                    .collect();
137
138                if winners.len() == 1 {
139                    winners[0].to_string()
140                } else {
141                    // Tie-break: use curated default if available, otherwise alphabetically first
142                    default_alias(&canonical)
143                        .filter(|default| winners.contains(default))
144                        .map_or_else(
145                            || winners.into_iter().min().unwrap().to_string(),
146                            std::string::ToString::to_string,
147                        )
148                }
149            };
150
151            result.insert(canonical, winner);
152        }
153
154        result
155    }
156
157    /// Check if a language is allowed based on config
158    fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
159        // Allowlist takes precedence
160        if !self.config.allowed_languages.is_empty() {
161            let allowed = self.config.allowed_languages.join(", ");
162            let Some(canonical) = canonical else {
163                return Some(format!(
164                    "Language '{original_label}' is not in the allowed list: {allowed}"
165                ));
166            };
167            if !self
168                .config
169                .allowed_languages
170                .iter()
171                .any(|a| a.eq_ignore_ascii_case(canonical))
172            {
173                return Some(format!(
174                    "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
175                ));
176            }
177        } else if !self.config.disallowed_languages.is_empty()
178            && canonical.is_some_and(|canonical| {
179                self.config
180                    .disallowed_languages
181                    .iter()
182                    .any(|d| d.eq_ignore_ascii_case(canonical))
183            })
184        {
185            let canonical = canonical.unwrap_or("unknown");
186            return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
187        }
188        None
189    }
190
191    /// Check for unknown language based on config
192    fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
193        if resolve_canonical(label).is_some() {
194            return None;
195        }
196
197        match self.config.unknown_language_action {
198            UnknownLanguageAction::Ignore => None,
199            UnknownLanguageAction::Warn => Some((
200                format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
201                Severity::Warning,
202            )),
203            UnknownLanguageAction::Error => Some((
204                format!("Unknown language '{label}' (not in GitHub Linguist)"),
205                Severity::Error,
206            )),
207        }
208    }
209}
210
211impl Rule for MD040FencedCodeLanguage {
212    fn name(&self) -> &'static str {
213        "MD040"
214    }
215
216    fn description(&self) -> &'static str {
217        "Code blocks should have a language specified"
218    }
219
220    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
221        let content = ctx.content;
222        let mut warnings = Vec::new();
223
224        // Validate config and emit warnings for invalid configuration
225        for error in self.validate_config() {
226            warnings.push(LintWarning {
227                rule_name: Some(self.name().to_string()),
228                line: 1,
229                column: 1,
230                end_line: 1,
231                end_column: 1,
232                message: format!("[config error] {error}"),
233                severity: Severity::Error,
234                fix: None,
235            });
236        }
237
238        // Derive fenced code blocks from pre-computed context
239        let fenced_blocks = derive_fenced_code_blocks(ctx);
240
241        // Pre-compute disabled ranges for efficient lookup
242        let disabled_ranges = compute_disabled_ranges(content, self.name());
243
244        // Compute preferred labels for consistent mode
245        let preferred_labels = if self.config.style == LanguageStyle::Consistent {
246            self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
247        } else {
248            HashMap::new()
249        };
250
251        let lines = ctx.raw_lines();
252
253        for block in &fenced_blocks {
254            // Skip if this line is in a disabled range
255            if is_line_disabled(&disabled_ranges, block.line_idx) {
256                continue;
257            }
258
259            // Get the actual line content for additional checks
260            let line = lines.get(block.line_idx).unwrap_or(&"");
261            let trimmed = line.trim();
262            let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
263
264            // Check if fence has MkDocs superfences attributes but no language
265            let has_mkdocs_attrs_only =
266                ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
267
268            // MyST directives use {name} as the info string (e.g., {note}, {code-cell} python).
269            // These are valid MyST syntax and should not trigger missing-language warnings.
270            let is_myst_directive =
271                ctx.flavor.supports_myst_directives() && after_fence.starts_with('{') && after_fence.contains('}') && {
272                    let name = after_fence.trim_start_matches('{').split('}').next().unwrap_or("");
273                    !name.is_empty() && name.chars().next().is_some_and(|c| c.is_alphabetic() || c == '_')
274                };
275
276            // Pandoc/Quarto brace-syntax code chunks fall into three forms:
277            //   1. `{=html}` raw blocks — accepted under any Pandoc-compatible flavor.
278            //      Validated by `is_pandoc_raw_block_lang` (non-empty ASCII format name).
279            //   2. `{.python}` / `{.haskell .numberLines}` code-attribute syntax — the
280            //      first `.class` declares the language. Accepted under any
281            //      Pandoc-compatible flavor.
282            //   3. `{r}` / `{python}` exec chunks — accepted under Quarto only.
283            // Anything else wrapped in braces (e.g. `{r}` under pure Pandoc, or
284            // `{#myid}` with no class) is not a real language identifier and must be
285            // flagged as missing-language.
286            let is_pandoc_raw =
287                ctx.flavor.is_pandoc_compatible() && crate::utils::pandoc::is_pandoc_raw_block_lang(after_fence);
288            let is_pandoc_class_attr =
289                ctx.flavor.is_pandoc_compatible() && crate::utils::pandoc::is_pandoc_code_class_attr(after_fence);
290            let is_quarto_exec = ctx.flavor == crate::config::MarkdownFlavor::Quarto
291                && after_fence.starts_with('{')
292                && after_fence.ends_with('}')
293                && !is_pandoc_raw
294                && !is_pandoc_class_attr;
295            let has_pandoc_or_quarto_syntax = is_pandoc_raw || is_pandoc_class_attr || is_quarto_exec;
296            let is_unrecognized_brace_syntax = after_fence.starts_with('{')
297                && after_fence.ends_with('}')
298                && !has_pandoc_or_quarto_syntax
299                && !is_myst_directive;
300
301            let needs_language = !has_mkdocs_attrs_only
302                && !is_myst_directive
303                && (block.language.is_empty()
304                    || is_superfences_attribute(&block.language)
305                    || is_unrecognized_brace_syntax);
306
307            if needs_language && !has_pandoc_or_quarto_syntax {
308                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
309
310                warnings.push(LintWarning {
311                    rule_name: Some(self.name().to_string()),
312                    line: start_line,
313                    column: start_col,
314                    end_line,
315                    end_column: end_col,
316                    message: "Code block (```) missing language".to_string(),
317                    severity: Severity::Warning,
318                    fix: Some(Fix::new(
319                        {
320                            let trimmed = line.trim_start();
321                            let trimmed_start = line.len() - trimmed.len();
322                            let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
323                            let fence_end_byte = line_start_byte + trimmed_start + block.fence_marker.len();
324                            // Replace from after fence marker to end of line content,
325                            // so trailing whitespace is cleaned up while any existing
326                            // info string / attributes are preserved via the replacement.
327                            let line_end_byte = line_start_byte + line.len();
328                            fence_end_byte..line_end_byte
329                        },
330                        {
331                            let trimmed = line.trim_start();
332                            let after_fence = &trimmed[block.fence_marker.len()..];
333                            let after_fence_trimmed = after_fence.trim();
334                            if after_fence_trimmed.is_empty() {
335                                "text".to_string()
336                            } else {
337                                format!("text {after_fence_trimmed}")
338                            }
339                        },
340                    )),
341                });
342                continue;
343            }
344
345            // Skip further checks for Pandoc raw blocks and Quarto exec chunks
346            if has_pandoc_or_quarto_syntax {
347                continue;
348            }
349
350            let canonical = resolve_canonical(&block.language);
351
352            // Check language restrictions (allowlist/denylist)
353            if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
354                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
355
356                warnings.push(LintWarning {
357                    rule_name: Some(self.name().to_string()),
358                    line: start_line,
359                    column: start_col,
360                    end_line,
361                    end_column: end_col,
362                    message: msg,
363                    severity: Severity::Warning,
364                    fix: None,
365                });
366                continue;
367            }
368
369            // Check for unknown language (only if not handled by allowlist)
370            if canonical.is_none() {
371                if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
372                    let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
373
374                    warnings.push(LintWarning {
375                        rule_name: Some(self.name().to_string()),
376                        line: start_line,
377                        column: start_col,
378                        end_line,
379                        end_column: end_col,
380                        message: msg,
381                        severity,
382                        fix: None,
383                    });
384                }
385                continue;
386            }
387
388            // Check consistency
389            if self.config.style == LanguageStyle::Consistent
390                && let Some(preferred) = preferred_labels.get(canonical.unwrap())
391                && &block.language != preferred
392            {
393                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
394
395                let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
396                    let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
397                    Fix::new(
398                        (line_start_byte + label_start)..(line_start_byte + label_end),
399                        preferred.clone(),
400                    )
401                });
402                let lang = &block.language;
403                let canonical = canonical.unwrap();
404
405                warnings.push(LintWarning {
406                    rule_name: Some(self.name().to_string()),
407                    line: start_line,
408                    column: start_col,
409                    end_line,
410                    end_column: end_col,
411                    message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
412                    severity: Severity::Warning,
413                    fix,
414                });
415            }
416        }
417
418        Ok(warnings)
419    }
420
421    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
422        if self.should_skip(ctx) {
423            return Ok(ctx.content.to_string());
424        }
425        let warnings = self.check(ctx)?;
426        if warnings.is_empty() {
427            return Ok(ctx.content.to_string());
428        }
429        let warnings =
430            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
431        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
432    }
433
434    /// Get the category of this rule for selective processing
435    fn category(&self) -> RuleCategory {
436        RuleCategory::CodeBlock
437    }
438
439    /// Check if this rule should be skipped
440    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
441        ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
442    }
443
444    fn as_any(&self) -> &dyn std::any::Any {
445        self
446    }
447
448    fn default_config_section(&self) -> Option<(String, toml::Value)> {
449        let default_config = MD040Config::default();
450        let json_value = serde_json::to_value(&default_config).ok()?;
451        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
452
453        if let toml::Value::Table(table) = toml_value {
454            if !table.is_empty() {
455                Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
456            } else {
457                None
458            }
459        } else {
460            None
461        }
462    }
463
464    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
465    where
466        Self: Sized,
467    {
468        let rule_config: MD040Config = load_rule_config(config);
469        Box::new(MD040FencedCodeLanguage::with_config(rule_config))
470    }
471}
472
473/// Derive fenced code blocks from pre-computed CodeBlockDetail data
474fn derive_fenced_code_blocks(ctx: &crate::lint_context::LintContext) -> Vec<FencedCodeBlock> {
475    let content = ctx.content;
476    let line_offsets = &ctx.line_offsets;
477
478    ctx.code_block_details
479        .iter()
480        .filter(|d| d.is_fenced)
481        .map(|detail| {
482            let line_idx = match line_offsets.binary_search(&detail.start) {
483                Ok(idx) => idx,
484                Err(idx) => idx.saturating_sub(1),
485            };
486
487            // Determine fence marker from the actual line content
488            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
489            let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
490            let line = content.get(line_start..line_end).unwrap_or("");
491            let trimmed = line.trim();
492            let fence_marker = if trimmed.starts_with('`') {
493                let count = trimmed.chars().take_while(|&c| c == '`').count();
494                "`".repeat(count)
495            } else if trimmed.starts_with('~') {
496                let count = trimmed.chars().take_while(|&c| c == '~').count();
497                "~".repeat(count)
498            } else {
499                "```".to_string()
500            };
501
502            let language = detail.info_string.split_whitespace().next().unwrap_or("").to_string();
503
504            FencedCodeBlock {
505                line_idx,
506                language,
507                fence_marker,
508            }
509        })
510        .collect()
511}
512
513/// Compute disabled line ranges from disable/enable comments
514fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
515    let mut ranges = Vec::new();
516    let mut disabled_start: Option<usize> = None;
517
518    for (i, line) in content.lines().enumerate() {
519        let trimmed = line.trim();
520
521        if let Some(rules) = crate::inline_config::parse_disable_comment(trimmed)
522            && (rules.is_empty() || rules.contains(&rule_name))
523            && disabled_start.is_none()
524        {
525            disabled_start = Some(i);
526        }
527
528        if let Some(rules) = crate::inline_config::parse_enable_comment(trimmed)
529            && (rules.is_empty() || rules.contains(&rule_name))
530            && let Some(start) = disabled_start.take()
531        {
532            ranges.push((start, i));
533        }
534    }
535
536    // Handle unclosed disable
537    if let Some(start) = disabled_start {
538        ranges.push((start, usize::MAX));
539    }
540
541    ranges
542}
543
/// Returns `true` when `line_idx` lies inside any of the half-open ranges
/// `[start, end)` in `ranges`.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&line_idx) {
            return true;
        }
    }
    false
}
548
/// Locates the language label on a fence line.
///
/// Returns the label's `(start, end)` byte offsets within `line`, where the
/// label is the first run of non-whitespace characters after `fence_marker`.
/// Returns `None` when the line, after its leading whitespace, does not start
/// with `fence_marker`, or when nothing but whitespace follows the marker.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let unindented = line.trim_start();
    let indent_len = line.len() - unindented.len();

    let info = unindented.strip_prefix(fence_marker)?;
    let label_onwards = info.trim_start();
    if label_onwards.is_empty() {
        return None;
    }

    // Whitespace between the marker and the label is skipped, not part of the span.
    let gap_len = info.len() - label_onwards.len();
    let label_len = label_onwards
        .find(char::is_whitespace)
        .unwrap_or(label_onwards.len());

    let start = indent_len + fence_marker.len() + gap_len;
    Some((start, start + label_len))
}
573
574#[cfg(test)]
575mod tests {
576    use super::*;
577    use crate::lint_context::LintContext;
578
579    fn run_check(content: &str) -> LintResult {
580        let rule = MD040FencedCodeLanguage::default();
581        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
582        rule.check(&ctx)
583    }
584
585    fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
586        let rule = MD040FencedCodeLanguage::with_config(config);
587        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
588        rule.check(&ctx)
589    }
590
591    fn run_fix(content: &str) -> Result<String, LintError> {
592        let rule = MD040FencedCodeLanguage::default();
593        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
594        rule.fix(&ctx)
595    }
596
597    fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
598        let rule = MD040FencedCodeLanguage::with_config(config);
599        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
600        rule.fix(&ctx)
601    }
602
603    fn run_check_mkdocs(content: &str) -> LintResult {
604        let rule = MD040FencedCodeLanguage::default();
605        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
606        rule.check(&ctx)
607    }
608
609    // =========================================================================
610    // Basic functionality tests
611    // =========================================================================
612
613    #[test]
614    fn test_code_blocks_with_language_specified() {
615        let content = r#"# Test
616
617```python
618print("Hello, world!")
619```
620
621```javascript
622console.log("Hello!");
623```
624"#;
625        let result = run_check(content).unwrap();
626        assert!(result.is_empty(), "No warnings expected for code blocks with language");
627    }
628
629    #[test]
630    fn test_code_blocks_without_language() {
631        let content = r#"# Test
632
633```
634print("Hello, world!")
635```
636"#;
637        let result = run_check(content).unwrap();
638        assert_eq!(result.len(), 1);
639        assert_eq!(result[0].message, "Code block (```) missing language");
640        assert_eq!(result[0].line, 3);
641    }
642
643    #[test]
644    fn test_fix_method_adds_text_language() {
645        let content = r#"# Test
646
647```
648code without language
649```
650
651```python
652already has language
653```
654
655```
656another block without
657```
658"#;
659        let fixed = run_fix(content).unwrap();
660        assert!(fixed.contains("```text"));
661        assert!(fixed.contains("```python"));
662        assert_eq!(fixed.matches("```text").count(), 2);
663    }
664
665    #[test]
666    fn test_fix_preserves_indentation() {
667        let content = r#"# Test
668
669- List item
670  ```
671  indented code block
672  ```
673"#;
674        let fixed = run_fix(content).unwrap();
675        assert!(fixed.contains("  ```text"));
676    }
677
678    // =========================================================================
679    // Consistent mode tests
680    // =========================================================================
681
682    #[test]
683    fn test_consistent_mode_detects_inconsistency() {
684        let content = r#"```bash
685echo hi
686```
687
688```sh
689echo there
690```
691
692```bash
693echo again
694```
695"#;
696        let config = MD040Config {
697            style: LanguageStyle::Consistent,
698            ..Default::default()
699        };
700        let result = run_check_with_config(content, config).unwrap();
701        assert_eq!(result.len(), 1);
702        assert!(result[0].message.contains("Inconsistent"));
703        assert!(result[0].message.contains("sh"));
704        assert!(result[0].message.contains("bash"));
705    }
706
707    #[test]
708    fn test_consistent_mode_fix_normalizes() {
709        let content = r#"```bash
710echo hi
711```
712
713```sh
714echo there
715```
716
717```bash
718echo again
719```
720"#;
721        let config = MD040Config {
722            style: LanguageStyle::Consistent,
723            ..Default::default()
724        };
725        let fixed = run_fix_with_config(content, config).unwrap();
726        assert_eq!(fixed.matches("```bash").count(), 3);
727        assert_eq!(fixed.matches("```sh").count(), 0);
728    }
729
730    #[test]
731    fn test_consistent_mode_tie_break_uses_curated_default() {
732        // When there's a tie (1 bash, 1 sh), should use curated default (bash)
733        let content = r#"```bash
734echo hi
735```
736
737```sh
738echo there
739```
740"#;
741        let config = MD040Config {
742            style: LanguageStyle::Consistent,
743            ..Default::default()
744        };
745        let fixed = run_fix_with_config(content, config).unwrap();
746        // bash is the curated default for Shell
747        assert_eq!(fixed.matches("```bash").count(), 2);
748    }
749
750    #[test]
751    fn test_consistent_mode_with_preferred_alias() {
752        let content = r#"```bash
753echo hi
754```
755
756```sh
757echo there
758```
759"#;
760        let mut preferred = HashMap::new();
761        preferred.insert("Shell".to_string(), "sh".to_string());
762
763        let config = MD040Config {
764            style: LanguageStyle::Consistent,
765            preferred_aliases: preferred,
766            ..Default::default()
767        };
768        let fixed = run_fix_with_config(content, config).unwrap();
769        assert_eq!(fixed.matches("```sh").count(), 2);
770        assert_eq!(fixed.matches("```bash").count(), 0);
771    }
772
773    #[test]
774    fn test_consistent_mode_ignores_disabled_blocks() {
775        let content = r#"```bash
776echo hi
777```
778<!-- rumdl-disable MD040 -->
779```sh
780echo there
781```
782```sh
783echo again
784```
785<!-- rumdl-enable MD040 -->
786"#;
787        let config = MD040Config {
788            style: LanguageStyle::Consistent,
789            ..Default::default()
790        };
791        let result = run_check_with_config(content, config).unwrap();
792        assert!(result.is_empty(), "Disabled blocks should not affect consistency");
793    }
794
795    #[test]
796    fn test_fix_preserves_attributes() {
797        let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
798        let config = MD040Config {
799            style: LanguageStyle::Consistent,
800            ..Default::default()
801        };
802        let fixed = run_fix_with_config(content, config).unwrap();
803        assert!(fixed.contains("```bash {.highlight}"));
804    }
805
/// Whitespace between the fence marker and the language label must survive
/// the fix: only the label itself (`sh` -> `bash`) is replaced.
#[test]
fn test_fix_preserves_spacing_before_label() {
    let consistent = MD040Config {
        style: LanguageStyle::Consistent,
        ..Default::default()
    };
    let markdown = "```bash\ncode\n```\n\n```  sh {.highlight}\ncode\n```";

    let output = run_fix_with_config(markdown, consistent).unwrap();

    let has_rewritten_label = output.contains("```  bash {.highlight}");
    let has_old_label = output.contains("```  sh {.highlight}");
    assert!(has_rewritten_label);
    assert!(!has_old_label);
}
817
818    // =========================================================================
819    // Allowlist/denylist tests
820    // =========================================================================
821
/// A language that is missing from a non-empty allowlist is reported.
#[test]
fn test_allowlist_blocks_unlisted() {
    let allow_python_and_shell = MD040Config {
        allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
        ..Default::default()
    };

    let warnings = run_check_with_config("```java\ncode\n```", allow_python_and_shell).unwrap();

    assert_eq!(warnings.len(), 1);
    let only = &warnings[0];
    assert!(only.message.contains("not in the allowed list"));
}
833
/// A language present in the allowlist produces no warning.
#[test]
fn test_allowlist_allows_listed() {
    let allow_python = MD040Config {
        allowed_languages: vec!["Python".to_string()],
        ..Default::default()
    };

    let warnings = run_check_with_config("```python\ncode\n```", allow_python).unwrap();

    assert!(warnings.is_empty());
}
844
/// With an allowlist configured, a label that is not a known language is
/// still reported against the allowed list.
#[test]
fn test_allowlist_blocks_unknown_language() {
    let allow_python = MD040Config {
        allowed_languages: vec!["Python".to_string()],
        ..Default::default()
    };

    let warnings = run_check_with_config("```mysterylang\ncode\n```", allow_python).unwrap();

    assert_eq!(warnings.len(), 1);
    let only = &warnings[0];
    assert!(only.message.contains("allowed list"));
}
856
/// Allowlist entries are matched without regard to case: `PYTHON` in the
/// config accepts a `python` fence.
#[test]
fn test_allowlist_case_insensitive() {
    let allow_upper_python = MD040Config {
        allowed_languages: vec!["PYTHON".to_string()],
        ..Default::default()
    };

    let warnings = run_check_with_config("```python\ncode\n```", allow_upper_python).unwrap();

    assert!(warnings.is_empty());
}
867
/// A language named in the denylist is reported as disallowed.
#[test]
fn test_denylist_blocks_listed() {
    let deny_java = MD040Config {
        disallowed_languages: vec!["Java".to_string()],
        ..Default::default()
    };

    let warnings = run_check_with_config("```java\ncode\n```", deny_java).unwrap();

    assert_eq!(warnings.len(), 1);
    let only = &warnings[0];
    assert!(only.message.contains("disallowed"));
}
879
/// A language that is absent from the denylist produces no warning.
#[test]
fn test_denylist_allows_unlisted() {
    let deny_java = MD040Config {
        disallowed_languages: vec!["Java".to_string()],
        ..Default::default()
    };

    let warnings = run_check_with_config("```python\ncode\n```", deny_java).unwrap();

    assert!(warnings.is_empty());
}
890
/// When the same language appears in both lists, the allowlist wins and the
/// denylist entry is expected to be ignored.
#[test]
fn test_allowlist_takes_precedence_over_denylist() {
    let python = || vec!["Python".to_string()];
    let both_lists = MD040Config {
        allowed_languages: python(),
        // Expected to be ignored because an allowlist is configured.
        disallowed_languages: python(),
        ..Default::default()
    };

    let warnings = run_check_with_config("```python\ncode\n```", both_lists).unwrap();

    assert!(warnings.is_empty());
}
902
903    // =========================================================================
904    // Unknown language tests
905    // =========================================================================
906
/// With the default configuration an unrecognized language label is not
/// reported.
#[test]
fn test_unknown_language_ignore_default() {
    let warnings = run_check("```mycustomlang\ncode\n```").unwrap();

    assert!(warnings.is_empty(), "Unknown languages ignored by default");
}
913
/// `UnknownLanguageAction::Warn` reports an unrecognized label once, names
/// the label in the message, and uses warning severity.
#[test]
fn test_unknown_language_warn() {
    let warn_on_unknown = MD040Config {
        unknown_language_action: UnknownLanguageAction::Warn,
        ..Default::default()
    };

    let warnings = run_check_with_config("```mycustomlang\ncode\n```", warn_on_unknown).unwrap();

    assert_eq!(warnings.len(), 1);
    let only = &warnings[0];
    assert!(only.message.contains("Unknown language"));
    assert!(only.message.contains("mycustomlang"));
    assert_eq!(only.severity, Severity::Warning);
}
927
/// `UnknownLanguageAction::Error` reports an unrecognized label once with
/// error severity.
#[test]
fn test_unknown_language_error() {
    let error_on_unknown = MD040Config {
        unknown_language_action: UnknownLanguageAction::Error,
        ..Default::default()
    };

    let warnings = run_check_with_config("```mycustomlang\ncode\n```", error_on_unknown).unwrap();

    assert_eq!(warnings.len(), 1);
    let only = &warnings[0];
    assert!(only.message.contains("Unknown language"));
    assert_eq!(only.severity, Severity::Error);
}
940
941    // =========================================================================
942    // Config validation tests
943    // =========================================================================
944
/// `validate_config` must report a preferred alias that is not a valid
/// alias of its language, and the message must name the offending alias.
#[test]
fn test_invalid_preferred_alias_detected() {
    let rule = MD040FencedCodeLanguage::with_config(MD040Config {
        style: LanguageStyle::Consistent,
        preferred_aliases: HashMap::from([("Shell".to_string(), "invalid_alias".to_string())]),
        ..Default::default()
    });

    let problems = rule.validate_config();

    assert_eq!(problems.len(), 1);
    let first = &problems[0];
    assert!(first.contains("Invalid alias"));
    assert!(first.contains("invalid_alias"));
}
961
/// `validate_config` must report a `preferred_aliases` key that is not a
/// known language.
#[test]
fn test_unknown_language_in_preferred_aliases_detected() {
    let rule = MD040FencedCodeLanguage::with_config(MD040Config {
        style: LanguageStyle::Consistent,
        preferred_aliases: HashMap::from([("NotARealLanguage".to_string(), "nope".to_string())]),
        ..Default::default()
    });

    let problems = rule.validate_config();

    assert_eq!(problems.len(), 1);
    let first = &problems[0];
    assert!(first.contains("Unknown language"));
}
977
/// `validate_config` returns no errors when every preferred alias is a
/// valid alias of its language.
#[test]
fn test_valid_preferred_alias_accepted() {
    let aliases = HashMap::from([
        ("Shell".to_string(), "bash".to_string()),
        ("JavaScript".to_string(), "js".to_string()),
    ]);
    let rule = MD040FencedCodeLanguage::with_config(MD040Config {
        style: LanguageStyle::Consistent,
        preferred_aliases: aliases,
        ..Default::default()
    });

    let problems = rule.validate_config();

    assert!(problems.is_empty());
}
993
/// A warning produced for an invalid configuration (here: a preferred alias
/// that is not valid for `Shell`) must carry a 1-indexed line and column,
/// never 0.
#[test]
fn test_config_error_uses_valid_line_column() {
    let config = MD040Config {
        preferred_aliases: HashMap::from([("Shell".to_string(), "invalid_alias".to_string())]),
        ..Default::default()
    };
    let rule = MD040FencedCodeLanguage::with_config(config);

    let content = "```shell\necho hello\n```";
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let result = rule.check(&ctx).unwrap();

    // Locate the config error warning; `expect` fails with the same message
    // the previous `is_some()` assertion used, without a separate `unwrap()`.
    let warning = result
        .iter()
        .find(|w| w.message.contains("[config error]"))
        .expect("Should have a config error warning");

    // Line and column should be 1-indexed (not 0)
    assert!(
        warning.line >= 1,
        "Config error line should be >= 1, got {}",
        warning.line
    );
    assert!(
        warning.column >= 1,
        "Config error column should be >= 1, got {}",
        warning.column
    );
}
1027
1028    // =========================================================================
1029    // Linguist resolution tests
1030    // =========================================================================
1031
/// `resolve_canonical` maps known aliases to their canonical language name
/// and returns `None` for an unrecognized label.
#[test]
fn test_linguist_resolution() {
    let cases = [
        ("bash", Some("Shell")),
        ("sh", Some("Shell")),
        ("zsh", Some("Shell")),
        ("js", Some("JavaScript")),
        ("python", Some("Python")),
        ("unknown_lang", None),
    ];

    for (label, canonical) in cases {
        assert_eq!(resolve_canonical(label), canonical);
    }
}
1041
/// `resolve_canonical` ignores the case of the label it is given.
#[test]
fn test_linguist_resolution_case_insensitive() {
    let cases = [
        ("BASH", "Shell"),
        ("Bash", "Shell"),
        ("Python", "Python"),
        ("PYTHON", "Python"),
    ];

    for (label, canonical) in cases {
        assert_eq!(resolve_canonical(label), Some(canonical));
    }
}
1049
/// `is_valid_alias` accepts the Shell aliases `bash`, `sh` and `zsh` and
/// rejects labels that are not Shell aliases.
#[test]
fn test_alias_validation() {
    for accepted in ["bash", "sh", "zsh"] {
        assert!(is_valid_alias("Shell", accepted));
    }

    for rejected in ["python", "invalid"] {
        assert!(!is_valid_alias("Shell", rejected));
    }
}
1058
/// `default_alias` returns the expected default label for each canonical
/// language checked here.
#[test]
fn test_default_alias() {
    let cases = [("Shell", "bash"), ("JavaScript", "js"), ("Python", "python")];

    for (language, alias) in cases {
        assert_eq!(default_alias(language), Some(alias));
    }
}
1065
1066    // =========================================================================
1067    // Edge case tests
1068    // =========================================================================
1069
/// `BASH`, `Bash` and `bash` are three distinct label strings, each used
/// once in the document, so consistent mode must flag at least two of them.
#[test]
fn test_mixed_case_labels_normalized() {
    let markdown = concat!(
        "```BASH\n",
        "echo hi\n",
        "```\n",
        "\n",
        "```Bash\n",
        "echo there\n",
        "```\n",
        "\n",
        "```bash\n",
        "echo again\n",
        "```\n"
    );
    let consistent = MD040Config {
        style: LanguageStyle::Consistent,
        ..Default::default()
    };

    // Each spelling appears exactly once, so the vote is a three-way tie;
    // whichever label is picked, the remaining spellings are inconsistent.
    let warnings = run_check_with_config(markdown, consistent).unwrap();

    assert!(warnings.len() >= 2, "Should flag at least 2 inconsistent labels");
}
1095
/// Consistency is evaluated per language: the shell blocks (`bash`/`sh`) and
/// the Python blocks (`python`/`python3`) each contribute one warning.
#[test]
fn test_multiple_languages_independent() {
    let markdown = concat!(
        "```bash\n",
        "shell code\n",
        "```\n",
        "\n",
        "```python\n",
        "python code\n",
        "```\n",
        "\n",
        "```sh\n",
        "more shell\n",
        "```\n",
        "\n",
        "```python3\n",
        "more python\n",
        "```\n"
    );
    let consistent = MD040Config {
        style: LanguageStyle::Consistent,
        ..Default::default()
    };

    let warnings = run_check_with_config(markdown, consistent).unwrap();

    // One warning for `sh` (vs `bash`) and one for `python3` (vs `python`).
    assert_eq!(warnings.len(), 2);
}
1122
/// Tilde fences (`~~~`) are checked and fixed like backtick fences: the
/// `sh` block is flagged and rewritten to `bash`, keeping the `~~~` marker.
#[test]
fn test_tilde_fences() {
    let markdown = concat!(
        "~~~bash\n",
        "echo hi\n",
        "~~~\n",
        "\n",
        "~~~sh\n",
        "echo there\n",
        "~~~\n"
    );
    let consistent = MD040Config {
        style: LanguageStyle::Consistent,
        ..Default::default()
    };

    let warnings = run_check_with_config(markdown, consistent.clone()).unwrap();
    assert_eq!(warnings.len(), 1);

    let output = run_fix_with_config(markdown, consistent).unwrap();
    let has_bash_fence = output.contains("~~~bash");
    let has_sh_fence = output.contains("~~~sh");
    assert!(has_bash_fence);
    assert!(!has_sh_fence);
}
1144
/// The fix must keep each block's own fence length: the four-backtick `sh`
/// block becomes a four-backtick `bash` block, and the three-backtick `bash`
/// block stays a three-backtick block.
#[test]
fn test_longer_fence_markers_preserved() {
    let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
    let config = MD040Config {
        style: LanguageStyle::Consistent,
        ..Default::default()
    };
    let fixed = run_fix_with_config(content, config).unwrap();

    // Compare whole lines rather than substrings: "```bash" is a substring of
    // "````bash", so a `contains` check on the shorter fence would pass even
    // if the three-backtick block were missing or altered.
    let lines: Vec<&str> = fixed.lines().collect();
    assert!(
        lines.contains(&"````bash"),
        "four-backtick fence should be relabelled to bash: {fixed:?}"
    );
    assert!(
        lines.contains(&"```bash"),
        "three-backtick fence should be kept as-is: {fixed:?}"
    );
    assert!(
        !lines.contains(&"````sh"),
        "original sh label should be gone: {fixed:?}"
    );
}
1156
/// An empty document yields no warnings.
#[test]
fn test_empty_document() {
    let warnings = run_check("").unwrap();

    assert!(warnings.is_empty());
}
1162
/// A document without any fenced code block yields no warnings.
#[test]
fn test_no_code_blocks() {
    let warnings = run_check("# Just a heading\n\nSome text.").unwrap();

    assert!(warnings.is_empty());
}
1169
/// A document with a single fenced block has nothing to be inconsistent
/// with, so consistent mode reports nothing.
#[test]
fn test_single_code_block_no_inconsistency() {
    let consistent = MD040Config {
        style: LanguageStyle::Consistent,
        ..Default::default()
    };

    let warnings = run_check_with_config("```bash\necho hi\n```", consistent).unwrap();

    assert!(warnings.is_empty(), "Single block has no inconsistency");
}
1180
/// Running the fix on already-fixed output must not change it again.
#[test]
fn test_idempotent_fix() {
    let markdown = concat!(
        "```bash\n",
        "echo hi\n",
        "```\n",
        "\n",
        "```sh\n",
        "echo there\n",
        "```\n"
    );
    let consistent = MD040Config {
        style: LanguageStyle::Consistent,
        ..Default::default()
    };

    let first_pass = run_fix_with_config(markdown, consistent.clone()).unwrap();
    let second_pass = run_fix_with_config(&first_pass, consistent).unwrap();

    assert_eq!(first_pass, second_pass, "Fix should be idempotent");
}
1199
1200    // =========================================================================
1201    // MkDocs superfences tests
1202    // =========================================================================
1203
/// Under the MkDocs flavor, a fence whose info string is only a `title=`
/// attribute must not be reported as missing a language.
#[test]
fn test_mkdocs_superfences_title_only() {
    let markdown = "```title=\"Example\"\necho hi\n```\n";

    let warnings = run_check_mkdocs(markdown).unwrap();

    assert!(
        warnings.is_empty(),
        "MkDocs superfences with title= should not require language"
    );
}
1217
/// Under the MkDocs flavor, a fence whose info string is only an
/// `hl_lines=` attribute must not be reported as missing a language.
#[test]
fn test_mkdocs_superfences_hl_lines() {
    let markdown = "```hl_lines=\"1 2\"\nline 1\nline 2\n```\n";

    let warnings = run_check_mkdocs(markdown).unwrap();

    assert!(
        warnings.is_empty(),
        "MkDocs superfences with hl_lines= should not require language"
    );
}
1232
/// Under the MkDocs flavor, a fence whose info string is only a
/// `linenums=` attribute must not be reported as missing a language.
#[test]
fn test_mkdocs_superfences_linenums() {
    let markdown = "```linenums=\"1\"\nline 1\nline 2\n```\n";

    let warnings = run_check_mkdocs(markdown).unwrap();

    assert!(
        warnings.is_empty(),
        "MkDocs superfences with linenums= should not require language"
    );
}
1247
/// Under the MkDocs flavor, a fence whose info string is a custom CSS class
/// (leading `.`) must not be reported as missing a language.
#[test]
fn test_mkdocs_superfences_class() {
    let markdown = "```.my-class\nsome text\n```\n";

    let warnings = run_check_mkdocs(markdown).unwrap();

    assert!(
        warnings.is_empty(),
        "MkDocs superfences with .class should not require language"
    );
}
1261
/// Under the MkDocs flavor, a fence whose info string is a custom id
/// (leading `#`) must not be reported as missing a language.
#[test]
fn test_mkdocs_superfences_id() {
    let markdown = "```#my-id\nsome text\n```\n";

    let warnings = run_check_mkdocs(markdown).unwrap();

    assert!(
        warnings.is_empty(),
        "MkDocs superfences with #id should not require language"
    );
}
1275
/// A fence that names a language and also carries superfences attributes
/// passes under the MkDocs flavor.
#[test]
fn test_mkdocs_superfences_with_language() {
    let markdown = "```python title=\"Example\" hl_lines=\"1\"\nprint(\"hello\")\n```\n";

    let warnings = run_check_mkdocs(markdown).unwrap();

    assert!(warnings.is_empty(), "Code block with language and attrs should pass");
}
1286
/// The `title=` exemption is MkDocs-specific: the same fence checked through
/// `run_check` (Standard flavor, per the assertion message) is reported once.
#[test]
fn test_standard_flavor_no_special_handling() {
    let markdown = "```title=\"Example\"\necho hi\n```\n";

    let warnings = run_check(markdown).unwrap();

    assert_eq!(
        warnings.len(),
        1,
        "Standard flavor should warn about title= without language"
    );
}
1301
/// A `{=html}` raw block is valid Pandoc syntax, so MD040 must not report it
/// when the document is linted with the Pandoc flavor.
#[test]
fn test_pandoc_raw_block_skipped_under_pandoc_flavor() {
    use crate::config::MarkdownFlavor;

    let ctx = LintContext::new("```{=html}\n<div>raw html</div>\n```\n", MarkdownFlavor::Pandoc, None);
    let result = MD040FencedCodeLanguage::default().check(&ctx).unwrap();

    assert!(
        result.is_empty(),
        "MD040 should skip Pandoc raw blocks ({{=html}}) under Pandoc flavor: {result:?}"
    );
}
1315
/// A `{=html}` raw block must also be accepted under the Quarto flavor,
/// which is Pandoc-compatible.
#[test]
fn test_pandoc_raw_block_skipped_under_quarto_flavor() {
    use crate::config::MarkdownFlavor;

    let ctx = LintContext::new("```{=html}\n<div>raw html</div>\n```\n", MarkdownFlavor::Quarto, None);
    let result = MD040FencedCodeLanguage::default().check(&ctx).unwrap();

    assert!(
        result.is_empty(),
        "MD040 should skip Pandoc raw blocks ({{=html}}) under Quarto flavor: {result:?}"
    );
}
1328
/// A Pandoc raw block such as ```` ```{=html} ```` names an output target
/// rather than omitting a language, so MD040 must accept it under Pandoc.
#[test]
fn test_pandoc_accepts_raw_html_block() {
    let markdown = "```{=html}\n<div>raw</div>\n```\n";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Pandoc, None);

    let result = MD040FencedCodeLanguage::default().check(&ctx).unwrap();

    assert!(result.is_empty(), "MD040 should accept ```{{=html}}```: {result:?}");
}
1340
/// `{r}` is Quarto execution syntax, not a Pandoc raw-format declaration.
/// Under the Pandoc flavor (as opposed to Quarto) it must be flagged as a
/// missing language.
#[test]
fn test_pandoc_rejects_quarto_exec_blocks() {
    let markdown = "```{r}\nsummary(data)\n```\n";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Pandoc, None);

    let warnings = MD040FencedCodeLanguage::default().check(&ctx).unwrap();

    assert!(
        !warnings.is_empty(),
        "MD040 under Pandoc should flag `{{r}}` (Quarto-only)"
    );
}
1355
/// `{r}` is valid Quarto execution syntax and must not be flagged when the
/// document is linted with the Quarto flavor.
#[test]
fn test_quarto_still_accepts_exec_block() {
    let markdown = "```{r}\nsummary(data)\n```\n";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Quarto, None);

    let result = MD040FencedCodeLanguage::default().check(&ctx).unwrap();

    assert!(
        result.is_empty(),
        "MD040 under Quarto should accept `{{r}}`: {result:?}"
    );
}
1369
/// The same `{r}` exec chunk is linted under both flavors: Quarto accepts it,
/// while Pandoc (whose raw-format declarations use `{=format}`) flags it as
/// missing a real language identifier.
#[test]
fn test_quarto_exec_block_skipped_under_quarto_only() {
    use crate::config::MarkdownFlavor;

    let markdown = "```{r}\n1 + 1\n```\n";
    let rule = MD040FencedCodeLanguage::default();

    // Quarto: exec chunk syntax is recognized, so nothing is reported.
    let ctx_quarto = LintContext::new(markdown, MarkdownFlavor::Quarto, None);
    let result_quarto = rule.check(&ctx_quarto).unwrap();
    assert!(
        result_quarto.is_empty(),
        "MD040 should skip Quarto exec chunks under Quarto flavor: {result_quarto:?}"
    );

    // Pandoc: `{r}` is unrecognized brace syntax and counts as a missing language.
    let ctx_pandoc = LintContext::new(markdown, MarkdownFlavor::Pandoc, None);
    let flagged_under_pandoc = !rule.check(&ctx_pandoc).unwrap().is_empty();
    assert!(
        flagged_under_pandoc,
        "MD040 should flag `{{r}}` under Pandoc as missing a real language"
    );
}
1394
/// The Pandoc code-attribute form `{.lang}` declares the block's language.
/// MD040 must accept it under both the Pandoc and the Quarto flavor.
#[test]
fn test_pandoc_class_attr_accepted_as_language() {
    let markdown = "```{.python}\nprint(\"hi\")\n```\n";
    let rule = MD040FencedCodeLanguage::default();

    let ctx_pandoc = LintContext::new(markdown, crate::config::MarkdownFlavor::Pandoc, None);
    let result_pandoc = rule.check(&ctx_pandoc).unwrap();
    assert!(
        result_pandoc.is_empty(),
        "MD040 under Pandoc should accept ```{{.python}}``` as language declaration: {result_pandoc:?}"
    );

    let ctx_quarto = LintContext::new(markdown, crate::config::MarkdownFlavor::Quarto, None);
    let result_quarto = rule.check(&ctx_quarto).unwrap();
    assert!(
        result_quarto.is_empty(),
        "MD040 under Quarto should accept ```{{.python}}``` as language declaration: {result_quarto:?}"
    );
}
1417
/// Pandoc code attributes may list several classes and key=value pairs. The
/// first class names the language; later ones such as `.numberLines` are
/// decoration. MD040 must accept the block under both Pandoc and Quarto.
#[test]
fn test_pandoc_class_attr_with_extra_attributes_accepted() {
    let markdown = "```{.haskell .numberLines}\nmain = putStrLn \"hi\"\n```\n";
    let rule = MD040FencedCodeLanguage::default();

    let ctx_pandoc = LintContext::new(markdown, crate::config::MarkdownFlavor::Pandoc, None);
    let result_pandoc = rule.check(&ctx_pandoc).unwrap();
    assert!(
        result_pandoc.is_empty(),
        "MD040 under Pandoc should accept ```{{.haskell .numberLines}}```: {result_pandoc:?}"
    );

    let ctx_quarto = LintContext::new(markdown, crate::config::MarkdownFlavor::Quarto, None);
    let result_quarto = rule.check(&ctx_quarto).unwrap();
    assert!(
        result_quarto.is_empty(),
        "MD040 under Quarto should accept ```{{.haskell .numberLines}}```: {result_quarto:?}"
    );
}
1440
/// Pandoc code attributes may also carry an id (`#myid`) and key=value
/// pairs. The block declares a language as long as a `.class` is present.
#[test]
fn test_pandoc_class_attr_with_id_and_keyvalue_accepted() {
    let markdown = "```{#snippet .python startFrom=\"10\"}\nprint(1)\n```\n";
    let ctx_pandoc = LintContext::new(markdown, crate::config::MarkdownFlavor::Pandoc, None);

    let result_pandoc = MD040FencedCodeLanguage::default().check(&ctx_pandoc).unwrap();

    assert!(
        result_pandoc.is_empty(),
        "MD040 under Pandoc should accept ```{{#snippet .python …}}```: {result_pandoc:?}"
    );
}
1456
/// The Standard flavor has no Pandoc code-attribute support, so `{.python}`
/// is unrecognized brace syntax there and must still be flagged as a missing
/// language.
#[test]
fn test_standard_still_flags_pandoc_class_attr() {
    let markdown = "```{.python}\nprint(\"hi\")\n```\n";
    let ctx_standard = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);

    let warnings = MD040FencedCodeLanguage::default().check(&ctx_standard).unwrap();

    assert!(
        !warnings.is_empty(),
        "MD040 under Standard should still flag ```{{.python}}``` (no Pandoc support)"
    );
}
1472
/// A brace block holding only an id (`{#myid}`) has no class and therefore
/// declares no language; it must be flagged even under the Pandoc flavor.
#[test]
fn test_pandoc_id_only_attr_still_flagged() {
    let markdown = "```{#myid}\ncode here\n```\n";
    let ctx_pandoc = LintContext::new(markdown, crate::config::MarkdownFlavor::Pandoc, None);

    let warnings = MD040FencedCodeLanguage::default().check(&ctx_pandoc).unwrap();

    assert!(
        !warnings.is_empty(),
        "MD040 under Pandoc should flag ```{{#myid}}``` — id without class declares no language"
    );
}
1488
/// Empty braces (`{}`) declare nothing, so the block must be flagged as
/// missing a language; this test checks the Pandoc flavor.
#[test]
fn test_pandoc_empty_braces_still_flagged() {
    let markdown = "```{}\ncode here\n```\n";
    let ctx_pandoc = LintContext::new(markdown, crate::config::MarkdownFlavor::Pandoc, None);

    let warnings = MD040FencedCodeLanguage::default().check(&ctx_pandoc).unwrap();

    assert!(
        !warnings.is_empty(),
        "MD040 under Pandoc should flag ```{{}}``` (no language declared)"
    );
}
1503}