Skip to main content

rumdl_lib/rules/
md040_fenced_code_language.rs

1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use std::collections::HashMap;
6
7/// Rule MD040: Fenced code blocks should have a language
8///
9/// See [docs/md040.md](../../docs/md040.md) for full documentation, configuration, and examples.
10pub mod md040_config;
11
12// ============================================================================
13// MkDocs Superfences Attribute Detection
14// ============================================================================
15
/// Prefixes that indicate MkDocs superfences attributes rather than language identifiers.
/// These are valid in MkDocs flavor without a language specification.
/// Entries are compared with `str::starts_with`, so each one matches any token that
/// merely begins with it (e.g. `.` matches `.annotate`).
/// See: https://facelessuser.github.io/pymdown-extensions/extensions/superfences/
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &[
    "title=",    // Block title
    "hl_lines=", // Highlighted lines
    "linenums=", // Line numbers
    ".",         // CSS class (e.g., .annotate)
    "#",         // CSS id
];
26
27/// Check if a string starts with a MkDocs superfences attribute prefix
28#[inline]
29fn is_superfences_attribute(s: &str) -> bool {
30    MKDOCS_SUPERFENCES_ATTR_PREFIXES
31        .iter()
32        .any(|prefix| s.starts_with(prefix))
33}
34use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
35
/// Minimal description of one fenced code block: where its opening fence is,
/// which label it carries, and which fence characters open it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FencedCodeBlock {
    /// 0-indexed line number where the code block starts
    line_idx: usize,
    /// The language/info string (empty if no language specified)
    language: String,
    /// The fence marker used (``` or ~~~)
    fence_marker: String,
}
44
/// Lint rule MD040: reports fenced code blocks that have no language label and,
/// depending on `config`, labels that are not allowed, unknown, or inconsistent.
#[derive(Debug, Clone, Default)]
pub struct MD040FencedCodeLanguage {
    /// Rule settings: label style, allow/deny lists, preferred aliases and the
    /// action taken for unknown languages.
    config: MD040Config,
}
49
50impl MD040FencedCodeLanguage {
51    pub fn with_config(config: MD040Config) -> Self {
52        Self { config }
53    }
54
55    /// Validate the configuration and return any errors
56    fn validate_config(&self) -> Vec<String> {
57        let mut errors = Vec::new();
58
59        // Validate preferred-aliases: check that each alias is valid for its language
60        for (canonical, alias) in &self.config.preferred_aliases {
61            // Find the actual canonical name (case-insensitive)
62            if let Some(actual_canonical) = resolve_canonical(canonical) {
63                if !is_valid_alias(actual_canonical, alias)
64                    && let Some(valid_aliases) = get_aliases(actual_canonical)
65                {
66                    let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
67                    let valid_str = valid_list
68                        .iter()
69                        .map(|s| format!("'{s}'"))
70                        .collect::<Vec<_>>()
71                        .join(", ");
72                    let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
73                    errors.push(format!(
74                        "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
75                    ));
76                }
77            } else {
78                errors.push(format!(
79                    "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
80                ));
81            }
82        }
83
84        errors
85    }
86
87    /// Determine the preferred label for each canonical language in the document
88    fn compute_preferred_labels(
89        &self,
90        blocks: &[FencedCodeBlock],
91        disabled_ranges: &[(usize, usize)],
92    ) -> HashMap<String, String> {
93        // Group labels by canonical language
94        let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
95
96        for block in blocks {
97            if is_line_disabled(disabled_ranges, block.line_idx) {
98                continue;
99            }
100            if block.language.is_empty() {
101                continue;
102            }
103            if let Some(canonical) = resolve_canonical(&block.language) {
104                by_canonical
105                    .entry(canonical.to_string())
106                    .or_default()
107                    .push(&block.language);
108            }
109        }
110
111        // Determine winning label for each canonical language
112        let mut result = HashMap::new();
113
114        for (canonical, labels) in by_canonical {
115            // Check for user override first (case-insensitive lookup)
116            let winner = if let Some(preferred) = self
117                .config
118                .preferred_aliases
119                .iter()
120                .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
121                .map(|(_, v)| v.clone())
122            {
123                preferred
124            } else {
125                // Find most prevalent label
126                let mut counts: HashMap<&str, usize> = HashMap::new();
127                for label in &labels {
128                    *counts.entry(*label).or_default() += 1;
129                }
130
131                let max_count = counts.values().max().copied().unwrap_or(0);
132                let winners: Vec<_> = counts
133                    .iter()
134                    .filter(|(_, c)| **c == max_count)
135                    .map(|(l, _)| *l)
136                    .collect();
137
138                if winners.len() == 1 {
139                    winners[0].to_string()
140                } else {
141                    // Tie-break: use curated default if available, otherwise alphabetically first
142                    default_alias(&canonical)
143                        .filter(|default| winners.contains(default))
144                        .map_or_else(
145                            || winners.into_iter().min().unwrap().to_string(),
146                            std::string::ToString::to_string,
147                        )
148                }
149            };
150
151            result.insert(canonical, winner);
152        }
153
154        result
155    }
156
157    /// Check if a language is allowed based on config
158    fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
159        // Allowlist takes precedence
160        if !self.config.allowed_languages.is_empty() {
161            let allowed = self.config.allowed_languages.join(", ");
162            let Some(canonical) = canonical else {
163                return Some(format!(
164                    "Language '{original_label}' is not in the allowed list: {allowed}"
165                ));
166            };
167            if !self
168                .config
169                .allowed_languages
170                .iter()
171                .any(|a| a.eq_ignore_ascii_case(canonical))
172            {
173                return Some(format!(
174                    "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
175                ));
176            }
177        } else if !self.config.disallowed_languages.is_empty()
178            && canonical.is_some_and(|canonical| {
179                self.config
180                    .disallowed_languages
181                    .iter()
182                    .any(|d| d.eq_ignore_ascii_case(canonical))
183            })
184        {
185            let canonical = canonical.unwrap_or("unknown");
186            return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
187        }
188        None
189    }
190
191    /// Check for unknown language based on config
192    fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
193        if resolve_canonical(label).is_some() {
194            return None;
195        }
196
197        match self.config.unknown_language_action {
198            UnknownLanguageAction::Ignore => None,
199            UnknownLanguageAction::Warn => Some((
200                format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
201                Severity::Warning,
202            )),
203            UnknownLanguageAction::Error => Some((
204                format!("Unknown language '{label}' (not in GitHub Linguist)"),
205                Severity::Error,
206            )),
207        }
208    }
209}
210
211impl Rule for MD040FencedCodeLanguage {
    /// Returns the rule identifier, `"MD040"`.
    fn name(&self) -> &'static str {
        "MD040"
    }
215
    /// Returns the one-line, human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Code blocks should have a language specified"
    }
219
220    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
221        let content = ctx.content;
222        let mut warnings = Vec::new();
223
224        // Validate config and emit warnings for invalid configuration
225        for error in self.validate_config() {
226            warnings.push(LintWarning {
227                rule_name: Some(self.name().to_string()),
228                line: 1,
229                column: 1,
230                end_line: 1,
231                end_column: 1,
232                message: format!("[config error] {error}"),
233                severity: Severity::Error,
234                fix: None,
235            });
236        }
237
238        // Derive fenced code blocks from pre-computed context
239        let fenced_blocks = derive_fenced_code_blocks(ctx);
240
241        // Pre-compute disabled ranges for efficient lookup
242        let disabled_ranges = compute_disabled_ranges(content, self.name());
243
244        // Compute preferred labels for consistent mode
245        let preferred_labels = if self.config.style == LanguageStyle::Consistent {
246            self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
247        } else {
248            HashMap::new()
249        };
250
251        let lines = ctx.raw_lines();
252
253        for block in &fenced_blocks {
254            // Skip if this line is in a disabled range
255            if is_line_disabled(&disabled_ranges, block.line_idx) {
256                continue;
257            }
258
259            // Get the actual line content for additional checks
260            let line = lines.get(block.line_idx).unwrap_or(&"");
261            let trimmed = line.trim();
262            let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
263
264            // Check if fence has MkDocs superfences attributes but no language
265            let has_mkdocs_attrs_only =
266                ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
267
268            // Pandoc/Quarto brace-syntax code chunks fall into three forms:
269            //   1. `{=html}` raw blocks — accepted under any Pandoc-compatible flavor.
270            //      Validated by `is_pandoc_raw_block_lang` (non-empty ASCII format name).
271            //   2. `{.python}` / `{.haskell .numberLines}` code-attribute syntax — the
272            //      first `.class` declares the language. Accepted under any
273            //      Pandoc-compatible flavor.
274            //   3. `{r}` / `{python}` exec chunks — accepted under Quarto only.
275            // Anything else wrapped in braces (e.g. `{r}` under pure Pandoc, or
276            // `{#myid}` with no class) is not a real language identifier and must be
277            // flagged as missing-language.
278            let is_pandoc_raw =
279                ctx.flavor.is_pandoc_compatible() && crate::utils::pandoc::is_pandoc_raw_block_lang(after_fence);
280            let is_pandoc_class_attr =
281                ctx.flavor.is_pandoc_compatible() && crate::utils::pandoc::is_pandoc_code_class_attr(after_fence);
282            let is_quarto_exec = ctx.flavor == crate::config::MarkdownFlavor::Quarto
283                && after_fence.starts_with('{')
284                && after_fence.ends_with('}')
285                && !is_pandoc_raw
286                && !is_pandoc_class_attr;
287            let has_pandoc_or_quarto_syntax = is_pandoc_raw || is_pandoc_class_attr || is_quarto_exec;
288            let is_unrecognized_brace_syntax =
289                after_fence.starts_with('{') && after_fence.ends_with('}') && !has_pandoc_or_quarto_syntax;
290
291            let needs_language = !has_mkdocs_attrs_only
292                && (block.language.is_empty()
293                    || is_superfences_attribute(&block.language)
294                    || is_unrecognized_brace_syntax);
295
296            if needs_language && !has_pandoc_or_quarto_syntax {
297                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
298
299                warnings.push(LintWarning {
300                    rule_name: Some(self.name().to_string()),
301                    line: start_line,
302                    column: start_col,
303                    end_line,
304                    end_column: end_col,
305                    message: "Code block (```) missing language".to_string(),
306                    severity: Severity::Warning,
307                    fix: Some(Fix::new(
308                        {
309                            let trimmed = line.trim_start();
310                            let trimmed_start = line.len() - trimmed.len();
311                            let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
312                            let fence_end_byte = line_start_byte + trimmed_start + block.fence_marker.len();
313                            // Replace from after fence marker to end of line content,
314                            // so trailing whitespace is cleaned up while any existing
315                            // info string / attributes are preserved via the replacement.
316                            let line_end_byte = line_start_byte + line.len();
317                            fence_end_byte..line_end_byte
318                        },
319                        {
320                            let trimmed = line.trim_start();
321                            let after_fence = &trimmed[block.fence_marker.len()..];
322                            let after_fence_trimmed = after_fence.trim();
323                            if after_fence_trimmed.is_empty() {
324                                "text".to_string()
325                            } else {
326                                format!("text {after_fence_trimmed}")
327                            }
328                        },
329                    )),
330                });
331                continue;
332            }
333
334            // Skip further checks for Pandoc raw blocks and Quarto exec chunks
335            if has_pandoc_or_quarto_syntax {
336                continue;
337            }
338
339            let canonical = resolve_canonical(&block.language);
340
341            // Check language restrictions (allowlist/denylist)
342            if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
343                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
344
345                warnings.push(LintWarning {
346                    rule_name: Some(self.name().to_string()),
347                    line: start_line,
348                    column: start_col,
349                    end_line,
350                    end_column: end_col,
351                    message: msg,
352                    severity: Severity::Warning,
353                    fix: None,
354                });
355                continue;
356            }
357
358            // Check for unknown language (only if not handled by allowlist)
359            if canonical.is_none() {
360                if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
361                    let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
362
363                    warnings.push(LintWarning {
364                        rule_name: Some(self.name().to_string()),
365                        line: start_line,
366                        column: start_col,
367                        end_line,
368                        end_column: end_col,
369                        message: msg,
370                        severity,
371                        fix: None,
372                    });
373                }
374                continue;
375            }
376
377            // Check consistency
378            if self.config.style == LanguageStyle::Consistent
379                && let Some(preferred) = preferred_labels.get(canonical.unwrap())
380                && &block.language != preferred
381            {
382                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
383
384                let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
385                    let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
386                    Fix::new(
387                        (line_start_byte + label_start)..(line_start_byte + label_end),
388                        preferred.clone(),
389                    )
390                });
391                let lang = &block.language;
392                let canonical = canonical.unwrap();
393
394                warnings.push(LintWarning {
395                    rule_name: Some(self.name().to_string()),
396                    line: start_line,
397                    column: start_col,
398                    end_line,
399                    end_column: end_col,
400                    message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
401                    severity: Severity::Warning,
402                    fix,
403                });
404            }
405        }
406
407        Ok(warnings)
408    }
409
410    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
411        if self.should_skip(ctx) {
412            return Ok(ctx.content.to_string());
413        }
414        let warnings = self.check(ctx)?;
415        if warnings.is_empty() {
416            return Ok(ctx.content.to_string());
417        }
418        let warnings =
419            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
420        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
421    }
422
    /// Returns the category this rule belongs to (`RuleCategory::CodeBlock`),
    /// used for selective processing.
    fn category(&self) -> RuleCategory {
        RuleCategory::CodeBlock
    }
427
428    /// Check if this rule should be skipped
429    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
430        ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
431    }
432
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
436
437    fn default_config_section(&self) -> Option<(String, toml::Value)> {
438        let default_config = MD040Config::default();
439        let json_value = serde_json::to_value(&default_config).ok()?;
440        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
441
442        if let toml::Value::Table(table) = toml_value {
443            if !table.is_empty() {
444                Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
445            } else {
446                None
447            }
448        } else {
449            None
450        }
451    }
452
453    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
454    where
455        Self: Sized,
456    {
457        let rule_config: MD040Config = load_rule_config(config);
458        Box::new(MD040FencedCodeLanguage::with_config(rule_config))
459    }
460}
461
462/// Derive fenced code blocks from pre-computed CodeBlockDetail data
463fn derive_fenced_code_blocks(ctx: &crate::lint_context::LintContext) -> Vec<FencedCodeBlock> {
464    let content = ctx.content;
465    let line_offsets = &ctx.line_offsets;
466
467    ctx.code_block_details
468        .iter()
469        .filter(|d| d.is_fenced)
470        .map(|detail| {
471            let line_idx = match line_offsets.binary_search(&detail.start) {
472                Ok(idx) => idx,
473                Err(idx) => idx.saturating_sub(1),
474            };
475
476            // Determine fence marker from the actual line content
477            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
478            let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
479            let line = content.get(line_start..line_end).unwrap_or("");
480            let trimmed = line.trim();
481            let fence_marker = if trimmed.starts_with('`') {
482                let count = trimmed.chars().take_while(|&c| c == '`').count();
483                "`".repeat(count)
484            } else if trimmed.starts_with('~') {
485                let count = trimmed.chars().take_while(|&c| c == '~').count();
486                "~".repeat(count)
487            } else {
488                "```".to_string()
489            };
490
491            let language = detail.info_string.split_whitespace().next().unwrap_or("").to_string();
492
493            FencedCodeBlock {
494                line_idx,
495                language,
496                fence_marker,
497            }
498        })
499        .collect()
500}
501
502/// Compute disabled line ranges from disable/enable comments
503fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
504    let mut ranges = Vec::new();
505    let mut disabled_start: Option<usize> = None;
506
507    for (i, line) in content.lines().enumerate() {
508        let trimmed = line.trim();
509
510        if let Some(rules) = crate::inline_config::parse_disable_comment(trimmed)
511            && (rules.is_empty() || rules.contains(&rule_name))
512            && disabled_start.is_none()
513        {
514            disabled_start = Some(i);
515        }
516
517        if let Some(rules) = crate::inline_config::parse_enable_comment(trimmed)
518            && (rules.is_empty() || rules.contains(&rule_name))
519            && let Some(start) = disabled_start.take()
520        {
521            ranges.push((start, i));
522        }
523    }
524
525    // Handle unclosed disable
526    if let Some(start) = disabled_start {
527        ranges.push((start, usize::MAX));
528    }
529
530    ranges
531}
532
/// Reports whether `line_idx` falls inside any of the half-open `[start, end)` ranges.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&line_idx) {
            return true;
        }
    }
    false
}
537
/// Locates the language label on a fence line.
///
/// Returns the byte offsets `(start, end)`, relative to the start of `line`, of
/// the first whitespace-delimited token after `fence_marker`. Returns `None`
/// when the line, after its leading whitespace, does not begin with
/// `fence_marker`, or when only whitespace follows the marker.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let unindented = line.trim_start();
    let after_marker = unindented.strip_prefix(fence_marker)?;

    let label_onwards = after_marker.trim_start();
    if label_onwards.is_empty() {
        return None;
    }

    // `label_onwards` is a suffix of `line`, so its offset is the length difference.
    let start = line.len() - label_onwards.len();
    let label_len = label_onwards
        .find(char::is_whitespace)
        .unwrap_or(label_onwards.len());

    Some((start, start + label_len))
}
562
563#[cfg(test)]
564mod tests {
565    use super::*;
566    use crate::lint_context::LintContext;
567
568    fn run_check(content: &str) -> LintResult {
569        let rule = MD040FencedCodeLanguage::default();
570        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
571        rule.check(&ctx)
572    }
573
574    fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
575        let rule = MD040FencedCodeLanguage::with_config(config);
576        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
577        rule.check(&ctx)
578    }
579
580    fn run_fix(content: &str) -> Result<String, LintError> {
581        let rule = MD040FencedCodeLanguage::default();
582        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
583        rule.fix(&ctx)
584    }
585
586    fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
587        let rule = MD040FencedCodeLanguage::with_config(config);
588        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
589        rule.fix(&ctx)
590    }
591
592    fn run_check_mkdocs(content: &str) -> LintResult {
593        let rule = MD040FencedCodeLanguage::default();
594        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
595        rule.check(&ctx)
596    }
597
598    // =========================================================================
599    // Basic functionality tests
600    // =========================================================================
601
    /// Fences that already carry a language label must not produce warnings.
    #[test]
    fn test_code_blocks_with_language_specified() {
        let content = r#"# Test

```python
print("Hello, world!")
```

```javascript
console.log("Hello!");
```
"#;
        let result = run_check(content).unwrap();
        assert!(result.is_empty(), "No warnings expected for code blocks with language");
    }
617
    /// An unlabelled fence yields exactly one missing-language warning, reported
    /// on the opening fence line (line 3 of the fixture).
    #[test]
    fn test_code_blocks_without_language() {
        let content = r#"# Test

```
print("Hello, world!")
```
"#;
        let result = run_check(content).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Code block (```) missing language");
        assert_eq!(result[0].line, 3);
    }
631
    /// `fix` labels both unlabelled fences as `text` and leaves the existing
    /// `python` label in place.
    #[test]
    fn test_fix_method_adds_text_language() {
        let content = r#"# Test

```
code without language
```

```python
already has language
```

```
another block without
```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("```text"));
        assert!(fixed.contains("```python"));
        assert_eq!(fixed.matches("```text").count(), 2);
    }
653
    /// The inserted `text` label keeps the fence's two-space indentation.
    #[test]
    fn test_fix_preserves_indentation() {
        let content = r#"# Test

- List item
  ```
  indented code block
  ```
"#;
        let fixed = run_fix(content).unwrap();
        assert!(fixed.contains("  ```text"));
    }
666
667    // =========================================================================
668    // Consistent mode tests
669    // =========================================================================
670
    /// Consistent mode flags the single `sh` block when `bash` is used twice;
    /// the message names both labels.
    #[test]
    fn test_consistent_mode_detects_inconsistency() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Inconsistent"));
        assert!(result[0].message.contains("sh"));
        assert!(result[0].message.contains("bash"));
    }
695
    /// Fixing in consistent mode rewrites the minority `sh` label to `bash`, so
    /// all three fences end up labelled `bash`.
    #[test]
    fn test_consistent_mode_fix_normalizes() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```

```bash
echo again
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```bash").count(), 3);
        assert_eq!(fixed.matches("```sh").count(), 0);
    }
718
    /// A one-to-one tie between `bash` and `sh` is fixed to `bash` for both fences.
    #[test]
    fn test_consistent_mode_tie_break_uses_curated_default() {
        // When there's a tie (1 bash, 1 sh), should use curated default (bash)
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        // bash is the curated default for Shell
        assert_eq!(fixed.matches("```bash").count(), 2);
    }
738
    /// A configured preferred alias (`Shell` -> `sh`) makes `sh` the label both
    /// fences are fixed to.
    #[test]
    fn test_consistent_mode_with_preferred_alias() {
        let content = r#"```bash
echo hi
```

```sh
echo there
```
"#;
        let mut preferred = HashMap::new();
        preferred.insert("Shell".to_string(), "sh".to_string());

        let config = MD040Config {
            style: LanguageStyle::Consistent,
            preferred_aliases: preferred,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert_eq!(fixed.matches("```sh").count(), 2);
        assert_eq!(fixed.matches("```bash").count(), 0);
    }
761
    /// Fences between `rumdl-disable MD040` and `rumdl-enable MD040` are neither
    /// flagged nor allowed to cause a warning on the `bash` fence outside the region.
    #[test]
    fn test_consistent_mode_ignores_disabled_blocks() {
        let content = r#"```bash
echo hi
```
<!-- rumdl-disable MD040 -->
```sh
echo there
```
```sh
echo again
```
<!-- rumdl-enable MD040 -->
"#;
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert!(result.is_empty(), "Disabled blocks should not affect consistency");
    }
783
    /// Relabelling replaces only the label and keeps the trailing `{.highlight}` attribute.
    #[test]
    fn test_fix_preserves_attributes() {
        let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("```bash {.highlight}"));
    }
794
    /// Relabelling keeps the whitespace between the fence marker and the label.
    #[test]
    fn test_fix_preserves_spacing_before_label() {
        let content = "```bash\ncode\n```\n\n```  sh {.highlight}\ncode\n```";
        let config = MD040Config {
            style: LanguageStyle::Consistent,
            ..Default::default()
        };
        let fixed = run_fix_with_config(content, config).unwrap();
        assert!(fixed.contains("```  bash {.highlight}"));
        assert!(!fixed.contains("```  sh {.highlight}"));
    }
806
807    // =========================================================================
808    // Allowlist/denylist tests
809    // =========================================================================
810
    /// A known language that is missing from the allowlist is reported.
    #[test]
    fn test_allowlist_blocks_unlisted() {
        let content = "```java\ncode\n```";
        let config = MD040Config {
            allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
            ..Default::default()
        };
        let result = run_check_with_config(content, config).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("not in the allowed list"));
    }
822
823    #[test]
824    fn test_allowlist_allows_listed() {
825        let content = "```python\ncode\n```";
826        let config = MD040Config {
827            allowed_languages: vec!["Python".to_string()],
828            ..Default::default()
829        };
830        let result = run_check_with_config(content, config).unwrap();
831        assert!(result.is_empty());
832    }
833
834    #[test]
835    fn test_allowlist_blocks_unknown_language() {
836        let content = "```mysterylang\ncode\n```";
837        let config = MD040Config {
838            allowed_languages: vec!["Python".to_string()],
839            ..Default::default()
840        };
841        let result = run_check_with_config(content, config).unwrap();
842        assert_eq!(result.len(), 1);
843        assert!(result[0].message.contains("allowed list"));
844    }
845
846    #[test]
847    fn test_allowlist_case_insensitive() {
848        let content = "```python\ncode\n```";
849        let config = MD040Config {
850            allowed_languages: vec!["PYTHON".to_string()],
851            ..Default::default()
852        };
853        let result = run_check_with_config(content, config).unwrap();
854        assert!(result.is_empty());
855    }
856
857    #[test]
858    fn test_denylist_blocks_listed() {
859        let content = "```java\ncode\n```";
860        let config = MD040Config {
861            disallowed_languages: vec!["Java".to_string()],
862            ..Default::default()
863        };
864        let result = run_check_with_config(content, config).unwrap();
865        assert_eq!(result.len(), 1);
866        assert!(result[0].message.contains("disallowed"));
867    }
868
869    #[test]
870    fn test_denylist_allows_unlisted() {
871        let content = "```python\ncode\n```";
872        let config = MD040Config {
873            disallowed_languages: vec!["Java".to_string()],
874            ..Default::default()
875        };
876        let result = run_check_with_config(content, config).unwrap();
877        assert!(result.is_empty());
878    }
879
880    #[test]
881    fn test_allowlist_takes_precedence_over_denylist() {
882        let content = "```python\ncode\n```";
883        let config = MD040Config {
884            allowed_languages: vec!["Python".to_string()],
885            disallowed_languages: vec!["Python".to_string()], // Should be ignored
886            ..Default::default()
887        };
888        let result = run_check_with_config(content, config).unwrap();
889        assert!(result.is_empty());
890    }
891
892    // =========================================================================
893    // Unknown language tests
894    // =========================================================================
895
896    #[test]
897    fn test_unknown_language_ignore_default() {
898        let content = "```mycustomlang\ncode\n```";
899        let result = run_check(content).unwrap();
900        assert!(result.is_empty(), "Unknown languages ignored by default");
901    }
902
903    #[test]
904    fn test_unknown_language_warn() {
905        let content = "```mycustomlang\ncode\n```";
906        let config = MD040Config {
907            unknown_language_action: UnknownLanguageAction::Warn,
908            ..Default::default()
909        };
910        let result = run_check_with_config(content, config).unwrap();
911        assert_eq!(result.len(), 1);
912        assert!(result[0].message.contains("Unknown language"));
913        assert!(result[0].message.contains("mycustomlang"));
914        assert_eq!(result[0].severity, Severity::Warning);
915    }
916
917    #[test]
918    fn test_unknown_language_error() {
919        let content = "```mycustomlang\ncode\n```";
920        let config = MD040Config {
921            unknown_language_action: UnknownLanguageAction::Error,
922            ..Default::default()
923        };
924        let result = run_check_with_config(content, config).unwrap();
925        assert_eq!(result.len(), 1);
926        assert!(result[0].message.contains("Unknown language"));
927        assert_eq!(result[0].severity, Severity::Error);
928    }
929
930    // =========================================================================
931    // Config validation tests
932    // =========================================================================
933
934    #[test]
935    fn test_invalid_preferred_alias_detected() {
936        let mut preferred = HashMap::new();
937        preferred.insert("Shell".to_string(), "invalid_alias".to_string());
938
939        let config = MD040Config {
940            style: LanguageStyle::Consistent,
941            preferred_aliases: preferred,
942            ..Default::default()
943        };
944        let rule = MD040FencedCodeLanguage::with_config(config);
945        let errors = rule.validate_config();
946        assert_eq!(errors.len(), 1);
947        assert!(errors[0].contains("Invalid alias"));
948        assert!(errors[0].contains("invalid_alias"));
949    }
950
951    #[test]
952    fn test_unknown_language_in_preferred_aliases_detected() {
953        let mut preferred = HashMap::new();
954        preferred.insert("NotARealLanguage".to_string(), "nope".to_string());
955
956        let config = MD040Config {
957            style: LanguageStyle::Consistent,
958            preferred_aliases: preferred,
959            ..Default::default()
960        };
961        let rule = MD040FencedCodeLanguage::with_config(config);
962        let errors = rule.validate_config();
963        assert_eq!(errors.len(), 1);
964        assert!(errors[0].contains("Unknown language"));
965    }
966
967    #[test]
968    fn test_valid_preferred_alias_accepted() {
969        let mut preferred = HashMap::new();
970        preferred.insert("Shell".to_string(), "bash".to_string());
971        preferred.insert("JavaScript".to_string(), "js".to_string());
972
973        let config = MD040Config {
974            style: LanguageStyle::Consistent,
975            preferred_aliases: preferred,
976            ..Default::default()
977        };
978        let rule = MD040FencedCodeLanguage::with_config(config);
979        let errors = rule.validate_config();
980        assert!(errors.is_empty());
981    }
982
983    #[test]
984    fn test_config_error_uses_valid_line_column() {
985        let config = md040_config::MD040Config {
986            preferred_aliases: {
987                let mut map = std::collections::HashMap::new();
988                map.insert("Shell".to_string(), "invalid_alias".to_string());
989                map
990            },
991            ..Default::default()
992        };
993        let rule = MD040FencedCodeLanguage::with_config(config);
994
995        let content = "```shell\necho hello\n```";
996        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
997        let result = rule.check(&ctx).unwrap();
998
999        // Find the config error warning
1000        let config_error = result.iter().find(|w| w.message.contains("[config error]"));
1001        assert!(config_error.is_some(), "Should have a config error warning");
1002
1003        let warning = config_error.unwrap();
1004        // Line and column should be 1-indexed (not 0)
1005        assert!(
1006            warning.line >= 1,
1007            "Config error line should be >= 1, got {}",
1008            warning.line
1009        );
1010        assert!(
1011            warning.column >= 1,
1012            "Config error column should be >= 1, got {}",
1013            warning.column
1014        );
1015    }
1016
1017    // =========================================================================
1018    // Linguist resolution tests
1019    // =========================================================================
1020
1021    #[test]
1022    fn test_linguist_resolution() {
1023        assert_eq!(resolve_canonical("bash"), Some("Shell"));
1024        assert_eq!(resolve_canonical("sh"), Some("Shell"));
1025        assert_eq!(resolve_canonical("zsh"), Some("Shell"));
1026        assert_eq!(resolve_canonical("js"), Some("JavaScript"));
1027        assert_eq!(resolve_canonical("python"), Some("Python"));
1028        assert_eq!(resolve_canonical("unknown_lang"), None);
1029    }
1030
1031    #[test]
1032    fn test_linguist_resolution_case_insensitive() {
1033        assert_eq!(resolve_canonical("BASH"), Some("Shell"));
1034        assert_eq!(resolve_canonical("Bash"), Some("Shell"));
1035        assert_eq!(resolve_canonical("Python"), Some("Python"));
1036        assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
1037    }
1038
1039    #[test]
1040    fn test_alias_validation() {
1041        assert!(is_valid_alias("Shell", "bash"));
1042        assert!(is_valid_alias("Shell", "sh"));
1043        assert!(is_valid_alias("Shell", "zsh"));
1044        assert!(!is_valid_alias("Shell", "python"));
1045        assert!(!is_valid_alias("Shell", "invalid"));
1046    }
1047
1048    #[test]
1049    fn test_default_alias() {
1050        assert_eq!(default_alias("Shell"), Some("bash"));
1051        assert_eq!(default_alias("JavaScript"), Some("js"));
1052        assert_eq!(default_alias("Python"), Some("python"));
1053    }
1054
1055    // =========================================================================
1056    // Edge case tests
1057    // =========================================================================
1058
1059    #[test]
1060    fn test_mixed_case_labels_normalized() {
1061        let content = r#"```BASH
1062echo hi
1063```
1064
1065```Bash
1066echo there
1067```
1068
1069```bash
1070echo again
1071```
1072"#;
1073        let config = MD040Config {
1074            style: LanguageStyle::Consistent,
1075            ..Default::default()
1076        };
1077        // All should resolve to Shell, most prevalent should win
1078        let result = run_check_with_config(content, config).unwrap();
1079        // "bash" appears 1x, "Bash" appears 1x, "BASH" appears 1x
1080        // All are different strings, so there's a 3-way tie
1081        // Should pick curated default "bash" or alphabetically first
1082        assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
1083    }
1084
1085    #[test]
1086    fn test_multiple_languages_independent() {
1087        let content = r#"```bash
1088shell code
1089```
1090
1091```python
1092python code
1093```
1094
1095```sh
1096more shell
1097```
1098
1099```python3
1100more python
1101```
1102"#;
1103        let config = MD040Config {
1104            style: LanguageStyle::Consistent,
1105            ..Default::default()
1106        };
1107        let result = run_check_with_config(content, config).unwrap();
1108        // Should have 2 warnings: one for sh (inconsistent with bash) and one for python3 (inconsistent with python)
1109        assert_eq!(result.len(), 2);
1110    }
1111
1112    #[test]
1113    fn test_tilde_fences() {
1114        let content = r#"~~~bash
1115echo hi
1116~~~
1117
1118~~~sh
1119echo there
1120~~~
1121"#;
1122        let config = MD040Config {
1123            style: LanguageStyle::Consistent,
1124            ..Default::default()
1125        };
1126        let result = run_check_with_config(content, config.clone()).unwrap();
1127        assert_eq!(result.len(), 1);
1128
1129        let fixed = run_fix_with_config(content, config).unwrap();
1130        assert!(fixed.contains("~~~bash"));
1131        assert!(!fixed.contains("~~~sh"));
1132    }
1133
1134    #[test]
1135    fn test_longer_fence_markers_preserved() {
1136        let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
1137        let config = MD040Config {
1138            style: LanguageStyle::Consistent,
1139            ..Default::default()
1140        };
1141        let fixed = run_fix_with_config(content, config).unwrap();
1142        assert!(fixed.contains("````bash"));
1143        assert!(fixed.contains("```bash"));
1144    }
1145
1146    #[test]
1147    fn test_empty_document() {
1148        let result = run_check("").unwrap();
1149        assert!(result.is_empty());
1150    }
1151
1152    #[test]
1153    fn test_no_code_blocks() {
1154        let content = "# Just a heading\n\nSome text.";
1155        let result = run_check(content).unwrap();
1156        assert!(result.is_empty());
1157    }
1158
1159    #[test]
1160    fn test_single_code_block_no_inconsistency() {
1161        let content = "```bash\necho hi\n```";
1162        let config = MD040Config {
1163            style: LanguageStyle::Consistent,
1164            ..Default::default()
1165        };
1166        let result = run_check_with_config(content, config).unwrap();
1167        assert!(result.is_empty(), "Single block has no inconsistency");
1168    }
1169
1170    #[test]
1171    fn test_idempotent_fix() {
1172        let content = r#"```bash
1173echo hi
1174```
1175
1176```sh
1177echo there
1178```
1179"#;
1180        let config = MD040Config {
1181            style: LanguageStyle::Consistent,
1182            ..Default::default()
1183        };
1184        let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
1185        let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
1186        assert_eq!(fixed1, fixed2, "Fix should be idempotent");
1187    }
1188
1189    // =========================================================================
1190    // MkDocs superfences tests
1191    // =========================================================================
1192
1193    #[test]
1194    fn test_mkdocs_superfences_title_only() {
1195        // title= attribute without language should not warn in MkDocs flavor
1196        let content = r#"```title="Example"
1197echo hi
1198```
1199"#;
1200        let result = run_check_mkdocs(content).unwrap();
1201        assert!(
1202            result.is_empty(),
1203            "MkDocs superfences with title= should not require language"
1204        );
1205    }
1206
1207    #[test]
1208    fn test_mkdocs_superfences_hl_lines() {
1209        // hl_lines= attribute without language should not warn
1210        let content = r#"```hl_lines="1 2"
1211line 1
1212line 2
1213```
1214"#;
1215        let result = run_check_mkdocs(content).unwrap();
1216        assert!(
1217            result.is_empty(),
1218            "MkDocs superfences with hl_lines= should not require language"
1219        );
1220    }
1221
1222    #[test]
1223    fn test_mkdocs_superfences_linenums() {
1224        // linenums= attribute without language should not warn
1225        let content = r#"```linenums="1"
1226line 1
1227line 2
1228```
1229"#;
1230        let result = run_check_mkdocs(content).unwrap();
1231        assert!(
1232            result.is_empty(),
1233            "MkDocs superfences with linenums= should not require language"
1234        );
1235    }
1236
1237    #[test]
1238    fn test_mkdocs_superfences_class() {
1239        // Custom class (starting with .) should not warn
1240        let content = r#"```.my-class
1241some text
1242```
1243"#;
1244        let result = run_check_mkdocs(content).unwrap();
1245        assert!(
1246            result.is_empty(),
1247            "MkDocs superfences with .class should not require language"
1248        );
1249    }
1250
1251    #[test]
1252    fn test_mkdocs_superfences_id() {
1253        // Custom ID (starting with #) should not warn
1254        let content = r#"```#my-id
1255some text
1256```
1257"#;
1258        let result = run_check_mkdocs(content).unwrap();
1259        assert!(
1260            result.is_empty(),
1261            "MkDocs superfences with #id should not require language"
1262        );
1263    }
1264
1265    #[test]
1266    fn test_mkdocs_superfences_with_language() {
1267        // Language with superfences attributes should work fine
1268        let content = r#"```python title="Example" hl_lines="1"
1269print("hello")
1270```
1271"#;
1272        let result = run_check_mkdocs(content).unwrap();
1273        assert!(result.is_empty(), "Code block with language and attrs should pass");
1274    }
1275
1276    #[test]
1277    fn test_standard_flavor_no_special_handling() {
1278        // In Standard flavor, title= should still warn
1279        let content = r#"```title="Example"
1280echo hi
1281```
1282"#;
1283        let result = run_check(content).unwrap();
1284        assert_eq!(
1285            result.len(),
1286            1,
1287            "Standard flavor should warn about title= without language"
1288        );
1289    }
1290
1291    #[test]
1292    fn test_pandoc_raw_block_skipped_under_pandoc_flavor() {
1293        // ```{=html} raw blocks are valid Pandoc syntax and should not trigger MD040
1294        // under Pandoc flavor.
1295        let rule = MD040FencedCodeLanguage::default();
1296        let content = "```{=html}\n<div>raw html</div>\n```\n";
1297        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Pandoc, None);
1298        let result = rule.check(&ctx).unwrap();
1299        assert!(
1300            result.is_empty(),
1301            "MD040 should skip Pandoc raw blocks ({{=html}}) under Pandoc flavor: {result:?}"
1302        );
1303    }
1304
1305    #[test]
1306    fn test_pandoc_raw_block_skipped_under_quarto_flavor() {
1307        // ```{=html} raw blocks are also valid under Quarto (which is Pandoc-compatible).
1308        let rule = MD040FencedCodeLanguage::default();
1309        let content = "```{=html}\n<div>raw html</div>\n```\n";
1310        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
1311        let result = rule.check(&ctx).unwrap();
1312        assert!(
1313            result.is_empty(),
1314            "MD040 should skip Pandoc raw blocks ({{=html}}) under Quarto flavor: {result:?}"
1315        );
1316    }
1317
1318    /// Pandoc raw blocks like ```` ```{=html} ```` declare an output target,
1319    /// not a missing language. MD040 must accept them under Pandoc.
1320    #[test]
1321    fn test_pandoc_accepts_raw_html_block() {
1322        use crate::config::MarkdownFlavor;
1323        let rule = MD040FencedCodeLanguage::default();
1324        let content = "```{=html}\n<div>raw</div>\n```\n";
1325        let ctx = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1326        let result = rule.check(&ctx).unwrap();
1327        assert!(result.is_empty(), "MD040 should accept ```{{=html}}```: {result:?}");
1328    }
1329
1330    /// Under Pandoc (not Quarto), `{r}` is NOT a valid raw-format declaration —
1331    /// it's a Quarto-only execution syntax that should be flagged as missing language.
1332    #[test]
1333    fn test_pandoc_rejects_quarto_exec_blocks() {
1334        use crate::config::MarkdownFlavor;
1335        let rule = MD040FencedCodeLanguage::default();
1336        let content = "```{r}\nsummary(data)\n```\n";
1337        let ctx = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1338        let result = rule.check(&ctx).unwrap();
1339        assert!(
1340            !result.is_empty(),
1341            "MD040 under Pandoc should flag `{{r}}` (Quarto-only)"
1342        );
1343    }
1344
1345    /// Under Quarto, `{r}` IS valid — Quarto exec syntax. Must not be flagged.
1346    #[test]
1347    fn test_quarto_still_accepts_exec_block() {
1348        use crate::config::MarkdownFlavor;
1349        let rule = MD040FencedCodeLanguage::default();
1350        let content = "```{r}\nsummary(data)\n```\n";
1351        let ctx = LintContext::new(content, MarkdownFlavor::Quarto, None);
1352        let result = rule.check(&ctx).unwrap();
1353        assert!(
1354            result.is_empty(),
1355            "MD040 under Quarto should accept `{{r}}`: {result:?}"
1356        );
1357    }
1358
1359    #[test]
1360    fn test_quarto_exec_block_skipped_under_quarto_only() {
1361        // ```{r} exec chunks are Quarto-specific syntax accepted only under the Quarto flavor.
1362        // Under Pandoc flavor, `{r}` is not a valid Pandoc raw-format declaration (those use
1363        // `{=format}` syntax), so MD040 flags it as missing a real language identifier.
1364        let rule = MD040FencedCodeLanguage::default();
1365        let content = "```{r}\n1 + 1\n```\n";
1366
1367        let ctx_quarto = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
1368        let result_quarto = rule.check(&ctx_quarto).unwrap();
1369        assert!(
1370            result_quarto.is_empty(),
1371            "MD040 should skip Quarto exec chunks under Quarto flavor: {result_quarto:?}"
1372        );
1373
1374        // Under Pandoc, `{r}` is unrecognized brace syntax — not a valid Pandoc raw block.
1375        // MD040 treats it as a missing language.
1376        let ctx_pandoc = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Pandoc, None);
1377        let result_pandoc = rule.check(&ctx_pandoc).unwrap();
1378        assert!(
1379            !result_pandoc.is_empty(),
1380            "MD040 should flag `{{r}}` under Pandoc as missing a real language"
1381        );
1382    }
1383
1384    /// Pandoc code-attribute syntax `{.lang}` declares the language and is valid under
1385    /// both Pandoc and Quarto. MD040 must accept it.
1386    #[test]
1387    fn test_pandoc_class_attr_accepted_as_language() {
1388        use crate::config::MarkdownFlavor;
1389        let rule = MD040FencedCodeLanguage::default();
1390        let content = "```{.python}\nprint(\"hi\")\n```\n";
1391
1392        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1393        let result_pandoc = rule.check(&ctx_pandoc).unwrap();
1394        assert!(
1395            result_pandoc.is_empty(),
1396            "MD040 under Pandoc should accept ```{{.python}}``` as language declaration: {result_pandoc:?}"
1397        );
1398
1399        let ctx_quarto = LintContext::new(content, MarkdownFlavor::Quarto, None);
1400        let result_quarto = rule.check(&ctx_quarto).unwrap();
1401        assert!(
1402            result_quarto.is_empty(),
1403            "MD040 under Quarto should accept ```{{.python}}``` as language declaration: {result_quarto:?}"
1404        );
1405    }
1406
1407    /// Pandoc code attributes can include multiple classes plus key=value pairs.
1408    /// The first class is the language; trailing attributes (e.g. `.numberLines`) are decoration.
1409    #[test]
1410    fn test_pandoc_class_attr_with_extra_attributes_accepted() {
1411        use crate::config::MarkdownFlavor;
1412        let rule = MD040FencedCodeLanguage::default();
1413        let content = "```{.haskell .numberLines}\nmain = putStrLn \"hi\"\n```\n";
1414
1415        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1416        let result_pandoc = rule.check(&ctx_pandoc).unwrap();
1417        assert!(
1418            result_pandoc.is_empty(),
1419            "MD040 under Pandoc should accept ```{{.haskell .numberLines}}```: {result_pandoc:?}"
1420        );
1421
1422        let ctx_quarto = LintContext::new(content, MarkdownFlavor::Quarto, None);
1423        let result_quarto = rule.check(&ctx_quarto).unwrap();
1424        assert!(
1425            result_quarto.is_empty(),
1426            "MD040 under Quarto should accept ```{{.haskell .numberLines}}```: {result_quarto:?}"
1427        );
1428    }
1429
1430    /// Pandoc code attributes can include id (`#myid`) and key=value attributes.
1431    /// As long as a `.class` is present, the block declares a language.
1432    #[test]
1433    fn test_pandoc_class_attr_with_id_and_keyvalue_accepted() {
1434        use crate::config::MarkdownFlavor;
1435        let rule = MD040FencedCodeLanguage::default();
1436        let content = "```{#snippet .python startFrom=\"10\"}\nprint(1)\n```\n";
1437
1438        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1439        let result_pandoc = rule.check(&ctx_pandoc).unwrap();
1440        assert!(
1441            result_pandoc.is_empty(),
1442            "MD040 under Pandoc should accept ```{{#snippet .python …}}```: {result_pandoc:?}"
1443        );
1444    }
1445
1446    /// Standard flavor knows nothing about Pandoc code attributes — they remain
1447    /// unrecognized brace syntax and must still be flagged as missing-language.
1448    #[test]
1449    fn test_standard_still_flags_pandoc_class_attr() {
1450        use crate::config::MarkdownFlavor;
1451        let rule = MD040FencedCodeLanguage::default();
1452        let content = "```{.python}\nprint(\"hi\")\n```\n";
1453
1454        let ctx_standard = LintContext::new(content, MarkdownFlavor::Standard, None);
1455        let result_standard = rule.check(&ctx_standard).unwrap();
1456        assert!(
1457            !result_standard.is_empty(),
1458            "MD040 under Standard should still flag ```{{.python}}``` (no Pandoc support)"
1459        );
1460    }
1461
1462    /// A brace block with only an id (`{#myid}`) and no class declares no language.
1463    /// Even under Pandoc this must remain flagged.
1464    #[test]
1465    fn test_pandoc_id_only_attr_still_flagged() {
1466        use crate::config::MarkdownFlavor;
1467        let rule = MD040FencedCodeLanguage::default();
1468        let content = "```{#myid}\ncode here\n```\n";
1469
1470        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1471        let result_pandoc = rule.check(&ctx_pandoc).unwrap();
1472        assert!(
1473            !result_pandoc.is_empty(),
1474            "MD040 under Pandoc should flag ```{{#myid}}``` — id without class declares no language"
1475        );
1476    }
1477
1478    /// Empty `{}` braces declare nothing and must still be flagged under any flavor.
1479    #[test]
1480    fn test_pandoc_empty_braces_still_flagged() {
1481        use crate::config::MarkdownFlavor;
1482        let rule = MD040FencedCodeLanguage::default();
1483        let content = "```{}\ncode here\n```\n";
1484
1485        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1486        let result_pandoc = rule.check(&ctx_pandoc).unwrap();
1487        assert!(
1488            !result_pandoc.is_empty(),
1489            "MD040 under Pandoc should flag ```{{}}``` (no language declared)"
1490        );
1491    }
1492}