Skip to main content

rumdl_lib/rules/
md040_fenced_code_language.rs

1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use std::collections::HashMap;
6
7/// Rule MD040: Fenced code blocks should have a language
8///
9/// See [docs/md040.md](../../docs/md040.md) for full documentation, configuration, and examples.
10pub mod md040_config;
11
12// ============================================================================
13// MkDocs Superfences Attribute Detection
14// ============================================================================
15
/// Info-string prefixes that MkDocs superfences treats as attributes, not as
/// a language identifier. In MkDocs flavor a fence may carry these without
/// naming a language.
/// See: https://facelessuser.github.io/pymdown-extensions/extensions/superfences/
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &[
    // Block title
    "title=",
    // Highlighted lines
    "hl_lines=",
    // Line numbers
    "linenums=",
    // CSS class (e.g., .annotate)
    ".",
    // CSS id
    "#",
];

/// Report whether `s` begins with one of the MkDocs superfences attribute prefixes.
#[inline]
fn is_superfences_attribute(s: &str) -> bool {
    for prefix in MKDOCS_SUPERFENCES_ATTR_PREFIXES {
        if s.starts_with(*prefix) {
            return true;
        }
    }
    false
}
34use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
35
/// The opening fence of one fenced code block, reduced to what this rule needs.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FencedCodeBlock {
    /// 0-indexed line number where the code block starts
    line_idx: usize,
    /// First whitespace-separated token of the info string (empty if the fence has none)
    language: String,
    /// The fence marker used: the full run of backticks or tildes that opens
    /// the block (e.g. ``` or ~~~, possibly longer)
    fence_marker: String,
}
44
/// Lint rule MD040: fenced code blocks should declare a language.
///
/// The rule is driven entirely by its [`MD040Config`]; the default value of
/// this type uses the default configuration.
#[derive(Debug, Clone, Default)]
pub struct MD040FencedCodeLanguage {
    /// Settings consulted by every check this rule performs
    config: MD040Config,
}
49
50impl MD040FencedCodeLanguage {
51    pub fn new() -> Self {
52        Self::default()
53    }
54
55    pub fn with_config(config: MD040Config) -> Self {
56        Self { config }
57    }
58
59    /// Validate the configuration and return any errors
60    fn validate_config(&self) -> Vec<String> {
61        let mut errors = Vec::new();
62
63        // Validate preferred-aliases: check that each alias is valid for its language
64        for (canonical, alias) in &self.config.preferred_aliases {
65            // Find the actual canonical name (case-insensitive)
66            if let Some(actual_canonical) = resolve_canonical(canonical) {
67                if !is_valid_alias(actual_canonical, alias)
68                    && let Some(valid_aliases) = get_aliases(actual_canonical)
69                {
70                    let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
71                    let valid_str = valid_list
72                        .iter()
73                        .map(|s| format!("'{s}'"))
74                        .collect::<Vec<_>>()
75                        .join(", ");
76                    let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
77                    errors.push(format!(
78                        "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
79                    ));
80                }
81            } else {
82                errors.push(format!(
83                    "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
84                ));
85            }
86        }
87
88        errors
89    }
90
91    /// Determine the preferred label for each canonical language in the document
92    fn compute_preferred_labels(
93        &self,
94        blocks: &[FencedCodeBlock],
95        disabled_ranges: &[(usize, usize)],
96    ) -> HashMap<String, String> {
97        // Group labels by canonical language
98        let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
99
100        for block in blocks {
101            if is_line_disabled(disabled_ranges, block.line_idx) {
102                continue;
103            }
104            if block.language.is_empty() {
105                continue;
106            }
107            if let Some(canonical) = resolve_canonical(&block.language) {
108                by_canonical
109                    .entry(canonical.to_string())
110                    .or_default()
111                    .push(&block.language);
112            }
113        }
114
115        // Determine winning label for each canonical language
116        let mut result = HashMap::new();
117
118        for (canonical, labels) in by_canonical {
119            // Check for user override first (case-insensitive lookup)
120            let winner = if let Some(preferred) = self
121                .config
122                .preferred_aliases
123                .iter()
124                .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
125                .map(|(_, v)| v.clone())
126            {
127                preferred
128            } else {
129                // Find most prevalent label
130                let mut counts: HashMap<&str, usize> = HashMap::new();
131                for label in &labels {
132                    *counts.entry(*label).or_default() += 1;
133                }
134
135                let max_count = counts.values().max().copied().unwrap_or(0);
136                let winners: Vec<_> = counts
137                    .iter()
138                    .filter(|(_, c)| **c == max_count)
139                    .map(|(l, _)| *l)
140                    .collect();
141
142                if winners.len() == 1 {
143                    winners[0].to_string()
144                } else {
145                    // Tie-break: use curated default if available, otherwise alphabetically first
146                    default_alias(&canonical)
147                        .filter(|default| winners.contains(default))
148                        .map(|s| s.to_string())
149                        .unwrap_or_else(|| winners.into_iter().min().unwrap().to_string())
150                }
151            };
152
153            result.insert(canonical, winner);
154        }
155
156        result
157    }
158
159    /// Check if a language is allowed based on config
160    fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
161        // Allowlist takes precedence
162        if !self.config.allowed_languages.is_empty() {
163            let allowed = self.config.allowed_languages.join(", ");
164            let Some(canonical) = canonical else {
165                return Some(format!(
166                    "Language '{original_label}' is not in the allowed list: {allowed}"
167                ));
168            };
169            if !self
170                .config
171                .allowed_languages
172                .iter()
173                .any(|a| a.eq_ignore_ascii_case(canonical))
174            {
175                return Some(format!(
176                    "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
177                ));
178            }
179        } else if !self.config.disallowed_languages.is_empty()
180            && canonical.is_some_and(|canonical| {
181                self.config
182                    .disallowed_languages
183                    .iter()
184                    .any(|d| d.eq_ignore_ascii_case(canonical))
185            })
186        {
187            let canonical = canonical.unwrap_or("unknown");
188            return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
189        }
190        None
191    }
192
193    /// Check for unknown language based on config
194    fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
195        if resolve_canonical(label).is_some() {
196            return None;
197        }
198
199        match self.config.unknown_language_action {
200            UnknownLanguageAction::Ignore => None,
201            UnknownLanguageAction::Warn => Some((
202                format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
203                Severity::Warning,
204            )),
205            UnknownLanguageAction::Error => Some((
206                format!("Unknown language '{label}' (not in GitHub Linguist)"),
207                Severity::Error,
208            )),
209        }
210    }
211}
212
213impl Rule for MD040FencedCodeLanguage {
214    fn name(&self) -> &'static str {
215        "MD040"
216    }
217
218    fn description(&self) -> &'static str {
219        "Code blocks should have a language specified"
220    }
221
222    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
223        let content = ctx.content;
224        let mut warnings = Vec::new();
225
226        // Validate config and emit warnings for invalid configuration
227        for error in self.validate_config() {
228            warnings.push(LintWarning {
229                rule_name: Some(self.name().to_string()),
230                line: 1,
231                column: 1,
232                end_line: 1,
233                end_column: 1,
234                message: format!("[config error] {error}"),
235                severity: Severity::Error,
236                fix: None,
237            });
238        }
239
240        // Derive fenced code blocks from pre-computed context
241        let fenced_blocks = derive_fenced_code_blocks(ctx);
242
243        // Pre-compute disabled ranges for efficient lookup
244        let disabled_ranges = compute_disabled_ranges(content, self.name());
245
246        // Compute preferred labels for consistent mode
247        let preferred_labels = if self.config.style == LanguageStyle::Consistent {
248            self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
249        } else {
250            HashMap::new()
251        };
252
253        let lines = ctx.raw_lines();
254
255        for block in &fenced_blocks {
256            // Skip if this line is in a disabled range
257            if is_line_disabled(&disabled_ranges, block.line_idx) {
258                continue;
259            }
260
261            // Get the actual line content for additional checks
262            let line = lines.get(block.line_idx).unwrap_or(&"");
263            let trimmed = line.trim();
264            let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
265
266            // Check if fence has MkDocs superfences attributes but no language
267            let has_mkdocs_attrs_only =
268                ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
269
270            // Check for Quarto/RMarkdown code chunk syntax: {language} or {language, options}
271            let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
272                && after_fence.starts_with('{')
273                && after_fence.contains('}');
274
275            // Determine if this block needs a language specification
276            // In MkDocs flavor, superfences attributes without language are acceptable
277            let needs_language =
278                !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
279
280            if needs_language && !has_quarto_syntax {
281                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
282
283                warnings.push(LintWarning {
284                    rule_name: Some(self.name().to_string()),
285                    line: start_line,
286                    column: start_col,
287                    end_line,
288                    end_column: end_col,
289                    message: "Code block (```) missing language".to_string(),
290                    severity: Severity::Warning,
291                    fix: Some(Fix {
292                        range: {
293                            let trimmed = line.trim_start();
294                            let trimmed_start = line.len() - trimmed.len();
295                            let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
296                            let fence_end_byte = line_start_byte + trimmed_start + block.fence_marker.len();
297                            // Replace from after fence marker to end of line content,
298                            // so trailing whitespace is cleaned up while any existing
299                            // info string / attributes are preserved via the replacement.
300                            let line_end_byte = line_start_byte + line.len();
301                            fence_end_byte..line_end_byte
302                        },
303                        replacement: {
304                            let trimmed = line.trim_start();
305                            let after_fence = &trimmed[block.fence_marker.len()..];
306                            let after_fence_trimmed = after_fence.trim();
307                            if after_fence_trimmed.is_empty() {
308                                "text".to_string()
309                            } else {
310                                format!("text {after_fence_trimmed}")
311                            }
312                        },
313                    }),
314                });
315                continue;
316            }
317
318            // Skip further checks for special syntax
319            if has_quarto_syntax {
320                continue;
321            }
322
323            let canonical = resolve_canonical(&block.language);
324
325            // Check language restrictions (allowlist/denylist)
326            if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
327                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
328
329                warnings.push(LintWarning {
330                    rule_name: Some(self.name().to_string()),
331                    line: start_line,
332                    column: start_col,
333                    end_line,
334                    end_column: end_col,
335                    message: msg,
336                    severity: Severity::Warning,
337                    fix: None,
338                });
339                continue;
340            }
341
342            // Check for unknown language (only if not handled by allowlist)
343            if canonical.is_none() {
344                if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
345                    let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
346
347                    warnings.push(LintWarning {
348                        rule_name: Some(self.name().to_string()),
349                        line: start_line,
350                        column: start_col,
351                        end_line,
352                        end_column: end_col,
353                        message: msg,
354                        severity,
355                        fix: None,
356                    });
357                }
358                continue;
359            }
360
361            // Check consistency
362            if self.config.style == LanguageStyle::Consistent
363                && let Some(preferred) = preferred_labels.get(canonical.unwrap())
364                && &block.language != preferred
365            {
366                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
367
368                let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
369                    let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
370                    Fix {
371                        range: (line_start_byte + label_start)..(line_start_byte + label_end),
372                        replacement: preferred.clone(),
373                    }
374                });
375                let lang = &block.language;
376                let canonical = canonical.unwrap();
377
378                warnings.push(LintWarning {
379                    rule_name: Some(self.name().to_string()),
380                    line: start_line,
381                    column: start_col,
382                    end_line,
383                    end_column: end_col,
384                    message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
385                    severity: Severity::Warning,
386                    fix,
387                });
388            }
389        }
390
391        Ok(warnings)
392    }
393
394    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
395        if self.should_skip(ctx) {
396            return Ok(ctx.content.to_string());
397        }
398        let warnings = self.check(ctx)?;
399        if warnings.is_empty() {
400            return Ok(ctx.content.to_string());
401        }
402        let warnings =
403            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
404        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
405    }
406
407    /// Get the category of this rule for selective processing
408    fn category(&self) -> RuleCategory {
409        RuleCategory::CodeBlock
410    }
411
412    /// Check if this rule should be skipped
413    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
414        ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
415    }
416
417    fn as_any(&self) -> &dyn std::any::Any {
418        self
419    }
420
421    fn default_config_section(&self) -> Option<(String, toml::Value)> {
422        let default_config = MD040Config::default();
423        let json_value = serde_json::to_value(&default_config).ok()?;
424        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
425
426        if let toml::Value::Table(table) = toml_value {
427            if !table.is_empty() {
428                Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
429            } else {
430                None
431            }
432        } else {
433            None
434        }
435    }
436
437    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
438    where
439        Self: Sized,
440    {
441        let rule_config: MD040Config = load_rule_config(config);
442        Box::new(MD040FencedCodeLanguage::with_config(rule_config))
443    }
444}
445
446/// Derive fenced code blocks from pre-computed CodeBlockDetail data
447fn derive_fenced_code_blocks(ctx: &crate::lint_context::LintContext) -> Vec<FencedCodeBlock> {
448    let content = ctx.content;
449    let line_offsets = &ctx.line_offsets;
450
451    ctx.code_block_details
452        .iter()
453        .filter(|d| d.is_fenced)
454        .map(|detail| {
455            let line_idx = match line_offsets.binary_search(&detail.start) {
456                Ok(idx) => idx,
457                Err(idx) => idx.saturating_sub(1),
458            };
459
460            // Determine fence marker from the actual line content
461            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
462            let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
463            let line = content.get(line_start..line_end).unwrap_or("");
464            let trimmed = line.trim();
465            let fence_marker = if trimmed.starts_with('`') {
466                let count = trimmed.chars().take_while(|&c| c == '`').count();
467                "`".repeat(count)
468            } else if trimmed.starts_with('~') {
469                let count = trimmed.chars().take_while(|&c| c == '~').count();
470                "~".repeat(count)
471            } else {
472                "```".to_string()
473            };
474
475            let language = detail.info_string.split_whitespace().next().unwrap_or("").to_string();
476
477            FencedCodeBlock {
478                line_idx,
479                language,
480                fence_marker,
481            }
482        })
483        .collect()
484}
485
486/// Compute disabled line ranges from disable/enable comments
487fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
488    let mut ranges = Vec::new();
489    let mut disabled_start: Option<usize> = None;
490
491    for (i, line) in content.lines().enumerate() {
492        let trimmed = line.trim();
493
494        if let Some(rules) = crate::inline_config::parse_disable_comment(trimmed)
495            && (rules.is_empty() || rules.contains(&rule_name))
496            && disabled_start.is_none()
497        {
498            disabled_start = Some(i);
499        }
500
501        if let Some(rules) = crate::inline_config::parse_enable_comment(trimmed)
502            && (rules.is_empty() || rules.contains(&rule_name))
503            && let Some(start) = disabled_start.take()
504        {
505            ranges.push((start, i));
506        }
507    }
508
509    // Handle unclosed disable
510    if let Some(start) = disabled_start {
511        ranges.push((start, usize::MAX));
512    }
513
514    ranges
515}
516
/// Report whether `line_idx` falls inside any of the half-open `(start, end)` ranges.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&line_idx) {
            return true;
        }
    }
    false
}
521
/// Locate the language label on a fence line.
///
/// Returns the `(start, end)` byte offsets of the label relative to the start
/// of `line`: the first run of non-whitespace characters after the fence
/// marker. Returns `None` when the line, after its leading whitespace, does
/// not begin with `fence_marker`, or when nothing but whitespace follows it.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let unindented = line.trim_start();
    let info = unindented.strip_prefix(fence_marker)?;
    // Offset of `info` within `line`: indentation plus the marker itself
    let info_offset = (line.len() - unindented.len()) + fence_marker.len();

    let label_start = info.find(|ch: char| !ch.is_whitespace())?;
    let label_len = info[label_start..]
        .find(char::is_whitespace)
        .unwrap_or(info.len() - label_start);

    let span_start = info_offset + label_start;
    Some((span_start, span_start + label_len))
}
547
548#[cfg(test)]
549mod tests {
550    use super::*;
551    use crate::lint_context::LintContext;
552
553    fn run_check(content: &str) -> LintResult {
554        let rule = MD040FencedCodeLanguage::default();
555        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
556        rule.check(&ctx)
557    }
558
559    fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
560        let rule = MD040FencedCodeLanguage::with_config(config);
561        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
562        rule.check(&ctx)
563    }
564
565    fn run_fix(content: &str) -> Result<String, LintError> {
566        let rule = MD040FencedCodeLanguage::default();
567        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
568        rule.fix(&ctx)
569    }
570
571    fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
572        let rule = MD040FencedCodeLanguage::with_config(config);
573        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
574        rule.fix(&ctx)
575    }
576
577    fn run_check_mkdocs(content: &str) -> LintResult {
578        let rule = MD040FencedCodeLanguage::default();
579        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
580        rule.check(&ctx)
581    }
582
583    // =========================================================================
584    // Basic functionality tests
585    // =========================================================================
586
587    #[test]
588    fn test_code_blocks_with_language_specified() {
589        let content = r#"# Test
590
591```python
592print("Hello, world!")
593```
594
595```javascript
596console.log("Hello!");
597```
598"#;
599        let result = run_check(content).unwrap();
600        assert!(result.is_empty(), "No warnings expected for code blocks with language");
601    }
602
603    #[test]
604    fn test_code_blocks_without_language() {
605        let content = r#"# Test
606
607```
608print("Hello, world!")
609```
610"#;
611        let result = run_check(content).unwrap();
612        assert_eq!(result.len(), 1);
613        assert_eq!(result[0].message, "Code block (```) missing language");
614        assert_eq!(result[0].line, 3);
615    }
616
617    #[test]
618    fn test_fix_method_adds_text_language() {
619        let content = r#"# Test
620
621```
622code without language
623```
624
625```python
626already has language
627```
628
629```
630another block without
631```
632"#;
633        let fixed = run_fix(content).unwrap();
634        assert!(fixed.contains("```text"));
635        assert!(fixed.contains("```python"));
636        assert_eq!(fixed.matches("```text").count(), 2);
637    }
638
639    #[test]
640    fn test_fix_preserves_indentation() {
641        let content = r#"# Test
642
643- List item
644  ```
645  indented code block
646  ```
647"#;
648        let fixed = run_fix(content).unwrap();
649        assert!(fixed.contains("  ```text"));
650    }
651
652    // =========================================================================
653    // Consistent mode tests
654    // =========================================================================
655
656    #[test]
657    fn test_consistent_mode_detects_inconsistency() {
658        let content = r#"```bash
659echo hi
660```
661
662```sh
663echo there
664```
665
666```bash
667echo again
668```
669"#;
670        let config = MD040Config {
671            style: LanguageStyle::Consistent,
672            ..Default::default()
673        };
674        let result = run_check_with_config(content, config).unwrap();
675        assert_eq!(result.len(), 1);
676        assert!(result[0].message.contains("Inconsistent"));
677        assert!(result[0].message.contains("sh"));
678        assert!(result[0].message.contains("bash"));
679    }
680
681    #[test]
682    fn test_consistent_mode_fix_normalizes() {
683        let content = r#"```bash
684echo hi
685```
686
687```sh
688echo there
689```
690
691```bash
692echo again
693```
694"#;
695        let config = MD040Config {
696            style: LanguageStyle::Consistent,
697            ..Default::default()
698        };
699        let fixed = run_fix_with_config(content, config).unwrap();
700        assert_eq!(fixed.matches("```bash").count(), 3);
701        assert_eq!(fixed.matches("```sh").count(), 0);
702    }
703
704    #[test]
705    fn test_consistent_mode_tie_break_uses_curated_default() {
706        // When there's a tie (1 bash, 1 sh), should use curated default (bash)
707        let content = r#"```bash
708echo hi
709```
710
711```sh
712echo there
713```
714"#;
715        let config = MD040Config {
716            style: LanguageStyle::Consistent,
717            ..Default::default()
718        };
719        let fixed = run_fix_with_config(content, config).unwrap();
720        // bash is the curated default for Shell
721        assert_eq!(fixed.matches("```bash").count(), 2);
722    }
723
724    #[test]
725    fn test_consistent_mode_with_preferred_alias() {
726        let content = r#"```bash
727echo hi
728```
729
730```sh
731echo there
732```
733"#;
734        let mut preferred = HashMap::new();
735        preferred.insert("Shell".to_string(), "sh".to_string());
736
737        let config = MD040Config {
738            style: LanguageStyle::Consistent,
739            preferred_aliases: preferred,
740            ..Default::default()
741        };
742        let fixed = run_fix_with_config(content, config).unwrap();
743        assert_eq!(fixed.matches("```sh").count(), 2);
744        assert_eq!(fixed.matches("```bash").count(), 0);
745    }
746
747    #[test]
748    fn test_consistent_mode_ignores_disabled_blocks() {
749        let content = r#"```bash
750echo hi
751```
752<!-- rumdl-disable MD040 -->
753```sh
754echo there
755```
756```sh
757echo again
758```
759<!-- rumdl-enable MD040 -->
760"#;
761        let config = MD040Config {
762            style: LanguageStyle::Consistent,
763            ..Default::default()
764        };
765        let result = run_check_with_config(content, config).unwrap();
766        assert!(result.is_empty(), "Disabled blocks should not affect consistency");
767    }
768
769    #[test]
770    fn test_fix_preserves_attributes() {
771        let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
772        let config = MD040Config {
773            style: LanguageStyle::Consistent,
774            ..Default::default()
775        };
776        let fixed = run_fix_with_config(content, config).unwrap();
777        assert!(fixed.contains("```bash {.highlight}"));
778    }
779
780    #[test]
781    fn test_fix_preserves_spacing_before_label() {
782        let content = "```bash\ncode\n```\n\n```  sh {.highlight}\ncode\n```";
783        let config = MD040Config {
784            style: LanguageStyle::Consistent,
785            ..Default::default()
786        };
787        let fixed = run_fix_with_config(content, config).unwrap();
788        assert!(fixed.contains("```  bash {.highlight}"));
789        assert!(!fixed.contains("```  sh {.highlight}"));
790    }
791
792    // =========================================================================
793    // Allowlist/denylist tests
794    // =========================================================================
795
796    #[test]
797    fn test_allowlist_blocks_unlisted() {
798        let content = "```java\ncode\n```";
799        let config = MD040Config {
800            allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
801            ..Default::default()
802        };
803        let result = run_check_with_config(content, config).unwrap();
804        assert_eq!(result.len(), 1);
805        assert!(result[0].message.contains("not in the allowed list"));
806    }
807
808    #[test]
809    fn test_allowlist_allows_listed() {
810        let content = "```python\ncode\n```";
811        let config = MD040Config {
812            allowed_languages: vec!["Python".to_string()],
813            ..Default::default()
814        };
815        let result = run_check_with_config(content, config).unwrap();
816        assert!(result.is_empty());
817    }
818
819    #[test]
820    fn test_allowlist_blocks_unknown_language() {
821        let content = "```mysterylang\ncode\n```";
822        let config = MD040Config {
823            allowed_languages: vec!["Python".to_string()],
824            ..Default::default()
825        };
826        let result = run_check_with_config(content, config).unwrap();
827        assert_eq!(result.len(), 1);
828        assert!(result[0].message.contains("allowed list"));
829    }
830
831    #[test]
832    fn test_allowlist_case_insensitive() {
833        let content = "```python\ncode\n```";
834        let config = MD040Config {
835            allowed_languages: vec!["PYTHON".to_string()],
836            ..Default::default()
837        };
838        let result = run_check_with_config(content, config).unwrap();
839        assert!(result.is_empty());
840    }
841
842    #[test]
843    fn test_denylist_blocks_listed() {
844        let content = "```java\ncode\n```";
845        let config = MD040Config {
846            disallowed_languages: vec!["Java".to_string()],
847            ..Default::default()
848        };
849        let result = run_check_with_config(content, config).unwrap();
850        assert_eq!(result.len(), 1);
851        assert!(result[0].message.contains("disallowed"));
852    }
853
854    #[test]
855    fn test_denylist_allows_unlisted() {
856        let content = "```python\ncode\n```";
857        let config = MD040Config {
858            disallowed_languages: vec!["Java".to_string()],
859            ..Default::default()
860        };
861        let result = run_check_with_config(content, config).unwrap();
862        assert!(result.is_empty());
863    }
864
865    #[test]
866    fn test_allowlist_takes_precedence_over_denylist() {
867        let content = "```python\ncode\n```";
868        let config = MD040Config {
869            allowed_languages: vec!["Python".to_string()],
870            disallowed_languages: vec!["Python".to_string()], // Should be ignored
871            ..Default::default()
872        };
873        let result = run_check_with_config(content, config).unwrap();
874        assert!(result.is_empty());
875    }
876
877    // =========================================================================
878    // Unknown language tests
879    // =========================================================================
880
881    #[test]
882    fn test_unknown_language_ignore_default() {
883        let content = "```mycustomlang\ncode\n```";
884        let result = run_check(content).unwrap();
885        assert!(result.is_empty(), "Unknown languages ignored by default");
886    }
887
888    #[test]
889    fn test_unknown_language_warn() {
890        let content = "```mycustomlang\ncode\n```";
891        let config = MD040Config {
892            unknown_language_action: UnknownLanguageAction::Warn,
893            ..Default::default()
894        };
895        let result = run_check_with_config(content, config).unwrap();
896        assert_eq!(result.len(), 1);
897        assert!(result[0].message.contains("Unknown language"));
898        assert!(result[0].message.contains("mycustomlang"));
899        assert_eq!(result[0].severity, Severity::Warning);
900    }
901
902    #[test]
903    fn test_unknown_language_error() {
904        let content = "```mycustomlang\ncode\n```";
905        let config = MD040Config {
906            unknown_language_action: UnknownLanguageAction::Error,
907            ..Default::default()
908        };
909        let result = run_check_with_config(content, config).unwrap();
910        assert_eq!(result.len(), 1);
911        assert!(result[0].message.contains("Unknown language"));
912        assert_eq!(result[0].severity, Severity::Error);
913    }
914
915    // =========================================================================
916    // Config validation tests
917    // =========================================================================
918
919    #[test]
920    fn test_invalid_preferred_alias_detected() {
921        let mut preferred = HashMap::new();
922        preferred.insert("Shell".to_string(), "invalid_alias".to_string());
923
924        let config = MD040Config {
925            style: LanguageStyle::Consistent,
926            preferred_aliases: preferred,
927            ..Default::default()
928        };
929        let rule = MD040FencedCodeLanguage::with_config(config);
930        let errors = rule.validate_config();
931        assert_eq!(errors.len(), 1);
932        assert!(errors[0].contains("Invalid alias"));
933        assert!(errors[0].contains("invalid_alias"));
934    }
935
936    #[test]
937    fn test_unknown_language_in_preferred_aliases_detected() {
938        let mut preferred = HashMap::new();
939        preferred.insert("NotARealLanguage".to_string(), "nope".to_string());
940
941        let config = MD040Config {
942            style: LanguageStyle::Consistent,
943            preferred_aliases: preferred,
944            ..Default::default()
945        };
946        let rule = MD040FencedCodeLanguage::with_config(config);
947        let errors = rule.validate_config();
948        assert_eq!(errors.len(), 1);
949        assert!(errors[0].contains("Unknown language"));
950    }
951
952    #[test]
953    fn test_valid_preferred_alias_accepted() {
954        let mut preferred = HashMap::new();
955        preferred.insert("Shell".to_string(), "bash".to_string());
956        preferred.insert("JavaScript".to_string(), "js".to_string());
957
958        let config = MD040Config {
959            style: LanguageStyle::Consistent,
960            preferred_aliases: preferred,
961            ..Default::default()
962        };
963        let rule = MD040FencedCodeLanguage::with_config(config);
964        let errors = rule.validate_config();
965        assert!(errors.is_empty());
966    }
967
968    #[test]
969    fn test_config_error_uses_valid_line_column() {
970        let config = md040_config::MD040Config {
971            preferred_aliases: {
972                let mut map = std::collections::HashMap::new();
973                map.insert("Shell".to_string(), "invalid_alias".to_string());
974                map
975            },
976            ..Default::default()
977        };
978        let rule = MD040FencedCodeLanguage::with_config(config);
979
980        let content = "```shell\necho hello\n```";
981        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
982        let result = rule.check(&ctx).unwrap();
983
984        // Find the config error warning
985        let config_error = result.iter().find(|w| w.message.contains("[config error]"));
986        assert!(config_error.is_some(), "Should have a config error warning");
987
988        let warning = config_error.unwrap();
989        // Line and column should be 1-indexed (not 0)
990        assert!(
991            warning.line >= 1,
992            "Config error line should be >= 1, got {}",
993            warning.line
994        );
995        assert!(
996            warning.column >= 1,
997            "Config error column should be >= 1, got {}",
998            warning.column
999        );
1000    }
1001
1002    // =========================================================================
1003    // Linguist resolution tests
1004    // =========================================================================
1005
1006    #[test]
1007    fn test_linguist_resolution() {
1008        assert_eq!(resolve_canonical("bash"), Some("Shell"));
1009        assert_eq!(resolve_canonical("sh"), Some("Shell"));
1010        assert_eq!(resolve_canonical("zsh"), Some("Shell"));
1011        assert_eq!(resolve_canonical("js"), Some("JavaScript"));
1012        assert_eq!(resolve_canonical("python"), Some("Python"));
1013        assert_eq!(resolve_canonical("unknown_lang"), None);
1014    }
1015
1016    #[test]
1017    fn test_linguist_resolution_case_insensitive() {
1018        assert_eq!(resolve_canonical("BASH"), Some("Shell"));
1019        assert_eq!(resolve_canonical("Bash"), Some("Shell"));
1020        assert_eq!(resolve_canonical("Python"), Some("Python"));
1021        assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
1022    }
1023
1024    #[test]
1025    fn test_alias_validation() {
1026        assert!(is_valid_alias("Shell", "bash"));
1027        assert!(is_valid_alias("Shell", "sh"));
1028        assert!(is_valid_alias("Shell", "zsh"));
1029        assert!(!is_valid_alias("Shell", "python"));
1030        assert!(!is_valid_alias("Shell", "invalid"));
1031    }
1032
1033    #[test]
1034    fn test_default_alias() {
1035        assert_eq!(default_alias("Shell"), Some("bash"));
1036        assert_eq!(default_alias("JavaScript"), Some("js"));
1037        assert_eq!(default_alias("Python"), Some("python"));
1038    }
1039
1040    // =========================================================================
1041    // Edge case tests
1042    // =========================================================================
1043
1044    #[test]
1045    fn test_mixed_case_labels_normalized() {
1046        let content = r#"```BASH
1047echo hi
1048```
1049
1050```Bash
1051echo there
1052```
1053
1054```bash
1055echo again
1056```
1057"#;
1058        let config = MD040Config {
1059            style: LanguageStyle::Consistent,
1060            ..Default::default()
1061        };
1062        // All should resolve to Shell, most prevalent should win
1063        let result = run_check_with_config(content, config).unwrap();
1064        // "bash" appears 1x, "Bash" appears 1x, "BASH" appears 1x
1065        // All are different strings, so there's a 3-way tie
1066        // Should pick curated default "bash" or alphabetically first
1067        assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
1068    }
1069
1070    #[test]
1071    fn test_multiple_languages_independent() {
1072        let content = r#"```bash
1073shell code
1074```
1075
1076```python
1077python code
1078```
1079
1080```sh
1081more shell
1082```
1083
1084```python3
1085more python
1086```
1087"#;
1088        let config = MD040Config {
1089            style: LanguageStyle::Consistent,
1090            ..Default::default()
1091        };
1092        let result = run_check_with_config(content, config).unwrap();
1093        // Should have 2 warnings: one for sh (inconsistent with bash) and one for python3 (inconsistent with python)
1094        assert_eq!(result.len(), 2);
1095    }
1096
1097    #[test]
1098    fn test_tilde_fences() {
1099        let content = r#"~~~bash
1100echo hi
1101~~~
1102
1103~~~sh
1104echo there
1105~~~
1106"#;
1107        let config = MD040Config {
1108            style: LanguageStyle::Consistent,
1109            ..Default::default()
1110        };
1111        let result = run_check_with_config(content, config.clone()).unwrap();
1112        assert_eq!(result.len(), 1);
1113
1114        let fixed = run_fix_with_config(content, config).unwrap();
1115        assert!(fixed.contains("~~~bash"));
1116        assert!(!fixed.contains("~~~sh"));
1117    }
1118
1119    #[test]
1120    fn test_longer_fence_markers_preserved() {
1121        let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
1122        let config = MD040Config {
1123            style: LanguageStyle::Consistent,
1124            ..Default::default()
1125        };
1126        let fixed = run_fix_with_config(content, config).unwrap();
1127        assert!(fixed.contains("````bash"));
1128        assert!(fixed.contains("```bash"));
1129    }
1130
1131    #[test]
1132    fn test_empty_document() {
1133        let result = run_check("").unwrap();
1134        assert!(result.is_empty());
1135    }
1136
1137    #[test]
1138    fn test_no_code_blocks() {
1139        let content = "# Just a heading\n\nSome text.";
1140        let result = run_check(content).unwrap();
1141        assert!(result.is_empty());
1142    }
1143
1144    #[test]
1145    fn test_single_code_block_no_inconsistency() {
1146        let content = "```bash\necho hi\n```";
1147        let config = MD040Config {
1148            style: LanguageStyle::Consistent,
1149            ..Default::default()
1150        };
1151        let result = run_check_with_config(content, config).unwrap();
1152        assert!(result.is_empty(), "Single block has no inconsistency");
1153    }
1154
1155    #[test]
1156    fn test_idempotent_fix() {
1157        let content = r#"```bash
1158echo hi
1159```
1160
1161```sh
1162echo there
1163```
1164"#;
1165        let config = MD040Config {
1166            style: LanguageStyle::Consistent,
1167            ..Default::default()
1168        };
1169        let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
1170        let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
1171        assert_eq!(fixed1, fixed2, "Fix should be idempotent");
1172    }
1173
1174    // =========================================================================
1175    // MkDocs superfences tests
1176    // =========================================================================
1177
1178    #[test]
1179    fn test_mkdocs_superfences_title_only() {
1180        // title= attribute without language should not warn in MkDocs flavor
1181        let content = r#"```title="Example"
1182echo hi
1183```
1184"#;
1185        let result = run_check_mkdocs(content).unwrap();
1186        assert!(
1187            result.is_empty(),
1188            "MkDocs superfences with title= should not require language"
1189        );
1190    }
1191
1192    #[test]
1193    fn test_mkdocs_superfences_hl_lines() {
1194        // hl_lines= attribute without language should not warn
1195        let content = r#"```hl_lines="1 2"
1196line 1
1197line 2
1198```
1199"#;
1200        let result = run_check_mkdocs(content).unwrap();
1201        assert!(
1202            result.is_empty(),
1203            "MkDocs superfences with hl_lines= should not require language"
1204        );
1205    }
1206
1207    #[test]
1208    fn test_mkdocs_superfences_linenums() {
1209        // linenums= attribute without language should not warn
1210        let content = r#"```linenums="1"
1211line 1
1212line 2
1213```
1214"#;
1215        let result = run_check_mkdocs(content).unwrap();
1216        assert!(
1217            result.is_empty(),
1218            "MkDocs superfences with linenums= should not require language"
1219        );
1220    }
1221
1222    #[test]
1223    fn test_mkdocs_superfences_class() {
1224        // Custom class (starting with .) should not warn
1225        let content = r#"```.my-class
1226some text
1227```
1228"#;
1229        let result = run_check_mkdocs(content).unwrap();
1230        assert!(
1231            result.is_empty(),
1232            "MkDocs superfences with .class should not require language"
1233        );
1234    }
1235
1236    #[test]
1237    fn test_mkdocs_superfences_id() {
1238        // Custom ID (starting with #) should not warn
1239        let content = r#"```#my-id
1240some text
1241```
1242"#;
1243        let result = run_check_mkdocs(content).unwrap();
1244        assert!(
1245            result.is_empty(),
1246            "MkDocs superfences with #id should not require language"
1247        );
1248    }
1249
1250    #[test]
1251    fn test_mkdocs_superfences_with_language() {
1252        // Language with superfences attributes should work fine
1253        let content = r#"```python title="Example" hl_lines="1"
1254print("hello")
1255```
1256"#;
1257        let result = run_check_mkdocs(content).unwrap();
1258        assert!(result.is_empty(), "Code block with language and attrs should pass");
1259    }
1260
1261    #[test]
1262    fn test_standard_flavor_no_special_handling() {
1263        // In Standard flavor, title= should still warn
1264        let content = r#"```title="Example"
1265echo hi
1266```
1267"#;
1268        let result = run_check(content).unwrap();
1269        assert_eq!(
1270            result.len(),
1271            1,
1272            "Standard flavor should warn about title= without language"
1273        );
1274    }
1275}