Skip to main content

rumdl_lib/rules/
md040_fenced_code_language.rs

1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use std::collections::HashMap;
6
7/// Rule MD040: Fenced code blocks should have a language
8///
9/// See [docs/md040.md](../../docs/md040.md) for full documentation, configuration, and examples.
10pub mod md040_config;
11
12// ============================================================================
13// MkDocs Superfences Attribute Detection
14// ============================================================================
15
/// Leading strings that mark a fence info-string token as a MkDocs superfences
/// attribute instead of a language name.
///
/// In MkDocs flavor a fence may carry only such attributes and no language.
/// See: https://facelessuser.github.io/pymdown-extensions/extensions/superfences/
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &[
    "title=",    // Block title
    "hl_lines=", // Highlighted lines
    "linenums=", // Line numbers
    ".",         // CSS class (e.g., .annotate)
    "#",         // CSS id
];

/// Returns `true` when `s` begins with any entry of
/// [`MKDOCS_SUPERFENCES_ATTR_PREFIXES`]; an empty string never matches.
#[inline]
fn is_superfences_attribute(s: &str) -> bool {
    for prefix in MKDOCS_SUPERFENCES_ATTR_PREFIXES {
        if s.starts_with(*prefix) {
            return true;
        }
    }
    false
}
34use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
35
/// The opening line of one fenced code block, reduced to what this rule needs.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so values can be printed in
/// diagnostics and compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FencedCodeBlock {
    /// 0-indexed line number where the code block starts
    line_idx: usize,
    /// First whitespace-delimited token of the info string (empty if no language specified)
    language: String,
    /// The fence marker used: the opening run of backticks or tildes (e.g. ``` or ~~~)
    fence_marker: String,
}
44
45#[derive(Debug, Clone, Default)]
46pub struct MD040FencedCodeLanguage {
47    config: MD040Config,
48}
49
50impl MD040FencedCodeLanguage {
51    pub fn new() -> Self {
52        Self::default()
53    }
54
55    pub fn with_config(config: MD040Config) -> Self {
56        Self { config }
57    }
58
59    /// Validate the configuration and return any errors
60    fn validate_config(&self) -> Vec<String> {
61        let mut errors = Vec::new();
62
63        // Validate preferred-aliases: check that each alias is valid for its language
64        for (canonical, alias) in &self.config.preferred_aliases {
65            // Find the actual canonical name (case-insensitive)
66            if let Some(actual_canonical) = resolve_canonical(canonical) {
67                if !is_valid_alias(actual_canonical, alias)
68                    && let Some(valid_aliases) = get_aliases(actual_canonical)
69                {
70                    let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
71                    let valid_str = valid_list
72                        .iter()
73                        .map(|s| format!("'{s}'"))
74                        .collect::<Vec<_>>()
75                        .join(", ");
76                    let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
77                    errors.push(format!(
78                        "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
79                    ));
80                }
81            } else {
82                errors.push(format!(
83                    "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
84                ));
85            }
86        }
87
88        errors
89    }
90
91    /// Determine the preferred label for each canonical language in the document
92    fn compute_preferred_labels(
93        &self,
94        blocks: &[FencedCodeBlock],
95        disabled_ranges: &[(usize, usize)],
96    ) -> HashMap<String, String> {
97        // Group labels by canonical language
98        let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
99
100        for block in blocks {
101            if is_line_disabled(disabled_ranges, block.line_idx) {
102                continue;
103            }
104            if block.language.is_empty() {
105                continue;
106            }
107            if let Some(canonical) = resolve_canonical(&block.language) {
108                by_canonical
109                    .entry(canonical.to_string())
110                    .or_default()
111                    .push(&block.language);
112            }
113        }
114
115        // Determine winning label for each canonical language
116        let mut result = HashMap::new();
117
118        for (canonical, labels) in by_canonical {
119            // Check for user override first (case-insensitive lookup)
120            let winner = if let Some(preferred) = self
121                .config
122                .preferred_aliases
123                .iter()
124                .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
125                .map(|(_, v)| v.clone())
126            {
127                preferred
128            } else {
129                // Find most prevalent label
130                let mut counts: HashMap<&str, usize> = HashMap::new();
131                for label in &labels {
132                    *counts.entry(*label).or_default() += 1;
133                }
134
135                let max_count = counts.values().max().copied().unwrap_or(0);
136                let winners: Vec<_> = counts
137                    .iter()
138                    .filter(|(_, c)| **c == max_count)
139                    .map(|(l, _)| *l)
140                    .collect();
141
142                if winners.len() == 1 {
143                    winners[0].to_string()
144                } else {
145                    // Tie-break: use curated default if available, otherwise alphabetically first
146                    default_alias(&canonical)
147                        .filter(|default| winners.contains(default))
148                        .map(|s| s.to_string())
149                        .unwrap_or_else(|| winners.into_iter().min().unwrap().to_string())
150                }
151            };
152
153            result.insert(canonical, winner);
154        }
155
156        result
157    }
158
159    /// Check if a language is allowed based on config
160    fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
161        // Allowlist takes precedence
162        if !self.config.allowed_languages.is_empty() {
163            let allowed = self.config.allowed_languages.join(", ");
164            let Some(canonical) = canonical else {
165                return Some(format!(
166                    "Language '{original_label}' is not in the allowed list: {allowed}"
167                ));
168            };
169            if !self
170                .config
171                .allowed_languages
172                .iter()
173                .any(|a| a.eq_ignore_ascii_case(canonical))
174            {
175                return Some(format!(
176                    "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
177                ));
178            }
179        } else if !self.config.disallowed_languages.is_empty()
180            && canonical.is_some_and(|canonical| {
181                self.config
182                    .disallowed_languages
183                    .iter()
184                    .any(|d| d.eq_ignore_ascii_case(canonical))
185            })
186        {
187            let canonical = canonical.unwrap_or("unknown");
188            return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
189        }
190        None
191    }
192
193    /// Check for unknown language based on config
194    fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
195        if resolve_canonical(label).is_some() {
196            return None;
197        }
198
199        match self.config.unknown_language_action {
200            UnknownLanguageAction::Ignore => None,
201            UnknownLanguageAction::Warn => Some((
202                format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
203                Severity::Warning,
204            )),
205            UnknownLanguageAction::Error => Some((
206                format!("Unknown language '{label}' (not in GitHub Linguist)"),
207                Severity::Error,
208            )),
209        }
210    }
211}
212
213impl Rule for MD040FencedCodeLanguage {
214    fn name(&self) -> &'static str {
215        "MD040"
216    }
217
218    fn description(&self) -> &'static str {
219        "Code blocks should have a language specified"
220    }
221
222    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
223        let content = ctx.content;
224        let mut warnings = Vec::new();
225
226        // Validate config and emit warnings for invalid configuration
227        for error in self.validate_config() {
228            warnings.push(LintWarning {
229                rule_name: Some(self.name().to_string()),
230                line: 1,
231                column: 1,
232                end_line: 1,
233                end_column: 1,
234                message: format!("[config error] {error}"),
235                severity: Severity::Error,
236                fix: None,
237            });
238        }
239
240        // Derive fenced code blocks from pre-computed context
241        let fenced_blocks = derive_fenced_code_blocks(ctx);
242
243        // Pre-compute disabled ranges for efficient lookup
244        let disabled_ranges = compute_disabled_ranges(content, self.name());
245
246        // Compute preferred labels for consistent mode
247        let preferred_labels = if self.config.style == LanguageStyle::Consistent {
248            self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
249        } else {
250            HashMap::new()
251        };
252
253        let lines = ctx.raw_lines();
254
255        for block in &fenced_blocks {
256            // Skip if this line is in a disabled range
257            if is_line_disabled(&disabled_ranges, block.line_idx) {
258                continue;
259            }
260
261            // Get the actual line content for additional checks
262            let line = lines.get(block.line_idx).unwrap_or(&"");
263            let trimmed = line.trim();
264            let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
265
266            // Check if fence has MkDocs superfences attributes but no language
267            let has_mkdocs_attrs_only =
268                ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
269
270            // Check for Quarto/RMarkdown code chunk syntax: {language} or {language, options}
271            let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
272                && after_fence.starts_with('{')
273                && after_fence.contains('}');
274
275            // Determine if this block needs a language specification
276            // In MkDocs flavor, superfences attributes without language are acceptable
277            let needs_language =
278                !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
279
280            if needs_language && !has_quarto_syntax {
281                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
282
283                warnings.push(LintWarning {
284                    rule_name: Some(self.name().to_string()),
285                    line: start_line,
286                    column: start_col,
287                    end_line,
288                    end_column: end_col,
289                    message: "Code block (```) missing language".to_string(),
290                    severity: Severity::Warning,
291                    fix: Some(Fix {
292                        range: {
293                            let trimmed_start = line.len() - line.trim_start().len();
294                            let fence_len = block.fence_marker.len();
295                            let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
296                            let fence_start_byte = line_start_byte + trimmed_start;
297                            let fence_end_byte = fence_start_byte + fence_len;
298                            fence_start_byte..fence_end_byte
299                        },
300                        replacement: format!("{}text", block.fence_marker),
301                    }),
302                });
303                continue;
304            }
305
306            // Skip further checks for special syntax
307            if has_quarto_syntax {
308                continue;
309            }
310
311            let canonical = resolve_canonical(&block.language);
312
313            // Check language restrictions (allowlist/denylist)
314            if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
315                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
316
317                warnings.push(LintWarning {
318                    rule_name: Some(self.name().to_string()),
319                    line: start_line,
320                    column: start_col,
321                    end_line,
322                    end_column: end_col,
323                    message: msg,
324                    severity: Severity::Warning,
325                    fix: None,
326                });
327                continue;
328            }
329
330            // Check for unknown language (only if not handled by allowlist)
331            if canonical.is_none() {
332                if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
333                    let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
334
335                    warnings.push(LintWarning {
336                        rule_name: Some(self.name().to_string()),
337                        line: start_line,
338                        column: start_col,
339                        end_line,
340                        end_column: end_col,
341                        message: msg,
342                        severity,
343                        fix: None,
344                    });
345                }
346                continue;
347            }
348
349            // Check consistency
350            if self.config.style == LanguageStyle::Consistent
351                && let Some(preferred) = preferred_labels.get(canonical.unwrap())
352                && &block.language != preferred
353            {
354                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
355
356                let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
357                    let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
358                    Fix {
359                        range: (line_start_byte + label_start)..(line_start_byte + label_end),
360                        replacement: preferred.clone(),
361                    }
362                });
363                let lang = &block.language;
364                let canonical = canonical.unwrap();
365
366                warnings.push(LintWarning {
367                    rule_name: Some(self.name().to_string()),
368                    line: start_line,
369                    column: start_col,
370                    end_line,
371                    end_column: end_col,
372                    message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
373                    severity: Severity::Warning,
374                    fix,
375                });
376            }
377        }
378
379        Ok(warnings)
380    }
381
382    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
383        let content = ctx.content;
384
385        // Derive fenced code blocks from pre-computed context
386        let fenced_blocks = derive_fenced_code_blocks(ctx);
387
388        // Pre-compute disabled ranges
389        let disabled_ranges = compute_disabled_ranges(content, self.name());
390
391        // Compute preferred labels for consistent mode
392        let preferred_labels = if self.config.style == LanguageStyle::Consistent {
393            self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
394        } else {
395            HashMap::new()
396        };
397
398        // Build a set of line indices that need fixing
399        let mut lines_to_fix: std::collections::HashMap<usize, FixAction> = std::collections::HashMap::new();
400
401        for block in &fenced_blocks {
402            if is_line_disabled(&disabled_ranges, block.line_idx) {
403                continue;
404            }
405
406            // Skip lines where this rule is disabled by inline config
407            if ctx.inline_config().is_rule_disabled(self.name(), block.line_idx + 1) {
408                continue;
409            }
410
411            let fix_lines = ctx.raw_lines();
412            let line = fix_lines.get(block.line_idx).unwrap_or(&"");
413            let trimmed = line.trim();
414            let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
415
416            let has_mkdocs_attrs_only =
417                ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
418
419            let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
420                && after_fence.starts_with('{')
421                && after_fence.contains('}');
422
423            let needs_language =
424                !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
425
426            if needs_language && !has_quarto_syntax {
427                lines_to_fix.insert(
428                    block.line_idx,
429                    FixAction::AddLanguage {
430                        fence_marker: block.fence_marker.clone(),
431                        has_mkdocs_attrs_only,
432                    },
433                );
434            } else if !has_quarto_syntax
435                && self.config.style == LanguageStyle::Consistent
436                && let Some(canonical) = resolve_canonical(&block.language)
437                && let Some(preferred) = preferred_labels.get(canonical)
438                && &block.language != preferred
439            {
440                lines_to_fix.insert(
441                    block.line_idx,
442                    FixAction::NormalizeLabel {
443                        fence_marker: block.fence_marker.clone(),
444                        new_label: preferred.clone(),
445                    },
446                );
447            }
448        }
449
450        // Build the result by iterating through lines
451        let mut result = String::new();
452        for (i, line) in content.lines().enumerate() {
453            if let Some(action) = lines_to_fix.get(&i) {
454                match action {
455                    FixAction::AddLanguage {
456                        fence_marker,
457                        has_mkdocs_attrs_only,
458                    } => {
459                        let indent = &line[..line.len() - line.trim_start().len()];
460                        let trimmed = line.trim();
461                        let after_fence = trimmed.strip_prefix(fence_marker).unwrap_or("").trim();
462
463                        if *has_mkdocs_attrs_only {
464                            result.push_str(&format!("{indent}{fence_marker}text {after_fence}\n"));
465                        } else {
466                            result.push_str(&format!("{indent}{fence_marker}text\n"));
467                        }
468                    }
469                    FixAction::NormalizeLabel {
470                        fence_marker,
471                        new_label,
472                    } => {
473                        if let Some((label_start, label_end)) = find_label_span(line, fence_marker) {
474                            result.push_str(&line[..label_start]);
475                            result.push_str(new_label);
476                            result.push_str(&line[label_end..]);
477                            result.push('\n');
478                        } else {
479                            result.push_str(line);
480                            result.push('\n');
481                        }
482                    }
483                }
484            } else {
485                result.push_str(line);
486                result.push('\n');
487            }
488        }
489
490        // Remove trailing newline if the original content didn't have one
491        if !content.ends_with('\n') {
492            result.pop();
493        }
494
495        Ok(result)
496    }
497
498    /// Get the category of this rule for selective processing
499    fn category(&self) -> RuleCategory {
500        RuleCategory::CodeBlock
501    }
502
503    /// Check if this rule should be skipped
504    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
505        ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
506    }
507
508    fn as_any(&self) -> &dyn std::any::Any {
509        self
510    }
511
512    fn default_config_section(&self) -> Option<(String, toml::Value)> {
513        let default_config = MD040Config::default();
514        let json_value = serde_json::to_value(&default_config).ok()?;
515        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
516
517        if let toml::Value::Table(table) = toml_value {
518            if !table.is_empty() {
519                Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
520            } else {
521                None
522            }
523        } else {
524            None
525        }
526    }
527
528    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
529    where
530        Self: Sized,
531    {
532        let rule_config: MD040Config = load_rule_config(config);
533        Box::new(MD040FencedCodeLanguage::with_config(rule_config))
534    }
535}
536
/// A rewrite scheduled for one fence opening line when fixing the document.
#[derive(Clone, Debug)]
enum FixAction {
    /// Insert a default language after the fence marker.
    AddLanguage {
        /// Fence marker of the block being fixed.
        fence_marker: String,
        /// Whether the fence carried only MkDocs superfences attributes.
        has_mkdocs_attrs_only: bool,
    },
    /// Replace the existing label with another one.
    NormalizeLabel {
        /// Fence marker of the block being fixed.
        fence_marker: String,
        /// Label to write in place of the current one.
        new_label: String,
    },
}
548
549/// Derive fenced code blocks from pre-computed CodeBlockDetail data
550fn derive_fenced_code_blocks(ctx: &crate::lint_context::LintContext) -> Vec<FencedCodeBlock> {
551    let content = ctx.content;
552    let line_offsets = &ctx.line_offsets;
553
554    ctx.code_block_details
555        .iter()
556        .filter(|d| d.is_fenced)
557        .map(|detail| {
558            let line_idx = match line_offsets.binary_search(&detail.start) {
559                Ok(idx) => idx,
560                Err(idx) => idx.saturating_sub(1),
561            };
562
563            // Determine fence marker from the actual line content
564            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
565            let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
566            let line = content.get(line_start..line_end).unwrap_or("");
567            let trimmed = line.trim();
568            let fence_marker = if trimmed.starts_with('`') {
569                let count = trimmed.chars().take_while(|&c| c == '`').count();
570                "`".repeat(count)
571            } else if trimmed.starts_with('~') {
572                let count = trimmed.chars().take_while(|&c| c == '~').count();
573                "~".repeat(count)
574            } else {
575                "```".to_string()
576            };
577
578            let language = detail.info_string.split_whitespace().next().unwrap_or("").to_string();
579
580            FencedCodeBlock {
581                line_idx,
582                language,
583                fence_marker,
584            }
585        })
586        .collect()
587}
588
589/// Compute disabled line ranges from disable/enable comments
590fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
591    let mut ranges = Vec::new();
592    let mut disabled_start: Option<usize> = None;
593
594    for (i, line) in content.lines().enumerate() {
595        let trimmed = line.trim();
596
597        if let Some(rules) = crate::inline_config::parse_disable_comment(trimmed)
598            && (rules.is_empty() || rules.contains(&rule_name))
599            && disabled_start.is_none()
600        {
601            disabled_start = Some(i);
602        }
603
604        if let Some(rules) = crate::inline_config::parse_enable_comment(trimmed)
605            && (rules.is_empty() || rules.contains(&rule_name))
606            && let Some(start) = disabled_start.take()
607        {
608            ranges.push((start, i));
609        }
610    }
611
612    // Handle unclosed disable
613    if let Some(start) = disabled_start {
614        ranges.push((start, usize::MAX));
615    }
616
617    ranges
618}
619
/// Returns `true` when `line_idx` lies in any half-open range `start..end` of `ranges`.
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&line_idx) {
            return true;
        }
    }
    false
}
624
/// Locate the language label on a fence opening line.
///
/// Returns the byte offsets `(start, end)` of the label within `line`: the first run
/// of non-whitespace characters after the fence marker. Returns `None` when the line
/// (after its leading whitespace) does not start with `fence_marker`, or when only
/// whitespace follows the marker.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let indent_len = line.len() - line.trim_start().len();
    let info = line[indent_len..].strip_prefix(fence_marker)?;
    let info_offset = indent_len + fence_marker.len();

    // Whitespace between the fence and the label is not part of the label.
    let gap = info.find(|ch: char| !ch.is_whitespace())?;
    let label_len = info[gap..]
        .find(char::is_whitespace)
        .unwrap_or(info.len() - gap);

    let start = info_offset + gap;
    Some((start, start + label_len))
}
650
651#[cfg(test)]
652mod tests {
653    use super::*;
654    use crate::lint_context::LintContext;
655
656    fn run_check(content: &str) -> LintResult {
657        let rule = MD040FencedCodeLanguage::default();
658        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
659        rule.check(&ctx)
660    }
661
662    fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
663        let rule = MD040FencedCodeLanguage::with_config(config);
664        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
665        rule.check(&ctx)
666    }
667
668    fn run_fix(content: &str) -> Result<String, LintError> {
669        let rule = MD040FencedCodeLanguage::default();
670        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
671        rule.fix(&ctx)
672    }
673
674    fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
675        let rule = MD040FencedCodeLanguage::with_config(config);
676        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
677        rule.fix(&ctx)
678    }
679
680    fn run_check_mkdocs(content: &str) -> LintResult {
681        let rule = MD040FencedCodeLanguage::default();
682        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
683        rule.check(&ctx)
684    }
685
686    // =========================================================================
687    // Basic functionality tests
688    // =========================================================================
689
690    #[test]
691    fn test_code_blocks_with_language_specified() {
692        let content = r#"# Test
693
694```python
695print("Hello, world!")
696```
697
698```javascript
699console.log("Hello!");
700```
701"#;
702        let result = run_check(content).unwrap();
703        assert!(result.is_empty(), "No warnings expected for code blocks with language");
704    }
705
706    #[test]
707    fn test_code_blocks_without_language() {
708        let content = r#"# Test
709
710```
711print("Hello, world!")
712```
713"#;
714        let result = run_check(content).unwrap();
715        assert_eq!(result.len(), 1);
716        assert_eq!(result[0].message, "Code block (```) missing language");
717        assert_eq!(result[0].line, 3);
718    }
719
720    #[test]
721    fn test_fix_method_adds_text_language() {
722        let content = r#"# Test
723
724```
725code without language
726```
727
728```python
729already has language
730```
731
732```
733another block without
734```
735"#;
736        let fixed = run_fix(content).unwrap();
737        assert!(fixed.contains("```text"));
738        assert!(fixed.contains("```python"));
739        assert_eq!(fixed.matches("```text").count(), 2);
740    }
741
742    #[test]
743    fn test_fix_preserves_indentation() {
744        let content = r#"# Test
745
746- List item
747  ```
748  indented code block
749  ```
750"#;
751        let fixed = run_fix(content).unwrap();
752        assert!(fixed.contains("  ```text"));
753    }
754
755    // =========================================================================
756    // Consistent mode tests
757    // =========================================================================
758
759    #[test]
760    fn test_consistent_mode_detects_inconsistency() {
761        let content = r#"```bash
762echo hi
763```
764
765```sh
766echo there
767```
768
769```bash
770echo again
771```
772"#;
773        let config = MD040Config {
774            style: LanguageStyle::Consistent,
775            ..Default::default()
776        };
777        let result = run_check_with_config(content, config).unwrap();
778        assert_eq!(result.len(), 1);
779        assert!(result[0].message.contains("Inconsistent"));
780        assert!(result[0].message.contains("sh"));
781        assert!(result[0].message.contains("bash"));
782    }
783
784    #[test]
785    fn test_consistent_mode_fix_normalizes() {
786        let content = r#"```bash
787echo hi
788```
789
790```sh
791echo there
792```
793
794```bash
795echo again
796```
797"#;
798        let config = MD040Config {
799            style: LanguageStyle::Consistent,
800            ..Default::default()
801        };
802        let fixed = run_fix_with_config(content, config).unwrap();
803        assert_eq!(fixed.matches("```bash").count(), 3);
804        assert_eq!(fixed.matches("```sh").count(), 0);
805    }
806
807    #[test]
808    fn test_consistent_mode_tie_break_uses_curated_default() {
809        // When there's a tie (1 bash, 1 sh), should use curated default (bash)
810        let content = r#"```bash
811echo hi
812```
813
814```sh
815echo there
816```
817"#;
818        let config = MD040Config {
819            style: LanguageStyle::Consistent,
820            ..Default::default()
821        };
822        let fixed = run_fix_with_config(content, config).unwrap();
823        // bash is the curated default for Shell
824        assert_eq!(fixed.matches("```bash").count(), 2);
825    }
826
827    #[test]
828    fn test_consistent_mode_with_preferred_alias() {
829        let content = r#"```bash
830echo hi
831```
832
833```sh
834echo there
835```
836"#;
837        let mut preferred = HashMap::new();
838        preferred.insert("Shell".to_string(), "sh".to_string());
839
840        let config = MD040Config {
841            style: LanguageStyle::Consistent,
842            preferred_aliases: preferred,
843            ..Default::default()
844        };
845        let fixed = run_fix_with_config(content, config).unwrap();
846        assert_eq!(fixed.matches("```sh").count(), 2);
847        assert_eq!(fixed.matches("```bash").count(), 0);
848    }
849
850    #[test]
851    fn test_consistent_mode_ignores_disabled_blocks() {
852        let content = r#"```bash
853echo hi
854```
855<!-- rumdl-disable MD040 -->
856```sh
857echo there
858```
859```sh
860echo again
861```
862<!-- rumdl-enable MD040 -->
863"#;
864        let config = MD040Config {
865            style: LanguageStyle::Consistent,
866            ..Default::default()
867        };
868        let result = run_check_with_config(content, config).unwrap();
869        assert!(result.is_empty(), "Disabled blocks should not affect consistency");
870    }
871
872    #[test]
873    fn test_fix_preserves_attributes() {
874        let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
875        let config = MD040Config {
876            style: LanguageStyle::Consistent,
877            ..Default::default()
878        };
879        let fixed = run_fix_with_config(content, config).unwrap();
880        assert!(fixed.contains("```bash {.highlight}"));
881    }
882
883    #[test]
884    fn test_fix_preserves_spacing_before_label() {
885        let content = "```bash\ncode\n```\n\n```  sh {.highlight}\ncode\n```";
886        let config = MD040Config {
887            style: LanguageStyle::Consistent,
888            ..Default::default()
889        };
890        let fixed = run_fix_with_config(content, config).unwrap();
891        assert!(fixed.contains("```  bash {.highlight}"));
892        assert!(!fixed.contains("```  sh {.highlight}"));
893    }
894
895    // =========================================================================
896    // Allowlist/denylist tests
897    // =========================================================================
898
899    #[test]
900    fn test_allowlist_blocks_unlisted() {
901        let content = "```java\ncode\n```";
902        let config = MD040Config {
903            allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
904            ..Default::default()
905        };
906        let result = run_check_with_config(content, config).unwrap();
907        assert_eq!(result.len(), 1);
908        assert!(result[0].message.contains("not in the allowed list"));
909    }
910
911    #[test]
912    fn test_allowlist_allows_listed() {
913        let content = "```python\ncode\n```";
914        let config = MD040Config {
915            allowed_languages: vec!["Python".to_string()],
916            ..Default::default()
917        };
918        let result = run_check_with_config(content, config).unwrap();
919        assert!(result.is_empty());
920    }
921
922    #[test]
923    fn test_allowlist_blocks_unknown_language() {
924        let content = "```mysterylang\ncode\n```";
925        let config = MD040Config {
926            allowed_languages: vec!["Python".to_string()],
927            ..Default::default()
928        };
929        let result = run_check_with_config(content, config).unwrap();
930        assert_eq!(result.len(), 1);
931        assert!(result[0].message.contains("allowed list"));
932    }
933
934    #[test]
935    fn test_allowlist_case_insensitive() {
936        let content = "```python\ncode\n```";
937        let config = MD040Config {
938            allowed_languages: vec!["PYTHON".to_string()],
939            ..Default::default()
940        };
941        let result = run_check_with_config(content, config).unwrap();
942        assert!(result.is_empty());
943    }
944
945    #[test]
946    fn test_denylist_blocks_listed() {
947        let content = "```java\ncode\n```";
948        let config = MD040Config {
949            disallowed_languages: vec!["Java".to_string()],
950            ..Default::default()
951        };
952        let result = run_check_with_config(content, config).unwrap();
953        assert_eq!(result.len(), 1);
954        assert!(result[0].message.contains("disallowed"));
955    }
956
957    #[test]
958    fn test_denylist_allows_unlisted() {
959        let content = "```python\ncode\n```";
960        let config = MD040Config {
961            disallowed_languages: vec!["Java".to_string()],
962            ..Default::default()
963        };
964        let result = run_check_with_config(content, config).unwrap();
965        assert!(result.is_empty());
966    }
967
968    #[test]
969    fn test_allowlist_takes_precedence_over_denylist() {
970        let content = "```python\ncode\n```";
971        let config = MD040Config {
972            allowed_languages: vec!["Python".to_string()],
973            disallowed_languages: vec!["Python".to_string()], // Should be ignored
974            ..Default::default()
975        };
976        let result = run_check_with_config(content, config).unwrap();
977        assert!(result.is_empty());
978    }
979
980    // =========================================================================
981    // Unknown language tests
982    // =========================================================================
983
984    #[test]
985    fn test_unknown_language_ignore_default() {
986        let content = "```mycustomlang\ncode\n```";
987        let result = run_check(content).unwrap();
988        assert!(result.is_empty(), "Unknown languages ignored by default");
989    }
990
991    #[test]
992    fn test_unknown_language_warn() {
993        let content = "```mycustomlang\ncode\n```";
994        let config = MD040Config {
995            unknown_language_action: UnknownLanguageAction::Warn,
996            ..Default::default()
997        };
998        let result = run_check_with_config(content, config).unwrap();
999        assert_eq!(result.len(), 1);
1000        assert!(result[0].message.contains("Unknown language"));
1001        assert!(result[0].message.contains("mycustomlang"));
1002        assert_eq!(result[0].severity, Severity::Warning);
1003    }
1004
1005    #[test]
1006    fn test_unknown_language_error() {
1007        let content = "```mycustomlang\ncode\n```";
1008        let config = MD040Config {
1009            unknown_language_action: UnknownLanguageAction::Error,
1010            ..Default::default()
1011        };
1012        let result = run_check_with_config(content, config).unwrap();
1013        assert_eq!(result.len(), 1);
1014        assert!(result[0].message.contains("Unknown language"));
1015        assert_eq!(result[0].severity, Severity::Error);
1016    }
1017
1018    // =========================================================================
1019    // Config validation tests
1020    // =========================================================================
1021
1022    #[test]
1023    fn test_invalid_preferred_alias_detected() {
1024        let mut preferred = HashMap::new();
1025        preferred.insert("Shell".to_string(), "invalid_alias".to_string());
1026
1027        let config = MD040Config {
1028            style: LanguageStyle::Consistent,
1029            preferred_aliases: preferred,
1030            ..Default::default()
1031        };
1032        let rule = MD040FencedCodeLanguage::with_config(config);
1033        let errors = rule.validate_config();
1034        assert_eq!(errors.len(), 1);
1035        assert!(errors[0].contains("Invalid alias"));
1036        assert!(errors[0].contains("invalid_alias"));
1037    }
1038
1039    #[test]
1040    fn test_unknown_language_in_preferred_aliases_detected() {
1041        let mut preferred = HashMap::new();
1042        preferred.insert("NotARealLanguage".to_string(), "nope".to_string());
1043
1044        let config = MD040Config {
1045            style: LanguageStyle::Consistent,
1046            preferred_aliases: preferred,
1047            ..Default::default()
1048        };
1049        let rule = MD040FencedCodeLanguage::with_config(config);
1050        let errors = rule.validate_config();
1051        assert_eq!(errors.len(), 1);
1052        assert!(errors[0].contains("Unknown language"));
1053    }
1054
1055    #[test]
1056    fn test_valid_preferred_alias_accepted() {
1057        let mut preferred = HashMap::new();
1058        preferred.insert("Shell".to_string(), "bash".to_string());
1059        preferred.insert("JavaScript".to_string(), "js".to_string());
1060
1061        let config = MD040Config {
1062            style: LanguageStyle::Consistent,
1063            preferred_aliases: preferred,
1064            ..Default::default()
1065        };
1066        let rule = MD040FencedCodeLanguage::with_config(config);
1067        let errors = rule.validate_config();
1068        assert!(errors.is_empty());
1069    }
1070
1071    #[test]
1072    fn test_config_error_uses_valid_line_column() {
1073        let config = md040_config::MD040Config {
1074            preferred_aliases: {
1075                let mut map = std::collections::HashMap::new();
1076                map.insert("Shell".to_string(), "invalid_alias".to_string());
1077                map
1078            },
1079            ..Default::default()
1080        };
1081        let rule = MD040FencedCodeLanguage::with_config(config);
1082
1083        let content = "```shell\necho hello\n```";
1084        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1085        let result = rule.check(&ctx).unwrap();
1086
1087        // Find the config error warning
1088        let config_error = result.iter().find(|w| w.message.contains("[config error]"));
1089        assert!(config_error.is_some(), "Should have a config error warning");
1090
1091        let warning = config_error.unwrap();
1092        // Line and column should be 1-indexed (not 0)
1093        assert!(
1094            warning.line >= 1,
1095            "Config error line should be >= 1, got {}",
1096            warning.line
1097        );
1098        assert!(
1099            warning.column >= 1,
1100            "Config error column should be >= 1, got {}",
1101            warning.column
1102        );
1103    }
1104
1105    // =========================================================================
1106    // Linguist resolution tests
1107    // =========================================================================
1108
1109    #[test]
1110    fn test_linguist_resolution() {
1111        assert_eq!(resolve_canonical("bash"), Some("Shell"));
1112        assert_eq!(resolve_canonical("sh"), Some("Shell"));
1113        assert_eq!(resolve_canonical("zsh"), Some("Shell"));
1114        assert_eq!(resolve_canonical("js"), Some("JavaScript"));
1115        assert_eq!(resolve_canonical("python"), Some("Python"));
1116        assert_eq!(resolve_canonical("unknown_lang"), None);
1117    }
1118
1119    #[test]
1120    fn test_linguist_resolution_case_insensitive() {
1121        assert_eq!(resolve_canonical("BASH"), Some("Shell"));
1122        assert_eq!(resolve_canonical("Bash"), Some("Shell"));
1123        assert_eq!(resolve_canonical("Python"), Some("Python"));
1124        assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
1125    }
1126
1127    #[test]
1128    fn test_alias_validation() {
1129        assert!(is_valid_alias("Shell", "bash"));
1130        assert!(is_valid_alias("Shell", "sh"));
1131        assert!(is_valid_alias("Shell", "zsh"));
1132        assert!(!is_valid_alias("Shell", "python"));
1133        assert!(!is_valid_alias("Shell", "invalid"));
1134    }
1135
1136    #[test]
1137    fn test_default_alias() {
1138        assert_eq!(default_alias("Shell"), Some("bash"));
1139        assert_eq!(default_alias("JavaScript"), Some("js"));
1140        assert_eq!(default_alias("Python"), Some("python"));
1141    }
1142
1143    // =========================================================================
1144    // Edge case tests
1145    // =========================================================================
1146
1147    #[test]
1148    fn test_mixed_case_labels_normalized() {
1149        let content = r#"```BASH
1150echo hi
1151```
1152
1153```Bash
1154echo there
1155```
1156
1157```bash
1158echo again
1159```
1160"#;
1161        let config = MD040Config {
1162            style: LanguageStyle::Consistent,
1163            ..Default::default()
1164        };
1165        // All should resolve to Shell, most prevalent should win
1166        let result = run_check_with_config(content, config).unwrap();
1167        // "bash" appears 1x, "Bash" appears 1x, "BASH" appears 1x
1168        // All are different strings, so there's a 3-way tie
1169        // Should pick curated default "bash" or alphabetically first
1170        assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
1171    }
1172
1173    #[test]
1174    fn test_multiple_languages_independent() {
1175        let content = r#"```bash
1176shell code
1177```
1178
1179```python
1180python code
1181```
1182
1183```sh
1184more shell
1185```
1186
1187```python3
1188more python
1189```
1190"#;
1191        let config = MD040Config {
1192            style: LanguageStyle::Consistent,
1193            ..Default::default()
1194        };
1195        let result = run_check_with_config(content, config).unwrap();
1196        // Should have 2 warnings: one for sh (inconsistent with bash) and one for python3 (inconsistent with python)
1197        assert_eq!(result.len(), 2);
1198    }
1199
1200    #[test]
1201    fn test_tilde_fences() {
1202        let content = r#"~~~bash
1203echo hi
1204~~~
1205
1206~~~sh
1207echo there
1208~~~
1209"#;
1210        let config = MD040Config {
1211            style: LanguageStyle::Consistent,
1212            ..Default::default()
1213        };
1214        let result = run_check_with_config(content, config.clone()).unwrap();
1215        assert_eq!(result.len(), 1);
1216
1217        let fixed = run_fix_with_config(content, config).unwrap();
1218        assert!(fixed.contains("~~~bash"));
1219        assert!(!fixed.contains("~~~sh"));
1220    }
1221
1222    #[test]
1223    fn test_longer_fence_markers_preserved() {
1224        let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
1225        let config = MD040Config {
1226            style: LanguageStyle::Consistent,
1227            ..Default::default()
1228        };
1229        let fixed = run_fix_with_config(content, config).unwrap();
1230        assert!(fixed.contains("````bash"));
1231        assert!(fixed.contains("```bash"));
1232    }
1233
1234    #[test]
1235    fn test_empty_document() {
1236        let result = run_check("").unwrap();
1237        assert!(result.is_empty());
1238    }
1239
1240    #[test]
1241    fn test_no_code_blocks() {
1242        let content = "# Just a heading\n\nSome text.";
1243        let result = run_check(content).unwrap();
1244        assert!(result.is_empty());
1245    }
1246
1247    #[test]
1248    fn test_single_code_block_no_inconsistency() {
1249        let content = "```bash\necho hi\n```";
1250        let config = MD040Config {
1251            style: LanguageStyle::Consistent,
1252            ..Default::default()
1253        };
1254        let result = run_check_with_config(content, config).unwrap();
1255        assert!(result.is_empty(), "Single block has no inconsistency");
1256    }
1257
1258    #[test]
1259    fn test_idempotent_fix() {
1260        let content = r#"```bash
1261echo hi
1262```
1263
1264```sh
1265echo there
1266```
1267"#;
1268        let config = MD040Config {
1269            style: LanguageStyle::Consistent,
1270            ..Default::default()
1271        };
1272        let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
1273        let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
1274        assert_eq!(fixed1, fixed2, "Fix should be idempotent");
1275    }
1276
1277    // =========================================================================
1278    // MkDocs superfences tests
1279    // =========================================================================
1280
1281    #[test]
1282    fn test_mkdocs_superfences_title_only() {
1283        // title= attribute without language should not warn in MkDocs flavor
1284        let content = r#"```title="Example"
1285echo hi
1286```
1287"#;
1288        let result = run_check_mkdocs(content).unwrap();
1289        assert!(
1290            result.is_empty(),
1291            "MkDocs superfences with title= should not require language"
1292        );
1293    }
1294
1295    #[test]
1296    fn test_mkdocs_superfences_hl_lines() {
1297        // hl_lines= attribute without language should not warn
1298        let content = r#"```hl_lines="1 2"
1299line 1
1300line 2
1301```
1302"#;
1303        let result = run_check_mkdocs(content).unwrap();
1304        assert!(
1305            result.is_empty(),
1306            "MkDocs superfences with hl_lines= should not require language"
1307        );
1308    }
1309
1310    #[test]
1311    fn test_mkdocs_superfences_linenums() {
1312        // linenums= attribute without language should not warn
1313        let content = r#"```linenums="1"
1314line 1
1315line 2
1316```
1317"#;
1318        let result = run_check_mkdocs(content).unwrap();
1319        assert!(
1320            result.is_empty(),
1321            "MkDocs superfences with linenums= should not require language"
1322        );
1323    }
1324
1325    #[test]
1326    fn test_mkdocs_superfences_class() {
1327        // Custom class (starting with .) should not warn
1328        let content = r#"```.my-class
1329some text
1330```
1331"#;
1332        let result = run_check_mkdocs(content).unwrap();
1333        assert!(
1334            result.is_empty(),
1335            "MkDocs superfences with .class should not require language"
1336        );
1337    }
1338
1339    #[test]
1340    fn test_mkdocs_superfences_id() {
1341        // Custom ID (starting with #) should not warn
1342        let content = r#"```#my-id
1343some text
1344```
1345"#;
1346        let result = run_check_mkdocs(content).unwrap();
1347        assert!(
1348            result.is_empty(),
1349            "MkDocs superfences with #id should not require language"
1350        );
1351    }
1352
1353    #[test]
1354    fn test_mkdocs_superfences_with_language() {
1355        // Language with superfences attributes should work fine
1356        let content = r#"```python title="Example" hl_lines="1"
1357print("hello")
1358```
1359"#;
1360        let result = run_check_mkdocs(content).unwrap();
1361        assert!(result.is_empty(), "Code block with language and attrs should pass");
1362    }
1363
1364    #[test]
1365    fn test_standard_flavor_no_special_handling() {
1366        // In Standard flavor, title= should still warn
1367        let content = r#"```title="Example"
1368echo hi
1369```
1370"#;
1371        let result = run_check(content).unwrap();
1372        assert_eq!(
1373            result.len(),
1374            1,
1375            "Standard flavor should warn about title= without language"
1376        );
1377    }
1378}