Skip to main content

rumdl_lib/rules/
md040_fenced_code_language.rs

1use crate::linguist_data::{default_alias, get_aliases, is_valid_alias, resolve_canonical};
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rule_config_serde::{RuleConfig, load_rule_config};
4use crate::utils::range_utils::calculate_line_range;
5use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
6use std::collections::HashMap;
7
8/// Rule MD040: Fenced code blocks should have a language
9///
10/// See [docs/md040.md](../../docs/md040.md) for full documentation, configuration, and examples.
11pub mod md040_config;
12
13// ============================================================================
14// MkDocs Superfences Attribute Detection
15// ============================================================================
16
/// Info-string prefixes that MkDocs superfences reads as block attributes
/// instead of a language name.
/// In MkDocs flavor a fence may carry these without naming a language.
/// See: https://facelessuser.github.io/pymdown-extensions/extensions/superfences/
const MKDOCS_SUPERFENCES_ATTR_PREFIXES: &[&str] = &[
    "title=",    // Block title
    "hl_lines=", // Highlighted lines
    "linenums=", // Line numbers
    ".",         // CSS class (e.g., .annotate)
    "#",         // CSS id
];

/// Returns `true` when `s` begins with one of the superfences attribute prefixes.
#[inline]
fn is_superfences_attribute(s: &str) -> bool {
    for prefix in MKDOCS_SUPERFENCES_ATTR_PREFIXES {
        if s.starts_with(*prefix) {
            return true;
        }
    }
    false
}
35use md040_config::{LanguageStyle, MD040Config, UnknownLanguageAction};
36
/// The opening fence of one fenced code block found in the document.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FencedCodeBlock {
    /// 0-indexed line number where the code block starts
    line_idx: usize,
    /// The language/info string (empty if no language specified)
    language: String,
    /// The fence marker used (``` or ~~~)
    fence_marker: String,
}
45
/// Rule MD040: fenced code blocks should declare a language.
///
/// Depending on its configuration the rule can also restrict which languages
/// are accepted, flag labels that do not resolve to a known language, and
/// require one consistent label per language within a document.
#[derive(Debug, Clone, Default)]
pub struct MD040FencedCodeLanguage {
    /// Settings read by the rule: label style, allowed/disallowed languages,
    /// preferred aliases and the action taken for unknown languages.
    config: MD040Config,
}
50
51impl MD040FencedCodeLanguage {
52    pub fn new() -> Self {
53        Self::default()
54    }
55
    /// Creates the rule using the supplied configuration as-is.
    ///
    /// The configuration is not validated here; `check` reports invalid
    /// `preferred-aliases` entries as warnings on each run.
    pub fn with_config(config: MD040Config) -> Self {
        Self { config }
    }
59
60    /// Validate the configuration and return any errors
61    fn validate_config(&self) -> Vec<String> {
62        let mut errors = Vec::new();
63
64        // Validate preferred-aliases: check that each alias is valid for its language
65        for (canonical, alias) in &self.config.preferred_aliases {
66            // Find the actual canonical name (case-insensitive)
67            if let Some(actual_canonical) = resolve_canonical(canonical) {
68                if !is_valid_alias(actual_canonical, alias)
69                    && let Some(valid_aliases) = get_aliases(actual_canonical)
70                {
71                    let valid_list: Vec<_> = valid_aliases.iter().take(5).collect();
72                    let valid_str = valid_list
73                        .iter()
74                        .map(|s| format!("'{s}'"))
75                        .collect::<Vec<_>>()
76                        .join(", ");
77                    let suffix = if valid_aliases.len() > 5 { ", ..." } else { "" };
78                    errors.push(format!(
79                        "Invalid alias '{alias}' for language '{actual_canonical}'. Valid aliases include: {valid_str}{suffix}"
80                    ));
81                }
82            } else {
83                errors.push(format!(
84                    "Unknown language '{canonical}' in preferred-aliases. Use GitHub Linguist canonical names."
85                ));
86            }
87        }
88
89        errors
90    }
91
92    /// Determine the preferred label for each canonical language in the document
93    fn compute_preferred_labels(
94        &self,
95        blocks: &[FencedCodeBlock],
96        disabled_ranges: &[(usize, usize)],
97    ) -> HashMap<String, String> {
98        // Group labels by canonical language
99        let mut by_canonical: HashMap<String, Vec<&str>> = HashMap::new();
100
101        for block in blocks {
102            if is_line_disabled(disabled_ranges, block.line_idx) {
103                continue;
104            }
105            if block.language.is_empty() {
106                continue;
107            }
108            if let Some(canonical) = resolve_canonical(&block.language) {
109                by_canonical
110                    .entry(canonical.to_string())
111                    .or_default()
112                    .push(&block.language);
113            }
114        }
115
116        // Determine winning label for each canonical language
117        let mut result = HashMap::new();
118
119        for (canonical, labels) in by_canonical {
120            // Check for user override first (case-insensitive lookup)
121            let winner = if let Some(preferred) = self
122                .config
123                .preferred_aliases
124                .iter()
125                .find(|(k, _)| k.eq_ignore_ascii_case(&canonical))
126                .map(|(_, v)| v.clone())
127            {
128                preferred
129            } else {
130                // Find most prevalent label
131                let mut counts: HashMap<&str, usize> = HashMap::new();
132                for label in &labels {
133                    *counts.entry(*label).or_default() += 1;
134                }
135
136                let max_count = counts.values().max().copied().unwrap_or(0);
137                let winners: Vec<_> = counts
138                    .iter()
139                    .filter(|(_, c)| **c == max_count)
140                    .map(|(l, _)| *l)
141                    .collect();
142
143                if winners.len() == 1 {
144                    winners[0].to_string()
145                } else {
146                    // Tie-break: use curated default if available, otherwise alphabetically first
147                    default_alias(&canonical)
148                        .filter(|default| winners.contains(default))
149                        .map(|s| s.to_string())
150                        .unwrap_or_else(|| winners.into_iter().min().unwrap().to_string())
151                }
152            };
153
154            result.insert(canonical, winner);
155        }
156
157        result
158    }
159
160    /// Check if a language is allowed based on config
161    fn check_language_allowed(&self, canonical: Option<&str>, original_label: &str) -> Option<String> {
162        // Allowlist takes precedence
163        if !self.config.allowed_languages.is_empty() {
164            let allowed = self.config.allowed_languages.join(", ");
165            let Some(canonical) = canonical else {
166                return Some(format!(
167                    "Language '{original_label}' is not in the allowed list: {allowed}"
168                ));
169            };
170            if !self
171                .config
172                .allowed_languages
173                .iter()
174                .any(|a| a.eq_ignore_ascii_case(canonical))
175            {
176                return Some(format!(
177                    "Language '{original_label}' ({canonical}) is not in the allowed list: {allowed}"
178                ));
179            }
180        } else if !self.config.disallowed_languages.is_empty()
181            && canonical.is_some_and(|canonical| {
182                self.config
183                    .disallowed_languages
184                    .iter()
185                    .any(|d| d.eq_ignore_ascii_case(canonical))
186            })
187        {
188            let canonical = canonical.unwrap_or("unknown");
189            return Some(format!("Language '{original_label}' ({canonical}) is disallowed"));
190        }
191        None
192    }
193
194    /// Check for unknown language based on config
195    fn check_unknown_language(&self, label: &str) -> Option<(String, Severity)> {
196        if resolve_canonical(label).is_some() {
197            return None;
198        }
199
200        match self.config.unknown_language_action {
201            UnknownLanguageAction::Ignore => None,
202            UnknownLanguageAction::Warn => Some((
203                format!("Unknown language '{label}' (not in GitHub Linguist). Syntax highlighting may not work."),
204                Severity::Warning,
205            )),
206            UnknownLanguageAction::Error => Some((
207                format!("Unknown language '{label}' (not in GitHub Linguist)"),
208                Severity::Error,
209            )),
210        }
211    }
212}
213
214impl Rule for MD040FencedCodeLanguage {
    /// Returns the rule identifier, `"MD040"`; this is also the name matched against
    /// disable/enable comments by `check` and `fix`.
    fn name(&self) -> &'static str {
        "MD040"
    }
218
    /// Returns a one-line description of what the rule enforces.
    fn description(&self) -> &'static str {
        "Code blocks should have a language specified"
    }
222
223    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
224        let content = ctx.content;
225        let mut warnings = Vec::new();
226
227        // Validate config and emit warnings for invalid configuration
228        for error in self.validate_config() {
229            warnings.push(LintWarning {
230                rule_name: Some(self.name().to_string()),
231                line: 1,
232                column: 1,
233                end_line: 1,
234                end_column: 1,
235                message: format!("[config error] {error}"),
236                severity: Severity::Error,
237                fix: None,
238            });
239        }
240
241        // Use pulldown-cmark to detect fenced code blocks with language info
242        let fenced_blocks = detect_fenced_code_blocks(content, &ctx.line_offsets);
243
244        // Pre-compute disabled ranges for efficient lookup
245        let disabled_ranges = compute_disabled_ranges(content, self.name());
246
247        // Compute preferred labels for consistent mode
248        let preferred_labels = if self.config.style == LanguageStyle::Consistent {
249            self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
250        } else {
251            HashMap::new()
252        };
253
254        for block in &fenced_blocks {
255            // Skip if this line is in a disabled range
256            if is_line_disabled(&disabled_ranges, block.line_idx) {
257                continue;
258            }
259
260            // Get the actual line content for additional checks
261            let line = content.lines().nth(block.line_idx).unwrap_or("");
262            let trimmed = line.trim();
263            let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
264
265            // Check if fence has MkDocs superfences attributes but no language
266            let has_mkdocs_attrs_only =
267                ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
268
269            // Check for Quarto/RMarkdown code chunk syntax: {language} or {language, options}
270            let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
271                && after_fence.starts_with('{')
272                && after_fence.contains('}');
273
274            // Determine if this block needs a language specification
275            // In MkDocs flavor, superfences attributes without language are acceptable
276            let needs_language =
277                !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
278
279            if needs_language && !has_quarto_syntax {
280                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
281
282                warnings.push(LintWarning {
283                    rule_name: Some(self.name().to_string()),
284                    line: start_line,
285                    column: start_col,
286                    end_line,
287                    end_column: end_col,
288                    message: "Code block (```) missing language".to_string(),
289                    severity: Severity::Warning,
290                    fix: Some(Fix {
291                        range: {
292                            let trimmed_start = line.len() - line.trim_start().len();
293                            let fence_len = block.fence_marker.len();
294                            let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
295                            let fence_start_byte = line_start_byte + trimmed_start;
296                            let fence_end_byte = fence_start_byte + fence_len;
297                            fence_start_byte..fence_end_byte
298                        },
299                        replacement: format!("{}text", block.fence_marker),
300                    }),
301                });
302                continue;
303            }
304
305            // Skip further checks for special syntax
306            if has_quarto_syntax {
307                continue;
308            }
309
310            let canonical = resolve_canonical(&block.language);
311
312            // Check language restrictions (allowlist/denylist)
313            if let Some(msg) = self.check_language_allowed(canonical, &block.language) {
314                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
315
316                warnings.push(LintWarning {
317                    rule_name: Some(self.name().to_string()),
318                    line: start_line,
319                    column: start_col,
320                    end_line,
321                    end_column: end_col,
322                    message: msg,
323                    severity: Severity::Warning,
324                    fix: None,
325                });
326                continue;
327            }
328
329            // Check for unknown language (only if not handled by allowlist)
330            if canonical.is_none() {
331                if let Some((msg, severity)) = self.check_unknown_language(&block.language) {
332                    let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
333
334                    warnings.push(LintWarning {
335                        rule_name: Some(self.name().to_string()),
336                        line: start_line,
337                        column: start_col,
338                        end_line,
339                        end_column: end_col,
340                        message: msg,
341                        severity,
342                        fix: None,
343                    });
344                }
345                continue;
346            }
347
348            // Check consistency
349            if self.config.style == LanguageStyle::Consistent
350                && let Some(preferred) = preferred_labels.get(canonical.unwrap())
351                && &block.language != preferred
352            {
353                let (start_line, start_col, end_line, end_col) = calculate_line_range(block.line_idx + 1, line);
354
355                let fix = find_label_span(line, &block.fence_marker).map(|(label_start, label_end)| {
356                    let line_start_byte = ctx.line_offsets.get(block.line_idx).copied().unwrap_or(0);
357                    Fix {
358                        range: (line_start_byte + label_start)..(line_start_byte + label_end),
359                        replacement: preferred.clone(),
360                    }
361                });
362                let lang = &block.language;
363                let canonical = canonical.unwrap();
364
365                warnings.push(LintWarning {
366                    rule_name: Some(self.name().to_string()),
367                    line: start_line,
368                    column: start_col,
369                    end_line,
370                    end_column: end_col,
371                    message: format!("Inconsistent language label '{lang}' for {canonical} (use '{preferred}')"),
372                    severity: Severity::Warning,
373                    fix,
374                });
375            }
376        }
377
378        Ok(warnings)
379    }
380
381    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
382        let content = ctx.content;
383
384        // Use pulldown-cmark to detect fenced code blocks
385        let fenced_blocks = detect_fenced_code_blocks(content, &ctx.line_offsets);
386
387        // Pre-compute disabled ranges
388        let disabled_ranges = compute_disabled_ranges(content, self.name());
389
390        // Compute preferred labels for consistent mode
391        let preferred_labels = if self.config.style == LanguageStyle::Consistent {
392            self.compute_preferred_labels(&fenced_blocks, &disabled_ranges)
393        } else {
394            HashMap::new()
395        };
396
397        // Build a set of line indices that need fixing
398        let mut lines_to_fix: std::collections::HashMap<usize, FixAction> = std::collections::HashMap::new();
399
400        for block in &fenced_blocks {
401            if is_line_disabled(&disabled_ranges, block.line_idx) {
402                continue;
403            }
404
405            // Skip lines where this rule is disabled by inline config
406            if ctx.inline_config().is_rule_disabled(self.name(), block.line_idx + 1) {
407                continue;
408            }
409
410            let line = content.lines().nth(block.line_idx).unwrap_or("");
411            let trimmed = line.trim();
412            let after_fence = trimmed.strip_prefix(&block.fence_marker).unwrap_or("").trim();
413
414            let has_mkdocs_attrs_only =
415                ctx.flavor == crate::config::MarkdownFlavor::MkDocs && is_superfences_attribute(after_fence);
416
417            let has_quarto_syntax = ctx.flavor == crate::config::MarkdownFlavor::Quarto
418                && after_fence.starts_with('{')
419                && after_fence.contains('}');
420
421            let needs_language =
422                !has_mkdocs_attrs_only && (block.language.is_empty() || is_superfences_attribute(&block.language));
423
424            if needs_language && !has_quarto_syntax {
425                lines_to_fix.insert(
426                    block.line_idx,
427                    FixAction::AddLanguage {
428                        fence_marker: block.fence_marker.clone(),
429                        has_mkdocs_attrs_only,
430                    },
431                );
432            } else if !has_quarto_syntax
433                && self.config.style == LanguageStyle::Consistent
434                && let Some(canonical) = resolve_canonical(&block.language)
435                && let Some(preferred) = preferred_labels.get(canonical)
436                && &block.language != preferred
437            {
438                lines_to_fix.insert(
439                    block.line_idx,
440                    FixAction::NormalizeLabel {
441                        fence_marker: block.fence_marker.clone(),
442                        new_label: preferred.clone(),
443                    },
444                );
445            }
446        }
447
448        // Build the result by iterating through lines
449        let mut result = String::new();
450        for (i, line) in content.lines().enumerate() {
451            if let Some(action) = lines_to_fix.get(&i) {
452                match action {
453                    FixAction::AddLanguage {
454                        fence_marker,
455                        has_mkdocs_attrs_only,
456                    } => {
457                        let indent = &line[..line.len() - line.trim_start().len()];
458                        let trimmed = line.trim();
459                        let after_fence = trimmed.strip_prefix(fence_marker).unwrap_or("").trim();
460
461                        if *has_mkdocs_attrs_only {
462                            result.push_str(&format!("{indent}{fence_marker}text {after_fence}\n"));
463                        } else {
464                            result.push_str(&format!("{indent}{fence_marker}text\n"));
465                        }
466                    }
467                    FixAction::NormalizeLabel {
468                        fence_marker,
469                        new_label,
470                    } => {
471                        if let Some((label_start, label_end)) = find_label_span(line, fence_marker) {
472                            result.push_str(&line[..label_start]);
473                            result.push_str(new_label);
474                            result.push_str(&line[label_end..]);
475                            result.push('\n');
476                        } else {
477                            result.push_str(line);
478                            result.push('\n');
479                        }
480                    }
481                }
482            } else {
483                result.push_str(line);
484                result.push('\n');
485            }
486        }
487
488        // Remove trailing newline if the original content didn't have one
489        if !content.ends_with('\n') {
490            result.pop();
491        }
492
493        Ok(result)
494    }
495
    /// Get the category of this rule for selective processing.
    ///
    /// MD040 is reported as a [`RuleCategory::CodeBlock`] rule.
    fn category(&self) -> RuleCategory {
        RuleCategory::CodeBlock
    }
500
501    /// Check if this rule should be skipped
502    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
503        ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
504    }
505
    /// Returns this rule as a `&dyn Any` reference.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
509
510    fn default_config_section(&self) -> Option<(String, toml::Value)> {
511        let default_config = MD040Config::default();
512        let json_value = serde_json::to_value(&default_config).ok()?;
513        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
514
515        if let toml::Value::Table(table) = toml_value {
516            if !table.is_empty() {
517                Some((MD040Config::RULE_NAME.to_string(), toml::Value::Table(table)))
518            } else {
519                None
520            }
521        } else {
522            None
523        }
524    }
525
526    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
527    where
528        Self: Sized,
529    {
530        let rule_config: MD040Config = load_rule_config(config);
531        Box::new(MD040FencedCodeLanguage::with_config(rule_config))
532    }
533}
534
/// The edit `fix` applies to a single opening-fence line.
#[derive(Debug, Clone)]
enum FixAction {
    /// The fence has no language: rewrite the line as `<indent><fence>text`.
    AddLanguage {
        /// Fence marker found on the line (a run of backticks or tildes).
        fence_marker: String,
        /// When `true`, the text that followed the fence is kept after `text`,
        /// separated by a space; when `false`, it is dropped.
        has_mkdocs_attrs_only: bool,
    },
    /// The fence has a label that should be replaced by `new_label`; the rest of the
    /// line is kept.
    NormalizeLabel {
        /// Fence marker found on the line, used to locate the label.
        fence_marker: String,
        /// Label to write in place of the current one.
        new_label: String,
    },
}
546
547/// Detect fenced code blocks using pulldown-cmark, returning info about each block's opening fence
548fn detect_fenced_code_blocks(content: &str, line_offsets: &[usize]) -> Vec<FencedCodeBlock> {
549    let mut blocks = Vec::new();
550    let options = Options::all();
551    let parser = Parser::new_ext(content, options).into_offset_iter();
552
553    for (event, range) in parser {
554        if let Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) = event {
555            // Find the line index for this byte offset
556            let line_idx = line_idx_from_offset(line_offsets, range.start);
557
558            // Determine fence marker from the actual line content
559            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
560            let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
561            let line = content.get(line_start..line_end).unwrap_or("");
562            let trimmed = line.trim();
563            let fence_marker = if trimmed.starts_with('`') {
564                let count = trimmed.chars().take_while(|&c| c == '`').count();
565                "`".repeat(count)
566            } else if trimmed.starts_with('~') {
567                let count = trimmed.chars().take_while(|&c| c == '~').count();
568                "~".repeat(count)
569            } else {
570                "```".to_string() // Fallback
571            };
572
573            // Extract just the language (first word of info string)
574            let language = info.split_whitespace().next().unwrap_or("").to_string();
575
576            blocks.push(FencedCodeBlock {
577                line_idx,
578                language,
579                fence_marker,
580            });
581        }
582    }
583
584    blocks
585}
586
/// Maps a byte offset to the index of the line containing it.
///
/// `line_offsets` must be sorted ascending (it is searched with a binary search). An
/// offset that is itself a line start maps to that line; any other offset maps to the
/// closest preceding line start, or to line 0 when there is none.
#[inline]
fn line_idx_from_offset(line_offsets: &[usize], offset: usize) -> usize {
    line_offsets
        .binary_search(&offset)
        .unwrap_or_else(|insert_at| insert_at.saturating_sub(1))
}
594
595/// Compute disabled line ranges from disable/enable comments
596fn compute_disabled_ranges(content: &str, rule_name: &str) -> Vec<(usize, usize)> {
597    let mut ranges = Vec::new();
598    let mut disabled_start: Option<usize> = None;
599
600    for (i, line) in content.lines().enumerate() {
601        let trimmed = line.trim();
602
603        if let Some(rules) = crate::rule::parse_disable_comment(trimmed)
604            && (rules.is_empty() || rules.contains(&rule_name))
605            && disabled_start.is_none()
606        {
607            disabled_start = Some(i);
608        }
609
610        if let Some(rules) = crate::rule::parse_enable_comment(trimmed)
611            && (rules.is_empty() || rules.contains(&rule_name))
612            && let Some(start) = disabled_start.take()
613        {
614            ranges.push((start, i));
615        }
616    }
617
618    // Handle unclosed disable
619    if let Some(start) = disabled_start {
620        ranges.push((start, usize::MAX));
621    }
622
623    ranges
624}
625
/// Returns `true` when `line_idx` lies inside any of the half-open ranges
/// `start..end` (start included, end excluded).
fn is_line_disabled(ranges: &[(usize, usize)], line_idx: usize) -> bool {
    for &(start, end) in ranges {
        if (start..end).contains(&line_idx) {
            return true;
        }
    }
    false
}
630
/// Locates the language label on a fence line.
///
/// Returns the `(start, end)` byte offsets of the label within `line`, where the label
/// is the first run of non-whitespace characters after `fence_marker`. Returns `None`
/// when the line (ignoring leading whitespace) does not start with `fence_marker`, or
/// when nothing but whitespace follows the marker.
fn find_label_span(line: &str, fence_marker: &str) -> Option<(usize, usize)> {
    let after_fence = line.trim_start().strip_prefix(fence_marker)?;

    let label_onwards = after_fence.trim_start();
    if label_onwards.is_empty() {
        return None;
    }

    // Everything trimmed so far sits in front of the label, so the remaining length
    // gives the label's start offset within the full line.
    let label_start = line.len() - label_onwards.len();
    let label_len = label_onwards
        .find(char::is_whitespace)
        .unwrap_or(label_onwards.len());

    Some((label_start, label_start + label_len))
}
656
657#[cfg(test)]
658mod tests {
659    use super::*;
660    use crate::lint_context::LintContext;
661
662    fn run_check(content: &str) -> LintResult {
663        let rule = MD040FencedCodeLanguage::default();
664        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
665        rule.check(&ctx)
666    }
667
668    fn run_check_with_config(content: &str, config: MD040Config) -> LintResult {
669        let rule = MD040FencedCodeLanguage::with_config(config);
670        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
671        rule.check(&ctx)
672    }
673
674    fn run_fix(content: &str) -> Result<String, LintError> {
675        let rule = MD040FencedCodeLanguage::default();
676        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
677        rule.fix(&ctx)
678    }
679
680    fn run_fix_with_config(content: &str, config: MD040Config) -> Result<String, LintError> {
681        let rule = MD040FencedCodeLanguage::with_config(config);
682        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
683        rule.fix(&ctx)
684    }
685
686    fn run_check_mkdocs(content: &str) -> LintResult {
687        let rule = MD040FencedCodeLanguage::default();
688        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
689        rule.check(&ctx)
690    }
691
692    // =========================================================================
693    // Basic functionality tests
694    // =========================================================================
695
696    #[test]
697    fn test_code_blocks_with_language_specified() {
698        let content = r#"# Test
699
700```python
701print("Hello, world!")
702```
703
704```javascript
705console.log("Hello!");
706```
707"#;
708        let result = run_check(content).unwrap();
709        assert!(result.is_empty(), "No warnings expected for code blocks with language");
710    }
711
712    #[test]
713    fn test_code_blocks_without_language() {
714        let content = r#"# Test
715
716```
717print("Hello, world!")
718```
719"#;
720        let result = run_check(content).unwrap();
721        assert_eq!(result.len(), 1);
722        assert_eq!(result[0].message, "Code block (```) missing language");
723        assert_eq!(result[0].line, 3);
724    }
725
726    #[test]
727    fn test_fix_method_adds_text_language() {
728        let content = r#"# Test
729
730```
731code without language
732```
733
734```python
735already has language
736```
737
738```
739another block without
740```
741"#;
742        let fixed = run_fix(content).unwrap();
743        assert!(fixed.contains("```text"));
744        assert!(fixed.contains("```python"));
745        assert_eq!(fixed.matches("```text").count(), 2);
746    }
747
748    #[test]
749    fn test_fix_preserves_indentation() {
750        let content = r#"# Test
751
752- List item
753  ```
754  indented code block
755  ```
756"#;
757        let fixed = run_fix(content).unwrap();
758        assert!(fixed.contains("  ```text"));
759    }
760
761    // =========================================================================
762    // Consistent mode tests
763    // =========================================================================
764
765    #[test]
766    fn test_consistent_mode_detects_inconsistency() {
767        let content = r#"```bash
768echo hi
769```
770
771```sh
772echo there
773```
774
775```bash
776echo again
777```
778"#;
779        let config = MD040Config {
780            style: LanguageStyle::Consistent,
781            ..Default::default()
782        };
783        let result = run_check_with_config(content, config).unwrap();
784        assert_eq!(result.len(), 1);
785        assert!(result[0].message.contains("Inconsistent"));
786        assert!(result[0].message.contains("sh"));
787        assert!(result[0].message.contains("bash"));
788    }
789
790    #[test]
791    fn test_consistent_mode_fix_normalizes() {
792        let content = r#"```bash
793echo hi
794```
795
796```sh
797echo there
798```
799
800```bash
801echo again
802```
803"#;
804        let config = MD040Config {
805            style: LanguageStyle::Consistent,
806            ..Default::default()
807        };
808        let fixed = run_fix_with_config(content, config).unwrap();
809        assert_eq!(fixed.matches("```bash").count(), 3);
810        assert_eq!(fixed.matches("```sh").count(), 0);
811    }
812
813    #[test]
814    fn test_consistent_mode_tie_break_uses_curated_default() {
815        // When there's a tie (1 bash, 1 sh), should use curated default (bash)
816        let content = r#"```bash
817echo hi
818```
819
820```sh
821echo there
822```
823"#;
824        let config = MD040Config {
825            style: LanguageStyle::Consistent,
826            ..Default::default()
827        };
828        let fixed = run_fix_with_config(content, config).unwrap();
829        // bash is the curated default for Shell
830        assert_eq!(fixed.matches("```bash").count(), 2);
831    }
832
833    #[test]
834    fn test_consistent_mode_with_preferred_alias() {
835        let content = r#"```bash
836echo hi
837```
838
839```sh
840echo there
841```
842"#;
843        let mut preferred = HashMap::new();
844        preferred.insert("Shell".to_string(), "sh".to_string());
845
846        let config = MD040Config {
847            style: LanguageStyle::Consistent,
848            preferred_aliases: preferred,
849            ..Default::default()
850        };
851        let fixed = run_fix_with_config(content, config).unwrap();
852        assert_eq!(fixed.matches("```sh").count(), 2);
853        assert_eq!(fixed.matches("```bash").count(), 0);
854    }
855
856    #[test]
857    fn test_consistent_mode_ignores_disabled_blocks() {
858        let content = r#"```bash
859echo hi
860```
861<!-- rumdl-disable MD040 -->
862```sh
863echo there
864```
865```sh
866echo again
867```
868<!-- rumdl-enable MD040 -->
869"#;
870        let config = MD040Config {
871            style: LanguageStyle::Consistent,
872            ..Default::default()
873        };
874        let result = run_check_with_config(content, config).unwrap();
875        assert!(result.is_empty(), "Disabled blocks should not affect consistency");
876    }
877
878    #[test]
879    fn test_fix_preserves_attributes() {
880        let content = "```sh {.highlight}\ncode\n```\n\n```bash\nmore\n```";
881        let config = MD040Config {
882            style: LanguageStyle::Consistent,
883            ..Default::default()
884        };
885        let fixed = run_fix_with_config(content, config).unwrap();
886        assert!(fixed.contains("```bash {.highlight}"));
887    }
888
889    #[test]
890    fn test_fix_preserves_spacing_before_label() {
891        let content = "```bash\ncode\n```\n\n```  sh {.highlight}\ncode\n```";
892        let config = MD040Config {
893            style: LanguageStyle::Consistent,
894            ..Default::default()
895        };
896        let fixed = run_fix_with_config(content, config).unwrap();
897        assert!(fixed.contains("```  bash {.highlight}"));
898        assert!(!fixed.contains("```  sh {.highlight}"));
899    }
900
901    // =========================================================================
902    // Allowlist/denylist tests
903    // =========================================================================
904
905    #[test]
906    fn test_allowlist_blocks_unlisted() {
907        let content = "```java\ncode\n```";
908        let config = MD040Config {
909            allowed_languages: vec!["Python".to_string(), "Shell".to_string()],
910            ..Default::default()
911        };
912        let result = run_check_with_config(content, config).unwrap();
913        assert_eq!(result.len(), 1);
914        assert!(result[0].message.contains("not in the allowed list"));
915    }
916
917    #[test]
918    fn test_allowlist_allows_listed() {
919        let content = "```python\ncode\n```";
920        let config = MD040Config {
921            allowed_languages: vec!["Python".to_string()],
922            ..Default::default()
923        };
924        let result = run_check_with_config(content, config).unwrap();
925        assert!(result.is_empty());
926    }
927
928    #[test]
929    fn test_allowlist_blocks_unknown_language() {
930        let content = "```mysterylang\ncode\n```";
931        let config = MD040Config {
932            allowed_languages: vec!["Python".to_string()],
933            ..Default::default()
934        };
935        let result = run_check_with_config(content, config).unwrap();
936        assert_eq!(result.len(), 1);
937        assert!(result[0].message.contains("allowed list"));
938    }
939
940    #[test]
941    fn test_allowlist_case_insensitive() {
942        let content = "```python\ncode\n```";
943        let config = MD040Config {
944            allowed_languages: vec!["PYTHON".to_string()],
945            ..Default::default()
946        };
947        let result = run_check_with_config(content, config).unwrap();
948        assert!(result.is_empty());
949    }
950
951    #[test]
952    fn test_denylist_blocks_listed() {
953        let content = "```java\ncode\n```";
954        let config = MD040Config {
955            disallowed_languages: vec!["Java".to_string()],
956            ..Default::default()
957        };
958        let result = run_check_with_config(content, config).unwrap();
959        assert_eq!(result.len(), 1);
960        assert!(result[0].message.contains("disallowed"));
961    }
962
963    #[test]
964    fn test_denylist_allows_unlisted() {
965        let content = "```python\ncode\n```";
966        let config = MD040Config {
967            disallowed_languages: vec!["Java".to_string()],
968            ..Default::default()
969        };
970        let result = run_check_with_config(content, config).unwrap();
971        assert!(result.is_empty());
972    }
973
974    #[test]
975    fn test_allowlist_takes_precedence_over_denylist() {
976        let content = "```python\ncode\n```";
977        let config = MD040Config {
978            allowed_languages: vec!["Python".to_string()],
979            disallowed_languages: vec!["Python".to_string()], // Should be ignored
980            ..Default::default()
981        };
982        let result = run_check_with_config(content, config).unwrap();
983        assert!(result.is_empty());
984    }
985
986    // =========================================================================
987    // Unknown language tests
988    // =========================================================================
989
990    #[test]
991    fn test_unknown_language_ignore_default() {
992        let content = "```mycustomlang\ncode\n```";
993        let result = run_check(content).unwrap();
994        assert!(result.is_empty(), "Unknown languages ignored by default");
995    }
996
997    #[test]
998    fn test_unknown_language_warn() {
999        let content = "```mycustomlang\ncode\n```";
1000        let config = MD040Config {
1001            unknown_language_action: UnknownLanguageAction::Warn,
1002            ..Default::default()
1003        };
1004        let result = run_check_with_config(content, config).unwrap();
1005        assert_eq!(result.len(), 1);
1006        assert!(result[0].message.contains("Unknown language"));
1007        assert!(result[0].message.contains("mycustomlang"));
1008        assert_eq!(result[0].severity, Severity::Warning);
1009    }
1010
1011    #[test]
1012    fn test_unknown_language_error() {
1013        let content = "```mycustomlang\ncode\n```";
1014        let config = MD040Config {
1015            unknown_language_action: UnknownLanguageAction::Error,
1016            ..Default::default()
1017        };
1018        let result = run_check_with_config(content, config).unwrap();
1019        assert_eq!(result.len(), 1);
1020        assert!(result[0].message.contains("Unknown language"));
1021        assert_eq!(result[0].severity, Severity::Error);
1022    }
1023
1024    // =========================================================================
1025    // Config validation tests
1026    // =========================================================================
1027
1028    #[test]
1029    fn test_invalid_preferred_alias_detected() {
1030        let mut preferred = HashMap::new();
1031        preferred.insert("Shell".to_string(), "invalid_alias".to_string());
1032
1033        let config = MD040Config {
1034            style: LanguageStyle::Consistent,
1035            preferred_aliases: preferred,
1036            ..Default::default()
1037        };
1038        let rule = MD040FencedCodeLanguage::with_config(config);
1039        let errors = rule.validate_config();
1040        assert_eq!(errors.len(), 1);
1041        assert!(errors[0].contains("Invalid alias"));
1042        assert!(errors[0].contains("invalid_alias"));
1043    }
1044
1045    #[test]
1046    fn test_unknown_language_in_preferred_aliases_detected() {
1047        let mut preferred = HashMap::new();
1048        preferred.insert("NotARealLanguage".to_string(), "nope".to_string());
1049
1050        let config = MD040Config {
1051            style: LanguageStyle::Consistent,
1052            preferred_aliases: preferred,
1053            ..Default::default()
1054        };
1055        let rule = MD040FencedCodeLanguage::with_config(config);
1056        let errors = rule.validate_config();
1057        assert_eq!(errors.len(), 1);
1058        assert!(errors[0].contains("Unknown language"));
1059    }
1060
1061    #[test]
1062    fn test_valid_preferred_alias_accepted() {
1063        let mut preferred = HashMap::new();
1064        preferred.insert("Shell".to_string(), "bash".to_string());
1065        preferred.insert("JavaScript".to_string(), "js".to_string());
1066
1067        let config = MD040Config {
1068            style: LanguageStyle::Consistent,
1069            preferred_aliases: preferred,
1070            ..Default::default()
1071        };
1072        let rule = MD040FencedCodeLanguage::with_config(config);
1073        let errors = rule.validate_config();
1074        assert!(errors.is_empty());
1075    }
1076
1077    #[test]
1078    fn test_config_error_uses_valid_line_column() {
1079        let config = md040_config::MD040Config {
1080            preferred_aliases: {
1081                let mut map = std::collections::HashMap::new();
1082                map.insert("Shell".to_string(), "invalid_alias".to_string());
1083                map
1084            },
1085            ..Default::default()
1086        };
1087        let rule = MD040FencedCodeLanguage::with_config(config);
1088
1089        let content = "```shell\necho hello\n```";
1090        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1091        let result = rule.check(&ctx).unwrap();
1092
1093        // Find the config error warning
1094        let config_error = result.iter().find(|w| w.message.contains("[config error]"));
1095        assert!(config_error.is_some(), "Should have a config error warning");
1096
1097        let warning = config_error.unwrap();
1098        // Line and column should be 1-indexed (not 0)
1099        assert!(
1100            warning.line >= 1,
1101            "Config error line should be >= 1, got {}",
1102            warning.line
1103        );
1104        assert!(
1105            warning.column >= 1,
1106            "Config error column should be >= 1, got {}",
1107            warning.column
1108        );
1109    }
1110
1111    // =========================================================================
1112    // Linguist resolution tests
1113    // =========================================================================
1114
1115    #[test]
1116    fn test_linguist_resolution() {
1117        assert_eq!(resolve_canonical("bash"), Some("Shell"));
1118        assert_eq!(resolve_canonical("sh"), Some("Shell"));
1119        assert_eq!(resolve_canonical("zsh"), Some("Shell"));
1120        assert_eq!(resolve_canonical("js"), Some("JavaScript"));
1121        assert_eq!(resolve_canonical("python"), Some("Python"));
1122        assert_eq!(resolve_canonical("unknown_lang"), None);
1123    }
1124
1125    #[test]
1126    fn test_linguist_resolution_case_insensitive() {
1127        assert_eq!(resolve_canonical("BASH"), Some("Shell"));
1128        assert_eq!(resolve_canonical("Bash"), Some("Shell"));
1129        assert_eq!(resolve_canonical("Python"), Some("Python"));
1130        assert_eq!(resolve_canonical("PYTHON"), Some("Python"));
1131    }
1132
1133    #[test]
1134    fn test_alias_validation() {
1135        assert!(is_valid_alias("Shell", "bash"));
1136        assert!(is_valid_alias("Shell", "sh"));
1137        assert!(is_valid_alias("Shell", "zsh"));
1138        assert!(!is_valid_alias("Shell", "python"));
1139        assert!(!is_valid_alias("Shell", "invalid"));
1140    }
1141
1142    #[test]
1143    fn test_default_alias() {
1144        assert_eq!(default_alias("Shell"), Some("bash"));
1145        assert_eq!(default_alias("JavaScript"), Some("js"));
1146        assert_eq!(default_alias("Python"), Some("python"));
1147    }
1148
1149    // =========================================================================
1150    // Edge case tests
1151    // =========================================================================
1152
1153    #[test]
1154    fn test_mixed_case_labels_normalized() {
1155        let content = r#"```BASH
1156echo hi
1157```
1158
1159```Bash
1160echo there
1161```
1162
1163```bash
1164echo again
1165```
1166"#;
1167        let config = MD040Config {
1168            style: LanguageStyle::Consistent,
1169            ..Default::default()
1170        };
1171        // All should resolve to Shell, most prevalent should win
1172        let result = run_check_with_config(content, config).unwrap();
1173        // "bash" appears 1x, "Bash" appears 1x, "BASH" appears 1x
1174        // All are different strings, so there's a 3-way tie
1175        // Should pick curated default "bash" or alphabetically first
1176        assert!(result.len() >= 2, "Should flag at least 2 inconsistent labels");
1177    }
1178
1179    #[test]
1180    fn test_multiple_languages_independent() {
1181        let content = r#"```bash
1182shell code
1183```
1184
1185```python
1186python code
1187```
1188
1189```sh
1190more shell
1191```
1192
1193```python3
1194more python
1195```
1196"#;
1197        let config = MD040Config {
1198            style: LanguageStyle::Consistent,
1199            ..Default::default()
1200        };
1201        let result = run_check_with_config(content, config).unwrap();
1202        // Should have 2 warnings: one for sh (inconsistent with bash) and one for python3 (inconsistent with python)
1203        assert_eq!(result.len(), 2);
1204    }
1205
1206    #[test]
1207    fn test_tilde_fences() {
1208        let content = r#"~~~bash
1209echo hi
1210~~~
1211
1212~~~sh
1213echo there
1214~~~
1215"#;
1216        let config = MD040Config {
1217            style: LanguageStyle::Consistent,
1218            ..Default::default()
1219        };
1220        let result = run_check_with_config(content, config.clone()).unwrap();
1221        assert_eq!(result.len(), 1);
1222
1223        let fixed = run_fix_with_config(content, config).unwrap();
1224        assert!(fixed.contains("~~~bash"));
1225        assert!(!fixed.contains("~~~sh"));
1226    }
1227
1228    #[test]
1229    fn test_longer_fence_markers_preserved() {
1230        let content = "````sh\ncode\n````\n\n```bash\ncode\n```";
1231        let config = MD040Config {
1232            style: LanguageStyle::Consistent,
1233            ..Default::default()
1234        };
1235        let fixed = run_fix_with_config(content, config).unwrap();
1236        assert!(fixed.contains("````bash"));
1237        assert!(fixed.contains("```bash"));
1238    }
1239
1240    #[test]
1241    fn test_empty_document() {
1242        let result = run_check("").unwrap();
1243        assert!(result.is_empty());
1244    }
1245
1246    #[test]
1247    fn test_no_code_blocks() {
1248        let content = "# Just a heading\n\nSome text.";
1249        let result = run_check(content).unwrap();
1250        assert!(result.is_empty());
1251    }
1252
1253    #[test]
1254    fn test_single_code_block_no_inconsistency() {
1255        let content = "```bash\necho hi\n```";
1256        let config = MD040Config {
1257            style: LanguageStyle::Consistent,
1258            ..Default::default()
1259        };
1260        let result = run_check_with_config(content, config).unwrap();
1261        assert!(result.is_empty(), "Single block has no inconsistency");
1262    }
1263
1264    #[test]
1265    fn test_idempotent_fix() {
1266        let content = r#"```bash
1267echo hi
1268```
1269
1270```sh
1271echo there
1272```
1273"#;
1274        let config = MD040Config {
1275            style: LanguageStyle::Consistent,
1276            ..Default::default()
1277        };
1278        let fixed1 = run_fix_with_config(content, config.clone()).unwrap();
1279        let fixed2 = run_fix_with_config(&fixed1, config).unwrap();
1280        assert_eq!(fixed1, fixed2, "Fix should be idempotent");
1281    }
1282
1283    // =========================================================================
1284    // MkDocs superfences tests
1285    // =========================================================================
1286
1287    #[test]
1288    fn test_mkdocs_superfences_title_only() {
1289        // title= attribute without language should not warn in MkDocs flavor
1290        let content = r#"```title="Example"
1291echo hi
1292```
1293"#;
1294        let result = run_check_mkdocs(content).unwrap();
1295        assert!(
1296            result.is_empty(),
1297            "MkDocs superfences with title= should not require language"
1298        );
1299    }
1300
1301    #[test]
1302    fn test_mkdocs_superfences_hl_lines() {
1303        // hl_lines= attribute without language should not warn
1304        let content = r#"```hl_lines="1 2"
1305line 1
1306line 2
1307```
1308"#;
1309        let result = run_check_mkdocs(content).unwrap();
1310        assert!(
1311            result.is_empty(),
1312            "MkDocs superfences with hl_lines= should not require language"
1313        );
1314    }
1315
1316    #[test]
1317    fn test_mkdocs_superfences_linenums() {
1318        // linenums= attribute without language should not warn
1319        let content = r#"```linenums="1"
1320line 1
1321line 2
1322```
1323"#;
1324        let result = run_check_mkdocs(content).unwrap();
1325        assert!(
1326            result.is_empty(),
1327            "MkDocs superfences with linenums= should not require language"
1328        );
1329    }
1330
1331    #[test]
1332    fn test_mkdocs_superfences_class() {
1333        // Custom class (starting with .) should not warn
1334        let content = r#"```.my-class
1335some text
1336```
1337"#;
1338        let result = run_check_mkdocs(content).unwrap();
1339        assert!(
1340            result.is_empty(),
1341            "MkDocs superfences with .class should not require language"
1342        );
1343    }
1344
1345    #[test]
1346    fn test_mkdocs_superfences_id() {
1347        // Custom ID (starting with #) should not warn
1348        let content = r#"```#my-id
1349some text
1350```
1351"#;
1352        let result = run_check_mkdocs(content).unwrap();
1353        assert!(
1354            result.is_empty(),
1355            "MkDocs superfences with #id should not require language"
1356        );
1357    }
1358
1359    #[test]
1360    fn test_mkdocs_superfences_with_language() {
1361        // Language with superfences attributes should work fine
1362        let content = r#"```python title="Example" hl_lines="1"
1363print("hello")
1364```
1365"#;
1366        let result = run_check_mkdocs(content).unwrap();
1367        assert!(result.is_empty(), "Code block with language and attrs should pass");
1368    }
1369
1370    #[test]
1371    fn test_standard_flavor_no_special_handling() {
1372        // In Standard flavor, title= should still warn
1373        let content = r#"```title="Example"
1374echo hi
1375```
1376"#;
1377        let result = run_check(content).unwrap();
1378        assert_eq!(
1379            result.len(),
1380            1,
1381            "Standard flavor should warn about title= without language"
1382        );
1383    }
1384}