rumdl_lib/code_block_tools/
processor.rs

1//! Main processor for code block linting and formatting.
2//!
3//! This module coordinates language resolution, tool lookup, execution,
4//! and result collection for processing code blocks in markdown files.
5
6#[cfg(test)]
7use super::config::LanguageToolConfig;
8use super::config::{CodeBlockToolsConfig, NormalizeLanguage, OnError, OnMissing, ToolDefinition};
9use super::executor::{ExecutorError, ToolExecutor, ToolOutput};
10use super::linguist::LinguistResolver;
11use super::registry::ToolRegistry;
12use crate::config::MarkdownFlavor;
13use crate::rule::{LintWarning, Severity};
14use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
15
/// Name of the special built-in tool that stands for rumdl's own markdown linting.
///
/// When this tool is configured for a markdown code block, the processor does
/// not run an external command for it; such blocks are handled by the embedded
/// markdown linting instead.
pub const RUMDL_BUILTIN_TOOL: &str = "rumdl";
20
/// Report whether `lang` names markdown.
///
/// The comparison is case-insensitive and accepts `markdown` and `md`.
fn is_markdown_language(lang: &str) -> bool {
    let lowered = lang.to_lowercase();
    lowered == "markdown" || lowered == "md"
}
25
/// Strip ANSI CSI escape sequences from tool output.
///
/// Many tools output colored text (e.g. `\x1b[1;31mError\x1b[0m`), which prevents
/// structured parsers from matching patterns like `file:line:col: message`.
///
/// A CSI sequence is `ESC [` followed by parameter/intermediate bytes and a
/// single final byte in the range `@`..=`~`. The whole sequence, final byte
/// included, is removed. An `ESC` that is not followed by `[` is dropped on its
/// own and the characters after it are kept. An unterminated sequence consumes
/// the rest of the input.
fn strip_ansi_codes(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        if c != '\x1b' {
            result.push(c);
            continue;
        }

        // Lone ESC (not a CSI introducer): drop only the ESC itself.
        if chars.next_if_eq(&'[').is_none() {
            continue;
        }

        // Consume through the final byte. Testing only for ASCII letters would
        // miss terminators such as `~` or `@` and swallow the text after them.
        for next in chars.by_ref() {
            if ('@'..='~').contains(&next) {
                break;
            }
        }
    }
    result
}
51
/// Location and fence metadata for one fenced code block in a markdown document.
///
/// Line numbers are 0-indexed; `content_start` and `content_end` are byte
/// offsets into the document the block was extracted from.
#[derive(Debug, Clone)]
pub struct FencedCodeBlockInfo {
    /// 0-indexed line number where opening fence starts.
    pub start_line: usize,
    /// 0-indexed line number where closing fence ends.
    pub end_line: usize,
    /// Byte offset where code content starts (after opening fence line).
    pub content_start: usize,
    /// Byte offset where code content ends (before closing fence line).
    pub content_end: usize,
    /// Language tag extracted from info string (first whitespace-separated
    /// token); empty when the fence has no info string.
    pub language: String,
    /// Full info string from the fence.
    pub info_string: String,
    /// The fence character used (` or ~).
    pub fence_char: char,
    /// Length of the fence (3 or more).
    pub fence_length: usize,
    /// Length in bytes of the leading whitespace on the fence line.
    pub indent: usize,
    /// Exact leading whitespace prefix from the fence line.
    pub indent_prefix: String,
}
76
/// A diagnostic message about a code block, produced by an external tool or
/// by the processor itself (reported under the tool name `code-block-tools`).
#[derive(Debug, Clone)]
pub struct CodeBlockDiagnostic {
    /// Line number in the original markdown file (1-indexed).
    pub file_line: usize,
    /// Column number (1-indexed, if available).
    pub column: Option<usize>,
    /// Message from the tool.
    pub message: String,
    /// Severity (error, warning, info).
    pub severity: DiagnosticSeverity,
    /// Name of the tool that produced this.
    pub tool: String,
    /// Line where the code block starts (1-indexed, for context).
    pub code_block_start: usize,
}
93
/// Severity level for diagnostics.
///
/// Each variant maps one-to-one onto the `Severity` variant of the same name
/// when a diagnostic is converted to a `LintWarning`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// Reported as `Severity::Error`.
    Error,
    /// Reported as `Severity::Warning`.
    Warning,
    /// Reported as `Severity::Info`.
    Info,
}
101
102impl CodeBlockDiagnostic {
103    /// Convert to a LintWarning for integration with rumdl's warning system.
104    pub fn to_lint_warning(&self) -> LintWarning {
105        let severity = match self.severity {
106            DiagnosticSeverity::Error => Severity::Error,
107            DiagnosticSeverity::Warning => Severity::Warning,
108            DiagnosticSeverity::Info => Severity::Info,
109        };
110
111        LintWarning {
112            message: self.message.clone(),
113            line: self.file_line,
114            column: self.column.unwrap_or(1),
115            end_line: self.file_line,
116            end_column: self.column.unwrap_or(1),
117            severity,
118            fix: None, // External tool diagnostics don't provide fixes
119            rule_name: Some(self.tool.clone()),
120        }
121    }
122}
123
/// Error during code block processing.
///
/// Line numbers carried by the variants are the 1-indexed start line of the
/// code block concerned.
#[derive(Debug, Clone)]
pub enum ProcessorError {
    /// Tool execution failed.
    ToolError(ExecutorError),
    /// Tool execution failed with code block location context.
    ToolErrorAt {
        /// The underlying executor failure.
        error: ExecutorError,
        /// Line of the code block being processed.
        line: usize,
        /// Language of the code block being processed.
        language: String,
    },
    /// No tools configured for language.
    NoToolsConfigured { language: String, line: usize },
    /// Tool binary not found.
    ToolBinaryNotFound {
        /// Name of the command that could not be found.
        tool: String,
        /// Language of the code block the tool was configured for.
        language: String,
        /// Line of the code block being processed.
        line: usize,
    },
    /// Processing was aborted due to on_error = fail.
    Aborted { message: String },
}
146
147impl std::fmt::Display for ProcessorError {
148    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149        match self {
150            Self::ToolError(e) => write!(f, "{e}"),
151            Self::ToolErrorAt { error, line, language } => {
152                write!(f, "line {line} ({language}): {error}")
153            }
154            Self::NoToolsConfigured { language, line } => {
155                write!(f, "line {line} ({language}): no tools configured")
156            }
157            Self::ToolBinaryNotFound { tool, language, line } => {
158                write!(f, "line {line} ({language}): tool '{tool}' not found in PATH")
159            }
160            Self::Aborted { message } => write!(f, "Processing aborted: {message}"),
161        }
162    }
163}
164
// Marker impl: `source()` is not overridden, so a wrapped `ExecutorError` is
// only reachable through the variant fields, not through the error chain.
impl std::error::Error for ProcessorError {}
166
167impl From<ExecutorError> for ProcessorError {
168    fn from(e: ExecutorError) -> Self {
169        Self::ToolError(e)
170    }
171}
172
/// Result of processing a single code block.
#[derive(Debug)]
pub struct CodeBlockResult {
    /// Diagnostics from linting.
    pub diagnostics: Vec<CodeBlockDiagnostic>,
    /// Formatted content (if formatting was requested and succeeded).
    pub formatted_content: Option<String>,
    /// Whether the code block was modified.
    pub was_modified: bool,
}
183
/// Result of formatting code blocks in a document.
#[derive(Debug)]
pub struct FormatOutput {
    /// The formatted content (may be partially formatted if errors occurred).
    /// When nothing was formatted this is a copy of the input document.
    pub content: String,
    /// Whether any errors occurred during formatting.
    pub had_errors: bool,
    /// Error messages for blocks that couldn't be formatted.
    pub error_messages: Vec<String>,
}
194
/// Context in which a tool is being used.
///
/// Used when resolving a bare tool name to decide which `name:suffix`
/// variants are tried first.
enum ToolContext {
    /// The tool is being run to produce diagnostics.
    Lint,
    /// The tool is being run to rewrite code block content.
    Format,
}

/// Main processor for code block tools.
pub struct CodeBlockToolProcessor<'a> {
    /// Borrowed code-block-tools configuration.
    config: &'a CodeBlockToolsConfig,
    /// Markdown flavor; MkDocs enables extra extraction inside admonitions and tabs.
    flavor: MarkdownFlavor,
    /// Resolver used to normalize language tags when `normalize_language` is `Linguist`.
    linguist: LinguistResolver,
    /// Tool definitions, looked up by tool ID.
    registry: ToolRegistry,
    /// Runs tools and checks whether their binaries are available.
    executor: ToolExecutor,
    /// User-defined language aliases, with keys and values lowercased.
    user_aliases: std::collections::HashMap<String, String>,
}
210
211impl<'a> CodeBlockToolProcessor<'a> {
212    /// Create a new processor with the given configuration and markdown flavor.
213    pub fn new(config: &'a CodeBlockToolsConfig, flavor: MarkdownFlavor) -> Self {
214        let user_aliases = config
215            .language_aliases
216            .iter()
217            .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
218            .collect();
219        Self {
220            config,
221            flavor,
222            linguist: LinguistResolver::new(),
223            registry: ToolRegistry::new(config.tools.clone()),
224            executor: ToolExecutor::new(config.timeout),
225            user_aliases,
226        }
227    }
228
229    /// Resolve a tool ID with context awareness.
230    ///
231    /// When a bare tool name (e.g., "tombi") is used in a specific context
232    /// (lint or format), try the context-specific variant first (e.g., "tombi:format"),
233    /// then common alternatives (e.g., "tombi:check"), before falling back to the bare name.
234    fn resolve_tool<'b>(&'b self, tool_id: &str, context: ToolContext) -> Option<&'b ToolDefinition> {
235        // If the tool ID already has a colon suffix, use it directly
236        if tool_id.contains(':') {
237            return self.registry.get(tool_id);
238        }
239
240        // Try context-specific variants first
241        let suffixes = match context {
242            ToolContext::Format => &["format", "fmt", "fix", "reformat"][..],
243            ToolContext::Lint => &["lint", "check"][..],
244        };
245
246        for suffix in suffixes {
247            let qualified = format!("{tool_id}:{suffix}");
248            if let Some(def) = self.registry.get(&qualified) {
249                return Some(def);
250            }
251        }
252
253        // Fall back to bare name
254        self.registry.get(tool_id)
255    }
256
257    /// Quick check whether any configured language might appear in fenced code blocks.
258    /// Scans for `` ```lang `` or `` ~~~lang `` patterns without full parsing.
259    fn has_potential_matching_blocks(&self, content: &str, lint_mode: bool) -> bool {
260        // Collect languages that have tools configured for the requested mode
261        let configured_langs: Vec<&str> = self
262            .config
263            .languages
264            .iter()
265            .filter(|(_, lc)| {
266                lc.enabled
267                    && if lint_mode {
268                        !lc.lint.is_empty()
269                    } else {
270                        !lc.format.is_empty()
271                    }
272            })
273            .map(|(lang, _)| lang.as_str())
274            .collect();
275
276        if configured_langs.is_empty() {
277            return false;
278        }
279
280        // Scan content line-by-line for fence openers matching configured languages
281        for line in content.lines() {
282            let trimmed = line.trim_start();
283            let after_fence = if let Some(rest) = trimmed.strip_prefix("```") {
284                rest
285            } else if let Some(rest) = trimmed.strip_prefix("~~~") {
286                rest
287            } else {
288                continue;
289            };
290
291            let lang = after_fence.split_whitespace().next().unwrap_or("");
292            if lang.is_empty() {
293                continue;
294            }
295            // Check both the raw language and the canonical (normalized) form
296            let canonical = self.resolve_language(lang);
297            if configured_langs.contains(&canonical.as_str()) {
298                return true;
299            }
300        }
301
302        false
303    }
304
305    /// Extract all fenced code blocks from content.
306    pub fn extract_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
307        let mut blocks = Vec::new();
308        let mut current_block: Option<FencedCodeBlockBuilder> = None;
309
310        let options = Options::all();
311        let parser = Parser::new_ext(content, options).into_offset_iter();
312
313        let lines: Vec<&str> = content.lines().collect();
314
315        for (event, range) in parser {
316            match event {
317                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
318                    let info_string = info.to_string();
319                    let language = info_string.split_whitespace().next().unwrap_or("").to_string();
320
321                    // Find start line
322                    let start_line = content[..range.start].chars().filter(|&c| c == '\n').count();
323
324                    // Find content start (after opening fence line)
325                    let content_start = content[range.start..]
326                        .find('\n')
327                        .map(|i| range.start + i + 1)
328                        .unwrap_or(content.len());
329
330                    // Detect fence character and length from the line
331                    let fence_line = lines.get(start_line).unwrap_or(&"");
332                    let trimmed = fence_line.trim_start();
333                    let indent = fence_line.len() - trimmed.len();
334                    let indent_prefix = fence_line.get(..indent).unwrap_or("").to_string();
335                    let (fence_char, fence_length) = if trimmed.starts_with('~') {
336                        ('~', trimmed.chars().take_while(|&c| c == '~').count())
337                    } else {
338                        ('`', trimmed.chars().take_while(|&c| c == '`').count())
339                    };
340
341                    current_block = Some(FencedCodeBlockBuilder {
342                        start_line,
343                        content_start,
344                        language,
345                        info_string,
346                        fence_char,
347                        fence_length,
348                        indent,
349                        indent_prefix,
350                    });
351                }
352                Event::End(TagEnd::CodeBlock) => {
353                    if let Some(builder) = current_block.take() {
354                        // Find end line
355                        let end_line = content[..range.end].chars().filter(|&c| c == '\n').count();
356
357                        // Find content end (before closing fence line)
358                        let search_start = builder.content_start.min(range.end);
359                        let content_end = if search_start < range.end {
360                            content[search_start..range.end]
361                                .rfind('\n')
362                                .map(|i| search_start + i)
363                                .unwrap_or(search_start)
364                        } else {
365                            search_start
366                        };
367
368                        if content_end >= builder.content_start {
369                            blocks.push(FencedCodeBlockInfo {
370                                start_line: builder.start_line,
371                                end_line,
372                                content_start: builder.content_start,
373                                content_end,
374                                language: builder.language,
375                                info_string: builder.info_string,
376                                fence_char: builder.fence_char,
377                                fence_length: builder.fence_length,
378                                indent: builder.indent,
379                                indent_prefix: builder.indent_prefix,
380                            });
381                        }
382                    }
383                }
384                _ => {}
385            }
386        }
387
388        // For MkDocs flavor, also extract code blocks inside admonitions and tabs
389        if self.flavor == MarkdownFlavor::MkDocs {
390            let mkdocs_blocks = self.extract_mkdocs_code_blocks(content);
391            for mb in mkdocs_blocks {
392                // Deduplicate: only add if no existing block starts at the same line
393                if !blocks.iter().any(|b| b.start_line == mb.start_line) {
394                    blocks.push(mb);
395                }
396            }
397            blocks.sort_by_key(|b| b.start_line);
398        }
399
400        blocks
401    }
402
403    /// Extract fenced code blocks that are inside MkDocs admonitions or tabs.
404    ///
405    /// pulldown_cmark doesn't parse MkDocs-specific constructs, so indented
406    /// code blocks inside `!!!`/`???` admonitions or `===` tabs are missed.
407    /// This method manually scans for them.
408    fn extract_mkdocs_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
409        use crate::utils::mkdocs_admonitions;
410        use crate::utils::mkdocs_tabs;
411
412        let mut blocks = Vec::new();
413        let lines: Vec<&str> = content.lines().collect();
414
415        // Track current MkDocs context indent level
416        // We only need to know if we're inside any MkDocs block, so a simple stack suffices.
417        let mut context_indent_stack: Vec<usize> = Vec::new();
418
419        // Track fence state inside MkDocs context
420        let mut in_fence = false;
421        let mut fence_start_line: usize = 0;
422        let mut fence_content_start: usize = 0;
423        let mut fence_char: char = '`';
424        let mut fence_length: usize = 0;
425        let mut fence_indent: usize = 0;
426        let mut fence_indent_prefix = String::new();
427        let mut fence_language = String::new();
428        let mut fence_info_string = String::new();
429
430        // Compute byte offsets via pointer arithmetic.
431        // `content.lines()` returns slices into the original string,
432        // so each line's pointer offset from `content` gives its byte position.
433        // This correctly handles \n, \r\n, and empty lines.
434        let content_start_ptr = content.as_ptr() as usize;
435        let line_offsets: Vec<usize> = lines
436            .iter()
437            .map(|line| line.as_ptr() as usize - content_start_ptr)
438            .collect();
439
440        for (i, line) in lines.iter().enumerate() {
441            let line_indent = crate::utils::mkdocs_common::get_line_indent(line);
442            let is_admonition = mkdocs_admonitions::is_admonition_start(line);
443            let is_tab = mkdocs_tabs::is_tab_marker(line);
444
445            // Pop contexts when the current line is not indented enough to be content.
446            // This runs for ALL lines (including new admonition/tab starts) to clean
447            // up stale entries before potentially pushing a new context.
448            if !line.trim().is_empty() {
449                while let Some(&ctx_indent) = context_indent_stack.last() {
450                    if line_indent < ctx_indent + 4 {
451                        context_indent_stack.pop();
452                        if in_fence {
453                            in_fence = false;
454                        }
455                    } else {
456                        break;
457                    }
458                }
459            }
460
461            // Check for admonition start — push new context
462            if is_admonition && let Some(indent) = mkdocs_admonitions::get_admonition_indent(line) {
463                context_indent_stack.push(indent);
464                continue;
465            }
466
467            // Check for tab marker — push new context
468            if is_tab && let Some(indent) = mkdocs_tabs::get_tab_indent(line) {
469                context_indent_stack.push(indent);
470                continue;
471            }
472
473            // Only look for fences inside a MkDocs context
474            if context_indent_stack.is_empty() {
475                continue;
476            }
477
478            let trimmed = line.trim_start();
479            let leading_spaces = line.len() - trimmed.len();
480
481            if !in_fence {
482                // Check for fence opening
483                let (fc, fl) = if trimmed.starts_with("```") {
484                    ('`', trimmed.chars().take_while(|&c| c == '`').count())
485                } else if trimmed.starts_with("~~~") {
486                    ('~', trimmed.chars().take_while(|&c| c == '~').count())
487                } else {
488                    continue;
489                };
490
491                if fl >= 3 {
492                    in_fence = true;
493                    fence_start_line = i;
494                    fence_char = fc;
495                    fence_length = fl;
496                    fence_indent = leading_spaces;
497                    fence_indent_prefix = line.get(..leading_spaces).unwrap_or("").to_string();
498
499                    let after_fence = &trimmed[fl..];
500                    fence_info_string = after_fence.trim().to_string();
501                    fence_language = fence_info_string.split_whitespace().next().unwrap_or("").to_string();
502
503                    // Content starts at the next line's byte offset
504                    fence_content_start = line_offsets.get(i + 1).copied().unwrap_or(content.len());
505                }
506            } else {
507                // Check for fence closing
508                let is_closing = if fence_char == '`' {
509                    trimmed.starts_with("```")
510                        && trimmed.chars().take_while(|&c| c == '`').count() >= fence_length
511                        && trimmed.trim_start_matches('`').trim().is_empty()
512                } else {
513                    trimmed.starts_with("~~~")
514                        && trimmed.chars().take_while(|&c| c == '~').count() >= fence_length
515                        && trimmed.trim_start_matches('~').trim().is_empty()
516                };
517
518                if is_closing {
519                    let content_end = line_offsets.get(i).copied().unwrap_or(content.len());
520
521                    if content_end >= fence_content_start {
522                        blocks.push(FencedCodeBlockInfo {
523                            start_line: fence_start_line,
524                            end_line: i,
525                            content_start: fence_content_start,
526                            content_end,
527                            language: fence_language.clone(),
528                            info_string: fence_info_string.clone(),
529                            fence_char,
530                            fence_length,
531                            indent: fence_indent,
532                            indent_prefix: fence_indent_prefix.clone(),
533                        });
534                    }
535
536                    in_fence = false;
537                }
538            }
539        }
540
541        blocks
542    }
543
544    /// Resolve a language tag to its canonical name.
545    fn resolve_language(&self, language: &str) -> String {
546        let lower = language.to_lowercase();
547        if let Some(mapped) = self.user_aliases.get(&lower) {
548            return mapped.clone();
549        }
550        match self.config.normalize_language {
551            NormalizeLanguage::Linguist => self.linguist.resolve(&lower),
552            NormalizeLanguage::Exact => lower,
553        }
554    }
555
556    /// Get the effective on_error setting for a language.
557    fn get_on_error(&self, language: &str) -> OnError {
558        self.config
559            .languages
560            .get(language)
561            .and_then(|lc| lc.on_error)
562            .unwrap_or(self.config.on_error)
563    }
564
565    /// Strip the fence indentation prefix from each line of a code block.
566    fn strip_indent_from_block(&self, content: &str, indent_prefix: &str) -> String {
567        if indent_prefix.is_empty() {
568            return content.to_string();
569        }
570
571        let mut out = String::with_capacity(content.len());
572        for line in content.split_inclusive('\n') {
573            if let Some(stripped) = line.strip_prefix(indent_prefix) {
574                out.push_str(stripped);
575            } else {
576                out.push_str(line);
577            }
578        }
579        out
580    }
581
582    /// Re-apply the fence indentation prefix to each line of a code block.
583    fn apply_indent_to_block(&self, content: &str, indent_prefix: &str) -> String {
584        if indent_prefix.is_empty() {
585            return content.to_string();
586        }
587        if content.is_empty() {
588            return String::new();
589        }
590
591        let mut out = String::with_capacity(content.len() + indent_prefix.len());
592        for line in content.split_inclusive('\n') {
593            if line == "\n" {
594                out.push_str(line);
595            } else {
596                out.push_str(indent_prefix);
597                out.push_str(line);
598            }
599        }
600        out
601    }
602
603    /// Lint all code blocks in the content.
604    ///
605    /// Returns diagnostics from all configured linters.
606    pub fn lint(&self, content: &str) -> Result<Vec<CodeBlockDiagnostic>, ProcessorError> {
607        // Skip the expensive parse when no tools could possibly produce output.
608        // With on_missing=Ignore (default) and no languages with lint tools configured,
609        // every block would be skipped, so the parse is wasted work.
610        if self.config.on_missing_language_definition == OnMissing::Ignore
611            && !self
612                .config
613                .languages
614                .values()
615                .any(|lc| lc.enabled && !lc.lint.is_empty())
616        {
617            return Ok(Vec::new());
618        }
619
620        // Quick content check: skip parsing if no configured language appears in the content.
621        // This avoids the expensive pulldown-cmark parse when there are no matching code blocks.
622        if self.config.on_missing_language_definition == OnMissing::Ignore
623            && !self.has_potential_matching_blocks(content, true)
624        {
625            return Ok(Vec::new());
626        }
627
628        let mut all_diagnostics = Vec::new();
629        let blocks = self.extract_code_blocks(content);
630
631        for block in blocks {
632            if block.language.is_empty() {
633                continue; // Skip blocks without language tag
634            }
635
636            let canonical_lang = self.resolve_language(&block.language);
637
638            // Get lint tools for this language
639            let lang_config = self.config.languages.get(&canonical_lang);
640
641            // If language is explicitly configured with enabled=false, skip silently
642            if let Some(lc) = lang_config
643                && !lc.enabled
644            {
645                continue;
646            }
647
648            let lint_tools = match lang_config {
649                Some(lc) if !lc.lint.is_empty() => &lc.lint,
650                _ => {
651                    // No tools configured for this language in lint mode
652                    match self.config.on_missing_language_definition {
653                        OnMissing::Ignore => continue,
654                        OnMissing::Fail => {
655                            all_diagnostics.push(CodeBlockDiagnostic {
656                                file_line: block.start_line + 1,
657                                column: None,
658                                message: format!("No lint tools configured for language '{canonical_lang}'"),
659                                severity: DiagnosticSeverity::Error,
660                                tool: "code-block-tools".to_string(),
661                                code_block_start: block.start_line + 1,
662                            });
663                            continue;
664                        }
665                        OnMissing::FailFast => {
666                            return Err(ProcessorError::NoToolsConfigured {
667                                language: canonical_lang,
668                                line: block.start_line + 1,
669                            });
670                        }
671                    }
672                }
673            };
674
675            // Extract code block content
676            let code_content_raw = if block.content_start < block.content_end && block.content_end <= content.len() {
677                &content[block.content_start..block.content_end]
678            } else {
679                continue;
680            };
681            let code_content = self.strip_indent_from_block(code_content_raw, &block.indent_prefix);
682
683            // Run each lint tool
684            for tool_id in lint_tools {
685                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown linting
686                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
687                    continue;
688                }
689
690                let tool_def = match self.resolve_tool(tool_id, ToolContext::Lint) {
691                    Some(t) => t,
692                    None => {
693                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
694                        continue;
695                    }
696                };
697
698                // Check if tool binary exists before running
699                let tool_name = tool_def.command.first().map(String::as_str).unwrap_or("");
700                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
701                    match self.config.on_missing_tool_binary {
702                        OnMissing::Ignore => {
703                            log::debug!("Tool binary '{tool_name}' not found, skipping");
704                            continue;
705                        }
706                        OnMissing::Fail => {
707                            all_diagnostics.push(CodeBlockDiagnostic {
708                                file_line: block.start_line + 1,
709                                column: None,
710                                message: format!("Tool binary '{tool_name}' not found in PATH"),
711                                severity: DiagnosticSeverity::Error,
712                                tool: "code-block-tools".to_string(),
713                                code_block_start: block.start_line + 1,
714                            });
715                            continue;
716                        }
717                        OnMissing::FailFast => {
718                            return Err(ProcessorError::ToolBinaryNotFound {
719                                tool: tool_name.to_string(),
720                                language: canonical_lang.clone(),
721                                line: block.start_line + 1,
722                            });
723                        }
724                    }
725                }
726
727                match self.executor.lint(tool_def, &code_content, Some(self.config.timeout)) {
728                    Ok(output) => {
729                        // Parse tool output into diagnostics
730                        let diagnostics = self.parse_tool_output(
731                            &output,
732                            tool_id,
733                            block.start_line + 1, // Convert to 1-indexed
734                        );
735                        all_diagnostics.extend(diagnostics);
736                    }
737                    Err(e) => {
738                        let on_error = self.get_on_error(&canonical_lang);
739                        match on_error {
740                            OnError::Fail => return Err(e.into()),
741                            OnError::Warn => {
742                                log::warn!("Tool '{tool_id}' failed: {e}");
743                            }
744                            OnError::Skip => {
745                                // Silently skip
746                            }
747                        }
748                    }
749                }
750            }
751        }
752
753        Ok(all_diagnostics)
754    }
755
756    /// Format all code blocks in the content.
757    ///
758    /// Returns the modified content with formatted code blocks and any errors that occurred.
759    /// With `on-missing-*` = `fail`, errors are collected but formatting continues.
760    /// With `on-missing-*` = `fail-fast`, returns Err immediately on first error.
761    pub fn format(&self, content: &str) -> Result<FormatOutput, ProcessorError> {
762        let no_output = FormatOutput {
763            content: content.to_string(),
764            had_errors: false,
765            error_messages: Vec::new(),
766        };
767
768        // Skip the expensive parse when no tools could produce output
769        if self.config.on_missing_language_definition == OnMissing::Ignore
770            && !self
771                .config
772                .languages
773                .values()
774                .any(|lc| lc.enabled && !lc.format.is_empty())
775        {
776            return Ok(no_output);
777        }
778
779        // Quick content check: skip parsing if no configured language appears in the content
780        if self.config.on_missing_language_definition == OnMissing::Ignore
781            && !self.has_potential_matching_blocks(content, false)
782        {
783            return Ok(no_output);
784        }
785
786        let blocks = self.extract_code_blocks(content);
787
788        if blocks.is_empty() {
789            return Ok(FormatOutput {
790                content: content.to_string(),
791                had_errors: false,
792                error_messages: Vec::new(),
793            });
794        }
795
796        // Process blocks in reverse order to maintain byte offsets
797        let mut result = content.to_string();
798        let mut error_messages: Vec<String> = Vec::new();
799
800        for block in blocks.into_iter().rev() {
801            if block.language.is_empty() {
802                continue;
803            }
804
805            let canonical_lang = self.resolve_language(&block.language);
806
807            // Get format tools for this language
808            let lang_config = self.config.languages.get(&canonical_lang);
809
810            // If language is explicitly configured with enabled=false, skip silently
811            if let Some(lc) = lang_config
812                && !lc.enabled
813            {
814                continue;
815            }
816
817            let format_tools = match lang_config {
818                Some(lc) if !lc.format.is_empty() => &lc.format,
819                _ => {
820                    // No tools configured for this language in format mode
821                    match self.config.on_missing_language_definition {
822                        OnMissing::Ignore => continue,
823                        OnMissing::Fail => {
824                            error_messages.push(format!(
825                                "No format tools configured for language '{canonical_lang}' at line {}",
826                                block.start_line + 1
827                            ));
828                            continue;
829                        }
830                        OnMissing::FailFast => {
831                            return Err(ProcessorError::NoToolsConfigured {
832                                language: canonical_lang,
833                                line: block.start_line + 1,
834                            });
835                        }
836                    }
837                }
838            };
839
840            // Extract code block content
841            if block.content_start >= block.content_end || block.content_end > result.len() {
842                continue;
843            }
844            let code_content_raw = result[block.content_start..block.content_end].to_string();
845            let code_content = self.strip_indent_from_block(&code_content_raw, &block.indent_prefix);
846
847            // Run format tools (use first successful one)
848            let mut formatted = code_content.clone();
849            let mut tool_ran = false;
850            for tool_id in format_tools {
851                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown formatting
852                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
853                    continue;
854                }
855
856                let tool_def = match self.resolve_tool(tool_id, ToolContext::Format) {
857                    Some(t) => t,
858                    None => {
859                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
860                        continue;
861                    }
862                };
863
864                // Check if tool binary exists before running
865                let tool_name = tool_def.command.first().map(String::as_str).unwrap_or("");
866                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
867                    match self.config.on_missing_tool_binary {
868                        OnMissing::Ignore => {
869                            log::debug!("Tool binary '{tool_name}' not found, skipping");
870                            continue;
871                        }
872                        OnMissing::Fail => {
873                            error_messages.push(format!(
874                                "Tool binary '{tool_name}' not found in PATH for language '{canonical_lang}' at line {}",
875                                block.start_line + 1
876                            ));
877                            continue;
878                        }
879                        OnMissing::FailFast => {
880                            return Err(ProcessorError::ToolBinaryNotFound {
881                                tool: tool_name.to_string(),
882                                language: canonical_lang.clone(),
883                                line: block.start_line + 1,
884                            });
885                        }
886                    }
887                }
888
889                match self.executor.format(tool_def, &formatted, Some(self.config.timeout)) {
890                    Ok(output) => {
891                        // Guard against formatters that produce empty output for non-empty input.
892                        // This prevents data loss from misconfigured tools (e.g., a lint tool
893                        // used as a formatter that validates but doesn't output content).
894                        if output.trim().is_empty() && !formatted.trim().is_empty() {
895                            log::warn!("Formatter '{tool_id}' produced empty output for non-empty input, skipping");
896                            continue;
897                        }
898
899                        // Ensure trailing newline matches original (unindented)
900                        formatted = output;
901                        if code_content.ends_with('\n') && !formatted.ends_with('\n') {
902                            formatted.push('\n');
903                        } else if !code_content.ends_with('\n') && formatted.ends_with('\n') {
904                            formatted.pop();
905                        }
906                        tool_ran = true;
907                        break; // Use first successful formatter
908                    }
909                    Err(e) => {
910                        let on_error = self.get_on_error(&canonical_lang);
911                        match on_error {
912                            OnError::Fail => {
913                                return Err(ProcessorError::ToolErrorAt {
914                                    error: e,
915                                    line: block.start_line + 1,
916                                    language: canonical_lang,
917                                });
918                            }
919                            OnError::Warn => {
920                                error_messages.push(format!("line {} ({}): {e}", block.start_line + 1, canonical_lang));
921                            }
922                            OnError::Skip => {}
923                        }
924                    }
925                }
926            }
927
928            // Replace content if changed and a tool actually ran
929            if tool_ran && formatted != code_content {
930                let reindented = self.apply_indent_to_block(&formatted, &block.indent_prefix);
931                if reindented != code_content_raw {
932                    result.replace_range(block.content_start..block.content_end, &reindented);
933                }
934            }
935        }
936
937        Ok(FormatOutput {
938            content: result,
939            had_errors: !error_messages.is_empty(),
940            error_messages,
941        })
942    }
943
944    /// Parse tool output into diagnostics.
945    ///
946    /// This is a basic parser that handles common output formats.
947    /// Tools vary widely in their output format, so this is best-effort.
948    fn parse_tool_output(
949        &self,
950        output: &ToolOutput,
951        tool_id: &str,
952        code_block_start_line: usize,
953    ) -> Vec<CodeBlockDiagnostic> {
954        let mut diagnostics = Vec::new();
955        let mut shellcheck_line: Option<usize> = None;
956
957        // Strip ANSI escape codes and combine stdout + stderr for parsing
958        let stdout_clean = strip_ansi_codes(&output.stdout);
959        let stderr_clean = strip_ansi_codes(&output.stderr);
960        let combined = format!("{stdout_clean}\n{stderr_clean}");
961
962        // State for multi-line "Error: msg" / "at line N column M" pattern
963        let mut pending_error: Option<(String, DiagnosticSeverity)> = None;
964
965        for line in combined.lines() {
966            let line = line.trim();
967            if line.is_empty() {
968                continue;
969            }
970
971            // Resolve pending "Error: msg" from previous line
972            if let Some((ref msg, severity)) = pending_error {
973                if let Some((line_num, col)) = Self::parse_at_line_column(line) {
974                    diagnostics.push(CodeBlockDiagnostic {
975                        file_line: code_block_start_line + line_num,
976                        column: Some(col),
977                        message: msg.clone(),
978                        severity,
979                        tool: tool_id.to_string(),
980                        code_block_start: code_block_start_line,
981                    });
982                    pending_error = None;
983                    continue;
984                }
985                // No position info found; emit error without line mapping
986                diagnostics.push(CodeBlockDiagnostic {
987                    file_line: code_block_start_line,
988                    column: None,
989                    message: msg.clone(),
990                    severity,
991                    tool: tool_id.to_string(),
992                    code_block_start: code_block_start_line,
993                });
994                pending_error = None;
995                // Fall through to parse current line
996            }
997
998            if let Some(line_num) = self.parse_shellcheck_header(line) {
999                shellcheck_line = Some(line_num);
1000                continue;
1001            }
1002
1003            if let Some(line_num) = shellcheck_line
1004                && let Some(diag) = self.parse_shellcheck_message(line, tool_id, code_block_start_line, line_num)
1005            {
1006                diagnostics.push(diag);
1007                continue;
1008            }
1009
1010            // Try pattern: "file:line:col: message" or "file:line: message"
1011            if let Some(diag) = self.parse_standard_format(line, tool_id, code_block_start_line) {
1012                diagnostics.push(diag);
1013                continue;
1014            }
1015
1016            // Try pattern: "line:col message" (eslint style)
1017            if let Some(diag) = self.parse_eslint_format(line, tool_id, code_block_start_line) {
1018                diagnostics.push(diag);
1019                continue;
1020            }
1021
1022            // Try single-line shellcheck format fallback
1023            if let Some(diag) = self.parse_shellcheck_format(line, tool_id, code_block_start_line) {
1024                diagnostics.push(diag);
1025                continue;
1026            }
1027
1028            // Try multi-line "Error: msg" / "Warning: msg" pattern
1029            if let Some(error_info) = Self::parse_error_line(line) {
1030                pending_error = Some(error_info);
1031            }
1032        }
1033
1034        // Flush any remaining pending error
1035        if let Some((msg, severity)) = pending_error {
1036            diagnostics.push(CodeBlockDiagnostic {
1037                file_line: code_block_start_line,
1038                column: None,
1039                message: msg,
1040                severity,
1041                tool: tool_id.to_string(),
1042                code_block_start: code_block_start_line,
1043            });
1044        }
1045
1046        // If no diagnostics parsed but tool failed, use combined output as fallback
1047        if diagnostics.is_empty() && !output.success {
1048            let lines: Vec<&str> = combined.lines().map(|l| l.trim()).filter(|l| !l.is_empty()).collect();
1049
1050            if lines.is_empty() {
1051                let exit_code = output.exit_code;
1052                diagnostics.push(CodeBlockDiagnostic {
1053                    file_line: code_block_start_line,
1054                    column: None,
1055                    message: format!("Tool exited with code {exit_code}"),
1056                    severity: DiagnosticSeverity::Error,
1057                    tool: tool_id.to_string(),
1058                    code_block_start: code_block_start_line,
1059                });
1060            } else {
1061                for line_text in lines {
1062                    diagnostics.push(CodeBlockDiagnostic {
1063                        file_line: code_block_start_line,
1064                        column: None,
1065                        message: line_text.to_string(),
1066                        severity: DiagnosticSeverity::Error,
1067                        tool: tool_id.to_string(),
1068                        code_block_start: code_block_start_line,
1069                    });
1070                }
1071            }
1072        }
1073
1074        diagnostics
1075    }
1076
1077    /// Parse standard "file:line:col: message" format.
1078    fn parse_standard_format(
1079        &self,
1080        line: &str,
1081        tool_id: &str,
1082        code_block_start_line: usize,
1083    ) -> Option<CodeBlockDiagnostic> {
1084        // Match patterns like "file.py:1:10: E501 message"
1085        let mut parts = line.rsplitn(4, ':');
1086        let message = parts.next()?.trim().to_string();
1087        let part1 = parts.next()?.trim().to_string();
1088        let part2 = parts.next()?.trim().to_string();
1089        let part3 = parts.next().map(|s| s.trim().to_string());
1090
1091        let (line_part, col_part) = if part3.is_some() {
1092            (part2, Some(part1))
1093        } else {
1094            (part1, None)
1095        };
1096
1097        if let Ok(line_num) = line_part.parse::<usize>() {
1098            let column = col_part.and_then(|s| s.parse::<usize>().ok());
1099            let message = Self::strip_fixable_markers(&message);
1100            if !message.is_empty() {
1101                let severity = self.infer_severity(&message);
1102                return Some(CodeBlockDiagnostic {
1103                    file_line: code_block_start_line + line_num,
1104                    column,
1105                    message,
1106                    severity,
1107                    tool: tool_id.to_string(),
1108                    code_block_start: code_block_start_line,
1109                });
1110            }
1111        }
1112        None
1113    }
1114
1115    /// Parse eslint-style "line:col severity message" format.
1116    fn parse_eslint_format(
1117        &self,
1118        line: &str,
1119        tool_id: &str,
1120        code_block_start_line: usize,
1121    ) -> Option<CodeBlockDiagnostic> {
1122        // Match "1:10 error Message"
1123        let parts: Vec<&str> = line.splitn(3, ' ').collect();
1124        if parts.len() >= 2 {
1125            let loc_parts: Vec<&str> = parts[0].split(':').collect();
1126            if loc_parts.len() == 2
1127                && let (Ok(line_num), Ok(col)) = (loc_parts[0].parse::<usize>(), loc_parts[1].parse::<usize>())
1128            {
1129                let (sev_part, msg_part) = if parts.len() >= 3 {
1130                    (parts[1], parts[2])
1131                } else {
1132                    (parts[1], "")
1133                };
1134                let message = if msg_part.is_empty() {
1135                    sev_part.to_string()
1136                } else {
1137                    msg_part.to_string()
1138                };
1139                let message = Self::strip_fixable_markers(&message);
1140                let severity = match sev_part.to_lowercase().as_str() {
1141                    "error" => DiagnosticSeverity::Error,
1142                    "warning" | "warn" => DiagnosticSeverity::Warning,
1143                    "info" => DiagnosticSeverity::Info,
1144                    _ => self.infer_severity(&message),
1145                };
1146                return Some(CodeBlockDiagnostic {
1147                    file_line: code_block_start_line + line_num,
1148                    column: Some(col),
1149                    message,
1150                    severity,
1151                    tool: tool_id.to_string(),
1152                    code_block_start: code_block_start_line,
1153                });
1154            }
1155        }
1156        None
1157    }
1158
1159    /// Parse shellcheck-style "In - line N: message" format.
1160    fn parse_shellcheck_format(
1161        &self,
1162        line: &str,
1163        tool_id: &str,
1164        code_block_start_line: usize,
1165    ) -> Option<CodeBlockDiagnostic> {
1166        // Match "In - line 5:" pattern
1167        if line.starts_with("In ")
1168            && line.contains(" line ")
1169            && let Some(line_start) = line.find(" line ")
1170        {
1171            let after_line = &line[line_start + 6..];
1172            if let Some(colon_pos) = after_line.find(':')
1173                && let Ok(line_num) = after_line[..colon_pos].trim().parse::<usize>()
1174            {
1175                let message = Self::strip_fixable_markers(after_line[colon_pos + 1..].trim());
1176                if !message.is_empty() {
1177                    let severity = self.infer_severity(&message);
1178                    return Some(CodeBlockDiagnostic {
1179                        file_line: code_block_start_line + line_num,
1180                        column: None,
1181                        message,
1182                        severity,
1183                        tool: tool_id.to_string(),
1184                        code_block_start: code_block_start_line,
1185                    });
1186                }
1187            }
1188        }
1189        None
1190    }
1191
1192    /// Parse shellcheck header line to capture line number context.
1193    fn parse_shellcheck_header(&self, line: &str) -> Option<usize> {
1194        if line.starts_with("In ")
1195            && line.contains(" line ")
1196            && let Some(line_start) = line.find(" line ")
1197        {
1198            let after_line = &line[line_start + 6..];
1199            if let Some(colon_pos) = after_line.find(':') {
1200                return after_line[..colon_pos].trim().parse::<usize>().ok();
1201            }
1202        }
1203        None
1204    }
1205
1206    /// Parse shellcheck message line containing SCXXXX codes.
1207    fn parse_shellcheck_message(
1208        &self,
1209        line: &str,
1210        tool_id: &str,
1211        code_block_start_line: usize,
1212        line_num: usize,
1213    ) -> Option<CodeBlockDiagnostic> {
1214        let sc_pos = line.find("SC")?;
1215        let after_sc = &line[sc_pos + 2..];
1216        let code_len = after_sc.chars().take_while(|c| c.is_ascii_digit()).count();
1217        if code_len == 0 {
1218            return None;
1219        }
1220        let after_code = &after_sc[code_len..];
1221        let sev_start = after_code.find('(')? + 1;
1222        let sev_end = after_code[sev_start..].find(')')? + sev_start;
1223        let sev = after_code[sev_start..sev_end].trim().to_lowercase();
1224        let message_start = after_code.find("):")? + 2;
1225        let message = Self::strip_fixable_markers(after_code[message_start..].trim());
1226        if message.is_empty() {
1227            return None;
1228        }
1229
1230        let severity = match sev.as_str() {
1231            "error" => DiagnosticSeverity::Error,
1232            "warning" | "warn" => DiagnosticSeverity::Warning,
1233            "info" | "style" => DiagnosticSeverity::Info,
1234            _ => self.infer_severity(&message),
1235        };
1236
1237        Some(CodeBlockDiagnostic {
1238            file_line: code_block_start_line + line_num,
1239            column: None,
1240            message,
1241            severity,
1242            tool: tool_id.to_string(),
1243            code_block_start: code_block_start_line,
1244        })
1245    }
1246
1247    /// Parse "Error: <message>" or "Warning: <message>" lines.
1248    ///
1249    /// Used for tools like tombi that output multi-line diagnostics where the
1250    /// error message and position are on separate lines. Only matches capitalized
1251    /// prefixes to avoid conflicting with lowercase `error:` in less structured output.
1252    fn parse_error_line(line: &str) -> Option<(String, DiagnosticSeverity)> {
1253        let (msg, severity) = if let Some(msg) = line.strip_prefix("Error:") {
1254            (msg, DiagnosticSeverity::Error)
1255        } else if let Some(msg) = line.strip_prefix("Warning:") {
1256            (msg, DiagnosticSeverity::Warning)
1257        } else {
1258            return None;
1259        };
1260        let msg = msg.trim();
1261        if msg.is_empty() {
1262            return None;
1263        }
1264        Some((msg.to_string(), severity))
1265    }
1266
1267    /// Parse "at line N column M" position lines (case-insensitive).
1268    ///
1269    /// Returns (line_number, column_number) if the pattern matches.
1270    fn parse_at_line_column(line: &str) -> Option<(usize, usize)> {
1271        let lower = line.to_lowercase();
1272        let rest = lower.strip_prefix("at line ")?;
1273        let mut parts = rest.split_whitespace();
1274        let line_num: usize = parts.next()?.parse().ok()?;
1275        if parts.next()? != "column" {
1276            return None;
1277        }
1278        let col: usize = parts.next()?.parse().ok()?;
1279        Some((line_num, col))
1280    }
1281
1282    /// Infer severity from message content.
1283    fn infer_severity(&self, message: &str) -> DiagnosticSeverity {
1284        let lower = message.to_lowercase();
1285        if lower.contains("error")
1286            || lower.starts_with("e") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1287            || lower.starts_with("f") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1288        {
1289            DiagnosticSeverity::Error
1290        } else if lower.contains("warning")
1291            || lower.contains("warn")
1292            || lower.starts_with("w") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1293        {
1294            DiagnosticSeverity::Warning
1295        } else {
1296            DiagnosticSeverity::Info
1297        }
1298    }
1299
1300    /// Strip "fixable" markers from external tool messages.
1301    ///
1302    /// External tools like ruff show `[*]` to indicate fixable issues, but in rumdl's
1303    /// context these markers can be misleading - the lint tool's fix capability may
1304    /// differ from what our configured formatter can fix. We strip these markers
1305    /// to avoid making promises we can't keep.
1306    fn strip_fixable_markers(message: &str) -> String {
1307        message
1308            .replace(" [*]", "")
1309            .replace("[*] ", "")
1310            .replace("[*]", "")
1311            .replace(" (fixable)", "")
1312            .replace("(fixable) ", "")
1313            .replace("(fixable)", "")
1314            .replace(" [fix available]", "")
1315            .replace("[fix available] ", "")
1316            .replace("[fix available]", "")
1317            .replace(" [autofix]", "")
1318            .replace("[autofix] ", "")
1319            .replace("[autofix]", "")
1320            .trim()
1321            .to_string()
1322    }
1323}
1324
/// Builder for FencedCodeBlockInfo during parsing.
///
/// NOTE(review): it records `content_start` but no end offset, so it presumably
/// holds what is known at the opening fence until the closing fence is seen —
/// confirm against `extract_code_blocks`.
struct FencedCodeBlockBuilder {
    // Line of the opening fence; presumably 0-indexed, since callers of the
    // finished block add 1 before reporting — TODO confirm.
    start_line: usize,
    // Presumably the byte offset where the block body begins — TODO confirm.
    content_start: usize,
    // Language identifier of the block (empty when the fence has none — verify).
    language: String,
    // Presumably the full info string following the fence — TODO confirm.
    info_string: String,
    // Fence character; presumably '`' or '~'.
    fence_char: char,
    // Number of fence characters in the opening fence — TODO confirm.
    fence_length: usize,
    // Presumably the width of the fence's indentation — TODO confirm.
    indent: usize,
    // Presumably the literal indentation text in front of the fence — TODO confirm.
    indent_prefix: String,
}
1336
1337#[cfg(test)]
1338mod tests {
1339    use super::*;
1340
1341    fn default_config() -> CodeBlockToolsConfig {
1342        CodeBlockToolsConfig::default()
1343    }
1344
1345    #[test]
1346    fn test_extract_code_blocks() {
1347        let config = default_config();
1348        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1349
1350        let content = r#"# Example
1351
1352```python
1353def hello():
1354    print("Hello")
1355```
1356
1357Some text
1358
1359```rust
1360fn main() {}
1361```
1362"#;
1363
1364        let blocks = processor.extract_code_blocks(content);
1365
1366        assert_eq!(blocks.len(), 2);
1367
1368        assert_eq!(blocks[0].language, "python");
1369        assert_eq!(blocks[0].fence_char, '`');
1370        assert_eq!(blocks[0].fence_length, 3);
1371        assert_eq!(blocks[0].start_line, 2);
1372        assert_eq!(blocks[0].indent, 0);
1373        assert_eq!(blocks[0].indent_prefix, "");
1374
1375        assert_eq!(blocks[1].language, "rust");
1376        assert_eq!(blocks[1].fence_char, '`');
1377        assert_eq!(blocks[1].fence_length, 3);
1378    }
1379
1380    #[test]
1381    fn test_extract_code_blocks_with_info_string() {
1382        let config = default_config();
1383        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1384
1385        let content = "```python title=\"example.py\"\ncode\n```";
1386        let blocks = processor.extract_code_blocks(content);
1387
1388        assert_eq!(blocks.len(), 1);
1389        assert_eq!(blocks[0].language, "python");
1390        assert_eq!(blocks[0].info_string, "python title=\"example.py\"");
1391    }
1392
1393    #[test]
1394    fn test_extract_code_blocks_tilde_fence() {
1395        let config = default_config();
1396        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1397
1398        let content = "~~~bash\necho hello\n~~~";
1399        let blocks = processor.extract_code_blocks(content);
1400
1401        assert_eq!(blocks.len(), 1);
1402        assert_eq!(blocks[0].language, "bash");
1403        assert_eq!(blocks[0].fence_char, '~');
1404        assert_eq!(blocks[0].fence_length, 3);
1405        assert_eq!(blocks[0].indent_prefix, "");
1406    }
1407
1408    #[test]
1409    fn test_extract_code_blocks_with_indent_prefix() {
1410        let config = default_config();
1411        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1412
1413        let content = "  - item\n    ```python\n    print('hi')\n    ```";
1414        let blocks = processor.extract_code_blocks(content);
1415
1416        assert_eq!(blocks.len(), 1);
1417        assert_eq!(blocks[0].indent_prefix, "    ");
1418    }
1419
1420    #[test]
1421    fn test_extract_code_blocks_no_language() {
1422        let config = default_config();
1423        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1424
1425        let content = "```\nplain code\n```";
1426        let blocks = processor.extract_code_blocks(content);
1427
1428        assert_eq!(blocks.len(), 1);
1429        assert_eq!(blocks[0].language, "");
1430    }
1431
1432    #[test]
1433    fn test_resolve_language_linguist() {
1434        let mut config = default_config();
1435        config.normalize_language = NormalizeLanguage::Linguist;
1436        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1437
1438        assert_eq!(processor.resolve_language("py"), "python");
1439        assert_eq!(processor.resolve_language("bash"), "shell");
1440        assert_eq!(processor.resolve_language("js"), "javascript");
1441    }
1442
1443    #[test]
1444    fn test_resolve_language_exact() {
1445        let mut config = default_config();
1446        config.normalize_language = NormalizeLanguage::Exact;
1447        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1448
1449        assert_eq!(processor.resolve_language("py"), "py");
1450        assert_eq!(processor.resolve_language("BASH"), "bash");
1451    }
1452
1453    #[test]
1454    fn test_resolve_language_user_alias_override() {
1455        let mut config = default_config();
1456        config.language_aliases.insert("py".to_string(), "python".to_string());
1457        config.normalize_language = NormalizeLanguage::Exact;
1458        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1459
1460        assert_eq!(processor.resolve_language("PY"), "python");
1461    }
1462
1463    #[test]
1464    fn test_indent_strip_and_reapply_roundtrip() {
1465        let config = default_config();
1466        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1467
1468        let raw = "    def hello():\n        print('hi')";
1469        let stripped = processor.strip_indent_from_block(raw, "    ");
1470        assert_eq!(stripped, "def hello():\n    print('hi')");
1471
1472        let reapplied = processor.apply_indent_to_block(&stripped, "    ");
1473        assert_eq!(reapplied, raw);
1474    }
1475
1476    #[test]
1477    fn test_infer_severity() {
1478        let config = default_config();
1479        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1480
1481        assert_eq!(
1482            processor.infer_severity("E501 line too long"),
1483            DiagnosticSeverity::Error
1484        );
1485        assert_eq!(
1486            processor.infer_severity("W291 trailing whitespace"),
1487            DiagnosticSeverity::Warning
1488        );
1489        assert_eq!(
1490            processor.infer_severity("error: something failed"),
1491            DiagnosticSeverity::Error
1492        );
1493        assert_eq!(
1494            processor.infer_severity("warning: unused variable"),
1495            DiagnosticSeverity::Warning
1496        );
1497        assert_eq!(
1498            processor.infer_severity("note: consider using"),
1499            DiagnosticSeverity::Info
1500        );
1501    }
1502
1503    #[test]
1504    fn test_parse_standard_format_windows_path() {
1505        let config = default_config();
1506        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1507
1508        let output = ToolOutput {
1509            stdout: "C:\\path\\file.py:2:5: E123 message".to_string(),
1510            stderr: String::new(),
1511            exit_code: 1,
1512            success: false,
1513        };
1514
1515        let diags = processor.parse_tool_output(&output, "ruff:check", 10);
1516        assert_eq!(diags.len(), 1);
1517        assert_eq!(diags[0].file_line, 12);
1518        assert_eq!(diags[0].column, Some(5));
1519        assert_eq!(diags[0].message, "E123 message");
1520    }
1521
1522    #[test]
1523    fn test_parse_eslint_severity() {
1524        let config = default_config();
1525        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1526
1527        let output = ToolOutput {
1528            stdout: "1:2 error Unexpected token".to_string(),
1529            stderr: String::new(),
1530            exit_code: 1,
1531            success: false,
1532        };
1533
1534        let diags = processor.parse_tool_output(&output, "eslint", 5);
1535        assert_eq!(diags.len(), 1);
1536        assert_eq!(diags[0].file_line, 6);
1537        assert_eq!(diags[0].column, Some(2));
1538        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1539        assert_eq!(diags[0].message, "Unexpected token");
1540    }
1541
1542    #[test]
1543    fn test_parse_shellcheck_multiline() {
1544        let config = default_config();
1545        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1546
1547        let output = ToolOutput {
1548            stdout: "In - line 3:\necho $var\n ^-- SC2086 (info): Double quote to prevent globbing".to_string(),
1549            stderr: String::new(),
1550            exit_code: 1,
1551            success: false,
1552        };
1553
1554        let diags = processor.parse_tool_output(&output, "shellcheck", 10);
1555        assert_eq!(diags.len(), 1);
1556        assert_eq!(diags[0].file_line, 13);
1557        assert_eq!(diags[0].severity, DiagnosticSeverity::Info);
1558        assert_eq!(diags[0].message, "Double quote to prevent globbing");
1559    }
1560
1561    #[test]
1562    fn test_lint_no_config() {
1563        let config = default_config();
1564        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1565
1566        let content = "```python\nprint('hello')\n```";
1567        let result = processor.lint(content);
1568
1569        // Should succeed with no diagnostics (no tools configured)
1570        assert!(result.is_ok());
1571        assert!(result.unwrap().is_empty());
1572    }
1573
1574    #[test]
1575    fn test_format_no_config() {
1576        let config = default_config();
1577        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1578
1579        let content = "```python\nprint('hello')\n```";
1580        let result = processor.format(content);
1581
1582        // Should succeed with unchanged content (no tools configured)
1583        assert!(result.is_ok());
1584        let output = result.unwrap();
1585        assert_eq!(output.content, content);
1586        assert!(!output.had_errors);
1587        assert!(output.error_messages.is_empty());
1588    }
1589
1590    #[test]
1591    fn test_lint_on_missing_language_definition_fail() {
1592        let mut config = default_config();
1593        config.on_missing_language_definition = OnMissing::Fail;
1594        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1595
1596        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1597        let result = processor.lint(content);
1598
1599        // Should succeed but return diagnostics for both missing language definitions
1600        assert!(result.is_ok());
1601        let diagnostics = result.unwrap();
1602        assert_eq!(diagnostics.len(), 2);
1603        assert!(diagnostics[0].message.contains("No lint tools configured"));
1604        assert!(diagnostics[0].message.contains("python"));
1605        assert!(diagnostics[1].message.contains("javascript"));
1606    }
1607
1608    #[test]
1609    fn test_lint_on_missing_language_definition_fail_fast() {
1610        let mut config = default_config();
1611        config.on_missing_language_definition = OnMissing::FailFast;
1612        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1613
1614        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1615        let result = processor.lint(content);
1616
1617        // Should fail immediately on first missing language
1618        assert!(result.is_err());
1619        let err = result.unwrap_err();
1620        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1621    }
1622
1623    #[test]
1624    fn test_format_on_missing_language_definition_fail() {
1625        let mut config = default_config();
1626        config.on_missing_language_definition = OnMissing::Fail;
1627        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1628
1629        let content = "```python\nprint('hello')\n```";
1630        let result = processor.format(content);
1631
1632        // Should succeed but report errors
1633        assert!(result.is_ok());
1634        let output = result.unwrap();
1635        assert_eq!(output.content, content); // Content unchanged
1636        assert!(output.had_errors);
1637        assert!(!output.error_messages.is_empty());
1638        assert!(output.error_messages[0].contains("No format tools configured"));
1639    }
1640
1641    #[test]
1642    fn test_format_on_missing_language_definition_fail_fast() {
1643        let mut config = default_config();
1644        config.on_missing_language_definition = OnMissing::FailFast;
1645        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1646
1647        let content = "```python\nprint('hello')\n```";
1648        let result = processor.format(content);
1649
1650        // Should fail immediately
1651        assert!(result.is_err());
1652        let err = result.unwrap_err();
1653        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1654    }
1655
1656    #[test]
1657    fn test_lint_on_missing_tool_binary_fail() {
1658        use super::super::config::{LanguageToolConfig, ToolDefinition};
1659
1660        let mut config = default_config();
1661        config.on_missing_tool_binary = OnMissing::Fail;
1662
1663        // Configure a tool with a non-existent binary
1664        let lang_config = LanguageToolConfig {
1665            lint: vec!["nonexistent-linter".to_string()],
1666            ..Default::default()
1667        };
1668        config.languages.insert("python".to_string(), lang_config);
1669
1670        let tool_def = ToolDefinition {
1671            command: vec!["nonexistent-binary-xyz123".to_string()],
1672            ..Default::default()
1673        };
1674        config.tools.insert("nonexistent-linter".to_string(), tool_def);
1675
1676        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1677
1678        let content = "```python\nprint('hello')\n```";
1679        let result = processor.lint(content);
1680
1681        // Should succeed but return diagnostic for missing binary
1682        assert!(result.is_ok());
1683        let diagnostics = result.unwrap();
1684        assert_eq!(diagnostics.len(), 1);
1685        assert!(diagnostics[0].message.contains("not found in PATH"));
1686    }
1687
1688    #[test]
1689    fn test_lint_on_missing_tool_binary_fail_fast() {
1690        use super::super::config::{LanguageToolConfig, ToolDefinition};
1691
1692        let mut config = default_config();
1693        config.on_missing_tool_binary = OnMissing::FailFast;
1694
1695        // Configure a tool with a non-existent binary
1696        let lang_config = LanguageToolConfig {
1697            lint: vec!["nonexistent-linter".to_string()],
1698            ..Default::default()
1699        };
1700        config.languages.insert("python".to_string(), lang_config);
1701
1702        let tool_def = ToolDefinition {
1703            command: vec!["nonexistent-binary-xyz123".to_string()],
1704            ..Default::default()
1705        };
1706        config.tools.insert("nonexistent-linter".to_string(), tool_def);
1707
1708        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1709
1710        let content = "```python\nprint('hello')\n```";
1711        let result = processor.lint(content);
1712
1713        // Should fail immediately
1714        assert!(result.is_err());
1715        let err = result.unwrap_err();
1716        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
1717    }
1718
1719    #[test]
1720    fn test_format_on_missing_tool_binary_fail() {
1721        use super::super::config::{LanguageToolConfig, ToolDefinition};
1722
1723        let mut config = default_config();
1724        config.on_missing_tool_binary = OnMissing::Fail;
1725
1726        // Configure a tool with a non-existent binary
1727        let lang_config = LanguageToolConfig {
1728            format: vec!["nonexistent-formatter".to_string()],
1729            ..Default::default()
1730        };
1731        config.languages.insert("python".to_string(), lang_config);
1732
1733        let tool_def = ToolDefinition {
1734            command: vec!["nonexistent-binary-xyz123".to_string()],
1735            ..Default::default()
1736        };
1737        config.tools.insert("nonexistent-formatter".to_string(), tool_def);
1738
1739        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1740
1741        let content = "```python\nprint('hello')\n```";
1742        let result = processor.format(content);
1743
1744        // Should succeed but report errors
1745        assert!(result.is_ok());
1746        let output = result.unwrap();
1747        assert_eq!(output.content, content); // Content unchanged
1748        assert!(output.had_errors);
1749        assert!(!output.error_messages.is_empty());
1750        assert!(output.error_messages[0].contains("not found in PATH"));
1751    }
1752
1753    #[test]
1754    fn test_format_on_missing_tool_binary_fail_fast() {
1755        use super::super::config::{LanguageToolConfig, ToolDefinition};
1756
1757        let mut config = default_config();
1758        config.on_missing_tool_binary = OnMissing::FailFast;
1759
1760        // Configure a tool with a non-existent binary
1761        let lang_config = LanguageToolConfig {
1762            format: vec!["nonexistent-formatter".to_string()],
1763            ..Default::default()
1764        };
1765        config.languages.insert("python".to_string(), lang_config);
1766
1767        let tool_def = ToolDefinition {
1768            command: vec!["nonexistent-binary-xyz123".to_string()],
1769            ..Default::default()
1770        };
1771        config.tools.insert("nonexistent-formatter".to_string(), tool_def);
1772
1773        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1774
1775        let content = "```python\nprint('hello')\n```";
1776        let result = processor.format(content);
1777
1778        // Should fail immediately
1779        assert!(result.is_err());
1780        let err = result.unwrap_err();
1781        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
1782    }
1783
1784    #[test]
1785    fn test_lint_rumdl_builtin_skipped_for_markdown() {
1786        // Configure the built-in "rumdl" tool for markdown
1787        // The processor should skip it (handled by embedded markdown linting)
1788        let mut config = default_config();
1789        config.languages.insert(
1790            "markdown".to_string(),
1791            LanguageToolConfig {
1792                lint: vec![RUMDL_BUILTIN_TOOL.to_string()],
1793                ..Default::default()
1794            },
1795        );
1796        config.on_missing_language_definition = OnMissing::Fail;
1797        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1798
1799        let content = "```markdown\n# Hello\n```";
1800        let result = processor.lint(content);
1801
1802        // Should succeed with no diagnostics - "rumdl" tool is skipped, not treated as unknown
1803        assert!(result.is_ok());
1804        assert!(result.unwrap().is_empty());
1805    }
1806
1807    #[test]
1808    fn test_format_rumdl_builtin_skipped_for_markdown() {
1809        // Configure the built-in "rumdl" tool for markdown
1810        let mut config = default_config();
1811        config.languages.insert(
1812            "markdown".to_string(),
1813            LanguageToolConfig {
1814                format: vec![RUMDL_BUILTIN_TOOL.to_string()],
1815                ..Default::default()
1816            },
1817        );
1818        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1819
1820        let content = "```markdown\n# Hello\n```";
1821        let result = processor.format(content);
1822
1823        // Should succeed with unchanged content - "rumdl" tool is skipped
1824        assert!(result.is_ok());
1825        let output = result.unwrap();
1826        assert_eq!(output.content, content);
1827        assert!(!output.had_errors);
1828    }
1829
1830    #[test]
1831    fn test_is_markdown_language() {
1832        // Test the helper function
1833        assert!(is_markdown_language("markdown"));
1834        assert!(is_markdown_language("Markdown"));
1835        assert!(is_markdown_language("MARKDOWN"));
1836        assert!(is_markdown_language("md"));
1837        assert!(is_markdown_language("MD"));
1838        assert!(!is_markdown_language("python"));
1839        assert!(!is_markdown_language("rust"));
1840        assert!(!is_markdown_language(""));
1841    }
1842
1843    // Issue #423: MkDocs admonition code block detection
1844
1845    #[test]
1846    fn test_extract_mkdocs_admonition_code_block() {
1847        let config = default_config();
1848        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1849
1850        let content = "!!! note\n    Some text\n\n    ```python\n    def hello():\n        pass\n    ```\n";
1851        let blocks = processor.extract_code_blocks(content);
1852
1853        assert_eq!(blocks.len(), 1, "Should detect code block inside MkDocs admonition");
1854        assert_eq!(blocks[0].language, "python");
1855    }
1856
1857    #[test]
1858    fn test_extract_mkdocs_tab_code_block() {
1859        let config = default_config();
1860        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1861
1862        let content = "=== \"Python\"\n\n    ```python\n    print(\"hello\")\n    ```\n";
1863        let blocks = processor.extract_code_blocks(content);
1864
1865        assert_eq!(blocks.len(), 1, "Should detect code block inside MkDocs tab");
1866        assert_eq!(blocks[0].language, "python");
1867    }
1868
1869    #[test]
1870    fn test_standard_flavor_ignores_admonition_indented_content() {
1871        let config = default_config();
1872        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1873
1874        // With standard flavor, pulldown_cmark parses this differently;
1875        // our MkDocs extraction should NOT run
1876        let content = "!!! note\n    Some text\n\n    ```python\n    def hello():\n        pass\n    ```\n";
1877        let blocks = processor.extract_code_blocks(content);
1878
1879        // Standard flavor relies on pulldown_cmark only, which may or may not detect
1880        // indented fenced blocks. The key assertion is that we don't double-detect.
1881        // With standard flavor, the MkDocs extraction path is skipped entirely.
1882        for (i, b) in blocks.iter().enumerate() {
1883            for (j, b2) in blocks.iter().enumerate() {
1884                if i != j {
1885                    assert_ne!(b.start_line, b2.start_line, "No duplicate blocks should exist");
1886                }
1887            }
1888        }
1889    }
1890
1891    #[test]
1892    fn test_mkdocs_top_level_blocks_alongside_admonition() {
1893        let config = default_config();
1894        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1895
1896        let content =
1897            "```rust\nfn main() {}\n```\n\n!!! note\n    Some text\n\n    ```python\n    print(\"hello\")\n    ```\n";
1898        let blocks = processor.extract_code_blocks(content);
1899
1900        assert_eq!(
1901            blocks.len(),
1902            2,
1903            "Should detect both top-level and admonition code blocks"
1904        );
1905        assert_eq!(blocks[0].language, "rust");
1906        assert_eq!(blocks[1].language, "python");
1907    }
1908
1909    #[test]
1910    fn test_mkdocs_nested_admonition_code_block() {
1911        let config = default_config();
1912        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1913
1914        let content = "\
1915!!! note
1916    Some text
1917
1918    !!! warning
1919        Nested content
1920
1921        ```python
1922        x = 1
1923        ```
1924";
1925        let blocks = processor.extract_code_blocks(content);
1926        assert_eq!(blocks.len(), 1, "Should detect code block inside nested admonition");
1927        assert_eq!(blocks[0].language, "python");
1928    }
1929
1930    #[test]
1931    fn test_mkdocs_consecutive_admonitions_no_stale_context() {
1932        let config = default_config();
1933        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1934
1935        // Two consecutive admonitions at the same indent level.
1936        // The first has no code block, the second does.
1937        let content = "\
1938!!! note
1939    First admonition content
1940
1941!!! warning
1942    Second admonition content
1943
1944    ```python
1945    y = 2
1946    ```
1947";
1948        let blocks = processor.extract_code_blocks(content);
1949        assert_eq!(blocks.len(), 1, "Should detect code block in second admonition only");
1950        assert_eq!(blocks[0].language, "python");
1951    }
1952
1953    #[test]
1954    fn test_mkdocs_crlf_line_endings() {
1955        let config = default_config();
1956        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1957
1958        // Use \r\n line endings
1959        let content = "!!! note\r\n    Some text\r\n\r\n    ```python\r\n    x = 1\r\n    ```\r\n";
1960        let blocks = processor.extract_code_blocks(content);
1961
1962        assert_eq!(blocks.len(), 1, "Should detect code block with CRLF line endings");
1963        assert_eq!(blocks[0].language, "python");
1964
1965        // Verify byte offsets point to valid content
1966        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
1967        assert!(
1968            extracted.contains("x = 1"),
1969            "Extracted content should contain code. Got: {extracted:?}"
1970        );
1971    }
1972
1973    #[test]
1974    fn test_mkdocs_unclosed_fence_in_admonition() {
1975        let config = default_config();
1976        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1977
1978        // Unclosed fence should not produce a block
1979        let content = "!!! note\n    ```python\n    x = 1\n    no closing fence\n";
1980        let blocks = processor.extract_code_blocks(content);
1981        assert_eq!(blocks.len(), 0, "Unclosed fence should not produce a block");
1982    }
1983
1984    #[test]
1985    fn test_mkdocs_tilde_fence_in_admonition() {
1986        let config = default_config();
1987        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1988
1989        let content = "!!! note\n    ~~~ruby\n    puts 'hi'\n    ~~~\n";
1990        let blocks = processor.extract_code_blocks(content);
1991        assert_eq!(blocks.len(), 1, "Should detect tilde-fenced code block");
1992        assert_eq!(blocks[0].language, "ruby");
1993    }
1994
1995    #[test]
1996    fn test_mkdocs_empty_lines_in_code_block() {
1997        let config = default_config();
1998        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1999
2000        // Code block with empty lines inside — verifies byte offsets are correct
2001        // across empty lines (the previous find("") approach would break here)
2002        let content = "!!! note\n    ```python\n    x = 1\n\n    y = 2\n    ```\n";
2003        let blocks = processor.extract_code_blocks(content);
2004        assert_eq!(blocks.len(), 1);
2005
2006        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
2007        assert!(
2008            extracted.contains("x = 1") && extracted.contains("y = 2"),
2009            "Extracted content should span across the empty line. Got: {extracted:?}"
2010        );
2011    }
2012
2013    #[test]
2014    fn test_mkdocs_content_byte_offsets_lf() {
2015        let config = default_config();
2016        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
2017
2018        let content = "!!! note\n    ```python\n    print('hi')\n    ```\n";
2019        let blocks = processor.extract_code_blocks(content);
2020        assert_eq!(blocks.len(), 1);
2021
2022        // Verify the extracted content is exactly the code body
2023        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
2024        assert_eq!(extracted, "    print('hi')\n", "Content offsets should be exact for LF");
2025    }
2026
2027    #[test]
2028    fn test_mkdocs_content_byte_offsets_crlf() {
2029        let config = default_config();
2030        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
2031
2032        let content = "!!! note\r\n    ```python\r\n    print('hi')\r\n    ```\r\n";
2033        let blocks = processor.extract_code_blocks(content);
2034        assert_eq!(blocks.len(), 1);
2035
2036        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
2037        assert_eq!(
2038            extracted, "    print('hi')\r\n",
2039            "Content offsets should be exact for CRLF"
2040        );
2041    }
2042
2043    #[test]
2044    fn test_lint_enabled_false_skips_language_in_strict_mode() {
2045        // With on-missing-language-definition = "fail", a language configured
2046        // with enabled=false should be silently skipped (no error).
2047        let mut config = default_config();
2048        config.normalize_language = NormalizeLanguage::Exact;
2049        config.on_missing_language_definition = OnMissing::Fail;
2050
2051        // Python has tools, plaintext is disabled
2052        config.languages.insert(
2053            "python".to_string(),
2054            LanguageToolConfig {
2055                lint: vec!["ruff:check".to_string()],
2056                ..Default::default()
2057            },
2058        );
2059        config.languages.insert(
2060            "plaintext".to_string(),
2061            LanguageToolConfig {
2062                enabled: false,
2063                ..Default::default()
2064            },
2065        );
2066
2067        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2068
2069        let content = "```plaintext\nsome text\n```";
2070        let result = processor.lint(content);
2071
2072        // No error for plaintext: enabled=false satisfies strict mode
2073        assert!(result.is_ok());
2074        let diagnostics = result.unwrap();
2075        assert!(
2076            diagnostics.is_empty(),
2077            "Expected no diagnostics for disabled language, got: {diagnostics:?}"
2078        );
2079    }
2080
2081    #[test]
2082    fn test_format_enabled_false_skips_language_in_strict_mode() {
2083        // Same test but for format mode
2084        let mut config = default_config();
2085        config.normalize_language = NormalizeLanguage::Exact;
2086        config.on_missing_language_definition = OnMissing::Fail;
2087
2088        config.languages.insert(
2089            "plaintext".to_string(),
2090            LanguageToolConfig {
2091                enabled: false,
2092                ..Default::default()
2093            },
2094        );
2095
2096        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2097
2098        let content = "```plaintext\nsome text\n```";
2099        let result = processor.format(content);
2100
2101        // No error for plaintext: enabled=false satisfies strict mode
2102        assert!(result.is_ok());
2103        let output = result.unwrap();
2104        assert!(!output.had_errors, "Expected no errors for disabled language");
2105        assert!(
2106            output.error_messages.is_empty(),
2107            "Expected no error messages, got: {:?}",
2108            output.error_messages
2109        );
2110    }
2111
2112    #[test]
2113    fn test_enabled_false_default_true_preserved() {
2114        // Verify that when enabled is not set, it defaults to true (existing behavior)
2115        let mut config = default_config();
2116        config.on_missing_language_definition = OnMissing::Fail;
2117
2118        // Configure python without explicitly setting enabled
2119        config.languages.insert(
2120            "python".to_string(),
2121            LanguageToolConfig {
2122                lint: vec!["ruff:check".to_string()],
2123                ..Default::default()
2124            },
2125        );
2126
2127        let lang_config = config.languages.get("python").unwrap();
2128        assert!(lang_config.enabled, "enabled should default to true");
2129    }
2130
2131    #[test]
2132    fn test_enabled_false_with_fail_fast_no_error() {
2133        // Even with fail-fast, enabled=false should skip silently
2134        let mut config = default_config();
2135        config.normalize_language = NormalizeLanguage::Exact;
2136        config.on_missing_language_definition = OnMissing::FailFast;
2137
2138        config.languages.insert(
2139            "unknown".to_string(),
2140            LanguageToolConfig {
2141                enabled: false,
2142                ..Default::default()
2143            },
2144        );
2145
2146        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2147
2148        let content = "```unknown\nsome content\n```";
2149        let result = processor.lint(content);
2150
2151        // Should not return an error: enabled=false takes precedence over fail-fast
2152        assert!(result.is_ok(), "Expected Ok but got Err: {result:?}");
2153        assert!(result.unwrap().is_empty());
2154    }
2155
2156    #[test]
2157    fn test_enabled_false_format_with_fail_fast_no_error() {
2158        // Same for format mode
2159        let mut config = default_config();
2160        config.normalize_language = NormalizeLanguage::Exact;
2161        config.on_missing_language_definition = OnMissing::FailFast;
2162
2163        config.languages.insert(
2164            "unknown".to_string(),
2165            LanguageToolConfig {
2166                enabled: false,
2167                ..Default::default()
2168            },
2169        );
2170
2171        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2172
2173        let content = "```unknown\nsome content\n```";
2174        let result = processor.format(content);
2175
2176        assert!(result.is_ok(), "Expected Ok but got Err: {result:?}");
2177        let output = result.unwrap();
2178        assert!(!output.had_errors);
2179    }
2180
2181    #[test]
2182    fn test_enabled_false_with_tools_still_skips() {
2183        // If enabled=false but tools are listed, the language should still be skipped
2184        let mut config = default_config();
2185        config.on_missing_language_definition = OnMissing::Fail;
2186
2187        config.languages.insert(
2188            "python".to_string(),
2189            LanguageToolConfig {
2190                enabled: false,
2191                lint: vec!["ruff:check".to_string()],
2192                format: vec!["ruff:format".to_string()],
2193                on_error: None,
2194            },
2195        );
2196
2197        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2198
2199        let content = "```python\nprint('hello')\n```";
2200
2201        // Lint should skip
2202        let lint_result = processor.lint(content);
2203        assert!(lint_result.is_ok());
2204        assert!(lint_result.unwrap().is_empty());
2205
2206        // Format should skip
2207        let format_result = processor.format(content);
2208        assert!(format_result.is_ok());
2209        let output = format_result.unwrap();
2210        assert!(!output.had_errors);
2211        assert_eq!(output.content, content, "Content should be unchanged");
2212    }
2213
2214    #[test]
2215    fn test_enabled_true_without_tools_triggers_strict_mode() {
2216        // A language configured with enabled=true (default) but no tools
2217        // should still trigger strict mode errors
2218        let mut config = default_config();
2219        config.on_missing_language_definition = OnMissing::Fail;
2220
2221        config.languages.insert(
2222            "python".to_string(),
2223            LanguageToolConfig {
2224                // enabled defaults to true, no tools
2225                ..Default::default()
2226            },
2227        );
2228
2229        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2230
2231        let content = "```python\nprint('hello')\n```";
2232        let result = processor.lint(content);
2233
2234        // Should report an error because enabled=true but no lint tools configured
2235        assert!(result.is_ok());
2236        let diagnostics = result.unwrap();
2237        assert_eq!(diagnostics.len(), 1);
2238        assert!(diagnostics[0].message.contains("No lint tools configured"));
2239    }
2240
2241    #[test]
2242    fn test_mixed_enabled_and_disabled_languages() {
2243        // Multiple languages: one disabled, one unconfigured
2244        let mut config = default_config();
2245        config.normalize_language = NormalizeLanguage::Exact;
2246        config.on_missing_language_definition = OnMissing::Fail;
2247
2248        config.languages.insert(
2249            "plaintext".to_string(),
2250            LanguageToolConfig {
2251                enabled: false,
2252                ..Default::default()
2253            },
2254        );
2255
2256        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2257
2258        let content = "\
2259```plaintext
2260some text
2261```
2262
2263```javascript
2264console.log('hi');
2265```
2266";
2267
2268        let result = processor.lint(content);
2269        assert!(result.is_ok());
2270        let diagnostics = result.unwrap();
2271
2272        // plaintext: skipped (enabled=false), no error
2273        // javascript: not configured at all, should trigger strict mode error
2274        assert_eq!(diagnostics.len(), 1, "Expected 1 diagnostic, got: {diagnostics:?}");
2275        assert!(
2276            diagnostics[0].message.contains("javascript"),
2277            "Error should be about javascript, got: {}",
2278            diagnostics[0].message
2279        );
2280    }
2281
2282    #[test]
2283    fn test_generic_fallback_includes_all_stderr_lines() {
2284        let config = default_config();
2285        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2286
2287        // Use output that won't be parsed by any structured format parser
2288        let output = ToolOutput {
2289            stdout: String::new(),
2290            stderr: "Parse error at position 42\nUnexpected token '::'\n3 errors found".to_string(),
2291            exit_code: 1,
2292            success: false,
2293        };
2294
2295        let diags = processor.parse_tool_output(&output, "tombi", 5);
2296        assert_eq!(diags.len(), 3, "Expected one diagnostic per non-empty stderr line");
2297        assert_eq!(diags[0].message, "Parse error at position 42");
2298        assert_eq!(diags[1].message, "Unexpected token '::'");
2299        assert_eq!(diags[2].message, "3 errors found");
2300        assert!(diags.iter().all(|d| d.tool == "tombi"));
2301        assert!(diags.iter().all(|d| d.file_line == 5));
2302    }
2303
2304    #[test]
2305    fn test_generic_fallback_includes_all_stdout_lines_when_stderr_empty() {
2306        let config = default_config();
2307        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2308
2309        let output = ToolOutput {
2310            stdout: "Line 1 error\nLine 2 detail\nLine 3 summary".to_string(),
2311            stderr: String::new(),
2312            exit_code: 1,
2313            success: false,
2314        };
2315
2316        let diags = processor.parse_tool_output(&output, "some-tool", 10);
2317        assert_eq!(diags.len(), 3);
2318        assert_eq!(diags[0].message, "Line 1 error");
2319        assert_eq!(diags[1].message, "Line 2 detail");
2320        assert_eq!(diags[2].message, "Line 3 summary");
2321    }
2322
2323    #[test]
2324    fn test_generic_fallback_skips_blank_lines() {
2325        let config = default_config();
2326        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2327
2328        let output = ToolOutput {
2329            stdout: String::new(),
2330            stderr: "error: bad input\n\n  \n\ndetail: see above\n".to_string(),
2331            exit_code: 1,
2332            success: false,
2333        };
2334
2335        let diags = processor.parse_tool_output(&output, "tool", 1);
2336        assert_eq!(diags.len(), 2);
2337        assert_eq!(diags[0].message, "error: bad input");
2338        assert_eq!(diags[1].message, "detail: see above");
2339    }
2340
2341    #[test]
2342    fn test_generic_fallback_exit_code_when_no_output() {
2343        let config = default_config();
2344        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2345
2346        let output = ToolOutput {
2347            stdout: String::new(),
2348            stderr: String::new(),
2349            exit_code: 42,
2350            success: false,
2351        };
2352
2353        let diags = processor.parse_tool_output(&output, "tool", 1);
2354        assert_eq!(diags.len(), 1);
2355        assert_eq!(diags[0].message, "Tool exited with code 42");
2356    }
2357
2358    #[test]
2359    fn test_generic_fallback_not_triggered_on_success() {
2360        let config = default_config();
2361        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2362
2363        let output = ToolOutput {
2364            stdout: "some informational output".to_string(),
2365            stderr: String::new(),
2366            exit_code: 0,
2367            success: true,
2368        };
2369
2370        let diags = processor.parse_tool_output(&output, "tool", 1);
2371        assert!(
2372            diags.is_empty(),
2373            "Successful tool runs should produce no fallback diagnostics"
2374        );
2375    }
2376
2377    #[test]
2378    fn test_ansi_codes_stripped_before_parsing() {
2379        let config = default_config();
2380        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2381
2382        // ruff-style output with ANSI color codes wrapping the message
2383        let output = ToolOutput {
2384            stdout: "\x1b[1m_.py\x1b[0m:\x1b[33m1\x1b[0m:\x1b[33m1\x1b[0m: \x1b[31mE501\x1b[0m Line too long"
2385                .to_string(),
2386            stderr: String::new(),
2387            exit_code: 1,
2388            success: false,
2389        };
2390
2391        let diags = processor.parse_tool_output(&output, "ruff:check", 5);
2392        assert_eq!(diags.len(), 1, "ANSI-colored output should still be parsed");
2393        assert_eq!(diags[0].message, "E501 Line too long");
2394        assert_eq!(diags[0].file_line, 6); // 5 + 1
2395    }
2396
2397    #[test]
2398    fn test_tombi_multiline_error_format() {
2399        let config = default_config();
2400        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2401
2402        // Simulates tombi output (ANSI codes stripped for clarity)
2403        let output = ToolOutput {
2404            stdout: "[test]\ntest: \"test\"\nError: invalid key\n    at line 2 column 1\nError: expected key\n    at line 2 column 1\nError: expected '='\n    at line 2 column 1\nError: expected value\n    at line 2 column 1".to_string(),
2405            stderr: "1 file failed to be formatted".to_string(),
2406            exit_code: 1,
2407            success: false,
2408        };
2409
2410        let diags = processor.parse_tool_output(&output, "tombi", 7);
2411        assert_eq!(
2412            diags.len(),
2413            4,
2414            "Expected 4 diagnostics from tombi errors, got {diags:?}"
2415        );
2416        assert_eq!(diags[0].message, "invalid key");
2417        assert_eq!(diags[0].file_line, 9); // 7 + 2
2418        assert_eq!(diags[0].column, Some(1));
2419        assert_eq!(diags[1].message, "expected key");
2420        assert_eq!(diags[1].file_line, 9);
2421        assert_eq!(diags[2].message, "expected '='");
2422        assert_eq!(diags[3].message, "expected value");
2423        assert!(diags.iter().all(|d| d.tool == "tombi"));
2424    }
2425
2426    #[test]
2427    fn test_tombi_with_ansi_codes() {
2428        let config = default_config();
2429        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2430
2431        // Real tombi output with ANSI escape codes
2432        let output = ToolOutput {
2433            stdout: "[test]\ntest: \"test\"\n\x1b[1;31m  Error\x1b[0m: \x1b[1minvalid key\x1b[0m\n    \x1b[90mat line 2 column 1\x1b[0m\n\x1b[1;31m  Error\x1b[0m: \x1b[1mexpected '='\x1b[0m\n    \x1b[90mat line 2 column 1\x1b[0m".to_string(),
2434            stderr: "1 file failed to be formatted".to_string(),
2435            exit_code: 1,
2436            success: false,
2437        };
2438
2439        let diags = processor.parse_tool_output(&output, "tombi", 7);
2440        assert_eq!(
2441            diags.len(),
2442            2,
2443            "Expected 2 diagnostics from ANSI-colored tombi output, got {diags:?}"
2444        );
2445        assert_eq!(diags[0].message, "invalid key");
2446        assert_eq!(diags[0].file_line, 9);
2447        assert_eq!(diags[1].message, "expected '='");
2448        assert_eq!(diags[1].file_line, 9);
2449    }
2450
2451    #[test]
2452    fn test_fallback_combines_stdout_and_stderr() {
2453        let config = default_config();
2454        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2455
2456        // Tool puts some errors on stdout, summary on stderr
2457        let output = ToolOutput {
2458            stdout: "problem found in input".to_string(),
2459            stderr: "1 file failed".to_string(),
2460            exit_code: 1,
2461            success: false,
2462        };
2463
2464        let diags = processor.parse_tool_output(&output, "tool", 1);
2465        assert_eq!(diags.len(), 2, "Fallback should include both stdout and stderr");
2466        assert_eq!(diags[0].message, "problem found in input");
2467        assert_eq!(diags[1].message, "1 file failed");
2468    }
2469
2470    #[test]
2471    fn test_error_line_without_position_info() {
2472        let config = default_config();
2473        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2474
2475        // Error: line not followed by "at line N column M"
2476        let output = ToolOutput {
2477            stdout: "Error: something went wrong\nsome unrelated line".to_string(),
2478            stderr: String::new(),
2479            exit_code: 1,
2480            success: false,
2481        };
2482
2483        let diags = processor.parse_tool_output(&output, "tool", 5);
2484        // "Error: something went wrong" → parsed by error-line parser (no position)
2485        // "some unrelated line" → no parser matches, but diagnostics not empty → no fallback
2486        assert!(!diags.is_empty());
2487        assert_eq!(diags[0].message, "something went wrong");
2488        assert_eq!(diags[0].file_line, 5); // No line offset, uses code_block_start
2489    }
2490
2491    #[test]
2492    fn test_warning_line_with_position() {
2493        let config = default_config();
2494        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2495
2496        let output = ToolOutput {
2497            stdout: "Warning: deprecated syntax\n    at line 3 column 5".to_string(),
2498            stderr: String::new(),
2499            exit_code: 1,
2500            success: false,
2501        };
2502
2503        let diags = processor.parse_tool_output(&output, "tool", 10);
2504        assert_eq!(diags.len(), 1);
2505        assert_eq!(diags[0].message, "deprecated syntax");
2506        assert_eq!(diags[0].file_line, 13); // 10 + 3
2507        assert_eq!(diags[0].column, Some(5));
2508        assert!(matches!(diags[0].severity, DiagnosticSeverity::Warning));
2509    }
2510
2511    #[test]
2512    fn test_strip_ansi_codes() {
2513        assert_eq!(strip_ansi_codes("hello"), "hello");
2514        assert_eq!(strip_ansi_codes("\x1b[31mred\x1b[0m"), "red");
2515        assert_eq!(
2516            strip_ansi_codes("\x1b[1;31m  Error\x1b[0m: \x1b[1mmsg\x1b[0m"),
2517            "  Error: msg"
2518        );
2519        assert_eq!(strip_ansi_codes("no codes here"), "no codes here");
2520        assert_eq!(strip_ansi_codes(""), "");
2521        assert_eq!(
2522            strip_ansi_codes("\x1b[90mat line 2 column 1\x1b[0m"),
2523            "at line 2 column 1"
2524        );
2525    }
2526
2527    #[test]
2528    fn test_parse_at_line_column() {
2529        assert_eq!(
2530            CodeBlockToolProcessor::parse_at_line_column("at line 2 column 1"),
2531            Some((2, 1))
2532        );
2533        assert_eq!(
2534            CodeBlockToolProcessor::parse_at_line_column("at line 10 column 15"),
2535            Some((10, 15))
2536        );
2537        assert_eq!(
2538            CodeBlockToolProcessor::parse_at_line_column("At Line 5 Column 3"),
2539            Some((5, 3))
2540        );
2541        assert_eq!(
2542            CodeBlockToolProcessor::parse_at_line_column("not a position line"),
2543            None
2544        );
2545        assert_eq!(
2546            CodeBlockToolProcessor::parse_at_line_column("at line abc column 1"),
2547            None
2548        );
2549    }
2550
2551    #[test]
2552    fn test_parse_error_line() {
2553        let (msg, sev) = CodeBlockToolProcessor::parse_error_line("Error: invalid key").unwrap();
2554        assert_eq!(msg, "invalid key");
2555        assert!(matches!(sev, DiagnosticSeverity::Error));
2556
2557        let (msg, sev) = CodeBlockToolProcessor::parse_error_line("Warning: deprecated").unwrap();
2558        assert_eq!(msg, "deprecated");
2559        assert!(matches!(sev, DiagnosticSeverity::Warning));
2560
2561        // Lowercase should NOT match (avoids conflict with unstructured tool output)
2562        assert!(CodeBlockToolProcessor::parse_error_line("error: bad input").is_none());
2563        assert!(CodeBlockToolProcessor::parse_error_line("warning: minor issue").is_none());
2564
2565        // Empty message after prefix should not match
2566        assert!(CodeBlockToolProcessor::parse_error_line("Error:").is_none());
2567        assert!(CodeBlockToolProcessor::parse_error_line("Error:   ").is_none());
2568
2569        // Not an error line
2570        assert!(CodeBlockToolProcessor::parse_error_line("some random text").is_none());
2571    }
2572
2573    #[test]
2574    fn test_consecutive_error_lines_without_position() {
2575        let config = default_config();
2576        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2577
2578        // Two Error: lines in a row — first should flush without position,
2579        // second gets position from "at line"
2580        let output = ToolOutput {
2581            stdout: "Error: first problem\nError: second problem\n    at line 3 column 1".to_string(),
2582            stderr: String::new(),
2583            exit_code: 1,
2584            success: false,
2585        };
2586
2587        let diags = processor.parse_tool_output(&output, "tool", 5);
2588        assert_eq!(diags.len(), 2, "Expected 2 diagnostics, got {diags:?}");
2589        // First error flushed without position when second Error: was encountered
2590        assert_eq!(diags[0].message, "first problem");
2591        assert_eq!(diags[0].file_line, 5); // No line mapping
2592        assert_eq!(diags[0].column, None);
2593        // Second error resolved with position
2594        assert_eq!(diags[1].message, "second problem");
2595        assert_eq!(diags[1].file_line, 8); // 5 + 3
2596        assert_eq!(diags[1].column, Some(1));
2597    }
2598
2599    #[test]
2600    fn test_error_line_at_end_of_output() {
2601        let config = default_config();
2602        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2603
2604        // Error: as the very last line — flushed by post-loop code
2605        let output = ToolOutput {
2606            stdout: "Error: trailing error".to_string(),
2607            stderr: String::new(),
2608            exit_code: 1,
2609            success: false,
2610        };
2611
2612        let diags = processor.parse_tool_output(&output, "tool", 5);
2613        assert_eq!(diags.len(), 1);
2614        assert_eq!(diags[0].message, "trailing error");
2615        assert_eq!(diags[0].file_line, 5); // No position info available
2616        assert_eq!(diags[0].column, None);
2617    }
2618
2619    #[test]
2620    fn test_blank_lines_between_error_and_position() {
2621        let config = default_config();
2622        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2623
2624        // Blank lines between Error: and "at line" should be transparently skipped
2625        let output = ToolOutput {
2626            stdout: "Error: spaced out\n\n\n    at line 4 column 2".to_string(),
2627            stderr: String::new(),
2628            exit_code: 1,
2629            success: false,
2630        };
2631
2632        let diags = processor.parse_tool_output(&output, "tool", 10);
2633        assert_eq!(diags.len(), 1);
2634        assert_eq!(diags[0].message, "spaced out");
2635        assert_eq!(diags[0].file_line, 14); // 10 + 4
2636        assert_eq!(diags[0].column, Some(2));
2637    }
2638
2639    #[test]
2640    fn test_mixed_structured_and_error_line_parsers() {
2641        let config = default_config();
2642        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2643
2644        // Mix of ruff-style structured output and tombi-style Error: output
2645        let output = ToolOutput {
2646            stdout: "_.py:1:5: E501 Line too long\nError: invalid syntax\n    at line 3 column 1".to_string(),
2647            stderr: String::new(),
2648            exit_code: 1,
2649            success: false,
2650        };
2651
2652        let diags = processor.parse_tool_output(&output, "tool", 5);
2653        assert_eq!(diags.len(), 2, "Expected 2 diagnostics, got {diags:?}");
2654        // First: standard format parser
2655        assert_eq!(diags[0].message, "E501 Line too long");
2656        assert_eq!(diags[0].file_line, 6); // 5 + 1
2657        // Second: Error: + at line parser
2658        assert_eq!(diags[1].message, "invalid syntax");
2659        assert_eq!(diags[1].file_line, 8); // 5 + 3
2660    }
2661
2662    #[test]
2663    fn test_at_line_without_preceding_error() {
2664        let config = default_config();
2665        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2666
2667        // "at line N column M" without a preceding Error: should not create a diagnostic
2668        let output = ToolOutput {
2669            stdout: "at line 2 column 1\nsome other text".to_string(),
2670            stderr: String::new(),
2671            exit_code: 1,
2672            success: false,
2673        };
2674
2675        let diags = processor.parse_tool_output(&output, "tool", 5);
2676        // No pending error, so "at line" is just an unmatched line
2677        // Both lines are unmatched, fallback fires with combined output
2678        assert_eq!(diags.len(), 2);
2679        assert_eq!(diags[0].message, "at line 2 column 1");
2680        assert_eq!(diags[1].message, "some other text");
2681    }
2682
2683    // =========================================================================
2684    // Issue #527: formatter that produces empty output should not erase content
2685    // =========================================================================
2686
2687    /// A formatter that produces no stdout (like `tombi lint -` mistakenly used
2688    /// as a formatter) should not replace non-empty content with an empty string.
2689    /// This test uses `true` which exits 0 with no output, simulating the bug.
2690    #[test]
2691    fn test_format_empty_output_does_not_erase_content() {
2692        use super::super::config::LanguageToolConfig;
2693
2694        let mut config = default_config();
2695        config.languages.insert(
2696            "toml".to_string(),
2697            LanguageToolConfig {
2698                format: vec!["empty-formatter".to_string()],
2699                ..Default::default()
2700            },
2701        );
2702        // Define a tool that exits 0 but produces no stdout (simulates `tombi lint -`)
2703        config.tools.insert(
2704            "empty-formatter".to_string(),
2705            super::super::config::ToolDefinition {
2706                command: vec!["true".to_string()],
2707                stdin: true,
2708                stdout: true,
2709                lint_args: vec![],
2710                format_args: vec![],
2711            },
2712        );
2713
2714        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2715
2716        let content = "```toml\nkey = \"value\"\n```\n";
2717        let result = processor.format(content);
2718
2719        assert!(result.is_ok(), "Format should not error");
2720        let output = result.unwrap();
2721
2722        // The content must NOT be erased — original content should be preserved
2723        assert!(
2724            output.content.contains("key = \"value\""),
2725            "Empty formatter output should not erase content. Got: {:?}",
2726            output.content
2727        );
2728    }
2729
2730    /// A formatter that echoes input back (like `cat`) should preserve content.
2731    #[test]
2732    fn test_format_identity_formatter_preserves_content() {
2733        use super::super::config::LanguageToolConfig;
2734
2735        let mut config = default_config();
2736        config.languages.insert(
2737            "toml".to_string(),
2738            LanguageToolConfig {
2739                format: vec!["cat-formatter".to_string()],
2740                ..Default::default()
2741            },
2742        );
2743        config.tools.insert(
2744            "cat-formatter".to_string(),
2745            super::super::config::ToolDefinition {
2746                command: vec!["cat".to_string()],
2747                stdin: true,
2748                stdout: true,
2749                lint_args: vec![],
2750                format_args: vec![],
2751            },
2752        );
2753
2754        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2755
2756        let content = "```toml\nkey = \"value\"\n```\n";
2757        let result = processor.format(content);
2758
2759        assert!(result.is_ok(), "Format should not error");
2760        let output = result.unwrap();
2761        assert_eq!(
2762            output.content, content,
2763            "Identity formatter should preserve content exactly"
2764        );
2765    }
2766
2767    /// Verify that the context-aware tool resolution resolves bare "tombi"
2768    /// to "tombi:format" in format context and "tombi:lint" in lint context.
2769    #[test]
2770    fn test_resolve_tool_context_aware_tombi() {
2771        let config = default_config();
2772        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2773
2774        // In format context, bare "tombi" should resolve to "tombi:format"
2775        let format_def = processor
2776            .resolve_tool("tombi", ToolContext::Format)
2777            .expect("Should resolve tombi in format context");
2778        assert!(
2779            format_def.command.iter().any(|arg| arg == "format"),
2780            "Bare 'tombi' in format context should resolve to 'tombi format', got: {:?}",
2781            format_def.command
2782        );
2783
2784        // In lint context, bare "tombi" should resolve to "tombi:lint"
2785        let lint_def = processor
2786            .resolve_tool("tombi", ToolContext::Lint)
2787            .expect("Should resolve tombi in lint context");
2788        assert!(
2789            lint_def.command.iter().any(|arg| arg == "lint"),
2790            "Bare 'tombi' in lint context should resolve to 'tombi lint', got: {:?}",
2791            lint_def.command
2792        );
2793
2794        // Explicit suffix should bypass context-aware resolution
2795        let explicit_def = processor
2796            .resolve_tool("tombi:lint", ToolContext::Format)
2797            .expect("Should resolve explicit tombi:lint even in format context");
2798        assert!(
2799            explicit_def.command.iter().any(|arg| arg == "lint"),
2800            "Explicit 'tombi:lint' should always use lint, got: {:?}",
2801            explicit_def.command
2802        );
2803    }
2804
2805    /// Verify context-aware resolution for ruff (uses "check" suffix, not "lint").
2806    #[test]
2807    fn test_resolve_tool_context_aware_ruff() {
2808        let config = default_config();
2809        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2810
2811        // In lint context, bare "ruff" should resolve to "ruff:check"
2812        let lint_def = processor
2813            .resolve_tool("ruff", ToolContext::Lint)
2814            .expect("Should resolve ruff in lint context");
2815        assert!(
2816            lint_def.command.iter().any(|arg| arg == "check"),
2817            "Bare 'ruff' in lint context should resolve to 'ruff check', got: {:?}",
2818            lint_def.command
2819        );
2820
2821        // In format context, bare "ruff" should resolve to "ruff:format"
2822        let format_def = processor
2823            .resolve_tool("ruff", ToolContext::Format)
2824            .expect("Should resolve ruff in format context");
2825        assert!(
2826            format_def.command.iter().any(|arg| arg == "format"),
2827            "Bare 'ruff' in format context should resolve to 'ruff format', got: {:?}",
2828            format_def.command
2829        );
2830    }
2831
2832    /// Tools without context-specific variants should still resolve via bare name.
2833    #[test]
2834    fn test_resolve_tool_bare_name_fallback() {
2835        let config = default_config();
2836        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2837
2838        // "shellcheck" has no :lint or :format variant — should fall back to bare name
2839        let def = processor
2840            .resolve_tool("shellcheck", ToolContext::Lint)
2841            .expect("Should resolve shellcheck via fallback");
2842        assert!(
2843            def.command.iter().any(|arg| arg == "shellcheck"),
2844            "shellcheck should resolve to itself, got: {:?}",
2845            def.command
2846        );
2847    }
2848
2849    /// Context-aware resolution for tools with non-standard format suffixes.
2850    #[test]
2851    fn test_resolve_tool_context_aware_sqlfluff() {
2852        let config = default_config();
2853        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2854
2855        // sqlfluff uses ":fix" as its format variant
2856        let format_def = processor
2857            .resolve_tool("sqlfluff", ToolContext::Format)
2858            .expect("Should resolve sqlfluff in format context");
2859        assert!(
2860            format_def.command.iter().any(|arg| arg == "fix"),
2861            "Bare 'sqlfluff' in format context should resolve to 'sqlfluff fix', got: {:?}",
2862            format_def.command
2863        );
2864    }
2865
2866    /// Context-aware resolution for djlint (:reformat suffix).
2867    #[test]
2868    fn test_resolve_tool_context_aware_djlint() {
2869        let config = default_config();
2870        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2871
2872        // djlint uses ":reformat" as its format variant
2873        let format_def = processor
2874            .resolve_tool("djlint", ToolContext::Format)
2875            .expect("Should resolve djlint in format context");
2876        assert!(
2877            format_def.command.iter().any(|arg| arg.contains("reformat")),
2878            "Bare 'djlint' in format context should resolve to djlint reformat, got: {:?}",
2879            format_def.command
2880        );
2881    }
2882
2883    /// User-defined tools with context-specific variants resolve correctly.
2884    #[test]
2885    fn test_resolve_tool_user_defined_with_context_variant() {
2886        use super::super::config::ToolDefinition;
2887
2888        let mut config = default_config();
2889        config.tools.insert(
2890            "mytool".to_string(),
2891            ToolDefinition {
2892                command: vec!["mytool".to_string(), "--lint".to_string()],
2893                ..Default::default()
2894            },
2895        );
2896        config.tools.insert(
2897            "mytool:format".to_string(),
2898            ToolDefinition {
2899                command: vec!["mytool".to_string(), "--format".to_string()],
2900                ..Default::default()
2901            },
2902        );
2903
2904        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2905
2906        // In format context, should resolve to "mytool:format"
2907        let def = processor
2908            .resolve_tool("mytool", ToolContext::Format)
2909            .expect("Should resolve user tool in format context");
2910        assert!(
2911            def.command.iter().any(|arg| arg == "--format"),
2912            "User 'mytool' in format context should resolve to mytool:format, got: {:?}",
2913            def.command
2914        );
2915
2916        // In lint context, should fall back to bare "mytool" (no mytool:lint exists)
2917        let def = processor
2918            .resolve_tool("mytool", ToolContext::Lint)
2919            .expect("Should resolve user tool in lint context via fallback");
2920        assert!(
2921            def.command.iter().any(|arg| arg == "--lint"),
2922            "User 'mytool' in lint context should fall back to bare name, got: {:?}",
2923            def.command
2924        );
2925    }
2926
2927    /// Nonexistent tool returns None.
2928    #[test]
2929    fn test_resolve_tool_nonexistent_returns_none() {
2930        let config = default_config();
2931        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2932
2933        assert!(
2934            processor
2935                .resolve_tool("nonexistent-tool-xyz", ToolContext::Lint)
2936                .is_none(),
2937            "Nonexistent tool should return None in lint context"
2938        );
2939        assert!(
2940            processor
2941                .resolve_tool("nonexistent-tool-xyz", ToolContext::Format)
2942                .is_none(),
2943            "Nonexistent tool should return None in format context"
2944        );
2945    }
2946
2947    #[test]
2948    fn test_strip_ansi_codes_edge_cases() {
2949        // Lone ESC without CSI bracket — non-printable, safely dropped
2950        assert_eq!(strip_ansi_codes("before\x1bafter"), "beforeafter");
2951        // ESC at end of string
2952        assert_eq!(strip_ansi_codes("trailing\x1b"), "trailing");
2953        // Nested/consecutive sequences
2954        assert_eq!(strip_ansi_codes("\x1b[1m\x1b[31mbold red\x1b[0m"), "bold red");
2955        // 256-color and RGB sequences
2956        assert_eq!(strip_ansi_codes("\x1b[38;5;196mred\x1b[0m"), "red");
2957        assert_eq!(strip_ansi_codes("\x1b[38;2;255;0;0mred\x1b[0m"), "red");
2958    }
2959}
rumdl_lib/code_block_tools/processor.rs

rumdl_lib/code_block_tools/
processor.rs