rumdl_lib/code_block_tools/
processor.rs

1//! Main processor for code block linting and formatting.
2//!
3//! This module coordinates language resolution, tool lookup, execution,
4//! and result collection for processing code blocks in markdown files.
5
6#[cfg(test)]
7use super::config::LanguageToolConfig;
8use super::config::{CodeBlockToolsConfig, NormalizeLanguage, OnError, OnMissing, ToolDefinition};
9use super::executor::{ExecutorError, ToolExecutor, ToolOutput};
10use super::linguist::LinguistResolver;
11use super::registry::ToolRegistry;
12use crate::config::MarkdownFlavor;
13use crate::rule::{LintWarning, Severity};
14use crate::utils::rumdl_parser_options;
15use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
16
/// Name of the built-in tool that stands for rumdl's own markdown linting.
///
/// When this name is configured as a lint tool for a markdown block, the
/// processor does not run anything externally for it; that case is covered
/// by embedded markdown linting instead.
pub const RUMDL_BUILTIN_TOOL: &str = "rumdl";
21
/// Report whether `lang` names markdown, ignoring letter case.
///
/// Both the long tag (`markdown`) and the short tag (`md`) are accepted.
fn is_markdown_language(lang: &str) -> bool {
    let lowered = lang.to_lowercase();
    ["markdown", "md"].contains(&lowered.as_str())
}
26
/// Strip ANSI escape sequences from tool output.
///
/// Many tools output colored text (e.g. `\x1b[1;31mError\x1b[0m`), which prevents
/// structured parsers from matching patterns like `file:line:col: message`.
///
/// Only CSI sequences (`ESC [ ... final`) are removed as a unit. A sequence ends
/// at its final byte, which per ECMA-48 is any character in `@`..=`~`
/// (0x40–0x7E) — not just ASCII letters, so sequences such as `ESC [ 2 ~` are
/// removed without swallowing the text that follows them. An `ESC` that is not
/// followed by `[` is dropped on its own, and an unterminated sequence consumes
/// the rest of the input.
fn strip_ansi_codes(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        if c != '\x1b' {
            result.push(c);
            continue;
        }
        // The ESC itself is never copied. If it introduces a CSI sequence,
        // also discard everything up to and including the final byte.
        if chars.next_if_eq(&'[').is_some() {
            for next in chars.by_ref() {
                if ('@'..='~').contains(&next) {
                    break;
                }
            }
        }
    }
    result
}
52
/// Location and fence metadata for one fenced code block found in a document.
///
/// Line numbers are 0-indexed; `content_start` / `content_end` are byte
/// offsets into the document the block was extracted from.
#[derive(Debug, Clone)]
pub struct FencedCodeBlockInfo {
    /// 0-indexed line on which the opening fence starts.
    pub start_line: usize,
    /// 0-indexed line on which the closing fence ends.
    pub end_line: usize,
    /// Byte offset of the first content byte (the line after the opening fence).
    pub content_start: usize,
    /// Byte offset at which the content stops (before the closing fence line).
    pub content_end: usize,
    /// First whitespace-separated token of the info string; empty if there is none.
    pub language: String,
    /// The complete info string that follows the opening fence.
    pub info_string: String,
    /// Character the fence is made of (`` ` `` or `~`).
    pub fence_char: char,
    /// Number of fence characters in the opening fence (normally 3 or more).
    pub fence_length: usize,
    /// Length in bytes of the leading whitespace on the fence line.
    pub indent: usize,
    /// The exact leading whitespace of the fence line.
    pub indent_prefix: String,
}
77
78/// A diagnostic message from an external tool.
79#[derive(Debug, Clone)]
80pub struct CodeBlockDiagnostic {
81    /// Line number in the original markdown file (1-indexed).
82    pub file_line: usize,
83    /// Column number (1-indexed, if available).
84    pub column: Option<usize>,
85    /// Message from the tool.
86    pub message: String,
87    /// Severity (error, warning, info).
88    pub severity: DiagnosticSeverity,
89    /// Name of the tool that produced this.
90    pub tool: String,
91    /// Line where the code block starts (1-indexed, for context).
92    pub code_block_start: usize,
93}
94
/// How serious a [`CodeBlockDiagnostic`] is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// The finding is an error.
    Error,
    /// The finding is a warning.
    Warning,
    /// The finding is purely informational.
    Info,
}
102
103impl CodeBlockDiagnostic {
104    /// Convert to a LintWarning for integration with rumdl's warning system.
105    pub fn to_lint_warning(&self) -> LintWarning {
106        let severity = match self.severity {
107            DiagnosticSeverity::Error => Severity::Error,
108            DiagnosticSeverity::Warning => Severity::Warning,
109            DiagnosticSeverity::Info => Severity::Info,
110        };
111
112        LintWarning {
113            message: self.message.clone(),
114            line: self.file_line,
115            column: self.column.unwrap_or(1),
116            end_line: self.file_line,
117            end_column: self.column.unwrap_or(1),
118            severity,
119            fix: None, // External tool diagnostics don't provide fixes
120            rule_name: Some(self.tool.clone()),
121        }
122    }
123}
124
125/// Error during code block processing.
126#[derive(Debug, Clone)]
127pub enum ProcessorError {
128    /// Tool execution failed.
129    ToolError(ExecutorError),
130    /// Tool execution failed with code block location context.
131    ToolErrorAt {
132        error: ExecutorError,
133        line: usize,
134        language: String,
135    },
136    /// No tools configured for language.
137    NoToolsConfigured { language: String, line: usize },
138    /// Tool binary not found.
139    ToolBinaryNotFound {
140        tool: String,
141        language: String,
142        line: usize,
143    },
144    /// Processing was aborted due to on_error = fail.
145    Aborted { message: String },
146}
147
148impl std::fmt::Display for ProcessorError {
149    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
150        match self {
151            Self::ToolError(e) => write!(f, "{e}"),
152            Self::ToolErrorAt { error, line, language } => {
153                write!(f, "line {line} ({language}): {error}")
154            }
155            Self::NoToolsConfigured { language, line } => {
156                write!(f, "line {line} ({language}): no tools configured")
157            }
158            Self::ToolBinaryNotFound { tool, language, line } => {
159                write!(f, "line {line} ({language}): tool '{tool}' not found in PATH")
160            }
161            Self::Aborted { message } => write!(f, "Processing aborted: {message}"),
162        }
163    }
164}
165
166impl std::error::Error for ProcessorError {}
167
168impl From<ExecutorError> for ProcessorError {
169    fn from(e: ExecutorError) -> Self {
170        Self::ToolError(e)
171    }
172}
173
174/// Result of processing a single code block.
175#[derive(Debug)]
176pub struct CodeBlockResult {
177    /// Diagnostics from linting.
178    pub diagnostics: Vec<CodeBlockDiagnostic>,
179    /// Formatted content (if formatting was requested and succeeded).
180    pub formatted_content: Option<String>,
181    /// Whether the code block was modified.
182    pub was_modified: bool,
183}
184
/// Outcome of formatting the code blocks of a whole document.
#[derive(Debug)]
pub struct FormatOutput {
    /// The resulting document text; may be only partially formatted if errors occurred.
    pub content: String,
    /// Whether any errors occurred during formatting.
    pub had_errors: bool,
    /// One message per block that could not be formatted.
    pub error_messages: Vec<String>,
}
195
/// Context in which a tool is being used.
///
/// Tool resolution uses this to decide which qualified variants of a bare
/// tool name to try first.
#[derive(Copy, Clone)]
enum ToolContext {
    /// The tool is being resolved for a lint run.
    Lint,
    /// The tool is being resolved for a format run.
    Format,
}
203
204pub struct CodeBlockToolProcessor<'a> {
205    config: &'a CodeBlockToolsConfig,
206    flavor: MarkdownFlavor,
207    linguist: LinguistResolver,
208    registry: ToolRegistry,
209    executor: ToolExecutor,
210    user_aliases: std::collections::HashMap<String, String>,
211}
212
213impl<'a> CodeBlockToolProcessor<'a> {
214    /// Create a new processor with the given configuration and markdown flavor.
215    pub fn new(config: &'a CodeBlockToolsConfig, flavor: MarkdownFlavor) -> Self {
216        let user_aliases = config
217            .language_aliases
218            .iter()
219            .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
220            .collect();
221        Self {
222            config,
223            flavor,
224            linguist: LinguistResolver::new(),
225            registry: ToolRegistry::new(config.tools.clone()),
226            executor: ToolExecutor::new(config.timeout),
227            user_aliases,
228        }
229    }
230
231    /// Resolve a tool ID with context awareness.
232    ///
233    /// When a bare tool name (e.g., "tombi") is used in a specific context
234    /// (lint or format), try the context-specific variant first (e.g., "tombi:format"),
235    /// then common alternatives (e.g., "tombi:check"), before falling back to the bare name.
236    fn resolve_tool<'b>(&'b self, tool_id: &str, context: ToolContext) -> Option<&'b ToolDefinition> {
237        // If the tool ID already has a colon suffix, use it directly
238        if tool_id.contains(':') {
239            return self.registry.get(tool_id);
240        }
241
242        // Try context-specific variants first
243        let suffixes = match context {
244            ToolContext::Format => &["format", "fmt", "fix", "reformat"][..],
245            ToolContext::Lint => &["lint", "check"][..],
246        };
247
248        for suffix in suffixes {
249            let qualified = format!("{tool_id}:{suffix}");
250            if let Some(def) = self.registry.get(&qualified) {
251                return Some(def);
252            }
253        }
254
255        // Fall back to bare name
256        self.registry.get(tool_id)
257    }
258
259    /// Quick check whether any configured language might appear in fenced code blocks.
260    /// Scans for `` ```lang `` or `` ~~~lang `` patterns without full parsing.
261    fn has_potential_matching_blocks(&self, content: &str, lint_mode: bool) -> bool {
262        // Collect languages that have tools configured for the requested mode
263        let configured_langs: Vec<&str> = self
264            .config
265            .languages
266            .iter()
267            .filter(|(_, lc)| {
268                lc.enabled
269                    && if lint_mode {
270                        !lc.lint.is_empty()
271                    } else {
272                        !lc.format.is_empty()
273                    }
274            })
275            .map(|(lang, _)| lang.as_str())
276            .collect();
277
278        if configured_langs.is_empty() {
279            return false;
280        }
281
282        // Scan content line-by-line for fence openers matching configured languages
283        for line in content.lines() {
284            let trimmed = line.trim_start();
285            let after_fence = if let Some(rest) = trimmed.strip_prefix("```") {
286                rest
287            } else if let Some(rest) = trimmed.strip_prefix("~~~") {
288                rest
289            } else {
290                continue;
291            };
292
293            let lang = after_fence.split_whitespace().next().unwrap_or("");
294            if lang.is_empty() {
295                continue;
296            }
297            // Check both the raw language and the canonical (normalized) form
298            let canonical = self.resolve_language(lang);
299            if configured_langs.contains(&canonical.as_str()) {
300                return true;
301            }
302        }
303
304        false
305    }
306
307    /// Extract all fenced code blocks from content.
308    pub fn extract_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
309        let mut blocks = Vec::new();
310        let mut current_block: Option<FencedCodeBlockBuilder> = None;
311
312        let options = rumdl_parser_options();
313        let parser = Parser::new_ext(content, options).into_offset_iter();
314
315        let lines: Vec<&str> = content.lines().collect();
316
317        for (event, range) in parser {
318            match event {
319                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
320                    let info_string = info.to_string();
321                    let language = info_string.split_whitespace().next().unwrap_or("").to_string();
322
323                    // Find start line
324                    let start_line = content[..range.start].chars().filter(|&c| c == '\n').count();
325
326                    // Find content start (after opening fence line)
327                    let content_start = content[range.start..]
328                        .find('\n')
329                        .map_or(content.len(), |i| range.start + i + 1);
330
331                    // Detect fence character and length from the line
332                    let fence_line = lines.get(start_line).unwrap_or(&"");
333                    let trimmed = fence_line.trim_start();
334                    let indent = fence_line.len() - trimmed.len();
335                    let indent_prefix = fence_line.get(..indent).unwrap_or("").to_string();
336                    let (fence_char, fence_length) = if trimmed.starts_with('~') {
337                        ('~', trimmed.chars().take_while(|&c| c == '~').count())
338                    } else {
339                        ('`', trimmed.chars().take_while(|&c| c == '`').count())
340                    };
341
342                    current_block = Some(FencedCodeBlockBuilder {
343                        start_line,
344                        content_start,
345                        language,
346                        info_string,
347                        fence_char,
348                        fence_length,
349                        indent,
350                        indent_prefix,
351                    });
352                }
353                Event::End(TagEnd::CodeBlock) => {
354                    if let Some(builder) = current_block.take() {
355                        // Find end line
356                        let end_line = content[..range.end].chars().filter(|&c| c == '\n').count();
357
358                        // Find content end (before closing fence line)
359                        let search_start = builder.content_start.min(range.end);
360                        let content_end = if search_start < range.end {
361                            content[search_start..range.end]
362                                .rfind('\n')
363                                .map_or(search_start, |i| search_start + i)
364                        } else {
365                            search_start
366                        };
367
368                        if content_end >= builder.content_start {
369                            blocks.push(FencedCodeBlockInfo {
370                                start_line: builder.start_line,
371                                end_line,
372                                content_start: builder.content_start,
373                                content_end,
374                                language: builder.language,
375                                info_string: builder.info_string,
376                                fence_char: builder.fence_char,
377                                fence_length: builder.fence_length,
378                                indent: builder.indent,
379                                indent_prefix: builder.indent_prefix,
380                            });
381                        }
382                    }
383                }
384                _ => {}
385            }
386        }
387
388        // For MkDocs flavor, also extract code blocks inside admonitions and tabs
389        if self.flavor == MarkdownFlavor::MkDocs {
390            let mkdocs_blocks = self.extract_mkdocs_code_blocks(content);
391            for mb in mkdocs_blocks {
392                // Deduplicate: only add if no existing block starts at the same line
393                if !blocks.iter().any(|b| b.start_line == mb.start_line) {
394                    blocks.push(mb);
395                }
396            }
397            blocks.sort_by_key(|b| b.start_line);
398        }
399
400        blocks
401    }
402
403    /// Extract fenced code blocks that are inside MkDocs admonitions or tabs.
404    ///
405    /// pulldown_cmark doesn't parse MkDocs-specific constructs, so indented
406    /// code blocks inside `!!!`/`???` admonitions or `===` tabs are missed.
407    /// This method manually scans for them.
408    fn extract_mkdocs_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
409        use crate::utils::mkdocs_admonitions;
410        use crate::utils::mkdocs_tabs;
411
412        let mut blocks = Vec::new();
413        let lines: Vec<&str> = content.lines().collect();
414
415        // Track current MkDocs context indent level
416        // We only need to know if we're inside any MkDocs block, so a simple stack suffices.
417        let mut context_indent_stack: Vec<usize> = Vec::new();
418
419        // Track fence state inside MkDocs context
420        let mut in_fence = false;
421        let mut fence_start_line: usize = 0;
422        let mut fence_content_start: usize = 0;
423        let mut fence_char: char = '`';
424        let mut fence_length: usize = 0;
425        let mut fence_indent: usize = 0;
426        let mut fence_indent_prefix = String::new();
427        let mut fence_language = String::new();
428        let mut fence_info_string = String::new();
429
430        // Compute byte offsets via pointer arithmetic.
431        // `content.lines()` returns slices into the original string,
432        // so each line's pointer offset from `content` gives its byte position.
433        // This correctly handles \n, \r\n, and empty lines.
434        let content_start_ptr = content.as_ptr() as usize;
435        let line_offsets: Vec<usize> = lines
436            .iter()
437            .map(|line| line.as_ptr() as usize - content_start_ptr)
438            .collect();
439
440        for (i, line) in lines.iter().enumerate() {
441            let line_indent = crate::utils::mkdocs_common::get_line_indent(line);
442            let is_admonition = mkdocs_admonitions::is_admonition_start(line);
443            let is_tab = mkdocs_tabs::is_tab_marker(line);
444
445            // Pop contexts when the current line is not indented enough to be content.
446            // This runs for ALL lines (including new admonition/tab starts) to clean
447            // up stale entries before potentially pushing a new context.
448            if !line.trim().is_empty() {
449                while let Some(&ctx_indent) = context_indent_stack.last() {
450                    if line_indent < ctx_indent + 4 {
451                        context_indent_stack.pop();
452                        if in_fence {
453                            in_fence = false;
454                        }
455                    } else {
456                        break;
457                    }
458                }
459            }
460
461            // Check for admonition start — push new context
462            if is_admonition && let Some(indent) = mkdocs_admonitions::get_admonition_indent(line) {
463                context_indent_stack.push(indent);
464                continue;
465            }
466
467            // Check for tab marker — push new context
468            if is_tab && let Some(indent) = mkdocs_tabs::get_tab_indent(line) {
469                context_indent_stack.push(indent);
470                continue;
471            }
472
473            // Only look for fences inside a MkDocs context
474            if context_indent_stack.is_empty() {
475                continue;
476            }
477
478            let trimmed = line.trim_start();
479            let leading_spaces = line.len() - trimmed.len();
480
481            if !in_fence {
482                // Check for fence opening
483                let (fc, fl) = if trimmed.starts_with("```") {
484                    ('`', trimmed.chars().take_while(|&c| c == '`').count())
485                } else if trimmed.starts_with("~~~") {
486                    ('~', trimmed.chars().take_while(|&c| c == '~').count())
487                } else {
488                    continue;
489                };
490
491                if fl >= 3 {
492                    in_fence = true;
493                    fence_start_line = i;
494                    fence_char = fc;
495                    fence_length = fl;
496                    fence_indent = leading_spaces;
497                    fence_indent_prefix = line.get(..leading_spaces).unwrap_or("").to_string();
498
499                    let after_fence = &trimmed[fl..];
500                    fence_info_string = after_fence.trim().to_string();
501                    fence_language = fence_info_string.split_whitespace().next().unwrap_or("").to_string();
502
503                    // Content starts at the next line's byte offset
504                    fence_content_start = line_offsets.get(i + 1).copied().unwrap_or(content.len());
505                }
506            } else {
507                // Check for fence closing
508                let is_closing = if fence_char == '`' {
509                    trimmed.starts_with("```")
510                        && trimmed.chars().take_while(|&c| c == '`').count() >= fence_length
511                        && trimmed.trim_start_matches('`').trim().is_empty()
512                } else {
513                    trimmed.starts_with("~~~")
514                        && trimmed.chars().take_while(|&c| c == '~').count() >= fence_length
515                        && trimmed.trim_start_matches('~').trim().is_empty()
516                };
517
518                if is_closing {
519                    let content_end = line_offsets.get(i).copied().unwrap_or(content.len());
520
521                    if content_end >= fence_content_start {
522                        blocks.push(FencedCodeBlockInfo {
523                            start_line: fence_start_line,
524                            end_line: i,
525                            content_start: fence_content_start,
526                            content_end,
527                            language: fence_language.clone(),
528                            info_string: fence_info_string.clone(),
529                            fence_char,
530                            fence_length,
531                            indent: fence_indent,
532                            indent_prefix: fence_indent_prefix.clone(),
533                        });
534                    }
535
536                    in_fence = false;
537                }
538            }
539        }
540
541        blocks
542    }
543
544    /// Resolve a language tag to its canonical name.
545    fn resolve_language(&self, language: &str) -> String {
546        let lower = language.to_lowercase();
547        if let Some(mapped) = self.user_aliases.get(&lower) {
548            return mapped.clone();
549        }
550        match self.config.normalize_language {
551            NormalizeLanguage::Linguist => self.linguist.resolve(&lower),
552            NormalizeLanguage::Exact => lower,
553        }
554    }
555
556    /// Get the effective on_error setting for a language.
557    fn get_on_error(&self, language: &str) -> OnError {
558        self.config
559            .languages
560            .get(language)
561            .and_then(|lc| lc.on_error)
562            .unwrap_or(self.config.on_error)
563    }
564
565    /// Strip the fence indentation prefix from each line of a code block.
566    fn strip_indent_from_block(&self, content: &str, indent_prefix: &str) -> String {
567        if indent_prefix.is_empty() {
568            return content.to_string();
569        }
570
571        let mut out = String::with_capacity(content.len());
572        for line in content.split_inclusive('\n') {
573            if let Some(stripped) = line.strip_prefix(indent_prefix) {
574                out.push_str(stripped);
575            } else {
576                out.push_str(line);
577            }
578        }
579        out
580    }
581
582    /// Re-apply the fence indentation prefix to each line of a code block.
583    fn apply_indent_to_block(&self, content: &str, indent_prefix: &str) -> String {
584        if indent_prefix.is_empty() {
585            return content.to_string();
586        }
587        if content.is_empty() {
588            return String::new();
589        }
590
591        let mut out = String::with_capacity(content.len() + indent_prefix.len());
592        for line in content.split_inclusive('\n') {
593            if line == "\n" {
594                out.push_str(line);
595            } else {
596                out.push_str(indent_prefix);
597                out.push_str(line);
598            }
599        }
600        out
601    }
602
603    /// Lint all code blocks in the content.
604    ///
605    /// Returns diagnostics from all configured linters.
606    pub fn lint(&self, content: &str) -> Result<Vec<CodeBlockDiagnostic>, ProcessorError> {
607        // Skip the expensive parse when no tools could possibly produce output.
608        // With on_missing=Ignore (default) and no languages with lint tools configured,
609        // every block would be skipped, so the parse is wasted work.
610        if self.config.on_missing_language_definition == OnMissing::Ignore
611            && !self
612                .config
613                .languages
614                .values()
615                .any(|lc| lc.enabled && !lc.lint.is_empty())
616        {
617            return Ok(Vec::new());
618        }
619
620        // Quick content check: skip parsing if no configured language appears in the content.
621        // This avoids the expensive pulldown-cmark parse when there are no matching code blocks.
622        if self.config.on_missing_language_definition == OnMissing::Ignore
623            && !self.has_potential_matching_blocks(content, true)
624        {
625            return Ok(Vec::new());
626        }
627
628        let mut all_diagnostics = Vec::new();
629        let blocks = self.extract_code_blocks(content);
630
631        for block in blocks {
632            if block.language.is_empty() {
633                continue; // Skip blocks without language tag
634            }
635
636            let canonical_lang = self.resolve_language(&block.language);
637
638            // Get lint tools for this language
639            let lang_config = self.config.languages.get(&canonical_lang);
640
641            // If language is explicitly configured with enabled=false, skip silently
642            if let Some(lc) = lang_config
643                && !lc.enabled
644            {
645                continue;
646            }
647
648            let lint_tools = match lang_config {
649                Some(lc) if !lc.lint.is_empty() => &lc.lint,
650                _ => {
651                    // No tools configured for this language in lint mode
652                    match self.config.on_missing_language_definition {
653                        OnMissing::Ignore => continue,
654                        OnMissing::Fail => {
655                            all_diagnostics.push(CodeBlockDiagnostic {
656                                file_line: block.start_line + 1,
657                                column: None,
658                                message: format!("No lint tools configured for language '{canonical_lang}'"),
659                                severity: DiagnosticSeverity::Error,
660                                tool: "code-block-tools".to_string(),
661                                code_block_start: block.start_line + 1,
662                            });
663                            continue;
664                        }
665                        OnMissing::FailFast => {
666                            return Err(ProcessorError::NoToolsConfigured {
667                                language: canonical_lang,
668                                line: block.start_line + 1,
669                            });
670                        }
671                    }
672                }
673            };
674
675            // Extract code block content
676            let code_content_raw = if block.content_start < block.content_end && block.content_end <= content.len() {
677                &content[block.content_start..block.content_end]
678            } else {
679                continue;
680            };
681            let code_content = self.strip_indent_from_block(code_content_raw, &block.indent_prefix);
682
683            // Run each lint tool
684            for tool_id in lint_tools {
685                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown linting
686                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
687                    continue;
688                }
689
690                let Some(tool_def) = self.resolve_tool(tool_id, ToolContext::Lint) else {
691                    log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
692                    continue;
693                };
694
695                // Check if tool binary exists before running
696                let tool_name = tool_def.command.first().map_or("", String::as_str);
697                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
698                    match self.config.on_missing_tool_binary {
699                        OnMissing::Ignore => {
700                            log::debug!("Tool binary '{tool_name}' not found, skipping");
701                            continue;
702                        }
703                        OnMissing::Fail => {
704                            all_diagnostics.push(CodeBlockDiagnostic {
705                                file_line: block.start_line + 1,
706                                column: None,
707                                message: format!("Tool binary '{tool_name}' not found in PATH"),
708                                severity: DiagnosticSeverity::Error,
709                                tool: "code-block-tools".to_string(),
710                                code_block_start: block.start_line + 1,
711                            });
712                            continue;
713                        }
714                        OnMissing::FailFast => {
715                            return Err(ProcessorError::ToolBinaryNotFound {
716                                tool: tool_name.to_string(),
717                                language: canonical_lang.clone(),
718                                line: block.start_line + 1,
719                            });
720                        }
721                    }
722                }
723
724                match self.executor.lint(tool_def, &code_content, Some(self.config.timeout)) {
725                    Ok(output) => {
726                        // Parse tool output into diagnostics
727                        let diagnostics = self.parse_tool_output(
728                            &output,
729                            tool_id,
730                            block.start_line + 1, // Convert to 1-indexed
731                        );
732                        all_diagnostics.extend(diagnostics);
733                    }
734                    Err(e) => {
735                        let on_error = self.get_on_error(&canonical_lang);
736                        match on_error {
737                            OnError::Fail => return Err(e.into()),
738                            OnError::Warn => {
739                                log::warn!("Tool '{tool_id}' failed: {e}");
740                            }
741                            OnError::Skip => {
742                                // Silently skip
743                            }
744                        }
745                    }
746                }
747            }
748        }
749
750        Ok(all_diagnostics)
751    }
752
753    /// Format all code blocks in the content.
754    ///
755    /// Returns the modified content with formatted code blocks and any errors that occurred.
756    /// With `on-missing-*` = `fail`, errors are collected but formatting continues.
757    /// With `on-missing-*` = `fail-fast`, returns Err immediately on first error.
758    pub fn format(&self, content: &str) -> Result<FormatOutput, ProcessorError> {
759        let no_output = FormatOutput {
760            content: content.to_string(),
761            had_errors: false,
762            error_messages: Vec::new(),
763        };
764
765        // Skip the expensive parse when no tools could produce output
766        if self.config.on_missing_language_definition == OnMissing::Ignore
767            && !self
768                .config
769                .languages
770                .values()
771                .any(|lc| lc.enabled && !lc.format.is_empty())
772        {
773            return Ok(no_output);
774        }
775
776        // Quick content check: skip parsing if no configured language appears in the content
777        if self.config.on_missing_language_definition == OnMissing::Ignore
778            && !self.has_potential_matching_blocks(content, false)
779        {
780            return Ok(no_output);
781        }
782
783        let blocks = self.extract_code_blocks(content);
784
785        if blocks.is_empty() {
786            return Ok(FormatOutput {
787                content: content.to_string(),
788                had_errors: false,
789                error_messages: Vec::new(),
790            });
791        }
792
793        // Process blocks in reverse order to maintain byte offsets
794        let mut result = content.to_string();
795        let mut error_messages: Vec<String> = Vec::new();
796
797        for block in blocks.into_iter().rev() {
798            if block.language.is_empty() {
799                continue;
800            }
801
802            let canonical_lang = self.resolve_language(&block.language);
803
804            // Get format tools for this language
805            let lang_config = self.config.languages.get(&canonical_lang);
806
807            // If language is explicitly configured with enabled=false, skip silently
808            if let Some(lc) = lang_config
809                && !lc.enabled
810            {
811                continue;
812            }
813
814            let format_tools = match lang_config {
815                Some(lc) if !lc.format.is_empty() => &lc.format,
816                _ => {
817                    // No tools configured for this language in format mode
818                    match self.config.on_missing_language_definition {
819                        OnMissing::Ignore => continue,
820                        OnMissing::Fail => {
821                            error_messages.push(format!(
822                                "No format tools configured for language '{canonical_lang}' at line {}",
823                                block.start_line + 1
824                            ));
825                            continue;
826                        }
827                        OnMissing::FailFast => {
828                            return Err(ProcessorError::NoToolsConfigured {
829                                language: canonical_lang,
830                                line: block.start_line + 1,
831                            });
832                        }
833                    }
834                }
835            };
836
837            // Extract code block content
838            if block.content_start >= block.content_end || block.content_end > result.len() {
839                continue;
840            }
841            let code_content_raw = result[block.content_start..block.content_end].to_string();
842            let code_content = self.strip_indent_from_block(&code_content_raw, &block.indent_prefix);
843
844            // Run format tools (use first successful one)
845            let mut formatted = code_content.clone();
846            let mut tool_ran = false;
847            for tool_id in format_tools {
848                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown formatting
849                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
850                    continue;
851                }
852
853                let Some(tool_def) = self.resolve_tool(tool_id, ToolContext::Format) else {
854                    log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
855                    continue;
856                };
857
858                // Check if tool binary exists before running
859                let tool_name = tool_def.command.first().map_or("", String::as_str);
860                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
861                    match self.config.on_missing_tool_binary {
862                        OnMissing::Ignore => {
863                            log::debug!("Tool binary '{tool_name}' not found, skipping");
864                            continue;
865                        }
866                        OnMissing::Fail => {
867                            error_messages.push(format!(
868                                "Tool binary '{tool_name}' not found in PATH for language '{canonical_lang}' at line {}",
869                                block.start_line + 1
870                            ));
871                            continue;
872                        }
873                        OnMissing::FailFast => {
874                            return Err(ProcessorError::ToolBinaryNotFound {
875                                tool: tool_name.to_string(),
876                                language: canonical_lang.clone(),
877                                line: block.start_line + 1,
878                            });
879                        }
880                    }
881                }
882
883                match self.executor.format(tool_def, &formatted, Some(self.config.timeout)) {
884                    Ok(output) => {
885                        // Guard against formatters that produce empty output for non-empty input.
886                        // This prevents data loss from misconfigured tools (e.g., a lint tool
887                        // used as a formatter that validates but doesn't output content).
888                        if output.trim().is_empty() && !formatted.trim().is_empty() {
889                            log::warn!("Formatter '{tool_id}' produced empty output for non-empty input, skipping");
890                            continue;
891                        }
892
893                        // Ensure trailing newline matches original (unindented)
894                        formatted = output;
895                        if code_content.ends_with('\n') && !formatted.ends_with('\n') {
896                            formatted.push('\n');
897                        } else if !code_content.ends_with('\n') && formatted.ends_with('\n') {
898                            formatted.pop();
899                        }
900                        tool_ran = true;
901                        break; // Use first successful formatter
902                    }
903                    Err(e) => {
904                        let on_error = self.get_on_error(&canonical_lang);
905                        match on_error {
906                            OnError::Fail => {
907                                return Err(ProcessorError::ToolErrorAt {
908                                    error: e,
909                                    line: block.start_line + 1,
910                                    language: canonical_lang,
911                                });
912                            }
913                            OnError::Warn => {
914                                error_messages.push(format!("line {} ({}): {e}", block.start_line + 1, canonical_lang));
915                            }
916                            OnError::Skip => {}
917                        }
918                    }
919                }
920            }
921
922            // Replace content if changed and a tool actually ran
923            if tool_ran && formatted != code_content {
924                let reindented = self.apply_indent_to_block(&formatted, &block.indent_prefix);
925                if reindented != code_content_raw {
926                    result.replace_range(block.content_start..block.content_end, &reindented);
927                }
928            }
929        }
930
931        Ok(FormatOutput {
932            content: result,
933            had_errors: !error_messages.is_empty(),
934            error_messages,
935        })
936    }
937
938    /// Parse tool output into diagnostics.
939    ///
940    /// This is a basic parser that handles common output formats.
941    /// Tools vary widely in their output format, so this is best-effort.
942    fn parse_tool_output(
943        &self,
944        output: &ToolOutput,
945        tool_id: &str,
946        code_block_start_line: usize,
947    ) -> Vec<CodeBlockDiagnostic> {
948        let mut diagnostics = Vec::new();
949        let mut shellcheck_line: Option<usize> = None;
950
951        // Strip ANSI escape codes and combine stdout + stderr for parsing
952        let stdout_clean = strip_ansi_codes(&output.stdout);
953        let stderr_clean = strip_ansi_codes(&output.stderr);
954        let combined = format!("{stdout_clean}\n{stderr_clean}");
955
956        // State for multi-line "Error: msg" / "at line N column M" pattern
957        let mut pending_error: Option<(String, DiagnosticSeverity)> = None;
958
959        for line in combined.lines() {
960            let line = line.trim();
961            if line.is_empty() {
962                continue;
963            }
964
965            // Resolve pending "Error: msg" from previous line
966            if let Some((ref msg, severity)) = pending_error {
967                if let Some((line_num, col)) = Self::parse_at_line_column(line) {
968                    diagnostics.push(CodeBlockDiagnostic {
969                        file_line: code_block_start_line + line_num,
970                        column: Some(col),
971                        message: msg.clone(),
972                        severity,
973                        tool: tool_id.to_string(),
974                        code_block_start: code_block_start_line,
975                    });
976                    pending_error = None;
977                    continue;
978                }
979                // No position info found; emit error without line mapping
980                diagnostics.push(CodeBlockDiagnostic {
981                    file_line: code_block_start_line,
982                    column: None,
983                    message: msg.clone(),
984                    severity,
985                    tool: tool_id.to_string(),
986                    code_block_start: code_block_start_line,
987                });
988                pending_error = None;
989                // Fall through to parse current line
990            }
991
992            if let Some(line_num) = self.parse_shellcheck_header(line) {
993                shellcheck_line = Some(line_num);
994                continue;
995            }
996
997            if let Some(line_num) = shellcheck_line
998                && let Some(diag) = self.parse_shellcheck_message(line, tool_id, code_block_start_line, line_num)
999            {
1000                diagnostics.push(diag);
1001                continue;
1002            }
1003
1004            // Try pattern: "file:line:col: message" or "file:line: message"
1005            if let Some(diag) = self.parse_standard_format(line, tool_id, code_block_start_line) {
1006                diagnostics.push(diag);
1007                continue;
1008            }
1009
1010            // Try pattern: "line:col message" (eslint style)
1011            if let Some(diag) = self.parse_eslint_format(line, tool_id, code_block_start_line) {
1012                diagnostics.push(diag);
1013                continue;
1014            }
1015
1016            // Try single-line shellcheck format fallback
1017            if let Some(diag) = self.parse_shellcheck_format(line, tool_id, code_block_start_line) {
1018                diagnostics.push(diag);
1019                continue;
1020            }
1021
1022            // Try multi-line "Error: msg" / "Warning: msg" pattern
1023            if let Some(error_info) = Self::parse_error_line(line) {
1024                pending_error = Some(error_info);
1025            }
1026        }
1027
1028        // Flush any remaining pending error
1029        if let Some((msg, severity)) = pending_error {
1030            diagnostics.push(CodeBlockDiagnostic {
1031                file_line: code_block_start_line,
1032                column: None,
1033                message: msg,
1034                severity,
1035                tool: tool_id.to_string(),
1036                code_block_start: code_block_start_line,
1037            });
1038        }
1039
1040        // If no diagnostics parsed but tool failed, use combined output as fallback
1041        if diagnostics.is_empty() && !output.success {
1042            let lines: Vec<&str> = combined.lines().map(str::trim).filter(|l| !l.is_empty()).collect();
1043
1044            if lines.is_empty() {
1045                let exit_code = output.exit_code;
1046                diagnostics.push(CodeBlockDiagnostic {
1047                    file_line: code_block_start_line,
1048                    column: None,
1049                    message: format!("Tool exited with code {exit_code}"),
1050                    severity: DiagnosticSeverity::Error,
1051                    tool: tool_id.to_string(),
1052                    code_block_start: code_block_start_line,
1053                });
1054            } else {
1055                for line_text in lines {
1056                    diagnostics.push(CodeBlockDiagnostic {
1057                        file_line: code_block_start_line,
1058                        column: None,
1059                        message: line_text.to_string(),
1060                        severity: DiagnosticSeverity::Error,
1061                        tool: tool_id.to_string(),
1062                        code_block_start: code_block_start_line,
1063                    });
1064                }
1065            }
1066        }
1067
1068        diagnostics
1069    }
1070
1071    /// Parse standard "file:line:col: message" format.
1072    fn parse_standard_format(
1073        &self,
1074        line: &str,
1075        tool_id: &str,
1076        code_block_start_line: usize,
1077    ) -> Option<CodeBlockDiagnostic> {
1078        // Match patterns like "file.py:1:10: E501 message"
1079        let mut parts = line.rsplitn(4, ':');
1080        let message = parts.next()?.trim().to_string();
1081        let part1 = parts.next()?.trim().to_string();
1082        let part2 = parts.next()?.trim().to_string();
1083        let part3 = parts.next().map(|s| s.trim().to_string());
1084
1085        let (line_part, col_part) = if part3.is_some() {
1086            (part2, Some(part1))
1087        } else {
1088            (part1, None)
1089        };
1090
1091        if let Ok(line_num) = line_part.parse::<usize>() {
1092            let column = col_part.and_then(|s| s.parse::<usize>().ok());
1093            let message = Self::strip_fixable_markers(&message);
1094            if !message.is_empty() {
1095                let severity = self.infer_severity(&message);
1096                return Some(CodeBlockDiagnostic {
1097                    file_line: code_block_start_line + line_num,
1098                    column,
1099                    message,
1100                    severity,
1101                    tool: tool_id.to_string(),
1102                    code_block_start: code_block_start_line,
1103                });
1104            }
1105        }
1106        None
1107    }
1108
1109    /// Parse eslint-style "line:col severity message" format.
1110    fn parse_eslint_format(
1111        &self,
1112        line: &str,
1113        tool_id: &str,
1114        code_block_start_line: usize,
1115    ) -> Option<CodeBlockDiagnostic> {
1116        // Match "1:10 error Message"
1117        let parts: Vec<&str> = line.splitn(3, ' ').collect();
1118        if parts.len() >= 2 {
1119            let loc_parts: Vec<&str> = parts[0].split(':').collect();
1120            if loc_parts.len() == 2
1121                && let (Ok(line_num), Ok(col)) = (loc_parts[0].parse::<usize>(), loc_parts[1].parse::<usize>())
1122            {
1123                let (sev_part, msg_part) = if parts.len() >= 3 {
1124                    (parts[1], parts[2])
1125                } else {
1126                    (parts[1], "")
1127                };
1128                let message = if msg_part.is_empty() {
1129                    sev_part.to_string()
1130                } else {
1131                    msg_part.to_string()
1132                };
1133                let message = Self::strip_fixable_markers(&message);
1134                let severity = match sev_part.to_lowercase().as_str() {
1135                    "error" => DiagnosticSeverity::Error,
1136                    "warning" | "warn" => DiagnosticSeverity::Warning,
1137                    "info" => DiagnosticSeverity::Info,
1138                    _ => self.infer_severity(&message),
1139                };
1140                return Some(CodeBlockDiagnostic {
1141                    file_line: code_block_start_line + line_num,
1142                    column: Some(col),
1143                    message,
1144                    severity,
1145                    tool: tool_id.to_string(),
1146                    code_block_start: code_block_start_line,
1147                });
1148            }
1149        }
1150        None
1151    }
1152
1153    /// Parse shellcheck-style "In - line N: message" format.
1154    fn parse_shellcheck_format(
1155        &self,
1156        line: &str,
1157        tool_id: &str,
1158        code_block_start_line: usize,
1159    ) -> Option<CodeBlockDiagnostic> {
1160        // Match "In - line 5:" pattern
1161        if line.starts_with("In ")
1162            && line.contains(" line ")
1163            && let Some(line_start) = line.find(" line ")
1164        {
1165            let after_line = &line[line_start + 6..];
1166            if let Some(colon_pos) = after_line.find(':')
1167                && let Ok(line_num) = after_line[..colon_pos].trim().parse::<usize>()
1168            {
1169                let message = Self::strip_fixable_markers(after_line[colon_pos + 1..].trim());
1170                if !message.is_empty() {
1171                    let severity = self.infer_severity(&message);
1172                    return Some(CodeBlockDiagnostic {
1173                        file_line: code_block_start_line + line_num,
1174                        column: None,
1175                        message,
1176                        severity,
1177                        tool: tool_id.to_string(),
1178                        code_block_start: code_block_start_line,
1179                    });
1180                }
1181            }
1182        }
1183        None
1184    }
1185
1186    /// Parse shellcheck header line to capture line number context.
1187    fn parse_shellcheck_header(&self, line: &str) -> Option<usize> {
1188        if line.starts_with("In ")
1189            && line.contains(" line ")
1190            && let Some(line_start) = line.find(" line ")
1191        {
1192            let after_line = &line[line_start + 6..];
1193            if let Some(colon_pos) = after_line.find(':') {
1194                return after_line[..colon_pos].trim().parse::<usize>().ok();
1195            }
1196        }
1197        None
1198    }
1199
1200    /// Parse shellcheck message line containing SCXXXX codes.
1201    fn parse_shellcheck_message(
1202        &self,
1203        line: &str,
1204        tool_id: &str,
1205        code_block_start_line: usize,
1206        line_num: usize,
1207    ) -> Option<CodeBlockDiagnostic> {
1208        let sc_pos = line.find("SC")?;
1209        let after_sc = &line[sc_pos + 2..];
1210        let code_len = after_sc.chars().take_while(char::is_ascii_digit).count();
1211        if code_len == 0 {
1212            return None;
1213        }
1214        let after_code = &after_sc[code_len..];
1215        let sev_start = after_code.find('(')? + 1;
1216        let sev_end = after_code[sev_start..].find(')')? + sev_start;
1217        let sev = after_code[sev_start..sev_end].trim().to_lowercase();
1218        let message_start = after_code.find("):")? + 2;
1219        let message = Self::strip_fixable_markers(after_code[message_start..].trim());
1220        if message.is_empty() {
1221            return None;
1222        }
1223
1224        let severity = match sev.as_str() {
1225            "error" => DiagnosticSeverity::Error,
1226            "warning" | "warn" => DiagnosticSeverity::Warning,
1227            "info" | "style" => DiagnosticSeverity::Info,
1228            _ => self.infer_severity(&message),
1229        };
1230
1231        Some(CodeBlockDiagnostic {
1232            file_line: code_block_start_line + line_num,
1233            column: None,
1234            message,
1235            severity,
1236            tool: tool_id.to_string(),
1237            code_block_start: code_block_start_line,
1238        })
1239    }
1240
1241    /// Parse "Error: <message>" or "Warning: <message>" lines.
1242    ///
1243    /// Used for tools like tombi that output multi-line diagnostics where the
1244    /// error message and position are on separate lines. Only matches capitalized
1245    /// prefixes to avoid conflicting with lowercase `error:` in less structured output.
1246    fn parse_error_line(line: &str) -> Option<(String, DiagnosticSeverity)> {
1247        let (msg, severity) = if let Some(msg) = line.strip_prefix("Error:") {
1248            (msg, DiagnosticSeverity::Error)
1249        } else if let Some(msg) = line.strip_prefix("Warning:") {
1250            (msg, DiagnosticSeverity::Warning)
1251        } else {
1252            return None;
1253        };
1254        let msg = msg.trim();
1255        if msg.is_empty() {
1256            return None;
1257        }
1258        Some((msg.to_string(), severity))
1259    }
1260
1261    /// Parse "at line N column M" position lines (case-insensitive).
1262    ///
1263    /// Returns (line_number, column_number) if the pattern matches.
1264    fn parse_at_line_column(line: &str) -> Option<(usize, usize)> {
1265        let lower = line.to_lowercase();
1266        let rest = lower.strip_prefix("at line ")?;
1267        let mut parts = rest.split_whitespace();
1268        let line_num: usize = parts.next()?.parse().ok()?;
1269        if parts.next()? != "column" {
1270            return None;
1271        }
1272        let col: usize = parts.next()?.parse().ok()?;
1273        Some((line_num, col))
1274    }
1275
1276    /// Infer severity from message content.
1277    fn infer_severity(&self, message: &str) -> DiagnosticSeverity {
1278        let lower = message.to_lowercase();
1279        if lower.contains("error")
1280            || lower.starts_with('e') && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1281            || lower.starts_with('f') && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1282        {
1283            DiagnosticSeverity::Error
1284        } else if lower.contains("warning")
1285            || lower.contains("warn")
1286            || lower.starts_with('w') && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1287        {
1288            DiagnosticSeverity::Warning
1289        } else {
1290            DiagnosticSeverity::Info
1291        }
1292    }
1293
1294    /// Strip "fixable" markers from external tool messages.
1295    ///
1296    /// External tools like ruff show `[*]` to indicate fixable issues, but in rumdl's
1297    /// context these markers can be misleading - the lint tool's fix capability may
1298    /// differ from what our configured formatter can fix. We strip these markers
1299    /// to avoid making promises we can't keep.
1300    fn strip_fixable_markers(message: &str) -> String {
1301        message
1302            .replace(" [*]", "")
1303            .replace("[*] ", "")
1304            .replace("[*]", "")
1305            .replace(" (fixable)", "")
1306            .replace("(fixable) ", "")
1307            .replace("(fixable)", "")
1308            .replace(" [fix available]", "")
1309            .replace("[fix available] ", "")
1310            .replace("[fix available]", "")
1311            .replace(" [autofix]", "")
1312            .replace("[autofix] ", "")
1313            .replace("[autofix]", "")
1314            .trim()
1315            .to_string()
1316    }
1317}
1318
/// Builder for FencedCodeBlockInfo during parsing.
///
/// NOTE(review): the code that fills and consumes this builder is not part of
/// this section of the file; the field notes below are inferred from the field
/// names and from how the finished block info is used, and should be confirmed
/// against the parser.
struct FencedCodeBlockBuilder {
    // --- what the fence declares ---
    /// Language of the block (presumably the first word of the info string;
    /// empty when the fence has none).
    language: String,
    /// Info string following the opening fence (presumably the full text,
    /// including attributes after the language).
    info_string: String,
    /// Character the fence is made of (presumably '`' or '~').
    fence_char: char,
    /// Number of fence characters in the opening fence.
    fence_length: usize,

    // --- where the block sits in the document ---
    /// Line of the opening fence (presumably 0-indexed; callers add 1 when
    /// reporting).
    start_line: usize,
    /// Offset into the document where the block's content begins (presumably
    /// a byte offset).
    content_start: usize,

    // --- indentation of the fence ---
    /// Width of the indentation in front of the fence.
    indent: usize,
    /// The literal indentation text in front of the fence.
    indent_prefix: String,
}
1330
1331#[cfg(test)]
1332mod tests {
1333    use super::*;
1334
1335    fn default_config() -> CodeBlockToolsConfig {
1336        CodeBlockToolsConfig::default()
1337    }
1338
1339    #[test]
1340    fn test_extract_code_blocks() {
1341        let config = default_config();
1342        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1343
1344        let content = r#"# Example
1345
1346```python
1347def hello():
1348    print("Hello")
1349```
1350
1351Some text
1352
1353```rust
1354fn main() {}
1355```
1356"#;
1357
1358        let blocks = processor.extract_code_blocks(content);
1359
1360        assert_eq!(blocks.len(), 2);
1361
1362        assert_eq!(blocks[0].language, "python");
1363        assert_eq!(blocks[0].fence_char, '`');
1364        assert_eq!(blocks[0].fence_length, 3);
1365        assert_eq!(blocks[0].start_line, 2);
1366        assert_eq!(blocks[0].indent, 0);
1367        assert_eq!(blocks[0].indent_prefix, "");
1368
1369        assert_eq!(blocks[1].language, "rust");
1370        assert_eq!(blocks[1].fence_char, '`');
1371        assert_eq!(blocks[1].fence_length, 3);
1372    }
1373
1374    #[test]
1375    fn test_extract_code_blocks_with_info_string() {
1376        let config = default_config();
1377        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1378
1379        let content = "```python title=\"example.py\"\ncode\n```";
1380        let blocks = processor.extract_code_blocks(content);
1381
1382        assert_eq!(blocks.len(), 1);
1383        assert_eq!(blocks[0].language, "python");
1384        assert_eq!(blocks[0].info_string, "python title=\"example.py\"");
1385    }
1386
1387    #[test]
1388    fn test_extract_code_blocks_tilde_fence() {
1389        let config = default_config();
1390        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1391
1392        let content = "~~~bash\necho hello\n~~~";
1393        let blocks = processor.extract_code_blocks(content);
1394
1395        assert_eq!(blocks.len(), 1);
1396        assert_eq!(blocks[0].language, "bash");
1397        assert_eq!(blocks[0].fence_char, '~');
1398        assert_eq!(blocks[0].fence_length, 3);
1399        assert_eq!(blocks[0].indent_prefix, "");
1400    }
1401
1402    #[test]
1403    fn test_extract_code_blocks_with_indent_prefix() {
1404        let config = default_config();
1405        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1406
1407        let content = "  - item\n    ```python\n    print('hi')\n    ```";
1408        let blocks = processor.extract_code_blocks(content);
1409
1410        assert_eq!(blocks.len(), 1);
1411        assert_eq!(blocks[0].indent_prefix, "    ");
1412    }
1413
1414    #[test]
1415    fn test_extract_code_blocks_no_language() {
1416        let config = default_config();
1417        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1418
1419        let content = "```\nplain code\n```";
1420        let blocks = processor.extract_code_blocks(content);
1421
1422        assert_eq!(blocks.len(), 1);
1423        assert_eq!(blocks[0].language, "");
1424    }
1425
1426    #[test]
1427    fn test_resolve_language_linguist() {
1428        let mut config = default_config();
1429        config.normalize_language = NormalizeLanguage::Linguist;
1430        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1431
1432        assert_eq!(processor.resolve_language("py"), "python");
1433        assert_eq!(processor.resolve_language("bash"), "shell");
1434        assert_eq!(processor.resolve_language("js"), "javascript");
1435    }
1436
1437    #[test]
1438    fn test_resolve_language_exact() {
1439        let mut config = default_config();
1440        config.normalize_language = NormalizeLanguage::Exact;
1441        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1442
1443        assert_eq!(processor.resolve_language("py"), "py");
1444        assert_eq!(processor.resolve_language("BASH"), "bash");
1445    }
1446
1447    #[test]
1448    fn test_resolve_language_user_alias_override() {
1449        let mut config = default_config();
1450        config.language_aliases.insert("py".to_string(), "python".to_string());
1451        config.normalize_language = NormalizeLanguage::Exact;
1452        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1453
1454        assert_eq!(processor.resolve_language("PY"), "python");
1455    }
1456
1457    #[test]
1458    fn test_indent_strip_and_reapply_roundtrip() {
1459        let config = default_config();
1460        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1461
1462        let raw = "    def hello():\n        print('hi')";
1463        let stripped = processor.strip_indent_from_block(raw, "    ");
1464        assert_eq!(stripped, "def hello():\n    print('hi')");
1465
1466        let reapplied = processor.apply_indent_to_block(&stripped, "    ");
1467        assert_eq!(reapplied, raw);
1468    }
1469
1470    #[test]
1471    fn test_infer_severity() {
1472        let config = default_config();
1473        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1474
1475        assert_eq!(
1476            processor.infer_severity("E501 line too long"),
1477            DiagnosticSeverity::Error
1478        );
1479        assert_eq!(
1480            processor.infer_severity("W291 trailing whitespace"),
1481            DiagnosticSeverity::Warning
1482        );
1483        assert_eq!(
1484            processor.infer_severity("error: something failed"),
1485            DiagnosticSeverity::Error
1486        );
1487        assert_eq!(
1488            processor.infer_severity("warning: unused variable"),
1489            DiagnosticSeverity::Warning
1490        );
1491        assert_eq!(
1492            processor.infer_severity("note: consider using"),
1493            DiagnosticSeverity::Info
1494        );
1495    }
1496
1497    #[test]
1498    fn test_parse_standard_format_windows_path() {
1499        let config = default_config();
1500        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1501
1502        let output = ToolOutput {
1503            stdout: "C:\\path\\file.py:2:5: E123 message".to_string(),
1504            stderr: String::new(),
1505            exit_code: 1,
1506            success: false,
1507        };
1508
1509        let diags = processor.parse_tool_output(&output, "ruff:check", 10);
1510        assert_eq!(diags.len(), 1);
1511        assert_eq!(diags[0].file_line, 12);
1512        assert_eq!(diags[0].column, Some(5));
1513        assert_eq!(diags[0].message, "E123 message");
1514    }
1515
1516    #[test]
1517    fn test_parse_eslint_severity() {
1518        let config = default_config();
1519        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1520
1521        let output = ToolOutput {
1522            stdout: "1:2 error Unexpected token".to_string(),
1523            stderr: String::new(),
1524            exit_code: 1,
1525            success: false,
1526        };
1527
1528        let diags = processor.parse_tool_output(&output, "eslint", 5);
1529        assert_eq!(diags.len(), 1);
1530        assert_eq!(diags[0].file_line, 6);
1531        assert_eq!(diags[0].column, Some(2));
1532        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1533        assert_eq!(diags[0].message, "Unexpected token");
1534    }
1535
1536    #[test]
1537    fn test_parse_shellcheck_multiline() {
1538        let config = default_config();
1539        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1540
1541        let output = ToolOutput {
1542            stdout: "In - line 3:\necho $var\n ^-- SC2086 (info): Double quote to prevent globbing".to_string(),
1543            stderr: String::new(),
1544            exit_code: 1,
1545            success: false,
1546        };
1547
1548        let diags = processor.parse_tool_output(&output, "shellcheck", 10);
1549        assert_eq!(diags.len(), 1);
1550        assert_eq!(diags[0].file_line, 13);
1551        assert_eq!(diags[0].severity, DiagnosticSeverity::Info);
1552        assert_eq!(diags[0].message, "Double quote to prevent globbing");
1553    }
1554
1555    #[test]
1556    fn test_lint_no_config() {
1557        let config = default_config();
1558        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1559
1560        let content = "```python\nprint('hello')\n```";
1561        let result = processor.lint(content);
1562
1563        // Should succeed with no diagnostics (no tools configured)
1564        assert!(result.is_ok());
1565        assert!(result.unwrap().is_empty());
1566    }
1567
1568    #[test]
1569    fn test_format_no_config() {
1570        let config = default_config();
1571        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1572
1573        let content = "```python\nprint('hello')\n```";
1574        let result = processor.format(content);
1575
1576        // Should succeed with unchanged content (no tools configured)
1577        assert!(result.is_ok());
1578        let output = result.unwrap();
1579        assert_eq!(output.content, content);
1580        assert!(!output.had_errors);
1581        assert!(output.error_messages.is_empty());
1582    }
1583
1584    #[test]
1585    fn test_lint_on_missing_language_definition_fail() {
1586        let mut config = default_config();
1587        config.on_missing_language_definition = OnMissing::Fail;
1588        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1589
1590        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1591        let result = processor.lint(content);
1592
1593        // Should succeed but return diagnostics for both missing language definitions
1594        assert!(result.is_ok());
1595        let diagnostics = result.unwrap();
1596        assert_eq!(diagnostics.len(), 2);
1597        assert!(diagnostics[0].message.contains("No lint tools configured"));
1598        assert!(diagnostics[0].message.contains("python"));
1599        assert!(diagnostics[1].message.contains("javascript"));
1600    }
1601
1602    #[test]
1603    fn test_lint_on_missing_language_definition_fail_fast() {
1604        let mut config = default_config();
1605        config.on_missing_language_definition = OnMissing::FailFast;
1606        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1607
1608        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1609        let result = processor.lint(content);
1610
1611        // Should fail immediately on first missing language
1612        assert!(result.is_err());
1613        let err = result.unwrap_err();
1614        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1615    }
1616
1617    #[test]
1618    fn test_format_on_missing_language_definition_fail() {
1619        let mut config = default_config();
1620        config.on_missing_language_definition = OnMissing::Fail;
1621        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1622
1623        let content = "```python\nprint('hello')\n```";
1624        let result = processor.format(content);
1625
1626        // Should succeed but report errors
1627        assert!(result.is_ok());
1628        let output = result.unwrap();
1629        assert_eq!(output.content, content); // Content unchanged
1630        assert!(output.had_errors);
1631        assert!(!output.error_messages.is_empty());
1632        assert!(output.error_messages[0].contains("No format tools configured"));
1633    }
1634
1635    #[test]
1636    fn test_format_on_missing_language_definition_fail_fast() {
1637        let mut config = default_config();
1638        config.on_missing_language_definition = OnMissing::FailFast;
1639        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1640
1641        let content = "```python\nprint('hello')\n```";
1642        let result = processor.format(content);
1643
1644        // Should fail immediately
1645        assert!(result.is_err());
1646        let err = result.unwrap_err();
1647        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1648    }
1649
1650    #[test]
1651    fn test_lint_on_missing_tool_binary_fail() {
1652        use super::super::config::{LanguageToolConfig, ToolDefinition};
1653
1654        let mut config = default_config();
1655        config.on_missing_tool_binary = OnMissing::Fail;
1656
1657        // Configure a tool with a non-existent binary
1658        let lang_config = LanguageToolConfig {
1659            lint: vec!["nonexistent-linter".to_string()],
1660            ..Default::default()
1661        };
1662        config.languages.insert("python".to_string(), lang_config);
1663
1664        let tool_def = ToolDefinition {
1665            command: vec!["nonexistent-binary-xyz123".to_string()],
1666            ..Default::default()
1667        };
1668        config.tools.insert("nonexistent-linter".to_string(), tool_def);
1669
1670        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1671
1672        let content = "```python\nprint('hello')\n```";
1673        let result = processor.lint(content);
1674
1675        // Should succeed but return diagnostic for missing binary
1676        assert!(result.is_ok());
1677        let diagnostics = result.unwrap();
1678        assert_eq!(diagnostics.len(), 1);
1679        assert!(diagnostics[0].message.contains("not found in PATH"));
1680    }
1681
1682    #[test]
1683    fn test_lint_on_missing_tool_binary_fail_fast() {
1684        use super::super::config::{LanguageToolConfig, ToolDefinition};
1685
1686        let mut config = default_config();
1687        config.on_missing_tool_binary = OnMissing::FailFast;
1688
1689        // Configure a tool with a non-existent binary
1690        let lang_config = LanguageToolConfig {
1691            lint: vec!["nonexistent-linter".to_string()],
1692            ..Default::default()
1693        };
1694        config.languages.insert("python".to_string(), lang_config);
1695
1696        let tool_def = ToolDefinition {
1697            command: vec!["nonexistent-binary-xyz123".to_string()],
1698            ..Default::default()
1699        };
1700        config.tools.insert("nonexistent-linter".to_string(), tool_def);
1701
1702        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1703
1704        let content = "```python\nprint('hello')\n```";
1705        let result = processor.lint(content);
1706
1707        // Should fail immediately
1708        assert!(result.is_err());
1709        let err = result.unwrap_err();
1710        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
1711    }
1712
1713    #[test]
1714    fn test_format_on_missing_tool_binary_fail() {
1715        use super::super::config::{LanguageToolConfig, ToolDefinition};
1716
1717        let mut config = default_config();
1718        config.on_missing_tool_binary = OnMissing::Fail;
1719
1720        // Configure a tool with a non-existent binary
1721        let lang_config = LanguageToolConfig {
1722            format: vec!["nonexistent-formatter".to_string()],
1723            ..Default::default()
1724        };
1725        config.languages.insert("python".to_string(), lang_config);
1726
1727        let tool_def = ToolDefinition {
1728            command: vec!["nonexistent-binary-xyz123".to_string()],
1729            ..Default::default()
1730        };
1731        config.tools.insert("nonexistent-formatter".to_string(), tool_def);
1732
1733        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1734
1735        let content = "```python\nprint('hello')\n```";
1736        let result = processor.format(content);
1737
1738        // Should succeed but report errors
1739        assert!(result.is_ok());
1740        let output = result.unwrap();
1741        assert_eq!(output.content, content); // Content unchanged
1742        assert!(output.had_errors);
1743        assert!(!output.error_messages.is_empty());
1744        assert!(output.error_messages[0].contains("not found in PATH"));
1745    }
1746
1747    #[test]
1748    fn test_format_on_missing_tool_binary_fail_fast() {
1749        use super::super::config::{LanguageToolConfig, ToolDefinition};
1750
1751        let mut config = default_config();
1752        config.on_missing_tool_binary = OnMissing::FailFast;
1753
1754        // Configure a tool with a non-existent binary
1755        let lang_config = LanguageToolConfig {
1756            format: vec!["nonexistent-formatter".to_string()],
1757            ..Default::default()
1758        };
1759        config.languages.insert("python".to_string(), lang_config);
1760
1761        let tool_def = ToolDefinition {
1762            command: vec!["nonexistent-binary-xyz123".to_string()],
1763            ..Default::default()
1764        };
1765        config.tools.insert("nonexistent-formatter".to_string(), tool_def);
1766
1767        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1768
1769        let content = "```python\nprint('hello')\n```";
1770        let result = processor.format(content);
1771
1772        // Should fail immediately
1773        assert!(result.is_err());
1774        let err = result.unwrap_err();
1775        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
1776    }
1777
1778    #[test]
1779    fn test_lint_rumdl_builtin_skipped_for_markdown() {
1780        // Configure the built-in "rumdl" tool for markdown
1781        // The processor should skip it (handled by embedded markdown linting)
1782        let mut config = default_config();
1783        config.languages.insert(
1784            "markdown".to_string(),
1785            LanguageToolConfig {
1786                lint: vec![RUMDL_BUILTIN_TOOL.to_string()],
1787                ..Default::default()
1788            },
1789        );
1790        config.on_missing_language_definition = OnMissing::Fail;
1791        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1792
1793        let content = "```markdown\n# Hello\n```";
1794        let result = processor.lint(content);
1795
1796        // Should succeed with no diagnostics - "rumdl" tool is skipped, not treated as unknown
1797        assert!(result.is_ok());
1798        assert!(result.unwrap().is_empty());
1799    }
1800
1801    #[test]
1802    fn test_format_rumdl_builtin_skipped_for_markdown() {
1803        // Configure the built-in "rumdl" tool for markdown
1804        let mut config = default_config();
1805        config.languages.insert(
1806            "markdown".to_string(),
1807            LanguageToolConfig {
1808                format: vec![RUMDL_BUILTIN_TOOL.to_string()],
1809                ..Default::default()
1810            },
1811        );
1812        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1813
1814        let content = "```markdown\n# Hello\n```";
1815        let result = processor.format(content);
1816
1817        // Should succeed with unchanged content - "rumdl" tool is skipped
1818        assert!(result.is_ok());
1819        let output = result.unwrap();
1820        assert_eq!(output.content, content);
1821        assert!(!output.had_errors);
1822    }
1823
1824    #[test]
1825    fn test_is_markdown_language() {
1826        // Test the helper function
1827        assert!(is_markdown_language("markdown"));
1828        assert!(is_markdown_language("Markdown"));
1829        assert!(is_markdown_language("MARKDOWN"));
1830        assert!(is_markdown_language("md"));
1831        assert!(is_markdown_language("MD"));
1832        assert!(!is_markdown_language("python"));
1833        assert!(!is_markdown_language("rust"));
1834        assert!(!is_markdown_language(""));
1835    }
1836
1837    // Issue #423: MkDocs admonition code block detection
1838
1839    #[test]
1840    fn test_extract_mkdocs_admonition_code_block() {
1841        let config = default_config();
1842        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1843
1844        let content = "!!! note\n    Some text\n\n    ```python\n    def hello():\n        pass\n    ```\n";
1845        let blocks = processor.extract_code_blocks(content);
1846
1847        assert_eq!(blocks.len(), 1, "Should detect code block inside MkDocs admonition");
1848        assert_eq!(blocks[0].language, "python");
1849    }
1850
1851    #[test]
1852    fn test_extract_mkdocs_tab_code_block() {
1853        let config = default_config();
1854        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1855
1856        let content = "=== \"Python\"\n\n    ```python\n    print(\"hello\")\n    ```\n";
1857        let blocks = processor.extract_code_blocks(content);
1858
1859        assert_eq!(blocks.len(), 1, "Should detect code block inside MkDocs tab");
1860        assert_eq!(blocks[0].language, "python");
1861    }
1862
1863    #[test]
1864    fn test_standard_flavor_ignores_admonition_indented_content() {
1865        let config = default_config();
1866        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1867
1868        // With standard flavor, pulldown_cmark parses this differently;
1869        // our MkDocs extraction should NOT run
1870        let content = "!!! note\n    Some text\n\n    ```python\n    def hello():\n        pass\n    ```\n";
1871        let blocks = processor.extract_code_blocks(content);
1872
1873        // Standard flavor relies on pulldown_cmark only, which may or may not detect
1874        // indented fenced blocks. The key assertion is that we don't double-detect.
1875        // With standard flavor, the MkDocs extraction path is skipped entirely.
1876        for (i, b) in blocks.iter().enumerate() {
1877            for (j, b2) in blocks.iter().enumerate() {
1878                if i != j {
1879                    assert_ne!(b.start_line, b2.start_line, "No duplicate blocks should exist");
1880                }
1881            }
1882        }
1883    }
1884
1885    #[test]
1886    fn test_mkdocs_top_level_blocks_alongside_admonition() {
1887        let config = default_config();
1888        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1889
1890        let content =
1891            "```rust\nfn main() {}\n```\n\n!!! note\n    Some text\n\n    ```python\n    print(\"hello\")\n    ```\n";
1892        let blocks = processor.extract_code_blocks(content);
1893
1894        assert_eq!(
1895            blocks.len(),
1896            2,
1897            "Should detect both top-level and admonition code blocks"
1898        );
1899        assert_eq!(blocks[0].language, "rust");
1900        assert_eq!(blocks[1].language, "python");
1901    }
1902
1903    #[test]
1904    fn test_mkdocs_nested_admonition_code_block() {
1905        let config = default_config();
1906        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1907
1908        let content = "\
1909!!! note
1910    Some text
1911
1912    !!! warning
1913        Nested content
1914
1915        ```python
1916        x = 1
1917        ```
1918";
1919        let blocks = processor.extract_code_blocks(content);
1920        assert_eq!(blocks.len(), 1, "Should detect code block inside nested admonition");
1921        assert_eq!(blocks[0].language, "python");
1922    }
1923
1924    #[test]
1925    fn test_mkdocs_consecutive_admonitions_no_stale_context() {
1926        let config = default_config();
1927        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1928
1929        // Two consecutive admonitions at the same indent level.
1930        // The first has no code block, the second does.
1931        let content = "\
1932!!! note
1933    First admonition content
1934
1935!!! warning
1936    Second admonition content
1937
1938    ```python
1939    y = 2
1940    ```
1941";
1942        let blocks = processor.extract_code_blocks(content);
1943        assert_eq!(blocks.len(), 1, "Should detect code block in second admonition only");
1944        assert_eq!(blocks[0].language, "python");
1945    }
1946
1947    #[test]
1948    fn test_mkdocs_crlf_line_endings() {
1949        let config = default_config();
1950        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1951
1952        // Use \r\n line endings
1953        let content = "!!! note\r\n    Some text\r\n\r\n    ```python\r\n    x = 1\r\n    ```\r\n";
1954        let blocks = processor.extract_code_blocks(content);
1955
1956        assert_eq!(blocks.len(), 1, "Should detect code block with CRLF line endings");
1957        assert_eq!(blocks[0].language, "python");
1958
1959        // Verify byte offsets point to valid content
1960        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
1961        assert!(
1962            extracted.contains("x = 1"),
1963            "Extracted content should contain code. Got: {extracted:?}"
1964        );
1965    }
1966
1967    #[test]
1968    fn test_mkdocs_unclosed_fence_in_admonition() {
1969        let config = default_config();
1970        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1971
1972        // Unclosed fence should not produce a block
1973        let content = "!!! note\n    ```python\n    x = 1\n    no closing fence\n";
1974        let blocks = processor.extract_code_blocks(content);
1975        assert_eq!(blocks.len(), 0, "Unclosed fence should not produce a block");
1976    }
1977
1978    #[test]
1979    fn test_mkdocs_tilde_fence_in_admonition() {
1980        let config = default_config();
1981        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1982
1983        let content = "!!! note\n    ~~~ruby\n    puts 'hi'\n    ~~~\n";
1984        let blocks = processor.extract_code_blocks(content);
1985        assert_eq!(blocks.len(), 1, "Should detect tilde-fenced code block");
1986        assert_eq!(blocks[0].language, "ruby");
1987    }
1988
1989    #[test]
1990    fn test_mkdocs_empty_lines_in_code_block() {
1991        let config = default_config();
1992        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1993
1994        // Code block with empty lines inside — verifies byte offsets are correct
1995        // across empty lines (the previous find("") approach would break here)
1996        let content = "!!! note\n    ```python\n    x = 1\n\n    y = 2\n    ```\n";
1997        let blocks = processor.extract_code_blocks(content);
1998        assert_eq!(blocks.len(), 1);
1999
2000        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
2001        assert!(
2002            extracted.contains("x = 1") && extracted.contains("y = 2"),
2003            "Extracted content should span across the empty line. Got: {extracted:?}"
2004        );
2005    }
2006
2007    #[test]
2008    fn test_mkdocs_content_byte_offsets_lf() {
2009        let config = default_config();
2010        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
2011
2012        let content = "!!! note\n    ```python\n    print('hi')\n    ```\n";
2013        let blocks = processor.extract_code_blocks(content);
2014        assert_eq!(blocks.len(), 1);
2015
2016        // Verify the extracted content is exactly the code body
2017        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
2018        assert_eq!(extracted, "    print('hi')\n", "Content offsets should be exact for LF");
2019    }
2020
2021    #[test]
2022    fn test_mkdocs_content_byte_offsets_crlf() {
2023        let config = default_config();
2024        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
2025
2026        let content = "!!! note\r\n    ```python\r\n    print('hi')\r\n    ```\r\n";
2027        let blocks = processor.extract_code_blocks(content);
2028        assert_eq!(blocks.len(), 1);
2029
2030        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
2031        assert_eq!(
2032            extracted, "    print('hi')\r\n",
2033            "Content offsets should be exact for CRLF"
2034        );
2035    }
2036
2037    #[test]
2038    fn test_lint_enabled_false_skips_language_in_strict_mode() {
2039        // With on-missing-language-definition = "fail", a language configured
2040        // with enabled=false should be silently skipped (no error).
2041        let mut config = default_config();
2042        config.normalize_language = NormalizeLanguage::Exact;
2043        config.on_missing_language_definition = OnMissing::Fail;
2044
2045        // Python has tools, plaintext is disabled
2046        config.languages.insert(
2047            "python".to_string(),
2048            LanguageToolConfig {
2049                lint: vec!["ruff:check".to_string()],
2050                ..Default::default()
2051            },
2052        );
2053        config.languages.insert(
2054            "plaintext".to_string(),
2055            LanguageToolConfig {
2056                enabled: false,
2057                ..Default::default()
2058            },
2059        );
2060
2061        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2062
2063        let content = "```plaintext\nsome text\n```";
2064        let result = processor.lint(content);
2065
2066        // No error for plaintext: enabled=false satisfies strict mode
2067        assert!(result.is_ok());
2068        let diagnostics = result.unwrap();
2069        assert!(
2070            diagnostics.is_empty(),
2071            "Expected no diagnostics for disabled language, got: {diagnostics:?}"
2072        );
2073    }
2074
2075    #[test]
2076    fn test_format_enabled_false_skips_language_in_strict_mode() {
2077        // Same test but for format mode
2078        let mut config = default_config();
2079        config.normalize_language = NormalizeLanguage::Exact;
2080        config.on_missing_language_definition = OnMissing::Fail;
2081
2082        config.languages.insert(
2083            "plaintext".to_string(),
2084            LanguageToolConfig {
2085                enabled: false,
2086                ..Default::default()
2087            },
2088        );
2089
2090        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2091
2092        let content = "```plaintext\nsome text\n```";
2093        let result = processor.format(content);
2094
2095        // No error for plaintext: enabled=false satisfies strict mode
2096        assert!(result.is_ok());
2097        let output = result.unwrap();
2098        assert!(!output.had_errors, "Expected no errors for disabled language");
2099        assert!(
2100            output.error_messages.is_empty(),
2101            "Expected no error messages, got: {:?}",
2102            output.error_messages
2103        );
2104    }
2105
2106    #[test]
2107    fn test_enabled_false_default_true_preserved() {
2108        // Verify that when enabled is not set, it defaults to true (existing behavior)
2109        let mut config = default_config();
2110        config.on_missing_language_definition = OnMissing::Fail;
2111
2112        // Configure python without explicitly setting enabled
2113        config.languages.insert(
2114            "python".to_string(),
2115            LanguageToolConfig {
2116                lint: vec!["ruff:check".to_string()],
2117                ..Default::default()
2118            },
2119        );
2120
2121        let lang_config = config.languages.get("python").unwrap();
2122        assert!(lang_config.enabled, "enabled should default to true");
2123    }
2124
2125    #[test]
2126    fn test_enabled_false_with_fail_fast_no_error() {
2127        // Even with fail-fast, enabled=false should skip silently
2128        let mut config = default_config();
2129        config.normalize_language = NormalizeLanguage::Exact;
2130        config.on_missing_language_definition = OnMissing::FailFast;
2131
2132        config.languages.insert(
2133            "unknown".to_string(),
2134            LanguageToolConfig {
2135                enabled: false,
2136                ..Default::default()
2137            },
2138        );
2139
2140        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2141
2142        let content = "```unknown\nsome content\n```";
2143        let result = processor.lint(content);
2144
2145        // Should not return an error: enabled=false takes precedence over fail-fast
2146        assert!(result.is_ok(), "Expected Ok but got Err: {result:?}");
2147        assert!(result.unwrap().is_empty());
2148    }
2149
2150    #[test]
2151    fn test_enabled_false_format_with_fail_fast_no_error() {
2152        // Same for format mode
2153        let mut config = default_config();
2154        config.normalize_language = NormalizeLanguage::Exact;
2155        config.on_missing_language_definition = OnMissing::FailFast;
2156
2157        config.languages.insert(
2158            "unknown".to_string(),
2159            LanguageToolConfig {
2160                enabled: false,
2161                ..Default::default()
2162            },
2163        );
2164
2165        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2166
2167        let content = "```unknown\nsome content\n```";
2168        let result = processor.format(content);
2169
2170        assert!(result.is_ok(), "Expected Ok but got Err: {result:?}");
2171        let output = result.unwrap();
2172        assert!(!output.had_errors);
2173    }
2174
2175    #[test]
2176    fn test_enabled_false_with_tools_still_skips() {
2177        // If enabled=false but tools are listed, the language should still be skipped
2178        let mut config = default_config();
2179        config.on_missing_language_definition = OnMissing::Fail;
2180
2181        config.languages.insert(
2182            "python".to_string(),
2183            LanguageToolConfig {
2184                enabled: false,
2185                lint: vec!["ruff:check".to_string()],
2186                format: vec!["ruff:format".to_string()],
2187                on_error: None,
2188            },
2189        );
2190
2191        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2192
2193        let content = "```python\nprint('hello')\n```";
2194
2195        // Lint should skip
2196        let lint_result = processor.lint(content);
2197        assert!(lint_result.is_ok());
2198        assert!(lint_result.unwrap().is_empty());
2199
2200        // Format should skip
2201        let format_result = processor.format(content);
2202        assert!(format_result.is_ok());
2203        let output = format_result.unwrap();
2204        assert!(!output.had_errors);
2205        assert_eq!(output.content, content, "Content should be unchanged");
2206    }
2207
2208    #[test]
2209    fn test_enabled_true_without_tools_triggers_strict_mode() {
2210        // A language configured with enabled=true (default) but no tools
2211        // should still trigger strict mode errors
2212        let mut config = default_config();
2213        config.on_missing_language_definition = OnMissing::Fail;
2214
2215        config.languages.insert(
2216            "python".to_string(),
2217            LanguageToolConfig {
2218                // enabled defaults to true, no tools
2219                ..Default::default()
2220            },
2221        );
2222
2223        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2224
2225        let content = "```python\nprint('hello')\n```";
2226        let result = processor.lint(content);
2227
2228        // Should report an error because enabled=true but no lint tools configured
2229        assert!(result.is_ok());
2230        let diagnostics = result.unwrap();
2231        assert_eq!(diagnostics.len(), 1);
2232        assert!(diagnostics[0].message.contains("No lint tools configured"));
2233    }
2234
2235    #[test]
2236    fn test_mixed_enabled_and_disabled_languages() {
2237        // Multiple languages: one disabled, one unconfigured
2238        let mut config = default_config();
2239        config.normalize_language = NormalizeLanguage::Exact;
2240        config.on_missing_language_definition = OnMissing::Fail;
2241
2242        config.languages.insert(
2243            "plaintext".to_string(),
2244            LanguageToolConfig {
2245                enabled: false,
2246                ..Default::default()
2247            },
2248        );
2249
2250        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2251
2252        let content = "\
2253```plaintext
2254some text
2255```
2256
2257```javascript
2258console.log('hi');
2259```
2260";
2261
2262        let result = processor.lint(content);
2263        assert!(result.is_ok());
2264        let diagnostics = result.unwrap();
2265
2266        // plaintext: skipped (enabled=false), no error
2267        // javascript: not configured at all, should trigger strict mode error
2268        assert_eq!(diagnostics.len(), 1, "Expected 1 diagnostic, got: {diagnostics:?}");
2269        assert!(
2270            diagnostics[0].message.contains("javascript"),
2271            "Error should be about javascript, got: {}",
2272            diagnostics[0].message
2273        );
2274    }
2275
2276    #[test]
2277    fn test_generic_fallback_includes_all_stderr_lines() {
2278        let config = default_config();
2279        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2280
2281        // Use output that won't be parsed by any structured format parser
2282        let output = ToolOutput {
2283            stdout: String::new(),
2284            stderr: "Parse error at position 42\nUnexpected token '::'\n3 errors found".to_string(),
2285            exit_code: 1,
2286            success: false,
2287        };
2288
2289        let diags = processor.parse_tool_output(&output, "tombi", 5);
2290        assert_eq!(diags.len(), 3, "Expected one diagnostic per non-empty stderr line");
2291        assert_eq!(diags[0].message, "Parse error at position 42");
2292        assert_eq!(diags[1].message, "Unexpected token '::'");
2293        assert_eq!(diags[2].message, "3 errors found");
2294        assert!(diags.iter().all(|d| d.tool == "tombi"));
2295        assert!(diags.iter().all(|d| d.file_line == 5));
2296    }
2297
2298    #[test]
2299    fn test_generic_fallback_includes_all_stdout_lines_when_stderr_empty() {
2300        let config = default_config();
2301        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2302
2303        let output = ToolOutput {
2304            stdout: "Line 1 error\nLine 2 detail\nLine 3 summary".to_string(),
2305            stderr: String::new(),
2306            exit_code: 1,
2307            success: false,
2308        };
2309
2310        let diags = processor.parse_tool_output(&output, "some-tool", 10);
2311        assert_eq!(diags.len(), 3);
2312        assert_eq!(diags[0].message, "Line 1 error");
2313        assert_eq!(diags[1].message, "Line 2 detail");
2314        assert_eq!(diags[2].message, "Line 3 summary");
2315    }
2316
2317    #[test]
2318    fn test_generic_fallback_skips_blank_lines() {
2319        let config = default_config();
2320        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2321
2322        let output = ToolOutput {
2323            stdout: String::new(),
2324            stderr: "error: bad input\n\n  \n\ndetail: see above\n".to_string(),
2325            exit_code: 1,
2326            success: false,
2327        };
2328
2329        let diags = processor.parse_tool_output(&output, "tool", 1);
2330        assert_eq!(diags.len(), 2);
2331        assert_eq!(diags[0].message, "error: bad input");
2332        assert_eq!(diags[1].message, "detail: see above");
2333    }
2334
2335    #[test]
2336    fn test_generic_fallback_exit_code_when_no_output() {
2337        let config = default_config();
2338        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2339
2340        let output = ToolOutput {
2341            stdout: String::new(),
2342            stderr: String::new(),
2343            exit_code: 42,
2344            success: false,
2345        };
2346
2347        let diags = processor.parse_tool_output(&output, "tool", 1);
2348        assert_eq!(diags.len(), 1);
2349        assert_eq!(diags[0].message, "Tool exited with code 42");
2350    }
2351
2352    #[test]
2353    fn test_generic_fallback_not_triggered_on_success() {
2354        let config = default_config();
2355        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2356
2357        let output = ToolOutput {
2358            stdout: "some informational output".to_string(),
2359            stderr: String::new(),
2360            exit_code: 0,
2361            success: true,
2362        };
2363
2364        let diags = processor.parse_tool_output(&output, "tool", 1);
2365        assert!(
2366            diags.is_empty(),
2367            "Successful tool runs should produce no fallback diagnostics"
2368        );
2369    }
2370
2371    #[test]
2372    fn test_ansi_codes_stripped_before_parsing() {
2373        let config = default_config();
2374        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2375
2376        // ruff-style output with ANSI color codes wrapping the message
2377        let output = ToolOutput {
2378            stdout: "\x1b[1m_.py\x1b[0m:\x1b[33m1\x1b[0m:\x1b[33m1\x1b[0m: \x1b[31mE501\x1b[0m Line too long"
2379                .to_string(),
2380            stderr: String::new(),
2381            exit_code: 1,
2382            success: false,
2383        };
2384
2385        let diags = processor.parse_tool_output(&output, "ruff:check", 5);
2386        assert_eq!(diags.len(), 1, "ANSI-colored output should still be parsed");
2387        assert_eq!(diags[0].message, "E501 Line too long");
2388        assert_eq!(diags[0].file_line, 6); // 5 + 1
2389    }
2390
2391    #[test]
2392    fn test_tombi_multiline_error_format() {
2393        let config = default_config();
2394        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2395
2396        // Simulates tombi output (ANSI codes stripped for clarity)
2397        let output = ToolOutput {
2398            stdout: "[test]\ntest: \"test\"\nError: invalid key\n    at line 2 column 1\nError: expected key\n    at line 2 column 1\nError: expected '='\n    at line 2 column 1\nError: expected value\n    at line 2 column 1".to_string(),
2399            stderr: "1 file failed to be formatted".to_string(),
2400            exit_code: 1,
2401            success: false,
2402        };
2403
2404        let diags = processor.parse_tool_output(&output, "tombi", 7);
2405        assert_eq!(
2406            diags.len(),
2407            4,
2408            "Expected 4 diagnostics from tombi errors, got {diags:?}"
2409        );
2410        assert_eq!(diags[0].message, "invalid key");
2411        assert_eq!(diags[0].file_line, 9); // 7 + 2
2412        assert_eq!(diags[0].column, Some(1));
2413        assert_eq!(diags[1].message, "expected key");
2414        assert_eq!(diags[1].file_line, 9);
2415        assert_eq!(diags[2].message, "expected '='");
2416        assert_eq!(diags[3].message, "expected value");
2417        assert!(diags.iter().all(|d| d.tool == "tombi"));
2418    }
2419
2420    #[test]
2421    fn test_tombi_with_ansi_codes() {
2422        let config = default_config();
2423        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2424
2425        // Real tombi output with ANSI escape codes
2426        let output = ToolOutput {
2427            stdout: "[test]\ntest: \"test\"\n\x1b[1;31m  Error\x1b[0m: \x1b[1minvalid key\x1b[0m\n    \x1b[90mat line 2 column 1\x1b[0m\n\x1b[1;31m  Error\x1b[0m: \x1b[1mexpected '='\x1b[0m\n    \x1b[90mat line 2 column 1\x1b[0m".to_string(),
2428            stderr: "1 file failed to be formatted".to_string(),
2429            exit_code: 1,
2430            success: false,
2431        };
2432
2433        let diags = processor.parse_tool_output(&output, "tombi", 7);
2434        assert_eq!(
2435            diags.len(),
2436            2,
2437            "Expected 2 diagnostics from ANSI-colored tombi output, got {diags:?}"
2438        );
2439        assert_eq!(diags[0].message, "invalid key");
2440        assert_eq!(diags[0].file_line, 9);
2441        assert_eq!(diags[1].message, "expected '='");
2442        assert_eq!(diags[1].file_line, 9);
2443    }
2444
2445    #[test]
2446    fn test_fallback_combines_stdout_and_stderr() {
2447        let config = default_config();
2448        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2449
2450        // Tool puts some errors on stdout, summary on stderr
2451        let output = ToolOutput {
2452            stdout: "problem found in input".to_string(),
2453            stderr: "1 file failed".to_string(),
2454            exit_code: 1,
2455            success: false,
2456        };
2457
2458        let diags = processor.parse_tool_output(&output, "tool", 1);
2459        assert_eq!(diags.len(), 2, "Fallback should include both stdout and stderr");
2460        assert_eq!(diags[0].message, "problem found in input");
2461        assert_eq!(diags[1].message, "1 file failed");
2462    }
2463
2464    #[test]
2465    fn test_error_line_without_position_info() {
2466        let config = default_config();
2467        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2468
2469        // Error: line not followed by "at line N column M"
2470        let output = ToolOutput {
2471            stdout: "Error: something went wrong\nsome unrelated line".to_string(),
2472            stderr: String::new(),
2473            exit_code: 1,
2474            success: false,
2475        };
2476
2477        let diags = processor.parse_tool_output(&output, "tool", 5);
2478        // "Error: something went wrong" → parsed by error-line parser (no position)
2479        // "some unrelated line" → no parser matches, but diagnostics not empty → no fallback
2480        assert!(!diags.is_empty());
2481        assert_eq!(diags[0].message, "something went wrong");
2482        assert_eq!(diags[0].file_line, 5); // No line offset, uses code_block_start
2483    }
2484
2485    #[test]
2486    fn test_warning_line_with_position() {
2487        let config = default_config();
2488        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2489
2490        let output = ToolOutput {
2491            stdout: "Warning: deprecated syntax\n    at line 3 column 5".to_string(),
2492            stderr: String::new(),
2493            exit_code: 1,
2494            success: false,
2495        };
2496
2497        let diags = processor.parse_tool_output(&output, "tool", 10);
2498        assert_eq!(diags.len(), 1);
2499        assert_eq!(diags[0].message, "deprecated syntax");
2500        assert_eq!(diags[0].file_line, 13); // 10 + 3
2501        assert_eq!(diags[0].column, Some(5));
2502        assert!(matches!(diags[0].severity, DiagnosticSeverity::Warning));
2503    }
2504
2505    #[test]
2506    fn test_strip_ansi_codes() {
2507        assert_eq!(strip_ansi_codes("hello"), "hello");
2508        assert_eq!(strip_ansi_codes("\x1b[31mred\x1b[0m"), "red");
2509        assert_eq!(
2510            strip_ansi_codes("\x1b[1;31m  Error\x1b[0m: \x1b[1mmsg\x1b[0m"),
2511            "  Error: msg"
2512        );
2513        assert_eq!(strip_ansi_codes("no codes here"), "no codes here");
2514        assert_eq!(strip_ansi_codes(""), "");
2515        assert_eq!(
2516            strip_ansi_codes("\x1b[90mat line 2 column 1\x1b[0m"),
2517            "at line 2 column 1"
2518        );
2519    }
2520
2521    #[test]
2522    fn test_parse_at_line_column() {
2523        assert_eq!(
2524            CodeBlockToolProcessor::parse_at_line_column("at line 2 column 1"),
2525            Some((2, 1))
2526        );
2527        assert_eq!(
2528            CodeBlockToolProcessor::parse_at_line_column("at line 10 column 15"),
2529            Some((10, 15))
2530        );
2531        assert_eq!(
2532            CodeBlockToolProcessor::parse_at_line_column("At Line 5 Column 3"),
2533            Some((5, 3))
2534        );
2535        assert_eq!(
2536            CodeBlockToolProcessor::parse_at_line_column("not a position line"),
2537            None
2538        );
2539        assert_eq!(
2540            CodeBlockToolProcessor::parse_at_line_column("at line abc column 1"),
2541            None
2542        );
2543    }
2544
2545    #[test]
2546    fn test_parse_error_line() {
2547        let (msg, sev) = CodeBlockToolProcessor::parse_error_line("Error: invalid key").unwrap();
2548        assert_eq!(msg, "invalid key");
2549        assert!(matches!(sev, DiagnosticSeverity::Error));
2550
2551        let (msg, sev) = CodeBlockToolProcessor::parse_error_line("Warning: deprecated").unwrap();
2552        assert_eq!(msg, "deprecated");
2553        assert!(matches!(sev, DiagnosticSeverity::Warning));
2554
2555        // Lowercase should NOT match (avoids conflict with unstructured tool output)
2556        assert!(CodeBlockToolProcessor::parse_error_line("error: bad input").is_none());
2557        assert!(CodeBlockToolProcessor::parse_error_line("warning: minor issue").is_none());
2558
2559        // Empty message after prefix should not match
2560        assert!(CodeBlockToolProcessor::parse_error_line("Error:").is_none());
2561        assert!(CodeBlockToolProcessor::parse_error_line("Error:   ").is_none());
2562
2563        // Not an error line
2564        assert!(CodeBlockToolProcessor::parse_error_line("some random text").is_none());
2565    }
2566
2567    #[test]
2568    fn test_consecutive_error_lines_without_position() {
2569        let config = default_config();
2570        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2571
2572        // Two Error: lines in a row — first should flush without position,
2573        // second gets position from "at line"
2574        let output = ToolOutput {
2575            stdout: "Error: first problem\nError: second problem\n    at line 3 column 1".to_string(),
2576            stderr: String::new(),
2577            exit_code: 1,
2578            success: false,
2579        };
2580
2581        let diags = processor.parse_tool_output(&output, "tool", 5);
2582        assert_eq!(diags.len(), 2, "Expected 2 diagnostics, got {diags:?}");
2583        // First error flushed without position when second Error: was encountered
2584        assert_eq!(diags[0].message, "first problem");
2585        assert_eq!(diags[0].file_line, 5); // No line mapping
2586        assert_eq!(diags[0].column, None);
2587        // Second error resolved with position
2588        assert_eq!(diags[1].message, "second problem");
2589        assert_eq!(diags[1].file_line, 8); // 5 + 3
2590        assert_eq!(diags[1].column, Some(1));
2591    }
2592
2593    #[test]
2594    fn test_error_line_at_end_of_output() {
2595        let config = default_config();
2596        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2597
2598        // Error: as the very last line — flushed by post-loop code
2599        let output = ToolOutput {
2600            stdout: "Error: trailing error".to_string(),
2601            stderr: String::new(),
2602            exit_code: 1,
2603            success: false,
2604        };
2605
2606        let diags = processor.parse_tool_output(&output, "tool", 5);
2607        assert_eq!(diags.len(), 1);
2608        assert_eq!(diags[0].message, "trailing error");
2609        assert_eq!(diags[0].file_line, 5); // No position info available
2610        assert_eq!(diags[0].column, None);
2611    }
2612
2613    #[test]
2614    fn test_blank_lines_between_error_and_position() {
2615        let config = default_config();
2616        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2617
2618        // Blank lines between Error: and "at line" should be transparently skipped
2619        let output = ToolOutput {
2620            stdout: "Error: spaced out\n\n\n    at line 4 column 2".to_string(),
2621            stderr: String::new(),
2622            exit_code: 1,
2623            success: false,
2624        };
2625
2626        let diags = processor.parse_tool_output(&output, "tool", 10);
2627        assert_eq!(diags.len(), 1);
2628        assert_eq!(diags[0].message, "spaced out");
2629        assert_eq!(diags[0].file_line, 14); // 10 + 4
2630        assert_eq!(diags[0].column, Some(2));
2631    }
2632
2633    #[test]
2634    fn test_mixed_structured_and_error_line_parsers() {
2635        let config = default_config();
2636        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2637
2638        // Mix of ruff-style structured output and tombi-style Error: output
2639        let output = ToolOutput {
2640            stdout: "_.py:1:5: E501 Line too long\nError: invalid syntax\n    at line 3 column 1".to_string(),
2641            stderr: String::new(),
2642            exit_code: 1,
2643            success: false,
2644        };
2645
2646        let diags = processor.parse_tool_output(&output, "tool", 5);
2647        assert_eq!(diags.len(), 2, "Expected 2 diagnostics, got {diags:?}");
2648        // First: standard format parser
2649        assert_eq!(diags[0].message, "E501 Line too long");
2650        assert_eq!(diags[0].file_line, 6); // 5 + 1
2651        // Second: Error: + at line parser
2652        assert_eq!(diags[1].message, "invalid syntax");
2653        assert_eq!(diags[1].file_line, 8); // 5 + 3
2654    }
2655
2656    #[test]
2657    fn test_at_line_without_preceding_error() {
2658        let config = default_config();
2659        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2660
2661        // "at line N column M" without a preceding Error: should not create a diagnostic
2662        let output = ToolOutput {
2663            stdout: "at line 2 column 1\nsome other text".to_string(),
2664            stderr: String::new(),
2665            exit_code: 1,
2666            success: false,
2667        };
2668
2669        let diags = processor.parse_tool_output(&output, "tool", 5);
2670        // No pending error, so "at line" is just an unmatched line
2671        // Both lines are unmatched, fallback fires with combined output
2672        assert_eq!(diags.len(), 2);
2673        assert_eq!(diags[0].message, "at line 2 column 1");
2674        assert_eq!(diags[1].message, "some other text");
2675    }
2676
2677    // =========================================================================
2678    // Issue #527: formatter that produces empty output should not erase content
2679    // =========================================================================
2680
2681    /// A formatter that produces no stdout (like `tombi lint -` mistakenly used
2682    /// as a formatter) should not replace non-empty content with an empty string.
2683    /// This test uses `true` which exits 0 with no output, simulating the bug.
2684    #[test]
2685    fn test_format_empty_output_does_not_erase_content() {
2686        use super::super::config::LanguageToolConfig;
2687
2688        let mut config = default_config();
2689        config.languages.insert(
2690            "toml".to_string(),
2691            LanguageToolConfig {
2692                format: vec!["empty-formatter".to_string()],
2693                ..Default::default()
2694            },
2695        );
2696        // Define a tool that exits 0 but produces no stdout (simulates `tombi lint -`)
2697        config.tools.insert(
2698            "empty-formatter".to_string(),
2699            super::super::config::ToolDefinition {
2700                command: vec!["true".to_string()],
2701                stdin: true,
2702                stdout: true,
2703                lint_args: vec![],
2704                format_args: vec![],
2705            },
2706        );
2707
2708        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2709
2710        let content = "```toml\nkey = \"value\"\n```\n";
2711        let result = processor.format(content);
2712
2713        assert!(result.is_ok(), "Format should not error");
2714        let output = result.unwrap();
2715
2716        // The content must NOT be erased — original content should be preserved
2717        assert!(
2718            output.content.contains("key = \"value\""),
2719            "Empty formatter output should not erase content. Got: {:?}",
2720            output.content
2721        );
2722    }
2723
2724    /// A formatter that echoes input back (like `cat`) should preserve content.
2725    #[test]
2726    fn test_format_identity_formatter_preserves_content() {
2727        use super::super::config::LanguageToolConfig;
2728
2729        let mut config = default_config();
2730        config.languages.insert(
2731            "toml".to_string(),
2732            LanguageToolConfig {
2733                format: vec!["cat-formatter".to_string()],
2734                ..Default::default()
2735            },
2736        );
2737        config.tools.insert(
2738            "cat-formatter".to_string(),
2739            super::super::config::ToolDefinition {
2740                command: vec!["cat".to_string()],
2741                stdin: true,
2742                stdout: true,
2743                lint_args: vec![],
2744                format_args: vec![],
2745            },
2746        );
2747
2748        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2749
2750        let content = "```toml\nkey = \"value\"\n```\n";
2751        let result = processor.format(content);
2752
2753        assert!(result.is_ok(), "Format should not error");
2754        let output = result.unwrap();
2755        assert_eq!(
2756            output.content, content,
2757            "Identity formatter should preserve content exactly"
2758        );
2759    }
2760
2761    /// Verify that the context-aware tool resolution resolves bare "tombi"
2762    /// to "tombi:format" in format context and "tombi:lint" in lint context.
2763    #[test]
2764    fn test_resolve_tool_context_aware_tombi() {
2765        let config = default_config();
2766        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2767
2768        // In format context, bare "tombi" should resolve to "tombi:format"
2769        let format_def = processor
2770            .resolve_tool("tombi", ToolContext::Format)
2771            .expect("Should resolve tombi in format context");
2772        assert!(
2773            format_def.command.iter().any(|arg| arg == "format"),
2774            "Bare 'tombi' in format context should resolve to 'tombi format', got: {:?}",
2775            format_def.command
2776        );
2777
2778        // In lint context, bare "tombi" should resolve to "tombi:lint"
2779        let lint_def = processor
2780            .resolve_tool("tombi", ToolContext::Lint)
2781            .expect("Should resolve tombi in lint context");
2782        assert!(
2783            lint_def.command.iter().any(|arg| arg == "lint"),
2784            "Bare 'tombi' in lint context should resolve to 'tombi lint', got: {:?}",
2785            lint_def.command
2786        );
2787
2788        // Explicit suffix should bypass context-aware resolution
2789        let explicit_def = processor
2790            .resolve_tool("tombi:lint", ToolContext::Format)
2791            .expect("Should resolve explicit tombi:lint even in format context");
2792        assert!(
2793            explicit_def.command.iter().any(|arg| arg == "lint"),
2794            "Explicit 'tombi:lint' should always use lint, got: {:?}",
2795            explicit_def.command
2796        );
2797    }
2798
2799    /// Verify context-aware resolution for ruff (uses "check" suffix, not "lint").
2800    #[test]
2801    fn test_resolve_tool_context_aware_ruff() {
2802        let config = default_config();
2803        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2804
2805        // In lint context, bare "ruff" should resolve to "ruff:check"
2806        let lint_def = processor
2807            .resolve_tool("ruff", ToolContext::Lint)
2808            .expect("Should resolve ruff in lint context");
2809        assert!(
2810            lint_def.command.iter().any(|arg| arg == "check"),
2811            "Bare 'ruff' in lint context should resolve to 'ruff check', got: {:?}",
2812            lint_def.command
2813        );
2814
2815        // In format context, bare "ruff" should resolve to "ruff:format"
2816        let format_def = processor
2817            .resolve_tool("ruff", ToolContext::Format)
2818            .expect("Should resolve ruff in format context");
2819        assert!(
2820            format_def.command.iter().any(|arg| arg == "format"),
2821            "Bare 'ruff' in format context should resolve to 'ruff format', got: {:?}",
2822            format_def.command
2823        );
2824    }
2825
2826    /// Tools without context-specific variants should still resolve via bare name.
2827    #[test]
2828    fn test_resolve_tool_bare_name_fallback() {
2829        let config = default_config();
2830        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2831
2832        // "shellcheck" has no :lint or :format variant — should fall back to bare name
2833        let def = processor
2834            .resolve_tool("shellcheck", ToolContext::Lint)
2835            .expect("Should resolve shellcheck via fallback");
2836        assert!(
2837            def.command.iter().any(|arg| arg == "shellcheck"),
2838            "shellcheck should resolve to itself, got: {:?}",
2839            def.command
2840        );
2841    }
2842
2843    /// Context-aware resolution for tools with non-standard format suffixes.
2844    #[test]
2845    fn test_resolve_tool_context_aware_sqlfluff() {
2846        let config = default_config();
2847        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2848
2849        // sqlfluff uses ":fix" as its format variant
2850        let format_def = processor
2851            .resolve_tool("sqlfluff", ToolContext::Format)
2852            .expect("Should resolve sqlfluff in format context");
2853        assert!(
2854            format_def.command.iter().any(|arg| arg == "fix"),
2855            "Bare 'sqlfluff' in format context should resolve to 'sqlfluff fix', got: {:?}",
2856            format_def.command
2857        );
2858    }
2859
2860    /// Context-aware resolution for djlint (:reformat suffix).
2861    #[test]
2862    fn test_resolve_tool_context_aware_djlint() {
2863        let config = default_config();
2864        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2865
2866        // djlint uses ":reformat" as its format variant
2867        let format_def = processor
2868            .resolve_tool("djlint", ToolContext::Format)
2869            .expect("Should resolve djlint in format context");
2870        assert!(
2871            format_def.command.iter().any(|arg| arg.contains("reformat")),
2872            "Bare 'djlint' in format context should resolve to djlint reformat, got: {:?}",
2873            format_def.command
2874        );
2875    }
2876
2877    /// User-defined tools with context-specific variants resolve correctly.
2878    #[test]
2879    fn test_resolve_tool_user_defined_with_context_variant() {
2880        use super::super::config::ToolDefinition;
2881
2882        let mut config = default_config();
2883        config.tools.insert(
2884            "mytool".to_string(),
2885            ToolDefinition {
2886                command: vec!["mytool".to_string(), "--lint".to_string()],
2887                ..Default::default()
2888            },
2889        );
2890        config.tools.insert(
2891            "mytool:format".to_string(),
2892            ToolDefinition {
2893                command: vec!["mytool".to_string(), "--format".to_string()],
2894                ..Default::default()
2895            },
2896        );
2897
2898        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2899
2900        // In format context, should resolve to "mytool:format"
2901        let def = processor
2902            .resolve_tool("mytool", ToolContext::Format)
2903            .expect("Should resolve user tool in format context");
2904        assert!(
2905            def.command.iter().any(|arg| arg == "--format"),
2906            "User 'mytool' in format context should resolve to mytool:format, got: {:?}",
2907            def.command
2908        );
2909
2910        // In lint context, should fall back to bare "mytool" (no mytool:lint exists)
2911        let def = processor
2912            .resolve_tool("mytool", ToolContext::Lint)
2913            .expect("Should resolve user tool in lint context via fallback");
2914        assert!(
2915            def.command.iter().any(|arg| arg == "--lint"),
2916            "User 'mytool' in lint context should fall back to bare name, got: {:?}",
2917            def.command
2918        );
2919    }
2920
2921    /// Nonexistent tool returns None.
2922    #[test]
2923    fn test_resolve_tool_nonexistent_returns_none() {
2924        let config = default_config();
2925        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2926
2927        assert!(
2928            processor
2929                .resolve_tool("nonexistent-tool-xyz", ToolContext::Lint)
2930                .is_none(),
2931            "Nonexistent tool should return None in lint context"
2932        );
2933        assert!(
2934            processor
2935                .resolve_tool("nonexistent-tool-xyz", ToolContext::Format)
2936                .is_none(),
2937            "Nonexistent tool should return None in format context"
2938        );
2939    }
2940
2941    #[test]
2942    fn test_strip_ansi_codes_edge_cases() {
2943        // Lone ESC without CSI bracket — non-printable, safely dropped
2944        assert_eq!(strip_ansi_codes("before\x1bafter"), "beforeafter");
2945        // ESC at end of string
2946        assert_eq!(strip_ansi_codes("trailing\x1b"), "trailing");
2947        // Nested/consecutive sequences
2948        assert_eq!(strip_ansi_codes("\x1b[1m\x1b[31mbold red\x1b[0m"), "bold red");
2949        // 256-color and RGB sequences
2950        assert_eq!(strip_ansi_codes("\x1b[38;5;196mred\x1b[0m"), "red");
2951        assert_eq!(strip_ansi_codes("\x1b[38;2;255;0;0mred\x1b[0m"), "red");
2952    }
2953}
rumdl_lib/code_block_tools/processor.rs

rumdl_lib/code_block_tools/
processor.rs