rumdl_lib/code_block_tools/
processor.rs

1//! Main processor for code block linting and formatting.
2//!
3//! This module coordinates language resolution, tool lookup, execution,
4//! and result collection for processing code blocks in markdown files.
5
6#[cfg(test)]
7use super::config::LanguageToolConfig;
8use super::config::{CodeBlockToolsConfig, NormalizeLanguage, OnError, OnMissing, ToolDefinition};
9use super::executor::{ExecutorError, ToolExecutor, ToolOutput};
10use super::linguist::LinguistResolver;
11use super::registry::ToolRegistry;
12use crate::config::MarkdownFlavor;
13use crate::rule::{LintWarning, Severity};
14use crate::utils::rumdl_parser_options;
15use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
16
/// Special built-in tool name for rumdl's own markdown linting.
///
/// When this tool is configured for a markdown block, the processor skips
/// external execution for it, because that case is handled by embedded
/// markdown linting instead of by spawning a tool.
pub const RUMDL_BUILTIN_TOOL: &str = "rumdl";
21
/// Report whether `lang` names markdown.
///
/// The comparison is case-insensitive and accepts the two spellings
/// `markdown` and `md`.
fn is_markdown_language(lang: &str) -> bool {
    let lowered = lang.to_lowercase();
    ["markdown", "md"].contains(&lowered.as_str())
}
26
/// Strip ANSI CSI escape sequences from tool output.
///
/// Many tools output colored text (e.g. `\x1b[1;31mError\x1b[0m`), which prevents
/// structured parsers from matching patterns like `file:line:col: message`.
///
/// A CSI sequence is `ESC [` followed by parameter/intermediate bytes and is
/// terminated by the first "final byte" in the range `0x40..=0x7E` (`@` through
/// `~`). Terminating only on ASCII letters would let sequences such as
/// `ESC[200~` swallow the real text that follows them.
///
/// An `ESC` that does not introduce a CSI sequence is dropped by itself and the
/// following text is kept. An unterminated CSI sequence consumes the rest of
/// the input.
fn strip_ansi_codes(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        if c != '\x1b' {
            result.push(c);
            continue;
        }
        // Only `ESC [` opens a CSI sequence; `next_if_eq` consumes the '[' when present.
        if chars.next_if_eq(&'[').is_some() {
            // Discard everything up to and including the final byte.
            for next in chars.by_ref() {
                if ('\x40'..='\x7e').contains(&next) {
                    break;
                }
            }
        }
    }
    result
}
52
/// Information about a fenced code block for processing.
///
/// Line numbers are 0-indexed; `content_start`/`content_end` are byte offsets
/// into the markdown source the block was extracted from.
#[derive(Debug, Clone)]
pub struct FencedCodeBlockInfo {
    /// 0-indexed line number where opening fence starts.
    pub start_line: usize,
    /// 0-indexed line number where closing fence ends.
    pub end_line: usize,
    /// Byte offset where code content starts (after opening fence line).
    pub content_start: usize,
    /// Byte offset where code content ends (before closing fence line).
    pub content_end: usize,
    /// Language tag extracted from info string (first whitespace-separated token;
    /// empty when the fence has no info string).
    pub language: String,
    /// Full info string from the fence.
    pub info_string: String,
    /// The fence character used (` or ~).
    pub fence_char: char,
    /// Length of the fence (3 or more).
    pub fence_length: usize,
    /// Length in bytes of the leading whitespace on the fence line.
    pub indent: usize,
    /// Exact leading whitespace prefix from the fence line.
    pub indent_prefix: String,
}
77
/// A diagnostic message from an external tool.
///
/// All line and column numbers stored here are 1-indexed.
#[derive(Debug, Clone)]
pub struct CodeBlockDiagnostic {
    /// Line number in the original markdown file (1-indexed).
    pub file_line: usize,
    /// Column number (1-indexed, if available).
    pub column: Option<usize>,
    /// Message from the tool.
    pub message: String,
    /// Severity (error, warning, info).
    pub severity: DiagnosticSeverity,
    /// Name of the tool that produced this.
    pub tool: String,
    /// Line where the code block starts (1-indexed, for context).
    pub code_block_start: usize,
}
94
/// Severity level for diagnostics.
///
/// Each variant maps one-to-one onto the same-named `Severity` variant when a
/// diagnostic is converted with `CodeBlockDiagnostic::to_lint_warning`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    Error,
    Warning,
    Info,
}
102
103impl CodeBlockDiagnostic {
104    /// Convert to a LintWarning for integration with rumdl's warning system.
105    pub fn to_lint_warning(&self) -> LintWarning {
106        let severity = match self.severity {
107            DiagnosticSeverity::Error => Severity::Error,
108            DiagnosticSeverity::Warning => Severity::Warning,
109            DiagnosticSeverity::Info => Severity::Info,
110        };
111
112        LintWarning {
113            message: self.message.clone(),
114            line: self.file_line,
115            column: self.column.unwrap_or(1),
116            end_line: self.file_line,
117            end_column: self.column.unwrap_or(1),
118            severity,
119            fix: None, // External tool diagnostics don't provide fixes
120            rule_name: Some(self.tool.clone()),
121        }
122    }
123}
124
/// Error during code block processing.
///
/// Where a variant carries a `line`, the `lint` path in this module fills it
/// with the 1-indexed line of the code block's opening fence.
#[derive(Debug, Clone)]
pub enum ProcessorError {
    /// Tool execution failed.
    ToolError(ExecutorError),
    /// Tool execution failed with code block location context.
    ToolErrorAt {
        error: ExecutorError,
        line: usize,
        language: String,
    },
    /// No tools configured for language.
    NoToolsConfigured { language: String, line: usize },
    /// Tool binary not found.
    ToolBinaryNotFound {
        tool: String,
        language: String,
        line: usize,
    },
    /// Processing was aborted due to on_error = fail.
    Aborted { message: String },
}
147
148impl std::fmt::Display for ProcessorError {
149    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
150        match self {
151            Self::ToolError(e) => write!(f, "{e}"),
152            Self::ToolErrorAt { error, line, language } => {
153                write!(f, "line {line} ({language}): {error}")
154            }
155            Self::NoToolsConfigured { language, line } => {
156                write!(f, "line {line} ({language}): no tools configured")
157            }
158            Self::ToolBinaryNotFound { tool, language, line } => {
159                write!(f, "line {line} ({language}): tool '{tool}' not found in PATH")
160            }
161            Self::Aborted { message } => write!(f, "Processing aborted: {message}"),
162        }
163    }
164}
165
// No trait methods are overridden here; the derived `Debug` and the `Display`
// impl above satisfy the supertrait requirements of `std::error::Error`.
impl std::error::Error for ProcessorError {}
167
168impl From<ExecutorError> for ProcessorError {
169    fn from(e: ExecutorError) -> Self {
170        Self::ToolError(e)
171    }
172}
173
/// Result of processing a single code block.
#[derive(Debug)]
pub struct CodeBlockResult {
    /// Diagnostics from linting.
    pub diagnostics: Vec<CodeBlockDiagnostic>,
    /// Formatted content (if formatting was requested and succeeded); `None` otherwise.
    pub formatted_content: Option<String>,
    /// Whether the code block was modified.
    pub was_modified: bool,
}
184
/// Result of formatting code blocks in a document.
///
/// Returned by `CodeBlockToolProcessor::format`. When formatting is skipped
/// entirely, `content` is a copy of the input and `had_errors` is `false`.
#[derive(Debug)]
pub struct FormatOutput {
    /// The formatted content (may be partially formatted if errors occurred).
    pub content: String,
    /// Whether any errors occurred during formatting.
    pub had_errors: bool,
    /// Error messages for blocks that couldn't be formatted.
    pub error_messages: Vec<String>,
}
195
/// Context in which a tool is being used.
///
/// Used by `resolve_tool` to pick which `tool:suffix` variants of a bare tool
/// name are tried first.
enum ToolContext {
    Lint,
    Format,
}

/// Main processor for code block tools.
///
/// Borrows the configuration for its whole lifetime and owns the language
/// resolver, tool registry and executor built from it.
pub struct CodeBlockToolProcessor<'a> {
    config: &'a CodeBlockToolsConfig,
    flavor: MarkdownFlavor,
    linguist: LinguistResolver,
    registry: ToolRegistry,
    executor: ToolExecutor,
    // User-defined language aliases, with both alias and target lowercased in `new`.
    user_aliases: std::collections::HashMap<String, String>,
}
211
212impl<'a> CodeBlockToolProcessor<'a> {
213    /// Create a new processor with the given configuration and markdown flavor.
214    pub fn new(config: &'a CodeBlockToolsConfig, flavor: MarkdownFlavor) -> Self {
215        let user_aliases = config
216            .language_aliases
217            .iter()
218            .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
219            .collect();
220        Self {
221            config,
222            flavor,
223            linguist: LinguistResolver::new(),
224            registry: ToolRegistry::new(config.tools.clone()),
225            executor: ToolExecutor::new(config.timeout),
226            user_aliases,
227        }
228    }
229
230    /// Resolve a tool ID with context awareness.
231    ///
232    /// When a bare tool name (e.g., "tombi") is used in a specific context
233    /// (lint or format), try the context-specific variant first (e.g., "tombi:format"),
234    /// then common alternatives (e.g., "tombi:check"), before falling back to the bare name.
235    fn resolve_tool<'b>(&'b self, tool_id: &str, context: ToolContext) -> Option<&'b ToolDefinition> {
236        // If the tool ID already has a colon suffix, use it directly
237        if tool_id.contains(':') {
238            return self.registry.get(tool_id);
239        }
240
241        // Try context-specific variants first
242        let suffixes = match context {
243            ToolContext::Format => &["format", "fmt", "fix", "reformat"][..],
244            ToolContext::Lint => &["lint", "check"][..],
245        };
246
247        for suffix in suffixes {
248            let qualified = format!("{tool_id}:{suffix}");
249            if let Some(def) = self.registry.get(&qualified) {
250                return Some(def);
251            }
252        }
253
254        // Fall back to bare name
255        self.registry.get(tool_id)
256    }
257
258    /// Quick check whether any configured language might appear in fenced code blocks.
259    /// Scans for `` ```lang `` or `` ~~~lang `` patterns without full parsing.
260    fn has_potential_matching_blocks(&self, content: &str, lint_mode: bool) -> bool {
261        // Collect languages that have tools configured for the requested mode
262        let configured_langs: Vec<&str> = self
263            .config
264            .languages
265            .iter()
266            .filter(|(_, lc)| {
267                lc.enabled
268                    && if lint_mode {
269                        !lc.lint.is_empty()
270                    } else {
271                        !lc.format.is_empty()
272                    }
273            })
274            .map(|(lang, _)| lang.as_str())
275            .collect();
276
277        if configured_langs.is_empty() {
278            return false;
279        }
280
281        // Scan content line-by-line for fence openers matching configured languages
282        for line in content.lines() {
283            let trimmed = line.trim_start();
284            let after_fence = if let Some(rest) = trimmed.strip_prefix("```") {
285                rest
286            } else if let Some(rest) = trimmed.strip_prefix("~~~") {
287                rest
288            } else {
289                continue;
290            };
291
292            let lang = after_fence.split_whitespace().next().unwrap_or("");
293            if lang.is_empty() {
294                continue;
295            }
296            // Check both the raw language and the canonical (normalized) form
297            let canonical = self.resolve_language(lang);
298            if configured_langs.contains(&canonical.as_str()) {
299                return true;
300            }
301        }
302
303        false
304    }
305
    /// Extract all fenced code blocks from content.
    ///
    /// Blocks are discovered from pulldown_cmark's offset iterator: the opening
    /// event records the fence metadata, and the closing event supplies the end
    /// position. For the MkDocs flavor, fences inside admonitions/tabs found by
    /// `extract_mkdocs_code_blocks` are merged in (skipping any whose start line
    /// is already present) and the result is sorted by start line.
    ///
    /// Returned line numbers are 0-indexed and content offsets are byte offsets
    /// into `content`.
    pub fn extract_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
        let mut blocks = Vec::new();
        // Metadata of the fenced block currently open, between its Start and End events.
        let mut current_block: Option<FencedCodeBlockBuilder> = None;

        let options = rumdl_parser_options();
        let parser = Parser::new_ext(content, options).into_offset_iter();

        let lines: Vec<&str> = content.lines().collect();

        for (event, range) in parser {
            match event {
                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
                    let info_string = info.to_string();
                    let language = info_string.split_whitespace().next().unwrap_or("").to_string();

                    // Find start line (number of newlines before the block's start offset)
                    let start_line = content[..range.start].chars().filter(|&c| c == '\n').count();

                    // Find content start (after opening fence line)
                    let content_start = content[range.start..]
                        .find('\n')
                        .map(|i| range.start + i + 1)
                        .unwrap_or(content.len());

                    // Detect fence character and length from the line
                    // NOTE(review): this inspects the whole source line, so a fence preceded by
                    // a container marker (e.g. `> `) would report a fence length of 0 — confirm
                    // whether callers rely on `fence_length` for such blocks.
                    let fence_line = lines.get(start_line).unwrap_or(&"");
                    let trimmed = fence_line.trim_start();
                    let indent = fence_line.len() - trimmed.len();
                    let indent_prefix = fence_line.get(..indent).unwrap_or("").to_string();
                    let (fence_char, fence_length) = if trimmed.starts_with('~') {
                        ('~', trimmed.chars().take_while(|&c| c == '~').count())
                    } else {
                        ('`', trimmed.chars().take_while(|&c| c == '`').count())
                    };

                    current_block = Some(FencedCodeBlockBuilder {
                        start_line,
                        content_start,
                        language,
                        info_string,
                        fence_char,
                        fence_length,
                        indent,
                        indent_prefix,
                    });
                }
                Event::End(TagEnd::CodeBlock) => {
                    if let Some(builder) = current_block.take() {
                        // Find end line
                        let end_line = content[..range.end].chars().filter(|&c| c == '\n').count();

                        // Find content end (before closing fence line): the last newline
                        // between the content start and the end of the block's range.
                        // `min` guards against a content start past the block's end.
                        let search_start = builder.content_start.min(range.end);
                        let content_end = if search_start < range.end {
                            content[search_start..range.end]
                                .rfind('\n')
                                .map(|i| search_start + i)
                                .unwrap_or(search_start)
                        } else {
                            search_start
                        };

                        // Drop blocks whose computed span would be inverted.
                        if content_end >= builder.content_start {
                            blocks.push(FencedCodeBlockInfo {
                                start_line: builder.start_line,
                                end_line,
                                content_start: builder.content_start,
                                content_end,
                                language: builder.language,
                                info_string: builder.info_string,
                                fence_char: builder.fence_char,
                                fence_length: builder.fence_length,
                                indent: builder.indent,
                                indent_prefix: builder.indent_prefix,
                            });
                        }
                    }
                }
                _ => {}
            }
        }

        // For MkDocs flavor, also extract code blocks inside admonitions and tabs
        if self.flavor == MarkdownFlavor::MkDocs {
            let mkdocs_blocks = self.extract_mkdocs_code_blocks(content);
            for mb in mkdocs_blocks {
                // Deduplicate: only add if no existing block starts at the same line
                if !blocks.iter().any(|b| b.start_line == mb.start_line) {
                    blocks.push(mb);
                }
            }
            blocks.sort_by_key(|b| b.start_line);
        }

        blocks
    }
403
    /// Extract fenced code blocks that are inside MkDocs admonitions or tabs.
    ///
    /// pulldown_cmark doesn't parse MkDocs-specific constructs, so indented
    /// code blocks inside `!!!`/`???` admonitions or `===` tabs are missed.
    /// This method manually scans for them.
    ///
    /// The scan is line-based. A stack of indent levels tracks which
    /// admonition/tab contexts are open; a non-blank line indented by fewer than
    /// `ctx_indent + 4` columns closes the innermost context (and abandons any
    /// fence that was still open). Fences are only recognised while at least one
    /// context is open. Line numbers in the result are 0-indexed; content
    /// offsets are byte offsets into `content`.
    fn extract_mkdocs_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
        use crate::utils::mkdocs_admonitions;
        use crate::utils::mkdocs_tabs;

        let mut blocks = Vec::new();
        let lines: Vec<&str> = content.lines().collect();

        // Track current MkDocs context indent level
        // We only need to know if we're inside any MkDocs block, so a simple stack suffices.
        let mut context_indent_stack: Vec<usize> = Vec::new();

        // Track fence state inside MkDocs context
        let mut in_fence = false;
        let mut fence_start_line: usize = 0;
        let mut fence_content_start: usize = 0;
        let mut fence_char: char = '`';
        let mut fence_length: usize = 0;
        let mut fence_indent: usize = 0;
        let mut fence_indent_prefix = String::new();
        let mut fence_language = String::new();
        let mut fence_info_string = String::new();

        // Compute byte offsets via pointer arithmetic.
        // `content.lines()` returns slices into the original string,
        // so each line's pointer offset from `content` gives its byte position.
        // This correctly handles \n, \r\n, and empty lines.
        let content_start_ptr = content.as_ptr() as usize;
        let line_offsets: Vec<usize> = lines
            .iter()
            .map(|line| line.as_ptr() as usize - content_start_ptr)
            .collect();

        for (i, line) in lines.iter().enumerate() {
            let line_indent = crate::utils::mkdocs_common::get_line_indent(line);
            let is_admonition = mkdocs_admonitions::is_admonition_start(line);
            let is_tab = mkdocs_tabs::is_tab_marker(line);

            // Pop contexts when the current line is not indented enough to be content.
            // This runs for ALL lines (including new admonition/tab starts) to clean
            // up stale entries before potentially pushing a new context.
            // Blank lines never close a context.
            if !line.trim().is_empty() {
                while let Some(&ctx_indent) = context_indent_stack.last() {
                    if line_indent < ctx_indent + 4 {
                        context_indent_stack.pop();
                        // Leaving the context abandons an unclosed fence; no block is emitted for it.
                        if in_fence {
                            in_fence = false;
                        }
                    } else {
                        break;
                    }
                }
            }

            // Check for admonition start — push new context
            if is_admonition && let Some(indent) = mkdocs_admonitions::get_admonition_indent(line) {
                context_indent_stack.push(indent);
                continue;
            }

            // Check for tab marker — push new context
            if is_tab && let Some(indent) = mkdocs_tabs::get_tab_indent(line) {
                context_indent_stack.push(indent);
                continue;
            }

            // Only look for fences inside a MkDocs context
            if context_indent_stack.is_empty() {
                continue;
            }

            let trimmed = line.trim_start();
            // Byte length of the leading whitespace.
            let leading_spaces = line.len() - trimmed.len();

            if !in_fence {
                // Check for fence opening
                let (fc, fl) = if trimmed.starts_with("```") {
                    ('`', trimmed.chars().take_while(|&c| c == '`').count())
                } else if trimmed.starts_with("~~~") {
                    ('~', trimmed.chars().take_while(|&c| c == '~').count())
                } else {
                    continue;
                };

                if fl >= 3 {
                    in_fence = true;
                    fence_start_line = i;
                    fence_char = fc;
                    fence_length = fl;
                    fence_indent = leading_spaces;
                    fence_indent_prefix = line.get(..leading_spaces).unwrap_or("").to_string();

                    // The fence characters are single-byte, so `fl` is also a byte offset.
                    let after_fence = &trimmed[fl..];
                    fence_info_string = after_fence.trim().to_string();
                    fence_language = fence_info_string.split_whitespace().next().unwrap_or("").to_string();

                    // Content starts at the next line's byte offset
                    fence_content_start = line_offsets.get(i + 1).copied().unwrap_or(content.len());
                }
            } else {
                // Check for fence closing: same fence character, at least as long as the
                // opening fence, and nothing but whitespace after the fence run.
                let is_closing = if fence_char == '`' {
                    trimmed.starts_with("```")
                        && trimmed.chars().take_while(|&c| c == '`').count() >= fence_length
                        && trimmed.trim_start_matches('`').trim().is_empty()
                } else {
                    trimmed.starts_with("~~~")
                        && trimmed.chars().take_while(|&c| c == '~').count() >= fence_length
                        && trimmed.trim_start_matches('~').trim().is_empty()
                };

                if is_closing {
                    // Content ends where the closing fence line begins.
                    let content_end = line_offsets.get(i).copied().unwrap_or(content.len());

                    if content_end >= fence_content_start {
                        blocks.push(FencedCodeBlockInfo {
                            start_line: fence_start_line,
                            end_line: i,
                            content_start: fence_content_start,
                            content_end,
                            language: fence_language.clone(),
                            info_string: fence_info_string.clone(),
                            fence_char,
                            fence_length,
                            indent: fence_indent,
                            indent_prefix: fence_indent_prefix.clone(),
                        });
                    }

                    in_fence = false;
                }
            }
        }

        blocks
    }
544
545    /// Resolve a language tag to its canonical name.
546    fn resolve_language(&self, language: &str) -> String {
547        let lower = language.to_lowercase();
548        if let Some(mapped) = self.user_aliases.get(&lower) {
549            return mapped.clone();
550        }
551        match self.config.normalize_language {
552            NormalizeLanguage::Linguist => self.linguist.resolve(&lower),
553            NormalizeLanguage::Exact => lower,
554        }
555    }
556
557    /// Get the effective on_error setting for a language.
558    fn get_on_error(&self, language: &str) -> OnError {
559        self.config
560            .languages
561            .get(language)
562            .and_then(|lc| lc.on_error)
563            .unwrap_or(self.config.on_error)
564    }
565
566    /// Strip the fence indentation prefix from each line of a code block.
567    fn strip_indent_from_block(&self, content: &str, indent_prefix: &str) -> String {
568        if indent_prefix.is_empty() {
569            return content.to_string();
570        }
571
572        let mut out = String::with_capacity(content.len());
573        for line in content.split_inclusive('\n') {
574            if let Some(stripped) = line.strip_prefix(indent_prefix) {
575                out.push_str(stripped);
576            } else {
577                out.push_str(line);
578            }
579        }
580        out
581    }
582
583    /// Re-apply the fence indentation prefix to each line of a code block.
584    fn apply_indent_to_block(&self, content: &str, indent_prefix: &str) -> String {
585        if indent_prefix.is_empty() {
586            return content.to_string();
587        }
588        if content.is_empty() {
589            return String::new();
590        }
591
592        let mut out = String::with_capacity(content.len() + indent_prefix.len());
593        for line in content.split_inclusive('\n') {
594            if line == "\n" {
595                out.push_str(line);
596            } else {
597                out.push_str(indent_prefix);
598                out.push_str(line);
599            }
600        }
601        out
602    }
603
    /// Lint all code blocks in the content.
    ///
    /// Returns diagnostics from all configured linters.
    ///
    /// For every fenced block with a language tag, the tag is resolved to its
    /// canonical name, the lint tools configured for that language are looked
    /// up, and each tool is run on the block's content with the fence
    /// indentation stripped. Diagnostics are reported against 1-indexed lines of
    /// the markdown file.
    ///
    /// # Errors
    ///
    /// * `ProcessorError::NoToolsConfigured` when a block's language has no lint
    ///   tools and `on_missing_language_definition` is `FailFast`.
    /// * `ProcessorError::ToolBinaryNotFound` when a tool's binary is unavailable
    ///   and `on_missing_tool_binary` is `FailFast`.
    /// * `ProcessorError::ToolError` when a tool fails to execute and the
    ///   effective `on_error` for the language is `Fail`.
    pub fn lint(&self, content: &str) -> Result<Vec<CodeBlockDiagnostic>, ProcessorError> {
        // Skip the expensive parse when no tools could possibly produce output.
        // With on_missing=Ignore (default) and no languages with lint tools configured,
        // every block would be skipped, so the parse is wasted work.
        if self.config.on_missing_language_definition == OnMissing::Ignore
            && !self
                .config
                .languages
                .values()
                .any(|lc| lc.enabled && !lc.lint.is_empty())
        {
            return Ok(Vec::new());
        }

        // Quick content check: skip parsing if no configured language appears in the content.
        // This avoids the expensive pulldown-cmark parse when there are no matching code blocks.
        if self.config.on_missing_language_definition == OnMissing::Ignore
            && !self.has_potential_matching_blocks(content, true)
        {
            return Ok(Vec::new());
        }

        let mut all_diagnostics = Vec::new();
        let blocks = self.extract_code_blocks(content);

        for block in blocks {
            if block.language.is_empty() {
                continue; // Skip blocks without language tag
            }

            let canonical_lang = self.resolve_language(&block.language);

            // Get lint tools for this language
            let lang_config = self.config.languages.get(&canonical_lang);

            // If language is explicitly configured with enabled=false, skip silently
            if let Some(lc) = lang_config
                && !lc.enabled
            {
                continue;
            }

            let lint_tools = match lang_config {
                Some(lc) if !lc.lint.is_empty() => &lc.lint,
                _ => {
                    // No tools configured for this language in lint mode
                    match self.config.on_missing_language_definition {
                        OnMissing::Ignore => continue,
                        OnMissing::Fail => {
                            // Report the problem as a diagnostic and keep processing other blocks.
                            all_diagnostics.push(CodeBlockDiagnostic {
                                file_line: block.start_line + 1,
                                column: None,
                                message: format!("No lint tools configured for language '{canonical_lang}'"),
                                severity: DiagnosticSeverity::Error,
                                tool: "code-block-tools".to_string(),
                                code_block_start: block.start_line + 1,
                            });
                            continue;
                        }
                        OnMissing::FailFast => {
                            return Err(ProcessorError::NoToolsConfigured {
                                language: canonical_lang,
                                line: block.start_line + 1,
                            });
                        }
                    }
                }
            };

            // Extract code block content; empty or out-of-range spans are skipped.
            let code_content_raw = if block.content_start < block.content_end && block.content_end <= content.len() {
                &content[block.content_start..block.content_end]
            } else {
                continue;
            };
            // Tools see the code without the fence's indentation prefix.
            let code_content = self.strip_indent_from_block(code_content_raw, &block.indent_prefix);

            // Run each lint tool
            for tool_id in lint_tools {
                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown linting
                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
                    continue;
                }

                let tool_def = match self.resolve_tool(tool_id, ToolContext::Lint) {
                    Some(t) => t,
                    None => {
                        // An unknown tool ID is only logged; it never fails the run.
                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
                        continue;
                    }
                };

                // Check if tool binary exists before running
                // (the binary is the first element of the tool's command line).
                let tool_name = tool_def.command.first().map(String::as_str).unwrap_or("");
                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
                    match self.config.on_missing_tool_binary {
                        OnMissing::Ignore => {
                            log::debug!("Tool binary '{tool_name}' not found, skipping");
                            continue;
                        }
                        OnMissing::Fail => {
                            all_diagnostics.push(CodeBlockDiagnostic {
                                file_line: block.start_line + 1,
                                column: None,
                                message: format!("Tool binary '{tool_name}' not found in PATH"),
                                severity: DiagnosticSeverity::Error,
                                tool: "code-block-tools".to_string(),
                                code_block_start: block.start_line + 1,
                            });
                            continue;
                        }
                        OnMissing::FailFast => {
                            return Err(ProcessorError::ToolBinaryNotFound {
                                tool: tool_name.to_string(),
                                language: canonical_lang.clone(),
                                line: block.start_line + 1,
                            });
                        }
                    }
                }

                match self.executor.lint(tool_def, &code_content, Some(self.config.timeout)) {
                    Ok(output) => {
                        // Parse tool output into diagnostics
                        let diagnostics = self.parse_tool_output(
                            &output,
                            tool_id,
                            block.start_line + 1, // Convert to 1-indexed
                        );
                        all_diagnostics.extend(diagnostics);
                    }
                    Err(e) => {
                        // The per-language on_error setting overrides the global one.
                        let on_error = self.get_on_error(&canonical_lang);
                        match on_error {
                            OnError::Fail => return Err(e.into()),
                            OnError::Warn => {
                                log::warn!("Tool '{tool_id}' failed: {e}");
                            }
                            OnError::Skip => {
                                // Silently skip
                            }
                        }
                    }
                }
            }
        }

        Ok(all_diagnostics)
    }
756
757    /// Format all code blocks in the content.
758    ///
759    /// Returns the modified content with formatted code blocks and any errors that occurred.
760    /// With `on-missing-*` = `fail`, errors are collected but formatting continues.
761    /// With `on-missing-*` = `fail-fast`, returns Err immediately on first error.
762    pub fn format(&self, content: &str) -> Result<FormatOutput, ProcessorError> {
763        let no_output = FormatOutput {
764            content: content.to_string(),
765            had_errors: false,
766            error_messages: Vec::new(),
767        };
768
769        // Skip the expensive parse when no tools could produce output
770        if self.config.on_missing_language_definition == OnMissing::Ignore
771            && !self
772                .config
773                .languages
774                .values()
775                .any(|lc| lc.enabled && !lc.format.is_empty())
776        {
777            return Ok(no_output);
778        }
779
780        // Quick content check: skip parsing if no configured language appears in the content
781        if self.config.on_missing_language_definition == OnMissing::Ignore
782            && !self.has_potential_matching_blocks(content, false)
783        {
784            return Ok(no_output);
785        }
786
787        let blocks = self.extract_code_blocks(content);
788
789        if blocks.is_empty() {
790            return Ok(FormatOutput {
791                content: content.to_string(),
792                had_errors: false,
793                error_messages: Vec::new(),
794            });
795        }
796
797        // Process blocks in reverse order to maintain byte offsets
798        let mut result = content.to_string();
799        let mut error_messages: Vec<String> = Vec::new();
800
801        for block in blocks.into_iter().rev() {
802            if block.language.is_empty() {
803                continue;
804            }
805
806            let canonical_lang = self.resolve_language(&block.language);
807
808            // Get format tools for this language
809            let lang_config = self.config.languages.get(&canonical_lang);
810
811            // If language is explicitly configured with enabled=false, skip silently
812            if let Some(lc) = lang_config
813                && !lc.enabled
814            {
815                continue;
816            }
817
818            let format_tools = match lang_config {
819                Some(lc) if !lc.format.is_empty() => &lc.format,
820                _ => {
821                    // No tools configured for this language in format mode
822                    match self.config.on_missing_language_definition {
823                        OnMissing::Ignore => continue,
824                        OnMissing::Fail => {
825                            error_messages.push(format!(
826                                "No format tools configured for language '{canonical_lang}' at line {}",
827                                block.start_line + 1
828                            ));
829                            continue;
830                        }
831                        OnMissing::FailFast => {
832                            return Err(ProcessorError::NoToolsConfigured {
833                                language: canonical_lang,
834                                line: block.start_line + 1,
835                            });
836                        }
837                    }
838                }
839            };
840
841            // Extract code block content
842            if block.content_start >= block.content_end || block.content_end > result.len() {
843                continue;
844            }
845            let code_content_raw = result[block.content_start..block.content_end].to_string();
846            let code_content = self.strip_indent_from_block(&code_content_raw, &block.indent_prefix);
847
848            // Run format tools (use first successful one)
849            let mut formatted = code_content.clone();
850            let mut tool_ran = false;
851            for tool_id in format_tools {
852                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown formatting
853                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
854                    continue;
855                }
856
857                let tool_def = match self.resolve_tool(tool_id, ToolContext::Format) {
858                    Some(t) => t,
859                    None => {
860                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
861                        continue;
862                    }
863                };
864
865                // Check if tool binary exists before running
866                let tool_name = tool_def.command.first().map(String::as_str).unwrap_or("");
867                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
868                    match self.config.on_missing_tool_binary {
869                        OnMissing::Ignore => {
870                            log::debug!("Tool binary '{tool_name}' not found, skipping");
871                            continue;
872                        }
873                        OnMissing::Fail => {
874                            error_messages.push(format!(
875                                "Tool binary '{tool_name}' not found in PATH for language '{canonical_lang}' at line {}",
876                                block.start_line + 1
877                            ));
878                            continue;
879                        }
880                        OnMissing::FailFast => {
881                            return Err(ProcessorError::ToolBinaryNotFound {
882                                tool: tool_name.to_string(),
883                                language: canonical_lang.clone(),
884                                line: block.start_line + 1,
885                            });
886                        }
887                    }
888                }
889
890                match self.executor.format(tool_def, &formatted, Some(self.config.timeout)) {
891                    Ok(output) => {
892                        // Guard against formatters that produce empty output for non-empty input.
893                        // This prevents data loss from misconfigured tools (e.g., a lint tool
894                        // used as a formatter that validates but doesn't output content).
895                        if output.trim().is_empty() && !formatted.trim().is_empty() {
896                            log::warn!("Formatter '{tool_id}' produced empty output for non-empty input, skipping");
897                            continue;
898                        }
899
900                        // Ensure trailing newline matches original (unindented)
901                        formatted = output;
902                        if code_content.ends_with('\n') && !formatted.ends_with('\n') {
903                            formatted.push('\n');
904                        } else if !code_content.ends_with('\n') && formatted.ends_with('\n') {
905                            formatted.pop();
906                        }
907                        tool_ran = true;
908                        break; // Use first successful formatter
909                    }
910                    Err(e) => {
911                        let on_error = self.get_on_error(&canonical_lang);
912                        match on_error {
913                            OnError::Fail => {
914                                return Err(ProcessorError::ToolErrorAt {
915                                    error: e,
916                                    line: block.start_line + 1,
917                                    language: canonical_lang,
918                                });
919                            }
920                            OnError::Warn => {
921                                error_messages.push(format!("line {} ({}): {e}", block.start_line + 1, canonical_lang));
922                            }
923                            OnError::Skip => {}
924                        }
925                    }
926                }
927            }
928
929            // Replace content if changed and a tool actually ran
930            if tool_ran && formatted != code_content {
931                let reindented = self.apply_indent_to_block(&formatted, &block.indent_prefix);
932                if reindented != code_content_raw {
933                    result.replace_range(block.content_start..block.content_end, &reindented);
934                }
935            }
936        }
937
938        Ok(FormatOutput {
939            content: result,
940            had_errors: !error_messages.is_empty(),
941            error_messages,
942        })
943    }
944
945    /// Parse tool output into diagnostics.
946    ///
947    /// This is a basic parser that handles common output formats.
948    /// Tools vary widely in their output format, so this is best-effort.
949    fn parse_tool_output(
950        &self,
951        output: &ToolOutput,
952        tool_id: &str,
953        code_block_start_line: usize,
954    ) -> Vec<CodeBlockDiagnostic> {
955        let mut diagnostics = Vec::new();
956        let mut shellcheck_line: Option<usize> = None;
957
958        // Strip ANSI escape codes and combine stdout + stderr for parsing
959        let stdout_clean = strip_ansi_codes(&output.stdout);
960        let stderr_clean = strip_ansi_codes(&output.stderr);
961        let combined = format!("{stdout_clean}\n{stderr_clean}");
962
963        // State for multi-line "Error: msg" / "at line N column M" pattern
964        let mut pending_error: Option<(String, DiagnosticSeverity)> = None;
965
966        for line in combined.lines() {
967            let line = line.trim();
968            if line.is_empty() {
969                continue;
970            }
971
972            // Resolve pending "Error: msg" from previous line
973            if let Some((ref msg, severity)) = pending_error {
974                if let Some((line_num, col)) = Self::parse_at_line_column(line) {
975                    diagnostics.push(CodeBlockDiagnostic {
976                        file_line: code_block_start_line + line_num,
977                        column: Some(col),
978                        message: msg.clone(),
979                        severity,
980                        tool: tool_id.to_string(),
981                        code_block_start: code_block_start_line,
982                    });
983                    pending_error = None;
984                    continue;
985                }
986                // No position info found; emit error without line mapping
987                diagnostics.push(CodeBlockDiagnostic {
988                    file_line: code_block_start_line,
989                    column: None,
990                    message: msg.clone(),
991                    severity,
992                    tool: tool_id.to_string(),
993                    code_block_start: code_block_start_line,
994                });
995                pending_error = None;
996                // Fall through to parse current line
997            }
998
999            if let Some(line_num) = self.parse_shellcheck_header(line) {
1000                shellcheck_line = Some(line_num);
1001                continue;
1002            }
1003
1004            if let Some(line_num) = shellcheck_line
1005                && let Some(diag) = self.parse_shellcheck_message(line, tool_id, code_block_start_line, line_num)
1006            {
1007                diagnostics.push(diag);
1008                continue;
1009            }
1010
1011            // Try pattern: "file:line:col: message" or "file:line: message"
1012            if let Some(diag) = self.parse_standard_format(line, tool_id, code_block_start_line) {
1013                diagnostics.push(diag);
1014                continue;
1015            }
1016
1017            // Try pattern: "line:col message" (eslint style)
1018            if let Some(diag) = self.parse_eslint_format(line, tool_id, code_block_start_line) {
1019                diagnostics.push(diag);
1020                continue;
1021            }
1022
1023            // Try single-line shellcheck format fallback
1024            if let Some(diag) = self.parse_shellcheck_format(line, tool_id, code_block_start_line) {
1025                diagnostics.push(diag);
1026                continue;
1027            }
1028
1029            // Try multi-line "Error: msg" / "Warning: msg" pattern
1030            if let Some(error_info) = Self::parse_error_line(line) {
1031                pending_error = Some(error_info);
1032            }
1033        }
1034
1035        // Flush any remaining pending error
1036        if let Some((msg, severity)) = pending_error {
1037            diagnostics.push(CodeBlockDiagnostic {
1038                file_line: code_block_start_line,
1039                column: None,
1040                message: msg,
1041                severity,
1042                tool: tool_id.to_string(),
1043                code_block_start: code_block_start_line,
1044            });
1045        }
1046
1047        // If no diagnostics parsed but tool failed, use combined output as fallback
1048        if diagnostics.is_empty() && !output.success {
1049            let lines: Vec<&str> = combined.lines().map(|l| l.trim()).filter(|l| !l.is_empty()).collect();
1050
1051            if lines.is_empty() {
1052                let exit_code = output.exit_code;
1053                diagnostics.push(CodeBlockDiagnostic {
1054                    file_line: code_block_start_line,
1055                    column: None,
1056                    message: format!("Tool exited with code {exit_code}"),
1057                    severity: DiagnosticSeverity::Error,
1058                    tool: tool_id.to_string(),
1059                    code_block_start: code_block_start_line,
1060                });
1061            } else {
1062                for line_text in lines {
1063                    diagnostics.push(CodeBlockDiagnostic {
1064                        file_line: code_block_start_line,
1065                        column: None,
1066                        message: line_text.to_string(),
1067                        severity: DiagnosticSeverity::Error,
1068                        tool: tool_id.to_string(),
1069                        code_block_start: code_block_start_line,
1070                    });
1071                }
1072            }
1073        }
1074
1075        diagnostics
1076    }
1077
1078    /// Parse standard "file:line:col: message" format.
1079    fn parse_standard_format(
1080        &self,
1081        line: &str,
1082        tool_id: &str,
1083        code_block_start_line: usize,
1084    ) -> Option<CodeBlockDiagnostic> {
1085        // Match patterns like "file.py:1:10: E501 message"
1086        let mut parts = line.rsplitn(4, ':');
1087        let message = parts.next()?.trim().to_string();
1088        let part1 = parts.next()?.trim().to_string();
1089        let part2 = parts.next()?.trim().to_string();
1090        let part3 = parts.next().map(|s| s.trim().to_string());
1091
1092        let (line_part, col_part) = if part3.is_some() {
1093            (part2, Some(part1))
1094        } else {
1095            (part1, None)
1096        };
1097
1098        if let Ok(line_num) = line_part.parse::<usize>() {
1099            let column = col_part.and_then(|s| s.parse::<usize>().ok());
1100            let message = Self::strip_fixable_markers(&message);
1101            if !message.is_empty() {
1102                let severity = self.infer_severity(&message);
1103                return Some(CodeBlockDiagnostic {
1104                    file_line: code_block_start_line + line_num,
1105                    column,
1106                    message,
1107                    severity,
1108                    tool: tool_id.to_string(),
1109                    code_block_start: code_block_start_line,
1110                });
1111            }
1112        }
1113        None
1114    }
1115
1116    /// Parse eslint-style "line:col severity message" format.
1117    fn parse_eslint_format(
1118        &self,
1119        line: &str,
1120        tool_id: &str,
1121        code_block_start_line: usize,
1122    ) -> Option<CodeBlockDiagnostic> {
1123        // Match "1:10 error Message"
1124        let parts: Vec<&str> = line.splitn(3, ' ').collect();
1125        if parts.len() >= 2 {
1126            let loc_parts: Vec<&str> = parts[0].split(':').collect();
1127            if loc_parts.len() == 2
1128                && let (Ok(line_num), Ok(col)) = (loc_parts[0].parse::<usize>(), loc_parts[1].parse::<usize>())
1129            {
1130                let (sev_part, msg_part) = if parts.len() >= 3 {
1131                    (parts[1], parts[2])
1132                } else {
1133                    (parts[1], "")
1134                };
1135                let message = if msg_part.is_empty() {
1136                    sev_part.to_string()
1137                } else {
1138                    msg_part.to_string()
1139                };
1140                let message = Self::strip_fixable_markers(&message);
1141                let severity = match sev_part.to_lowercase().as_str() {
1142                    "error" => DiagnosticSeverity::Error,
1143                    "warning" | "warn" => DiagnosticSeverity::Warning,
1144                    "info" => DiagnosticSeverity::Info,
1145                    _ => self.infer_severity(&message),
1146                };
1147                return Some(CodeBlockDiagnostic {
1148                    file_line: code_block_start_line + line_num,
1149                    column: Some(col),
1150                    message,
1151                    severity,
1152                    tool: tool_id.to_string(),
1153                    code_block_start: code_block_start_line,
1154                });
1155            }
1156        }
1157        None
1158    }
1159
1160    /// Parse shellcheck-style "In - line N: message" format.
1161    fn parse_shellcheck_format(
1162        &self,
1163        line: &str,
1164        tool_id: &str,
1165        code_block_start_line: usize,
1166    ) -> Option<CodeBlockDiagnostic> {
1167        // Match "In - line 5:" pattern
1168        if line.starts_with("In ")
1169            && line.contains(" line ")
1170            && let Some(line_start) = line.find(" line ")
1171        {
1172            let after_line = &line[line_start + 6..];
1173            if let Some(colon_pos) = after_line.find(':')
1174                && let Ok(line_num) = after_line[..colon_pos].trim().parse::<usize>()
1175            {
1176                let message = Self::strip_fixable_markers(after_line[colon_pos + 1..].trim());
1177                if !message.is_empty() {
1178                    let severity = self.infer_severity(&message);
1179                    return Some(CodeBlockDiagnostic {
1180                        file_line: code_block_start_line + line_num,
1181                        column: None,
1182                        message,
1183                        severity,
1184                        tool: tool_id.to_string(),
1185                        code_block_start: code_block_start_line,
1186                    });
1187                }
1188            }
1189        }
1190        None
1191    }
1192
1193    /// Parse shellcheck header line to capture line number context.
1194    fn parse_shellcheck_header(&self, line: &str) -> Option<usize> {
1195        if line.starts_with("In ")
1196            && line.contains(" line ")
1197            && let Some(line_start) = line.find(" line ")
1198        {
1199            let after_line = &line[line_start + 6..];
1200            if let Some(colon_pos) = after_line.find(':') {
1201                return after_line[..colon_pos].trim().parse::<usize>().ok();
1202            }
1203        }
1204        None
1205    }
1206
1207    /// Parse shellcheck message line containing SCXXXX codes.
1208    fn parse_shellcheck_message(
1209        &self,
1210        line: &str,
1211        tool_id: &str,
1212        code_block_start_line: usize,
1213        line_num: usize,
1214    ) -> Option<CodeBlockDiagnostic> {
1215        let sc_pos = line.find("SC")?;
1216        let after_sc = &line[sc_pos + 2..];
1217        let code_len = after_sc.chars().take_while(|c| c.is_ascii_digit()).count();
1218        if code_len == 0 {
1219            return None;
1220        }
1221        let after_code = &after_sc[code_len..];
1222        let sev_start = after_code.find('(')? + 1;
1223        let sev_end = after_code[sev_start..].find(')')? + sev_start;
1224        let sev = after_code[sev_start..sev_end].trim().to_lowercase();
1225        let message_start = after_code.find("):")? + 2;
1226        let message = Self::strip_fixable_markers(after_code[message_start..].trim());
1227        if message.is_empty() {
1228            return None;
1229        }
1230
1231        let severity = match sev.as_str() {
1232            "error" => DiagnosticSeverity::Error,
1233            "warning" | "warn" => DiagnosticSeverity::Warning,
1234            "info" | "style" => DiagnosticSeverity::Info,
1235            _ => self.infer_severity(&message),
1236        };
1237
1238        Some(CodeBlockDiagnostic {
1239            file_line: code_block_start_line + line_num,
1240            column: None,
1241            message,
1242            severity,
1243            tool: tool_id.to_string(),
1244            code_block_start: code_block_start_line,
1245        })
1246    }
1247
1248    /// Parse "Error: <message>" or "Warning: <message>" lines.
1249    ///
1250    /// Used for tools like tombi that output multi-line diagnostics where the
1251    /// error message and position are on separate lines. Only matches capitalized
1252    /// prefixes to avoid conflicting with lowercase `error:` in less structured output.
1253    fn parse_error_line(line: &str) -> Option<(String, DiagnosticSeverity)> {
1254        let (msg, severity) = if let Some(msg) = line.strip_prefix("Error:") {
1255            (msg, DiagnosticSeverity::Error)
1256        } else if let Some(msg) = line.strip_prefix("Warning:") {
1257            (msg, DiagnosticSeverity::Warning)
1258        } else {
1259            return None;
1260        };
1261        let msg = msg.trim();
1262        if msg.is_empty() {
1263            return None;
1264        }
1265        Some((msg.to_string(), severity))
1266    }
1267
1268    /// Parse "at line N column M" position lines (case-insensitive).
1269    ///
1270    /// Returns (line_number, column_number) if the pattern matches.
1271    fn parse_at_line_column(line: &str) -> Option<(usize, usize)> {
1272        let lower = line.to_lowercase();
1273        let rest = lower.strip_prefix("at line ")?;
1274        let mut parts = rest.split_whitespace();
1275        let line_num: usize = parts.next()?.parse().ok()?;
1276        if parts.next()? != "column" {
1277            return None;
1278        }
1279        let col: usize = parts.next()?.parse().ok()?;
1280        Some((line_num, col))
1281    }
1282
1283    /// Infer severity from message content.
1284    fn infer_severity(&self, message: &str) -> DiagnosticSeverity {
1285        let lower = message.to_lowercase();
1286        if lower.contains("error")
1287            || lower.starts_with("e") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1288            || lower.starts_with("f") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1289        {
1290            DiagnosticSeverity::Error
1291        } else if lower.contains("warning")
1292            || lower.contains("warn")
1293            || lower.starts_with("w") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1294        {
1295            DiagnosticSeverity::Warning
1296        } else {
1297            DiagnosticSeverity::Info
1298        }
1299    }
1300
1301    /// Strip "fixable" markers from external tool messages.
1302    ///
1303    /// External tools like ruff show `[*]` to indicate fixable issues, but in rumdl's
1304    /// context these markers can be misleading - the lint tool's fix capability may
1305    /// differ from what our configured formatter can fix. We strip these markers
1306    /// to avoid making promises we can't keep.
1307    fn strip_fixable_markers(message: &str) -> String {
1308        message
1309            .replace(" [*]", "")
1310            .replace("[*] ", "")
1311            .replace("[*]", "")
1312            .replace(" (fixable)", "")
1313            .replace("(fixable) ", "")
1314            .replace("(fixable)", "")
1315            .replace(" [fix available]", "")
1316            .replace("[fix available] ", "")
1317            .replace("[fix available]", "")
1318            .replace(" [autofix]", "")
1319            .replace("[autofix] ", "")
1320            .replace("[autofix]", "")
1321            .trim()
1322            .to_string()
1323    }
1324}
1325
/// Builder for FencedCodeBlockInfo during parsing.
///
/// Holds what is known about a fenced code block when its opening fence is seen.
/// NOTE(review): the end-of-content offset is not stored here; presumably it is
/// supplied when the block is closed. Confirm against the code that consumes the builder.
struct FencedCodeBlockBuilder {
    // Line of the opening fence; presumably 0-indexed like `FencedCodeBlockInfo::start_line` (callers add 1). TODO confirm
    start_line: usize,
    // Offset at which the block's content begins; presumably a byte offset into the document. TODO confirm
    content_start: usize,
    // Language tag of the block (empty when the fence has none).
    language: String,
    // Full info string following the fence, e.g. `python title="example.py"`.
    info_string: String,
    // Fence character: '`' or '~'.
    fence_char: char,
    // Number of fence characters in the opening fence.
    fence_length: usize,
    // Indentation of the fence; presumably the width of `indent_prefix`. TODO confirm
    indent: usize,
    // Literal whitespace that precedes the fence on its line.
    indent_prefix: String,
}
1337
1338#[cfg(test)]
1339mod tests {
1340    use super::*;
1341
1342    fn default_config() -> CodeBlockToolsConfig {
1343        CodeBlockToolsConfig::default()
1344    }
1345
1346    #[test]
1347    fn test_extract_code_blocks() {
1348        let config = default_config();
1349        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1350
1351        let content = r#"# Example
1352
1353```python
1354def hello():
1355    print("Hello")
1356```
1357
1358Some text
1359
1360```rust
1361fn main() {}
1362```
1363"#;
1364
1365        let blocks = processor.extract_code_blocks(content);
1366
1367        assert_eq!(blocks.len(), 2);
1368
1369        assert_eq!(blocks[0].language, "python");
1370        assert_eq!(blocks[0].fence_char, '`');
1371        assert_eq!(blocks[0].fence_length, 3);
1372        assert_eq!(blocks[0].start_line, 2);
1373        assert_eq!(blocks[0].indent, 0);
1374        assert_eq!(blocks[0].indent_prefix, "");
1375
1376        assert_eq!(blocks[1].language, "rust");
1377        assert_eq!(blocks[1].fence_char, '`');
1378        assert_eq!(blocks[1].fence_length, 3);
1379    }
1380
1381    #[test]
1382    fn test_extract_code_blocks_with_info_string() {
1383        let config = default_config();
1384        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1385
1386        let content = "```python title=\"example.py\"\ncode\n```";
1387        let blocks = processor.extract_code_blocks(content);
1388
1389        assert_eq!(blocks.len(), 1);
1390        assert_eq!(blocks[0].language, "python");
1391        assert_eq!(blocks[0].info_string, "python title=\"example.py\"");
1392    }
1393
1394    #[test]
1395    fn test_extract_code_blocks_tilde_fence() {
1396        let config = default_config();
1397        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1398
1399        let content = "~~~bash\necho hello\n~~~";
1400        let blocks = processor.extract_code_blocks(content);
1401
1402        assert_eq!(blocks.len(), 1);
1403        assert_eq!(blocks[0].language, "bash");
1404        assert_eq!(blocks[0].fence_char, '~');
1405        assert_eq!(blocks[0].fence_length, 3);
1406        assert_eq!(blocks[0].indent_prefix, "");
1407    }
1408
1409    #[test]
1410    fn test_extract_code_blocks_with_indent_prefix() {
1411        let config = default_config();
1412        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1413
1414        let content = "  - item\n    ```python\n    print('hi')\n    ```";
1415        let blocks = processor.extract_code_blocks(content);
1416
1417        assert_eq!(blocks.len(), 1);
1418        assert_eq!(blocks[0].indent_prefix, "    ");
1419    }
1420
1421    #[test]
1422    fn test_extract_code_blocks_no_language() {
1423        let config = default_config();
1424        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1425
1426        let content = "```\nplain code\n```";
1427        let blocks = processor.extract_code_blocks(content);
1428
1429        assert_eq!(blocks.len(), 1);
1430        assert_eq!(blocks[0].language, "");
1431    }
1432
1433    #[test]
1434    fn test_resolve_language_linguist() {
1435        let mut config = default_config();
1436        config.normalize_language = NormalizeLanguage::Linguist;
1437        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1438
1439        assert_eq!(processor.resolve_language("py"), "python");
1440        assert_eq!(processor.resolve_language("bash"), "shell");
1441        assert_eq!(processor.resolve_language("js"), "javascript");
1442    }
1443
1444    #[test]
1445    fn test_resolve_language_exact() {
1446        let mut config = default_config();
1447        config.normalize_language = NormalizeLanguage::Exact;
1448        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1449
1450        assert_eq!(processor.resolve_language("py"), "py");
1451        assert_eq!(processor.resolve_language("BASH"), "bash");
1452    }
1453
1454    #[test]
1455    fn test_resolve_language_user_alias_override() {
1456        let mut config = default_config();
1457        config.language_aliases.insert("py".to_string(), "python".to_string());
1458        config.normalize_language = NormalizeLanguage::Exact;
1459        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1460
1461        assert_eq!(processor.resolve_language("PY"), "python");
1462    }
1463
1464    #[test]
1465    fn test_indent_strip_and_reapply_roundtrip() {
1466        let config = default_config();
1467        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1468
1469        let raw = "    def hello():\n        print('hi')";
1470        let stripped = processor.strip_indent_from_block(raw, "    ");
1471        assert_eq!(stripped, "def hello():\n    print('hi')");
1472
1473        let reapplied = processor.apply_indent_to_block(&stripped, "    ");
1474        assert_eq!(reapplied, raw);
1475    }
1476
1477    #[test]
1478    fn test_infer_severity() {
1479        let config = default_config();
1480        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1481
1482        assert_eq!(
1483            processor.infer_severity("E501 line too long"),
1484            DiagnosticSeverity::Error
1485        );
1486        assert_eq!(
1487            processor.infer_severity("W291 trailing whitespace"),
1488            DiagnosticSeverity::Warning
1489        );
1490        assert_eq!(
1491            processor.infer_severity("error: something failed"),
1492            DiagnosticSeverity::Error
1493        );
1494        assert_eq!(
1495            processor.infer_severity("warning: unused variable"),
1496            DiagnosticSeverity::Warning
1497        );
1498        assert_eq!(
1499            processor.infer_severity("note: consider using"),
1500            DiagnosticSeverity::Info
1501        );
1502    }
1503
1504    #[test]
1505    fn test_parse_standard_format_windows_path() {
1506        let config = default_config();
1507        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1508
1509        let output = ToolOutput {
1510            stdout: "C:\\path\\file.py:2:5: E123 message".to_string(),
1511            stderr: String::new(),
1512            exit_code: 1,
1513            success: false,
1514        };
1515
1516        let diags = processor.parse_tool_output(&output, "ruff:check", 10);
1517        assert_eq!(diags.len(), 1);
1518        assert_eq!(diags[0].file_line, 12);
1519        assert_eq!(diags[0].column, Some(5));
1520        assert_eq!(diags[0].message, "E123 message");
1521    }
1522
1523    #[test]
1524    fn test_parse_eslint_severity() {
1525        let config = default_config();
1526        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1527
1528        let output = ToolOutput {
1529            stdout: "1:2 error Unexpected token".to_string(),
1530            stderr: String::new(),
1531            exit_code: 1,
1532            success: false,
1533        };
1534
1535        let diags = processor.parse_tool_output(&output, "eslint", 5);
1536        assert_eq!(diags.len(), 1);
1537        assert_eq!(diags[0].file_line, 6);
1538        assert_eq!(diags[0].column, Some(2));
1539        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1540        assert_eq!(diags[0].message, "Unexpected token");
1541    }
1542
1543    #[test]
1544    fn test_parse_shellcheck_multiline() {
1545        let config = default_config();
1546        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1547
1548        let output = ToolOutput {
1549            stdout: "In - line 3:\necho $var\n ^-- SC2086 (info): Double quote to prevent globbing".to_string(),
1550            stderr: String::new(),
1551            exit_code: 1,
1552            success: false,
1553        };
1554
1555        let diags = processor.parse_tool_output(&output, "shellcheck", 10);
1556        assert_eq!(diags.len(), 1);
1557        assert_eq!(diags[0].file_line, 13);
1558        assert_eq!(diags[0].severity, DiagnosticSeverity::Info);
1559        assert_eq!(diags[0].message, "Double quote to prevent globbing");
1560    }
1561
1562    #[test]
1563    fn test_lint_no_config() {
1564        let config = default_config();
1565        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1566
1567        let content = "```python\nprint('hello')\n```";
1568        let result = processor.lint(content);
1569
1570        // Should succeed with no diagnostics (no tools configured)
1571        assert!(result.is_ok());
1572        assert!(result.unwrap().is_empty());
1573    }
1574
1575    #[test]
1576    fn test_format_no_config() {
1577        let config = default_config();
1578        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1579
1580        let content = "```python\nprint('hello')\n```";
1581        let result = processor.format(content);
1582
1583        // Should succeed with unchanged content (no tools configured)
1584        assert!(result.is_ok());
1585        let output = result.unwrap();
1586        assert_eq!(output.content, content);
1587        assert!(!output.had_errors);
1588        assert!(output.error_messages.is_empty());
1589    }
1590
1591    #[test]
1592    fn test_lint_on_missing_language_definition_fail() {
1593        let mut config = default_config();
1594        config.on_missing_language_definition = OnMissing::Fail;
1595        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1596
1597        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1598        let result = processor.lint(content);
1599
1600        // Should succeed but return diagnostics for both missing language definitions
1601        assert!(result.is_ok());
1602        let diagnostics = result.unwrap();
1603        assert_eq!(diagnostics.len(), 2);
1604        assert!(diagnostics[0].message.contains("No lint tools configured"));
1605        assert!(diagnostics[0].message.contains("python"));
1606        assert!(diagnostics[1].message.contains("javascript"));
1607    }
1608
1609    #[test]
1610    fn test_lint_on_missing_language_definition_fail_fast() {
1611        let mut config = default_config();
1612        config.on_missing_language_definition = OnMissing::FailFast;
1613        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1614
1615        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1616        let result = processor.lint(content);
1617
1618        // Should fail immediately on first missing language
1619        assert!(result.is_err());
1620        let err = result.unwrap_err();
1621        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1622    }
1623
1624    #[test]
1625    fn test_format_on_missing_language_definition_fail() {
1626        let mut config = default_config();
1627        config.on_missing_language_definition = OnMissing::Fail;
1628        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1629
1630        let content = "```python\nprint('hello')\n```";
1631        let result = processor.format(content);
1632
1633        // Should succeed but report errors
1634        assert!(result.is_ok());
1635        let output = result.unwrap();
1636        assert_eq!(output.content, content); // Content unchanged
1637        assert!(output.had_errors);
1638        assert!(!output.error_messages.is_empty());
1639        assert!(output.error_messages[0].contains("No format tools configured"));
1640    }
1641
1642    #[test]
1643    fn test_format_on_missing_language_definition_fail_fast() {
1644        let mut config = default_config();
1645        config.on_missing_language_definition = OnMissing::FailFast;
1646        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1647
1648        let content = "```python\nprint('hello')\n```";
1649        let result = processor.format(content);
1650
1651        // Should fail immediately
1652        assert!(result.is_err());
1653        let err = result.unwrap_err();
1654        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1655    }
1656
1657    #[test]
1658    fn test_lint_on_missing_tool_binary_fail() {
1659        use super::super::config::{LanguageToolConfig, ToolDefinition};
1660
1661        let mut config = default_config();
1662        config.on_missing_tool_binary = OnMissing::Fail;
1663
1664        // Configure a tool with a non-existent binary
1665        let lang_config = LanguageToolConfig {
1666            lint: vec!["nonexistent-linter".to_string()],
1667            ..Default::default()
1668        };
1669        config.languages.insert("python".to_string(), lang_config);
1670
1671        let tool_def = ToolDefinition {
1672            command: vec!["nonexistent-binary-xyz123".to_string()],
1673            ..Default::default()
1674        };
1675        config.tools.insert("nonexistent-linter".to_string(), tool_def);
1676
1677        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1678
1679        let content = "```python\nprint('hello')\n```";
1680        let result = processor.lint(content);
1681
1682        // Should succeed but return diagnostic for missing binary
1683        assert!(result.is_ok());
1684        let diagnostics = result.unwrap();
1685        assert_eq!(diagnostics.len(), 1);
1686        assert!(diagnostics[0].message.contains("not found in PATH"));
1687    }
1688
1689    #[test]
1690    fn test_lint_on_missing_tool_binary_fail_fast() {
1691        use super::super::config::{LanguageToolConfig, ToolDefinition};
1692
1693        let mut config = default_config();
1694        config.on_missing_tool_binary = OnMissing::FailFast;
1695
1696        // Configure a tool with a non-existent binary
1697        let lang_config = LanguageToolConfig {
1698            lint: vec!["nonexistent-linter".to_string()],
1699            ..Default::default()
1700        };
1701        config.languages.insert("python".to_string(), lang_config);
1702
1703        let tool_def = ToolDefinition {
1704            command: vec!["nonexistent-binary-xyz123".to_string()],
1705            ..Default::default()
1706        };
1707        config.tools.insert("nonexistent-linter".to_string(), tool_def);
1708
1709        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1710
1711        let content = "```python\nprint('hello')\n```";
1712        let result = processor.lint(content);
1713
1714        // Should fail immediately
1715        assert!(result.is_err());
1716        let err = result.unwrap_err();
1717        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
1718    }
1719
1720    #[test]
1721    fn test_format_on_missing_tool_binary_fail() {
1722        use super::super::config::{LanguageToolConfig, ToolDefinition};
1723
1724        let mut config = default_config();
1725        config.on_missing_tool_binary = OnMissing::Fail;
1726
1727        // Configure a tool with a non-existent binary
1728        let lang_config = LanguageToolConfig {
1729            format: vec!["nonexistent-formatter".to_string()],
1730            ..Default::default()
1731        };
1732        config.languages.insert("python".to_string(), lang_config);
1733
1734        let tool_def = ToolDefinition {
1735            command: vec!["nonexistent-binary-xyz123".to_string()],
1736            ..Default::default()
1737        };
1738        config.tools.insert("nonexistent-formatter".to_string(), tool_def);
1739
1740        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1741
1742        let content = "```python\nprint('hello')\n```";
1743        let result = processor.format(content);
1744
1745        // Should succeed but report errors
1746        assert!(result.is_ok());
1747        let output = result.unwrap();
1748        assert_eq!(output.content, content); // Content unchanged
1749        assert!(output.had_errors);
1750        assert!(!output.error_messages.is_empty());
1751        assert!(output.error_messages[0].contains("not found in PATH"));
1752    }
1753
1754    #[test]
1755    fn test_format_on_missing_tool_binary_fail_fast() {
1756        use super::super::config::{LanguageToolConfig, ToolDefinition};
1757
1758        let mut config = default_config();
1759        config.on_missing_tool_binary = OnMissing::FailFast;
1760
1761        // Configure a tool with a non-existent binary
1762        let lang_config = LanguageToolConfig {
1763            format: vec!["nonexistent-formatter".to_string()],
1764            ..Default::default()
1765        };
1766        config.languages.insert("python".to_string(), lang_config);
1767
1768        let tool_def = ToolDefinition {
1769            command: vec!["nonexistent-binary-xyz123".to_string()],
1770            ..Default::default()
1771        };
1772        config.tools.insert("nonexistent-formatter".to_string(), tool_def);
1773
1774        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1775
1776        let content = "```python\nprint('hello')\n```";
1777        let result = processor.format(content);
1778
1779        // Should fail immediately
1780        assert!(result.is_err());
1781        let err = result.unwrap_err();
1782        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
1783    }
1784
1785    #[test]
1786    fn test_lint_rumdl_builtin_skipped_for_markdown() {
1787        // Configure the built-in "rumdl" tool for markdown
1788        // The processor should skip it (handled by embedded markdown linting)
1789        let mut config = default_config();
1790        config.languages.insert(
1791            "markdown".to_string(),
1792            LanguageToolConfig {
1793                lint: vec![RUMDL_BUILTIN_TOOL.to_string()],
1794                ..Default::default()
1795            },
1796        );
1797        config.on_missing_language_definition = OnMissing::Fail;
1798        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1799
1800        let content = "```markdown\n# Hello\n```";
1801        let result = processor.lint(content);
1802
1803        // Should succeed with no diagnostics - "rumdl" tool is skipped, not treated as unknown
1804        assert!(result.is_ok());
1805        assert!(result.unwrap().is_empty());
1806    }
1807
1808    #[test]
1809    fn test_format_rumdl_builtin_skipped_for_markdown() {
1810        // Configure the built-in "rumdl" tool for markdown
1811        let mut config = default_config();
1812        config.languages.insert(
1813            "markdown".to_string(),
1814            LanguageToolConfig {
1815                format: vec![RUMDL_BUILTIN_TOOL.to_string()],
1816                ..Default::default()
1817            },
1818        );
1819        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1820
1821        let content = "```markdown\n# Hello\n```";
1822        let result = processor.format(content);
1823
1824        // Should succeed with unchanged content - "rumdl" tool is skipped
1825        assert!(result.is_ok());
1826        let output = result.unwrap();
1827        assert_eq!(output.content, content);
1828        assert!(!output.had_errors);
1829    }
1830
1831    #[test]
1832    fn test_is_markdown_language() {
1833        // Test the helper function
1834        assert!(is_markdown_language("markdown"));
1835        assert!(is_markdown_language("Markdown"));
1836        assert!(is_markdown_language("MARKDOWN"));
1837        assert!(is_markdown_language("md"));
1838        assert!(is_markdown_language("MD"));
1839        assert!(!is_markdown_language("python"));
1840        assert!(!is_markdown_language("rust"));
1841        assert!(!is_markdown_language(""));
1842    }
1843
1844    // Issue #423: MkDocs admonition code block detection
1845
1846    #[test]
1847    fn test_extract_mkdocs_admonition_code_block() {
1848        let config = default_config();
1849        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1850
1851        let content = "!!! note\n    Some text\n\n    ```python\n    def hello():\n        pass\n    ```\n";
1852        let blocks = processor.extract_code_blocks(content);
1853
1854        assert_eq!(blocks.len(), 1, "Should detect code block inside MkDocs admonition");
1855        assert_eq!(blocks[0].language, "python");
1856    }
1857
1858    #[test]
1859    fn test_extract_mkdocs_tab_code_block() {
1860        let config = default_config();
1861        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1862
1863        let content = "=== \"Python\"\n\n    ```python\n    print(\"hello\")\n    ```\n";
1864        let blocks = processor.extract_code_blocks(content);
1865
1866        assert_eq!(blocks.len(), 1, "Should detect code block inside MkDocs tab");
1867        assert_eq!(blocks[0].language, "python");
1868    }
1869
1870    #[test]
1871    fn test_standard_flavor_ignores_admonition_indented_content() {
1872        let config = default_config();
1873        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1874
1875        // With standard flavor, pulldown_cmark parses this differently;
1876        // our MkDocs extraction should NOT run
1877        let content = "!!! note\n    Some text\n\n    ```python\n    def hello():\n        pass\n    ```\n";
1878        let blocks = processor.extract_code_blocks(content);
1879
1880        // Standard flavor relies on pulldown_cmark only, which may or may not detect
1881        // indented fenced blocks. The key assertion is that we don't double-detect.
1882        // With standard flavor, the MkDocs extraction path is skipped entirely.
1883        for (i, b) in blocks.iter().enumerate() {
1884            for (j, b2) in blocks.iter().enumerate() {
1885                if i != j {
1886                    assert_ne!(b.start_line, b2.start_line, "No duplicate blocks should exist");
1887                }
1888            }
1889        }
1890    }
1891
1892    #[test]
1893    fn test_mkdocs_top_level_blocks_alongside_admonition() {
1894        let config = default_config();
1895        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1896
1897        let content =
1898            "```rust\nfn main() {}\n```\n\n!!! note\n    Some text\n\n    ```python\n    print(\"hello\")\n    ```\n";
1899        let blocks = processor.extract_code_blocks(content);
1900
1901        assert_eq!(
1902            blocks.len(),
1903            2,
1904            "Should detect both top-level and admonition code blocks"
1905        );
1906        assert_eq!(blocks[0].language, "rust");
1907        assert_eq!(blocks[1].language, "python");
1908    }
1909
1910    #[test]
1911    fn test_mkdocs_nested_admonition_code_block() {
1912        let config = default_config();
1913        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1914
1915        let content = "\
1916!!! note
1917    Some text
1918
1919    !!! warning
1920        Nested content
1921
1922        ```python
1923        x = 1
1924        ```
1925";
1926        let blocks = processor.extract_code_blocks(content);
1927        assert_eq!(blocks.len(), 1, "Should detect code block inside nested admonition");
1928        assert_eq!(blocks[0].language, "python");
1929    }
1930
1931    #[test]
1932    fn test_mkdocs_consecutive_admonitions_no_stale_context() {
1933        let config = default_config();
1934        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1935
1936        // Two consecutive admonitions at the same indent level.
1937        // The first has no code block, the second does.
1938        let content = "\
1939!!! note
1940    First admonition content
1941
1942!!! warning
1943    Second admonition content
1944
1945    ```python
1946    y = 2
1947    ```
1948";
1949        let blocks = processor.extract_code_blocks(content);
1950        assert_eq!(blocks.len(), 1, "Should detect code block in second admonition only");
1951        assert_eq!(blocks[0].language, "python");
1952    }
1953
1954    #[test]
1955    fn test_mkdocs_crlf_line_endings() {
1956        let config = default_config();
1957        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1958
1959        // Use \r\n line endings
1960        let content = "!!! note\r\n    Some text\r\n\r\n    ```python\r\n    x = 1\r\n    ```\r\n";
1961        let blocks = processor.extract_code_blocks(content);
1962
1963        assert_eq!(blocks.len(), 1, "Should detect code block with CRLF line endings");
1964        assert_eq!(blocks[0].language, "python");
1965
1966        // Verify byte offsets point to valid content
1967        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
1968        assert!(
1969            extracted.contains("x = 1"),
1970            "Extracted content should contain code. Got: {extracted:?}"
1971        );
1972    }
1973
1974    #[test]
1975    fn test_mkdocs_unclosed_fence_in_admonition() {
1976        let config = default_config();
1977        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1978
1979        // Unclosed fence should not produce a block
1980        let content = "!!! note\n    ```python\n    x = 1\n    no closing fence\n";
1981        let blocks = processor.extract_code_blocks(content);
1982        assert_eq!(blocks.len(), 0, "Unclosed fence should not produce a block");
1983    }
1984
1985    #[test]
1986    fn test_mkdocs_tilde_fence_in_admonition() {
1987        let config = default_config();
1988        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1989
1990        let content = "!!! note\n    ~~~ruby\n    puts 'hi'\n    ~~~\n";
1991        let blocks = processor.extract_code_blocks(content);
1992        assert_eq!(blocks.len(), 1, "Should detect tilde-fenced code block");
1993        assert_eq!(blocks[0].language, "ruby");
1994    }
1995
1996    #[test]
1997    fn test_mkdocs_empty_lines_in_code_block() {
1998        let config = default_config();
1999        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
2000
2001        // Code block with empty lines inside — verifies byte offsets are correct
2002        // across empty lines (the previous find("") approach would break here)
2003        let content = "!!! note\n    ```python\n    x = 1\n\n    y = 2\n    ```\n";
2004        let blocks = processor.extract_code_blocks(content);
2005        assert_eq!(blocks.len(), 1);
2006
2007        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
2008        assert!(
2009            extracted.contains("x = 1") && extracted.contains("y = 2"),
2010            "Extracted content should span across the empty line. Got: {extracted:?}"
2011        );
2012    }
2013
2014    #[test]
2015    fn test_mkdocs_content_byte_offsets_lf() {
2016        let config = default_config();
2017        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
2018
2019        let content = "!!! note\n    ```python\n    print('hi')\n    ```\n";
2020        let blocks = processor.extract_code_blocks(content);
2021        assert_eq!(blocks.len(), 1);
2022
2023        // Verify the extracted content is exactly the code body
2024        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
2025        assert_eq!(extracted, "    print('hi')\n", "Content offsets should be exact for LF");
2026    }
2027
2028    #[test]
2029    fn test_mkdocs_content_byte_offsets_crlf() {
2030        let config = default_config();
2031        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
2032
2033        let content = "!!! note\r\n    ```python\r\n    print('hi')\r\n    ```\r\n";
2034        let blocks = processor.extract_code_blocks(content);
2035        assert_eq!(blocks.len(), 1);
2036
2037        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
2038        assert_eq!(
2039            extracted, "    print('hi')\r\n",
2040            "Content offsets should be exact for CRLF"
2041        );
2042    }
2043
2044    #[test]
2045    fn test_lint_enabled_false_skips_language_in_strict_mode() {
2046        // With on-missing-language-definition = "fail", a language configured
2047        // with enabled=false should be silently skipped (no error).
2048        let mut config = default_config();
2049        config.normalize_language = NormalizeLanguage::Exact;
2050        config.on_missing_language_definition = OnMissing::Fail;
2051
2052        // Python has tools, plaintext is disabled
2053        config.languages.insert(
2054            "python".to_string(),
2055            LanguageToolConfig {
2056                lint: vec!["ruff:check".to_string()],
2057                ..Default::default()
2058            },
2059        );
2060        config.languages.insert(
2061            "plaintext".to_string(),
2062            LanguageToolConfig {
2063                enabled: false,
2064                ..Default::default()
2065            },
2066        );
2067
2068        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2069
2070        let content = "```plaintext\nsome text\n```";
2071        let result = processor.lint(content);
2072
2073        // No error for plaintext: enabled=false satisfies strict mode
2074        assert!(result.is_ok());
2075        let diagnostics = result.unwrap();
2076        assert!(
2077            diagnostics.is_empty(),
2078            "Expected no diagnostics for disabled language, got: {diagnostics:?}"
2079        );
2080    }
2081
2082    #[test]
2083    fn test_format_enabled_false_skips_language_in_strict_mode() {
2084        // Same test but for format mode
2085        let mut config = default_config();
2086        config.normalize_language = NormalizeLanguage::Exact;
2087        config.on_missing_language_definition = OnMissing::Fail;
2088
2089        config.languages.insert(
2090            "plaintext".to_string(),
2091            LanguageToolConfig {
2092                enabled: false,
2093                ..Default::default()
2094            },
2095        );
2096
2097        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2098
2099        let content = "```plaintext\nsome text\n```";
2100        let result = processor.format(content);
2101
2102        // No error for plaintext: enabled=false satisfies strict mode
2103        assert!(result.is_ok());
2104        let output = result.unwrap();
2105        assert!(!output.had_errors, "Expected no errors for disabled language");
2106        assert!(
2107            output.error_messages.is_empty(),
2108            "Expected no error messages, got: {:?}",
2109            output.error_messages
2110        );
2111    }
2112
2113    #[test]
2114    fn test_enabled_false_default_true_preserved() {
2115        // Verify that when enabled is not set, it defaults to true (existing behavior)
2116        let mut config = default_config();
2117        config.on_missing_language_definition = OnMissing::Fail;
2118
2119        // Configure python without explicitly setting enabled
2120        config.languages.insert(
2121            "python".to_string(),
2122            LanguageToolConfig {
2123                lint: vec!["ruff:check".to_string()],
2124                ..Default::default()
2125            },
2126        );
2127
2128        let lang_config = config.languages.get("python").unwrap();
2129        assert!(lang_config.enabled, "enabled should default to true");
2130    }
2131
2132    #[test]
2133    fn test_enabled_false_with_fail_fast_no_error() {
2134        // Even with fail-fast, enabled=false should skip silently
2135        let mut config = default_config();
2136        config.normalize_language = NormalizeLanguage::Exact;
2137        config.on_missing_language_definition = OnMissing::FailFast;
2138
2139        config.languages.insert(
2140            "unknown".to_string(),
2141            LanguageToolConfig {
2142                enabled: false,
2143                ..Default::default()
2144            },
2145        );
2146
2147        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2148
2149        let content = "```unknown\nsome content\n```";
2150        let result = processor.lint(content);
2151
2152        // Should not return an error: enabled=false takes precedence over fail-fast
2153        assert!(result.is_ok(), "Expected Ok but got Err: {result:?}");
2154        assert!(result.unwrap().is_empty());
2155    }
2156
2157    #[test]
2158    fn test_enabled_false_format_with_fail_fast_no_error() {
2159        // Same for format mode
2160        let mut config = default_config();
2161        config.normalize_language = NormalizeLanguage::Exact;
2162        config.on_missing_language_definition = OnMissing::FailFast;
2163
2164        config.languages.insert(
2165            "unknown".to_string(),
2166            LanguageToolConfig {
2167                enabled: false,
2168                ..Default::default()
2169            },
2170        );
2171
2172        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2173
2174        let content = "```unknown\nsome content\n```";
2175        let result = processor.format(content);
2176
2177        assert!(result.is_ok(), "Expected Ok but got Err: {result:?}");
2178        let output = result.unwrap();
2179        assert!(!output.had_errors);
2180    }
2181
2182    #[test]
2183    fn test_enabled_false_with_tools_still_skips() {
2184        // If enabled=false but tools are listed, the language should still be skipped
2185        let mut config = default_config();
2186        config.on_missing_language_definition = OnMissing::Fail;
2187
2188        config.languages.insert(
2189            "python".to_string(),
2190            LanguageToolConfig {
2191                enabled: false,
2192                lint: vec!["ruff:check".to_string()],
2193                format: vec!["ruff:format".to_string()],
2194                on_error: None,
2195            },
2196        );
2197
2198        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2199
2200        let content = "```python\nprint('hello')\n```";
2201
2202        // Lint should skip
2203        let lint_result = processor.lint(content);
2204        assert!(lint_result.is_ok());
2205        assert!(lint_result.unwrap().is_empty());
2206
2207        // Format should skip
2208        let format_result = processor.format(content);
2209        assert!(format_result.is_ok());
2210        let output = format_result.unwrap();
2211        assert!(!output.had_errors);
2212        assert_eq!(output.content, content, "Content should be unchanged");
2213    }
2214
2215    #[test]
2216    fn test_enabled_true_without_tools_triggers_strict_mode() {
2217        // A language configured with enabled=true (default) but no tools
2218        // should still trigger strict mode errors
2219        let mut config = default_config();
2220        config.on_missing_language_definition = OnMissing::Fail;
2221
2222        config.languages.insert(
2223            "python".to_string(),
2224            LanguageToolConfig {
2225                // enabled defaults to true, no tools
2226                ..Default::default()
2227            },
2228        );
2229
2230        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2231
2232        let content = "```python\nprint('hello')\n```";
2233        let result = processor.lint(content);
2234
2235        // Should report an error because enabled=true but no lint tools configured
2236        assert!(result.is_ok());
2237        let diagnostics = result.unwrap();
2238        assert_eq!(diagnostics.len(), 1);
2239        assert!(diagnostics[0].message.contains("No lint tools configured"));
2240    }
2241
2242    #[test]
2243    fn test_mixed_enabled_and_disabled_languages() {
2244        // Multiple languages: one disabled, one unconfigured
2245        let mut config = default_config();
2246        config.normalize_language = NormalizeLanguage::Exact;
2247        config.on_missing_language_definition = OnMissing::Fail;
2248
2249        config.languages.insert(
2250            "plaintext".to_string(),
2251            LanguageToolConfig {
2252                enabled: false,
2253                ..Default::default()
2254            },
2255        );
2256
2257        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2258
2259        let content = "\
2260```plaintext
2261some text
2262```
2263
2264```javascript
2265console.log('hi');
2266```
2267";
2268
2269        let result = processor.lint(content);
2270        assert!(result.is_ok());
2271        let diagnostics = result.unwrap();
2272
2273        // plaintext: skipped (enabled=false), no error
2274        // javascript: not configured at all, should trigger strict mode error
2275        assert_eq!(diagnostics.len(), 1, "Expected 1 diagnostic, got: {diagnostics:?}");
2276        assert!(
2277            diagnostics[0].message.contains("javascript"),
2278            "Error should be about javascript, got: {}",
2279            diagnostics[0].message
2280        );
2281    }
2282
2283    #[test]
2284    fn test_generic_fallback_includes_all_stderr_lines() {
2285        let config = default_config();
2286        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2287
2288        // Use output that won't be parsed by any structured format parser
2289        let output = ToolOutput {
2290            stdout: String::new(),
2291            stderr: "Parse error at position 42\nUnexpected token '::'\n3 errors found".to_string(),
2292            exit_code: 1,
2293            success: false,
2294        };
2295
2296        let diags = processor.parse_tool_output(&output, "tombi", 5);
2297        assert_eq!(diags.len(), 3, "Expected one diagnostic per non-empty stderr line");
2298        assert_eq!(diags[0].message, "Parse error at position 42");
2299        assert_eq!(diags[1].message, "Unexpected token '::'");
2300        assert_eq!(diags[2].message, "3 errors found");
2301        assert!(diags.iter().all(|d| d.tool == "tombi"));
2302        assert!(diags.iter().all(|d| d.file_line == 5));
2303    }
2304
2305    #[test]
2306    fn test_generic_fallback_includes_all_stdout_lines_when_stderr_empty() {
2307        let config = default_config();
2308        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2309
2310        let output = ToolOutput {
2311            stdout: "Line 1 error\nLine 2 detail\nLine 3 summary".to_string(),
2312            stderr: String::new(),
2313            exit_code: 1,
2314            success: false,
2315        };
2316
2317        let diags = processor.parse_tool_output(&output, "some-tool", 10);
2318        assert_eq!(diags.len(), 3);
2319        assert_eq!(diags[0].message, "Line 1 error");
2320        assert_eq!(diags[1].message, "Line 2 detail");
2321        assert_eq!(diags[2].message, "Line 3 summary");
2322    }
2323
2324    #[test]
2325    fn test_generic_fallback_skips_blank_lines() {
2326        let config = default_config();
2327        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2328
2329        let output = ToolOutput {
2330            stdout: String::new(),
2331            stderr: "error: bad input\n\n  \n\ndetail: see above\n".to_string(),
2332            exit_code: 1,
2333            success: false,
2334        };
2335
2336        let diags = processor.parse_tool_output(&output, "tool", 1);
2337        assert_eq!(diags.len(), 2);
2338        assert_eq!(diags[0].message, "error: bad input");
2339        assert_eq!(diags[1].message, "detail: see above");
2340    }
2341
2342    #[test]
2343    fn test_generic_fallback_exit_code_when_no_output() {
2344        let config = default_config();
2345        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2346
2347        let output = ToolOutput {
2348            stdout: String::new(),
2349            stderr: String::new(),
2350            exit_code: 42,
2351            success: false,
2352        };
2353
2354        let diags = processor.parse_tool_output(&output, "tool", 1);
2355        assert_eq!(diags.len(), 1);
2356        assert_eq!(diags[0].message, "Tool exited with code 42");
2357    }
2358
2359    #[test]
2360    fn test_generic_fallback_not_triggered_on_success() {
2361        let config = default_config();
2362        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2363
2364        let output = ToolOutput {
2365            stdout: "some informational output".to_string(),
2366            stderr: String::new(),
2367            exit_code: 0,
2368            success: true,
2369        };
2370
2371        let diags = processor.parse_tool_output(&output, "tool", 1);
2372        assert!(
2373            diags.is_empty(),
2374            "Successful tool runs should produce no fallback diagnostics"
2375        );
2376    }
2377
2378    #[test]
2379    fn test_ansi_codes_stripped_before_parsing() {
2380        let config = default_config();
2381        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2382
2383        // ruff-style output with ANSI color codes wrapping the message
2384        let output = ToolOutput {
2385            stdout: "\x1b[1m_.py\x1b[0m:\x1b[33m1\x1b[0m:\x1b[33m1\x1b[0m: \x1b[31mE501\x1b[0m Line too long"
2386                .to_string(),
2387            stderr: String::new(),
2388            exit_code: 1,
2389            success: false,
2390        };
2391
2392        let diags = processor.parse_tool_output(&output, "ruff:check", 5);
2393        assert_eq!(diags.len(), 1, "ANSI-colored output should still be parsed");
2394        assert_eq!(diags[0].message, "E501 Line too long");
2395        assert_eq!(diags[0].file_line, 6); // 5 + 1
2396    }
2397
2398    #[test]
2399    fn test_tombi_multiline_error_format() {
2400        let config = default_config();
2401        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2402
2403        // Simulates tombi output (ANSI codes stripped for clarity)
2404        let output = ToolOutput {
2405            stdout: "[test]\ntest: \"test\"\nError: invalid key\n    at line 2 column 1\nError: expected key\n    at line 2 column 1\nError: expected '='\n    at line 2 column 1\nError: expected value\n    at line 2 column 1".to_string(),
2406            stderr: "1 file failed to be formatted".to_string(),
2407            exit_code: 1,
2408            success: false,
2409        };
2410
2411        let diags = processor.parse_tool_output(&output, "tombi", 7);
2412        assert_eq!(
2413            diags.len(),
2414            4,
2415            "Expected 4 diagnostics from tombi errors, got {diags:?}"
2416        );
2417        assert_eq!(diags[0].message, "invalid key");
2418        assert_eq!(diags[0].file_line, 9); // 7 + 2
2419        assert_eq!(diags[0].column, Some(1));
2420        assert_eq!(diags[1].message, "expected key");
2421        assert_eq!(diags[1].file_line, 9);
2422        assert_eq!(diags[2].message, "expected '='");
2423        assert_eq!(diags[3].message, "expected value");
2424        assert!(diags.iter().all(|d| d.tool == "tombi"));
2425    }
2426
2427    #[test]
2428    fn test_tombi_with_ansi_codes() {
2429        let config = default_config();
2430        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2431
2432        // Real tombi output with ANSI escape codes
2433        let output = ToolOutput {
2434            stdout: "[test]\ntest: \"test\"\n\x1b[1;31m  Error\x1b[0m: \x1b[1minvalid key\x1b[0m\n    \x1b[90mat line 2 column 1\x1b[0m\n\x1b[1;31m  Error\x1b[0m: \x1b[1mexpected '='\x1b[0m\n    \x1b[90mat line 2 column 1\x1b[0m".to_string(),
2435            stderr: "1 file failed to be formatted".to_string(),
2436            exit_code: 1,
2437            success: false,
2438        };
2439
2440        let diags = processor.parse_tool_output(&output, "tombi", 7);
2441        assert_eq!(
2442            diags.len(),
2443            2,
2444            "Expected 2 diagnostics from ANSI-colored tombi output, got {diags:?}"
2445        );
2446        assert_eq!(diags[0].message, "invalid key");
2447        assert_eq!(diags[0].file_line, 9);
2448        assert_eq!(diags[1].message, "expected '='");
2449        assert_eq!(diags[1].file_line, 9);
2450    }
2451
2452    #[test]
2453    fn test_fallback_combines_stdout_and_stderr() {
2454        let config = default_config();
2455        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2456
2457        // Tool puts some errors on stdout, summary on stderr
2458        let output = ToolOutput {
2459            stdout: "problem found in input".to_string(),
2460            stderr: "1 file failed".to_string(),
2461            exit_code: 1,
2462            success: false,
2463        };
2464
2465        let diags = processor.parse_tool_output(&output, "tool", 1);
2466        assert_eq!(diags.len(), 2, "Fallback should include both stdout and stderr");
2467        assert_eq!(diags[0].message, "problem found in input");
2468        assert_eq!(diags[1].message, "1 file failed");
2469    }
2470
2471    #[test]
2472    fn test_error_line_without_position_info() {
2473        let config = default_config();
2474        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2475
2476        // Error: line not followed by "at line N column M"
2477        let output = ToolOutput {
2478            stdout: "Error: something went wrong\nsome unrelated line".to_string(),
2479            stderr: String::new(),
2480            exit_code: 1,
2481            success: false,
2482        };
2483
2484        let diags = processor.parse_tool_output(&output, "tool", 5);
2485        // "Error: something went wrong" → parsed by error-line parser (no position)
2486        // "some unrelated line" → no parser matches, but diagnostics not empty → no fallback
2487        assert!(!diags.is_empty());
2488        assert_eq!(diags[0].message, "something went wrong");
2489        assert_eq!(diags[0].file_line, 5); // No line offset, uses code_block_start
2490    }
2491
2492    #[test]
2493    fn test_warning_line_with_position() {
2494        let config = default_config();
2495        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2496
2497        let output = ToolOutput {
2498            stdout: "Warning: deprecated syntax\n    at line 3 column 5".to_string(),
2499            stderr: String::new(),
2500            exit_code: 1,
2501            success: false,
2502        };
2503
2504        let diags = processor.parse_tool_output(&output, "tool", 10);
2505        assert_eq!(diags.len(), 1);
2506        assert_eq!(diags[0].message, "deprecated syntax");
2507        assert_eq!(diags[0].file_line, 13); // 10 + 3
2508        assert_eq!(diags[0].column, Some(5));
2509        assert!(matches!(diags[0].severity, DiagnosticSeverity::Warning));
2510    }
2511
2512    #[test]
2513    fn test_strip_ansi_codes() {
2514        assert_eq!(strip_ansi_codes("hello"), "hello");
2515        assert_eq!(strip_ansi_codes("\x1b[31mred\x1b[0m"), "red");
2516        assert_eq!(
2517            strip_ansi_codes("\x1b[1;31m  Error\x1b[0m: \x1b[1mmsg\x1b[0m"),
2518            "  Error: msg"
2519        );
2520        assert_eq!(strip_ansi_codes("no codes here"), "no codes here");
2521        assert_eq!(strip_ansi_codes(""), "");
2522        assert_eq!(
2523            strip_ansi_codes("\x1b[90mat line 2 column 1\x1b[0m"),
2524            "at line 2 column 1"
2525        );
2526    }
2527
2528    #[test]
2529    fn test_parse_at_line_column() {
2530        assert_eq!(
2531            CodeBlockToolProcessor::parse_at_line_column("at line 2 column 1"),
2532            Some((2, 1))
2533        );
2534        assert_eq!(
2535            CodeBlockToolProcessor::parse_at_line_column("at line 10 column 15"),
2536            Some((10, 15))
2537        );
2538        assert_eq!(
2539            CodeBlockToolProcessor::parse_at_line_column("At Line 5 Column 3"),
2540            Some((5, 3))
2541        );
2542        assert_eq!(
2543            CodeBlockToolProcessor::parse_at_line_column("not a position line"),
2544            None
2545        );
2546        assert_eq!(
2547            CodeBlockToolProcessor::parse_at_line_column("at line abc column 1"),
2548            None
2549        );
2550    }
2551
2552    #[test]
2553    fn test_parse_error_line() {
2554        let (msg, sev) = CodeBlockToolProcessor::parse_error_line("Error: invalid key").unwrap();
2555        assert_eq!(msg, "invalid key");
2556        assert!(matches!(sev, DiagnosticSeverity::Error));
2557
2558        let (msg, sev) = CodeBlockToolProcessor::parse_error_line("Warning: deprecated").unwrap();
2559        assert_eq!(msg, "deprecated");
2560        assert!(matches!(sev, DiagnosticSeverity::Warning));
2561
2562        // Lowercase should NOT match (avoids conflict with unstructured tool output)
2563        assert!(CodeBlockToolProcessor::parse_error_line("error: bad input").is_none());
2564        assert!(CodeBlockToolProcessor::parse_error_line("warning: minor issue").is_none());
2565
2566        // Empty message after prefix should not match
2567        assert!(CodeBlockToolProcessor::parse_error_line("Error:").is_none());
2568        assert!(CodeBlockToolProcessor::parse_error_line("Error:   ").is_none());
2569
2570        // Not an error line
2571        assert!(CodeBlockToolProcessor::parse_error_line("some random text").is_none());
2572    }
2573
2574    #[test]
2575    fn test_consecutive_error_lines_without_position() {
2576        let config = default_config();
2577        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2578
2579        // Two Error: lines in a row — first should flush without position,
2580        // second gets position from "at line"
2581        let output = ToolOutput {
2582            stdout: "Error: first problem\nError: second problem\n    at line 3 column 1".to_string(),
2583            stderr: String::new(),
2584            exit_code: 1,
2585            success: false,
2586        };
2587
2588        let diags = processor.parse_tool_output(&output, "tool", 5);
2589        assert_eq!(diags.len(), 2, "Expected 2 diagnostics, got {diags:?}");
2590        // First error flushed without position when second Error: was encountered
2591        assert_eq!(diags[0].message, "first problem");
2592        assert_eq!(diags[0].file_line, 5); // No line mapping
2593        assert_eq!(diags[0].column, None);
2594        // Second error resolved with position
2595        assert_eq!(diags[1].message, "second problem");
2596        assert_eq!(diags[1].file_line, 8); // 5 + 3
2597        assert_eq!(diags[1].column, Some(1));
2598    }
2599
2600    #[test]
2601    fn test_error_line_at_end_of_output() {
2602        let config = default_config();
2603        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2604
2605        // Error: as the very last line — flushed by post-loop code
2606        let output = ToolOutput {
2607            stdout: "Error: trailing error".to_string(),
2608            stderr: String::new(),
2609            exit_code: 1,
2610            success: false,
2611        };
2612
2613        let diags = processor.parse_tool_output(&output, "tool", 5);
2614        assert_eq!(diags.len(), 1);
2615        assert_eq!(diags[0].message, "trailing error");
2616        assert_eq!(diags[0].file_line, 5); // No position info available
2617        assert_eq!(diags[0].column, None);
2618    }
2619
2620    #[test]
2621    fn test_blank_lines_between_error_and_position() {
2622        let config = default_config();
2623        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2624
2625        // Blank lines between Error: and "at line" should be transparently skipped
2626        let output = ToolOutput {
2627            stdout: "Error: spaced out\n\n\n    at line 4 column 2".to_string(),
2628            stderr: String::new(),
2629            exit_code: 1,
2630            success: false,
2631        };
2632
2633        let diags = processor.parse_tool_output(&output, "tool", 10);
2634        assert_eq!(diags.len(), 1);
2635        assert_eq!(diags[0].message, "spaced out");
2636        assert_eq!(diags[0].file_line, 14); // 10 + 4
2637        assert_eq!(diags[0].column, Some(2));
2638    }
2639
2640    #[test]
2641    fn test_mixed_structured_and_error_line_parsers() {
2642        let config = default_config();
2643        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2644
2645        // Mix of ruff-style structured output and tombi-style Error: output
2646        let output = ToolOutput {
2647            stdout: "_.py:1:5: E501 Line too long\nError: invalid syntax\n    at line 3 column 1".to_string(),
2648            stderr: String::new(),
2649            exit_code: 1,
2650            success: false,
2651        };
2652
2653        let diags = processor.parse_tool_output(&output, "tool", 5);
2654        assert_eq!(diags.len(), 2, "Expected 2 diagnostics, got {diags:?}");
2655        // First: standard format parser
2656        assert_eq!(diags[0].message, "E501 Line too long");
2657        assert_eq!(diags[0].file_line, 6); // 5 + 1
2658        // Second: Error: + at line parser
2659        assert_eq!(diags[1].message, "invalid syntax");
2660        assert_eq!(diags[1].file_line, 8); // 5 + 3
2661    }
2662
2663    #[test]
2664    fn test_at_line_without_preceding_error() {
2665        let config = default_config();
2666        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2667
2668        // "at line N column M" without a preceding Error: should not create a diagnostic
2669        let output = ToolOutput {
2670            stdout: "at line 2 column 1\nsome other text".to_string(),
2671            stderr: String::new(),
2672            exit_code: 1,
2673            success: false,
2674        };
2675
2676        let diags = processor.parse_tool_output(&output, "tool", 5);
2677        // No pending error, so "at line" is just an unmatched line
2678        // Both lines are unmatched, fallback fires with combined output
2679        assert_eq!(diags.len(), 2);
2680        assert_eq!(diags[0].message, "at line 2 column 1");
2681        assert_eq!(diags[1].message, "some other text");
2682    }
2683
2684    // =========================================================================
2685    // Issue #527: formatter that produces empty output should not erase content
2686    // =========================================================================
2687
2688    /// A formatter that produces no stdout (like `tombi lint -` mistakenly used
2689    /// as a formatter) should not replace non-empty content with an empty string.
2690    /// This test uses `true` which exits 0 with no output, simulating the bug.
2691    #[test]
2692    fn test_format_empty_output_does_not_erase_content() {
2693        use super::super::config::LanguageToolConfig;
2694
2695        let mut config = default_config();
2696        config.languages.insert(
2697            "toml".to_string(),
2698            LanguageToolConfig {
2699                format: vec!["empty-formatter".to_string()],
2700                ..Default::default()
2701            },
2702        );
2703        // Define a tool that exits 0 but produces no stdout (simulates `tombi lint -`)
2704        config.tools.insert(
2705            "empty-formatter".to_string(),
2706            super::super::config::ToolDefinition {
2707                command: vec!["true".to_string()],
2708                stdin: true,
2709                stdout: true,
2710                lint_args: vec![],
2711                format_args: vec![],
2712            },
2713        );
2714
2715        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2716
2717        let content = "```toml\nkey = \"value\"\n```\n";
2718        let result = processor.format(content);
2719
2720        assert!(result.is_ok(), "Format should not error");
2721        let output = result.unwrap();
2722
2723        // The content must NOT be erased — original content should be preserved
2724        assert!(
2725            output.content.contains("key = \"value\""),
2726            "Empty formatter output should not erase content. Got: {:?}",
2727            output.content
2728        );
2729    }
2730
2731    /// A formatter that echoes input back (like `cat`) should preserve content.
2732    #[test]
2733    fn test_format_identity_formatter_preserves_content() {
2734        use super::super::config::LanguageToolConfig;
2735
2736        let mut config = default_config();
2737        config.languages.insert(
2738            "toml".to_string(),
2739            LanguageToolConfig {
2740                format: vec!["cat-formatter".to_string()],
2741                ..Default::default()
2742            },
2743        );
2744        config.tools.insert(
2745            "cat-formatter".to_string(),
2746            super::super::config::ToolDefinition {
2747                command: vec!["cat".to_string()],
2748                stdin: true,
2749                stdout: true,
2750                lint_args: vec![],
2751                format_args: vec![],
2752            },
2753        );
2754
2755        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2756
2757        let content = "```toml\nkey = \"value\"\n```\n";
2758        let result = processor.format(content);
2759
2760        assert!(result.is_ok(), "Format should not error");
2761        let output = result.unwrap();
2762        assert_eq!(
2763            output.content, content,
2764            "Identity formatter should preserve content exactly"
2765        );
2766    }
2767
2768    /// Verify that the context-aware tool resolution resolves bare "tombi"
2769    /// to "tombi:format" in format context and "tombi:lint" in lint context.
2770    #[test]
2771    fn test_resolve_tool_context_aware_tombi() {
2772        let config = default_config();
2773        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2774
2775        // In format context, bare "tombi" should resolve to "tombi:format"
2776        let format_def = processor
2777            .resolve_tool("tombi", ToolContext::Format)
2778            .expect("Should resolve tombi in format context");
2779        assert!(
2780            format_def.command.iter().any(|arg| arg == "format"),
2781            "Bare 'tombi' in format context should resolve to 'tombi format', got: {:?}",
2782            format_def.command
2783        );
2784
2785        // In lint context, bare "tombi" should resolve to "tombi:lint"
2786        let lint_def = processor
2787            .resolve_tool("tombi", ToolContext::Lint)
2788            .expect("Should resolve tombi in lint context");
2789        assert!(
2790            lint_def.command.iter().any(|arg| arg == "lint"),
2791            "Bare 'tombi' in lint context should resolve to 'tombi lint', got: {:?}",
2792            lint_def.command
2793        );
2794
2795        // Explicit suffix should bypass context-aware resolution
2796        let explicit_def = processor
2797            .resolve_tool("tombi:lint", ToolContext::Format)
2798            .expect("Should resolve explicit tombi:lint even in format context");
2799        assert!(
2800            explicit_def.command.iter().any(|arg| arg == "lint"),
2801            "Explicit 'tombi:lint' should always use lint, got: {:?}",
2802            explicit_def.command
2803        );
2804    }
2805
2806    /// Verify context-aware resolution for ruff (uses "check" suffix, not "lint").
2807    #[test]
2808    fn test_resolve_tool_context_aware_ruff() {
2809        let config = default_config();
2810        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2811
2812        // In lint context, bare "ruff" should resolve to "ruff:check"
2813        let lint_def = processor
2814            .resolve_tool("ruff", ToolContext::Lint)
2815            .expect("Should resolve ruff in lint context");
2816        assert!(
2817            lint_def.command.iter().any(|arg| arg == "check"),
2818            "Bare 'ruff' in lint context should resolve to 'ruff check', got: {:?}",
2819            lint_def.command
2820        );
2821
2822        // In format context, bare "ruff" should resolve to "ruff:format"
2823        let format_def = processor
2824            .resolve_tool("ruff", ToolContext::Format)
2825            .expect("Should resolve ruff in format context");
2826        assert!(
2827            format_def.command.iter().any(|arg| arg == "format"),
2828            "Bare 'ruff' in format context should resolve to 'ruff format', got: {:?}",
2829            format_def.command
2830        );
2831    }
2832
2833    /// Tools without context-specific variants should still resolve via bare name.
2834    #[test]
2835    fn test_resolve_tool_bare_name_fallback() {
2836        let config = default_config();
2837        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2838
2839        // "shellcheck" has no :lint or :format variant — should fall back to bare name
2840        let def = processor
2841            .resolve_tool("shellcheck", ToolContext::Lint)
2842            .expect("Should resolve shellcheck via fallback");
2843        assert!(
2844            def.command.iter().any(|arg| arg == "shellcheck"),
2845            "shellcheck should resolve to itself, got: {:?}",
2846            def.command
2847        );
2848    }
2849
2850    /// Context-aware resolution for tools with non-standard format suffixes.
2851    #[test]
2852    fn test_resolve_tool_context_aware_sqlfluff() {
2853        let config = default_config();
2854        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2855
2856        // sqlfluff uses ":fix" as its format variant
2857        let format_def = processor
2858            .resolve_tool("sqlfluff", ToolContext::Format)
2859            .expect("Should resolve sqlfluff in format context");
2860        assert!(
2861            format_def.command.iter().any(|arg| arg == "fix"),
2862            "Bare 'sqlfluff' in format context should resolve to 'sqlfluff fix', got: {:?}",
2863            format_def.command
2864        );
2865    }
2866
2867    /// Context-aware resolution for djlint (:reformat suffix).
2868    #[test]
2869    fn test_resolve_tool_context_aware_djlint() {
2870        let config = default_config();
2871        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2872
2873        // djlint uses ":reformat" as its format variant
2874        let format_def = processor
2875            .resolve_tool("djlint", ToolContext::Format)
2876            .expect("Should resolve djlint in format context");
2877        assert!(
2878            format_def.command.iter().any(|arg| arg.contains("reformat")),
2879            "Bare 'djlint' in format context should resolve to djlint reformat, got: {:?}",
2880            format_def.command
2881        );
2882    }
2883
2884    /// User-defined tools with context-specific variants resolve correctly.
2885    #[test]
2886    fn test_resolve_tool_user_defined_with_context_variant() {
2887        use super::super::config::ToolDefinition;
2888
2889        let mut config = default_config();
2890        config.tools.insert(
2891            "mytool".to_string(),
2892            ToolDefinition {
2893                command: vec!["mytool".to_string(), "--lint".to_string()],
2894                ..Default::default()
2895            },
2896        );
2897        config.tools.insert(
2898            "mytool:format".to_string(),
2899            ToolDefinition {
2900                command: vec!["mytool".to_string(), "--format".to_string()],
2901                ..Default::default()
2902            },
2903        );
2904
2905        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2906
2907        // In format context, should resolve to "mytool:format"
2908        let def = processor
2909            .resolve_tool("mytool", ToolContext::Format)
2910            .expect("Should resolve user tool in format context");
2911        assert!(
2912            def.command.iter().any(|arg| arg == "--format"),
2913            "User 'mytool' in format context should resolve to mytool:format, got: {:?}",
2914            def.command
2915        );
2916
2917        // In lint context, should fall back to bare "mytool" (no mytool:lint exists)
2918        let def = processor
2919            .resolve_tool("mytool", ToolContext::Lint)
2920            .expect("Should resolve user tool in lint context via fallback");
2921        assert!(
2922            def.command.iter().any(|arg| arg == "--lint"),
2923            "User 'mytool' in lint context should fall back to bare name, got: {:?}",
2924            def.command
2925        );
2926    }
2927
2928    /// Nonexistent tool returns None.
2929    #[test]
2930    fn test_resolve_tool_nonexistent_returns_none() {
2931        let config = default_config();
2932        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2933
2934        assert!(
2935            processor
2936                .resolve_tool("nonexistent-tool-xyz", ToolContext::Lint)
2937                .is_none(),
2938            "Nonexistent tool should return None in lint context"
2939        );
2940        assert!(
2941            processor
2942                .resolve_tool("nonexistent-tool-xyz", ToolContext::Format)
2943                .is_none(),
2944            "Nonexistent tool should return None in format context"
2945        );
2946    }
2947
2948    #[test]
2949    fn test_strip_ansi_codes_edge_cases() {
2950        // Lone ESC without CSI bracket — non-printable, safely dropped
2951        assert_eq!(strip_ansi_codes("before\x1bafter"), "beforeafter");
2952        // ESC at end of string
2953        assert_eq!(strip_ansi_codes("trailing\x1b"), "trailing");
2954        // Nested/consecutive sequences
2955        assert_eq!(strip_ansi_codes("\x1b[1m\x1b[31mbold red\x1b[0m"), "bold red");
2956        // 256-color and RGB sequences
2957        assert_eq!(strip_ansi_codes("\x1b[38;5;196mred\x1b[0m"), "red");
2958        assert_eq!(strip_ansi_codes("\x1b[38;2;255;0;0mred\x1b[0m"), "red");
2959    }
2960}
rumdl_lib/code_block_tools/processor.rs

rumdl_lib/code_block_tools/
processor.rs