Skip to main content

rumdl_lib/code_block_tools/
processor.rs

1//! Main processor for code block linting and formatting.
2//!
3//! This module coordinates language resolution, tool lookup, execution,
4//! and result collection for processing code blocks in markdown files.
5
6#[cfg(test)]
7use super::config::LanguageToolConfig;
8use super::config::{CodeBlockToolsConfig, NormalizeLanguage, OnError, OnMissing, ToolDefinition};
9use super::executor::{ExecutorError, ToolExecutor, ToolOutput};
10use super::linguist::LinguistResolver;
11use super::registry::ToolRegistry;
12use crate::config::MarkdownFlavor;
13use crate::rule::{LintWarning, Severity};
14use crate::utils::rumdl_parser_options;
15use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
16
/// Name of the special built-in tool that stands for rumdl's own markdown linting.
///
/// When this tool is configured for markdown blocks, the processor skips
/// external execution since it's handled by embedded markdown linting.
pub const RUMDL_BUILTIN_TOOL: &str = "rumdl";
21
/// Report whether a language tag names markdown.
///
/// The comparison is case-insensitive and accepts both the long (`markdown`)
/// and short (`md`) spellings.
fn is_markdown_language(lang: &str) -> bool {
    let lowered = lang.to_lowercase();
    ["markdown", "md"].contains(&lowered.as_str())
}
26
/// Strip ANSI CSI escape sequences from tool output.
///
/// Many tools output colored text (e.g. `\x1b[1;31mError\x1b[0m`), which prevents
/// structured parsers from matching patterns like `file:line:col: message`.
///
/// Only CSI sequences (`ESC [ ... <final byte>`) are removed as a unit. An ESC
/// that is not followed by `[` is dropped on its own and the text after it is
/// kept. An unterminated CSI sequence consumes the rest of the input.
fn strip_ansi_codes(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        if c != '\x1b' {
            result.push(c);
            continue;
        }
        if chars.next_if_eq(&'[').is_some() {
            // A CSI sequence ends at its first "final byte", which ECMA-48 defines
            // as 0x40..=0x7E. Testing only for ASCII letters would run past
            // terminators such as `~` or `@` and swallow visible text.
            for next in chars.by_ref() {
                if ('\x40'..='\x7e').contains(&next) {
                    break;
                }
            }
        }
    }
    result
}
52
/// Return `content` terminated by a newline, allocating only when one must be added.
///
/// Code block extraction strips the line ending before the closing fence, but
/// per CommonMark that final newline is part of the block's content. External
/// tools read their stdin as a complete file, so end-of-file newline rules
/// (e.g. yamllint/ryl `new-line-at-end-of-file`, ruff `W292`) would otherwise
/// report a false positive. Empty content is returned unchanged so that no
/// content is synthesized where there is none.
fn ensure_trailing_newline(content: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    let needs_newline = !content.is_empty() && !content.ends_with('\n');
    if !needs_newline {
        return Cow::Borrowed(content);
    }

    let mut terminated = String::with_capacity(content.len() + 1);
    terminated.push_str(content);
    terminated.push('\n');
    Cow::Owned(terminated)
}
68
/// Information about a fenced code block for processing.
///
/// Line numbers are 0-indexed; offsets are byte offsets into the markdown
/// source the block was extracted from.
#[derive(Debug, Clone)]
pub struct FencedCodeBlockInfo {
    /// 0-indexed line number where opening fence starts.
    pub start_line: usize,
    /// 0-indexed line number where closing fence ends.
    pub end_line: usize,
    /// Byte offset where code content starts (after opening fence line).
    pub content_start: usize,
    /// Byte offset where code content ends (before closing fence line).
    pub content_end: usize,
    /// Language tag: the first whitespace-separated token of the info string,
    /// or an empty string when the info string has none.
    pub language: String,
    /// Full info string from the fence.
    pub info_string: String,
    /// The fence character used (` or ~).
    pub fence_char: char,
    /// Number of consecutive fence characters on the opening fence line
    /// (3 or more for a well-formed fence).
    pub fence_length: usize,
    /// Length in bytes of the leading whitespace on the fence line.
    pub indent: usize,
    /// Exact leading whitespace prefix from the fence line.
    pub indent_prefix: String,
}
93
/// A diagnostic message from an external tool.
///
/// Also used for processor-generated problems (missing tool configuration or
/// missing binary), in which case `tool` is `"code-block-tools"`.
#[derive(Debug, Clone)]
pub struct CodeBlockDiagnostic {
    /// Line number in the original markdown file (1-indexed).
    pub file_line: usize,
    /// Column number (1-indexed, if available).
    pub column: Option<usize>,
    /// Message from the tool.
    pub message: String,
    /// Severity (error, warning, info).
    pub severity: DiagnosticSeverity,
    /// Name of the tool that produced this.
    pub tool: String,
    /// Line where the code block starts (1-indexed, for context).
    pub code_block_start: usize,
}
110
/// Severity level for diagnostics.
///
/// Each variant maps one-to-one onto the `Severity` variant of the same name
/// when a diagnostic is converted to a `LintWarning`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// Converted to `Severity::Error`.
    Error,
    /// Converted to `Severity::Warning`.
    Warning,
    /// Converted to `Severity::Info`.
    Info,
}
118
119impl CodeBlockDiagnostic {
120    /// Convert to a LintWarning for integration with rumdl's warning system.
121    pub fn to_lint_warning(&self) -> LintWarning {
122        let severity = match self.severity {
123            DiagnosticSeverity::Error => Severity::Error,
124            DiagnosticSeverity::Warning => Severity::Warning,
125            DiagnosticSeverity::Info => Severity::Info,
126        };
127
128        LintWarning {
129            message: self.message.clone(),
130            line: self.file_line,
131            column: self.column.unwrap_or(1),
132            end_line: self.file_line,
133            end_column: self.column.unwrap_or(1),
134            severity,
135            fix: None, // External tool diagnostics don't provide fixes
136            rule_name: Some(self.tool.clone()),
137        }
138    }
139}
140
/// Error during code block processing.
///
/// The `line` fields are filled from 1-indexed opening-fence line numbers by
/// the code in this module that constructs these variants.
#[derive(Debug, Clone)]
pub enum ProcessorError {
    /// Tool execution failed.
    ToolError(ExecutorError),
    /// Tool execution failed with code block location context.
    ToolErrorAt {
        /// The underlying executor failure.
        error: ExecutorError,
        /// Line of the code block the tool was run on.
        line: usize,
        /// Language of the code block the tool was run on.
        language: String,
    },
    /// No tools configured for language.
    NoToolsConfigured { language: String, line: usize },
    /// Tool binary not found.
    ToolBinaryNotFound {
        /// Name of the command that could not be found.
        tool: String,
        /// Language of the code block the tool was configured for.
        language: String,
        /// Line of the code block the tool was configured for.
        line: usize,
    },
    /// Processing was aborted due to on_error = fail.
    Aborted { message: String },
}
163
164impl std::fmt::Display for ProcessorError {
165    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
166        match self {
167            Self::ToolError(e) => write!(f, "{e}"),
168            Self::ToolErrorAt { error, line, language } => {
169                write!(f, "line {line} ({language}): {error}")
170            }
171            Self::NoToolsConfigured { language, line } => {
172                write!(f, "line {line} ({language}): no tools configured")
173            }
174            Self::ToolBinaryNotFound { tool, language, line } => {
175                write!(f, "line {line} ({language}): tool '{tool}' not found in PATH")
176            }
177            Self::Aborted { message } => write!(f, "Processing aborted: {message}"),
178        }
179    }
180}
181
// Marker impl: all `Error` methods keep their default implementations.
impl std::error::Error for ProcessorError {}
183
184impl From<ExecutorError> for ProcessorError {
185    fn from(e: ExecutorError) -> Self {
186        Self::ToolError(e)
187    }
188}
189
/// Result of processing a single code block.
#[derive(Debug)]
pub struct CodeBlockResult {
    /// Diagnostics from linting.
    pub diagnostics: Vec<CodeBlockDiagnostic>,
    /// Formatted content (if formatting was requested and succeeded);
    /// `None` otherwise.
    pub formatted_content: Option<String>,
    /// Whether the code block was modified.
    pub was_modified: bool,
}
200
/// Result of formatting code blocks in a document.
#[derive(Debug)]
pub struct FormatOutput {
    /// The formatted content (may be partially formatted if errors occurred).
    pub content: String,
    /// Whether any errors occurred during formatting.
    pub had_errors: bool,
    /// Error messages for blocks that couldn't be formatted.
    pub error_messages: Vec<String>,
}
211
/// Context in which a tool is being used.
///
/// Selects which `tool:variant` suffixes are tried when a bare tool name is resolved.
#[derive(Copy, Clone)]
enum ToolContext {
    /// The tool is run to produce diagnostics.
    Lint,
    /// The tool is run to rewrite code block content.
    Format,
}

/// Main processor for code block tools.
pub struct CodeBlockToolProcessor<'a> {
    /// Borrowed code-block-tools configuration.
    config: &'a CodeBlockToolsConfig,
    /// Markdown flavor; MkDocs enables extra extraction of blocks nested in
    /// admonitions and tabs.
    flavor: MarkdownFlavor,
    /// Resolver used to normalize language tags in `Linguist` mode.
    linguist: LinguistResolver,
    /// Tool definitions, built from `config.tools`.
    registry: ToolRegistry,
    /// Runs external tools; constructed with `config.timeout`.
    executor: ToolExecutor,
    /// User-configured language aliases with keys and values lowercased.
    user_aliases: std::collections::HashMap<String, String>,
}
228
229impl<'a> CodeBlockToolProcessor<'a> {
230    /// Create a new processor with the given configuration and markdown flavor.
231    pub fn new(config: &'a CodeBlockToolsConfig, flavor: MarkdownFlavor) -> Self {
232        let user_aliases = config
233            .language_aliases
234            .iter()
235            .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
236            .collect();
237        Self {
238            config,
239            flavor,
240            linguist: LinguistResolver::new(),
241            registry: ToolRegistry::new(config.tools.clone()),
242            executor: ToolExecutor::new(config.timeout),
243            user_aliases,
244        }
245    }
246
247    /// Resolve a tool ID with context awareness.
248    ///
249    /// When a bare tool name (e.g., "tombi") is used in a specific context
250    /// (lint or format), try the context-specific variant first (e.g., "tombi:format"),
251    /// then common alternatives (e.g., "tombi:check"), before falling back to the bare name.
252    fn resolve_tool<'b>(&'b self, tool_id: &str, context: ToolContext) -> Option<&'b ToolDefinition> {
253        // If the tool ID already has a colon suffix, use it directly
254        if tool_id.contains(':') {
255            return self.registry.get(tool_id);
256        }
257
258        // Try context-specific variants first
259        let suffixes = match context {
260            ToolContext::Format => &["format", "fmt", "fix", "reformat"][..],
261            ToolContext::Lint => &["lint", "check"][..],
262        };
263
264        for suffix in suffixes {
265            let qualified = format!("{tool_id}:{suffix}");
266            if let Some(def) = self.registry.get(&qualified) {
267                return Some(def);
268            }
269        }
270
271        // Fall back to bare name
272        self.registry.get(tool_id)
273    }
274
275    /// Quick check whether any configured language might appear in fenced code blocks.
276    /// Scans for `` ```lang `` or `` ~~~lang `` patterns without full parsing.
277    fn has_potential_matching_blocks(&self, content: &str, lint_mode: bool) -> bool {
278        // Collect languages that have tools configured for the requested mode
279        let configured_langs: Vec<&str> = self
280            .config
281            .languages
282            .iter()
283            .filter(|(_, lc)| {
284                lc.enabled
285                    && if lint_mode {
286                        !lc.lint.is_empty()
287                    } else {
288                        !lc.format.is_empty()
289                    }
290            })
291            .map(|(lang, _)| lang.as_str())
292            .collect();
293
294        if configured_langs.is_empty() {
295            return false;
296        }
297
298        // Scan content line-by-line for fence openers matching configured languages
299        for line in content.lines() {
300            let trimmed = line.trim_start();
301            let after_fence = if let Some(rest) = trimmed.strip_prefix("```") {
302                rest
303            } else if let Some(rest) = trimmed.strip_prefix("~~~") {
304                rest
305            } else {
306                continue;
307            };
308
309            let lang = after_fence.split_whitespace().next().unwrap_or("");
310            if lang.is_empty() {
311                continue;
312            }
313            // Check both the raw language and the canonical (normalized) form
314            let canonical = self.resolve_language(lang);
315            if configured_langs.contains(&canonical.as_str()) {
316                return true;
317            }
318        }
319
320        false
321    }
322
    /// Extract all fenced code blocks from content.
    ///
    /// Blocks are located with pulldown-cmark (using `rumdl_parser_options`).
    /// Only fenced blocks are collected; other code block kinds never start a
    /// builder, so their end events are ignored. For the MkDocs flavor, blocks
    /// found by `extract_mkdocs_code_blocks` are merged in (skipping any whose
    /// start line is already present) and the result is sorted by start line.
    pub fn extract_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
        let mut blocks = Vec::new();
        // Set on a fenced-block start event, consumed on the matching end event.
        let mut current_block: Option<FencedCodeBlockBuilder> = None;

        let options = rumdl_parser_options();
        let parser = Parser::new_ext(content, options).into_offset_iter();

        let lines: Vec<&str> = content.lines().collect();

        for (event, range) in parser {
            match event {
                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
                    let info_string = info.to_string();
                    // Language is the first whitespace-separated token ("" if none).
                    let language = info_string.split_whitespace().next().unwrap_or("").to_string();

                    // Find start line: number of newlines before the block's first byte.
                    let start_line = content[..range.start].chars().filter(|&c| c == '\n').count();

                    // Find content start (after opening fence line); falls back to
                    // the end of the document when the fence line has no newline.
                    let content_start = content[range.start..]
                        .find('\n')
                        .map_or(content.len(), |i| range.start + i + 1);

                    // Detect fence character and length from the line
                    let fence_line = lines.get(start_line).unwrap_or(&"");
                    let trimmed = fence_line.trim_start();
                    let indent = fence_line.len() - trimmed.len();
                    let indent_prefix = fence_line.get(..indent).unwrap_or("").to_string();
                    // Anything that does not start with '~' is treated as a backtick fence.
                    let (fence_char, fence_length) = if trimmed.starts_with('~') {
                        ('~', trimmed.chars().take_while(|&c| c == '~').count())
                    } else {
                        ('`', trimmed.chars().take_while(|&c| c == '`').count())
                    };

                    current_block = Some(FencedCodeBlockBuilder {
                        start_line,
                        content_start,
                        language,
                        info_string,
                        fence_char,
                        fence_length,
                        indent,
                        indent_prefix,
                    });
                }
                Event::End(TagEnd::CodeBlock) => {
                    if let Some(builder) = current_block.take() {
                        // Find end line
                        let end_line = content[..range.end].chars().filter(|&c| c == '\n').count();

                        // Find content end (before closing fence line): the last
                        // newline between the content start and the end of the
                        // block's range, so the final line ending is excluded.
                        let search_start = builder.content_start.min(range.end);
                        let content_end = if search_start < range.end {
                            content[search_start..range.end]
                                .rfind('\n')
                                .map_or(search_start, |i| search_start + i)
                        } else {
                            search_start
                        };

                        // Guard against an inverted span before recording the block.
                        if content_end >= builder.content_start {
                            blocks.push(FencedCodeBlockInfo {
                                start_line: builder.start_line,
                                end_line,
                                content_start: builder.content_start,
                                content_end,
                                language: builder.language,
                                info_string: builder.info_string,
                                fence_char: builder.fence_char,
                                fence_length: builder.fence_length,
                                indent: builder.indent,
                                indent_prefix: builder.indent_prefix,
                            });
                        }
                    }
                }
                _ => {}
            }
        }

        // For MkDocs flavor, also extract code blocks inside admonitions and tabs
        if self.flavor == MarkdownFlavor::MkDocs {
            let mkdocs_blocks = self.extract_mkdocs_code_blocks(content);
            for mb in mkdocs_blocks {
                // Deduplicate: only add if no existing block starts at the same line
                if !blocks.iter().any(|b| b.start_line == mb.start_line) {
                    blocks.push(mb);
                }
            }
            blocks.sort_by_key(|b| b.start_line);
        }

        blocks
    }
418
    /// Extract fenced code blocks that are inside MkDocs admonitions or tabs.
    ///
    /// pulldown_cmark doesn't parse MkDocs-specific constructs, so indented
    /// code blocks inside `!!!`/`???` admonitions or `===` tabs are missed.
    /// This method manually scans for them.
    ///
    /// Fences are only recognised while at least one admonition/tab context is
    /// open. A block is recorded only when its closing fence is found; a fence
    /// still open when its context ends (or at end of input) is discarded.
    ///
    /// NOTE(review): `content_end` here is the byte offset of the closing fence
    /// line, so the content slice keeps its final line ending, whereas
    /// `extract_code_blocks` appears to stop before that newline — confirm this
    /// difference is intended.
    fn extract_mkdocs_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
        use crate::utils::mkdocs_admonitions;
        use crate::utils::mkdocs_tabs;

        let mut blocks = Vec::new();
        let lines: Vec<&str> = content.lines().collect();

        // Track current MkDocs context indent level
        // We only need to know if we're inside any MkDocs block, so a simple stack suffices.
        let mut context_indent_stack: Vec<usize> = Vec::new();

        // Track fence state inside MkDocs context
        let mut in_fence = false;
        let mut fence_start_line: usize = 0;
        let mut fence_content_start: usize = 0;
        let mut fence_char: char = '`';
        let mut fence_length: usize = 0;
        let mut fence_indent: usize = 0;
        let mut fence_indent_prefix = String::new();
        let mut fence_language = String::new();
        let mut fence_info_string = String::new();

        // Compute byte offsets via pointer arithmetic.
        // `content.lines()` returns slices into the original string,
        // so each line's pointer offset from `content` gives its byte position.
        // This correctly handles \n, \r\n, and empty lines.
        let content_start_ptr = content.as_ptr() as usize;
        let line_offsets: Vec<usize> = lines
            .iter()
            .map(|line| line.as_ptr() as usize - content_start_ptr)
            .collect();

        for (i, line) in lines.iter().enumerate() {
            let line_indent = crate::utils::mkdocs_common::get_line_indent(line);
            let is_admonition = mkdocs_admonitions::is_admonition_start(line);
            let is_tab = mkdocs_tabs::is_tab_marker(line);

            // Pop contexts when the current line is not indented enough to be content.
            // This runs for ALL lines (including new admonition/tab starts) to clean
            // up stale entries before potentially pushing a new context.
            // Blank lines never close a context.
            if !line.trim().is_empty() {
                while let Some(&ctx_indent) = context_indent_stack.last() {
                    // Content must sit at least 4 columns deeper than its marker.
                    if line_indent < ctx_indent + 4 {
                        context_indent_stack.pop();
                        // Leaving the context abandons any fence that was still open.
                        if in_fence {
                            in_fence = false;
                        }
                    } else {
                        break;
                    }
                }
            }

            // NOTE(review): the two marker checks below also run while `in_fence`
            // is true, so a code line that looks like an admonition or tab marker
            // would open a new context from inside a fence — confirm intended.

            // Check for admonition start — push new context
            if is_admonition && let Some(indent) = mkdocs_admonitions::get_admonition_indent(line) {
                context_indent_stack.push(indent);
                continue;
            }

            // Check for tab marker — push new context
            if is_tab && let Some(indent) = mkdocs_tabs::get_tab_indent(line) {
                context_indent_stack.push(indent);
                continue;
            }

            // Only look for fences inside a MkDocs context
            if context_indent_stack.is_empty() {
                continue;
            }

            let trimmed = line.trim_start();
            let leading_spaces = line.len() - trimmed.len();

            if !in_fence {
                // Check for fence opening
                let (fc, fl) = if trimmed.starts_with("```") {
                    ('`', trimmed.chars().take_while(|&c| c == '`').count())
                } else if trimmed.starts_with("~~~") {
                    ('~', trimmed.chars().take_while(|&c| c == '~').count())
                } else {
                    continue;
                };

                if fl >= 3 {
                    in_fence = true;
                    fence_start_line = i;
                    fence_char = fc;
                    fence_length = fl;
                    fence_indent = leading_spaces;
                    fence_indent_prefix = line.get(..leading_spaces).unwrap_or("").to_string();

                    // Everything after the fence run is the info string.
                    let after_fence = &trimmed[fl..];
                    fence_info_string = after_fence.trim().to_string();
                    fence_language = fence_info_string.split_whitespace().next().unwrap_or("").to_string();

                    // Content starts at the next line's byte offset
                    fence_content_start = line_offsets.get(i + 1).copied().unwrap_or(content.len());
                }
            } else {
                // Check for fence closing: same fence character, at least as long
                // as the opener, and nothing but whitespace after the fence run.
                let is_closing = if fence_char == '`' {
                    trimmed.starts_with("```")
                        && trimmed.chars().take_while(|&c| c == '`').count() >= fence_length
                        && trimmed.trim_start_matches('`').trim().is_empty()
                } else {
                    trimmed.starts_with("~~~")
                        && trimmed.chars().take_while(|&c| c == '~').count() >= fence_length
                        && trimmed.trim_start_matches('~').trim().is_empty()
                };

                if is_closing {
                    // Content ends where the closing fence line begins.
                    let content_end = line_offsets.get(i).copied().unwrap_or(content.len());

                    if content_end >= fence_content_start {
                        blocks.push(FencedCodeBlockInfo {
                            start_line: fence_start_line,
                            end_line: i,
                            content_start: fence_content_start,
                            content_end,
                            language: fence_language.clone(),
                            info_string: fence_info_string.clone(),
                            fence_char,
                            fence_length,
                            indent: fence_indent,
                            indent_prefix: fence_indent_prefix.clone(),
                        });
                    }

                    in_fence = false;
                }
            }
        }

        blocks
    }
559
560    /// Resolve a language tag to its canonical name.
561    fn resolve_language(&self, language: &str) -> String {
562        let lower = language.to_lowercase();
563        if let Some(mapped) = self.user_aliases.get(&lower) {
564            return mapped.clone();
565        }
566        match self.config.normalize_language {
567            NormalizeLanguage::Linguist => self.linguist.resolve(&lower),
568            NormalizeLanguage::Exact => lower,
569        }
570    }
571
572    /// Get the effective on_error setting for a language.
573    fn get_on_error(&self, language: &str) -> OnError {
574        self.config
575            .languages
576            .get(language)
577            .and_then(|lc| lc.on_error)
578            .unwrap_or(self.config.on_error)
579    }
580
581    /// Strip the fence indentation prefix from each line of a code block.
582    fn strip_indent_from_block(&self, content: &str, indent_prefix: &str) -> String {
583        if indent_prefix.is_empty() {
584            return content.to_string();
585        }
586
587        let mut out = String::with_capacity(content.len());
588        for line in content.split_inclusive('\n') {
589            if let Some(stripped) = line.strip_prefix(indent_prefix) {
590                out.push_str(stripped);
591            } else {
592                out.push_str(line);
593            }
594        }
595        out
596    }
597
598    /// Re-apply the fence indentation prefix to each line of a code block.
599    fn apply_indent_to_block(&self, content: &str, indent_prefix: &str) -> String {
600        if indent_prefix.is_empty() {
601            return content.to_string();
602        }
603        if content.is_empty() {
604            return String::new();
605        }
606
607        let mut out = String::with_capacity(content.len() + indent_prefix.len());
608        for line in content.split_inclusive('\n') {
609            if line == "\n" {
610                out.push_str(line);
611            } else {
612                out.push_str(indent_prefix);
613                out.push_str(line);
614            }
615        }
616        out
617    }
618
    /// Lint all code blocks in the content.
    ///
    /// Returns diagnostics from all configured linters.
    ///
    /// Blocks without a language tag, blocks whose language is configured with
    /// `enabled = false`, and blocks with empty content are skipped. Unknown
    /// tool IDs are logged and skipped.
    ///
    /// # Errors
    ///
    /// - `ProcessorError::NoToolsConfigured` when a block's language has no lint
    ///   tools and `on_missing_language_definition` is `FailFast`.
    /// - `ProcessorError::ToolBinaryNotFound` when a tool's command is not
    ///   available and `on_missing_tool_binary` is `FailFast`.
    /// - `ProcessorError::ToolError` when a tool fails to run and the effective
    ///   `on_error` policy for the language is `Fail`.
    pub fn lint(&self, content: &str) -> Result<Vec<CodeBlockDiagnostic>, ProcessorError> {
        // Skip the expensive parse when no tools could possibly produce output.
        // With on_missing=Ignore (default) and no languages with lint tools configured,
        // every block would be skipped, so the parse is wasted work.
        if self.config.on_missing_language_definition == OnMissing::Ignore
            && !self
                .config
                .languages
                .values()
                .any(|lc| lc.enabled && !lc.lint.is_empty())
        {
            return Ok(Vec::new());
        }

        // Quick content check: skip parsing if no configured language appears in the content.
        // This avoids the expensive pulldown-cmark parse when there are no matching code blocks.
        if self.config.on_missing_language_definition == OnMissing::Ignore
            && !self.has_potential_matching_blocks(content, true)
        {
            return Ok(Vec::new());
        }

        let mut all_diagnostics = Vec::new();
        let blocks = self.extract_code_blocks(content);

        for block in blocks {
            if block.language.is_empty() {
                continue; // Skip blocks without language tag
            }

            let canonical_lang = self.resolve_language(&block.language);

            // Get lint tools for this language
            let lang_config = self.config.languages.get(&canonical_lang);

            // If language is explicitly configured with enabled=false, skip silently
            if let Some(lc) = lang_config
                && !lc.enabled
            {
                continue;
            }

            let lint_tools = match lang_config {
                Some(lc) if !lc.lint.is_empty() => &lc.lint,
                _ => {
                    // No tools configured for this language in lint mode
                    match self.config.on_missing_language_definition {
                        OnMissing::Ignore => continue,
                        OnMissing::Fail => {
                            // Report at the opening fence and keep processing other blocks.
                            all_diagnostics.push(CodeBlockDiagnostic {
                                file_line: block.start_line + 1,
                                column: None,
                                message: format!("No lint tools configured for language '{canonical_lang}'"),
                                severity: DiagnosticSeverity::Error,
                                tool: "code-block-tools".to_string(),
                                code_block_start: block.start_line + 1,
                            });
                            continue;
                        }
                        OnMissing::FailFast => {
                            return Err(ProcessorError::NoToolsConfigured {
                                language: canonical_lang,
                                line: block.start_line + 1,
                            });
                        }
                    }
                }
            };

            // Extract code block content; empty or out-of-range spans are skipped.
            let code_content_raw = if block.content_start < block.content_end && block.content_end <= content.len() {
                &content[block.content_start..block.content_end]
            } else {
                continue;
            };
            // Tools see the code without the fence's indentation.
            let code_content = self.strip_indent_from_block(code_content_raw, &block.indent_prefix);

            // Run each lint tool
            for tool_id in lint_tools {
                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown linting
                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
                    continue;
                }

                let Some(tool_def) = self.resolve_tool(tool_id, ToolContext::Lint) else {
                    log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
                    continue;
                };

                // Check if tool binary exists before running
                // (the binary is the first element of the tool's command; an empty
                // command skips the availability check).
                let tool_name = tool_def.command.first().map_or("", String::as_str);
                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
                    match self.config.on_missing_tool_binary {
                        OnMissing::Ignore => {
                            log::debug!("Tool binary '{tool_name}' not found, skipping");
                            continue;
                        }
                        OnMissing::Fail => {
                            all_diagnostics.push(CodeBlockDiagnostic {
                                file_line: block.start_line + 1,
                                column: None,
                                message: format!("Tool binary '{tool_name}' not found in PATH"),
                                severity: DiagnosticSeverity::Error,
                                tool: "code-block-tools".to_string(),
                                code_block_start: block.start_line + 1,
                            });
                            continue;
                        }
                        OnMissing::FailFast => {
                            return Err(ProcessorError::ToolBinaryNotFound {
                                tool: tool_name.to_string(),
                                language: canonical_lang.clone(),
                                line: block.start_line + 1,
                            });
                        }
                    }
                }

                // Tools treat their input as a whole file, so make sure it ends with a newline.
                let tool_input = ensure_trailing_newline(&code_content);
                match self.executor.lint(tool_def, &tool_input, Some(self.config.timeout)) {
                    Ok(output) => {
                        // Parse tool output into diagnostics
                        let diagnostics = self.parse_tool_output(
                            &output,
                            tool_id,
                            block.start_line + 1, // Convert to 1-indexed
                        );
                        all_diagnostics.extend(diagnostics);
                    }
                    Err(e) => {
                        // The per-language policy (if any) overrides the global one.
                        let on_error = self.get_on_error(&canonical_lang);
                        match on_error {
                            OnError::Fail => return Err(e.into()),
                            OnError::Warn => {
                                log::warn!("Tool '{tool_id}' failed: {e}");
                            }
                            OnError::Skip => {
                                // Silently skip
                            }
                        }
                    }
                }
            }
        }

        Ok(all_diagnostics)
    }
769
770    /// Format all code blocks in the content.
771    ///
772    /// Returns the modified content with formatted code blocks and any errors that occurred.
773    /// With `on-missing-*` = `fail`, errors are collected but formatting continues.
774    /// With `on-missing-*` = `fail-fast`, returns Err immediately on first error.
775    pub fn format(&self, content: &str) -> Result<FormatOutput, ProcessorError> {
776        let no_output = FormatOutput {
777            content: content.to_string(),
778            had_errors: false,
779            error_messages: Vec::new(),
780        };
781
782        // Skip the expensive parse when no tools could produce output
783        if self.config.on_missing_language_definition == OnMissing::Ignore
784            && !self
785                .config
786                .languages
787                .values()
788                .any(|lc| lc.enabled && !lc.format.is_empty())
789        {
790            return Ok(no_output);
791        }
792
793        // Quick content check: skip parsing if no configured language appears in the content
794        if self.config.on_missing_language_definition == OnMissing::Ignore
795            && !self.has_potential_matching_blocks(content, false)
796        {
797            return Ok(no_output);
798        }
799
800        let blocks = self.extract_code_blocks(content);
801
802        if blocks.is_empty() {
803            return Ok(FormatOutput {
804                content: content.to_string(),
805                had_errors: false,
806                error_messages: Vec::new(),
807            });
808        }
809
810        // Process blocks in reverse order to maintain byte offsets
811        let mut result = content.to_string();
812        let mut error_messages: Vec<String> = Vec::new();
813
814        for block in blocks.into_iter().rev() {
815            if block.language.is_empty() {
816                continue;
817            }
818
819            let canonical_lang = self.resolve_language(&block.language);
820
821            // Get format tools for this language
822            let lang_config = self.config.languages.get(&canonical_lang);
823
824            // If language is explicitly configured with enabled=false, skip silently
825            if let Some(lc) = lang_config
826                && !lc.enabled
827            {
828                continue;
829            }
830
831            let format_tools = match lang_config {
832                Some(lc) if !lc.format.is_empty() => &lc.format,
833                _ => {
834                    // No tools configured for this language in format mode
835                    match self.config.on_missing_language_definition {
836                        OnMissing::Ignore => continue,
837                        OnMissing::Fail => {
838                            error_messages.push(format!(
839                                "No format tools configured for language '{canonical_lang}' at line {}",
840                                block.start_line + 1
841                            ));
842                            continue;
843                        }
844                        OnMissing::FailFast => {
845                            return Err(ProcessorError::NoToolsConfigured {
846                                language: canonical_lang,
847                                line: block.start_line + 1,
848                            });
849                        }
850                    }
851                }
852            };
853
854            // Extract code block content
855            if block.content_start >= block.content_end || block.content_end > result.len() {
856                continue;
857            }
858            let code_content_raw = result[block.content_start..block.content_end].to_string();
859            let code_content = self.strip_indent_from_block(&code_content_raw, &block.indent_prefix);
860
861            // Run format tools (use first successful one)
862            let mut formatted = code_content.clone();
863            let mut tool_ran = false;
864            for tool_id in format_tools {
865                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown formatting
866                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
867                    continue;
868                }
869
870                let Some(tool_def) = self.resolve_tool(tool_id, ToolContext::Format) else {
871                    log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
872                    continue;
873                };
874
875                // Check if tool binary exists before running
876                let tool_name = tool_def.command.first().map_or("", String::as_str);
877                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
878                    match self.config.on_missing_tool_binary {
879                        OnMissing::Ignore => {
880                            log::debug!("Tool binary '{tool_name}' not found, skipping");
881                            continue;
882                        }
883                        OnMissing::Fail => {
884                            error_messages.push(format!(
885                                "Tool binary '{tool_name}' not found in PATH for language '{canonical_lang}' at line {}",
886                                block.start_line + 1
887                            ));
888                            continue;
889                        }
890                        OnMissing::FailFast => {
891                            return Err(ProcessorError::ToolBinaryNotFound {
892                                tool: tool_name.to_string(),
893                                language: canonical_lang.clone(),
894                                line: block.start_line + 1,
895                            });
896                        }
897                    }
898                }
899
900                let tool_input = ensure_trailing_newline(&formatted);
901                match self.executor.format(tool_def, &tool_input, Some(self.config.timeout)) {
902                    Ok(output) => {
903                        // Guard against formatters that produce empty output for non-empty input.
904                        // This prevents data loss from misconfigured tools (e.g., a lint tool
905                        // used as a formatter that validates but doesn't output content).
906                        if output.trim().is_empty() && !formatted.trim().is_empty() {
907                            log::warn!("Formatter '{tool_id}' produced empty output for non-empty input, skipping");
908                            continue;
909                        }
910
911                        // Ensure trailing newline matches original (unindented)
912                        formatted = output;
913                        if code_content.ends_with('\n') && !formatted.ends_with('\n') {
914                            formatted.push('\n');
915                        } else if !code_content.ends_with('\n') && formatted.ends_with('\n') {
916                            formatted.pop();
917                        }
918                        tool_ran = true;
919                        break; // Use first successful formatter
920                    }
921                    Err(e) => {
922                        let on_error = self.get_on_error(&canonical_lang);
923                        match on_error {
924                            OnError::Fail => {
925                                return Err(ProcessorError::ToolErrorAt {
926                                    error: e,
927                                    line: block.start_line + 1,
928                                    language: canonical_lang,
929                                });
930                            }
931                            OnError::Warn => {
932                                error_messages.push(format!("line {} ({}): {e}", block.start_line + 1, canonical_lang));
933                            }
934                            OnError::Skip => {}
935                        }
936                    }
937                }
938            }
939
940            // Replace content if changed and a tool actually ran
941            if tool_ran && formatted != code_content {
942                let reindented = self.apply_indent_to_block(&formatted, &block.indent_prefix);
943                if reindented != code_content_raw {
944                    result.replace_range(block.content_start..block.content_end, &reindented);
945                }
946            }
947        }
948
949        Ok(FormatOutput {
950            content: result,
951            had_errors: !error_messages.is_empty(),
952            error_messages,
953        })
954    }
955
956    /// Parse tool output into diagnostics.
957    ///
958    /// This is a basic parser that handles common output formats.
959    /// Tools vary widely in their output format, so this is best-effort.
960    fn parse_tool_output(
961        &self,
962        output: &ToolOutput,
963        tool_id: &str,
964        code_block_start_line: usize,
965    ) -> Vec<CodeBlockDiagnostic> {
966        let mut diagnostics = Vec::new();
967        let mut shellcheck_line: Option<usize> = None;
968
969        // Strip ANSI escape codes and combine stdout + stderr for parsing
970        let stdout_clean = strip_ansi_codes(&output.stdout);
971        let stderr_clean = strip_ansi_codes(&output.stderr);
972        let combined = format!("{stdout_clean}\n{stderr_clean}");
973
974        // State for multi-line "Error: msg" / "at line N column M" pattern
975        let mut pending_error: Option<(String, DiagnosticSeverity)> = None;
976
977        for line in combined.lines() {
978            let line = line.trim();
979            if line.is_empty() {
980                continue;
981            }
982
983            // Resolve pending "Error: msg" from previous line
984            if let Some((ref msg, severity)) = pending_error {
985                if let Some((line_num, col)) = Self::parse_at_line_column(line) {
986                    diagnostics.push(CodeBlockDiagnostic {
987                        file_line: code_block_start_line + line_num,
988                        column: Some(col),
989                        message: msg.clone(),
990                        severity,
991                        tool: tool_id.to_string(),
992                        code_block_start: code_block_start_line,
993                    });
994                    pending_error = None;
995                    continue;
996                }
997                // No position info found; emit error without line mapping
998                diagnostics.push(CodeBlockDiagnostic {
999                    file_line: code_block_start_line,
1000                    column: None,
1001                    message: msg.clone(),
1002                    severity,
1003                    tool: tool_id.to_string(),
1004                    code_block_start: code_block_start_line,
1005                });
1006                pending_error = None;
1007                // Fall through to parse current line
1008            }
1009
1010            if let Some(line_num) = self.parse_shellcheck_header(line) {
1011                shellcheck_line = Some(line_num);
1012                continue;
1013            }
1014
1015            if let Some(line_num) = shellcheck_line
1016                && let Some(diag) = self.parse_shellcheck_message(line, tool_id, code_block_start_line, line_num)
1017            {
1018                diagnostics.push(diag);
1019                continue;
1020            }
1021
1022            // Try pattern: "file:line:col: message" or "file:line: message"
1023            if let Some(diag) = self.parse_standard_format(line, tool_id, code_block_start_line) {
1024                diagnostics.push(diag);
1025                continue;
1026            }
1027
1028            // Try pattern: "line:col message" (eslint style)
1029            if let Some(diag) = self.parse_eslint_format(line, tool_id, code_block_start_line) {
1030                diagnostics.push(diag);
1031                continue;
1032            }
1033
1034            // Try single-line shellcheck format fallback
1035            if let Some(diag) = self.parse_shellcheck_format(line, tool_id, code_block_start_line) {
1036                diagnostics.push(diag);
1037                continue;
1038            }
1039
1040            // Try multi-line "Error: msg" / "Warning: msg" pattern
1041            if let Some(error_info) = Self::parse_error_line(line) {
1042                pending_error = Some(error_info);
1043            }
1044        }
1045
1046        // Flush any remaining pending error
1047        if let Some((msg, severity)) = pending_error {
1048            diagnostics.push(CodeBlockDiagnostic {
1049                file_line: code_block_start_line,
1050                column: None,
1051                message: msg,
1052                severity,
1053                tool: tool_id.to_string(),
1054                code_block_start: code_block_start_line,
1055            });
1056        }
1057
1058        // If no diagnostics parsed but tool failed, use combined output as fallback
1059        if diagnostics.is_empty() && !output.success {
1060            let lines: Vec<&str> = combined.lines().map(str::trim).filter(|l| !l.is_empty()).collect();
1061
1062            if lines.is_empty() {
1063                let exit_code = output.exit_code;
1064                diagnostics.push(CodeBlockDiagnostic {
1065                    file_line: code_block_start_line,
1066                    column: None,
1067                    message: format!("Tool exited with code {exit_code}"),
1068                    severity: DiagnosticSeverity::Error,
1069                    tool: tool_id.to_string(),
1070                    code_block_start: code_block_start_line,
1071                });
1072            } else {
1073                for line_text in lines {
1074                    diagnostics.push(CodeBlockDiagnostic {
1075                        file_line: code_block_start_line,
1076                        column: None,
1077                        message: line_text.to_string(),
1078                        severity: DiagnosticSeverity::Error,
1079                        tool: tool_id.to_string(),
1080                        code_block_start: code_block_start_line,
1081                    });
1082                }
1083            }
1084        }
1085
1086        diagnostics
1087    }
1088
1089    /// Parse standard "file:line:col: message" format.
1090    fn parse_standard_format(
1091        &self,
1092        line: &str,
1093        tool_id: &str,
1094        code_block_start_line: usize,
1095    ) -> Option<CodeBlockDiagnostic> {
1096        // Match patterns like "file.py:1:10: E501 message"
1097        let mut parts = line.rsplitn(4, ':');
1098        let message = parts.next()?.trim().to_string();
1099        let part1 = parts.next()?.trim().to_string();
1100        let part2 = parts.next()?.trim().to_string();
1101        let part3 = parts.next().map(|s| s.trim().to_string());
1102
1103        let (line_part, col_part) = if part3.is_some() {
1104            (part2, Some(part1))
1105        } else {
1106            (part1, None)
1107        };
1108
1109        if let Ok(line_num) = line_part.parse::<usize>() {
1110            let column = col_part.and_then(|s| s.parse::<usize>().ok());
1111            let message = Self::strip_fixable_markers(&message);
1112            if !message.is_empty() {
1113                let severity = self.infer_severity(&message);
1114                return Some(CodeBlockDiagnostic {
1115                    file_line: code_block_start_line + line_num,
1116                    column,
1117                    message,
1118                    severity,
1119                    tool: tool_id.to_string(),
1120                    code_block_start: code_block_start_line,
1121                });
1122            }
1123        }
1124        None
1125    }
1126
1127    /// Parse eslint-style "line:col severity message" format.
1128    fn parse_eslint_format(
1129        &self,
1130        line: &str,
1131        tool_id: &str,
1132        code_block_start_line: usize,
1133    ) -> Option<CodeBlockDiagnostic> {
1134        // Match "1:10 error Message"
1135        let parts: Vec<&str> = line.splitn(3, ' ').collect();
1136        if parts.len() >= 2 {
1137            let loc_parts: Vec<&str> = parts[0].split(':').collect();
1138            if loc_parts.len() == 2
1139                && let (Ok(line_num), Ok(col)) = (loc_parts[0].parse::<usize>(), loc_parts[1].parse::<usize>())
1140            {
1141                let (sev_part, msg_part) = if parts.len() >= 3 {
1142                    (parts[1], parts[2])
1143                } else {
1144                    (parts[1], "")
1145                };
1146                let message = if msg_part.is_empty() {
1147                    sev_part.to_string()
1148                } else {
1149                    msg_part.to_string()
1150                };
1151                let message = Self::strip_fixable_markers(&message);
1152                let severity = match sev_part.to_lowercase().as_str() {
1153                    "error" => DiagnosticSeverity::Error,
1154                    "warning" | "warn" => DiagnosticSeverity::Warning,
1155                    "info" => DiagnosticSeverity::Info,
1156                    _ => self.infer_severity(&message),
1157                };
1158                return Some(CodeBlockDiagnostic {
1159                    file_line: code_block_start_line + line_num,
1160                    column: Some(col),
1161                    message,
1162                    severity,
1163                    tool: tool_id.to_string(),
1164                    code_block_start: code_block_start_line,
1165                });
1166            }
1167        }
1168        None
1169    }
1170
1171    /// Parse shellcheck-style "In - line N: message" format.
1172    fn parse_shellcheck_format(
1173        &self,
1174        line: &str,
1175        tool_id: &str,
1176        code_block_start_line: usize,
1177    ) -> Option<CodeBlockDiagnostic> {
1178        // Match "In - line 5:" pattern
1179        if line.starts_with("In ")
1180            && line.contains(" line ")
1181            && let Some(line_start) = line.find(" line ")
1182        {
1183            let after_line = &line[line_start + 6..];
1184            if let Some(colon_pos) = after_line.find(':')
1185                && let Ok(line_num) = after_line[..colon_pos].trim().parse::<usize>()
1186            {
1187                let message = Self::strip_fixable_markers(after_line[colon_pos + 1..].trim());
1188                if !message.is_empty() {
1189                    let severity = self.infer_severity(&message);
1190                    return Some(CodeBlockDiagnostic {
1191                        file_line: code_block_start_line + line_num,
1192                        column: None,
1193                        message,
1194                        severity,
1195                        tool: tool_id.to_string(),
1196                        code_block_start: code_block_start_line,
1197                    });
1198                }
1199            }
1200        }
1201        None
1202    }
1203
1204    /// Parse shellcheck header line to capture line number context.
1205    fn parse_shellcheck_header(&self, line: &str) -> Option<usize> {
1206        if line.starts_with("In ")
1207            && line.contains(" line ")
1208            && let Some(line_start) = line.find(" line ")
1209        {
1210            let after_line = &line[line_start + 6..];
1211            if let Some(colon_pos) = after_line.find(':') {
1212                return after_line[..colon_pos].trim().parse::<usize>().ok();
1213            }
1214        }
1215        None
1216    }
1217
1218    /// Parse shellcheck message line containing SCXXXX codes.
1219    fn parse_shellcheck_message(
1220        &self,
1221        line: &str,
1222        tool_id: &str,
1223        code_block_start_line: usize,
1224        line_num: usize,
1225    ) -> Option<CodeBlockDiagnostic> {
1226        let sc_pos = line.find("SC")?;
1227        let after_sc = &line[sc_pos + 2..];
1228        let code_len = after_sc.chars().take_while(char::is_ascii_digit).count();
1229        if code_len == 0 {
1230            return None;
1231        }
1232        let after_code = &after_sc[code_len..];
1233        let sev_start = after_code.find('(')? + 1;
1234        let sev_end = after_code[sev_start..].find(')')? + sev_start;
1235        let sev = after_code[sev_start..sev_end].trim().to_lowercase();
1236        let message_start = after_code.find("):")? + 2;
1237        let message = Self::strip_fixable_markers(after_code[message_start..].trim());
1238        if message.is_empty() {
1239            return None;
1240        }
1241
1242        let severity = match sev.as_str() {
1243            "error" => DiagnosticSeverity::Error,
1244            "warning" | "warn" => DiagnosticSeverity::Warning,
1245            "info" | "style" => DiagnosticSeverity::Info,
1246            _ => self.infer_severity(&message),
1247        };
1248
1249        Some(CodeBlockDiagnostic {
1250            file_line: code_block_start_line + line_num,
1251            column: None,
1252            message,
1253            severity,
1254            tool: tool_id.to_string(),
1255            code_block_start: code_block_start_line,
1256        })
1257    }
1258
1259    /// Parse "Error: <message>" or "Warning: <message>" lines.
1260    ///
1261    /// Used for tools like tombi that output multi-line diagnostics where the
1262    /// error message and position are on separate lines. Only matches capitalized
1263    /// prefixes to avoid conflicting with lowercase `error:` in less structured output.
1264    fn parse_error_line(line: &str) -> Option<(String, DiagnosticSeverity)> {
1265        let (msg, severity) = if let Some(msg) = line.strip_prefix("Error:") {
1266            (msg, DiagnosticSeverity::Error)
1267        } else if let Some(msg) = line.strip_prefix("Warning:") {
1268            (msg, DiagnosticSeverity::Warning)
1269        } else {
1270            return None;
1271        };
1272        let msg = msg.trim();
1273        if msg.is_empty() {
1274            return None;
1275        }
1276        Some((msg.to_string(), severity))
1277    }
1278
1279    /// Parse "at line N column M" position lines (case-insensitive).
1280    ///
1281    /// Returns (line_number, column_number) if the pattern matches.
1282    fn parse_at_line_column(line: &str) -> Option<(usize, usize)> {
1283        let lower = line.to_lowercase();
1284        let rest = lower.strip_prefix("at line ")?;
1285        let mut parts = rest.split_whitespace();
1286        let line_num: usize = parts.next()?.parse().ok()?;
1287        if parts.next()? != "column" {
1288            return None;
1289        }
1290        let col: usize = parts.next()?.parse().ok()?;
1291        Some((line_num, col))
1292    }
1293
1294    /// Infer severity from message content.
1295    fn infer_severity(&self, message: &str) -> DiagnosticSeverity {
1296        let lower = message.to_lowercase();
1297        if lower.contains("error")
1298            || lower.starts_with('e') && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1299            || lower.starts_with('f') && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1300        {
1301            DiagnosticSeverity::Error
1302        } else if lower.contains("warning")
1303            || lower.contains("warn")
1304            || lower.starts_with('w') && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1305        {
1306            DiagnosticSeverity::Warning
1307        } else {
1308            DiagnosticSeverity::Info
1309        }
1310    }
1311
1312    /// Strip "fixable" markers from external tool messages.
1313    ///
1314    /// External tools like ruff show `[*]` to indicate fixable issues, but in rumdl's
1315    /// context these markers can be misleading - the lint tool's fix capability may
1316    /// differ from what our configured formatter can fix. We strip these markers
1317    /// to avoid making promises we can't keep.
1318    fn strip_fixable_markers(message: &str) -> String {
1319        message
1320            .replace(" [*]", "")
1321            .replace("[*] ", "")
1322            .replace("[*]", "")
1323            .replace(" (fixable)", "")
1324            .replace("(fixable) ", "")
1325            .replace("(fixable)", "")
1326            .replace(" [fix available]", "")
1327            .replace("[fix available] ", "")
1328            .replace("[fix available]", "")
1329            .replace(" [autofix]", "")
1330            .replace("[autofix] ", "")
1331            .replace("[autofix]", "")
1332            .trim()
1333            .to_string()
1334    }
1335}
1336
/// Builder for FencedCodeBlockInfo during parsing.
///
/// NOTE(review): the fields presumably mirror the same-named fields of
/// `FencedCodeBlockInfo`; the per-field notes below are based on how those fields are
/// used elsewhere in this file and should be confirmed against the code that fills
/// this builder in.
struct FencedCodeBlockBuilder {
    // Line on which the block starts; appears to be 0-indexed (callers add 1 when reporting).
    start_line: usize,
    // Presumably the byte offset where the block's content begins - TODO confirm.
    content_start: usize,
    // Language name taken from the fence's info string (empty when none is given).
    language: String,
    // Full info string following the opening fence, e.g. `python title="example.py"`.
    info_string: String,
    // Fence character, '`' or '~'.
    fence_char: char,
    // Number of fence characters in the opening fence.
    fence_length: usize,
    // Presumably the width of the fence's indentation - TODO confirm units.
    indent: usize,
    // Leading whitespace of the fence line, stripped from and re-applied to block content.
    indent_prefix: String,
}
1348
1349#[cfg(test)]
1350mod tests {
1351    use super::*;
1352
1353    fn default_config() -> CodeBlockToolsConfig {
1354        CodeBlockToolsConfig::default()
1355    }
1356
1357    #[test]
1358    fn test_extract_code_blocks() {
1359        let config = default_config();
1360        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1361
1362        let content = r#"# Example
1363
1364```python
1365def hello():
1366    print("Hello")
1367```
1368
1369Some text
1370
1371```rust
1372fn main() {}
1373```
1374"#;
1375
1376        let blocks = processor.extract_code_blocks(content);
1377
1378        assert_eq!(blocks.len(), 2);
1379
1380        assert_eq!(blocks[0].language, "python");
1381        assert_eq!(blocks[0].fence_char, '`');
1382        assert_eq!(blocks[0].fence_length, 3);
1383        assert_eq!(blocks[0].start_line, 2);
1384        assert_eq!(blocks[0].indent, 0);
1385        assert_eq!(blocks[0].indent_prefix, "");
1386
1387        assert_eq!(blocks[1].language, "rust");
1388        assert_eq!(blocks[1].fence_char, '`');
1389        assert_eq!(blocks[1].fence_length, 3);
1390    }
1391
1392    #[test]
1393    fn test_extract_code_blocks_with_info_string() {
1394        let config = default_config();
1395        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1396
1397        let content = "```python title=\"example.py\"\ncode\n```";
1398        let blocks = processor.extract_code_blocks(content);
1399
1400        assert_eq!(blocks.len(), 1);
1401        assert_eq!(blocks[0].language, "python");
1402        assert_eq!(blocks[0].info_string, "python title=\"example.py\"");
1403    }
1404
1405    #[test]
1406    fn test_extract_code_blocks_tilde_fence() {
1407        let config = default_config();
1408        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1409
1410        let content = "~~~bash\necho hello\n~~~";
1411        let blocks = processor.extract_code_blocks(content);
1412
1413        assert_eq!(blocks.len(), 1);
1414        assert_eq!(blocks[0].language, "bash");
1415        assert_eq!(blocks[0].fence_char, '~');
1416        assert_eq!(blocks[0].fence_length, 3);
1417        assert_eq!(blocks[0].indent_prefix, "");
1418    }
1419
1420    #[test]
1421    fn test_extract_code_blocks_with_indent_prefix() {
1422        let config = default_config();
1423        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1424
1425        let content = "  - item\n    ```python\n    print('hi')\n    ```";
1426        let blocks = processor.extract_code_blocks(content);
1427
1428        assert_eq!(blocks.len(), 1);
1429        assert_eq!(blocks[0].indent_prefix, "    ");
1430    }
1431
1432    #[test]
1433    fn test_extract_code_blocks_no_language() {
1434        let config = default_config();
1435        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1436
1437        let content = "```\nplain code\n```";
1438        let blocks = processor.extract_code_blocks(content);
1439
1440        assert_eq!(blocks.len(), 1);
1441        assert_eq!(blocks[0].language, "");
1442    }
1443
1444    #[test]
1445    fn test_resolve_language_linguist() {
1446        let mut config = default_config();
1447        config.normalize_language = NormalizeLanguage::Linguist;
1448        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1449
1450        assert_eq!(processor.resolve_language("py"), "python");
1451        assert_eq!(processor.resolve_language("bash"), "shell");
1452        assert_eq!(processor.resolve_language("js"), "javascript");
1453    }
1454
1455    #[test]
1456    fn test_resolve_language_exact() {
1457        let mut config = default_config();
1458        config.normalize_language = NormalizeLanguage::Exact;
1459        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1460
1461        assert_eq!(processor.resolve_language("py"), "py");
1462        assert_eq!(processor.resolve_language("BASH"), "bash");
1463    }
1464
1465    #[test]
1466    fn test_resolve_language_user_alias_override() {
1467        let mut config = default_config();
1468        config.language_aliases.insert("py".to_string(), "python".to_string());
1469        config.normalize_language = NormalizeLanguage::Exact;
1470        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1471
1472        assert_eq!(processor.resolve_language("PY"), "python");
1473    }
1474
1475    #[test]
1476    fn test_indent_strip_and_reapply_roundtrip() {
1477        let config = default_config();
1478        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1479
1480        let raw = "    def hello():\n        print('hi')";
1481        let stripped = processor.strip_indent_from_block(raw, "    ");
1482        assert_eq!(stripped, "def hello():\n    print('hi')");
1483
1484        let reapplied = processor.apply_indent_to_block(&stripped, "    ");
1485        assert_eq!(reapplied, raw);
1486    }
1487
1488    #[test]
1489    fn test_infer_severity() {
1490        let config = default_config();
1491        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1492
1493        assert_eq!(
1494            processor.infer_severity("E501 line too long"),
1495            DiagnosticSeverity::Error
1496        );
1497        assert_eq!(
1498            processor.infer_severity("W291 trailing whitespace"),
1499            DiagnosticSeverity::Warning
1500        );
1501        assert_eq!(
1502            processor.infer_severity("error: something failed"),
1503            DiagnosticSeverity::Error
1504        );
1505        assert_eq!(
1506            processor.infer_severity("warning: unused variable"),
1507            DiagnosticSeverity::Warning
1508        );
1509        assert_eq!(
1510            processor.infer_severity("note: consider using"),
1511            DiagnosticSeverity::Info
1512        );
1513    }
1514
1515    #[test]
1516    fn test_parse_standard_format_windows_path() {
1517        let config = default_config();
1518        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1519
1520        let output = ToolOutput {
1521            stdout: "C:\\path\\file.py:2:5: E123 message".to_string(),
1522            stderr: String::new(),
1523            exit_code: 1,
1524            success: false,
1525        };
1526
1527        let diags = processor.parse_tool_output(&output, "ruff:check", 10);
1528        assert_eq!(diags.len(), 1);
1529        assert_eq!(diags[0].file_line, 12);
1530        assert_eq!(diags[0].column, Some(5));
1531        assert_eq!(diags[0].message, "E123 message");
1532    }
1533
1534    #[test]
1535    fn test_parse_eslint_severity() {
1536        let config = default_config();
1537        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1538
1539        let output = ToolOutput {
1540            stdout: "1:2 error Unexpected token".to_string(),
1541            stderr: String::new(),
1542            exit_code: 1,
1543            success: false,
1544        };
1545
1546        let diags = processor.parse_tool_output(&output, "eslint", 5);
1547        assert_eq!(diags.len(), 1);
1548        assert_eq!(diags[0].file_line, 6);
1549        assert_eq!(diags[0].column, Some(2));
1550        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1551        assert_eq!(diags[0].message, "Unexpected token");
1552    }
1553
1554    #[test]
1555    fn test_parse_shellcheck_multiline() {
1556        let config = default_config();
1557        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1558
1559        let output = ToolOutput {
1560            stdout: "In - line 3:\necho $var\n ^-- SC2086 (info): Double quote to prevent globbing".to_string(),
1561            stderr: String::new(),
1562            exit_code: 1,
1563            success: false,
1564        };
1565
1566        let diags = processor.parse_tool_output(&output, "shellcheck", 10);
1567        assert_eq!(diags.len(), 1);
1568        assert_eq!(diags[0].file_line, 13);
1569        assert_eq!(diags[0].severity, DiagnosticSeverity::Info);
1570        assert_eq!(diags[0].message, "Double quote to prevent globbing");
1571    }
1572
1573    #[test]
1574    fn test_lint_no_config() {
1575        let config = default_config();
1576        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1577
1578        let content = "```python\nprint('hello')\n```";
1579        let result = processor.lint(content);
1580
1581        // Should succeed with no diagnostics (no tools configured)
1582        assert!(result.is_ok());
1583        assert!(result.unwrap().is_empty());
1584    }
1585
1586    #[test]
1587    fn test_format_no_config() {
1588        let config = default_config();
1589        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1590
1591        let content = "```python\nprint('hello')\n```";
1592        let result = processor.format(content);
1593
1594        // Should succeed with unchanged content (no tools configured)
1595        assert!(result.is_ok());
1596        let output = result.unwrap();
1597        assert_eq!(output.content, content);
1598        assert!(!output.had_errors);
1599        assert!(output.error_messages.is_empty());
1600    }
1601
1602    #[test]
1603    fn test_lint_on_missing_language_definition_fail() {
1604        let mut config = default_config();
1605        config.on_missing_language_definition = OnMissing::Fail;
1606        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1607
1608        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1609        let result = processor.lint(content);
1610
1611        // Should succeed but return diagnostics for both missing language definitions
1612        assert!(result.is_ok());
1613        let diagnostics = result.unwrap();
1614        assert_eq!(diagnostics.len(), 2);
1615        assert!(diagnostics[0].message.contains("No lint tools configured"));
1616        assert!(diagnostics[0].message.contains("python"));
1617        assert!(diagnostics[1].message.contains("javascript"));
1618    }
1619
1620    #[test]
1621    fn test_lint_on_missing_language_definition_fail_fast() {
1622        let mut config = default_config();
1623        config.on_missing_language_definition = OnMissing::FailFast;
1624        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1625
1626        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1627        let result = processor.lint(content);
1628
1629        // Should fail immediately on first missing language
1630        assert!(result.is_err());
1631        let err = result.unwrap_err();
1632        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1633    }
1634
1635    #[test]
1636    fn test_format_on_missing_language_definition_fail() {
1637        let mut config = default_config();
1638        config.on_missing_language_definition = OnMissing::Fail;
1639        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1640
1641        let content = "```python\nprint('hello')\n```";
1642        let result = processor.format(content);
1643
1644        // Should succeed but report errors
1645        assert!(result.is_ok());
1646        let output = result.unwrap();
1647        assert_eq!(output.content, content); // Content unchanged
1648        assert!(output.had_errors);
1649        assert!(!output.error_messages.is_empty());
1650        assert!(output.error_messages[0].contains("No format tools configured"));
1651    }
1652
1653    #[test]
1654    fn test_format_on_missing_language_definition_fail_fast() {
1655        let mut config = default_config();
1656        config.on_missing_language_definition = OnMissing::FailFast;
1657        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1658
1659        let content = "```python\nprint('hello')\n```";
1660        let result = processor.format(content);
1661
1662        // Should fail immediately
1663        assert!(result.is_err());
1664        let err = result.unwrap_err();
1665        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1666    }
1667
1668    #[test]
1669    fn test_lint_on_missing_tool_binary_fail() {
1670        use super::super::config::{LanguageToolConfig, ToolDefinition};
1671
1672        let mut config = default_config();
1673        config.on_missing_tool_binary = OnMissing::Fail;
1674
1675        // Configure a tool with a non-existent binary
1676        let lang_config = LanguageToolConfig {
1677            lint: vec!["nonexistent-linter".to_string()],
1678            ..Default::default()
1679        };
1680        config.languages.insert("python".to_string(), lang_config);
1681
1682        let tool_def = ToolDefinition {
1683            command: vec!["nonexistent-binary-xyz123".to_string()],
1684            ..Default::default()
1685        };
1686        config.tools.insert("nonexistent-linter".to_string(), tool_def);
1687
1688        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1689
1690        let content = "```python\nprint('hello')\n```";
1691        let result = processor.lint(content);
1692
1693        // Should succeed but return diagnostic for missing binary
1694        assert!(result.is_ok());
1695        let diagnostics = result.unwrap();
1696        assert_eq!(diagnostics.len(), 1);
1697        assert!(diagnostics[0].message.contains("not found in PATH"));
1698    }
1699
1700    #[test]
1701    fn test_lint_on_missing_tool_binary_fail_fast() {
1702        use super::super::config::{LanguageToolConfig, ToolDefinition};
1703
1704        let mut config = default_config();
1705        config.on_missing_tool_binary = OnMissing::FailFast;
1706
1707        // Configure a tool with a non-existent binary
1708        let lang_config = LanguageToolConfig {
1709            lint: vec!["nonexistent-linter".to_string()],
1710            ..Default::default()
1711        };
1712        config.languages.insert("python".to_string(), lang_config);
1713
1714        let tool_def = ToolDefinition {
1715            command: vec!["nonexistent-binary-xyz123".to_string()],
1716            ..Default::default()
1717        };
1718        config.tools.insert("nonexistent-linter".to_string(), tool_def);
1719
1720        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1721
1722        let content = "```python\nprint('hello')\n```";
1723        let result = processor.lint(content);
1724
1725        // Should fail immediately
1726        assert!(result.is_err());
1727        let err = result.unwrap_err();
1728        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
1729    }
1730
1731    #[test]
1732    fn test_format_on_missing_tool_binary_fail() {
1733        use super::super::config::{LanguageToolConfig, ToolDefinition};
1734
1735        let mut config = default_config();
1736        config.on_missing_tool_binary = OnMissing::Fail;
1737
1738        // Configure a tool with a non-existent binary
1739        let lang_config = LanguageToolConfig {
1740            format: vec!["nonexistent-formatter".to_string()],
1741            ..Default::default()
1742        };
1743        config.languages.insert("python".to_string(), lang_config);
1744
1745        let tool_def = ToolDefinition {
1746            command: vec!["nonexistent-binary-xyz123".to_string()],
1747            ..Default::default()
1748        };
1749        config.tools.insert("nonexistent-formatter".to_string(), tool_def);
1750
1751        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1752
1753        let content = "```python\nprint('hello')\n```";
1754        let result = processor.format(content);
1755
1756        // Should succeed but report errors
1757        assert!(result.is_ok());
1758        let output = result.unwrap();
1759        assert_eq!(output.content, content); // Content unchanged
1760        assert!(output.had_errors);
1761        assert!(!output.error_messages.is_empty());
1762        assert!(output.error_messages[0].contains("not found in PATH"));
1763    }
1764
1765    #[test]
1766    fn test_format_on_missing_tool_binary_fail_fast() {
1767        use super::super::config::{LanguageToolConfig, ToolDefinition};
1768
1769        let mut config = default_config();
1770        config.on_missing_tool_binary = OnMissing::FailFast;
1771
1772        // Configure a tool with a non-existent binary
1773        let lang_config = LanguageToolConfig {
1774            format: vec!["nonexistent-formatter".to_string()],
1775            ..Default::default()
1776        };
1777        config.languages.insert("python".to_string(), lang_config);
1778
1779        let tool_def = ToolDefinition {
1780            command: vec!["nonexistent-binary-xyz123".to_string()],
1781            ..Default::default()
1782        };
1783        config.tools.insert("nonexistent-formatter".to_string(), tool_def);
1784
1785        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1786
1787        let content = "```python\nprint('hello')\n```";
1788        let result = processor.format(content);
1789
1790        // Should fail immediately
1791        assert!(result.is_err());
1792        let err = result.unwrap_err();
1793        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
1794    }
1795
1796    #[test]
1797    fn test_lint_rumdl_builtin_skipped_for_markdown() {
1798        // Configure the built-in "rumdl" tool for markdown
1799        // The processor should skip it (handled by embedded markdown linting)
1800        let mut config = default_config();
1801        config.languages.insert(
1802            "markdown".to_string(),
1803            LanguageToolConfig {
1804                lint: vec![RUMDL_BUILTIN_TOOL.to_string()],
1805                ..Default::default()
1806            },
1807        );
1808        config.on_missing_language_definition = OnMissing::Fail;
1809        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1810
1811        let content = "```markdown\n# Hello\n```";
1812        let result = processor.lint(content);
1813
1814        // Should succeed with no diagnostics - "rumdl" tool is skipped, not treated as unknown
1815        assert!(result.is_ok());
1816        assert!(result.unwrap().is_empty());
1817    }
1818
1819    #[test]
1820    fn test_format_rumdl_builtin_skipped_for_markdown() {
1821        // Configure the built-in "rumdl" tool for markdown
1822        let mut config = default_config();
1823        config.languages.insert(
1824            "markdown".to_string(),
1825            LanguageToolConfig {
1826                format: vec![RUMDL_BUILTIN_TOOL.to_string()],
1827                ..Default::default()
1828            },
1829        );
1830        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1831
1832        let content = "```markdown\n# Hello\n```";
1833        let result = processor.format(content);
1834
1835        // Should succeed with unchanged content - "rumdl" tool is skipped
1836        assert!(result.is_ok());
1837        let output = result.unwrap();
1838        assert_eq!(output.content, content);
1839        assert!(!output.had_errors);
1840    }
1841
1842    #[test]
1843    fn test_is_markdown_language() {
1844        // Test the helper function
1845        assert!(is_markdown_language("markdown"));
1846        assert!(is_markdown_language("Markdown"));
1847        assert!(is_markdown_language("MARKDOWN"));
1848        assert!(is_markdown_language("md"));
1849        assert!(is_markdown_language("MD"));
1850        assert!(!is_markdown_language("python"));
1851        assert!(!is_markdown_language("rust"));
1852        assert!(!is_markdown_language(""));
1853    }
1854
1855    // Issue #423: MkDocs admonition code block detection
1856
1857    #[test]
1858    fn test_extract_mkdocs_admonition_code_block() {
1859        let config = default_config();
1860        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1861
1862        let content = "!!! note\n    Some text\n\n    ```python\n    def hello():\n        pass\n    ```\n";
1863        let blocks = processor.extract_code_blocks(content);
1864
1865        assert_eq!(blocks.len(), 1, "Should detect code block inside MkDocs admonition");
1866        assert_eq!(blocks[0].language, "python");
1867    }
1868
1869    #[test]
1870    fn test_extract_mkdocs_tab_code_block() {
1871        let config = default_config();
1872        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1873
1874        let content = "=== \"Python\"\n\n    ```python\n    print(\"hello\")\n    ```\n";
1875        let blocks = processor.extract_code_blocks(content);
1876
1877        assert_eq!(blocks.len(), 1, "Should detect code block inside MkDocs tab");
1878        assert_eq!(blocks[0].language, "python");
1879    }
1880
1881    #[test]
1882    fn test_standard_flavor_ignores_admonition_indented_content() {
1883        let config = default_config();
1884        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1885
1886        // With standard flavor, pulldown_cmark parses this differently;
1887        // our MkDocs extraction should NOT run
1888        let content = "!!! note\n    Some text\n\n    ```python\n    def hello():\n        pass\n    ```\n";
1889        let blocks = processor.extract_code_blocks(content);
1890
1891        // Standard flavor relies on pulldown_cmark only, which may or may not detect
1892        // indented fenced blocks. The key assertion is that we don't double-detect.
1893        // With standard flavor, the MkDocs extraction path is skipped entirely.
1894        for (i, b) in blocks.iter().enumerate() {
1895            for (j, b2) in blocks.iter().enumerate() {
1896                if i != j {
1897                    assert_ne!(b.start_line, b2.start_line, "No duplicate blocks should exist");
1898                }
1899            }
1900        }
1901    }
1902
1903    #[test]
1904    fn test_mkdocs_top_level_blocks_alongside_admonition() {
1905        let config = default_config();
1906        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1907
1908        let content =
1909            "```rust\nfn main() {}\n```\n\n!!! note\n    Some text\n\n    ```python\n    print(\"hello\")\n    ```\n";
1910        let blocks = processor.extract_code_blocks(content);
1911
1912        assert_eq!(
1913            blocks.len(),
1914            2,
1915            "Should detect both top-level and admonition code blocks"
1916        );
1917        assert_eq!(blocks[0].language, "rust");
1918        assert_eq!(blocks[1].language, "python");
1919    }
1920
1921    #[test]
1922    fn test_mkdocs_nested_admonition_code_block() {
1923        let config = default_config();
1924        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1925
1926        let content = "\
1927!!! note
1928    Some text
1929
1930    !!! warning
1931        Nested content
1932
1933        ```python
1934        x = 1
1935        ```
1936";
1937        let blocks = processor.extract_code_blocks(content);
1938        assert_eq!(blocks.len(), 1, "Should detect code block inside nested admonition");
1939        assert_eq!(blocks[0].language, "python");
1940    }
1941
1942    #[test]
1943    fn test_mkdocs_consecutive_admonitions_no_stale_context() {
1944        let config = default_config();
1945        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1946
1947        // Two consecutive admonitions at the same indent level.
1948        // The first has no code block, the second does.
1949        let content = "\
1950!!! note
1951    First admonition content
1952
1953!!! warning
1954    Second admonition content
1955
1956    ```python
1957    y = 2
1958    ```
1959";
1960        let blocks = processor.extract_code_blocks(content);
1961        assert_eq!(blocks.len(), 1, "Should detect code block in second admonition only");
1962        assert_eq!(blocks[0].language, "python");
1963    }
1964
1965    #[test]
1966    fn test_mkdocs_crlf_line_endings() {
1967        let config = default_config();
1968        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1969
1970        // Use \r\n line endings
1971        let content = "!!! note\r\n    Some text\r\n\r\n    ```python\r\n    x = 1\r\n    ```\r\n";
1972        let blocks = processor.extract_code_blocks(content);
1973
1974        assert_eq!(blocks.len(), 1, "Should detect code block with CRLF line endings");
1975        assert_eq!(blocks[0].language, "python");
1976
1977        // Verify byte offsets point to valid content
1978        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
1979        assert!(
1980            extracted.contains("x = 1"),
1981            "Extracted content should contain code. Got: {extracted:?}"
1982        );
1983    }
1984
1985    #[test]
1986    fn test_mkdocs_unclosed_fence_in_admonition() {
1987        let config = default_config();
1988        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1989
1990        // Unclosed fence should not produce a block
1991        let content = "!!! note\n    ```python\n    x = 1\n    no closing fence\n";
1992        let blocks = processor.extract_code_blocks(content);
1993        assert_eq!(blocks.len(), 0, "Unclosed fence should not produce a block");
1994    }
1995
1996    #[test]
1997    fn test_mkdocs_tilde_fence_in_admonition() {
1998        let config = default_config();
1999        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
2000
2001        let content = "!!! note\n    ~~~ruby\n    puts 'hi'\n    ~~~\n";
2002        let blocks = processor.extract_code_blocks(content);
2003        assert_eq!(blocks.len(), 1, "Should detect tilde-fenced code block");
2004        assert_eq!(blocks[0].language, "ruby");
2005    }
2006
2007    #[test]
2008    fn test_mkdocs_empty_lines_in_code_block() {
2009        let config = default_config();
2010        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
2011
2012        // Code block with empty lines inside — verifies byte offsets are correct
2013        // across empty lines (the previous find("") approach would break here)
2014        let content = "!!! note\n    ```python\n    x = 1\n\n    y = 2\n    ```\n";
2015        let blocks = processor.extract_code_blocks(content);
2016        assert_eq!(blocks.len(), 1);
2017
2018        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
2019        assert!(
2020            extracted.contains("x = 1") && extracted.contains("y = 2"),
2021            "Extracted content should span across the empty line. Got: {extracted:?}"
2022        );
2023    }
2024
2025    #[test]
2026    fn test_mkdocs_content_byte_offsets_lf() {
2027        let config = default_config();
2028        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
2029
2030        let content = "!!! note\n    ```python\n    print('hi')\n    ```\n";
2031        let blocks = processor.extract_code_blocks(content);
2032        assert_eq!(blocks.len(), 1);
2033
2034        // Verify the extracted content is exactly the code body
2035        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
2036        assert_eq!(extracted, "    print('hi')\n", "Content offsets should be exact for LF");
2037    }
2038
2039    #[test]
2040    fn test_mkdocs_content_byte_offsets_crlf() {
2041        let config = default_config();
2042        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
2043
2044        let content = "!!! note\r\n    ```python\r\n    print('hi')\r\n    ```\r\n";
2045        let blocks = processor.extract_code_blocks(content);
2046        assert_eq!(blocks.len(), 1);
2047
2048        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
2049        assert_eq!(
2050            extracted, "    print('hi')\r\n",
2051            "Content offsets should be exact for CRLF"
2052        );
2053    }
2054
2055    #[test]
2056    fn test_lint_enabled_false_skips_language_in_strict_mode() {
2057        // With on-missing-language-definition = "fail", a language configured
2058        // with enabled=false should be silently skipped (no error).
2059        let mut config = default_config();
2060        config.normalize_language = NormalizeLanguage::Exact;
2061        config.on_missing_language_definition = OnMissing::Fail;
2062
2063        // Python has tools, plaintext is disabled
2064        config.languages.insert(
2065            "python".to_string(),
2066            LanguageToolConfig {
2067                lint: vec!["ruff:check".to_string()],
2068                ..Default::default()
2069            },
2070        );
2071        config.languages.insert(
2072            "plaintext".to_string(),
2073            LanguageToolConfig {
2074                enabled: false,
2075                ..Default::default()
2076            },
2077        );
2078
2079        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2080
2081        let content = "```plaintext\nsome text\n```";
2082        let result = processor.lint(content);
2083
2084        // No error for plaintext: enabled=false satisfies strict mode
2085        assert!(result.is_ok());
2086        let diagnostics = result.unwrap();
2087        assert!(
2088            diagnostics.is_empty(),
2089            "Expected no diagnostics for disabled language, got: {diagnostics:?}"
2090        );
2091    }
2092
2093    #[test]
2094    fn test_format_enabled_false_skips_language_in_strict_mode() {
2095        // Same test but for format mode
2096        let mut config = default_config();
2097        config.normalize_language = NormalizeLanguage::Exact;
2098        config.on_missing_language_definition = OnMissing::Fail;
2099
2100        config.languages.insert(
2101            "plaintext".to_string(),
2102            LanguageToolConfig {
2103                enabled: false,
2104                ..Default::default()
2105            },
2106        );
2107
2108        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2109
2110        let content = "```plaintext\nsome text\n```";
2111        let result = processor.format(content);
2112
2113        // No error for plaintext: enabled=false satisfies strict mode
2114        assert!(result.is_ok());
2115        let output = result.unwrap();
2116        assert!(!output.had_errors, "Expected no errors for disabled language");
2117        assert!(
2118            output.error_messages.is_empty(),
2119            "Expected no error messages, got: {:?}",
2120            output.error_messages
2121        );
2122    }
2123
2124    #[test]
2125    fn test_enabled_false_default_true_preserved() {
2126        // Verify that when enabled is not set, it defaults to true (existing behavior)
2127        let mut config = default_config();
2128        config.on_missing_language_definition = OnMissing::Fail;
2129
2130        // Configure python without explicitly setting enabled
2131        config.languages.insert(
2132            "python".to_string(),
2133            LanguageToolConfig {
2134                lint: vec!["ruff:check".to_string()],
2135                ..Default::default()
2136            },
2137        );
2138
2139        let lang_config = config.languages.get("python").unwrap();
2140        assert!(lang_config.enabled, "enabled should default to true");
2141    }
2142
2143    #[test]
2144    fn test_enabled_false_with_fail_fast_no_error() {
2145        // Even with fail-fast, enabled=false should skip silently
2146        let mut config = default_config();
2147        config.normalize_language = NormalizeLanguage::Exact;
2148        config.on_missing_language_definition = OnMissing::FailFast;
2149
2150        config.languages.insert(
2151            "unknown".to_string(),
2152            LanguageToolConfig {
2153                enabled: false,
2154                ..Default::default()
2155            },
2156        );
2157
2158        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2159
2160        let content = "```unknown\nsome content\n```";
2161        let result = processor.lint(content);
2162
2163        // Should not return an error: enabled=false takes precedence over fail-fast
2164        assert!(result.is_ok(), "Expected Ok but got Err: {result:?}");
2165        assert!(result.unwrap().is_empty());
2166    }
2167
2168    #[test]
2169    fn test_enabled_false_format_with_fail_fast_no_error() {
2170        // Same for format mode
2171        let mut config = default_config();
2172        config.normalize_language = NormalizeLanguage::Exact;
2173        config.on_missing_language_definition = OnMissing::FailFast;
2174
2175        config.languages.insert(
2176            "unknown".to_string(),
2177            LanguageToolConfig {
2178                enabled: false,
2179                ..Default::default()
2180            },
2181        );
2182
2183        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2184
2185        let content = "```unknown\nsome content\n```";
2186        let result = processor.format(content);
2187
2188        assert!(result.is_ok(), "Expected Ok but got Err: {result:?}");
2189        let output = result.unwrap();
2190        assert!(!output.had_errors);
2191    }
2192
2193    #[test]
2194    fn test_enabled_false_with_tools_still_skips() {
2195        // If enabled=false but tools are listed, the language should still be skipped
2196        let mut config = default_config();
2197        config.on_missing_language_definition = OnMissing::Fail;
2198
2199        config.languages.insert(
2200            "python".to_string(),
2201            LanguageToolConfig {
2202                enabled: false,
2203                lint: vec!["ruff:check".to_string()],
2204                format: vec!["ruff:format".to_string()],
2205                on_error: None,
2206            },
2207        );
2208
2209        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2210
2211        let content = "```python\nprint('hello')\n```";
2212
2213        // Lint should skip
2214        let lint_result = processor.lint(content);
2215        assert!(lint_result.is_ok());
2216        assert!(lint_result.unwrap().is_empty());
2217
2218        // Format should skip
2219        let format_result = processor.format(content);
2220        assert!(format_result.is_ok());
2221        let output = format_result.unwrap();
2222        assert!(!output.had_errors);
2223        assert_eq!(output.content, content, "Content should be unchanged");
2224    }
2225
2226    #[test]
2227    fn test_enabled_true_without_tools_triggers_strict_mode() {
2228        // A language configured with enabled=true (default) but no tools
2229        // should still trigger strict mode errors
2230        let mut config = default_config();
2231        config.on_missing_language_definition = OnMissing::Fail;
2232
2233        config.languages.insert(
2234            "python".to_string(),
2235            LanguageToolConfig {
2236                // enabled defaults to true, no tools
2237                ..Default::default()
2238            },
2239        );
2240
2241        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2242
2243        let content = "```python\nprint('hello')\n```";
2244        let result = processor.lint(content);
2245
2246        // Should report an error because enabled=true but no lint tools configured
2247        assert!(result.is_ok());
2248        let diagnostics = result.unwrap();
2249        assert_eq!(diagnostics.len(), 1);
2250        assert!(diagnostics[0].message.contains("No lint tools configured"));
2251    }
2252
2253    #[test]
2254    fn test_mixed_enabled_and_disabled_languages() {
2255        // Multiple languages: one disabled, one unconfigured
2256        let mut config = default_config();
2257        config.normalize_language = NormalizeLanguage::Exact;
2258        config.on_missing_language_definition = OnMissing::Fail;
2259
2260        config.languages.insert(
2261            "plaintext".to_string(),
2262            LanguageToolConfig {
2263                enabled: false,
2264                ..Default::default()
2265            },
2266        );
2267
2268        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2269
2270        let content = "\
2271```plaintext
2272some text
2273```
2274
2275```javascript
2276console.log('hi');
2277```
2278";
2279
2280        let result = processor.lint(content);
2281        assert!(result.is_ok());
2282        let diagnostics = result.unwrap();
2283
2284        // plaintext: skipped (enabled=false), no error
2285        // javascript: not configured at all, should trigger strict mode error
2286        assert_eq!(diagnostics.len(), 1, "Expected 1 diagnostic, got: {diagnostics:?}");
2287        assert!(
2288            diagnostics[0].message.contains("javascript"),
2289            "Error should be about javascript, got: {}",
2290            diagnostics[0].message
2291        );
2292    }
2293
2294    #[test]
2295    fn test_generic_fallback_includes_all_stderr_lines() {
2296        let config = default_config();
2297        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2298
2299        // Use output that won't be parsed by any structured format parser
2300        let output = ToolOutput {
2301            stdout: String::new(),
2302            stderr: "Parse error at position 42\nUnexpected token '::'\n3 errors found".to_string(),
2303            exit_code: 1,
2304            success: false,
2305        };
2306
2307        let diags = processor.parse_tool_output(&output, "tombi", 5);
2308        assert_eq!(diags.len(), 3, "Expected one diagnostic per non-empty stderr line");
2309        assert_eq!(diags[0].message, "Parse error at position 42");
2310        assert_eq!(diags[1].message, "Unexpected token '::'");
2311        assert_eq!(diags[2].message, "3 errors found");
2312        assert!(diags.iter().all(|d| d.tool == "tombi"));
2313        assert!(diags.iter().all(|d| d.file_line == 5));
2314    }
2315
2316    #[test]
2317    fn test_generic_fallback_includes_all_stdout_lines_when_stderr_empty() {
2318        let config = default_config();
2319        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2320
2321        let output = ToolOutput {
2322            stdout: "Line 1 error\nLine 2 detail\nLine 3 summary".to_string(),
2323            stderr: String::new(),
2324            exit_code: 1,
2325            success: false,
2326        };
2327
2328        let diags = processor.parse_tool_output(&output, "some-tool", 10);
2329        assert_eq!(diags.len(), 3);
2330        assert_eq!(diags[0].message, "Line 1 error");
2331        assert_eq!(diags[1].message, "Line 2 detail");
2332        assert_eq!(diags[2].message, "Line 3 summary");
2333    }
2334
2335    #[test]
2336    fn test_generic_fallback_skips_blank_lines() {
2337        let config = default_config();
2338        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2339
2340        let output = ToolOutput {
2341            stdout: String::new(),
2342            stderr: "error: bad input\n\n  \n\ndetail: see above\n".to_string(),
2343            exit_code: 1,
2344            success: false,
2345        };
2346
2347        let diags = processor.parse_tool_output(&output, "tool", 1);
2348        assert_eq!(diags.len(), 2);
2349        assert_eq!(diags[0].message, "error: bad input");
2350        assert_eq!(diags[1].message, "detail: see above");
2351    }
2352
2353    #[test]
2354    fn test_generic_fallback_exit_code_when_no_output() {
2355        let config = default_config();
2356        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2357
2358        let output = ToolOutput {
2359            stdout: String::new(),
2360            stderr: String::new(),
2361            exit_code: 42,
2362            success: false,
2363        };
2364
2365        let diags = processor.parse_tool_output(&output, "tool", 1);
2366        assert_eq!(diags.len(), 1);
2367        assert_eq!(diags[0].message, "Tool exited with code 42");
2368    }
2369
2370    #[test]
2371    fn test_generic_fallback_not_triggered_on_success() {
2372        let config = default_config();
2373        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2374
2375        let output = ToolOutput {
2376            stdout: "some informational output".to_string(),
2377            stderr: String::new(),
2378            exit_code: 0,
2379            success: true,
2380        };
2381
2382        let diags = processor.parse_tool_output(&output, "tool", 1);
2383        assert!(
2384            diags.is_empty(),
2385            "Successful tool runs should produce no fallback diagnostics"
2386        );
2387    }
2388
2389    #[test]
2390    fn test_ansi_codes_stripped_before_parsing() {
2391        let config = default_config();
2392        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2393
2394        // ruff-style output with ANSI color codes wrapping the message
2395        let output = ToolOutput {
2396            stdout: "\x1b[1m_.py\x1b[0m:\x1b[33m1\x1b[0m:\x1b[33m1\x1b[0m: \x1b[31mE501\x1b[0m Line too long"
2397                .to_string(),
2398            stderr: String::new(),
2399            exit_code: 1,
2400            success: false,
2401        };
2402
2403        let diags = processor.parse_tool_output(&output, "ruff:check", 5);
2404        assert_eq!(diags.len(), 1, "ANSI-colored output should still be parsed");
2405        assert_eq!(diags[0].message, "E501 Line too long");
2406        assert_eq!(diags[0].file_line, 6); // 5 + 1
2407    }
2408
2409    #[test]
2410    fn test_tombi_multiline_error_format() {
2411        let config = default_config();
2412        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2413
2414        // Simulates tombi output (ANSI codes stripped for clarity)
2415        let output = ToolOutput {
2416            stdout: "[test]\ntest: \"test\"\nError: invalid key\n    at line 2 column 1\nError: expected key\n    at line 2 column 1\nError: expected '='\n    at line 2 column 1\nError: expected value\n    at line 2 column 1".to_string(),
2417            stderr: "1 file failed to be formatted".to_string(),
2418            exit_code: 1,
2419            success: false,
2420        };
2421
2422        let diags = processor.parse_tool_output(&output, "tombi", 7);
2423        assert_eq!(
2424            diags.len(),
2425            4,
2426            "Expected 4 diagnostics from tombi errors, got {diags:?}"
2427        );
2428        assert_eq!(diags[0].message, "invalid key");
2429        assert_eq!(diags[0].file_line, 9); // 7 + 2
2430        assert_eq!(diags[0].column, Some(1));
2431        assert_eq!(diags[1].message, "expected key");
2432        assert_eq!(diags[1].file_line, 9);
2433        assert_eq!(diags[2].message, "expected '='");
2434        assert_eq!(diags[3].message, "expected value");
2435        assert!(diags.iter().all(|d| d.tool == "tombi"));
2436    }
2437
2438    #[test]
2439    fn test_tombi_with_ansi_codes() {
2440        let config = default_config();
2441        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2442
2443        // Real tombi output with ANSI escape codes
2444        let output = ToolOutput {
2445            stdout: "[test]\ntest: \"test\"\n\x1b[1;31m  Error\x1b[0m: \x1b[1minvalid key\x1b[0m\n    \x1b[90mat line 2 column 1\x1b[0m\n\x1b[1;31m  Error\x1b[0m: \x1b[1mexpected '='\x1b[0m\n    \x1b[90mat line 2 column 1\x1b[0m".to_string(),
2446            stderr: "1 file failed to be formatted".to_string(),
2447            exit_code: 1,
2448            success: false,
2449        };
2450
2451        let diags = processor.parse_tool_output(&output, "tombi", 7);
2452        assert_eq!(
2453            diags.len(),
2454            2,
2455            "Expected 2 diagnostics from ANSI-colored tombi output, got {diags:?}"
2456        );
2457        assert_eq!(diags[0].message, "invalid key");
2458        assert_eq!(diags[0].file_line, 9);
2459        assert_eq!(diags[1].message, "expected '='");
2460        assert_eq!(diags[1].file_line, 9);
2461    }
2462
2463    #[test]
2464    fn test_fallback_combines_stdout_and_stderr() {
2465        let config = default_config();
2466        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2467
2468        // Tool puts some errors on stdout, summary on stderr
2469        let output = ToolOutput {
2470            stdout: "problem found in input".to_string(),
2471            stderr: "1 file failed".to_string(),
2472            exit_code: 1,
2473            success: false,
2474        };
2475
2476        let diags = processor.parse_tool_output(&output, "tool", 1);
2477        assert_eq!(diags.len(), 2, "Fallback should include both stdout and stderr");
2478        assert_eq!(diags[0].message, "problem found in input");
2479        assert_eq!(diags[1].message, "1 file failed");
2480    }
2481
2482    #[test]
2483    fn test_error_line_without_position_info() {
2484        let config = default_config();
2485        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2486
2487        // Error: line not followed by "at line N column M"
2488        let output = ToolOutput {
2489            stdout: "Error: something went wrong\nsome unrelated line".to_string(),
2490            stderr: String::new(),
2491            exit_code: 1,
2492            success: false,
2493        };
2494
2495        let diags = processor.parse_tool_output(&output, "tool", 5);
2496        // "Error: something went wrong" → parsed by error-line parser (no position)
2497        // "some unrelated line" → no parser matches, but diagnostics not empty → no fallback
2498        assert!(!diags.is_empty());
2499        assert_eq!(diags[0].message, "something went wrong");
2500        assert_eq!(diags[0].file_line, 5); // No line offset, uses code_block_start
2501    }
2502
2503    #[test]
2504    fn test_warning_line_with_position() {
2505        let config = default_config();
2506        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2507
2508        let output = ToolOutput {
2509            stdout: "Warning: deprecated syntax\n    at line 3 column 5".to_string(),
2510            stderr: String::new(),
2511            exit_code: 1,
2512            success: false,
2513        };
2514
2515        let diags = processor.parse_tool_output(&output, "tool", 10);
2516        assert_eq!(diags.len(), 1);
2517        assert_eq!(diags[0].message, "deprecated syntax");
2518        assert_eq!(diags[0].file_line, 13); // 10 + 3
2519        assert_eq!(diags[0].column, Some(5));
2520        assert!(matches!(diags[0].severity, DiagnosticSeverity::Warning));
2521    }
2522
2523    #[test]
2524    fn test_strip_ansi_codes() {
2525        assert_eq!(strip_ansi_codes("hello"), "hello");
2526        assert_eq!(strip_ansi_codes("\x1b[31mred\x1b[0m"), "red");
2527        assert_eq!(
2528            strip_ansi_codes("\x1b[1;31m  Error\x1b[0m: \x1b[1mmsg\x1b[0m"),
2529            "  Error: msg"
2530        );
2531        assert_eq!(strip_ansi_codes("no codes here"), "no codes here");
2532        assert_eq!(strip_ansi_codes(""), "");
2533        assert_eq!(
2534            strip_ansi_codes("\x1b[90mat line 2 column 1\x1b[0m"),
2535            "at line 2 column 1"
2536        );
2537    }
2538
2539    #[test]
2540    fn test_parse_at_line_column() {
2541        assert_eq!(
2542            CodeBlockToolProcessor::parse_at_line_column("at line 2 column 1"),
2543            Some((2, 1))
2544        );
2545        assert_eq!(
2546            CodeBlockToolProcessor::parse_at_line_column("at line 10 column 15"),
2547            Some((10, 15))
2548        );
2549        assert_eq!(
2550            CodeBlockToolProcessor::parse_at_line_column("At Line 5 Column 3"),
2551            Some((5, 3))
2552        );
2553        assert_eq!(
2554            CodeBlockToolProcessor::parse_at_line_column("not a position line"),
2555            None
2556        );
2557        assert_eq!(
2558            CodeBlockToolProcessor::parse_at_line_column("at line abc column 1"),
2559            None
2560        );
2561    }
2562
2563    #[test]
2564    fn test_parse_error_line() {
2565        let (msg, sev) = CodeBlockToolProcessor::parse_error_line("Error: invalid key").unwrap();
2566        assert_eq!(msg, "invalid key");
2567        assert!(matches!(sev, DiagnosticSeverity::Error));
2568
2569        let (msg, sev) = CodeBlockToolProcessor::parse_error_line("Warning: deprecated").unwrap();
2570        assert_eq!(msg, "deprecated");
2571        assert!(matches!(sev, DiagnosticSeverity::Warning));
2572
2573        // Lowercase should NOT match (avoids conflict with unstructured tool output)
2574        assert!(CodeBlockToolProcessor::parse_error_line("error: bad input").is_none());
2575        assert!(CodeBlockToolProcessor::parse_error_line("warning: minor issue").is_none());
2576
2577        // Empty message after prefix should not match
2578        assert!(CodeBlockToolProcessor::parse_error_line("Error:").is_none());
2579        assert!(CodeBlockToolProcessor::parse_error_line("Error:   ").is_none());
2580
2581        // Not an error line
2582        assert!(CodeBlockToolProcessor::parse_error_line("some random text").is_none());
2583    }
2584
2585    #[test]
2586    fn test_consecutive_error_lines_without_position() {
2587        let config = default_config();
2588        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2589
2590        // Two Error: lines in a row — first should flush without position,
2591        // second gets position from "at line"
2592        let output = ToolOutput {
2593            stdout: "Error: first problem\nError: second problem\n    at line 3 column 1".to_string(),
2594            stderr: String::new(),
2595            exit_code: 1,
2596            success: false,
2597        };
2598
2599        let diags = processor.parse_tool_output(&output, "tool", 5);
2600        assert_eq!(diags.len(), 2, "Expected 2 diagnostics, got {diags:?}");
2601        // First error flushed without position when second Error: was encountered
2602        assert_eq!(diags[0].message, "first problem");
2603        assert_eq!(diags[0].file_line, 5); // No line mapping
2604        assert_eq!(diags[0].column, None);
2605        // Second error resolved with position
2606        assert_eq!(diags[1].message, "second problem");
2607        assert_eq!(diags[1].file_line, 8); // 5 + 3
2608        assert_eq!(diags[1].column, Some(1));
2609    }
2610
2611    #[test]
2612    fn test_error_line_at_end_of_output() {
2613        let config = default_config();
2614        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2615
2616        // Error: as the very last line — flushed by post-loop code
2617        let output = ToolOutput {
2618            stdout: "Error: trailing error".to_string(),
2619            stderr: String::new(),
2620            exit_code: 1,
2621            success: false,
2622        };
2623
2624        let diags = processor.parse_tool_output(&output, "tool", 5);
2625        assert_eq!(diags.len(), 1);
2626        assert_eq!(diags[0].message, "trailing error");
2627        assert_eq!(diags[0].file_line, 5); // No position info available
2628        assert_eq!(diags[0].column, None);
2629    }
2630
2631    #[test]
2632    fn test_blank_lines_between_error_and_position() {
2633        let config = default_config();
2634        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2635
2636        // Blank lines between Error: and "at line" should be transparently skipped
2637        let output = ToolOutput {
2638            stdout: "Error: spaced out\n\n\n    at line 4 column 2".to_string(),
2639            stderr: String::new(),
2640            exit_code: 1,
2641            success: false,
2642        };
2643
2644        let diags = processor.parse_tool_output(&output, "tool", 10);
2645        assert_eq!(diags.len(), 1);
2646        assert_eq!(diags[0].message, "spaced out");
2647        assert_eq!(diags[0].file_line, 14); // 10 + 4
2648        assert_eq!(diags[0].column, Some(2));
2649    }
2650
2651    #[test]
2652    fn test_mixed_structured_and_error_line_parsers() {
2653        let config = default_config();
2654        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2655
2656        // Mix of ruff-style structured output and tombi-style Error: output
2657        let output = ToolOutput {
2658            stdout: "_.py:1:5: E501 Line too long\nError: invalid syntax\n    at line 3 column 1".to_string(),
2659            stderr: String::new(),
2660            exit_code: 1,
2661            success: false,
2662        };
2663
2664        let diags = processor.parse_tool_output(&output, "tool", 5);
2665        assert_eq!(diags.len(), 2, "Expected 2 diagnostics, got {diags:?}");
2666        // First: standard format parser
2667        assert_eq!(diags[0].message, "E501 Line too long");
2668        assert_eq!(diags[0].file_line, 6); // 5 + 1
2669        // Second: Error: + at line parser
2670        assert_eq!(diags[1].message, "invalid syntax");
2671        assert_eq!(diags[1].file_line, 8); // 5 + 3
2672    }
2673
2674    #[test]
2675    fn test_at_line_without_preceding_error() {
2676        let config = default_config();
2677        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2678
2679        // "at line N column M" without a preceding Error: should not create a diagnostic
2680        let output = ToolOutput {
2681            stdout: "at line 2 column 1\nsome other text".to_string(),
2682            stderr: String::new(),
2683            exit_code: 1,
2684            success: false,
2685        };
2686
2687        let diags = processor.parse_tool_output(&output, "tool", 5);
2688        // No pending error, so "at line" is just an unmatched line
2689        // Both lines are unmatched, fallback fires with combined output
2690        assert_eq!(diags.len(), 2);
2691        assert_eq!(diags[0].message, "at line 2 column 1");
2692        assert_eq!(diags[1].message, "some other text");
2693    }
2694
2695    // =========================================================================
2696    // Issue #527: formatter that produces empty output should not erase content
2697    // =========================================================================
2698
2699    /// A formatter that produces no stdout (like `tombi lint -` mistakenly used
2700    /// as a formatter) should not replace non-empty content with an empty string.
2701    /// This test uses `true` which exits 0 with no output, simulating the bug.
2702    #[test]
2703    fn test_format_empty_output_does_not_erase_content() {
2704        use super::super::config::LanguageToolConfig;
2705
2706        let mut config = default_config();
2707        config.languages.insert(
2708            "toml".to_string(),
2709            LanguageToolConfig {
2710                format: vec!["empty-formatter".to_string()],
2711                ..Default::default()
2712            },
2713        );
2714        // Define a tool that exits 0 but produces no stdout (simulates `tombi lint -`)
2715        config.tools.insert(
2716            "empty-formatter".to_string(),
2717            super::super::config::ToolDefinition {
2718                command: vec!["true".to_string()],
2719                stdin: true,
2720                stdout: true,
2721                lint_args: vec![],
2722                format_args: vec![],
2723            },
2724        );
2725
2726        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2727
2728        let content = "```toml\nkey = \"value\"\n```\n";
2729        let result = processor.format(content);
2730
2731        assert!(result.is_ok(), "Format should not error");
2732        let output = result.unwrap();
2733
2734        // The content must NOT be erased — original content should be preserved
2735        assert!(
2736            output.content.contains("key = \"value\""),
2737            "Empty formatter output should not erase content. Got: {:?}",
2738            output.content
2739        );
2740    }
2741
2742    /// A formatter that echoes input back (like `cat`) should preserve content.
2743    #[test]
2744    fn test_format_identity_formatter_preserves_content() {
2745        use super::super::config::LanguageToolConfig;
2746
2747        let mut config = default_config();
2748        config.languages.insert(
2749            "toml".to_string(),
2750            LanguageToolConfig {
2751                format: vec!["cat-formatter".to_string()],
2752                ..Default::default()
2753            },
2754        );
2755        config.tools.insert(
2756            "cat-formatter".to_string(),
2757            super::super::config::ToolDefinition {
2758                command: vec!["cat".to_string()],
2759                stdin: true,
2760                stdout: true,
2761                lint_args: vec![],
2762                format_args: vec![],
2763            },
2764        );
2765
2766        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2767
2768        let content = "```toml\nkey = \"value\"\n```\n";
2769        let result = processor.format(content);
2770
2771        assert!(result.is_ok(), "Format should not error");
2772        let output = result.unwrap();
2773        assert_eq!(
2774            output.content, content,
2775            "Identity formatter should preserve content exactly"
2776        );
2777    }
2778
2779    /// Verify that the context-aware tool resolution resolves bare "tombi"
2780    /// to "tombi:format" in format context and "tombi:lint" in lint context.
2781    #[test]
2782    fn test_resolve_tool_context_aware_tombi() {
2783        let config = default_config();
2784        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2785
2786        // In format context, bare "tombi" should resolve to "tombi:format"
2787        let format_def = processor
2788            .resolve_tool("tombi", ToolContext::Format)
2789            .expect("Should resolve tombi in format context");
2790        assert!(
2791            format_def.command.iter().any(|arg| arg == "format"),
2792            "Bare 'tombi' in format context should resolve to 'tombi format', got: {:?}",
2793            format_def.command
2794        );
2795
2796        // In lint context, bare "tombi" should resolve to "tombi:lint"
2797        let lint_def = processor
2798            .resolve_tool("tombi", ToolContext::Lint)
2799            .expect("Should resolve tombi in lint context");
2800        assert!(
2801            lint_def.command.iter().any(|arg| arg == "lint"),
2802            "Bare 'tombi' in lint context should resolve to 'tombi lint', got: {:?}",
2803            lint_def.command
2804        );
2805
2806        // Explicit suffix should bypass context-aware resolution
2807        let explicit_def = processor
2808            .resolve_tool("tombi:lint", ToolContext::Format)
2809            .expect("Should resolve explicit tombi:lint even in format context");
2810        assert!(
2811            explicit_def.command.iter().any(|arg| arg == "lint"),
2812            "Explicit 'tombi:lint' should always use lint, got: {:?}",
2813            explicit_def.command
2814        );
2815    }
2816
2817    /// Verify context-aware resolution for ruff (uses "check" suffix, not "lint").
2818    #[test]
2819    fn test_resolve_tool_context_aware_ruff() {
2820        let config = default_config();
2821        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2822
2823        // In lint context, bare "ruff" should resolve to "ruff:check"
2824        let lint_def = processor
2825            .resolve_tool("ruff", ToolContext::Lint)
2826            .expect("Should resolve ruff in lint context");
2827        assert!(
2828            lint_def.command.iter().any(|arg| arg == "check"),
2829            "Bare 'ruff' in lint context should resolve to 'ruff check', got: {:?}",
2830            lint_def.command
2831        );
2832
2833        // In format context, bare "ruff" should resolve to "ruff:format"
2834        let format_def = processor
2835            .resolve_tool("ruff", ToolContext::Format)
2836            .expect("Should resolve ruff in format context");
2837        assert!(
2838            format_def.command.iter().any(|arg| arg == "format"),
2839            "Bare 'ruff' in format context should resolve to 'ruff format', got: {:?}",
2840            format_def.command
2841        );
2842    }
2843
2844    /// Tools without context-specific variants should still resolve via bare name.
2845    #[test]
2846    fn test_resolve_tool_bare_name_fallback() {
2847        let config = default_config();
2848        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2849
2850        // "shellcheck" has no :lint or :format variant — should fall back to bare name
2851        let def = processor
2852            .resolve_tool("shellcheck", ToolContext::Lint)
2853            .expect("Should resolve shellcheck via fallback");
2854        assert!(
2855            def.command.iter().any(|arg| arg == "shellcheck"),
2856            "shellcheck should resolve to itself, got: {:?}",
2857            def.command
2858        );
2859    }
2860
2861    /// Context-aware resolution for tools with non-standard format suffixes.
2862    #[test]
2863    fn test_resolve_tool_context_aware_sqlfluff() {
2864        let config = default_config();
2865        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2866
2867        // sqlfluff uses ":fix" as its format variant
2868        let format_def = processor
2869            .resolve_tool("sqlfluff", ToolContext::Format)
2870            .expect("Should resolve sqlfluff in format context");
2871        assert!(
2872            format_def.command.iter().any(|arg| arg == "fix"),
2873            "Bare 'sqlfluff' in format context should resolve to 'sqlfluff fix', got: {:?}",
2874            format_def.command
2875        );
2876    }
2877
2878    /// Context-aware resolution for djlint (:reformat suffix).
2879    #[test]
2880    fn test_resolve_tool_context_aware_djlint() {
2881        let config = default_config();
2882        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2883
2884        // djlint uses ":reformat" as its format variant
2885        let format_def = processor
2886            .resolve_tool("djlint", ToolContext::Format)
2887            .expect("Should resolve djlint in format context");
2888        assert!(
2889            format_def.command.iter().any(|arg| arg.contains("reformat")),
2890            "Bare 'djlint' in format context should resolve to djlint reformat, got: {:?}",
2891            format_def.command
2892        );
2893    }
2894
2895    /// User-defined tools with context-specific variants resolve correctly.
2896    #[test]
2897    fn test_resolve_tool_user_defined_with_context_variant() {
2898        use super::super::config::ToolDefinition;
2899
2900        let mut config = default_config();
2901        config.tools.insert(
2902            "mytool".to_string(),
2903            ToolDefinition {
2904                command: vec!["mytool".to_string(), "--lint".to_string()],
2905                ..Default::default()
2906            },
2907        );
2908        config.tools.insert(
2909            "mytool:format".to_string(),
2910            ToolDefinition {
2911                command: vec!["mytool".to_string(), "--format".to_string()],
2912                ..Default::default()
2913            },
2914        );
2915
2916        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2917
2918        // In format context, should resolve to "mytool:format"
2919        let def = processor
2920            .resolve_tool("mytool", ToolContext::Format)
2921            .expect("Should resolve user tool in format context");
2922        assert!(
2923            def.command.iter().any(|arg| arg == "--format"),
2924            "User 'mytool' in format context should resolve to mytool:format, got: {:?}",
2925            def.command
2926        );
2927
2928        // In lint context, should fall back to bare "mytool" (no mytool:lint exists)
2929        let def = processor
2930            .resolve_tool("mytool", ToolContext::Lint)
2931            .expect("Should resolve user tool in lint context via fallback");
2932        assert!(
2933            def.command.iter().any(|arg| arg == "--lint"),
2934            "User 'mytool' in lint context should fall back to bare name, got: {:?}",
2935            def.command
2936        );
2937    }
2938
2939    /// Nonexistent tool returns None.
2940    #[test]
2941    fn test_resolve_tool_nonexistent_returns_none() {
2942        let config = default_config();
2943        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
2944
2945        assert!(
2946            processor
2947                .resolve_tool("nonexistent-tool-xyz", ToolContext::Lint)
2948                .is_none(),
2949            "Nonexistent tool should return None in lint context"
2950        );
2951        assert!(
2952            processor
2953                .resolve_tool("nonexistent-tool-xyz", ToolContext::Format)
2954                .is_none(),
2955            "Nonexistent tool should return None in format context"
2956        );
2957    }
2958
2959    #[test]
2960    fn test_strip_ansi_codes_edge_cases() {
2961        // Lone ESC without CSI bracket — non-printable, safely dropped
2962        assert_eq!(strip_ansi_codes("before\x1bafter"), "beforeafter");
2963        // ESC at end of string
2964        assert_eq!(strip_ansi_codes("trailing\x1b"), "trailing");
2965        // Nested/consecutive sequences
2966        assert_eq!(strip_ansi_codes("\x1b[1m\x1b[31mbold red\x1b[0m"), "bold red");
2967        // 256-color and RGB sequences
2968        assert_eq!(strip_ansi_codes("\x1b[38;5;196mred\x1b[0m"), "red");
2969        assert_eq!(strip_ansi_codes("\x1b[38;2;255;0;0mred\x1b[0m"), "red");
2970    }
2971
2972    /// A linter that enforces a trailing newline (like ryl/yamllint
2973    /// `new-line-at-end-of-file`) must not fire on a fenced code block whose
2974    /// final newline was stripped during extraction. The content piped to the
2975    /// tool should be newline-terminated, matching how the tool sees a real file.
2976    #[cfg(unix)]
2977    #[test]
2978    fn test_lint_yaml_block_no_false_new_line_at_eof() {
2979        use super::super::config::{LanguageToolConfig, ToolDefinition};
2980
2981        let mut config = default_config();
2982        config.normalize_language = NormalizeLanguage::Exact;
2983        config.languages.insert(
2984            "yaml".to_string(),
2985            LanguageToolConfig {
2986                lint: vec!["eof-newline-linter".to_string()],
2987                ..Default::default()
2988            },
2989        );
2990        // Emits a diagnostic only when stdin's last byte is not a newline.
2991        // `$(tail -c1)` strips trailing newlines, so it is empty iff the input
2992        // already ends in a newline.
2993        config.tools.insert(
2994            "eof-newline-linter".to_string(),
2995            ToolDefinition {
2996                command: vec![
2997                    "sh".to_string(),
2998                    "-c".to_string(),
2999                    "if [ -n \"$(tail -c1)\" ]; then echo '1:1: no newline at end of file'; fi".to_string(),
3000                ],
3001                ..Default::default()
3002            },
3003        );
3004
3005        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
3006
3007        let content = "```yaml\nfoo: bar\n```\n";
3008        let diagnostics = processor.lint(content).expect("lint should succeed");
3009
3010        assert!(
3011            diagnostics.is_empty(),
3012            "EOF-newline linter should not fire on an extracted code block, got: {diagnostics:?}"
3013        );
3014    }
3015
3016    /// Format mode should also feed tools newline-terminated input, so a
3017    /// formatter that enforces an end-of-file newline sees the content as a
3018    /// complete file rather than re-adding a newline rumdl would then have to
3019    /// strip. The formatter here reports whether its stdin ended in a newline.
3020    #[cfg(unix)]
3021    #[test]
3022    fn test_format_feeds_newline_terminated_input() {
3023        use super::super::config::{LanguageToolConfig, ToolDefinition};
3024
3025        let mut config = default_config();
3026        config.normalize_language = NormalizeLanguage::Exact;
3027        config.languages.insert(
3028            "yaml".to_string(),
3029            LanguageToolConfig {
3030                format: vec!["newline-probe".to_string()],
3031                ..Default::default()
3032            },
3033        );
3034        config.tools.insert(
3035            "newline-probe".to_string(),
3036            ToolDefinition {
3037                command: vec![
3038                    "sh".to_string(),
3039                    "-c".to_string(),
3040                    "if [ -z \"$(tail -c1)\" ]; then echo HAD_NEWLINE; else echo NO_NEWLINE; fi".to_string(),
3041                ],
3042                ..Default::default()
3043            },
3044        );
3045
3046        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
3047
3048        let content = "```yaml\nfoo: bar\n```\n";
3049        let output = processor.format(content).expect("format should succeed");
3050
3051        assert!(
3052            output.content.contains("HAD_NEWLINE"),
3053            "Formatter should receive newline-terminated stdin, got: {:?}",
3054            output.content
3055        );
3056    }
3057
3058    #[test]
3059    fn test_ensure_trailing_newline() {
3060        // Non-empty content without a trailing newline gets one appended.
3061        assert_eq!(ensure_trailing_newline("foo: bar"), "foo: bar\n");
3062        // Multi-line content gets a single newline appended to the last line.
3063        assert_eq!(ensure_trailing_newline("a: 1\nb: 2"), "a: 1\nb: 2\n");
3064        // Content already ending in a newline is returned unchanged.
3065        assert_eq!(ensure_trailing_newline("foo: bar\n"), "foo: bar\n");
3066        // A trailing blank line (already newline-terminated) is left alone.
3067        assert_eq!(ensure_trailing_newline("foo: bar\n\n"), "foo: bar\n\n");
3068        // Genuinely empty content is left empty — no newline synthesized.
3069        assert_eq!(ensure_trailing_newline(""), "");
3070    }
3071}