rumdl_lib/code_block_tools/
processor.rs

1//! Main processor for code block linting and formatting.
2//!
3//! This module coordinates language resolution, tool lookup, execution,
4//! and result collection for processing code blocks in markdown files.
5
6#[cfg(test)]
7use super::config::LanguageToolConfig;
8use super::config::{CodeBlockToolsConfig, NormalizeLanguage, OnError, OnMissing};
9use super::executor::{ExecutorError, ToolExecutor, ToolOutput};
10use super::linguist::LinguistResolver;
11use super::registry::ToolRegistry;
12use crate::config::MarkdownFlavor;
13use crate::rule::{LintWarning, Severity};
14use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
15
16/// Special built-in tool name for rumdl's own markdown linting.
17/// When this tool is configured for markdown blocks, the processor skips
18/// external execution since it's handled by embedded markdown linting.
19pub const RUMDL_BUILTIN_TOOL: &str = "rumdl";
20
/// Report whether `lang` names Markdown.
///
/// Matching is case-insensitive and recognises the `markdown` and `md` tags.
fn is_markdown_language(lang: &str) -> bool {
    let lowered = lang.to_lowercase();
    ["markdown", "md"].contains(&lowered.as_str())
}
25
/// Location and fence metadata for one fenced code block.
///
/// Line numbers are 0-indexed; `content_start`/`content_end` are byte offsets
/// into the markdown text the block was extracted from.
#[derive(Debug, Clone)]
pub struct FencedCodeBlockInfo {
    /// Line (0-indexed) holding the opening fence.
    pub start_line: usize,
    /// Line (0-indexed) on which the closing fence ends.
    pub end_line: usize,
    /// Byte offset of the first content byte, i.e. just past the opening fence line.
    pub content_start: usize,
    /// Byte offset at which the content stops, ahead of the closing fence line.
    pub content_end: usize,
    /// First whitespace-separated token of the info string (the language tag).
    pub language: String,
    /// The complete info string that follows the opening fence.
    pub info_string: String,
    /// Character the fence is built from: a backtick or a tilde.
    pub fence_char: char,
    /// Number of fence characters in the opening fence (3 or more).
    pub fence_length: usize,
    /// Size of the whitespace that precedes the fence on its line.
    pub indent: usize,
    /// The literal whitespace that precedes the fence on its line.
    pub indent_prefix: String,
}
50
51/// A diagnostic message from an external tool.
52#[derive(Debug, Clone)]
53pub struct CodeBlockDiagnostic {
54    /// Line number in the original markdown file (1-indexed).
55    pub file_line: usize,
56    /// Column number (1-indexed, if available).
57    pub column: Option<usize>,
58    /// Message from the tool.
59    pub message: String,
60    /// Severity (error, warning, info).
61    pub severity: DiagnosticSeverity,
62    /// Name of the tool that produced this.
63    pub tool: String,
64    /// Line where the code block starts (1-indexed, for context).
65    pub code_block_start: usize,
66}
67
/// How serious a [`CodeBlockDiagnostic`] is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// Reported as an error.
    Error,
    /// Reported as a warning.
    Warning,
    /// Reported as informational.
    Info,
}
75
76impl CodeBlockDiagnostic {
77    /// Convert to a LintWarning for integration with rumdl's warning system.
78    pub fn to_lint_warning(&self) -> LintWarning {
79        let severity = match self.severity {
80            DiagnosticSeverity::Error => Severity::Error,
81            DiagnosticSeverity::Warning => Severity::Warning,
82            DiagnosticSeverity::Info => Severity::Info,
83        };
84
85        LintWarning {
86            message: self.message.clone(),
87            line: self.file_line,
88            column: self.column.unwrap_or(1),
89            end_line: self.file_line,
90            end_column: self.column.unwrap_or(1),
91            severity,
92            fix: None, // External tool diagnostics don't provide fixes
93            rule_name: Some(self.tool.clone()),
94        }
95    }
96}
97
98/// Error during code block processing.
99#[derive(Debug, Clone)]
100pub enum ProcessorError {
101    /// Tool execution failed.
102    ToolError(ExecutorError),
103    /// No tools configured for language.
104    NoToolsConfigured { language: String },
105    /// Tool binary not found.
106    ToolBinaryNotFound { tool: String, language: String },
107    /// Processing was aborted due to on_error = fail.
108    Aborted { message: String },
109}
110
111impl std::fmt::Display for ProcessorError {
112    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
113        match self {
114            Self::ToolError(e) => write!(f, "{e}"),
115            Self::NoToolsConfigured { language } => {
116                write!(f, "No tools configured for language '{language}'")
117            }
118            Self::ToolBinaryNotFound { tool, language } => {
119                write!(f, "Tool '{tool}' binary not found for language '{language}'")
120            }
121            Self::Aborted { message } => write!(f, "Processing aborted: {message}"),
122        }
123    }
124}
125
126impl std::error::Error for ProcessorError {}
127
128impl From<ExecutorError> for ProcessorError {
129    fn from(e: ExecutorError) -> Self {
130        Self::ToolError(e)
131    }
132}
133
134/// Result of processing a single code block.
135#[derive(Debug)]
136pub struct CodeBlockResult {
137    /// Diagnostics from linting.
138    pub diagnostics: Vec<CodeBlockDiagnostic>,
139    /// Formatted content (if formatting was requested and succeeded).
140    pub formatted_content: Option<String>,
141    /// Whether the code block was modified.
142    pub was_modified: bool,
143}
144
/// Outcome of formatting the code blocks of a whole document.
#[derive(Debug)]
pub struct FormatOutput {
    /// The resulting document text; it may be only partially formatted when errors occurred.
    pub content: String,
    /// `true` when at least one error occurred while formatting.
    pub had_errors: bool,
    /// One message per block that could not be formatted.
    pub error_messages: Vec<String>,
}
155
156/// Main processor for code block tools.
157pub struct CodeBlockToolProcessor<'a> {
158    config: &'a CodeBlockToolsConfig,
159    flavor: MarkdownFlavor,
160    linguist: LinguistResolver,
161    registry: ToolRegistry,
162    executor: ToolExecutor,
163    user_aliases: std::collections::HashMap<String, String>,
164}
165
166impl<'a> CodeBlockToolProcessor<'a> {
167    /// Create a new processor with the given configuration and markdown flavor.
168    pub fn new(config: &'a CodeBlockToolsConfig, flavor: MarkdownFlavor) -> Self {
169        let user_aliases = config
170            .language_aliases
171            .iter()
172            .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
173            .collect();
174        Self {
175            config,
176            flavor,
177            linguist: LinguistResolver::new(),
178            registry: ToolRegistry::new(config.tools.clone()),
179            executor: ToolExecutor::new(config.timeout),
180            user_aliases,
181        }
182    }
183
184    /// Extract all fenced code blocks from content.
185    pub fn extract_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
186        let mut blocks = Vec::new();
187        let mut current_block: Option<FencedCodeBlockBuilder> = None;
188
189        let options = Options::all();
190        let parser = Parser::new_ext(content, options).into_offset_iter();
191
192        let lines: Vec<&str> = content.lines().collect();
193
194        for (event, range) in parser {
195            match event {
196                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
197                    let info_string = info.to_string();
198                    let language = info_string.split_whitespace().next().unwrap_or("").to_string();
199
200                    // Find start line
201                    let start_line = content[..range.start].chars().filter(|&c| c == '\n').count();
202
203                    // Find content start (after opening fence line)
204                    let content_start = content[range.start..]
205                        .find('\n')
206                        .map(|i| range.start + i + 1)
207                        .unwrap_or(content.len());
208
209                    // Detect fence character and length from the line
210                    let fence_line = lines.get(start_line).unwrap_or(&"");
211                    let trimmed = fence_line.trim_start();
212                    let indent = fence_line.len() - trimmed.len();
213                    let indent_prefix = fence_line.get(..indent).unwrap_or("").to_string();
214                    let (fence_char, fence_length) = if trimmed.starts_with('~') {
215                        ('~', trimmed.chars().take_while(|&c| c == '~').count())
216                    } else {
217                        ('`', trimmed.chars().take_while(|&c| c == '`').count())
218                    };
219
220                    current_block = Some(FencedCodeBlockBuilder {
221                        start_line,
222                        content_start,
223                        language,
224                        info_string,
225                        fence_char,
226                        fence_length,
227                        indent,
228                        indent_prefix,
229                    });
230                }
231                Event::End(TagEnd::CodeBlock) => {
232                    if let Some(builder) = current_block.take() {
233                        // Find end line
234                        let end_line = content[..range.end].chars().filter(|&c| c == '\n').count();
235
236                        // Find content end (before closing fence line)
237                        let search_start = builder.content_start.min(range.end);
238                        let content_end = if search_start < range.end {
239                            content[search_start..range.end]
240                                .rfind('\n')
241                                .map(|i| search_start + i)
242                                .unwrap_or(search_start)
243                        } else {
244                            search_start
245                        };
246
247                        if content_end >= builder.content_start {
248                            blocks.push(FencedCodeBlockInfo {
249                                start_line: builder.start_line,
250                                end_line,
251                                content_start: builder.content_start,
252                                content_end,
253                                language: builder.language,
254                                info_string: builder.info_string,
255                                fence_char: builder.fence_char,
256                                fence_length: builder.fence_length,
257                                indent: builder.indent,
258                                indent_prefix: builder.indent_prefix,
259                            });
260                        }
261                    }
262                }
263                _ => {}
264            }
265        }
266
267        // For MkDocs flavor, also extract code blocks inside admonitions and tabs
268        if self.flavor == MarkdownFlavor::MkDocs {
269            let mkdocs_blocks = self.extract_mkdocs_code_blocks(content);
270            for mb in mkdocs_blocks {
271                // Deduplicate: only add if no existing block starts at the same line
272                if !blocks.iter().any(|b| b.start_line == mb.start_line) {
273                    blocks.push(mb);
274                }
275            }
276            blocks.sort_by_key(|b| b.start_line);
277        }
278
279        blocks
280    }
281
282    /// Extract fenced code blocks that are inside MkDocs admonitions or tabs.
283    ///
284    /// pulldown_cmark doesn't parse MkDocs-specific constructs, so indented
285    /// code blocks inside `!!!`/`???` admonitions or `===` tabs are missed.
286    /// This method manually scans for them.
287    fn extract_mkdocs_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
288        use crate::utils::mkdocs_admonitions;
289        use crate::utils::mkdocs_tabs;
290
291        let mut blocks = Vec::new();
292        let lines: Vec<&str> = content.lines().collect();
293
294        // Track current MkDocs context indent level
295        // We only need to know if we're inside any MkDocs block, so a simple stack suffices.
296        let mut context_indent_stack: Vec<usize> = Vec::new();
297
298        // Track fence state inside MkDocs context
299        let mut in_fence = false;
300        let mut fence_start_line: usize = 0;
301        let mut fence_content_start: usize = 0;
302        let mut fence_char: char = '`';
303        let mut fence_length: usize = 0;
304        let mut fence_indent: usize = 0;
305        let mut fence_indent_prefix = String::new();
306        let mut fence_language = String::new();
307        let mut fence_info_string = String::new();
308
309        // Compute byte offsets via pointer arithmetic.
310        // `content.lines()` returns slices into the original string,
311        // so each line's pointer offset from `content` gives its byte position.
312        // This correctly handles \n, \r\n, and empty lines.
313        let content_start_ptr = content.as_ptr() as usize;
314        let line_offsets: Vec<usize> = lines
315            .iter()
316            .map(|line| line.as_ptr() as usize - content_start_ptr)
317            .collect();
318
319        for (i, line) in lines.iter().enumerate() {
320            let line_indent = crate::utils::mkdocs_common::get_line_indent(line);
321            let is_admonition = mkdocs_admonitions::is_admonition_start(line);
322            let is_tab = mkdocs_tabs::is_tab_marker(line);
323
324            // Pop contexts when the current line is not indented enough to be content.
325            // This runs for ALL lines (including new admonition/tab starts) to clean
326            // up stale entries before potentially pushing a new context.
327            if !line.trim().is_empty() {
328                while let Some(&ctx_indent) = context_indent_stack.last() {
329                    if line_indent < ctx_indent + 4 {
330                        context_indent_stack.pop();
331                        if in_fence {
332                            in_fence = false;
333                        }
334                    } else {
335                        break;
336                    }
337                }
338            }
339
340            // Check for admonition start — push new context
341            if is_admonition && let Some(indent) = mkdocs_admonitions::get_admonition_indent(line) {
342                context_indent_stack.push(indent);
343                continue;
344            }
345
346            // Check for tab marker — push new context
347            if is_tab && let Some(indent) = mkdocs_tabs::get_tab_indent(line) {
348                context_indent_stack.push(indent);
349                continue;
350            }
351
352            // Only look for fences inside a MkDocs context
353            if context_indent_stack.is_empty() {
354                continue;
355            }
356
357            let trimmed = line.trim_start();
358            let leading_spaces = line.len() - trimmed.len();
359
360            if !in_fence {
361                // Check for fence opening
362                let (fc, fl) = if trimmed.starts_with("```") {
363                    ('`', trimmed.chars().take_while(|&c| c == '`').count())
364                } else if trimmed.starts_with("~~~") {
365                    ('~', trimmed.chars().take_while(|&c| c == '~').count())
366                } else {
367                    continue;
368                };
369
370                if fl >= 3 {
371                    in_fence = true;
372                    fence_start_line = i;
373                    fence_char = fc;
374                    fence_length = fl;
375                    fence_indent = leading_spaces;
376                    fence_indent_prefix = line.get(..leading_spaces).unwrap_or("").to_string();
377
378                    let after_fence = &trimmed[fl..];
379                    fence_info_string = after_fence.trim().to_string();
380                    fence_language = fence_info_string.split_whitespace().next().unwrap_or("").to_string();
381
382                    // Content starts at the next line's byte offset
383                    fence_content_start = line_offsets.get(i + 1).copied().unwrap_or(content.len());
384                }
385            } else {
386                // Check for fence closing
387                let is_closing = if fence_char == '`' {
388                    trimmed.starts_with("```")
389                        && trimmed.chars().take_while(|&c| c == '`').count() >= fence_length
390                        && trimmed.trim_start_matches('`').trim().is_empty()
391                } else {
392                    trimmed.starts_with("~~~")
393                        && trimmed.chars().take_while(|&c| c == '~').count() >= fence_length
394                        && trimmed.trim_start_matches('~').trim().is_empty()
395                };
396
397                if is_closing {
398                    let content_end = line_offsets.get(i).copied().unwrap_or(content.len());
399
400                    if content_end >= fence_content_start {
401                        blocks.push(FencedCodeBlockInfo {
402                            start_line: fence_start_line,
403                            end_line: i,
404                            content_start: fence_content_start,
405                            content_end,
406                            language: fence_language.clone(),
407                            info_string: fence_info_string.clone(),
408                            fence_char,
409                            fence_length,
410                            indent: fence_indent,
411                            indent_prefix: fence_indent_prefix.clone(),
412                        });
413                    }
414
415                    in_fence = false;
416                }
417            }
418        }
419
420        blocks
421    }
422
423    /// Resolve a language tag to its canonical name.
424    fn resolve_language(&self, language: &str) -> String {
425        let lower = language.to_lowercase();
426        if let Some(mapped) = self.user_aliases.get(&lower) {
427            return mapped.clone();
428        }
429        match self.config.normalize_language {
430            NormalizeLanguage::Linguist => self.linguist.resolve(&lower),
431            NormalizeLanguage::Exact => lower,
432        }
433    }
434
435    /// Get the effective on_error setting for a language.
436    fn get_on_error(&self, language: &str) -> OnError {
437        self.config
438            .languages
439            .get(language)
440            .and_then(|lc| lc.on_error)
441            .unwrap_or(self.config.on_error)
442    }
443
444    /// Strip the fence indentation prefix from each line of a code block.
445    fn strip_indent_from_block(&self, content: &str, indent_prefix: &str) -> String {
446        if indent_prefix.is_empty() {
447            return content.to_string();
448        }
449
450        let mut out = String::with_capacity(content.len());
451        for line in content.split_inclusive('\n') {
452            if let Some(stripped) = line.strip_prefix(indent_prefix) {
453                out.push_str(stripped);
454            } else {
455                out.push_str(line);
456            }
457        }
458        out
459    }
460
461    /// Re-apply the fence indentation prefix to each line of a code block.
462    fn apply_indent_to_block(&self, content: &str, indent_prefix: &str) -> String {
463        if indent_prefix.is_empty() {
464            return content.to_string();
465        }
466        if content.is_empty() {
467            return String::new();
468        }
469
470        let mut out = String::with_capacity(content.len() + indent_prefix.len());
471        for line in content.split_inclusive('\n') {
472            if line == "\n" {
473                out.push_str(line);
474            } else {
475                out.push_str(indent_prefix);
476                out.push_str(line);
477            }
478        }
479        out
480    }
481
482    /// Lint all code blocks in the content.
483    ///
484    /// Returns diagnostics from all configured linters.
485    pub fn lint(&self, content: &str) -> Result<Vec<CodeBlockDiagnostic>, ProcessorError> {
486        let mut all_diagnostics = Vec::new();
487        let blocks = self.extract_code_blocks(content);
488
489        for block in blocks {
490            if block.language.is_empty() {
491                continue; // Skip blocks without language tag
492            }
493
494            let canonical_lang = self.resolve_language(&block.language);
495
496            // Get lint tools for this language
497            let lint_tools = match self.config.languages.get(&canonical_lang) {
498                Some(lc) if !lc.lint.is_empty() => &lc.lint,
499                _ => {
500                    // No tools configured for this language in lint mode
501                    match self.config.on_missing_language_definition {
502                        OnMissing::Ignore => continue,
503                        OnMissing::Fail => {
504                            all_diagnostics.push(CodeBlockDiagnostic {
505                                file_line: block.start_line + 1,
506                                column: None,
507                                message: format!("No lint tools configured for language '{canonical_lang}'"),
508                                severity: DiagnosticSeverity::Error,
509                                tool: "code-block-tools".to_string(),
510                                code_block_start: block.start_line + 1,
511                            });
512                            continue;
513                        }
514                        OnMissing::FailFast => {
515                            return Err(ProcessorError::NoToolsConfigured {
516                                language: canonical_lang,
517                            });
518                        }
519                    }
520                }
521            };
522
523            // Extract code block content
524            let code_content_raw = if block.content_start < block.content_end && block.content_end <= content.len() {
525                &content[block.content_start..block.content_end]
526            } else {
527                continue;
528            };
529            let code_content = self.strip_indent_from_block(code_content_raw, &block.indent_prefix);
530
531            // Run each lint tool
532            for tool_id in lint_tools {
533                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown linting
534                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
535                    continue;
536                }
537
538                let tool_def = match self.registry.get(tool_id) {
539                    Some(t) => t,
540                    None => {
541                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
542                        continue;
543                    }
544                };
545
546                // Check if tool binary exists before running
547                let tool_name = tool_def.command.first().map(String::as_str).unwrap_or("");
548                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
549                    match self.config.on_missing_tool_binary {
550                        OnMissing::Ignore => {
551                            log::debug!("Tool binary '{tool_name}' not found, skipping");
552                            continue;
553                        }
554                        OnMissing::Fail => {
555                            all_diagnostics.push(CodeBlockDiagnostic {
556                                file_line: block.start_line + 1,
557                                column: None,
558                                message: format!("Tool binary '{tool_name}' not found in PATH"),
559                                severity: DiagnosticSeverity::Error,
560                                tool: "code-block-tools".to_string(),
561                                code_block_start: block.start_line + 1,
562                            });
563                            continue;
564                        }
565                        OnMissing::FailFast => {
566                            return Err(ProcessorError::ToolBinaryNotFound {
567                                tool: tool_name.to_string(),
568                                language: canonical_lang.clone(),
569                            });
570                        }
571                    }
572                }
573
574                match self.executor.lint(tool_def, &code_content, Some(self.config.timeout)) {
575                    Ok(output) => {
576                        // Parse tool output into diagnostics
577                        let diagnostics = self.parse_tool_output(
578                            &output,
579                            tool_id,
580                            block.start_line + 1, // Convert to 1-indexed
581                        );
582                        all_diagnostics.extend(diagnostics);
583                    }
584                    Err(e) => {
585                        let on_error = self.get_on_error(&canonical_lang);
586                        match on_error {
587                            OnError::Fail => return Err(e.into()),
588                            OnError::Warn => {
589                                log::warn!("Tool '{tool_id}' failed: {e}");
590                            }
591                            OnError::Skip => {
592                                // Silently skip
593                            }
594                        }
595                    }
596                }
597            }
598        }
599
600        Ok(all_diagnostics)
601    }
602
603    /// Format all code blocks in the content.
604    ///
605    /// Returns the modified content with formatted code blocks and any errors that occurred.
606    /// With `on-missing-*` = `fail`, errors are collected but formatting continues.
607    /// With `on-missing-*` = `fail-fast`, returns Err immediately on first error.
608    pub fn format(&self, content: &str) -> Result<FormatOutput, ProcessorError> {
609        let blocks = self.extract_code_blocks(content);
610
611        if blocks.is_empty() {
612            return Ok(FormatOutput {
613                content: content.to_string(),
614                had_errors: false,
615                error_messages: Vec::new(),
616            });
617        }
618
619        // Process blocks in reverse order to maintain byte offsets
620        let mut result = content.to_string();
621        let mut error_messages: Vec<String> = Vec::new();
622
623        for block in blocks.into_iter().rev() {
624            if block.language.is_empty() {
625                continue;
626            }
627
628            let canonical_lang = self.resolve_language(&block.language);
629
630            // Get format tools for this language
631            let format_tools = match self.config.languages.get(&canonical_lang) {
632                Some(lc) if !lc.format.is_empty() => &lc.format,
633                _ => {
634                    // No tools configured for this language in format mode
635                    match self.config.on_missing_language_definition {
636                        OnMissing::Ignore => continue,
637                        OnMissing::Fail => {
638                            error_messages.push(format!(
639                                "No format tools configured for language '{canonical_lang}' at line {}",
640                                block.start_line + 1
641                            ));
642                            continue;
643                        }
644                        OnMissing::FailFast => {
645                            return Err(ProcessorError::NoToolsConfigured {
646                                language: canonical_lang,
647                            });
648                        }
649                    }
650                }
651            };
652
653            // Extract code block content
654            if block.content_start >= block.content_end || block.content_end > result.len() {
655                continue;
656            }
657            let code_content_raw = result[block.content_start..block.content_end].to_string();
658            let code_content = self.strip_indent_from_block(&code_content_raw, &block.indent_prefix);
659
660            // Run format tools (use first successful one)
661            let mut formatted = code_content.clone();
662            let mut tool_ran = false;
663            for tool_id in format_tools {
664                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown formatting
665                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
666                    continue;
667                }
668
669                let tool_def = match self.registry.get(tool_id) {
670                    Some(t) => t,
671                    None => {
672                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
673                        continue;
674                    }
675                };
676
677                // Check if tool binary exists before running
678                let tool_name = tool_def.command.first().map(String::as_str).unwrap_or("");
679                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
680                    match self.config.on_missing_tool_binary {
681                        OnMissing::Ignore => {
682                            log::debug!("Tool binary '{tool_name}' not found, skipping");
683                            continue;
684                        }
685                        OnMissing::Fail => {
686                            error_messages.push(format!(
687                                "Tool binary '{tool_name}' not found in PATH for language '{canonical_lang}' at line {}",
688                                block.start_line + 1
689                            ));
690                            continue;
691                        }
692                        OnMissing::FailFast => {
693                            return Err(ProcessorError::ToolBinaryNotFound {
694                                tool: tool_name.to_string(),
695                                language: canonical_lang.clone(),
696                            });
697                        }
698                    }
699                }
700
701                match self.executor.format(tool_def, &formatted, Some(self.config.timeout)) {
702                    Ok(output) => {
703                        // Ensure trailing newline matches original (unindented)
704                        formatted = output;
705                        if code_content.ends_with('\n') && !formatted.ends_with('\n') {
706                            formatted.push('\n');
707                        } else if !code_content.ends_with('\n') && formatted.ends_with('\n') {
708                            formatted.pop();
709                        }
710                        tool_ran = true;
711                        break; // Use first successful formatter
712                    }
713                    Err(e) => {
714                        let on_error = self.get_on_error(&canonical_lang);
715                        match on_error {
716                            OnError::Fail => return Err(e.into()),
717                            OnError::Warn => {
718                                log::warn!("Formatter '{tool_id}' failed: {e}");
719                            }
720                            OnError::Skip => {}
721                        }
722                    }
723                }
724            }
725
726            // Replace content if changed and a tool actually ran
727            if tool_ran && formatted != code_content {
728                let reindented = self.apply_indent_to_block(&formatted, &block.indent_prefix);
729                if reindented != code_content_raw {
730                    result.replace_range(block.content_start..block.content_end, &reindented);
731                }
732            }
733        }
734
735        Ok(FormatOutput {
736            content: result,
737            had_errors: !error_messages.is_empty(),
738            error_messages,
739        })
740    }
741
742    /// Parse tool output into diagnostics.
743    ///
744    /// This is a basic parser that handles common output formats.
745    /// Tools vary widely in their output format, so this is best-effort.
746    fn parse_tool_output(
747        &self,
748        output: &ToolOutput,
749        tool_id: &str,
750        code_block_start_line: usize,
751    ) -> Vec<CodeBlockDiagnostic> {
752        let mut diagnostics = Vec::new();
753        let mut shellcheck_line: Option<usize> = None;
754
755        // Combine stdout and stderr for parsing
756        let stdout = &output.stdout;
757        let stderr = &output.stderr;
758        let combined = format!("{stdout}\n{stderr}");
759
760        // Look for common line:column:message patterns
761        // Examples:
762        // - ruff: "_.py:1:1: E501 Line too long"
763        // - shellcheck: "In - line 1: ..."
764        // - eslint: "1:10 error Description"
765
766        for line in combined.lines() {
767            let line = line.trim();
768            if line.is_empty() {
769                continue;
770            }
771
772            if let Some(line_num) = self.parse_shellcheck_header(line) {
773                shellcheck_line = Some(line_num);
774                continue;
775            }
776
777            if let Some(line_num) = shellcheck_line
778                && let Some(diag) = self.parse_shellcheck_message(line, tool_id, code_block_start_line, line_num)
779            {
780                diagnostics.push(diag);
781                continue;
782            }
783
784            // Try pattern: "file:line:col: message" or "file:line: message"
785            if let Some(diag) = self.parse_standard_format(line, tool_id, code_block_start_line) {
786                diagnostics.push(diag);
787                continue;
788            }
789
790            // Try pattern: "line:col message" (eslint style)
791            if let Some(diag) = self.parse_eslint_format(line, tool_id, code_block_start_line) {
792                diagnostics.push(diag);
793                continue;
794            }
795
796            // Try single-line shellcheck format fallback
797            if let Some(diag) = self.parse_shellcheck_format(line, tool_id, code_block_start_line) {
798                diagnostics.push(diag);
799            }
800        }
801
802        // If no diagnostics parsed but tool failed, create a generic one
803        if diagnostics.is_empty() && !output.success {
804            let message = if !output.stderr.is_empty() {
805                output.stderr.lines().next().unwrap_or("Tool failed").to_string()
806            } else if !output.stdout.is_empty() {
807                output.stdout.lines().next().unwrap_or("Tool failed").to_string()
808            } else {
809                let exit_code = output.exit_code;
810                format!("Tool exited with code {exit_code}")
811            };
812
813            diagnostics.push(CodeBlockDiagnostic {
814                file_line: code_block_start_line,
815                column: None,
816                message,
817                severity: DiagnosticSeverity::Error,
818                tool: tool_id.to_string(),
819                code_block_start: code_block_start_line,
820            });
821        }
822
823        diagnostics
824    }
825
826    /// Parse standard "file:line:col: message" format.
827    fn parse_standard_format(
828        &self,
829        line: &str,
830        tool_id: &str,
831        code_block_start_line: usize,
832    ) -> Option<CodeBlockDiagnostic> {
833        // Match patterns like "file.py:1:10: E501 message"
834        let mut parts = line.rsplitn(4, ':');
835        let message = parts.next()?.trim().to_string();
836        let part1 = parts.next()?.trim().to_string();
837        let part2 = parts.next()?.trim().to_string();
838        let part3 = parts.next().map(|s| s.trim().to_string());
839
840        let (line_part, col_part) = if part3.is_some() {
841            (part2, Some(part1))
842        } else {
843            (part1, None)
844        };
845
846        if let Ok(line_num) = line_part.parse::<usize>() {
847            let column = col_part.and_then(|s| s.parse::<usize>().ok());
848            let message = Self::strip_fixable_markers(&message);
849            if !message.is_empty() {
850                let severity = self.infer_severity(&message);
851                return Some(CodeBlockDiagnostic {
852                    file_line: code_block_start_line + line_num,
853                    column,
854                    message,
855                    severity,
856                    tool: tool_id.to_string(),
857                    code_block_start: code_block_start_line,
858                });
859            }
860        }
861        None
862    }
863
864    /// Parse eslint-style "line:col severity message" format.
865    fn parse_eslint_format(
866        &self,
867        line: &str,
868        tool_id: &str,
869        code_block_start_line: usize,
870    ) -> Option<CodeBlockDiagnostic> {
871        // Match "1:10 error Message"
872        let parts: Vec<&str> = line.splitn(3, ' ').collect();
873        if parts.len() >= 2 {
874            let loc_parts: Vec<&str> = parts[0].split(':').collect();
875            if loc_parts.len() == 2
876                && let (Ok(line_num), Ok(col)) = (loc_parts[0].parse::<usize>(), loc_parts[1].parse::<usize>())
877            {
878                let (sev_part, msg_part) = if parts.len() >= 3 {
879                    (parts[1], parts[2])
880                } else {
881                    (parts[1], "")
882                };
883                let message = if msg_part.is_empty() {
884                    sev_part.to_string()
885                } else {
886                    msg_part.to_string()
887                };
888                let message = Self::strip_fixable_markers(&message);
889                let severity = match sev_part.to_lowercase().as_str() {
890                    "error" => DiagnosticSeverity::Error,
891                    "warning" | "warn" => DiagnosticSeverity::Warning,
892                    "info" => DiagnosticSeverity::Info,
893                    _ => self.infer_severity(&message),
894                };
895                return Some(CodeBlockDiagnostic {
896                    file_line: code_block_start_line + line_num,
897                    column: Some(col),
898                    message,
899                    severity,
900                    tool: tool_id.to_string(),
901                    code_block_start: code_block_start_line,
902                });
903            }
904        }
905        None
906    }
907
908    /// Parse shellcheck-style "In - line N: message" format.
909    fn parse_shellcheck_format(
910        &self,
911        line: &str,
912        tool_id: &str,
913        code_block_start_line: usize,
914    ) -> Option<CodeBlockDiagnostic> {
915        // Match "In - line 5:" pattern
916        if line.starts_with("In ")
917            && line.contains(" line ")
918            && let Some(line_start) = line.find(" line ")
919        {
920            let after_line = &line[line_start + 6..];
921            if let Some(colon_pos) = after_line.find(':')
922                && let Ok(line_num) = after_line[..colon_pos].trim().parse::<usize>()
923            {
924                let message = Self::strip_fixable_markers(after_line[colon_pos + 1..].trim());
925                if !message.is_empty() {
926                    let severity = self.infer_severity(&message);
927                    return Some(CodeBlockDiagnostic {
928                        file_line: code_block_start_line + line_num,
929                        column: None,
930                        message,
931                        severity,
932                        tool: tool_id.to_string(),
933                        code_block_start: code_block_start_line,
934                    });
935                }
936            }
937        }
938        None
939    }
940
941    /// Parse shellcheck header line to capture line number context.
942    fn parse_shellcheck_header(&self, line: &str) -> Option<usize> {
943        if line.starts_with("In ")
944            && line.contains(" line ")
945            && let Some(line_start) = line.find(" line ")
946        {
947            let after_line = &line[line_start + 6..];
948            if let Some(colon_pos) = after_line.find(':') {
949                return after_line[..colon_pos].trim().parse::<usize>().ok();
950            }
951        }
952        None
953    }
954
955    /// Parse shellcheck message line containing SCXXXX codes.
956    fn parse_shellcheck_message(
957        &self,
958        line: &str,
959        tool_id: &str,
960        code_block_start_line: usize,
961        line_num: usize,
962    ) -> Option<CodeBlockDiagnostic> {
963        let sc_pos = line.find("SC")?;
964        let after_sc = &line[sc_pos + 2..];
965        let code_len = after_sc.chars().take_while(|c| c.is_ascii_digit()).count();
966        if code_len == 0 {
967            return None;
968        }
969        let after_code = &after_sc[code_len..];
970        let sev_start = after_code.find('(')? + 1;
971        let sev_end = after_code[sev_start..].find(')')? + sev_start;
972        let sev = after_code[sev_start..sev_end].trim().to_lowercase();
973        let message_start = after_code.find("):")? + 2;
974        let message = Self::strip_fixable_markers(after_code[message_start..].trim());
975        if message.is_empty() {
976            return None;
977        }
978
979        let severity = match sev.as_str() {
980            "error" => DiagnosticSeverity::Error,
981            "warning" | "warn" => DiagnosticSeverity::Warning,
982            "info" | "style" => DiagnosticSeverity::Info,
983            _ => self.infer_severity(&message),
984        };
985
986        Some(CodeBlockDiagnostic {
987            file_line: code_block_start_line + line_num,
988            column: None,
989            message,
990            severity,
991            tool: tool_id.to_string(),
992            code_block_start: code_block_start_line,
993        })
994    }
995
996    /// Infer severity from message content.
997    fn infer_severity(&self, message: &str) -> DiagnosticSeverity {
998        let lower = message.to_lowercase();
999        if lower.contains("error")
1000            || lower.starts_with("e") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1001            || lower.starts_with("f") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1002        {
1003            DiagnosticSeverity::Error
1004        } else if lower.contains("warning")
1005            || lower.contains("warn")
1006            || lower.starts_with("w") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1007        {
1008            DiagnosticSeverity::Warning
1009        } else {
1010            DiagnosticSeverity::Info
1011        }
1012    }
1013
1014    /// Strip "fixable" markers from external tool messages.
1015    ///
1016    /// External tools like ruff show `[*]` to indicate fixable issues, but in rumdl's
1017    /// context these markers can be misleading - the lint tool's fix capability may
1018    /// differ from what our configured formatter can fix. We strip these markers
1019    /// to avoid making promises we can't keep.
1020    fn strip_fixable_markers(message: &str) -> String {
1021        message
1022            .replace(" [*]", "")
1023            .replace("[*] ", "")
1024            .replace("[*]", "")
1025            .replace(" (fixable)", "")
1026            .replace("(fixable) ", "")
1027            .replace("(fixable)", "")
1028            .replace(" [fix available]", "")
1029            .replace("[fix available] ", "")
1030            .replace("[fix available]", "")
1031            .replace(" [autofix]", "")
1032            .replace("[autofix] ", "")
1033            .replace("[autofix]", "")
1034            .trim()
1035            .to_string()
1036    }
1037}
1038
/// Builder for FencedCodeBlockInfo during parsing.
///
/// Holds the opening-fence data of a block. The fields share names with those of
/// `FencedCodeBlockInfo`; the descriptions below follow that struct's docs
/// (NOTE(review): confirm against the code that fills this builder).
struct FencedCodeBlockBuilder {
    // --- position ---
    /// Line number where the opening fence starts.
    start_line: usize,
    /// Byte offset where the code content starts.
    content_start: usize,

    // --- info string ---
    /// Full info string from the fence.
    info_string: String,
    /// Language tag extracted from the info string.
    language: String,

    // --- fence shape ---
    /// The fence character used.
    fence_char: char,
    /// Length of the fence.
    fence_length: usize,

    // --- indentation ---
    /// Indentation amount of the fence line (presumably a width; verify at the
    /// construction site).
    indent: usize,
    /// Indentation text of the fence line (presumably the literal leading
    /// whitespace; verify at the construction site).
    indent_prefix: String,
}
1050
1051#[cfg(test)]
1052mod tests {
1053    use super::*;
1054
1055    fn default_config() -> CodeBlockToolsConfig {
1056        CodeBlockToolsConfig::default()
1057    }
1058
1059    #[test]
1060    fn test_extract_code_blocks() {
1061        let config = default_config();
1062        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1063
1064        let content = r#"# Example
1065
1066```python
1067def hello():
1068    print("Hello")
1069```
1070
1071Some text
1072
1073```rust
1074fn main() {}
1075```
1076"#;
1077
1078        let blocks = processor.extract_code_blocks(content);
1079
1080        assert_eq!(blocks.len(), 2);
1081
1082        assert_eq!(blocks[0].language, "python");
1083        assert_eq!(blocks[0].fence_char, '`');
1084        assert_eq!(blocks[0].fence_length, 3);
1085        assert_eq!(blocks[0].start_line, 2);
1086        assert_eq!(blocks[0].indent, 0);
1087        assert_eq!(blocks[0].indent_prefix, "");
1088
1089        assert_eq!(blocks[1].language, "rust");
1090        assert_eq!(blocks[1].fence_char, '`');
1091        assert_eq!(blocks[1].fence_length, 3);
1092    }
1093
1094    #[test]
1095    fn test_extract_code_blocks_with_info_string() {
1096        let config = default_config();
1097        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1098
1099        let content = "```python title=\"example.py\"\ncode\n```";
1100        let blocks = processor.extract_code_blocks(content);
1101
1102        assert_eq!(blocks.len(), 1);
1103        assert_eq!(blocks[0].language, "python");
1104        assert_eq!(blocks[0].info_string, "python title=\"example.py\"");
1105    }
1106
1107    #[test]
1108    fn test_extract_code_blocks_tilde_fence() {
1109        let config = default_config();
1110        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1111
1112        let content = "~~~bash\necho hello\n~~~";
1113        let blocks = processor.extract_code_blocks(content);
1114
1115        assert_eq!(blocks.len(), 1);
1116        assert_eq!(blocks[0].language, "bash");
1117        assert_eq!(blocks[0].fence_char, '~');
1118        assert_eq!(blocks[0].fence_length, 3);
1119        assert_eq!(blocks[0].indent_prefix, "");
1120    }
1121
1122    #[test]
1123    fn test_extract_code_blocks_with_indent_prefix() {
1124        let config = default_config();
1125        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1126
1127        let content = "  - item\n    ```python\n    print('hi')\n    ```";
1128        let blocks = processor.extract_code_blocks(content);
1129
1130        assert_eq!(blocks.len(), 1);
1131        assert_eq!(blocks[0].indent_prefix, "    ");
1132    }
1133
1134    #[test]
1135    fn test_extract_code_blocks_no_language() {
1136        let config = default_config();
1137        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1138
1139        let content = "```\nplain code\n```";
1140        let blocks = processor.extract_code_blocks(content);
1141
1142        assert_eq!(blocks.len(), 1);
1143        assert_eq!(blocks[0].language, "");
1144    }
1145
1146    #[test]
1147    fn test_resolve_language_linguist() {
1148        let mut config = default_config();
1149        config.normalize_language = NormalizeLanguage::Linguist;
1150        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1151
1152        assert_eq!(processor.resolve_language("py"), "python");
1153        assert_eq!(processor.resolve_language("bash"), "shell");
1154        assert_eq!(processor.resolve_language("js"), "javascript");
1155    }
1156
1157    #[test]
1158    fn test_resolve_language_exact() {
1159        let mut config = default_config();
1160        config.normalize_language = NormalizeLanguage::Exact;
1161        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1162
1163        assert_eq!(processor.resolve_language("py"), "py");
1164        assert_eq!(processor.resolve_language("BASH"), "bash");
1165    }
1166
1167    #[test]
1168    fn test_resolve_language_user_alias_override() {
1169        let mut config = default_config();
1170        config.language_aliases.insert("py".to_string(), "python".to_string());
1171        config.normalize_language = NormalizeLanguage::Exact;
1172        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1173
1174        assert_eq!(processor.resolve_language("PY"), "python");
1175    }
1176
1177    #[test]
1178    fn test_indent_strip_and_reapply_roundtrip() {
1179        let config = default_config();
1180        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1181
1182        let raw = "    def hello():\n        print('hi')";
1183        let stripped = processor.strip_indent_from_block(raw, "    ");
1184        assert_eq!(stripped, "def hello():\n    print('hi')");
1185
1186        let reapplied = processor.apply_indent_to_block(&stripped, "    ");
1187        assert_eq!(reapplied, raw);
1188    }
1189
1190    #[test]
1191    fn test_infer_severity() {
1192        let config = default_config();
1193        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1194
1195        assert_eq!(
1196            processor.infer_severity("E501 line too long"),
1197            DiagnosticSeverity::Error
1198        );
1199        assert_eq!(
1200            processor.infer_severity("W291 trailing whitespace"),
1201            DiagnosticSeverity::Warning
1202        );
1203        assert_eq!(
1204            processor.infer_severity("error: something failed"),
1205            DiagnosticSeverity::Error
1206        );
1207        assert_eq!(
1208            processor.infer_severity("warning: unused variable"),
1209            DiagnosticSeverity::Warning
1210        );
1211        assert_eq!(
1212            processor.infer_severity("note: consider using"),
1213            DiagnosticSeverity::Info
1214        );
1215    }
1216
1217    #[test]
1218    fn test_parse_standard_format_windows_path() {
1219        let config = default_config();
1220        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1221
1222        let output = ToolOutput {
1223            stdout: "C:\\path\\file.py:2:5: E123 message".to_string(),
1224            stderr: String::new(),
1225            exit_code: 1,
1226            success: false,
1227        };
1228
1229        let diags = processor.parse_tool_output(&output, "ruff:check", 10);
1230        assert_eq!(diags.len(), 1);
1231        assert_eq!(diags[0].file_line, 12);
1232        assert_eq!(diags[0].column, Some(5));
1233        assert_eq!(diags[0].message, "E123 message");
1234    }
1235
1236    #[test]
1237    fn test_parse_eslint_severity() {
1238        let config = default_config();
1239        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1240
1241        let output = ToolOutput {
1242            stdout: "1:2 error Unexpected token".to_string(),
1243            stderr: String::new(),
1244            exit_code: 1,
1245            success: false,
1246        };
1247
1248        let diags = processor.parse_tool_output(&output, "eslint", 5);
1249        assert_eq!(diags.len(), 1);
1250        assert_eq!(diags[0].file_line, 6);
1251        assert_eq!(diags[0].column, Some(2));
1252        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1253        assert_eq!(diags[0].message, "Unexpected token");
1254    }
1255
1256    #[test]
1257    fn test_parse_shellcheck_multiline() {
1258        let config = default_config();
1259        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1260
1261        let output = ToolOutput {
1262            stdout: "In - line 3:\necho $var\n ^-- SC2086 (info): Double quote to prevent globbing".to_string(),
1263            stderr: String::new(),
1264            exit_code: 1,
1265            success: false,
1266        };
1267
1268        let diags = processor.parse_tool_output(&output, "shellcheck", 10);
1269        assert_eq!(diags.len(), 1);
1270        assert_eq!(diags[0].file_line, 13);
1271        assert_eq!(diags[0].severity, DiagnosticSeverity::Info);
1272        assert_eq!(diags[0].message, "Double quote to prevent globbing");
1273    }
1274
1275    #[test]
1276    fn test_lint_no_config() {
1277        let config = default_config();
1278        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1279
1280        let content = "```python\nprint('hello')\n```";
1281        let result = processor.lint(content);
1282
1283        // Should succeed with no diagnostics (no tools configured)
1284        assert!(result.is_ok());
1285        assert!(result.unwrap().is_empty());
1286    }
1287
1288    #[test]
1289    fn test_format_no_config() {
1290        let config = default_config();
1291        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1292
1293        let content = "```python\nprint('hello')\n```";
1294        let result = processor.format(content);
1295
1296        // Should succeed with unchanged content (no tools configured)
1297        assert!(result.is_ok());
1298        let output = result.unwrap();
1299        assert_eq!(output.content, content);
1300        assert!(!output.had_errors);
1301        assert!(output.error_messages.is_empty());
1302    }
1303
1304    #[test]
1305    fn test_lint_on_missing_language_definition_fail() {
1306        let mut config = default_config();
1307        config.on_missing_language_definition = OnMissing::Fail;
1308        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1309
1310        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1311        let result = processor.lint(content);
1312
1313        // Should succeed but return diagnostics for both missing language definitions
1314        assert!(result.is_ok());
1315        let diagnostics = result.unwrap();
1316        assert_eq!(diagnostics.len(), 2);
1317        assert!(diagnostics[0].message.contains("No lint tools configured"));
1318        assert!(diagnostics[0].message.contains("python"));
1319        assert!(diagnostics[1].message.contains("javascript"));
1320    }
1321
1322    #[test]
1323    fn test_lint_on_missing_language_definition_fail_fast() {
1324        let mut config = default_config();
1325        config.on_missing_language_definition = OnMissing::FailFast;
1326        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1327
1328        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1329        let result = processor.lint(content);
1330
1331        // Should fail immediately on first missing language
1332        assert!(result.is_err());
1333        let err = result.unwrap_err();
1334        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1335    }
1336
1337    #[test]
1338    fn test_format_on_missing_language_definition_fail() {
1339        let mut config = default_config();
1340        config.on_missing_language_definition = OnMissing::Fail;
1341        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1342
1343        let content = "```python\nprint('hello')\n```";
1344        let result = processor.format(content);
1345
1346        // Should succeed but report errors
1347        assert!(result.is_ok());
1348        let output = result.unwrap();
1349        assert_eq!(output.content, content); // Content unchanged
1350        assert!(output.had_errors);
1351        assert!(!output.error_messages.is_empty());
1352        assert!(output.error_messages[0].contains("No format tools configured"));
1353    }
1354
1355    #[test]
1356    fn test_format_on_missing_language_definition_fail_fast() {
1357        let mut config = default_config();
1358        config.on_missing_language_definition = OnMissing::FailFast;
1359        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1360
1361        let content = "```python\nprint('hello')\n```";
1362        let result = processor.format(content);
1363
1364        // Should fail immediately
1365        assert!(result.is_err());
1366        let err = result.unwrap_err();
1367        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1368    }
1369
1370    #[test]
1371    fn test_lint_on_missing_tool_binary_fail() {
1372        use super::super::config::{LanguageToolConfig, ToolDefinition};
1373
1374        let mut config = default_config();
1375        config.on_missing_tool_binary = OnMissing::Fail;
1376
1377        // Configure a tool with a non-existent binary
1378        let lang_config = LanguageToolConfig {
1379            lint: vec!["nonexistent-linter".to_string()],
1380            ..Default::default()
1381        };
1382        config.languages.insert("python".to_string(), lang_config);
1383
1384        let tool_def = ToolDefinition {
1385            command: vec!["nonexistent-binary-xyz123".to_string()],
1386            ..Default::default()
1387        };
1388        config.tools.insert("nonexistent-linter".to_string(), tool_def);
1389
1390        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1391
1392        let content = "```python\nprint('hello')\n```";
1393        let result = processor.lint(content);
1394
1395        // Should succeed but return diagnostic for missing binary
1396        assert!(result.is_ok());
1397        let diagnostics = result.unwrap();
1398        assert_eq!(diagnostics.len(), 1);
1399        assert!(diagnostics[0].message.contains("not found in PATH"));
1400    }
1401
1402    #[test]
1403    fn test_lint_on_missing_tool_binary_fail_fast() {
1404        use super::super::config::{LanguageToolConfig, ToolDefinition};
1405
1406        let mut config = default_config();
1407        config.on_missing_tool_binary = OnMissing::FailFast;
1408
1409        // Configure a tool with a non-existent binary
1410        let lang_config = LanguageToolConfig {
1411            lint: vec!["nonexistent-linter".to_string()],
1412            ..Default::default()
1413        };
1414        config.languages.insert("python".to_string(), lang_config);
1415
1416        let tool_def = ToolDefinition {
1417            command: vec!["nonexistent-binary-xyz123".to_string()],
1418            ..Default::default()
1419        };
1420        config.tools.insert("nonexistent-linter".to_string(), tool_def);
1421
1422        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1423
1424        let content = "```python\nprint('hello')\n```";
1425        let result = processor.lint(content);
1426
1427        // Should fail immediately
1428        assert!(result.is_err());
1429        let err = result.unwrap_err();
1430        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
1431    }
1432
1433    #[test]
1434    fn test_format_on_missing_tool_binary_fail() {
1435        use super::super::config::{LanguageToolConfig, ToolDefinition};
1436
1437        let mut config = default_config();
1438        config.on_missing_tool_binary = OnMissing::Fail;
1439
1440        // Configure a tool with a non-existent binary
1441        let lang_config = LanguageToolConfig {
1442            format: vec!["nonexistent-formatter".to_string()],
1443            ..Default::default()
1444        };
1445        config.languages.insert("python".to_string(), lang_config);
1446
1447        let tool_def = ToolDefinition {
1448            command: vec!["nonexistent-binary-xyz123".to_string()],
1449            ..Default::default()
1450        };
1451        config.tools.insert("nonexistent-formatter".to_string(), tool_def);
1452
1453        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1454
1455        let content = "```python\nprint('hello')\n```";
1456        let result = processor.format(content);
1457
1458        // Should succeed but report errors
1459        assert!(result.is_ok());
1460        let output = result.unwrap();
1461        assert_eq!(output.content, content); // Content unchanged
1462        assert!(output.had_errors);
1463        assert!(!output.error_messages.is_empty());
1464        assert!(output.error_messages[0].contains("not found in PATH"));
1465    }
1466
1467    #[test]
1468    fn test_format_on_missing_tool_binary_fail_fast() {
1469        use super::super::config::{LanguageToolConfig, ToolDefinition};
1470
1471        let mut config = default_config();
1472        config.on_missing_tool_binary = OnMissing::FailFast;
1473
1474        // Configure a tool with a non-existent binary
1475        let lang_config = LanguageToolConfig {
1476            format: vec!["nonexistent-formatter".to_string()],
1477            ..Default::default()
1478        };
1479        config.languages.insert("python".to_string(), lang_config);
1480
1481        let tool_def = ToolDefinition {
1482            command: vec!["nonexistent-binary-xyz123".to_string()],
1483            ..Default::default()
1484        };
1485        config.tools.insert("nonexistent-formatter".to_string(), tool_def);
1486
1487        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1488
1489        let content = "```python\nprint('hello')\n```";
1490        let result = processor.format(content);
1491
1492        // Should fail immediately
1493        assert!(result.is_err());
1494        let err = result.unwrap_err();
1495        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
1496    }
1497
1498    #[test]
1499    fn test_lint_rumdl_builtin_skipped_for_markdown() {
1500        // Configure the built-in "rumdl" tool for markdown
1501        // The processor should skip it (handled by embedded markdown linting)
1502        let mut config = default_config();
1503        config.languages.insert(
1504            "markdown".to_string(),
1505            LanguageToolConfig {
1506                lint: vec![RUMDL_BUILTIN_TOOL.to_string()],
1507                format: vec![],
1508                on_error: None,
1509            },
1510        );
1511        config.on_missing_language_definition = OnMissing::Fail;
1512        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1513
1514        let content = "```markdown\n# Hello\n```";
1515        let result = processor.lint(content);
1516
1517        // Should succeed with no diagnostics - "rumdl" tool is skipped, not treated as unknown
1518        assert!(result.is_ok());
1519        assert!(result.unwrap().is_empty());
1520    }
1521
1522    #[test]
1523    fn test_format_rumdl_builtin_skipped_for_markdown() {
1524        // Configure the built-in "rumdl" tool for markdown
1525        let mut config = default_config();
1526        config.languages.insert(
1527            "markdown".to_string(),
1528            LanguageToolConfig {
1529                lint: vec![],
1530                format: vec![RUMDL_BUILTIN_TOOL.to_string()],
1531                on_error: None,
1532            },
1533        );
1534        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1535
1536        let content = "```markdown\n# Hello\n```";
1537        let result = processor.format(content);
1538
1539        // Should succeed with unchanged content - "rumdl" tool is skipped
1540        assert!(result.is_ok());
1541        let output = result.unwrap();
1542        assert_eq!(output.content, content);
1543        assert!(!output.had_errors);
1544    }
1545
1546    #[test]
1547    fn test_is_markdown_language() {
1548        // Test the helper function
1549        assert!(is_markdown_language("markdown"));
1550        assert!(is_markdown_language("Markdown"));
1551        assert!(is_markdown_language("MARKDOWN"));
1552        assert!(is_markdown_language("md"));
1553        assert!(is_markdown_language("MD"));
1554        assert!(!is_markdown_language("python"));
1555        assert!(!is_markdown_language("rust"));
1556        assert!(!is_markdown_language(""));
1557    }
1558
1559    // Issue #423: MkDocs admonition code block detection
1560
1561    #[test]
1562    fn test_extract_mkdocs_admonition_code_block() {
1563        let config = default_config();
1564        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1565
1566        let content = "!!! note\n    Some text\n\n    ```python\n    def hello():\n        pass\n    ```\n";
1567        let blocks = processor.extract_code_blocks(content);
1568
1569        assert_eq!(blocks.len(), 1, "Should detect code block inside MkDocs admonition");
1570        assert_eq!(blocks[0].language, "python");
1571    }
1572
1573    #[test]
1574    fn test_extract_mkdocs_tab_code_block() {
1575        let config = default_config();
1576        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1577
1578        let content = "=== \"Python\"\n\n    ```python\n    print(\"hello\")\n    ```\n";
1579        let blocks = processor.extract_code_blocks(content);
1580
1581        assert_eq!(blocks.len(), 1, "Should detect code block inside MkDocs tab");
1582        assert_eq!(blocks[0].language, "python");
1583    }
1584
1585    #[test]
1586    fn test_standard_flavor_ignores_admonition_indented_content() {
1587        let config = default_config();
1588        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1589
1590        // With standard flavor, pulldown_cmark parses this differently;
1591        // our MkDocs extraction should NOT run
1592        let content = "!!! note\n    Some text\n\n    ```python\n    def hello():\n        pass\n    ```\n";
1593        let blocks = processor.extract_code_blocks(content);
1594
1595        // Standard flavor relies on pulldown_cmark only, which may or may not detect
1596        // indented fenced blocks. The key assertion is that we don't double-detect.
1597        // With standard flavor, the MkDocs extraction path is skipped entirely.
1598        for (i, b) in blocks.iter().enumerate() {
1599            for (j, b2) in blocks.iter().enumerate() {
1600                if i != j {
1601                    assert_ne!(b.start_line, b2.start_line, "No duplicate blocks should exist");
1602                }
1603            }
1604        }
1605    }
1606
1607    #[test]
1608    fn test_mkdocs_top_level_blocks_alongside_admonition() {
1609        let config = default_config();
1610        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1611
1612        let content =
1613            "```rust\nfn main() {}\n```\n\n!!! note\n    Some text\n\n    ```python\n    print(\"hello\")\n    ```\n";
1614        let blocks = processor.extract_code_blocks(content);
1615
1616        assert_eq!(
1617            blocks.len(),
1618            2,
1619            "Should detect both top-level and admonition code blocks"
1620        );
1621        assert_eq!(blocks[0].language, "rust");
1622        assert_eq!(blocks[1].language, "python");
1623    }
1624
1625    #[test]
1626    fn test_mkdocs_nested_admonition_code_block() {
1627        let config = default_config();
1628        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1629
1630        let content = "\
1631!!! note
1632    Some text
1633
1634    !!! warning
1635        Nested content
1636
1637        ```python
1638        x = 1
1639        ```
1640";
1641        let blocks = processor.extract_code_blocks(content);
1642        assert_eq!(blocks.len(), 1, "Should detect code block inside nested admonition");
1643        assert_eq!(blocks[0].language, "python");
1644    }
1645
1646    #[test]
1647    fn test_mkdocs_consecutive_admonitions_no_stale_context() {
1648        let config = default_config();
1649        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1650
1651        // Two consecutive admonitions at the same indent level.
1652        // The first has no code block, the second does.
1653        let content = "\
1654!!! note
1655    First admonition content
1656
1657!!! warning
1658    Second admonition content
1659
1660    ```python
1661    y = 2
1662    ```
1663";
1664        let blocks = processor.extract_code_blocks(content);
1665        assert_eq!(blocks.len(), 1, "Should detect code block in second admonition only");
1666        assert_eq!(blocks[0].language, "python");
1667    }
1668
1669    #[test]
1670    fn test_mkdocs_crlf_line_endings() {
1671        let config = default_config();
1672        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1673
1674        // Use \r\n line endings
1675        let content = "!!! note\r\n    Some text\r\n\r\n    ```python\r\n    x = 1\r\n    ```\r\n";
1676        let blocks = processor.extract_code_blocks(content);
1677
1678        assert_eq!(blocks.len(), 1, "Should detect code block with CRLF line endings");
1679        assert_eq!(blocks[0].language, "python");
1680
1681        // Verify byte offsets point to valid content
1682        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
1683        assert!(
1684            extracted.contains("x = 1"),
1685            "Extracted content should contain code. Got: {extracted:?}"
1686        );
1687    }
1688
1689    #[test]
1690    fn test_mkdocs_unclosed_fence_in_admonition() {
1691        let config = default_config();
1692        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1693
1694        // Unclosed fence should not produce a block
1695        let content = "!!! note\n    ```python\n    x = 1\n    no closing fence\n";
1696        let blocks = processor.extract_code_blocks(content);
1697        assert_eq!(blocks.len(), 0, "Unclosed fence should not produce a block");
1698    }
1699
1700    #[test]
1701    fn test_mkdocs_tilde_fence_in_admonition() {
1702        let config = default_config();
1703        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1704
1705        let content = "!!! note\n    ~~~ruby\n    puts 'hi'\n    ~~~\n";
1706        let blocks = processor.extract_code_blocks(content);
1707        assert_eq!(blocks.len(), 1, "Should detect tilde-fenced code block");
1708        assert_eq!(blocks[0].language, "ruby");
1709    }
1710
1711    #[test]
1712    fn test_mkdocs_empty_lines_in_code_block() {
1713        let config = default_config();
1714        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1715
1716        // Code block with empty lines inside — verifies byte offsets are correct
1717        // across empty lines (the previous find("") approach would break here)
1718        let content = "!!! note\n    ```python\n    x = 1\n\n    y = 2\n    ```\n";
1719        let blocks = processor.extract_code_blocks(content);
1720        assert_eq!(blocks.len(), 1);
1721
1722        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
1723        assert!(
1724            extracted.contains("x = 1") && extracted.contains("y = 2"),
1725            "Extracted content should span across the empty line. Got: {extracted:?}"
1726        );
1727    }
1728
1729    #[test]
1730    fn test_mkdocs_content_byte_offsets_lf() {
1731        let config = default_config();
1732        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1733
1734        let content = "!!! note\n    ```python\n    print('hi')\n    ```\n";
1735        let blocks = processor.extract_code_blocks(content);
1736        assert_eq!(blocks.len(), 1);
1737
1738        // Verify the extracted content is exactly the code body
1739        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
1740        assert_eq!(extracted, "    print('hi')\n", "Content offsets should be exact for LF");
1741    }
1742
1743    #[test]
1744    fn test_mkdocs_content_byte_offsets_crlf() {
1745        let config = default_config();
1746        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1747
1748        let content = "!!! note\r\n    ```python\r\n    print('hi')\r\n    ```\r\n";
1749        let blocks = processor.extract_code_blocks(content);
1750        assert_eq!(blocks.len(), 1);
1751
1752        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
1753        assert_eq!(
1754            extracted, "    print('hi')\r\n",
1755            "Content offsets should be exact for CRLF"
1756        );
1757    }
1758}
rumdl_lib/code_block_tools/processor.rs

rumdl_lib/code_block_tools/
processor.rs