rumdl_lib/code_block_tools/
processor.rs

1//! Main processor for code block linting and formatting.
2//!
3//! This module coordinates language resolution, tool lookup, execution,
4//! and result collection for processing code blocks in markdown files.
5
6#[cfg(test)]
7use super::config::LanguageToolConfig;
8use super::config::{CodeBlockToolsConfig, NormalizeLanguage, OnError, OnMissing};
9use super::executor::{ExecutorError, ToolExecutor, ToolOutput};
10use super::linguist::LinguistResolver;
11use super::registry::ToolRegistry;
12use crate::config::MarkdownFlavor;
13use crate::rule::{LintWarning, Severity};
14use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
15
/// Special built-in tool name for rumdl's own markdown linting.
///
/// When this tool is configured for a markdown block, the processor skips
/// external execution for it, since that case is handled by rumdl's embedded
/// markdown linting and formatting.
pub const RUMDL_BUILTIN_TOOL: &str = "rumdl";
20
/// Report whether `lang` names markdown.
///
/// The comparison is case-insensitive and accepts exactly two spellings:
/// `markdown` and `md`.
fn is_markdown_language(lang: &str) -> bool {
    let lowered = lang.to_lowercase();
    ["markdown", "md"].contains(&lowered.as_str())
}
25
/// Information about a fenced code block for processing.
///
/// Line numbers are 0-indexed. `content_start` and `content_end` are byte
/// offsets into the document the block was extracted from, so the code itself
/// is `document[content_start..content_end]`.
#[derive(Debug, Clone)]
pub struct FencedCodeBlockInfo {
    /// 0-indexed line number where opening fence starts.
    pub start_line: usize,
    /// 0-indexed line number where closing fence ends.
    pub end_line: usize,
    /// Byte offset where code content starts (after opening fence line).
    pub content_start: usize,
    /// Byte offset where code content ends (before closing fence line).
    pub content_end: usize,
    /// Language tag extracted from info string (first whitespace-separated
    /// token); empty when the fence has no info string.
    pub language: String,
    /// Full info string from the fence.
    pub info_string: String,
    /// The fence character used (` or ~).
    pub fence_char: char,
    /// Number of consecutive fence characters counted at the start of the
    /// trimmed fence line (3 or more for a well-formed fence).
    pub fence_length: usize,
    /// Length in bytes of the leading whitespace on the fence line.
    pub indent: usize,
    /// Exact leading whitespace prefix from the fence line.
    pub indent_prefix: String,
}
50
/// A diagnostic message from an external tool.
///
/// The processor also emits these itself (with tool name `code-block-tools`)
/// when a language has no lint tools configured or a tool binary is missing.
#[derive(Debug, Clone)]
pub struct CodeBlockDiagnostic {
    /// Line number in the original markdown file (1-indexed).
    pub file_line: usize,
    /// Column number (1-indexed, if available).
    pub column: Option<usize>,
    /// Message from the tool.
    pub message: String,
    /// Severity (error, warning, info).
    pub severity: DiagnosticSeverity,
    /// Name of the tool that produced this.
    pub tool: String,
    /// Line where the code block starts (1-indexed, for context).
    pub code_block_start: usize,
}
67
/// Severity level for diagnostics.
///
/// Each variant maps onto the `Severity` variant of the same name when a
/// diagnostic is converted with `CodeBlockDiagnostic::to_lint_warning`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// Converted to `Severity::Error`.
    Error,
    /// Converted to `Severity::Warning`.
    Warning,
    /// Converted to `Severity::Info`.
    Info,
}
75
76impl CodeBlockDiagnostic {
77    /// Convert to a LintWarning for integration with rumdl's warning system.
78    pub fn to_lint_warning(&self) -> LintWarning {
79        let severity = match self.severity {
80            DiagnosticSeverity::Error => Severity::Error,
81            DiagnosticSeverity::Warning => Severity::Warning,
82            DiagnosticSeverity::Info => Severity::Info,
83        };
84
85        LintWarning {
86            message: self.message.clone(),
87            line: self.file_line,
88            column: self.column.unwrap_or(1),
89            end_line: self.file_line,
90            end_column: self.column.unwrap_or(1),
91            severity,
92            fix: None, // External tool diagnostics don't provide fixes
93            rule_name: Some(self.tool.clone()),
94        }
95    }
96}
97
/// Error during code block processing.
///
/// Returned by the processor's `lint` and `format` entry points when
/// processing has to stop instead of continuing with the next block or tool.
#[derive(Debug, Clone)]
pub enum ProcessorError {
    /// Tool execution failed.
    ToolError(ExecutorError),
    /// No tools configured for language.
    NoToolsConfigured { language: String },
    /// Tool binary not found.
    ToolBinaryNotFound { tool: String, language: String },
    /// Processing was aborted due to on_error = fail.
    Aborted { message: String },
}
110
111impl std::fmt::Display for ProcessorError {
112    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
113        match self {
114            Self::ToolError(e) => write!(f, "{e}"),
115            Self::NoToolsConfigured { language } => {
116                write!(f, "No tools configured for language '{language}'")
117            }
118            Self::ToolBinaryNotFound { tool, language } => {
119                write!(f, "Tool '{tool}' binary not found for language '{language}'")
120            }
121            Self::Aborted { message } => write!(f, "Processing aborted: {message}"),
122        }
123    }
124}
125
// Marker impl: the trait's default methods are used, so no `source()` is exposed.
impl std::error::Error for ProcessorError {}
127
128impl From<ExecutorError> for ProcessorError {
129    fn from(e: ExecutorError) -> Self {
130        Self::ToolError(e)
131    }
132}
133
/// Result of processing a single code block.
#[derive(Debug)]
pub struct CodeBlockResult {
    /// Diagnostics from linting.
    pub diagnostics: Vec<CodeBlockDiagnostic>,
    /// Formatted content (if formatting was requested and succeeded).
    pub formatted_content: Option<String>,
    /// Whether the code block was modified.
    pub was_modified: bool,
}
144
/// Result of formatting code blocks in a document.
///
/// Produced by `CodeBlockToolProcessor::format`, which keeps going after
/// collectable errors and reports them here instead of failing.
#[derive(Debug)]
pub struct FormatOutput {
    /// The formatted content (may be partially formatted if errors occurred).
    pub content: String,
    /// Whether any errors occurred during formatting
    /// (true exactly when `error_messages` is non-empty).
    pub had_errors: bool,
    /// Error messages for blocks that couldn't be formatted.
    pub error_messages: Vec<String>,
}
155
/// Main processor for code block tools.
///
/// Borrows its configuration for `'a` and owns the helpers needed to resolve
/// languages, look up tool definitions and execute tools.
pub struct CodeBlockToolProcessor<'a> {
    /// Borrowed code-block-tools configuration.
    config: &'a CodeBlockToolsConfig,
    /// Markdown flavor; `MkDocs` enables the extra admonition/tab scan in
    /// `extract_code_blocks`.
    flavor: MarkdownFlavor,
    /// Language normalizer used by `resolve_language` when
    /// `normalize_language` is `Linguist`.
    linguist: LinguistResolver,
    /// Tool definitions built from `config.tools`, looked up by tool id.
    registry: ToolRegistry,
    /// Runs lint/format tools and checks whether a tool binary is available.
    executor: ToolExecutor,
    /// User-defined language aliases with both key and value lowercased.
    user_aliases: std::collections::HashMap<String, String>,
}
165
166impl<'a> CodeBlockToolProcessor<'a> {
167    /// Create a new processor with the given configuration and markdown flavor.
168    pub fn new(config: &'a CodeBlockToolsConfig, flavor: MarkdownFlavor) -> Self {
169        let user_aliases = config
170            .language_aliases
171            .iter()
172            .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
173            .collect();
174        Self {
175            config,
176            flavor,
177            linguist: LinguistResolver::new(),
178            registry: ToolRegistry::new(config.tools.clone()),
179            executor: ToolExecutor::new(config.timeout),
180            user_aliases,
181        }
182    }
183
184    /// Extract all fenced code blocks from content.
185    pub fn extract_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
186        let mut blocks = Vec::new();
187        let mut current_block: Option<FencedCodeBlockBuilder> = None;
188
189        let options = Options::all();
190        let parser = Parser::new_ext(content, options).into_offset_iter();
191
192        let lines: Vec<&str> = content.lines().collect();
193
194        for (event, range) in parser {
195            match event {
196                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
197                    let info_string = info.to_string();
198                    let language = info_string.split_whitespace().next().unwrap_or("").to_string();
199
200                    // Find start line
201                    let start_line = content[..range.start].chars().filter(|&c| c == '\n').count();
202
203                    // Find content start (after opening fence line)
204                    let content_start = content[range.start..]
205                        .find('\n')
206                        .map(|i| range.start + i + 1)
207                        .unwrap_or(content.len());
208
209                    // Detect fence character and length from the line
210                    let fence_line = lines.get(start_line).unwrap_or(&"");
211                    let trimmed = fence_line.trim_start();
212                    let indent = fence_line.len() - trimmed.len();
213                    let indent_prefix = fence_line.get(..indent).unwrap_or("").to_string();
214                    let (fence_char, fence_length) = if trimmed.starts_with('~') {
215                        ('~', trimmed.chars().take_while(|&c| c == '~').count())
216                    } else {
217                        ('`', trimmed.chars().take_while(|&c| c == '`').count())
218                    };
219
220                    current_block = Some(FencedCodeBlockBuilder {
221                        start_line,
222                        content_start,
223                        language,
224                        info_string,
225                        fence_char,
226                        fence_length,
227                        indent,
228                        indent_prefix,
229                    });
230                }
231                Event::End(TagEnd::CodeBlock) => {
232                    if let Some(builder) = current_block.take() {
233                        // Find end line
234                        let end_line = content[..range.end].chars().filter(|&c| c == '\n').count();
235
236                        // Find content end (before closing fence line)
237                        let search_start = builder.content_start.min(range.end);
238                        let content_end = if search_start < range.end {
239                            content[search_start..range.end]
240                                .rfind('\n')
241                                .map(|i| search_start + i)
242                                .unwrap_or(search_start)
243                        } else {
244                            search_start
245                        };
246
247                        if content_end >= builder.content_start {
248                            blocks.push(FencedCodeBlockInfo {
249                                start_line: builder.start_line,
250                                end_line,
251                                content_start: builder.content_start,
252                                content_end,
253                                language: builder.language,
254                                info_string: builder.info_string,
255                                fence_char: builder.fence_char,
256                                fence_length: builder.fence_length,
257                                indent: builder.indent,
258                                indent_prefix: builder.indent_prefix,
259                            });
260                        }
261                    }
262                }
263                _ => {}
264            }
265        }
266
267        // For MkDocs flavor, also extract code blocks inside admonitions and tabs
268        if self.flavor == MarkdownFlavor::MkDocs {
269            let mkdocs_blocks = self.extract_mkdocs_code_blocks(content);
270            for mb in mkdocs_blocks {
271                // Deduplicate: only add if no existing block starts at the same line
272                if !blocks.iter().any(|b| b.start_line == mb.start_line) {
273                    blocks.push(mb);
274                }
275            }
276            blocks.sort_by_key(|b| b.start_line);
277        }
278
279        blocks
280    }
281
282    /// Extract fenced code blocks that are inside MkDocs admonitions or tabs.
283    ///
284    /// pulldown_cmark doesn't parse MkDocs-specific constructs, so indented
285    /// code blocks inside `!!!`/`???` admonitions or `===` tabs are missed.
286    /// This method manually scans for them.
287    fn extract_mkdocs_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
288        use crate::utils::mkdocs_admonitions;
289        use crate::utils::mkdocs_tabs;
290
291        let mut blocks = Vec::new();
292        let lines: Vec<&str> = content.lines().collect();
293
294        // Track current MkDocs context indent level
295        // We only need to know if we're inside any MkDocs block, so a simple stack suffices.
296        let mut context_indent_stack: Vec<usize> = Vec::new();
297
298        // Track fence state inside MkDocs context
299        let mut in_fence = false;
300        let mut fence_start_line: usize = 0;
301        let mut fence_content_start: usize = 0;
302        let mut fence_char: char = '`';
303        let mut fence_length: usize = 0;
304        let mut fence_indent: usize = 0;
305        let mut fence_indent_prefix = String::new();
306        let mut fence_language = String::new();
307        let mut fence_info_string = String::new();
308
309        // Compute byte offsets via pointer arithmetic.
310        // `content.lines()` returns slices into the original string,
311        // so each line's pointer offset from `content` gives its byte position.
312        // This correctly handles \n, \r\n, and empty lines.
313        let content_start_ptr = content.as_ptr() as usize;
314        let line_offsets: Vec<usize> = lines
315            .iter()
316            .map(|line| line.as_ptr() as usize - content_start_ptr)
317            .collect();
318
319        for (i, line) in lines.iter().enumerate() {
320            let line_indent = crate::utils::mkdocs_common::get_line_indent(line);
321            let is_admonition = mkdocs_admonitions::is_admonition_start(line);
322            let is_tab = mkdocs_tabs::is_tab_marker(line);
323
324            // Pop contexts when the current line is not indented enough to be content.
325            // This runs for ALL lines (including new admonition/tab starts) to clean
326            // up stale entries before potentially pushing a new context.
327            if !line.trim().is_empty() {
328                while let Some(&ctx_indent) = context_indent_stack.last() {
329                    if line_indent < ctx_indent + 4 {
330                        context_indent_stack.pop();
331                        if in_fence {
332                            in_fence = false;
333                        }
334                    } else {
335                        break;
336                    }
337                }
338            }
339
340            // Check for admonition start — push new context
341            if is_admonition {
342                if let Some(indent) = mkdocs_admonitions::get_admonition_indent(line) {
343                    context_indent_stack.push(indent);
344                    continue;
345                }
346            }
347
348            // Check for tab marker — push new context
349            if is_tab {
350                if let Some(indent) = mkdocs_tabs::get_tab_indent(line) {
351                    context_indent_stack.push(indent);
352                    continue;
353                }
354            }
355
356            // Only look for fences inside a MkDocs context
357            if context_indent_stack.is_empty() {
358                continue;
359            }
360
361            let trimmed = line.trim_start();
362            let leading_spaces = line.len() - trimmed.len();
363
364            if !in_fence {
365                // Check for fence opening
366                let (fc, fl) = if trimmed.starts_with("```") {
367                    ('`', trimmed.chars().take_while(|&c| c == '`').count())
368                } else if trimmed.starts_with("~~~") {
369                    ('~', trimmed.chars().take_while(|&c| c == '~').count())
370                } else {
371                    continue;
372                };
373
374                if fl >= 3 {
375                    in_fence = true;
376                    fence_start_line = i;
377                    fence_char = fc;
378                    fence_length = fl;
379                    fence_indent = leading_spaces;
380                    fence_indent_prefix = line.get(..leading_spaces).unwrap_or("").to_string();
381
382                    let after_fence = &trimmed[fl..];
383                    fence_info_string = after_fence.trim().to_string();
384                    fence_language = fence_info_string.split_whitespace().next().unwrap_or("").to_string();
385
386                    // Content starts at the next line's byte offset
387                    fence_content_start = line_offsets.get(i + 1).copied().unwrap_or(content.len());
388                }
389            } else {
390                // Check for fence closing
391                let is_closing = if fence_char == '`' {
392                    trimmed.starts_with("```")
393                        && trimmed.chars().take_while(|&c| c == '`').count() >= fence_length
394                        && trimmed.trim_start_matches('`').trim().is_empty()
395                } else {
396                    trimmed.starts_with("~~~")
397                        && trimmed.chars().take_while(|&c| c == '~').count() >= fence_length
398                        && trimmed.trim_start_matches('~').trim().is_empty()
399                };
400
401                if is_closing {
402                    let content_end = line_offsets.get(i).copied().unwrap_or(content.len());
403
404                    if content_end >= fence_content_start {
405                        blocks.push(FencedCodeBlockInfo {
406                            start_line: fence_start_line,
407                            end_line: i,
408                            content_start: fence_content_start,
409                            content_end,
410                            language: fence_language.clone(),
411                            info_string: fence_info_string.clone(),
412                            fence_char,
413                            fence_length,
414                            indent: fence_indent,
415                            indent_prefix: fence_indent_prefix.clone(),
416                        });
417                    }
418
419                    in_fence = false;
420                }
421            }
422        }
423
424        blocks
425    }
426
427    /// Resolve a language tag to its canonical name.
428    fn resolve_language(&self, language: &str) -> String {
429        let lower = language.to_lowercase();
430        if let Some(mapped) = self.user_aliases.get(&lower) {
431            return mapped.clone();
432        }
433        match self.config.normalize_language {
434            NormalizeLanguage::Linguist => self.linguist.resolve(&lower),
435            NormalizeLanguage::Exact => lower,
436        }
437    }
438
439    /// Get the effective on_error setting for a language.
440    fn get_on_error(&self, language: &str) -> OnError {
441        self.config
442            .languages
443            .get(language)
444            .and_then(|lc| lc.on_error)
445            .unwrap_or(self.config.on_error)
446    }
447
448    /// Strip the fence indentation prefix from each line of a code block.
449    fn strip_indent_from_block(&self, content: &str, indent_prefix: &str) -> String {
450        if indent_prefix.is_empty() {
451            return content.to_string();
452        }
453
454        let mut out = String::with_capacity(content.len());
455        for line in content.split_inclusive('\n') {
456            if let Some(stripped) = line.strip_prefix(indent_prefix) {
457                out.push_str(stripped);
458            } else {
459                out.push_str(line);
460            }
461        }
462        out
463    }
464
465    /// Re-apply the fence indentation prefix to each line of a code block.
466    fn apply_indent_to_block(&self, content: &str, indent_prefix: &str) -> String {
467        if indent_prefix.is_empty() {
468            return content.to_string();
469        }
470        if content.is_empty() {
471            return String::new();
472        }
473
474        let mut out = String::with_capacity(content.len() + indent_prefix.len());
475        for line in content.split_inclusive('\n') {
476            if line == "\n" {
477                out.push_str(line);
478            } else {
479                out.push_str(indent_prefix);
480                out.push_str(line);
481            }
482        }
483        out
484    }
485
486    /// Lint all code blocks in the content.
487    ///
488    /// Returns diagnostics from all configured linters.
489    pub fn lint(&self, content: &str) -> Result<Vec<CodeBlockDiagnostic>, ProcessorError> {
490        let mut all_diagnostics = Vec::new();
491        let blocks = self.extract_code_blocks(content);
492
493        for block in blocks {
494            if block.language.is_empty() {
495                continue; // Skip blocks without language tag
496            }
497
498            let canonical_lang = self.resolve_language(&block.language);
499
500            // Get lint tools for this language
501            let lint_tools = match self.config.languages.get(&canonical_lang) {
502                Some(lc) if !lc.lint.is_empty() => &lc.lint,
503                _ => {
504                    // No tools configured for this language in lint mode
505                    match self.config.on_missing_language_definition {
506                        OnMissing::Ignore => continue,
507                        OnMissing::Fail => {
508                            all_diagnostics.push(CodeBlockDiagnostic {
509                                file_line: block.start_line + 1,
510                                column: None,
511                                message: format!("No lint tools configured for language '{canonical_lang}'"),
512                                severity: DiagnosticSeverity::Error,
513                                tool: "code-block-tools".to_string(),
514                                code_block_start: block.start_line + 1,
515                            });
516                            continue;
517                        }
518                        OnMissing::FailFast => {
519                            return Err(ProcessorError::NoToolsConfigured {
520                                language: canonical_lang,
521                            });
522                        }
523                    }
524                }
525            };
526
527            // Extract code block content
528            let code_content_raw = if block.content_start < block.content_end && block.content_end <= content.len() {
529                &content[block.content_start..block.content_end]
530            } else {
531                continue;
532            };
533            let code_content = self.strip_indent_from_block(code_content_raw, &block.indent_prefix);
534
535            // Run each lint tool
536            for tool_id in lint_tools {
537                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown linting
538                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
539                    continue;
540                }
541
542                let tool_def = match self.registry.get(tool_id) {
543                    Some(t) => t,
544                    None => {
545                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
546                        continue;
547                    }
548                };
549
550                // Check if tool binary exists before running
551                let tool_name = tool_def.command.first().map(String::as_str).unwrap_or("");
552                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
553                    match self.config.on_missing_tool_binary {
554                        OnMissing::Ignore => {
555                            log::debug!("Tool binary '{tool_name}' not found, skipping");
556                            continue;
557                        }
558                        OnMissing::Fail => {
559                            all_diagnostics.push(CodeBlockDiagnostic {
560                                file_line: block.start_line + 1,
561                                column: None,
562                                message: format!("Tool binary '{tool_name}' not found in PATH"),
563                                severity: DiagnosticSeverity::Error,
564                                tool: "code-block-tools".to_string(),
565                                code_block_start: block.start_line + 1,
566                            });
567                            continue;
568                        }
569                        OnMissing::FailFast => {
570                            return Err(ProcessorError::ToolBinaryNotFound {
571                                tool: tool_name.to_string(),
572                                language: canonical_lang.clone(),
573                            });
574                        }
575                    }
576                }
577
578                match self.executor.lint(tool_def, &code_content, Some(self.config.timeout)) {
579                    Ok(output) => {
580                        // Parse tool output into diagnostics
581                        let diagnostics = self.parse_tool_output(
582                            &output,
583                            tool_id,
584                            block.start_line + 1, // Convert to 1-indexed
585                        );
586                        all_diagnostics.extend(diagnostics);
587                    }
588                    Err(e) => {
589                        let on_error = self.get_on_error(&canonical_lang);
590                        match on_error {
591                            OnError::Fail => return Err(e.into()),
592                            OnError::Warn => {
593                                log::warn!("Tool '{tool_id}' failed: {e}");
594                            }
595                            OnError::Skip => {
596                                // Silently skip
597                            }
598                        }
599                    }
600                }
601            }
602        }
603
604        Ok(all_diagnostics)
605    }
606
607    /// Format all code blocks in the content.
608    ///
609    /// Returns the modified content with formatted code blocks and any errors that occurred.
610    /// With `on-missing-*` = `fail`, errors are collected but formatting continues.
611    /// With `on-missing-*` = `fail-fast`, returns Err immediately on first error.
612    pub fn format(&self, content: &str) -> Result<FormatOutput, ProcessorError> {
613        let blocks = self.extract_code_blocks(content);
614
615        if blocks.is_empty() {
616            return Ok(FormatOutput {
617                content: content.to_string(),
618                had_errors: false,
619                error_messages: Vec::new(),
620            });
621        }
622
623        // Process blocks in reverse order to maintain byte offsets
624        let mut result = content.to_string();
625        let mut error_messages: Vec<String> = Vec::new();
626
627        for block in blocks.into_iter().rev() {
628            if block.language.is_empty() {
629                continue;
630            }
631
632            let canonical_lang = self.resolve_language(&block.language);
633
634            // Get format tools for this language
635            let format_tools = match self.config.languages.get(&canonical_lang) {
636                Some(lc) if !lc.format.is_empty() => &lc.format,
637                _ => {
638                    // No tools configured for this language in format mode
639                    match self.config.on_missing_language_definition {
640                        OnMissing::Ignore => continue,
641                        OnMissing::Fail => {
642                            error_messages.push(format!(
643                                "No format tools configured for language '{canonical_lang}' at line {}",
644                                block.start_line + 1
645                            ));
646                            continue;
647                        }
648                        OnMissing::FailFast => {
649                            return Err(ProcessorError::NoToolsConfigured {
650                                language: canonical_lang,
651                            });
652                        }
653                    }
654                }
655            };
656
657            // Extract code block content
658            if block.content_start >= block.content_end || block.content_end > result.len() {
659                continue;
660            }
661            let code_content_raw = result[block.content_start..block.content_end].to_string();
662            let code_content = self.strip_indent_from_block(&code_content_raw, &block.indent_prefix);
663
664            // Run format tools (use first successful one)
665            let mut formatted = code_content.clone();
666            let mut tool_ran = false;
667            for tool_id in format_tools {
668                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown formatting
669                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
670                    continue;
671                }
672
673                let tool_def = match self.registry.get(tool_id) {
674                    Some(t) => t,
675                    None => {
676                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
677                        continue;
678                    }
679                };
680
681                // Check if tool binary exists before running
682                let tool_name = tool_def.command.first().map(String::as_str).unwrap_or("");
683                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
684                    match self.config.on_missing_tool_binary {
685                        OnMissing::Ignore => {
686                            log::debug!("Tool binary '{tool_name}' not found, skipping");
687                            continue;
688                        }
689                        OnMissing::Fail => {
690                            error_messages.push(format!(
691                                "Tool binary '{tool_name}' not found in PATH for language '{canonical_lang}' at line {}",
692                                block.start_line + 1
693                            ));
694                            continue;
695                        }
696                        OnMissing::FailFast => {
697                            return Err(ProcessorError::ToolBinaryNotFound {
698                                tool: tool_name.to_string(),
699                                language: canonical_lang.clone(),
700                            });
701                        }
702                    }
703                }
704
705                match self.executor.format(tool_def, &formatted, Some(self.config.timeout)) {
706                    Ok(output) => {
707                        // Ensure trailing newline matches original (unindented)
708                        formatted = output;
709                        if code_content.ends_with('\n') && !formatted.ends_with('\n') {
710                            formatted.push('\n');
711                        } else if !code_content.ends_with('\n') && formatted.ends_with('\n') {
712                            formatted.pop();
713                        }
714                        tool_ran = true;
715                        break; // Use first successful formatter
716                    }
717                    Err(e) => {
718                        let on_error = self.get_on_error(&canonical_lang);
719                        match on_error {
720                            OnError::Fail => return Err(e.into()),
721                            OnError::Warn => {
722                                log::warn!("Formatter '{tool_id}' failed: {e}");
723                            }
724                            OnError::Skip => {}
725                        }
726                    }
727                }
728            }
729
730            // Replace content if changed and a tool actually ran
731            if tool_ran && formatted != code_content {
732                let reindented = self.apply_indent_to_block(&formatted, &block.indent_prefix);
733                if reindented != code_content_raw {
734                    result.replace_range(block.content_start..block.content_end, &reindented);
735                }
736            }
737        }
738
739        Ok(FormatOutput {
740            content: result,
741            had_errors: !error_messages.is_empty(),
742            error_messages,
743        })
744    }
745
746    /// Parse tool output into diagnostics.
747    ///
748    /// This is a basic parser that handles common output formats.
749    /// Tools vary widely in their output format, so this is best-effort.
750    fn parse_tool_output(
751        &self,
752        output: &ToolOutput,
753        tool_id: &str,
754        code_block_start_line: usize,
755    ) -> Vec<CodeBlockDiagnostic> {
756        let mut diagnostics = Vec::new();
757        let mut shellcheck_line: Option<usize> = None;
758
759        // Combine stdout and stderr for parsing
760        let stdout = &output.stdout;
761        let stderr = &output.stderr;
762        let combined = format!("{stdout}\n{stderr}");
763
764        // Look for common line:column:message patterns
765        // Examples:
766        // - ruff: "_.py:1:1: E501 Line too long"
767        // - shellcheck: "In - line 1: ..."
768        // - eslint: "1:10 error Description"
769
770        for line in combined.lines() {
771            let line = line.trim();
772            if line.is_empty() {
773                continue;
774            }
775
776            if let Some(line_num) = self.parse_shellcheck_header(line) {
777                shellcheck_line = Some(line_num);
778                continue;
779            }
780
781            if let Some(line_num) = shellcheck_line
782                && let Some(diag) = self.parse_shellcheck_message(line, tool_id, code_block_start_line, line_num)
783            {
784                diagnostics.push(diag);
785                continue;
786            }
787
788            // Try pattern: "file:line:col: message" or "file:line: message"
789            if let Some(diag) = self.parse_standard_format(line, tool_id, code_block_start_line) {
790                diagnostics.push(diag);
791                continue;
792            }
793
794            // Try pattern: "line:col message" (eslint style)
795            if let Some(diag) = self.parse_eslint_format(line, tool_id, code_block_start_line) {
796                diagnostics.push(diag);
797                continue;
798            }
799
800            // Try single-line shellcheck format fallback
801            if let Some(diag) = self.parse_shellcheck_format(line, tool_id, code_block_start_line) {
802                diagnostics.push(diag);
803            }
804        }
805
806        // If no diagnostics parsed but tool failed, create a generic one
807        if diagnostics.is_empty() && !output.success {
808            let message = if !output.stderr.is_empty() {
809                output.stderr.lines().next().unwrap_or("Tool failed").to_string()
810            } else if !output.stdout.is_empty() {
811                output.stdout.lines().next().unwrap_or("Tool failed").to_string()
812            } else {
813                let exit_code = output.exit_code;
814                format!("Tool exited with code {exit_code}")
815            };
816
817            diagnostics.push(CodeBlockDiagnostic {
818                file_line: code_block_start_line,
819                column: None,
820                message,
821                severity: DiagnosticSeverity::Error,
822                tool: tool_id.to_string(),
823                code_block_start: code_block_start_line,
824            });
825        }
826
827        diagnostics
828    }
829
830    /// Parse standard "file:line:col: message" format.
831    fn parse_standard_format(
832        &self,
833        line: &str,
834        tool_id: &str,
835        code_block_start_line: usize,
836    ) -> Option<CodeBlockDiagnostic> {
837        // Match patterns like "file.py:1:10: E501 message"
838        let mut parts = line.rsplitn(4, ':');
839        let message = parts.next()?.trim().to_string();
840        let part1 = parts.next()?.trim().to_string();
841        let part2 = parts.next()?.trim().to_string();
842        let part3 = parts.next().map(|s| s.trim().to_string());
843
844        let (line_part, col_part) = if part3.is_some() {
845            (part2, Some(part1))
846        } else {
847            (part1, None)
848        };
849
850        if let Ok(line_num) = line_part.parse::<usize>() {
851            let column = col_part.and_then(|s| s.parse::<usize>().ok());
852            let message = Self::strip_fixable_markers(&message);
853            if !message.is_empty() {
854                let severity = self.infer_severity(&message);
855                return Some(CodeBlockDiagnostic {
856                    file_line: code_block_start_line + line_num,
857                    column,
858                    message,
859                    severity,
860                    tool: tool_id.to_string(),
861                    code_block_start: code_block_start_line,
862                });
863            }
864        }
865        None
866    }
867
868    /// Parse eslint-style "line:col severity message" format.
869    fn parse_eslint_format(
870        &self,
871        line: &str,
872        tool_id: &str,
873        code_block_start_line: usize,
874    ) -> Option<CodeBlockDiagnostic> {
875        // Match "1:10 error Message"
876        let parts: Vec<&str> = line.splitn(3, ' ').collect();
877        if parts.len() >= 2 {
878            let loc_parts: Vec<&str> = parts[0].split(':').collect();
879            if loc_parts.len() == 2
880                && let (Ok(line_num), Ok(col)) = (loc_parts[0].parse::<usize>(), loc_parts[1].parse::<usize>())
881            {
882                let (sev_part, msg_part) = if parts.len() >= 3 {
883                    (parts[1], parts[2])
884                } else {
885                    (parts[1], "")
886                };
887                let message = if msg_part.is_empty() {
888                    sev_part.to_string()
889                } else {
890                    msg_part.to_string()
891                };
892                let message = Self::strip_fixable_markers(&message);
893                let severity = match sev_part.to_lowercase().as_str() {
894                    "error" => DiagnosticSeverity::Error,
895                    "warning" | "warn" => DiagnosticSeverity::Warning,
896                    "info" => DiagnosticSeverity::Info,
897                    _ => self.infer_severity(&message),
898                };
899                return Some(CodeBlockDiagnostic {
900                    file_line: code_block_start_line + line_num,
901                    column: Some(col),
902                    message,
903                    severity,
904                    tool: tool_id.to_string(),
905                    code_block_start: code_block_start_line,
906                });
907            }
908        }
909        None
910    }
911
912    /// Parse shellcheck-style "In - line N: message" format.
913    fn parse_shellcheck_format(
914        &self,
915        line: &str,
916        tool_id: &str,
917        code_block_start_line: usize,
918    ) -> Option<CodeBlockDiagnostic> {
919        // Match "In - line 5:" pattern
920        if line.starts_with("In ")
921            && line.contains(" line ")
922            && let Some(line_start) = line.find(" line ")
923        {
924            let after_line = &line[line_start + 6..];
925            if let Some(colon_pos) = after_line.find(':')
926                && let Ok(line_num) = after_line[..colon_pos].trim().parse::<usize>()
927            {
928                let message = Self::strip_fixable_markers(after_line[colon_pos + 1..].trim());
929                if !message.is_empty() {
930                    let severity = self.infer_severity(&message);
931                    return Some(CodeBlockDiagnostic {
932                        file_line: code_block_start_line + line_num,
933                        column: None,
934                        message,
935                        severity,
936                        tool: tool_id.to_string(),
937                        code_block_start: code_block_start_line,
938                    });
939                }
940            }
941        }
942        None
943    }
944
945    /// Parse shellcheck header line to capture line number context.
946    fn parse_shellcheck_header(&self, line: &str) -> Option<usize> {
947        if line.starts_with("In ")
948            && line.contains(" line ")
949            && let Some(line_start) = line.find(" line ")
950        {
951            let after_line = &line[line_start + 6..];
952            if let Some(colon_pos) = after_line.find(':') {
953                return after_line[..colon_pos].trim().parse::<usize>().ok();
954            }
955        }
956        None
957    }
958
959    /// Parse shellcheck message line containing SCXXXX codes.
960    fn parse_shellcheck_message(
961        &self,
962        line: &str,
963        tool_id: &str,
964        code_block_start_line: usize,
965        line_num: usize,
966    ) -> Option<CodeBlockDiagnostic> {
967        let sc_pos = line.find("SC")?;
968        let after_sc = &line[sc_pos + 2..];
969        let code_len = after_sc.chars().take_while(|c| c.is_ascii_digit()).count();
970        if code_len == 0 {
971            return None;
972        }
973        let after_code = &after_sc[code_len..];
974        let sev_start = after_code.find('(')? + 1;
975        let sev_end = after_code[sev_start..].find(')')? + sev_start;
976        let sev = after_code[sev_start..sev_end].trim().to_lowercase();
977        let message_start = after_code.find("):")? + 2;
978        let message = Self::strip_fixable_markers(after_code[message_start..].trim());
979        if message.is_empty() {
980            return None;
981        }
982
983        let severity = match sev.as_str() {
984            "error" => DiagnosticSeverity::Error,
985            "warning" | "warn" => DiagnosticSeverity::Warning,
986            "info" | "style" => DiagnosticSeverity::Info,
987            _ => self.infer_severity(&message),
988        };
989
990        Some(CodeBlockDiagnostic {
991            file_line: code_block_start_line + line_num,
992            column: None,
993            message,
994            severity,
995            tool: tool_id.to_string(),
996            code_block_start: code_block_start_line,
997        })
998    }
999
1000    /// Infer severity from message content.
1001    fn infer_severity(&self, message: &str) -> DiagnosticSeverity {
1002        let lower = message.to_lowercase();
1003        if lower.contains("error")
1004            || lower.starts_with("e") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1005            || lower.starts_with("f") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1006        {
1007            DiagnosticSeverity::Error
1008        } else if lower.contains("warning")
1009            || lower.contains("warn")
1010            || lower.starts_with("w") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
1011        {
1012            DiagnosticSeverity::Warning
1013        } else {
1014            DiagnosticSeverity::Info
1015        }
1016    }
1017
1018    /// Strip "fixable" markers from external tool messages.
1019    ///
1020    /// External tools like ruff show `[*]` to indicate fixable issues, but in rumdl's
1021    /// context these markers can be misleading - the lint tool's fix capability may
1022    /// differ from what our configured formatter can fix. We strip these markers
1023    /// to avoid making promises we can't keep.
1024    fn strip_fixable_markers(message: &str) -> String {
1025        message
1026            .replace(" [*]", "")
1027            .replace("[*] ", "")
1028            .replace("[*]", "")
1029            .replace(" (fixable)", "")
1030            .replace("(fixable) ", "")
1031            .replace("(fixable)", "")
1032            .replace(" [fix available]", "")
1033            .replace("[fix available] ", "")
1034            .replace("[fix available]", "")
1035            .replace(" [autofix]", "")
1036            .replace("[autofix] ", "")
1037            .replace("[autofix]", "")
1038            .trim()
1039            .to_string()
1040    }
1041}
1042
/// Partially-collected data for a fenced code block while the document is parsed.
///
/// NOTE(review): the fields presumably mirror the same-named fields of
/// `FencedCodeBlockInfo`, minus the end positions, which would only be known once
/// the closing fence is reached — confirm against the code that builds it.
struct FencedCodeBlockBuilder {
    // Position of the opening fence and of the first content byte.
    start_line: usize,
    content_start: usize,

    // Shape of the fence itself.
    fence_char: char,
    fence_length: usize,

    // Info string as written, and the language taken from it.
    info_string: String,
    language: String,

    // Indentation of the fence line: a count and the literal whitespace prefix.
    // NOTE(review): unit of `indent` (columns vs. bytes) is not visible here.
    indent: usize,
    indent_prefix: String,
}
1054
1055#[cfg(test)]
1056mod tests {
1057    use super::*;
1058
1059    fn default_config() -> CodeBlockToolsConfig {
1060        CodeBlockToolsConfig::default()
1061    }
1062
1063    #[test]
1064    fn test_extract_code_blocks() {
1065        let config = default_config();
1066        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1067
1068        let content = r#"# Example
1069
1070```python
1071def hello():
1072    print("Hello")
1073```
1074
1075Some text
1076
1077```rust
1078fn main() {}
1079```
1080"#;
1081
1082        let blocks = processor.extract_code_blocks(content);
1083
1084        assert_eq!(blocks.len(), 2);
1085
1086        assert_eq!(blocks[0].language, "python");
1087        assert_eq!(blocks[0].fence_char, '`');
1088        assert_eq!(blocks[0].fence_length, 3);
1089        assert_eq!(blocks[0].start_line, 2);
1090        assert_eq!(blocks[0].indent, 0);
1091        assert_eq!(blocks[0].indent_prefix, "");
1092
1093        assert_eq!(blocks[1].language, "rust");
1094        assert_eq!(blocks[1].fence_char, '`');
1095        assert_eq!(blocks[1].fence_length, 3);
1096    }
1097
1098    #[test]
1099    fn test_extract_code_blocks_with_info_string() {
1100        let config = default_config();
1101        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1102
1103        let content = "```python title=\"example.py\"\ncode\n```";
1104        let blocks = processor.extract_code_blocks(content);
1105
1106        assert_eq!(blocks.len(), 1);
1107        assert_eq!(blocks[0].language, "python");
1108        assert_eq!(blocks[0].info_string, "python title=\"example.py\"");
1109    }
1110
1111    #[test]
1112    fn test_extract_code_blocks_tilde_fence() {
1113        let config = default_config();
1114        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1115
1116        let content = "~~~bash\necho hello\n~~~";
1117        let blocks = processor.extract_code_blocks(content);
1118
1119        assert_eq!(blocks.len(), 1);
1120        assert_eq!(blocks[0].language, "bash");
1121        assert_eq!(blocks[0].fence_char, '~');
1122        assert_eq!(blocks[0].fence_length, 3);
1123        assert_eq!(blocks[0].indent_prefix, "");
1124    }
1125
1126    #[test]
1127    fn test_extract_code_blocks_with_indent_prefix() {
1128        let config = default_config();
1129        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1130
1131        let content = "  - item\n    ```python\n    print('hi')\n    ```";
1132        let blocks = processor.extract_code_blocks(content);
1133
1134        assert_eq!(blocks.len(), 1);
1135        assert_eq!(blocks[0].indent_prefix, "    ");
1136    }
1137
1138    #[test]
1139    fn test_extract_code_blocks_no_language() {
1140        let config = default_config();
1141        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1142
1143        let content = "```\nplain code\n```";
1144        let blocks = processor.extract_code_blocks(content);
1145
1146        assert_eq!(blocks.len(), 1);
1147        assert_eq!(blocks[0].language, "");
1148    }
1149
1150    #[test]
1151    fn test_resolve_language_linguist() {
1152        let mut config = default_config();
1153        config.normalize_language = NormalizeLanguage::Linguist;
1154        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1155
1156        assert_eq!(processor.resolve_language("py"), "python");
1157        assert_eq!(processor.resolve_language("bash"), "shell");
1158        assert_eq!(processor.resolve_language("js"), "javascript");
1159    }
1160
1161    #[test]
1162    fn test_resolve_language_exact() {
1163        let mut config = default_config();
1164        config.normalize_language = NormalizeLanguage::Exact;
1165        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1166
1167        assert_eq!(processor.resolve_language("py"), "py");
1168        assert_eq!(processor.resolve_language("BASH"), "bash");
1169    }
1170
1171    #[test]
1172    fn test_resolve_language_user_alias_override() {
1173        let mut config = default_config();
1174        config.language_aliases.insert("py".to_string(), "python".to_string());
1175        config.normalize_language = NormalizeLanguage::Exact;
1176        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1177
1178        assert_eq!(processor.resolve_language("PY"), "python");
1179    }
1180
1181    #[test]
1182    fn test_indent_strip_and_reapply_roundtrip() {
1183        let config = default_config();
1184        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1185
1186        let raw = "    def hello():\n        print('hi')";
1187        let stripped = processor.strip_indent_from_block(raw, "    ");
1188        assert_eq!(stripped, "def hello():\n    print('hi')");
1189
1190        let reapplied = processor.apply_indent_to_block(&stripped, "    ");
1191        assert_eq!(reapplied, raw);
1192    }
1193
1194    #[test]
1195    fn test_infer_severity() {
1196        let config = default_config();
1197        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1198
1199        assert_eq!(
1200            processor.infer_severity("E501 line too long"),
1201            DiagnosticSeverity::Error
1202        );
1203        assert_eq!(
1204            processor.infer_severity("W291 trailing whitespace"),
1205            DiagnosticSeverity::Warning
1206        );
1207        assert_eq!(
1208            processor.infer_severity("error: something failed"),
1209            DiagnosticSeverity::Error
1210        );
1211        assert_eq!(
1212            processor.infer_severity("warning: unused variable"),
1213            DiagnosticSeverity::Warning
1214        );
1215        assert_eq!(
1216            processor.infer_severity("note: consider using"),
1217            DiagnosticSeverity::Info
1218        );
1219    }
1220
1221    #[test]
1222    fn test_parse_standard_format_windows_path() {
1223        let config = default_config();
1224        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1225
1226        let output = ToolOutput {
1227            stdout: "C:\\path\\file.py:2:5: E123 message".to_string(),
1228            stderr: String::new(),
1229            exit_code: 1,
1230            success: false,
1231        };
1232
1233        let diags = processor.parse_tool_output(&output, "ruff:check", 10);
1234        assert_eq!(diags.len(), 1);
1235        assert_eq!(diags[0].file_line, 12);
1236        assert_eq!(diags[0].column, Some(5));
1237        assert_eq!(diags[0].message, "E123 message");
1238    }
1239
1240    #[test]
1241    fn test_parse_eslint_severity() {
1242        let config = default_config();
1243        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1244
1245        let output = ToolOutput {
1246            stdout: "1:2 error Unexpected token".to_string(),
1247            stderr: String::new(),
1248            exit_code: 1,
1249            success: false,
1250        };
1251
1252        let diags = processor.parse_tool_output(&output, "eslint", 5);
1253        assert_eq!(diags.len(), 1);
1254        assert_eq!(diags[0].file_line, 6);
1255        assert_eq!(diags[0].column, Some(2));
1256        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1257        assert_eq!(diags[0].message, "Unexpected token");
1258    }
1259
1260    #[test]
1261    fn test_parse_shellcheck_multiline() {
1262        let config = default_config();
1263        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1264
1265        let output = ToolOutput {
1266            stdout: "In - line 3:\necho $var\n ^-- SC2086 (info): Double quote to prevent globbing".to_string(),
1267            stderr: String::new(),
1268            exit_code: 1,
1269            success: false,
1270        };
1271
1272        let diags = processor.parse_tool_output(&output, "shellcheck", 10);
1273        assert_eq!(diags.len(), 1);
1274        assert_eq!(diags[0].file_line, 13);
1275        assert_eq!(diags[0].severity, DiagnosticSeverity::Info);
1276        assert_eq!(diags[0].message, "Double quote to prevent globbing");
1277    }
1278
1279    #[test]
1280    fn test_lint_no_config() {
1281        let config = default_config();
1282        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1283
1284        let content = "```python\nprint('hello')\n```";
1285        let result = processor.lint(content);
1286
1287        // Should succeed with no diagnostics (no tools configured)
1288        assert!(result.is_ok());
1289        assert!(result.unwrap().is_empty());
1290    }
1291
1292    #[test]
1293    fn test_format_no_config() {
1294        let config = default_config();
1295        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1296
1297        let content = "```python\nprint('hello')\n```";
1298        let result = processor.format(content);
1299
1300        // Should succeed with unchanged content (no tools configured)
1301        assert!(result.is_ok());
1302        let output = result.unwrap();
1303        assert_eq!(output.content, content);
1304        assert!(!output.had_errors);
1305        assert!(output.error_messages.is_empty());
1306    }
1307
1308    #[test]
1309    fn test_lint_on_missing_language_definition_fail() {
1310        let mut config = default_config();
1311        config.on_missing_language_definition = OnMissing::Fail;
1312        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1313
1314        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1315        let result = processor.lint(content);
1316
1317        // Should succeed but return diagnostics for both missing language definitions
1318        assert!(result.is_ok());
1319        let diagnostics = result.unwrap();
1320        assert_eq!(diagnostics.len(), 2);
1321        assert!(diagnostics[0].message.contains("No lint tools configured"));
1322        assert!(diagnostics[0].message.contains("python"));
1323        assert!(diagnostics[1].message.contains("javascript"));
1324    }
1325
1326    #[test]
1327    fn test_lint_on_missing_language_definition_fail_fast() {
1328        let mut config = default_config();
1329        config.on_missing_language_definition = OnMissing::FailFast;
1330        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1331
1332        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1333        let result = processor.lint(content);
1334
1335        // Should fail immediately on first missing language
1336        assert!(result.is_err());
1337        let err = result.unwrap_err();
1338        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1339    }
1340
1341    #[test]
1342    fn test_format_on_missing_language_definition_fail() {
1343        let mut config = default_config();
1344        config.on_missing_language_definition = OnMissing::Fail;
1345        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1346
1347        let content = "```python\nprint('hello')\n```";
1348        let result = processor.format(content);
1349
1350        // Should succeed but report errors
1351        assert!(result.is_ok());
1352        let output = result.unwrap();
1353        assert_eq!(output.content, content); // Content unchanged
1354        assert!(output.had_errors);
1355        assert!(!output.error_messages.is_empty());
1356        assert!(output.error_messages[0].contains("No format tools configured"));
1357    }
1358
1359    #[test]
1360    fn test_format_on_missing_language_definition_fail_fast() {
1361        let mut config = default_config();
1362        config.on_missing_language_definition = OnMissing::FailFast;
1363        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1364
1365        let content = "```python\nprint('hello')\n```";
1366        let result = processor.format(content);
1367
1368        // Should fail immediately
1369        assert!(result.is_err());
1370        let err = result.unwrap_err();
1371        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1372    }
1373
1374    #[test]
1375    fn test_lint_on_missing_tool_binary_fail() {
1376        use super::super::config::{LanguageToolConfig, ToolDefinition};
1377
1378        let mut config = default_config();
1379        config.on_missing_tool_binary = OnMissing::Fail;
1380
1381        // Configure a tool with a non-existent binary
1382        let lang_config = LanguageToolConfig {
1383            lint: vec!["nonexistent-linter".to_string()],
1384            ..Default::default()
1385        };
1386        config.languages.insert("python".to_string(), lang_config);
1387
1388        let tool_def = ToolDefinition {
1389            command: vec!["nonexistent-binary-xyz123".to_string()],
1390            ..Default::default()
1391        };
1392        config.tools.insert("nonexistent-linter".to_string(), tool_def);
1393
1394        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1395
1396        let content = "```python\nprint('hello')\n```";
1397        let result = processor.lint(content);
1398
1399        // Should succeed but return diagnostic for missing binary
1400        assert!(result.is_ok());
1401        let diagnostics = result.unwrap();
1402        assert_eq!(diagnostics.len(), 1);
1403        assert!(diagnostics[0].message.contains("not found in PATH"));
1404    }
1405
1406    #[test]
1407    fn test_lint_on_missing_tool_binary_fail_fast() {
1408        use super::super::config::{LanguageToolConfig, ToolDefinition};
1409
1410        let mut config = default_config();
1411        config.on_missing_tool_binary = OnMissing::FailFast;
1412
1413        // Configure a tool with a non-existent binary
1414        let lang_config = LanguageToolConfig {
1415            lint: vec!["nonexistent-linter".to_string()],
1416            ..Default::default()
1417        };
1418        config.languages.insert("python".to_string(), lang_config);
1419
1420        let tool_def = ToolDefinition {
1421            command: vec!["nonexistent-binary-xyz123".to_string()],
1422            ..Default::default()
1423        };
1424        config.tools.insert("nonexistent-linter".to_string(), tool_def);
1425
1426        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1427
1428        let content = "```python\nprint('hello')\n```";
1429        let result = processor.lint(content);
1430
1431        // Should fail immediately
1432        assert!(result.is_err());
1433        let err = result.unwrap_err();
1434        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
1435    }
1436
1437    #[test]
1438    fn test_format_on_missing_tool_binary_fail() {
1439        use super::super::config::{LanguageToolConfig, ToolDefinition};
1440
1441        let mut config = default_config();
1442        config.on_missing_tool_binary = OnMissing::Fail;
1443
1444        // Configure a tool with a non-existent binary
1445        let lang_config = LanguageToolConfig {
1446            format: vec!["nonexistent-formatter".to_string()],
1447            ..Default::default()
1448        };
1449        config.languages.insert("python".to_string(), lang_config);
1450
1451        let tool_def = ToolDefinition {
1452            command: vec!["nonexistent-binary-xyz123".to_string()],
1453            ..Default::default()
1454        };
1455        config.tools.insert("nonexistent-formatter".to_string(), tool_def);
1456
1457        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1458
1459        let content = "```python\nprint('hello')\n```";
1460        let result = processor.format(content);
1461
1462        // Should succeed but report errors
1463        assert!(result.is_ok());
1464        let output = result.unwrap();
1465        assert_eq!(output.content, content); // Content unchanged
1466        assert!(output.had_errors);
1467        assert!(!output.error_messages.is_empty());
1468        assert!(output.error_messages[0].contains("not found in PATH"));
1469    }
1470
1471    #[test]
1472    fn test_format_on_missing_tool_binary_fail_fast() {
1473        use super::super::config::{LanguageToolConfig, ToolDefinition};
1474
1475        let mut config = default_config();
1476        config.on_missing_tool_binary = OnMissing::FailFast;
1477
1478        // Configure a tool with a non-existent binary
1479        let lang_config = LanguageToolConfig {
1480            format: vec!["nonexistent-formatter".to_string()],
1481            ..Default::default()
1482        };
1483        config.languages.insert("python".to_string(), lang_config);
1484
1485        let tool_def = ToolDefinition {
1486            command: vec!["nonexistent-binary-xyz123".to_string()],
1487            ..Default::default()
1488        };
1489        config.tools.insert("nonexistent-formatter".to_string(), tool_def);
1490
1491        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1492
1493        let content = "```python\nprint('hello')\n```";
1494        let result = processor.format(content);
1495
1496        // Should fail immediately
1497        assert!(result.is_err());
1498        let err = result.unwrap_err();
1499        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
1500    }
1501
1502    #[test]
1503    fn test_lint_rumdl_builtin_skipped_for_markdown() {
1504        // Configure the built-in "rumdl" tool for markdown
1505        // The processor should skip it (handled by embedded markdown linting)
1506        let mut config = default_config();
1507        config.languages.insert(
1508            "markdown".to_string(),
1509            LanguageToolConfig {
1510                lint: vec![RUMDL_BUILTIN_TOOL.to_string()],
1511                format: vec![],
1512                on_error: None,
1513            },
1514        );
1515        config.on_missing_language_definition = OnMissing::Fail;
1516        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1517
1518        let content = "```markdown\n# Hello\n```";
1519        let result = processor.lint(content);
1520
1521        // Should succeed with no diagnostics - "rumdl" tool is skipped, not treated as unknown
1522        assert!(result.is_ok());
1523        assert!(result.unwrap().is_empty());
1524    }
1525
1526    #[test]
1527    fn test_format_rumdl_builtin_skipped_for_markdown() {
1528        // Configure the built-in "rumdl" tool for markdown
1529        let mut config = default_config();
1530        config.languages.insert(
1531            "markdown".to_string(),
1532            LanguageToolConfig {
1533                lint: vec![],
1534                format: vec![RUMDL_BUILTIN_TOOL.to_string()],
1535                on_error: None,
1536            },
1537        );
1538        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1539
1540        let content = "```markdown\n# Hello\n```";
1541        let result = processor.format(content);
1542
1543        // Should succeed with unchanged content - "rumdl" tool is skipped
1544        assert!(result.is_ok());
1545        let output = result.unwrap();
1546        assert_eq!(output.content, content);
1547        assert!(!output.had_errors);
1548    }
1549
1550    #[test]
1551    fn test_is_markdown_language() {
1552        // Test the helper function
1553        assert!(is_markdown_language("markdown"));
1554        assert!(is_markdown_language("Markdown"));
1555        assert!(is_markdown_language("MARKDOWN"));
1556        assert!(is_markdown_language("md"));
1557        assert!(is_markdown_language("MD"));
1558        assert!(!is_markdown_language("python"));
1559        assert!(!is_markdown_language("rust"));
1560        assert!(!is_markdown_language(""));
1561    }
1562
1563    // Issue #423: MkDocs admonition code block detection
1564
1565    #[test]
1566    fn test_extract_mkdocs_admonition_code_block() {
1567        let config = default_config();
1568        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1569
1570        let content = "!!! note\n    Some text\n\n    ```python\n    def hello():\n        pass\n    ```\n";
1571        let blocks = processor.extract_code_blocks(content);
1572
1573        assert_eq!(blocks.len(), 1, "Should detect code block inside MkDocs admonition");
1574        assert_eq!(blocks[0].language, "python");
1575    }
1576
1577    #[test]
1578    fn test_extract_mkdocs_tab_code_block() {
1579        let config = default_config();
1580        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1581
1582        let content = "=== \"Python\"\n\n    ```python\n    print(\"hello\")\n    ```\n";
1583        let blocks = processor.extract_code_blocks(content);
1584
1585        assert_eq!(blocks.len(), 1, "Should detect code block inside MkDocs tab");
1586        assert_eq!(blocks[0].language, "python");
1587    }
1588
1589    #[test]
1590    fn test_standard_flavor_ignores_admonition_indented_content() {
1591        let config = default_config();
1592        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::default());
1593
1594        // With standard flavor, pulldown_cmark parses this differently;
1595        // our MkDocs extraction should NOT run
1596        let content = "!!! note\n    Some text\n\n    ```python\n    def hello():\n        pass\n    ```\n";
1597        let blocks = processor.extract_code_blocks(content);
1598
1599        // Standard flavor relies on pulldown_cmark only, which may or may not detect
1600        // indented fenced blocks. The key assertion is that we don't double-detect.
1601        // With standard flavor, the MkDocs extraction path is skipped entirely.
1602        for (i, b) in blocks.iter().enumerate() {
1603            for (j, b2) in blocks.iter().enumerate() {
1604                if i != j {
1605                    assert_ne!(b.start_line, b2.start_line, "No duplicate blocks should exist");
1606                }
1607            }
1608        }
1609    }
1610
1611    #[test]
1612    fn test_mkdocs_top_level_blocks_alongside_admonition() {
1613        let config = default_config();
1614        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1615
1616        let content =
1617            "```rust\nfn main() {}\n```\n\n!!! note\n    Some text\n\n    ```python\n    print(\"hello\")\n    ```\n";
1618        let blocks = processor.extract_code_blocks(content);
1619
1620        assert_eq!(
1621            blocks.len(),
1622            2,
1623            "Should detect both top-level and admonition code blocks"
1624        );
1625        assert_eq!(blocks[0].language, "rust");
1626        assert_eq!(blocks[1].language, "python");
1627    }
1628
1629    #[test]
1630    fn test_mkdocs_nested_admonition_code_block() {
1631        let config = default_config();
1632        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1633
1634        let content = "\
1635!!! note
1636    Some text
1637
1638    !!! warning
1639        Nested content
1640
1641        ```python
1642        x = 1
1643        ```
1644";
1645        let blocks = processor.extract_code_blocks(content);
1646        assert_eq!(blocks.len(), 1, "Should detect code block inside nested admonition");
1647        assert_eq!(blocks[0].language, "python");
1648    }
1649
1650    #[test]
1651    fn test_mkdocs_consecutive_admonitions_no_stale_context() {
1652        let config = default_config();
1653        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1654
1655        // Two consecutive admonitions at the same indent level.
1656        // The first has no code block, the second does.
1657        let content = "\
1658!!! note
1659    First admonition content
1660
1661!!! warning
1662    Second admonition content
1663
1664    ```python
1665    y = 2
1666    ```
1667";
1668        let blocks = processor.extract_code_blocks(content);
1669        assert_eq!(blocks.len(), 1, "Should detect code block in second admonition only");
1670        assert_eq!(blocks[0].language, "python");
1671    }
1672
1673    #[test]
1674    fn test_mkdocs_crlf_line_endings() {
1675        let config = default_config();
1676        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1677
1678        // Use \r\n line endings
1679        let content = "!!! note\r\n    Some text\r\n\r\n    ```python\r\n    x = 1\r\n    ```\r\n";
1680        let blocks = processor.extract_code_blocks(content);
1681
1682        assert_eq!(blocks.len(), 1, "Should detect code block with CRLF line endings");
1683        assert_eq!(blocks[0].language, "python");
1684
1685        // Verify byte offsets point to valid content
1686        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
1687        assert!(
1688            extracted.contains("x = 1"),
1689            "Extracted content should contain code. Got: {extracted:?}"
1690        );
1691    }
1692
1693    #[test]
1694    fn test_mkdocs_unclosed_fence_in_admonition() {
1695        let config = default_config();
1696        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1697
1698        // Unclosed fence should not produce a block
1699        let content = "!!! note\n    ```python\n    x = 1\n    no closing fence\n";
1700        let blocks = processor.extract_code_blocks(content);
1701        assert_eq!(blocks.len(), 0, "Unclosed fence should not produce a block");
1702    }
1703
1704    #[test]
1705    fn test_mkdocs_tilde_fence_in_admonition() {
1706        let config = default_config();
1707        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1708
1709        let content = "!!! note\n    ~~~ruby\n    puts 'hi'\n    ~~~\n";
1710        let blocks = processor.extract_code_blocks(content);
1711        assert_eq!(blocks.len(), 1, "Should detect tilde-fenced code block");
1712        assert_eq!(blocks[0].language, "ruby");
1713    }
1714
1715    #[test]
1716    fn test_mkdocs_empty_lines_in_code_block() {
1717        let config = default_config();
1718        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1719
1720        // Code block with empty lines inside — verifies byte offsets are correct
1721        // across empty lines (the previous find("") approach would break here)
1722        let content = "!!! note\n    ```python\n    x = 1\n\n    y = 2\n    ```\n";
1723        let blocks = processor.extract_code_blocks(content);
1724        assert_eq!(blocks.len(), 1);
1725
1726        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
1727        assert!(
1728            extracted.contains("x = 1") && extracted.contains("y = 2"),
1729            "Extracted content should span across the empty line. Got: {extracted:?}"
1730        );
1731    }
1732
1733    #[test]
1734    fn test_mkdocs_content_byte_offsets_lf() {
1735        let config = default_config();
1736        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1737
1738        let content = "!!! note\n    ```python\n    print('hi')\n    ```\n";
1739        let blocks = processor.extract_code_blocks(content);
1740        assert_eq!(blocks.len(), 1);
1741
1742        // Verify the extracted content is exactly the code body
1743        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
1744        assert_eq!(extracted, "    print('hi')\n", "Content offsets should be exact for LF");
1745    }
1746
1747    #[test]
1748    fn test_mkdocs_content_byte_offsets_crlf() {
1749        let config = default_config();
1750        let processor = CodeBlockToolProcessor::new(&config, MarkdownFlavor::MkDocs);
1751
1752        let content = "!!! note\r\n    ```python\r\n    print('hi')\r\n    ```\r\n";
1753        let blocks = processor.extract_code_blocks(content);
1754        assert_eq!(blocks.len(), 1);
1755
1756        let extracted = &content[blocks[0].content_start..blocks[0].content_end];
1757        assert_eq!(
1758            extracted, "    print('hi')\r\n",
1759            "Content offsets should be exact for CRLF"
1760        );
1761    }
1762}
rumdl_lib/code_block_tools/processor.rs

rumdl_lib/code_block_tools/
processor.rs