Skip to main content

rumdl_lib/code_block_tools/
processor.rs

1//! Main processor for code block linting and formatting.
2//!
3//! This module coordinates language resolution, tool lookup, execution,
4//! and result collection for processing code blocks in markdown files.
5
6#[cfg(test)]
7use super::config::LanguageToolConfig;
8use super::config::{CodeBlockToolsConfig, NormalizeLanguage, OnError, OnMissing};
9use super::executor::{ExecutorError, ToolExecutor, ToolOutput};
10use super::linguist::LinguistResolver;
11use super::registry::ToolRegistry;
12use crate::rule::{LintWarning, Severity};
13use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
14
/// Name of the special built-in tool that stands for rumdl's own markdown linting.
///
/// When this tool is configured for markdown blocks, the processor does not run
/// an external command for it: embedded markdown linting takes care of those blocks.
pub const RUMDL_BUILTIN_TOOL: &str = "rumdl";
19
/// Check whether a language tag denotes markdown.
///
/// Accepts `markdown` and `md`, compared ASCII case-insensitively. The comparison
/// is done in place, so no lowercased copy of the tag is allocated per call.
fn is_markdown_language(lang: &str) -> bool {
    ["markdown", "md"]
        .iter()
        .any(|name| lang.eq_ignore_ascii_case(name))
}
24
/// Location and fence details of one fenced code block, as needed for processing.
#[derive(Clone, Debug)]
pub struct FencedCodeBlockInfo {
    /// Line on which the opening fence starts (0-indexed).
    pub start_line: usize,
    /// Line on which the closing fence ends (0-indexed).
    pub end_line: usize,
    /// Byte offset of the first content byte, i.e. just past the opening fence line.
    pub content_start: usize,
    /// Byte offset at which the content stops, before the closing fence line.
    pub content_end: usize,
    /// Language tag: the first whitespace-separated token of the info string.
    pub language: String,
    /// The complete info string that follows the opening fence.
    pub info_string: String,
    /// Character the fence is made of (` or ~).
    pub fence_char: char,
    /// Number of fence characters in the opening fence (3 or more).
    pub fence_length: usize,
    /// Amount of leading whitespace on the fence line.
    pub indent: usize,
    /// The leading whitespace of the fence line, verbatim.
    pub indent_prefix: String,
}
49
50/// A diagnostic message from an external tool.
51#[derive(Debug, Clone)]
52pub struct CodeBlockDiagnostic {
53    /// Line number in the original markdown file (1-indexed).
54    pub file_line: usize,
55    /// Column number (1-indexed, if available).
56    pub column: Option<usize>,
57    /// Message from the tool.
58    pub message: String,
59    /// Severity (error, warning, info).
60    pub severity: DiagnosticSeverity,
61    /// Name of the tool that produced this.
62    pub tool: String,
63    /// Line where the code block starts (1-indexed, for context).
64    pub code_block_start: usize,
65}
66
/// How serious a diagnostic is.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DiagnosticSeverity {
    Error,
    Warning,
    Info,
}
74
75impl CodeBlockDiagnostic {
76    /// Convert to a LintWarning for integration with rumdl's warning system.
77    pub fn to_lint_warning(&self) -> LintWarning {
78        let severity = match self.severity {
79            DiagnosticSeverity::Error => Severity::Error,
80            DiagnosticSeverity::Warning => Severity::Warning,
81            DiagnosticSeverity::Info => Severity::Info,
82        };
83
84        LintWarning {
85            message: self.message.clone(),
86            line: self.file_line,
87            column: self.column.unwrap_or(1),
88            end_line: self.file_line,
89            end_column: self.column.unwrap_or(1),
90            severity,
91            fix: None, // External tool diagnostics don't provide fixes
92            rule_name: Some(self.tool.clone()),
93        }
94    }
95}
96
97/// Error during code block processing.
98#[derive(Debug, Clone)]
99pub enum ProcessorError {
100    /// Tool execution failed.
101    ToolError(ExecutorError),
102    /// No tools configured for language.
103    NoToolsConfigured { language: String },
104    /// Tool binary not found.
105    ToolBinaryNotFound { tool: String, language: String },
106    /// Processing was aborted due to on_error = fail.
107    Aborted { message: String },
108}
109
110impl std::fmt::Display for ProcessorError {
111    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112        match self {
113            Self::ToolError(e) => write!(f, "{e}"),
114            Self::NoToolsConfigured { language } => {
115                write!(f, "No tools configured for language '{language}'")
116            }
117            Self::ToolBinaryNotFound { tool, language } => {
118                write!(f, "Tool '{tool}' binary not found for language '{language}'")
119            }
120            Self::Aborted { message } => write!(f, "Processing aborted: {message}"),
121        }
122    }
123}
124
125impl std::error::Error for ProcessorError {}
126
127impl From<ExecutorError> for ProcessorError {
128    fn from(e: ExecutorError) -> Self {
129        Self::ToolError(e)
130    }
131}
132
133/// Result of processing a single code block.
134#[derive(Debug)]
135pub struct CodeBlockResult {
136    /// Diagnostics from linting.
137    pub diagnostics: Vec<CodeBlockDiagnostic>,
138    /// Formatted content (if formatting was requested and succeeded).
139    pub formatted_content: Option<String>,
140    /// Whether the code block was modified.
141    pub was_modified: bool,
142}
143
/// Outcome of formatting the code blocks of a document.
#[derive(Debug)]
pub struct FormatOutput {
    /// Document content after formatting; only partially formatted if errors occurred.
    pub content: String,
    /// `true` when at least one error occurred while formatting.
    pub had_errors: bool,
    /// One message per block that could not be formatted.
    pub error_messages: Vec<String>,
}
154
155/// Main processor for code block tools.
156pub struct CodeBlockToolProcessor<'a> {
157    config: &'a CodeBlockToolsConfig,
158    linguist: LinguistResolver,
159    registry: ToolRegistry,
160    executor: ToolExecutor,
161    user_aliases: std::collections::HashMap<String, String>,
162}
163
164impl<'a> CodeBlockToolProcessor<'a> {
165    /// Create a new processor with the given configuration.
166    pub fn new(config: &'a CodeBlockToolsConfig) -> Self {
167        let user_aliases = config
168            .language_aliases
169            .iter()
170            .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
171            .collect();
172        Self {
173            config,
174            linguist: LinguistResolver::new(),
175            registry: ToolRegistry::new(config.tools.clone()),
176            executor: ToolExecutor::new(config.timeout),
177            user_aliases,
178        }
179    }
180
181    /// Extract all fenced code blocks from content.
182    pub fn extract_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
183        let mut blocks = Vec::new();
184        let mut current_block: Option<FencedCodeBlockBuilder> = None;
185
186        let options = Options::all();
187        let parser = Parser::new_ext(content, options).into_offset_iter();
188
189        let lines: Vec<&str> = content.lines().collect();
190
191        for (event, range) in parser {
192            match event {
193                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
194                    let info_string = info.to_string();
195                    let language = info_string.split_whitespace().next().unwrap_or("").to_string();
196
197                    // Find start line
198                    let start_line = content[..range.start].chars().filter(|&c| c == '\n').count();
199
200                    // Find content start (after opening fence line)
201                    let content_start = content[range.start..]
202                        .find('\n')
203                        .map(|i| range.start + i + 1)
204                        .unwrap_or(content.len());
205
206                    // Detect fence character and length from the line
207                    let fence_line = lines.get(start_line).unwrap_or(&"");
208                    let trimmed = fence_line.trim_start();
209                    let indent = fence_line.len() - trimmed.len();
210                    let indent_prefix = fence_line.get(..indent).unwrap_or("").to_string();
211                    let (fence_char, fence_length) = if trimmed.starts_with('~') {
212                        ('~', trimmed.chars().take_while(|&c| c == '~').count())
213                    } else {
214                        ('`', trimmed.chars().take_while(|&c| c == '`').count())
215                    };
216
217                    current_block = Some(FencedCodeBlockBuilder {
218                        start_line,
219                        content_start,
220                        language,
221                        info_string,
222                        fence_char,
223                        fence_length,
224                        indent,
225                        indent_prefix,
226                    });
227                }
228                Event::End(TagEnd::CodeBlock) => {
229                    if let Some(builder) = current_block.take() {
230                        // Find end line
231                        let end_line = content[..range.end].chars().filter(|&c| c == '\n').count();
232
233                        // Find content end (before closing fence line)
234                        let search_start = builder.content_start.min(range.end);
235                        let content_end = if search_start < range.end {
236                            content[search_start..range.end]
237                                .rfind('\n')
238                                .map(|i| search_start + i)
239                                .unwrap_or(search_start)
240                        } else {
241                            search_start
242                        };
243
244                        if content_end >= builder.content_start {
245                            blocks.push(FencedCodeBlockInfo {
246                                start_line: builder.start_line,
247                                end_line,
248                                content_start: builder.content_start,
249                                content_end,
250                                language: builder.language,
251                                info_string: builder.info_string,
252                                fence_char: builder.fence_char,
253                                fence_length: builder.fence_length,
254                                indent: builder.indent,
255                                indent_prefix: builder.indent_prefix,
256                            });
257                        }
258                    }
259                }
260                _ => {}
261            }
262        }
263
264        blocks
265    }
266
267    /// Resolve a language tag to its canonical name.
268    fn resolve_language(&self, language: &str) -> String {
269        let lower = language.to_lowercase();
270        if let Some(mapped) = self.user_aliases.get(&lower) {
271            return mapped.clone();
272        }
273        match self.config.normalize_language {
274            NormalizeLanguage::Linguist => self.linguist.resolve(&lower),
275            NormalizeLanguage::Exact => lower,
276        }
277    }
278
279    /// Get the effective on_error setting for a language.
280    fn get_on_error(&self, language: &str) -> OnError {
281        self.config
282            .languages
283            .get(language)
284            .and_then(|lc| lc.on_error)
285            .unwrap_or(self.config.on_error)
286    }
287
288    /// Strip the fence indentation prefix from each line of a code block.
289    fn strip_indent_from_block(&self, content: &str, indent_prefix: &str) -> String {
290        if indent_prefix.is_empty() {
291            return content.to_string();
292        }
293
294        let mut out = String::with_capacity(content.len());
295        for line in content.split_inclusive('\n') {
296            if let Some(stripped) = line.strip_prefix(indent_prefix) {
297                out.push_str(stripped);
298            } else {
299                out.push_str(line);
300            }
301        }
302        out
303    }
304
305    /// Re-apply the fence indentation prefix to each line of a code block.
306    fn apply_indent_to_block(&self, content: &str, indent_prefix: &str) -> String {
307        if indent_prefix.is_empty() {
308            return content.to_string();
309        }
310        if content.is_empty() {
311            return String::new();
312        }
313
314        let mut out = String::with_capacity(content.len() + indent_prefix.len());
315        for line in content.split_inclusive('\n') {
316            if line == "\n" {
317                out.push_str(line);
318            } else {
319                out.push_str(indent_prefix);
320                out.push_str(line);
321            }
322        }
323        out
324    }
325
326    /// Lint all code blocks in the content.
327    ///
328    /// Returns diagnostics from all configured linters.
329    pub fn lint(&self, content: &str) -> Result<Vec<CodeBlockDiagnostic>, ProcessorError> {
330        let mut all_diagnostics = Vec::new();
331        let blocks = self.extract_code_blocks(content);
332
333        for block in blocks {
334            if block.language.is_empty() {
335                continue; // Skip blocks without language tag
336            }
337
338            let canonical_lang = self.resolve_language(&block.language);
339
340            // Get lint tools for this language
341            let lint_tools = match self.config.languages.get(&canonical_lang) {
342                Some(lc) if !lc.lint.is_empty() => &lc.lint,
343                _ => {
344                    // No tools configured for this language in lint mode
345                    match self.config.on_missing_language_definition {
346                        OnMissing::Ignore => continue,
347                        OnMissing::Fail => {
348                            all_diagnostics.push(CodeBlockDiagnostic {
349                                file_line: block.start_line + 1,
350                                column: None,
351                                message: format!("No lint tools configured for language '{canonical_lang}'"),
352                                severity: DiagnosticSeverity::Error,
353                                tool: "code-block-tools".to_string(),
354                                code_block_start: block.start_line + 1,
355                            });
356                            continue;
357                        }
358                        OnMissing::FailFast => {
359                            return Err(ProcessorError::NoToolsConfigured {
360                                language: canonical_lang,
361                            });
362                        }
363                    }
364                }
365            };
366
367            // Extract code block content
368            let code_content_raw = if block.content_start < block.content_end && block.content_end <= content.len() {
369                &content[block.content_start..block.content_end]
370            } else {
371                continue;
372            };
373            let code_content = self.strip_indent_from_block(code_content_raw, &block.indent_prefix);
374
375            // Run each lint tool
376            for tool_id in lint_tools {
377                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown linting
378                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
379                    continue;
380                }
381
382                let tool_def = match self.registry.get(tool_id) {
383                    Some(t) => t,
384                    None => {
385                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
386                        continue;
387                    }
388                };
389
390                // Check if tool binary exists before running
391                let tool_name = tool_def.command.first().map(String::as_str).unwrap_or("");
392                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
393                    match self.config.on_missing_tool_binary {
394                        OnMissing::Ignore => {
395                            log::debug!("Tool binary '{tool_name}' not found, skipping");
396                            continue;
397                        }
398                        OnMissing::Fail => {
399                            all_diagnostics.push(CodeBlockDiagnostic {
400                                file_line: block.start_line + 1,
401                                column: None,
402                                message: format!("Tool binary '{tool_name}' not found in PATH"),
403                                severity: DiagnosticSeverity::Error,
404                                tool: "code-block-tools".to_string(),
405                                code_block_start: block.start_line + 1,
406                            });
407                            continue;
408                        }
409                        OnMissing::FailFast => {
410                            return Err(ProcessorError::ToolBinaryNotFound {
411                                tool: tool_name.to_string(),
412                                language: canonical_lang.clone(),
413                            });
414                        }
415                    }
416                }
417
418                match self.executor.lint(tool_def, &code_content, Some(self.config.timeout)) {
419                    Ok(output) => {
420                        // Parse tool output into diagnostics
421                        let diagnostics = self.parse_tool_output(
422                            &output,
423                            tool_id,
424                            block.start_line + 1, // Convert to 1-indexed
425                        );
426                        all_diagnostics.extend(diagnostics);
427                    }
428                    Err(e) => {
429                        let on_error = self.get_on_error(&canonical_lang);
430                        match on_error {
431                            OnError::Fail => return Err(e.into()),
432                            OnError::Warn => {
433                                log::warn!("Tool '{tool_id}' failed: {e}");
434                            }
435                            OnError::Skip => {
436                                // Silently skip
437                            }
438                        }
439                    }
440                }
441            }
442        }
443
444        Ok(all_diagnostics)
445    }
446
447    /// Format all code blocks in the content.
448    ///
449    /// Returns the modified content with formatted code blocks and any errors that occurred.
450    /// With `on-missing-*` = `fail`, errors are collected but formatting continues.
451    /// With `on-missing-*` = `fail-fast`, returns Err immediately on first error.
452    pub fn format(&self, content: &str) -> Result<FormatOutput, ProcessorError> {
453        let blocks = self.extract_code_blocks(content);
454
455        if blocks.is_empty() {
456            return Ok(FormatOutput {
457                content: content.to_string(),
458                had_errors: false,
459                error_messages: Vec::new(),
460            });
461        }
462
463        // Process blocks in reverse order to maintain byte offsets
464        let mut result = content.to_string();
465        let mut error_messages: Vec<String> = Vec::new();
466
467        for block in blocks.into_iter().rev() {
468            if block.language.is_empty() {
469                continue;
470            }
471
472            let canonical_lang = self.resolve_language(&block.language);
473
474            // Get format tools for this language
475            let format_tools = match self.config.languages.get(&canonical_lang) {
476                Some(lc) if !lc.format.is_empty() => &lc.format,
477                _ => {
478                    // No tools configured for this language in format mode
479                    match self.config.on_missing_language_definition {
480                        OnMissing::Ignore => continue,
481                        OnMissing::Fail => {
482                            error_messages.push(format!(
483                                "No format tools configured for language '{canonical_lang}' at line {}",
484                                block.start_line + 1
485                            ));
486                            continue;
487                        }
488                        OnMissing::FailFast => {
489                            return Err(ProcessorError::NoToolsConfigured {
490                                language: canonical_lang,
491                            });
492                        }
493                    }
494                }
495            };
496
497            // Extract code block content
498            if block.content_start >= block.content_end || block.content_end > result.len() {
499                continue;
500            }
501            let code_content_raw = result[block.content_start..block.content_end].to_string();
502            let code_content = self.strip_indent_from_block(&code_content_raw, &block.indent_prefix);
503
504            // Run format tools (use first successful one)
505            let mut formatted = code_content.clone();
506            let mut tool_ran = false;
507            for tool_id in format_tools {
508                // Skip built-in "rumdl" tool for markdown - handled separately by embedded markdown formatting
509                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
510                    continue;
511                }
512
513                let tool_def = match self.registry.get(tool_id) {
514                    Some(t) => t,
515                    None => {
516                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
517                        continue;
518                    }
519                };
520
521                // Check if tool binary exists before running
522                let tool_name = tool_def.command.first().map(String::as_str).unwrap_or("");
523                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
524                    match self.config.on_missing_tool_binary {
525                        OnMissing::Ignore => {
526                            log::debug!("Tool binary '{tool_name}' not found, skipping");
527                            continue;
528                        }
529                        OnMissing::Fail => {
530                            error_messages.push(format!(
531                                "Tool binary '{tool_name}' not found in PATH for language '{canonical_lang}' at line {}",
532                                block.start_line + 1
533                            ));
534                            continue;
535                        }
536                        OnMissing::FailFast => {
537                            return Err(ProcessorError::ToolBinaryNotFound {
538                                tool: tool_name.to_string(),
539                                language: canonical_lang.clone(),
540                            });
541                        }
542                    }
543                }
544
545                match self.executor.format(tool_def, &formatted, Some(self.config.timeout)) {
546                    Ok(output) => {
547                        // Ensure trailing newline matches original (unindented)
548                        formatted = output;
549                        if code_content.ends_with('\n') && !formatted.ends_with('\n') {
550                            formatted.push('\n');
551                        } else if !code_content.ends_with('\n') && formatted.ends_with('\n') {
552                            formatted.pop();
553                        }
554                        tool_ran = true;
555                        break; // Use first successful formatter
556                    }
557                    Err(e) => {
558                        let on_error = self.get_on_error(&canonical_lang);
559                        match on_error {
560                            OnError::Fail => return Err(e.into()),
561                            OnError::Warn => {
562                                log::warn!("Formatter '{tool_id}' failed: {e}");
563                            }
564                            OnError::Skip => {}
565                        }
566                    }
567                }
568            }
569
570            // Replace content if changed and a tool actually ran
571            if tool_ran && formatted != code_content {
572                let reindented = self.apply_indent_to_block(&formatted, &block.indent_prefix);
573                if reindented != code_content_raw {
574                    result.replace_range(block.content_start..block.content_end, &reindented);
575                }
576            }
577        }
578
579        Ok(FormatOutput {
580            content: result,
581            had_errors: !error_messages.is_empty(),
582            error_messages,
583        })
584    }
585
586    /// Parse tool output into diagnostics.
587    ///
588    /// This is a basic parser that handles common output formats.
589    /// Tools vary widely in their output format, so this is best-effort.
590    fn parse_tool_output(
591        &self,
592        output: &ToolOutput,
593        tool_id: &str,
594        code_block_start_line: usize,
595    ) -> Vec<CodeBlockDiagnostic> {
596        let mut diagnostics = Vec::new();
597        let mut shellcheck_line: Option<usize> = None;
598
599        // Combine stdout and stderr for parsing
600        let stdout = &output.stdout;
601        let stderr = &output.stderr;
602        let combined = format!("{stdout}\n{stderr}");
603
604        // Look for common line:column:message patterns
605        // Examples:
606        // - ruff: "_.py:1:1: E501 Line too long"
607        // - shellcheck: "In - line 1: ..."
608        // - eslint: "1:10 error Description"
609
610        for line in combined.lines() {
611            let line = line.trim();
612            if line.is_empty() {
613                continue;
614            }
615
616            if let Some(line_num) = self.parse_shellcheck_header(line) {
617                shellcheck_line = Some(line_num);
618                continue;
619            }
620
621            if let Some(line_num) = shellcheck_line
622                && let Some(diag) = self.parse_shellcheck_message(line, tool_id, code_block_start_line, line_num)
623            {
624                diagnostics.push(diag);
625                continue;
626            }
627
628            // Try pattern: "file:line:col: message" or "file:line: message"
629            if let Some(diag) = self.parse_standard_format(line, tool_id, code_block_start_line) {
630                diagnostics.push(diag);
631                continue;
632            }
633
634            // Try pattern: "line:col message" (eslint style)
635            if let Some(diag) = self.parse_eslint_format(line, tool_id, code_block_start_line) {
636                diagnostics.push(diag);
637                continue;
638            }
639
640            // Try single-line shellcheck format fallback
641            if let Some(diag) = self.parse_shellcheck_format(line, tool_id, code_block_start_line) {
642                diagnostics.push(diag);
643            }
644        }
645
646        // If no diagnostics parsed but tool failed, create a generic one
647        if diagnostics.is_empty() && !output.success {
648            let message = if !output.stderr.is_empty() {
649                output.stderr.lines().next().unwrap_or("Tool failed").to_string()
650            } else if !output.stdout.is_empty() {
651                output.stdout.lines().next().unwrap_or("Tool failed").to_string()
652            } else {
653                let exit_code = output.exit_code;
654                format!("Tool exited with code {exit_code}")
655            };
656
657            diagnostics.push(CodeBlockDiagnostic {
658                file_line: code_block_start_line,
659                column: None,
660                message,
661                severity: DiagnosticSeverity::Error,
662                tool: tool_id.to_string(),
663                code_block_start: code_block_start_line,
664            });
665        }
666
667        diagnostics
668    }
669
670    /// Parse standard "file:line:col: message" format.
671    fn parse_standard_format(
672        &self,
673        line: &str,
674        tool_id: &str,
675        code_block_start_line: usize,
676    ) -> Option<CodeBlockDiagnostic> {
677        // Match patterns like "file.py:1:10: E501 message"
678        let mut parts = line.rsplitn(4, ':');
679        let message = parts.next()?.trim().to_string();
680        let part1 = parts.next()?.trim().to_string();
681        let part2 = parts.next()?.trim().to_string();
682        let part3 = parts.next().map(|s| s.trim().to_string());
683
684        let (line_part, col_part) = if part3.is_some() {
685            (part2, Some(part1))
686        } else {
687            (part1, None)
688        };
689
690        if let Ok(line_num) = line_part.parse::<usize>() {
691            let column = col_part.and_then(|s| s.parse::<usize>().ok());
692            let message = Self::strip_fixable_markers(&message);
693            if !message.is_empty() {
694                let severity = self.infer_severity(&message);
695                return Some(CodeBlockDiagnostic {
696                    file_line: code_block_start_line + line_num,
697                    column,
698                    message,
699                    severity,
700                    tool: tool_id.to_string(),
701                    code_block_start: code_block_start_line,
702                });
703            }
704        }
705        None
706    }
707
708    /// Parse eslint-style "line:col severity message" format.
709    fn parse_eslint_format(
710        &self,
711        line: &str,
712        tool_id: &str,
713        code_block_start_line: usize,
714    ) -> Option<CodeBlockDiagnostic> {
715        // Match "1:10 error Message"
716        let parts: Vec<&str> = line.splitn(3, ' ').collect();
717        if parts.len() >= 2 {
718            let loc_parts: Vec<&str> = parts[0].split(':').collect();
719            if loc_parts.len() == 2
720                && let (Ok(line_num), Ok(col)) = (loc_parts[0].parse::<usize>(), loc_parts[1].parse::<usize>())
721            {
722                let (sev_part, msg_part) = if parts.len() >= 3 {
723                    (parts[1], parts[2])
724                } else {
725                    (parts[1], "")
726                };
727                let message = if msg_part.is_empty() {
728                    sev_part.to_string()
729                } else {
730                    msg_part.to_string()
731                };
732                let message = Self::strip_fixable_markers(&message);
733                let severity = match sev_part.to_lowercase().as_str() {
734                    "error" => DiagnosticSeverity::Error,
735                    "warning" | "warn" => DiagnosticSeverity::Warning,
736                    "info" => DiagnosticSeverity::Info,
737                    _ => self.infer_severity(&message),
738                };
739                return Some(CodeBlockDiagnostic {
740                    file_line: code_block_start_line + line_num,
741                    column: Some(col),
742                    message,
743                    severity,
744                    tool: tool_id.to_string(),
745                    code_block_start: code_block_start_line,
746                });
747            }
748        }
749        None
750    }
751
752    /// Parse shellcheck-style "In - line N: message" format.
753    fn parse_shellcheck_format(
754        &self,
755        line: &str,
756        tool_id: &str,
757        code_block_start_line: usize,
758    ) -> Option<CodeBlockDiagnostic> {
759        // Match "In - line 5:" pattern
760        if line.starts_with("In ")
761            && line.contains(" line ")
762            && let Some(line_start) = line.find(" line ")
763        {
764            let after_line = &line[line_start + 6..];
765            if let Some(colon_pos) = after_line.find(':')
766                && let Ok(line_num) = after_line[..colon_pos].trim().parse::<usize>()
767            {
768                let message = Self::strip_fixable_markers(after_line[colon_pos + 1..].trim());
769                if !message.is_empty() {
770                    let severity = self.infer_severity(&message);
771                    return Some(CodeBlockDiagnostic {
772                        file_line: code_block_start_line + line_num,
773                        column: None,
774                        message,
775                        severity,
776                        tool: tool_id.to_string(),
777                        code_block_start: code_block_start_line,
778                    });
779                }
780            }
781        }
782        None
783    }
784
785    /// Parse shellcheck header line to capture line number context.
786    fn parse_shellcheck_header(&self, line: &str) -> Option<usize> {
787        if line.starts_with("In ")
788            && line.contains(" line ")
789            && let Some(line_start) = line.find(" line ")
790        {
791            let after_line = &line[line_start + 6..];
792            if let Some(colon_pos) = after_line.find(':') {
793                return after_line[..colon_pos].trim().parse::<usize>().ok();
794            }
795        }
796        None
797    }
798
799    /// Parse shellcheck message line containing SCXXXX codes.
800    fn parse_shellcheck_message(
801        &self,
802        line: &str,
803        tool_id: &str,
804        code_block_start_line: usize,
805        line_num: usize,
806    ) -> Option<CodeBlockDiagnostic> {
807        let sc_pos = line.find("SC")?;
808        let after_sc = &line[sc_pos + 2..];
809        let code_len = after_sc.chars().take_while(|c| c.is_ascii_digit()).count();
810        if code_len == 0 {
811            return None;
812        }
813        let after_code = &after_sc[code_len..];
814        let sev_start = after_code.find('(')? + 1;
815        let sev_end = after_code[sev_start..].find(')')? + sev_start;
816        let sev = after_code[sev_start..sev_end].trim().to_lowercase();
817        let message_start = after_code.find("):")? + 2;
818        let message = Self::strip_fixable_markers(after_code[message_start..].trim());
819        if message.is_empty() {
820            return None;
821        }
822
823        let severity = match sev.as_str() {
824            "error" => DiagnosticSeverity::Error,
825            "warning" | "warn" => DiagnosticSeverity::Warning,
826            "info" | "style" => DiagnosticSeverity::Info,
827            _ => self.infer_severity(&message),
828        };
829
830        Some(CodeBlockDiagnostic {
831            file_line: code_block_start_line + line_num,
832            column: None,
833            message,
834            severity,
835            tool: tool_id.to_string(),
836            code_block_start: code_block_start_line,
837        })
838    }
839
840    /// Infer severity from message content.
841    fn infer_severity(&self, message: &str) -> DiagnosticSeverity {
842        let lower = message.to_lowercase();
843        if lower.contains("error")
844            || lower.starts_with("e") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
845            || lower.starts_with("f") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
846        {
847            DiagnosticSeverity::Error
848        } else if lower.contains("warning")
849            || lower.contains("warn")
850            || lower.starts_with("w") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
851        {
852            DiagnosticSeverity::Warning
853        } else {
854            DiagnosticSeverity::Info
855        }
856    }
857
858    /// Strip "fixable" markers from external tool messages.
859    ///
860    /// External tools like ruff show `[*]` to indicate fixable issues, but in rumdl's
861    /// context these markers can be misleading - the lint tool's fix capability may
862    /// differ from what our configured formatter can fix. We strip these markers
863    /// to avoid making promises we can't keep.
864    fn strip_fixable_markers(message: &str) -> String {
865        message
866            .replace(" [*]", "")
867            .replace("[*] ", "")
868            .replace("[*]", "")
869            .replace(" (fixable)", "")
870            .replace("(fixable) ", "")
871            .replace("(fixable)", "")
872            .replace(" [fix available]", "")
873            .replace("[fix available] ", "")
874            .replace("[fix available]", "")
875            .replace(" [autofix]", "")
876            .replace("[autofix] ", "")
877            .replace("[autofix]", "")
878            .trim()
879            .to_string()
880    }
881}
882
/// Builder for FencedCodeBlockInfo during parsing.
///
/// Carries the data known from a block's opening fence. Unlike `FencedCodeBlockInfo`
/// it has no `end_line` / `content_end` fields.
/// NOTE(review): presumably those are supplied when the closing fence is reached and
/// the remaining fields are copied over unchanged — confirm against the extraction code.
struct FencedCodeBlockBuilder {
    // Presumably the 0-indexed line of the opening fence, as in `FencedCodeBlockInfo` — TODO confirm.
    start_line: usize,
    // Presumably the byte offset where the code content starts — TODO confirm.
    content_start: usize,
    // Presumably the first token of the info string — TODO confirm.
    language: String,
    // Presumably the full info string from the fence — TODO confirm.
    info_string: String,
    // Presumably the fence character, '`' or '~' — TODO confirm.
    fence_char: char,
    // Presumably the number of fence characters (3 or more) — TODO confirm.
    fence_length: usize,
    // NOTE(review): unit (columns vs. bytes) is not visible here — confirm.
    indent: usize,
    // Presumably the literal leading whitespace of the fence line — TODO confirm.
    indent_prefix: String,
}
894
// Unit tests for the code block tool processor: block extraction, language
// resolution, tool-output parsing, and the on-missing policies of lint/format.
#[cfg(test)]
mod tests {
    use super::*;

    // Shared starting point: the default configuration (tests below rely on it
    // having no tools configured).
    fn default_config() -> CodeBlockToolsConfig {
        CodeBlockToolsConfig::default()
    }

    // Two backtick-fenced blocks are found, with language, fence char/length,
    // start line and indent recorded for the first one.
    #[test]
    fn test_extract_code_blocks() {
        let config = default_config();
        let processor = CodeBlockToolProcessor::new(&config);

        let content = r#"# Example

```python
def hello():
    print("Hello")
```

Some text

```rust
fn main() {}
```
"#;

        let blocks = processor.extract_code_blocks(content);

        assert_eq!(blocks.len(), 2);

        assert_eq!(blocks[0].language, "python");
        assert_eq!(blocks[0].fence_char, '`');
        assert_eq!(blocks[0].fence_length, 3);
        assert_eq!(blocks[0].start_line, 2);
        assert_eq!(blocks[0].indent, 0);
        assert_eq!(blocks[0].indent_prefix, "");

        assert_eq!(blocks[1].language, "rust");
        assert_eq!(blocks[1].fence_char, '`');
        assert_eq!(blocks[1].fence_length, 3);
    }

    // The language is the first token of the info string; the full info string is kept.
    #[test]
    fn test_extract_code_blocks_with_info_string() {
        let config = default_config();
        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```python title=\"example.py\"\ncode\n```";
        let blocks = processor.extract_code_blocks(content);

        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].language, "python");
        assert_eq!(blocks[0].info_string, "python title=\"example.py\"");
    }

    // Tilde fences are recognised and reported with fence_char '~'.
    #[test]
    fn test_extract_code_blocks_tilde_fence() {
        let config = default_config();
        let processor = CodeBlockToolProcessor::new(&config);

        let content = "~~~bash\necho hello\n~~~";
        let blocks = processor.extract_code_blocks(content);

        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].language, "bash");
        assert_eq!(blocks[0].fence_char, '~');
        assert_eq!(blocks[0].fence_length, 3);
        assert_eq!(blocks[0].indent_prefix, "");
    }

    // A fence nested in a list item records its leading whitespace as indent_prefix.
    #[test]
    fn test_extract_code_blocks_with_indent_prefix() {
        let config = default_config();
        let processor = CodeBlockToolProcessor::new(&config);

        let content = "  - item\n    ```python\n    print('hi')\n    ```";
        let blocks = processor.extract_code_blocks(content);

        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].indent_prefix, "    ");
    }

    // A fence without an info string yields an empty language.
    #[test]
    fn test_extract_code_blocks_no_language() {
        let config = default_config();
        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```\nplain code\n```";
        let blocks = processor.extract_code_blocks(content);

        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].language, "");
    }

    // Linguist normalisation maps common aliases to canonical language names.
    #[test]
    fn test_resolve_language_linguist() {
        let mut config = default_config();
        config.normalize_language = NormalizeLanguage::Linguist;
        let processor = CodeBlockToolProcessor::new(&config);

        assert_eq!(processor.resolve_language("py"), "python");
        assert_eq!(processor.resolve_language("bash"), "shell");
        assert_eq!(processor.resolve_language("js"), "javascript");
    }

    // Exact normalisation only lowercases; aliases are not expanded.
    #[test]
    fn test_resolve_language_exact() {
        let mut config = default_config();
        config.normalize_language = NormalizeLanguage::Exact;
        let processor = CodeBlockToolProcessor::new(&config);

        assert_eq!(processor.resolve_language("py"), "py");
        assert_eq!(processor.resolve_language("BASH"), "bash");
    }

    // A user-defined alias applies even in Exact mode and matches case-insensitively.
    #[test]
    fn test_resolve_language_user_alias_override() {
        let mut config = default_config();
        config.language_aliases.insert("py".to_string(), "python".to_string());
        config.normalize_language = NormalizeLanguage::Exact;
        let processor = CodeBlockToolProcessor::new(&config);

        assert_eq!(processor.resolve_language("PY"), "python");
    }

    // Stripping an indent prefix and re-applying it reproduces the original text.
    #[test]
    fn test_indent_strip_and_reapply_roundtrip() {
        let config = default_config();
        let processor = CodeBlockToolProcessor::new(&config);

        let raw = "    def hello():\n        print('hi')";
        let stripped = processor.strip_indent_from_block(raw, "    ");
        assert_eq!(stripped, "def hello():\n    print('hi')");

        let reapplied = processor.apply_indent_to_block(&stripped, "    ");
        assert_eq!(reapplied, raw);
    }

    // Severity inference from rule-code prefixes (E/W) and from keywords in the text.
    #[test]
    fn test_infer_severity() {
        let config = default_config();
        let processor = CodeBlockToolProcessor::new(&config);

        assert_eq!(
            processor.infer_severity("E501 line too long"),
            DiagnosticSeverity::Error
        );
        assert_eq!(
            processor.infer_severity("W291 trailing whitespace"),
            DiagnosticSeverity::Warning
        );
        assert_eq!(
            processor.infer_severity("error: something failed"),
            DiagnosticSeverity::Error
        );
        assert_eq!(
            processor.infer_severity("warning: unused variable"),
            DiagnosticSeverity::Warning
        );
        assert_eq!(
            processor.infer_severity("note: consider using"),
            DiagnosticSeverity::Info
        );
    }

    // The drive-letter colon of a Windows path must not be mistaken for the
    // file:line:col separators.
    #[test]
    fn test_parse_standard_format_windows_path() {
        let config = default_config();
        let processor = CodeBlockToolProcessor::new(&config);

        let output = ToolOutput {
            stdout: "C:\\path\\file.py:2:5: E123 message".to_string(),
            stderr: String::new(),
            exit_code: 1,
            success: false,
        };

        let diags = processor.parse_tool_output(&output, "ruff:check", 10);
        assert_eq!(diags.len(), 1);
        assert_eq!(diags[0].file_line, 12);
        assert_eq!(diags[0].column, Some(5));
        assert_eq!(diags[0].message, "E123 message");
    }

    // "line:col severity message" output: explicit severity is used and removed
    // from the message; the line is offset by the code block start.
    #[test]
    fn test_parse_eslint_severity() {
        let config = default_config();
        let processor = CodeBlockToolProcessor::new(&config);

        let output = ToolOutput {
            stdout: "1:2 error Unexpected token".to_string(),
            stderr: String::new(),
            exit_code: 1,
            success: false,
        };

        let diags = processor.parse_tool_output(&output, "eslint", 5);
        assert_eq!(diags.len(), 1);
        assert_eq!(diags[0].file_line, 6);
        assert_eq!(diags[0].column, Some(2));
        assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
        assert_eq!(diags[0].message, "Unexpected token");
    }

    // Shellcheck's multi-line output: the header supplies the line number and the
    // SCxxxx line supplies severity and message, producing a single diagnostic.
    #[test]
    fn test_parse_shellcheck_multiline() {
        let config = default_config();
        let processor = CodeBlockToolProcessor::new(&config);

        let output = ToolOutput {
            stdout: "In - line 3:\necho $var\n ^-- SC2086 (info): Double quote to prevent globbing".to_string(),
            stderr: String::new(),
            exit_code: 1,
            success: false,
        };

        let diags = processor.parse_tool_output(&output, "shellcheck", 10);
        assert_eq!(diags.len(), 1);
        assert_eq!(diags[0].file_line, 13);
        assert_eq!(diags[0].severity, DiagnosticSeverity::Info);
        assert_eq!(diags[0].message, "Double quote to prevent globbing");
    }

    // Linting with the default config produces no diagnostics.
    #[test]
    fn test_lint_no_config() {
        let config = default_config();
        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```python\nprint('hello')\n```";
        let result = processor.lint(content);

        // Should succeed with no diagnostics (no tools configured)
        assert!(result.is_ok());
        assert!(result.unwrap().is_empty());
    }

    // Formatting with the default config returns the content unchanged and error-free.
    #[test]
    fn test_format_no_config() {
        let config = default_config();
        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```python\nprint('hello')\n```";
        let result = processor.format(content);

        // Should succeed with unchanged content (no tools configured)
        assert!(result.is_ok());
        let output = result.unwrap();
        assert_eq!(output.content, content);
        assert!(!output.had_errors);
        assert!(output.error_messages.is_empty());
    }

    // OnMissing::Fail for language definitions: lint continues and reports one
    // diagnostic per unconfigured language.
    #[test]
    fn test_lint_on_missing_language_definition_fail() {
        let mut config = default_config();
        config.on_missing_language_definition = OnMissing::Fail;
        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
        let result = processor.lint(content);

        // Should succeed but return diagnostics for both missing language definitions
        assert!(result.is_ok());
        let diagnostics = result.unwrap();
        assert_eq!(diagnostics.len(), 2);
        assert!(diagnostics[0].message.contains("No lint tools configured"));
        assert!(diagnostics[0].message.contains("python"));
        assert!(diagnostics[1].message.contains("javascript"));
    }

    // OnMissing::FailFast for language definitions: lint aborts with NoToolsConfigured.
    #[test]
    fn test_lint_on_missing_language_definition_fail_fast() {
        let mut config = default_config();
        config.on_missing_language_definition = OnMissing::FailFast;
        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
        let result = processor.lint(content);

        // Should fail immediately on first missing language
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
    }

    // OnMissing::Fail for language definitions: format leaves content untouched
    // but flags the error.
    #[test]
    fn test_format_on_missing_language_definition_fail() {
        let mut config = default_config();
        config.on_missing_language_definition = OnMissing::Fail;
        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```python\nprint('hello')\n```";
        let result = processor.format(content);

        // Should succeed but report errors
        assert!(result.is_ok());
        let output = result.unwrap();
        assert_eq!(output.content, content); // Content unchanged
        assert!(output.had_errors);
        assert!(!output.error_messages.is_empty());
        assert!(output.error_messages[0].contains("No format tools configured"));
    }

    // OnMissing::FailFast for language definitions: format aborts with NoToolsConfigured.
    #[test]
    fn test_format_on_missing_language_definition_fail_fast() {
        let mut config = default_config();
        config.on_missing_language_definition = OnMissing::FailFast;
        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```python\nprint('hello')\n```";
        let result = processor.format(content);

        // Should fail immediately
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }) || matches!(err, ProcessorError::NoToolsConfigured { .. }));
    }

    // OnMissing::Fail for tool binaries: lint reports a "not found in PATH" diagnostic.
    #[test]
    fn test_lint_on_missing_tool_binary_fail() {
        use super::super::config::{LanguageToolConfig, ToolDefinition};

        let mut config = default_config();
        config.on_missing_tool_binary = OnMissing::Fail;

        // Configure a tool with a non-existent binary
        let lang_config = LanguageToolConfig {
            lint: vec!["nonexistent-linter".to_string()],
            ..Default::default()
        };
        config.languages.insert("python".to_string(), lang_config);

        let tool_def = ToolDefinition {
            command: vec!["nonexistent-binary-xyz123".to_string()],
            ..Default::default()
        };
        config.tools.insert("nonexistent-linter".to_string(), tool_def);

        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```python\nprint('hello')\n```";
        let result = processor.lint(content);

        // Should succeed but return diagnostic for missing binary
        assert!(result.is_ok());
        let diagnostics = result.unwrap();
        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("not found in PATH"));
    }

    // OnMissing::FailFast for tool binaries: lint aborts with ToolBinaryNotFound.
    #[test]
    fn test_lint_on_missing_tool_binary_fail_fast() {
        use super::super::config::{LanguageToolConfig, ToolDefinition};

        let mut config = default_config();
        config.on_missing_tool_binary = OnMissing::FailFast;

        // Configure a tool with a non-existent binary
        let lang_config = LanguageToolConfig {
            lint: vec!["nonexistent-linter".to_string()],
            ..Default::default()
        };
        config.languages.insert("python".to_string(), lang_config);

        let tool_def = ToolDefinition {
            command: vec!["nonexistent-binary-xyz123".to_string()],
            ..Default::default()
        };
        config.tools.insert("nonexistent-linter".to_string(), tool_def);

        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```python\nprint('hello')\n```";
        let result = processor.lint(content);

        // Should fail immediately
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
    }

    // OnMissing::Fail for tool binaries: format leaves content untouched and
    // records a "not found in PATH" error message.
    #[test]
    fn test_format_on_missing_tool_binary_fail() {
        use super::super::config::{LanguageToolConfig, ToolDefinition};

        let mut config = default_config();
        config.on_missing_tool_binary = OnMissing::Fail;

        // Configure a tool with a non-existent binary
        let lang_config = LanguageToolConfig {
            format: vec!["nonexistent-formatter".to_string()],
            ..Default::default()
        };
        config.languages.insert("python".to_string(), lang_config);

        let tool_def = ToolDefinition {
            command: vec!["nonexistent-binary-xyz123".to_string()],
            ..Default::default()
        };
        config.tools.insert("nonexistent-formatter".to_string(), tool_def);

        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```python\nprint('hello')\n```";
        let result = processor.format(content);

        // Should succeed but report errors
        assert!(result.is_ok());
        let output = result.unwrap();
        assert_eq!(output.content, content); // Content unchanged
        assert!(output.had_errors);
        assert!(!output.error_messages.is_empty());
        assert!(output.error_messages[0].contains("not found in PATH"));
    }

    // OnMissing::FailFast for tool binaries: format aborts with ToolBinaryNotFound.
    #[test]
    fn test_format_on_missing_tool_binary_fail_fast() {
        use super::super::config::{LanguageToolConfig, ToolDefinition};

        let mut config = default_config();
        config.on_missing_tool_binary = OnMissing::FailFast;

        // Configure a tool with a non-existent binary
        let lang_config = LanguageToolConfig {
            format: vec!["nonexistent-formatter".to_string()],
            ..Default::default()
        };
        config.languages.insert("python".to_string(), lang_config);

        let tool_def = ToolDefinition {
            command: vec!["nonexistent-binary-xyz123".to_string()],
            ..Default::default()
        };
        config.tools.insert("nonexistent-formatter".to_string(), tool_def);

        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```python\nprint('hello')\n```";
        let result = processor.format(content);

        // Should fail immediately
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err, ProcessorError::ToolBinaryNotFound { .. }));
    }

    // The built-in "rumdl" lint tool on markdown blocks is skipped rather than
    // reported as an unknown or missing tool.
    #[test]
    fn test_lint_rumdl_builtin_skipped_for_markdown() {
        // Configure the built-in "rumdl" tool for markdown
        // The processor should skip it (handled by embedded markdown linting)
        let mut config = default_config();
        config.languages.insert(
            "markdown".to_string(),
            LanguageToolConfig {
                lint: vec![RUMDL_BUILTIN_TOOL.to_string()],
                format: vec![],
                on_error: None,
            },
        );
        config.on_missing_language_definition = OnMissing::Fail;
        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```markdown\n# Hello\n```";
        let result = processor.lint(content);

        // Should succeed with no diagnostics - "rumdl" tool is skipped, not treated as unknown
        assert!(result.is_ok());
        assert!(result.unwrap().is_empty());
    }

    // The built-in "rumdl" format tool on markdown blocks is skipped and the
    // content is returned unchanged.
    #[test]
    fn test_format_rumdl_builtin_skipped_for_markdown() {
        // Configure the built-in "rumdl" tool for markdown
        let mut config = default_config();
        config.languages.insert(
            "markdown".to_string(),
            LanguageToolConfig {
                lint: vec![],
                format: vec![RUMDL_BUILTIN_TOOL.to_string()],
                on_error: None,
            },
        );
        let processor = CodeBlockToolProcessor::new(&config);

        let content = "```markdown\n# Hello\n```";
        let result = processor.format(content);

        // Should succeed with unchanged content - "rumdl" tool is skipped
        assert!(result.is_ok());
        let output = result.unwrap();
        assert_eq!(output.content, content);
        assert!(!output.had_errors);
    }

    // "markdown" and "md" are accepted in any letter case; other tags and the
    // empty string are not.
    #[test]
    fn test_is_markdown_language() {
        // Test the helper function
        assert!(is_markdown_language("markdown"));
        assert!(is_markdown_language("Markdown"));
        assert!(is_markdown_language("MARKDOWN"));
        assert!(is_markdown_language("md"));
        assert!(is_markdown_language("MD"));
        assert!(!is_markdown_language("python"));
        assert!(!is_markdown_language("rust"));
        assert!(!is_markdown_language(""));
    }
}