rumdl_lib/code_block_tools/
processor.rs

1//! Main processor for code block linting and formatting.
2//!
3//! This module coordinates language resolution, tool lookup, execution,
4//! and result collection for processing code blocks in markdown files.
5
6use super::config::{CodeBlockToolsConfig, NormalizeLanguage, OnError};
7use super::executor::{ExecutorError, ToolExecutor, ToolOutput};
8use super::linguist::LinguistResolver;
9use super::registry::ToolRegistry;
10use crate::rule::{LintWarning, Severity};
11use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
12
/// Position and fence metadata describing one fenced code block in a markdown document.
#[derive(Debug, Clone)]
pub struct FencedCodeBlockInfo {
    /// Line on which the block (its opening fence) starts, counted from 0.
    pub start_line: usize,
    /// Line on which the block ends, counted from 0.
    pub end_line: usize,
    /// Byte offset of the first content byte, i.e. just past the newline that
    /// terminates the opening fence line.
    pub content_start: usize,
    /// Byte offset one past the last content byte (exclusive end of the content slice).
    pub content_end: usize,
    /// First whitespace-separated token of the info string; empty when the fence has none.
    pub language: String,
    /// The complete info string as written after the opening fence.
    pub info_string: String,
    /// Character the fence is built from: a backtick or `~`.
    pub fence_char: char,
    /// Number of consecutive fence characters in the opening fence.
    pub fence_length: usize,
    /// Length in bytes of the whitespace preceding the opening fence.
    pub indent: usize,
    /// The whitespace preceding the opening fence, verbatim.
    pub indent_prefix: String,
}
37
38/// A diagnostic message from an external tool.
39#[derive(Debug, Clone)]
40pub struct CodeBlockDiagnostic {
41    /// Line number in the original markdown file (1-indexed).
42    pub file_line: usize,
43    /// Column number (1-indexed, if available).
44    pub column: Option<usize>,
45    /// Message from the tool.
46    pub message: String,
47    /// Severity (error, warning, info).
48    pub severity: DiagnosticSeverity,
49    /// Name of the tool that produced this.
50    pub tool: String,
51    /// Line where the code block starts (1-indexed, for context).
52    pub code_block_start: usize,
53}
54
/// How serious a [`CodeBlockDiagnostic`] is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// The finding is an error.
    Error,
    /// The finding is a warning.
    Warning,
    /// The finding is informational.
    Info,
}
62
63impl CodeBlockDiagnostic {
64    /// Convert to a LintWarning for integration with rumdl's warning system.
65    pub fn to_lint_warning(&self) -> LintWarning {
66        let severity = match self.severity {
67            DiagnosticSeverity::Error => Severity::Error,
68            DiagnosticSeverity::Warning => Severity::Warning,
69            DiagnosticSeverity::Info => Severity::Info,
70        };
71
72        LintWarning {
73            message: self.message.clone(),
74            line: self.file_line,
75            column: self.column.unwrap_or(1),
76            end_line: self.file_line,
77            end_column: self.column.unwrap_or(1),
78            severity,
79            fix: None, // External tool diagnostics don't provide fixes
80            rule_name: Some(self.tool.clone()),
81        }
82    }
83}
84
85/// Error during code block processing.
86#[derive(Debug, Clone)]
87pub enum ProcessorError {
88    /// Tool execution failed.
89    ToolError(ExecutorError),
90    /// No tools configured for language.
91    NoToolsConfigured { language: String },
92    /// Processing was aborted due to on_error = fail.
93    Aborted { message: String },
94}
95
96impl std::fmt::Display for ProcessorError {
97    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
98        match self {
99            Self::ToolError(e) => write!(f, "{e}"),
100            Self::NoToolsConfigured { language } => {
101                write!(f, "No tools configured for language '{language}'")
102            }
103            Self::Aborted { message } => write!(f, "Processing aborted: {message}"),
104        }
105    }
106}
107
108impl std::error::Error for ProcessorError {}
109
110impl From<ExecutorError> for ProcessorError {
111    fn from(e: ExecutorError) -> Self {
112        Self::ToolError(e)
113    }
114}
115
116/// Result of processing a single code block.
117#[derive(Debug)]
118pub struct CodeBlockResult {
119    /// Diagnostics from linting.
120    pub diagnostics: Vec<CodeBlockDiagnostic>,
121    /// Formatted content (if formatting was requested and succeeded).
122    pub formatted_content: Option<String>,
123    /// Whether the code block was modified.
124    pub was_modified: bool,
125}
126
127/// Main processor for code block tools.
128pub struct CodeBlockToolProcessor<'a> {
129    config: &'a CodeBlockToolsConfig,
130    linguist: LinguistResolver,
131    registry: ToolRegistry,
132    executor: ToolExecutor,
133    user_aliases: std::collections::HashMap<String, String>,
134}
135
136impl<'a> CodeBlockToolProcessor<'a> {
137    /// Create a new processor with the given configuration.
138    pub fn new(config: &'a CodeBlockToolsConfig) -> Self {
139        let user_aliases = config
140            .language_aliases
141            .iter()
142            .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
143            .collect();
144        Self {
145            config,
146            linguist: LinguistResolver::new(),
147            registry: ToolRegistry::new(config.tools.clone()),
148            executor: ToolExecutor::new(config.timeout),
149            user_aliases,
150        }
151    }
152
153    /// Extract all fenced code blocks from content.
154    pub fn extract_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
155        let mut blocks = Vec::new();
156        let mut current_block: Option<FencedCodeBlockBuilder> = None;
157
158        let options = Options::all();
159        let parser = Parser::new_ext(content, options).into_offset_iter();
160
161        let lines: Vec<&str> = content.lines().collect();
162
163        for (event, range) in parser {
164            match event {
165                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
166                    let info_string = info.to_string();
167                    let language = info_string.split_whitespace().next().unwrap_or("").to_string();
168
169                    // Find start line
170                    let start_line = content[..range.start].chars().filter(|&c| c == '\n').count();
171
172                    // Find content start (after opening fence line)
173                    let content_start = content[range.start..]
174                        .find('\n')
175                        .map(|i| range.start + i + 1)
176                        .unwrap_or(content.len());
177
178                    // Detect fence character and length from the line
179                    let fence_line = lines.get(start_line).unwrap_or(&"");
180                    let trimmed = fence_line.trim_start();
181                    let indent = fence_line.len() - trimmed.len();
182                    let indent_prefix = fence_line.get(..indent).unwrap_or("").to_string();
183                    let (fence_char, fence_length) = if trimmed.starts_with('~') {
184                        ('~', trimmed.chars().take_while(|&c| c == '~').count())
185                    } else {
186                        ('`', trimmed.chars().take_while(|&c| c == '`').count())
187                    };
188
189                    current_block = Some(FencedCodeBlockBuilder {
190                        start_line,
191                        content_start,
192                        language,
193                        info_string,
194                        fence_char,
195                        fence_length,
196                        indent,
197                        indent_prefix,
198                    });
199                }
200                Event::End(TagEnd::CodeBlock) => {
201                    if let Some(builder) = current_block.take() {
202                        // Find end line
203                        let end_line = content[..range.end].chars().filter(|&c| c == '\n').count();
204
205                        // Find content end (before closing fence line)
206                        let search_start = builder.content_start.min(range.end);
207                        let content_end = if search_start < range.end {
208                            content[search_start..range.end]
209                                .rfind('\n')
210                                .map(|i| search_start + i)
211                                .unwrap_or(search_start)
212                        } else {
213                            search_start
214                        };
215
216                        if content_end >= builder.content_start {
217                            blocks.push(FencedCodeBlockInfo {
218                                start_line: builder.start_line,
219                                end_line,
220                                content_start: builder.content_start,
221                                content_end,
222                                language: builder.language,
223                                info_string: builder.info_string,
224                                fence_char: builder.fence_char,
225                                fence_length: builder.fence_length,
226                                indent: builder.indent,
227                                indent_prefix: builder.indent_prefix,
228                            });
229                        }
230                    }
231                }
232                _ => {}
233            }
234        }
235
236        blocks
237    }
238
239    /// Resolve a language tag to its canonical name.
240    fn resolve_language(&self, language: &str) -> String {
241        let lower = language.to_lowercase();
242        if let Some(mapped) = self.user_aliases.get(&lower) {
243            return mapped.clone();
244        }
245        match self.config.normalize_language {
246            NormalizeLanguage::Linguist => self.linguist.resolve(&lower),
247            NormalizeLanguage::Exact => lower,
248        }
249    }
250
251    /// Get the effective on_error setting for a language.
252    fn get_on_error(&self, language: &str) -> OnError {
253        self.config
254            .languages
255            .get(language)
256            .and_then(|lc| lc.on_error)
257            .unwrap_or(self.config.on_error)
258    }
259
260    /// Strip the fence indentation prefix from each line of a code block.
261    fn strip_indent_from_block(&self, content: &str, indent_prefix: &str) -> String {
262        if indent_prefix.is_empty() {
263            return content.to_string();
264        }
265
266        let mut out = String::with_capacity(content.len());
267        for line in content.split_inclusive('\n') {
268            if let Some(stripped) = line.strip_prefix(indent_prefix) {
269                out.push_str(stripped);
270            } else {
271                out.push_str(line);
272            }
273        }
274        out
275    }
276
277    /// Re-apply the fence indentation prefix to each line of a code block.
278    fn apply_indent_to_block(&self, content: &str, indent_prefix: &str) -> String {
279        if indent_prefix.is_empty() {
280            return content.to_string();
281        }
282        if content.is_empty() {
283            return String::new();
284        }
285
286        let mut out = String::with_capacity(content.len() + indent_prefix.len());
287        for line in content.split_inclusive('\n') {
288            if line == "\n" {
289                out.push_str(line);
290            } else {
291                out.push_str(indent_prefix);
292                out.push_str(line);
293            }
294        }
295        out
296    }
297
298    /// Lint all code blocks in the content.
299    ///
300    /// Returns diagnostics from all configured linters.
301    pub fn lint(&self, content: &str) -> Result<Vec<CodeBlockDiagnostic>, ProcessorError> {
302        let mut all_diagnostics = Vec::new();
303        let blocks = self.extract_code_blocks(content);
304
305        for block in blocks {
306            if block.language.is_empty() {
307                continue; // Skip blocks without language tag
308            }
309
310            let canonical_lang = self.resolve_language(&block.language);
311
312            // Get lint tools for this language
313            let lint_tools = match self.config.languages.get(&canonical_lang) {
314                Some(lc) => &lc.lint,
315                None => continue, // No config for this language
316            };
317
318            if lint_tools.is_empty() {
319                continue;
320            }
321
322            // Extract code block content
323            let code_content_raw = if block.content_start < block.content_end && block.content_end <= content.len() {
324                &content[block.content_start..block.content_end]
325            } else {
326                continue;
327            };
328            let code_content = self.strip_indent_from_block(code_content_raw, &block.indent_prefix);
329
330            // Run each lint tool
331            for tool_id in lint_tools {
332                let tool_def = match self.registry.get(tool_id) {
333                    Some(t) => t,
334                    None => {
335                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
336                        continue;
337                    }
338                };
339
340                match self.executor.lint(tool_def, &code_content, Some(self.config.timeout)) {
341                    Ok(output) => {
342                        // Parse tool output into diagnostics
343                        let diagnostics = self.parse_tool_output(
344                            &output,
345                            tool_id,
346                            block.start_line + 1, // Convert to 1-indexed
347                        );
348                        all_diagnostics.extend(diagnostics);
349                    }
350                    Err(e) => {
351                        let on_error = self.get_on_error(&canonical_lang);
352                        match on_error {
353                            OnError::Fail => return Err(e.into()),
354                            OnError::Warn => {
355                                log::warn!("Tool '{tool_id}' failed: {e}");
356                            }
357                            OnError::Skip => {
358                                // Silently skip
359                            }
360                        }
361                    }
362                }
363            }
364        }
365
366        Ok(all_diagnostics)
367    }
368
369    /// Format all code blocks in the content.
370    ///
371    /// Returns the modified content with formatted code blocks.
372    pub fn format(&self, content: &str) -> Result<String, ProcessorError> {
373        let blocks = self.extract_code_blocks(content);
374
375        if blocks.is_empty() {
376            return Ok(content.to_string());
377        }
378
379        // Process blocks in reverse order to maintain byte offsets
380        let mut result = content.to_string();
381
382        for block in blocks.into_iter().rev() {
383            if block.language.is_empty() {
384                continue;
385            }
386
387            let canonical_lang = self.resolve_language(&block.language);
388
389            // Get format tools for this language
390            let format_tools = match self.config.languages.get(&canonical_lang) {
391                Some(lc) => &lc.format,
392                None => continue,
393            };
394
395            if format_tools.is_empty() {
396                continue;
397            }
398
399            // Extract code block content
400            if block.content_start >= block.content_end || block.content_end > result.len() {
401                continue;
402            }
403            let code_content_raw = result[block.content_start..block.content_end].to_string();
404            let code_content = self.strip_indent_from_block(&code_content_raw, &block.indent_prefix);
405
406            // Run format tools (use first successful one)
407            let mut formatted = code_content.clone();
408            for tool_id in format_tools {
409                let tool_def = match self.registry.get(tool_id) {
410                    Some(t) => t,
411                    None => {
412                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
413                        continue;
414                    }
415                };
416
417                match self.executor.format(tool_def, &formatted, Some(self.config.timeout)) {
418                    Ok(output) => {
419                        // Ensure trailing newline matches original (unindented)
420                        formatted = output;
421                        if code_content.ends_with('\n') && !formatted.ends_with('\n') {
422                            formatted.push('\n');
423                        } else if !code_content.ends_with('\n') && formatted.ends_with('\n') {
424                            formatted.pop();
425                        }
426                        break; // Use first successful formatter
427                    }
428                    Err(e) => {
429                        let on_error = self.get_on_error(&canonical_lang);
430                        match on_error {
431                            OnError::Fail => return Err(e.into()),
432                            OnError::Warn => {
433                                log::warn!("Formatter '{tool_id}' failed: {e}");
434                            }
435                            OnError::Skip => {}
436                        }
437                    }
438                }
439            }
440
441            // Replace content if changed
442            if formatted != code_content {
443                let reindented = self.apply_indent_to_block(&formatted, &block.indent_prefix);
444                if reindented != code_content_raw {
445                    result.replace_range(block.content_start..block.content_end, &reindented);
446                }
447            }
448        }
449
450        Ok(result)
451    }
452
453    /// Parse tool output into diagnostics.
454    ///
455    /// This is a basic parser that handles common output formats.
456    /// Tools vary widely in their output format, so this is best-effort.
457    fn parse_tool_output(
458        &self,
459        output: &ToolOutput,
460        tool_id: &str,
461        code_block_start_line: usize,
462    ) -> Vec<CodeBlockDiagnostic> {
463        let mut diagnostics = Vec::new();
464        let mut shellcheck_line: Option<usize> = None;
465
466        // Combine stdout and stderr for parsing
467        let stdout = &output.stdout;
468        let stderr = &output.stderr;
469        let combined = format!("{stdout}\n{stderr}");
470
471        // Look for common line:column:message patterns
472        // Examples:
473        // - ruff: "_.py:1:1: E501 Line too long"
474        // - shellcheck: "In - line 1: ..."
475        // - eslint: "1:10 error Description"
476
477        for line in combined.lines() {
478            let line = line.trim();
479            if line.is_empty() {
480                continue;
481            }
482
483            if let Some(line_num) = self.parse_shellcheck_header(line) {
484                shellcheck_line = Some(line_num);
485                continue;
486            }
487
488            if let Some(line_num) = shellcheck_line
489                && let Some(diag) = self.parse_shellcheck_message(line, tool_id, code_block_start_line, line_num)
490            {
491                diagnostics.push(diag);
492                continue;
493            }
494
495            // Try pattern: "file:line:col: message" or "file:line: message"
496            if let Some(diag) = self.parse_standard_format(line, tool_id, code_block_start_line) {
497                diagnostics.push(diag);
498                continue;
499            }
500
501            // Try pattern: "line:col message" (eslint style)
502            if let Some(diag) = self.parse_eslint_format(line, tool_id, code_block_start_line) {
503                diagnostics.push(diag);
504                continue;
505            }
506
507            // Try single-line shellcheck format fallback
508            if let Some(diag) = self.parse_shellcheck_format(line, tool_id, code_block_start_line) {
509                diagnostics.push(diag);
510            }
511        }
512
513        // If no diagnostics parsed but tool failed, create a generic one
514        if diagnostics.is_empty() && !output.success {
515            let message = if !output.stderr.is_empty() {
516                output.stderr.lines().next().unwrap_or("Tool failed").to_string()
517            } else if !output.stdout.is_empty() {
518                output.stdout.lines().next().unwrap_or("Tool failed").to_string()
519            } else {
520                let exit_code = output.exit_code;
521                format!("Tool exited with code {exit_code}")
522            };
523
524            diagnostics.push(CodeBlockDiagnostic {
525                file_line: code_block_start_line,
526                column: None,
527                message,
528                severity: DiagnosticSeverity::Error,
529                tool: tool_id.to_string(),
530                code_block_start: code_block_start_line,
531            });
532        }
533
534        diagnostics
535    }
536
537    /// Parse standard "file:line:col: message" format.
538    fn parse_standard_format(
539        &self,
540        line: &str,
541        tool_id: &str,
542        code_block_start_line: usize,
543    ) -> Option<CodeBlockDiagnostic> {
544        // Match patterns like "file.py:1:10: E501 message"
545        let mut parts = line.rsplitn(4, ':');
546        let message = parts.next()?.trim().to_string();
547        let part1 = parts.next()?.trim().to_string();
548        let part2 = parts.next()?.trim().to_string();
549        let part3 = parts.next().map(|s| s.trim().to_string());
550
551        let (line_part, col_part) = if part3.is_some() {
552            (part2, Some(part1))
553        } else {
554            (part1, None)
555        };
556
557        if let Ok(line_num) = line_part.parse::<usize>() {
558            let column = col_part.and_then(|s| s.parse::<usize>().ok());
559            let message = Self::strip_fixable_markers(&message);
560            if !message.is_empty() {
561                let severity = self.infer_severity(&message);
562                return Some(CodeBlockDiagnostic {
563                    file_line: code_block_start_line + line_num,
564                    column,
565                    message,
566                    severity,
567                    tool: tool_id.to_string(),
568                    code_block_start: code_block_start_line,
569                });
570            }
571        }
572        None
573    }
574
575    /// Parse eslint-style "line:col severity message" format.
576    fn parse_eslint_format(
577        &self,
578        line: &str,
579        tool_id: &str,
580        code_block_start_line: usize,
581    ) -> Option<CodeBlockDiagnostic> {
582        // Match "1:10 error Message"
583        let parts: Vec<&str> = line.splitn(3, ' ').collect();
584        if parts.len() >= 2 {
585            let loc_parts: Vec<&str> = parts[0].split(':').collect();
586            if loc_parts.len() == 2
587                && let (Ok(line_num), Ok(col)) = (loc_parts[0].parse::<usize>(), loc_parts[1].parse::<usize>())
588            {
589                let (sev_part, msg_part) = if parts.len() >= 3 {
590                    (parts[1], parts[2])
591                } else {
592                    (parts[1], "")
593                };
594                let message = if msg_part.is_empty() {
595                    sev_part.to_string()
596                } else {
597                    msg_part.to_string()
598                };
599                let message = Self::strip_fixable_markers(&message);
600                let severity = match sev_part.to_lowercase().as_str() {
601                    "error" => DiagnosticSeverity::Error,
602                    "warning" | "warn" => DiagnosticSeverity::Warning,
603                    "info" => DiagnosticSeverity::Info,
604                    _ => self.infer_severity(&message),
605                };
606                return Some(CodeBlockDiagnostic {
607                    file_line: code_block_start_line + line_num,
608                    column: Some(col),
609                    message,
610                    severity,
611                    tool: tool_id.to_string(),
612                    code_block_start: code_block_start_line,
613                });
614            }
615        }
616        None
617    }
618
619    /// Parse shellcheck-style "In - line N: message" format.
620    fn parse_shellcheck_format(
621        &self,
622        line: &str,
623        tool_id: &str,
624        code_block_start_line: usize,
625    ) -> Option<CodeBlockDiagnostic> {
626        // Match "In - line 5:" pattern
627        if line.starts_with("In ")
628            && line.contains(" line ")
629            && let Some(line_start) = line.find(" line ")
630        {
631            let after_line = &line[line_start + 6..];
632            if let Some(colon_pos) = after_line.find(':')
633                && let Ok(line_num) = after_line[..colon_pos].trim().parse::<usize>()
634            {
635                let message = Self::strip_fixable_markers(after_line[colon_pos + 1..].trim());
636                if !message.is_empty() {
637                    let severity = self.infer_severity(&message);
638                    return Some(CodeBlockDiagnostic {
639                        file_line: code_block_start_line + line_num,
640                        column: None,
641                        message,
642                        severity,
643                        tool: tool_id.to_string(),
644                        code_block_start: code_block_start_line,
645                    });
646                }
647            }
648        }
649        None
650    }
651
652    /// Parse shellcheck header line to capture line number context.
653    fn parse_shellcheck_header(&self, line: &str) -> Option<usize> {
654        if line.starts_with("In ")
655            && line.contains(" line ")
656            && let Some(line_start) = line.find(" line ")
657        {
658            let after_line = &line[line_start + 6..];
659            if let Some(colon_pos) = after_line.find(':') {
660                return after_line[..colon_pos].trim().parse::<usize>().ok();
661            }
662        }
663        None
664    }
665
666    /// Parse shellcheck message line containing SCXXXX codes.
667    fn parse_shellcheck_message(
668        &self,
669        line: &str,
670        tool_id: &str,
671        code_block_start_line: usize,
672        line_num: usize,
673    ) -> Option<CodeBlockDiagnostic> {
674        let sc_pos = line.find("SC")?;
675        let after_sc = &line[sc_pos + 2..];
676        let code_len = after_sc.chars().take_while(|c| c.is_ascii_digit()).count();
677        if code_len == 0 {
678            return None;
679        }
680        let after_code = &after_sc[code_len..];
681        let sev_start = after_code.find('(')? + 1;
682        let sev_end = after_code[sev_start..].find(')')? + sev_start;
683        let sev = after_code[sev_start..sev_end].trim().to_lowercase();
684        let message_start = after_code.find("):")? + 2;
685        let message = Self::strip_fixable_markers(after_code[message_start..].trim());
686        if message.is_empty() {
687            return None;
688        }
689
690        let severity = match sev.as_str() {
691            "error" => DiagnosticSeverity::Error,
692            "warning" | "warn" => DiagnosticSeverity::Warning,
693            "info" | "style" => DiagnosticSeverity::Info,
694            _ => self.infer_severity(&message),
695        };
696
697        Some(CodeBlockDiagnostic {
698            file_line: code_block_start_line + line_num,
699            column: None,
700            message,
701            severity,
702            tool: tool_id.to_string(),
703            code_block_start: code_block_start_line,
704        })
705    }
706
707    /// Infer severity from message content.
708    fn infer_severity(&self, message: &str) -> DiagnosticSeverity {
709        let lower = message.to_lowercase();
710        if lower.contains("error")
711            || lower.starts_with("e") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
712            || lower.starts_with("f") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
713        {
714            DiagnosticSeverity::Error
715        } else if lower.contains("warning")
716            || lower.contains("warn")
717            || lower.starts_with("w") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
718        {
719            DiagnosticSeverity::Warning
720        } else {
721            DiagnosticSeverity::Info
722        }
723    }
724
725    /// Strip "fixable" markers from external tool messages.
726    ///
727    /// External tools like ruff show `[*]` to indicate fixable issues, but in rumdl's
728    /// context these markers can be misleading - the lint tool's fix capability may
729    /// differ from what our configured formatter can fix. We strip these markers
730    /// to avoid making promises we can't keep.
731    fn strip_fixable_markers(message: &str) -> String {
732        message
733            .replace(" [*]", "")
734            .replace("[*] ", "")
735            .replace("[*]", "")
736            .replace(" (fixable)", "")
737            .replace("(fixable) ", "")
738            .replace("(fixable)", "")
739            .replace(" [fix available]", "")
740            .replace("[fix available] ", "")
741            .replace("[fix available]", "")
742            .replace(" [autofix]", "")
743            .replace("[autofix] ", "")
744            .replace("[autofix]", "")
745            .trim()
746            .to_string()
747    }
748}
749
/// Data gathered when a fenced block's start event is seen, held until the
/// matching end event supplies the rest of a `FencedCodeBlockInfo`.
struct FencedCodeBlockBuilder {
    /// 0-indexed line on which the block starts.
    start_line: usize,
    /// Byte offset of the first content byte.
    content_start: usize,
    /// First token of the info string (may be empty).
    language: String,
    /// Full info string.
    info_string: String,
    /// Fence character: a backtick or `~`.
    fence_char: char,
    /// Number of consecutive fence characters.
    fence_length: usize,
    /// Byte length of the whitespace in front of the fence.
    indent: usize,
    /// The whitespace in front of the fence, verbatim.
    indent_prefix: String,
}
761
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns a configuration with every setting left at its default.
    fn default_config() -> CodeBlockToolsConfig {
        CodeBlockToolsConfig::default()
    }

    /// Builds the output of a tool run that printed `stdout`, wrote nothing
    /// to stderr, and failed with exit code 1.
    fn failed_output(stdout: &str) -> ToolOutput {
        ToolOutput {
            stdout: stdout.to_string(),
            stderr: String::new(),
            exit_code: 1,
            success: false,
        }
    }

    #[test]
    fn test_extract_code_blocks() {
        let cfg = default_config();
        let runner = CodeBlockToolProcessor::new(&cfg);

        let content = r#"# Example

```python
def hello():
    print("Hello")
```

Some text

```rust
fn main() {}
```
"#;

        let found = runner.extract_code_blocks(content);
        assert_eq!(found.len(), 2);

        // First block: the python fence sits on the third line (index 2).
        let python = &found[0];
        assert_eq!(python.language, "python");
        assert_eq!(python.fence_char, '`');
        assert_eq!(python.fence_length, 3);
        assert_eq!(python.start_line, 2);
        assert_eq!(python.indent, 0);
        assert_eq!(python.indent_prefix, "");

        // Second block: the rust fence.
        let rust = &found[1];
        assert_eq!(rust.language, "rust");
        assert_eq!(rust.fence_char, '`');
        assert_eq!(rust.fence_length, 3);
    }

    #[test]
    fn test_extract_code_blocks_with_info_string() {
        let cfg = default_config();
        let runner = CodeBlockToolProcessor::new(&cfg);

        let found = runner.extract_code_blocks("```python title=\"example.py\"\ncode\n```");
        assert_eq!(found.len(), 1);

        // The language is the first token; the info string keeps the rest too.
        let block = &found[0];
        assert_eq!(block.language, "python");
        assert_eq!(block.info_string, "python title=\"example.py\"");
    }

    #[test]
    fn test_extract_code_blocks_tilde_fence() {
        let cfg = default_config();
        let runner = CodeBlockToolProcessor::new(&cfg);

        let found = runner.extract_code_blocks("~~~bash\necho hello\n~~~");
        assert_eq!(found.len(), 1);

        let block = &found[0];
        assert_eq!(block.language, "bash");
        assert_eq!(block.fence_char, '~');
        assert_eq!(block.fence_length, 3);
        assert_eq!(block.indent_prefix, "");
    }

    #[test]
    fn test_extract_code_blocks_with_indent_prefix() {
        let cfg = default_config();
        let runner = CodeBlockToolProcessor::new(&cfg);

        // Fence nested in a list item, indented by four spaces.
        let nested = "  - item\n    ```python\n    print('hi')\n    ```";
        let found = runner.extract_code_blocks(nested);

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].indent_prefix, "    ");
    }

    #[test]
    fn test_extract_code_blocks_no_language() {
        let cfg = default_config();
        let runner = CodeBlockToolProcessor::new(&cfg);

        let found = runner.extract_code_blocks("```\nplain code\n```");

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].language, "");
    }

    #[test]
    fn test_resolve_language_linguist() {
        let mut cfg = default_config();
        cfg.normalize_language = NormalizeLanguage::Linguist;
        let runner = CodeBlockToolProcessor::new(&cfg);

        // (tag as written in the fence, expected resolved name)
        let cases = [("py", "python"), ("bash", "shell"), ("js", "javascript")];
        for (tag, resolved) in cases {
            assert_eq!(runner.resolve_language(tag), resolved, "input: {:?}", tag);
        }
    }

    #[test]
    fn test_resolve_language_exact() {
        let mut cfg = default_config();
        cfg.normalize_language = NormalizeLanguage::Exact;
        let runner = CodeBlockToolProcessor::new(&cfg);

        // (tag as written in the fence, expected resolved name)
        let cases = [("py", "py"), ("BASH", "bash")];
        for (tag, resolved) in cases {
            assert_eq!(runner.resolve_language(tag), resolved, "input: {:?}", tag);
        }
    }

    #[test]
    fn test_resolve_language_user_alias_override() {
        let mut cfg = default_config();
        cfg.language_aliases
            .insert(String::from("py"), String::from("python"));
        cfg.normalize_language = NormalizeLanguage::Exact;
        let runner = CodeBlockToolProcessor::new(&cfg);

        // The alias is registered as "py" but looked up as "PY".
        assert_eq!(runner.resolve_language("PY"), "python");
    }

    #[test]
    fn test_indent_strip_and_reapply_roundtrip() {
        let cfg = default_config();
        let runner = CodeBlockToolProcessor::new(&cfg);

        let indent = "    ";
        let original = "    def hello():\n        print('hi')";

        let dedented = runner.strip_indent_from_block(original, indent);
        assert_eq!(dedented, "def hello():\n    print('hi')");

        // Re-indenting the stripped text must reproduce the input exactly.
        let restored = runner.apply_indent_to_block(&dedented, indent);
        assert_eq!(restored, original);
    }

    #[test]
    fn test_infer_severity() {
        let cfg = default_config();
        let runner = CodeBlockToolProcessor::new(&cfg);

        // (tool message, severity the processor should infer from it)
        let cases = [
            ("E501 line too long", DiagnosticSeverity::Error),
            ("W291 trailing whitespace", DiagnosticSeverity::Warning),
            ("error: something failed", DiagnosticSeverity::Error),
            ("warning: unused variable", DiagnosticSeverity::Warning),
            ("note: consider using", DiagnosticSeverity::Info),
        ];
        for (message, expected) in cases {
            assert_eq!(runner.infer_severity(message), expected, "input: {:?}", message);
        }
    }

    #[test]
    fn test_parse_standard_format_windows_path() {
        let cfg = default_config();
        let runner = CodeBlockToolProcessor::new(&cfg);

        // The drive-letter colon must not be mistaken for a field separator.
        let output = failed_output("C:\\path\\file.py:2:5: E123 message");
        let parsed = runner.parse_tool_output(&output, "ruff:check", 10);

        assert_eq!(parsed.len(), 1);
        let diag = &parsed[0];
        // Expected file line is the tool's line 2 plus the 10 passed above.
        assert_eq!(diag.file_line, 12);
        assert_eq!(diag.column, Some(5));
        assert_eq!(diag.message, "E123 message");
    }

    #[test]
    fn test_parse_eslint_severity() {
        let cfg = default_config();
        let runner = CodeBlockToolProcessor::new(&cfg);

        let output = failed_output("1:2 error Unexpected token");
        let parsed = runner.parse_tool_output(&output, "eslint", 5);

        assert_eq!(parsed.len(), 1);
        let diag = &parsed[0];
        // Expected file line is the tool's line 1 plus the 5 passed above.
        assert_eq!(diag.file_line, 6);
        assert_eq!(diag.column, Some(2));
        assert_eq!(diag.severity, DiagnosticSeverity::Error);
        assert_eq!(diag.message, "Unexpected token");
    }

    #[test]
    fn test_parse_shellcheck_multiline() {
        let cfg = default_config();
        let runner = CodeBlockToolProcessor::new(&cfg);

        // One finding spread over three lines: location, source echo, message.
        let output =
            failed_output("In - line 3:\necho $var\n ^-- SC2086 (info): Double quote to prevent globbing");
        let parsed = runner.parse_tool_output(&output, "shellcheck", 10);

        assert_eq!(parsed.len(), 1);
        let diag = &parsed[0];
        // Expected file line is the tool's line 3 plus the 10 passed above.
        assert_eq!(diag.file_line, 13);
        assert_eq!(diag.severity, DiagnosticSeverity::Info);
        assert_eq!(diag.message, "Double quote to prevent globbing");
    }

    #[test]
    fn test_lint_no_config() {
        let cfg = default_config();
        let runner = CodeBlockToolProcessor::new(&cfg);

        let outcome = runner.lint("```python\nprint('hello')\n```");

        // With no tools configured, linting succeeds and reports nothing.
        assert!(outcome.is_ok());
        let diagnostics = outcome.unwrap();
        assert!(diagnostics.is_empty());
    }

    #[test]
    fn test_format_no_config() {
        let cfg = default_config();
        let runner = CodeBlockToolProcessor::new(&cfg);

        let content = "```python\nprint('hello')\n```";
        let outcome = runner.format(content);

        // With no tools configured, formatting succeeds and changes nothing.
        assert!(outcome.is_ok());
        let formatted = outcome.unwrap();
        assert_eq!(formatted, content);
    }
}
rumdl_lib/code_block_tools/processor.rs

rumdl_lib/code_block_tools/
processor.rs