1#[cfg(test)]
7use super::config::LanguageToolConfig;
8use super::config::{CodeBlockToolsConfig, NormalizeLanguage, OnError, OnMissing};
9use super::executor::{ExecutorError, ToolExecutor, ToolOutput};
10use super::linguist::LinguistResolver;
11use super::registry::ToolRegistry;
12use crate::rule::{LintWarning, Severity};
13use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
14
/// Tool id that `lint` and `format` skip when the code block's language is
/// markdown (see the `is_markdown_language` checks in those methods).
pub const RUMDL_BUILTIN_TOOL: &str = "rumdl";
19
/// Returns `true` when `lang`, compared case-insensitively, names markdown
/// (`markdown` or `md`).
fn is_markdown_language(lang: &str) -> bool {
    let lowered = lang.to_lowercase();
    ["markdown", "md"].contains(&lowered.as_str())
}
24
/// Location and fence metadata for one fenced code block, as produced by
/// `CodeBlockToolProcessor::extract_code_blocks`.
#[derive(Debug, Clone)]
pub struct FencedCodeBlockInfo {
    /// 0-based line index of the opening fence (number of `\n` before the block start).
    pub start_line: usize,
    /// Number of `\n` characters before the end of the parser-reported block range.
    pub end_line: usize,
    /// Byte offset just past the first `\n` at or after the block start
    /// (the document length if there is no such newline).
    pub content_start: usize,
    /// Byte offset of the last `\n` found between `content_start` and the end of
    /// the parser-reported range; used as the exclusive end of the code content.
    pub content_end: usize,
    /// First whitespace-separated token of the info string (empty when absent).
    pub language: String,
    /// Full info string reported by the parser for the opening fence.
    pub info_string: String,
    /// `'~'` when the fence line (after leading whitespace) starts with `~`, otherwise `` '`' ``.
    pub fence_char: char,
    /// Number of consecutive `fence_char` characters at the start of the trimmed fence line.
    pub fence_length: usize,
    /// Byte length of the leading whitespace on the fence line.
    pub indent: usize,
    /// The leading whitespace of the fence line itself.
    pub indent_prefix: String,
}
49
/// A single diagnostic produced for a fenced code block.
#[derive(Debug, Clone)]
pub struct CodeBlockDiagnostic {
    /// Line in the markdown file the diagnostic is reported on; becomes
    /// `line`/`end_line` in `to_lint_warning`.
    pub file_line: usize,
    /// Column reported by the tool, if any; `to_lint_warning` substitutes 1 when absent.
    pub column: Option<usize>,
    /// Human-readable diagnostic text.
    pub message: String,
    /// Severity of the diagnostic.
    pub severity: DiagnosticSeverity,
    /// Identifier of the tool that produced the diagnostic; becomes the rule name
    /// in `to_lint_warning`.
    pub tool: String,
    /// Line of the code block the diagnostic belongs to (`lint` passes the
    /// block's `start_line + 1`).
    pub code_block_start: usize,
}
66
/// Severity levels for a `CodeBlockDiagnostic`; mapped one-to-one onto
/// `Severity` by `CodeBlockDiagnostic::to_lint_warning`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// Reported as `Severity::Error`.
    Error,
    /// Reported as `Severity::Warning`.
    Warning,
    /// Reported as `Severity::Info`.
    Info,
}
74
75impl CodeBlockDiagnostic {
76 pub fn to_lint_warning(&self) -> LintWarning {
78 let severity = match self.severity {
79 DiagnosticSeverity::Error => Severity::Error,
80 DiagnosticSeverity::Warning => Severity::Warning,
81 DiagnosticSeverity::Info => Severity::Info,
82 };
83
84 LintWarning {
85 message: self.message.clone(),
86 line: self.file_line,
87 column: self.column.unwrap_or(1),
88 end_line: self.file_line,
89 end_column: self.column.unwrap_or(1),
90 severity,
91 fix: None, rule_name: Some(self.tool.clone()),
93 }
94 }
95}
96
/// Errors that abort code-block processing.
#[derive(Debug, Clone)]
pub enum ProcessorError {
    /// A tool invocation failed in the executor.
    ToolError(ExecutorError),
    /// No tools are configured for `language` and the missing-language policy is fail-fast.
    NoToolsConfigured { language: String },
    /// The binary for `tool` was not available and the missing-binary policy is fail-fast.
    ToolBinaryNotFound { tool: String, language: String },
    /// Processing was aborted; `message` carries the reason.
    /// NOTE(review): never constructed in the visible code — confirm where it is raised.
    Aborted { message: String },
}
109
110impl std::fmt::Display for ProcessorError {
111 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112 match self {
113 Self::ToolError(e) => write!(f, "{e}"),
114 Self::NoToolsConfigured { language } => {
115 write!(f, "No tools configured for language '{language}'")
116 }
117 Self::ToolBinaryNotFound { tool, language } => {
118 write!(f, "Tool '{tool}' binary not found for language '{language}'")
119 }
120 Self::Aborted { message } => write!(f, "Processing aborted: {message}"),
121 }
122 }
123}
124
// Marker impl: `ProcessorError` relies on the default `Error` methods (no `source` override).
impl std::error::Error for ProcessorError {}
126
127impl From<ExecutorError> for ProcessorError {
128 fn from(e: ExecutorError) -> Self {
129 Self::ToolError(e)
130 }
131}
132
/// Result of processing a single code block.
///
/// NOTE(review): this type is not constructed anywhere in the visible code; the
/// field descriptions below are inferred from the names — confirm against callers.
#[derive(Debug)]
pub struct CodeBlockResult {
    /// Diagnostics collected for the block.
    pub diagnostics: Vec<CodeBlockDiagnostic>,
    /// Presumably the formatted replacement content, when formatting produced one.
    pub formatted_content: Option<String>,
    /// Presumably whether the block content was changed.
    pub was_modified: bool,
}
143
/// Outcome of `CodeBlockToolProcessor::format`.
#[derive(Debug)]
pub struct FormatOutput {
    /// The document text after formatting (identical to the input when nothing changed).
    pub content: String,
    /// `true` when `error_messages` is non-empty.
    pub had_errors: bool,
    /// Non-fatal problems collected while formatting (missing language
    /// configuration or missing tool binaries under the `Fail` policy).
    pub error_messages: Vec<String>,
}
154
/// Runs configured lint/format tools over the fenced code blocks of a markdown document.
pub struct CodeBlockToolProcessor<'a> {
    /// Borrowed tool configuration (languages, policies, timeout).
    config: &'a CodeBlockToolsConfig,
    /// Language-name resolver used when `normalize_language` is `Linguist`.
    linguist: LinguistResolver,
    /// Tool definitions, built from `config.tools` and looked up by tool id.
    registry: ToolRegistry,
    /// Executes external tools; created with `config.timeout`.
    executor: ToolExecutor,
    /// Lowercased copy of `config.language_aliases` (both keys and values).
    user_aliases: std::collections::HashMap<String, String>,
}
163
164impl<'a> CodeBlockToolProcessor<'a> {
165 pub fn new(config: &'a CodeBlockToolsConfig) -> Self {
167 let user_aliases = config
168 .language_aliases
169 .iter()
170 .map(|(k, v)| (k.to_lowercase(), v.to_lowercase()))
171 .collect();
172 Self {
173 config,
174 linguist: LinguistResolver::new(),
175 registry: ToolRegistry::new(config.tools.clone()),
176 executor: ToolExecutor::new(config.timeout),
177 user_aliases,
178 }
179 }
180
    /// Collects every fenced code block in `content`.
    ///
    /// The document is parsed with pulldown-cmark (all options enabled) and each
    /// fenced block's parser-reported byte range is turned into line numbers,
    /// content offsets and fence metadata. Blocks are returned in document order.
    pub fn extract_code_blocks(&self, content: &str) -> Vec<FencedCodeBlockInfo> {
        let mut blocks = Vec::new();
        // Metadata gathered at the block's Start event, completed at its End event.
        let mut current_block: Option<FencedCodeBlockBuilder> = None;

        let options = Options::all();
        let parser = Parser::new_ext(content, options).into_offset_iter();

        let lines: Vec<&str> = content.lines().collect();

        for (event, range) in parser {
            match event {
                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
                    let info_string = info.to_string();
                    // The language is the first whitespace-separated token of the info string.
                    let language = info_string.split_whitespace().next().unwrap_or("").to_string();

                    // 0-based line index: number of newlines before the block start.
                    let start_line = content[..range.start].chars().filter(|&c| c == '\n').count();

                    // Content begins right after the newline that ends the opening fence line.
                    let content_start = content[range.start..]
                        .find('\n')
                        .map(|i| range.start + i + 1)
                        .unwrap_or(content.len());

                    // Fence metadata is read from the raw source line, not from the parser.
                    let fence_line = lines.get(start_line).unwrap_or(&"");
                    let trimmed = fence_line.trim_start();
                    let indent = fence_line.len() - trimmed.len();
                    let indent_prefix = fence_line.get(..indent).unwrap_or("").to_string();
                    // Anything that does not start with `~` is treated as a backtick fence.
                    let (fence_char, fence_length) = if trimmed.starts_with('~') {
                        ('~', trimmed.chars().take_while(|&c| c == '~').count())
                    } else {
                        ('`', trimmed.chars().take_while(|&c| c == '`').count())
                    };

                    current_block = Some(FencedCodeBlockBuilder {
                        start_line,
                        content_start,
                        language,
                        info_string,
                        fence_char,
                        fence_length,
                        indent,
                        indent_prefix,
                    });
                }
                Event::End(TagEnd::CodeBlock) => {
                    // `take()` ignores End events that have no pending fenced Start.
                    if let Some(builder) = current_block.take() {
                        let end_line = content[..range.end].chars().filter(|&c| c == '\n').count();

                        // Clamp so the slice below can never start past the block end.
                        let search_start = builder.content_start.min(range.end);
                        // Content ends at the last newline inside the range
                        // (presumably the one preceding the closing fence — TODO confirm).
                        let content_end = if search_start < range.end {
                            content[search_start..range.end]
                                .rfind('\n')
                                .map(|i| search_start + i)
                                .unwrap_or(search_start)
                        } else {
                            search_start
                        };

                        // Drop blocks whose computed end falls before their content start.
                        if content_end >= builder.content_start {
                            blocks.push(FencedCodeBlockInfo {
                                start_line: builder.start_line,
                                end_line,
                                content_start: builder.content_start,
                                content_end,
                                language: builder.language,
                                info_string: builder.info_string,
                                fence_char: builder.fence_char,
                                fence_length: builder.fence_length,
                                indent: builder.indent,
                                indent_prefix: builder.indent_prefix,
                            });
                        }
                    }
                }
                _ => {}
            }
        }

        blocks
    }
266
267 fn resolve_language(&self, language: &str) -> String {
269 let lower = language.to_lowercase();
270 if let Some(mapped) = self.user_aliases.get(&lower) {
271 return mapped.clone();
272 }
273 match self.config.normalize_language {
274 NormalizeLanguage::Linguist => self.linguist.resolve(&lower),
275 NormalizeLanguage::Exact => lower,
276 }
277 }
278
279 fn get_on_error(&self, language: &str) -> OnError {
281 self.config
282 .languages
283 .get(language)
284 .and_then(|lc| lc.on_error)
285 .unwrap_or(self.config.on_error)
286 }
287
288 fn strip_indent_from_block(&self, content: &str, indent_prefix: &str) -> String {
290 if indent_prefix.is_empty() {
291 return content.to_string();
292 }
293
294 let mut out = String::with_capacity(content.len());
295 for line in content.split_inclusive('\n') {
296 if let Some(stripped) = line.strip_prefix(indent_prefix) {
297 out.push_str(stripped);
298 } else {
299 out.push_str(line);
300 }
301 }
302 out
303 }
304
305 fn apply_indent_to_block(&self, content: &str, indent_prefix: &str) -> String {
307 if indent_prefix.is_empty() {
308 return content.to_string();
309 }
310 if content.is_empty() {
311 return String::new();
312 }
313
314 let mut out = String::with_capacity(content.len() + indent_prefix.len());
315 for line in content.split_inclusive('\n') {
316 if line == "\n" {
317 out.push_str(line);
318 } else {
319 out.push_str(indent_prefix);
320 out.push_str(line);
321 }
322 }
323 out
324 }
325
    /// Runs the configured lint tools over every fenced code block in `content`.
    ///
    /// Blocks without a language tag are skipped. For each remaining block the
    /// language is canonicalised, its lint tools are looked up, and each tool is
    /// run on the block content with the fence indentation stripped. Tool output
    /// is parsed into diagnostics whose line numbers refer to the markdown file.
    ///
    /// # Errors
    ///
    /// * `ProcessorError::NoToolsConfigured` — no lint tools for a language and
    ///   `on_missing_language_definition` is `FailFast`.
    /// * `ProcessorError::ToolBinaryNotFound` — a tool binary is unavailable and
    ///   `on_missing_tool_binary` is `FailFast`.
    /// * `ProcessorError::ToolError` — a tool run failed and the error policy for
    ///   the language is `OnError::Fail`.
    pub fn lint(&self, content: &str) -> Result<Vec<CodeBlockDiagnostic>, ProcessorError> {
        let mut all_diagnostics = Vec::new();
        let blocks = self.extract_code_blocks(content);

        for block in blocks {
            // Nothing to dispatch on without a language tag.
            if block.language.is_empty() {
                continue;
            }

            let canonical_lang = self.resolve_language(&block.language);

            let lint_tools = match self.config.languages.get(&canonical_lang) {
                Some(lc) if !lc.lint.is_empty() => &lc.lint,
                // Either the language is not configured or it has no lint tools.
                _ => {
                    match self.config.on_missing_language_definition {
                        OnMissing::Ignore => continue,
                        OnMissing::Fail => {
                            // Report on the fence line (start_line is 0-based) and keep going.
                            all_diagnostics.push(CodeBlockDiagnostic {
                                file_line: block.start_line + 1,
                                column: None,
                                message: format!("No lint tools configured for language '{canonical_lang}'"),
                                severity: DiagnosticSeverity::Error,
                                tool: "code-block-tools".to_string(),
                                code_block_start: block.start_line + 1,
                            });
                            continue;
                        }
                        OnMissing::FailFast => {
                            return Err(ProcessorError::NoToolsConfigured {
                                language: canonical_lang,
                            });
                        }
                    }
                }
            };

            // Skip empty blocks and guard the slice against out-of-range offsets.
            let code_content_raw = if block.content_start < block.content_end && block.content_end <= content.len() {
                &content[block.content_start..block.content_end]
            } else {
                continue;
            };
            // Tools see the code without the fence's leading indentation.
            let code_content = self.strip_indent_from_block(code_content_raw, &block.indent_prefix);

            for tool_id in lint_tools {
                // The built-in tool id is not run on markdown blocks here.
                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
                    continue;
                }

                let tool_def = match self.registry.get(tool_id) {
                    Some(t) => t,
                    None => {
                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
                        continue;
                    }
                };

                // The first element of the command is the binary to probe for.
                let tool_name = tool_def.command.first().map(String::as_str).unwrap_or("");
                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
                    match self.config.on_missing_tool_binary {
                        OnMissing::Ignore => {
                            log::debug!("Tool binary '{tool_name}' not found, skipping");
                            continue;
                        }
                        OnMissing::Fail => {
                            all_diagnostics.push(CodeBlockDiagnostic {
                                file_line: block.start_line + 1,
                                column: None,
                                message: format!("Tool binary '{tool_name}' not found in PATH"),
                                severity: DiagnosticSeverity::Error,
                                tool: "code-block-tools".to_string(),
                                code_block_start: block.start_line + 1,
                            });
                            continue;
                        }
                        OnMissing::FailFast => {
                            return Err(ProcessorError::ToolBinaryNotFound {
                                tool: tool_name.to_string(),
                                language: canonical_lang.clone(),
                            });
                        }
                    }
                }

                match self.executor.lint(tool_def, &code_content, Some(self.config.timeout)) {
                    Ok(output) => {
                        // Pass the 1-based fence line so tool line numbers can be
                        // offset into the markdown file.
                        let diagnostics = self.parse_tool_output(
                            &output,
                            tool_id,
                            block.start_line + 1,
                        );
                        all_diagnostics.extend(diagnostics);
                    }
                    Err(e) => {
                        let on_error = self.get_on_error(&canonical_lang);
                        match on_error {
                            OnError::Fail => return Err(e.into()),
                            OnError::Warn => {
                                log::warn!("Tool '{tool_id}' failed: {e}");
                            }
                            // Skip: the failure is intentionally ignored.
                            OnError::Skip => {
                            }
                        }
                    }
                }
            }
        }

        Ok(all_diagnostics)
    }
446
    /// Formats the fenced code blocks of `content` with the configured formatters.
    ///
    /// For each block with a language tag, the configured format tools are tried
    /// in order and the first one that succeeds supplies the new content. The
    /// block is rewritten only when a tool ran and its output differs from the
    /// original. Non-fatal problems are collected in `FormatOutput::error_messages`.
    ///
    /// # Errors
    ///
    /// * `ProcessorError::NoToolsConfigured` — no format tools for a language and
    ///   `on_missing_language_definition` is `FailFast`.
    /// * `ProcessorError::ToolBinaryNotFound` — a tool binary is unavailable and
    ///   `on_missing_tool_binary` is `FailFast`.
    /// * `ProcessorError::ToolError` — a formatter failed and the error policy
    ///   for the language is `OnError::Fail`.
    pub fn format(&self, content: &str) -> Result<FormatOutput, ProcessorError> {
        let blocks = self.extract_code_blocks(content);

        if blocks.is_empty() {
            return Ok(FormatOutput {
                content: content.to_string(),
                had_errors: false,
                error_messages: Vec::new(),
            });
        }

        let mut result = content.to_string();
        let mut error_messages: Vec<String> = Vec::new();

        // Walk blocks back to front: replacing a later block cannot shift the
        // byte offsets of the earlier blocks still to be processed.
        for block in blocks.into_iter().rev() {
            if block.language.is_empty() {
                continue;
            }

            let canonical_lang = self.resolve_language(&block.language);

            let format_tools = match self.config.languages.get(&canonical_lang) {
                Some(lc) if !lc.format.is_empty() => &lc.format,
                // Either the language is not configured or it has no formatters.
                _ => {
                    match self.config.on_missing_language_definition {
                        OnMissing::Ignore => continue,
                        OnMissing::Fail => {
                            error_messages.push(format!(
                                "No format tools configured for language '{canonical_lang}' at line {}",
                                block.start_line + 1
                            ));
                            continue;
                        }
                        OnMissing::FailFast => {
                            return Err(ProcessorError::NoToolsConfigured {
                                language: canonical_lang,
                            });
                        }
                    }
                }
            };

            // Skip empty blocks and guard the slice against out-of-range offsets.
            if block.content_start >= block.content_end || block.content_end > result.len() {
                continue;
            }
            let code_content_raw = result[block.content_start..block.content_end].to_string();
            // Formatters see the code without the fence's leading indentation.
            let code_content = self.strip_indent_from_block(&code_content_raw, &block.indent_prefix);

            let mut formatted = code_content.clone();
            // Set once a formatter has produced output for this block.
            let mut tool_ran = false;
            for tool_id in format_tools {
                // The built-in tool id is not run on markdown blocks here.
                if tool_id == RUMDL_BUILTIN_TOOL && is_markdown_language(&canonical_lang) {
                    continue;
                }

                let tool_def = match self.registry.get(tool_id) {
                    Some(t) => t,
                    None => {
                        log::warn!("Unknown tool '{tool_id}' configured for language '{canonical_lang}'");
                        continue;
                    }
                };

                // The first element of the command is the binary to probe for.
                let tool_name = tool_def.command.first().map(String::as_str).unwrap_or("");
                if !tool_name.is_empty() && !self.executor.is_tool_available(tool_name) {
                    match self.config.on_missing_tool_binary {
                        OnMissing::Ignore => {
                            log::debug!("Tool binary '{tool_name}' not found, skipping");
                            continue;
                        }
                        OnMissing::Fail => {
                            error_messages.push(format!(
                                "Tool binary '{tool_name}' not found in PATH for language '{canonical_lang}' at line {}",
                                block.start_line + 1
                            ));
                            continue;
                        }
                        OnMissing::FailFast => {
                            return Err(ProcessorError::ToolBinaryNotFound {
                                tool: tool_name.to_string(),
                                language: canonical_lang.clone(),
                            });
                        }
                    }
                }

                match self.executor.format(tool_def, &formatted, Some(self.config.timeout)) {
                    Ok(output) => {
                        formatted = output;
                        // Keep the trailing-newline state of the original content.
                        if code_content.ends_with('\n') && !formatted.ends_with('\n') {
                            formatted.push('\n');
                        } else if !code_content.ends_with('\n') && formatted.ends_with('\n') {
                            formatted.pop();
                        }
                        tool_ran = true;
                        // The first formatter that succeeds wins.
                        break;
                    }
                    Err(e) => {
                        let on_error = self.get_on_error(&canonical_lang);
                        match on_error {
                            OnError::Fail => return Err(e.into()),
                            OnError::Warn => {
                                log::warn!("Formatter '{tool_id}' failed: {e}");
                            }
                            // Skip: the failure is ignored and the next tool is tried.
                            OnError::Skip => {}
                        }
                    }
                }
            }

            if tool_ran && formatted != code_content {
                // Restore the fence indentation before writing back.
                let reindented = self.apply_indent_to_block(&formatted, &block.indent_prefix);
                if reindented != code_content_raw {
                    result.replace_range(block.content_start..block.content_end, &reindented);
                }
            }
        }

        Ok(FormatOutput {
            content: result,
            had_errors: !error_messages.is_empty(),
            error_messages,
        })
    }
585
586 fn parse_tool_output(
591 &self,
592 output: &ToolOutput,
593 tool_id: &str,
594 code_block_start_line: usize,
595 ) -> Vec<CodeBlockDiagnostic> {
596 let mut diagnostics = Vec::new();
597 let mut shellcheck_line: Option<usize> = None;
598
599 let stdout = &output.stdout;
601 let stderr = &output.stderr;
602 let combined = format!("{stdout}\n{stderr}");
603
604 for line in combined.lines() {
611 let line = line.trim();
612 if line.is_empty() {
613 continue;
614 }
615
616 if let Some(line_num) = self.parse_shellcheck_header(line) {
617 shellcheck_line = Some(line_num);
618 continue;
619 }
620
621 if let Some(line_num) = shellcheck_line
622 && let Some(diag) = self.parse_shellcheck_message(line, tool_id, code_block_start_line, line_num)
623 {
624 diagnostics.push(diag);
625 continue;
626 }
627
628 if let Some(diag) = self.parse_standard_format(line, tool_id, code_block_start_line) {
630 diagnostics.push(diag);
631 continue;
632 }
633
634 if let Some(diag) = self.parse_eslint_format(line, tool_id, code_block_start_line) {
636 diagnostics.push(diag);
637 continue;
638 }
639
640 if let Some(diag) = self.parse_shellcheck_format(line, tool_id, code_block_start_line) {
642 diagnostics.push(diag);
643 }
644 }
645
646 if diagnostics.is_empty() && !output.success {
648 let message = if !output.stderr.is_empty() {
649 output.stderr.lines().next().unwrap_or("Tool failed").to_string()
650 } else if !output.stdout.is_empty() {
651 output.stdout.lines().next().unwrap_or("Tool failed").to_string()
652 } else {
653 let exit_code = output.exit_code;
654 format!("Tool exited with code {exit_code}")
655 };
656
657 diagnostics.push(CodeBlockDiagnostic {
658 file_line: code_block_start_line,
659 column: None,
660 message,
661 severity: DiagnosticSeverity::Error,
662 tool: tool_id.to_string(),
663 code_block_start: code_block_start_line,
664 });
665 }
666
667 diagnostics
668 }
669
670 fn parse_standard_format(
672 &self,
673 line: &str,
674 tool_id: &str,
675 code_block_start_line: usize,
676 ) -> Option<CodeBlockDiagnostic> {
677 let mut parts = line.rsplitn(4, ':');
679 let message = parts.next()?.trim().to_string();
680 let part1 = parts.next()?.trim().to_string();
681 let part2 = parts.next()?.trim().to_string();
682 let part3 = parts.next().map(|s| s.trim().to_string());
683
684 let (line_part, col_part) = if part3.is_some() {
685 (part2, Some(part1))
686 } else {
687 (part1, None)
688 };
689
690 if let Ok(line_num) = line_part.parse::<usize>() {
691 let column = col_part.and_then(|s| s.parse::<usize>().ok());
692 let message = Self::strip_fixable_markers(&message);
693 if !message.is_empty() {
694 let severity = self.infer_severity(&message);
695 return Some(CodeBlockDiagnostic {
696 file_line: code_block_start_line + line_num,
697 column,
698 message,
699 severity,
700 tool: tool_id.to_string(),
701 code_block_start: code_block_start_line,
702 });
703 }
704 }
705 None
706 }
707
708 fn parse_eslint_format(
710 &self,
711 line: &str,
712 tool_id: &str,
713 code_block_start_line: usize,
714 ) -> Option<CodeBlockDiagnostic> {
715 let parts: Vec<&str> = line.splitn(3, ' ').collect();
717 if parts.len() >= 2 {
718 let loc_parts: Vec<&str> = parts[0].split(':').collect();
719 if loc_parts.len() == 2
720 && let (Ok(line_num), Ok(col)) = (loc_parts[0].parse::<usize>(), loc_parts[1].parse::<usize>())
721 {
722 let (sev_part, msg_part) = if parts.len() >= 3 {
723 (parts[1], parts[2])
724 } else {
725 (parts[1], "")
726 };
727 let message = if msg_part.is_empty() {
728 sev_part.to_string()
729 } else {
730 msg_part.to_string()
731 };
732 let message = Self::strip_fixable_markers(&message);
733 let severity = match sev_part.to_lowercase().as_str() {
734 "error" => DiagnosticSeverity::Error,
735 "warning" | "warn" => DiagnosticSeverity::Warning,
736 "info" => DiagnosticSeverity::Info,
737 _ => self.infer_severity(&message),
738 };
739 return Some(CodeBlockDiagnostic {
740 file_line: code_block_start_line + line_num,
741 column: Some(col),
742 message,
743 severity,
744 tool: tool_id.to_string(),
745 code_block_start: code_block_start_line,
746 });
747 }
748 }
749 None
750 }
751
752 fn parse_shellcheck_format(
754 &self,
755 line: &str,
756 tool_id: &str,
757 code_block_start_line: usize,
758 ) -> Option<CodeBlockDiagnostic> {
759 if line.starts_with("In ")
761 && line.contains(" line ")
762 && let Some(line_start) = line.find(" line ")
763 {
764 let after_line = &line[line_start + 6..];
765 if let Some(colon_pos) = after_line.find(':')
766 && let Ok(line_num) = after_line[..colon_pos].trim().parse::<usize>()
767 {
768 let message = Self::strip_fixable_markers(after_line[colon_pos + 1..].trim());
769 if !message.is_empty() {
770 let severity = self.infer_severity(&message);
771 return Some(CodeBlockDiagnostic {
772 file_line: code_block_start_line + line_num,
773 column: None,
774 message,
775 severity,
776 tool: tool_id.to_string(),
777 code_block_start: code_block_start_line,
778 });
779 }
780 }
781 }
782 None
783 }
784
785 fn parse_shellcheck_header(&self, line: &str) -> Option<usize> {
787 if line.starts_with("In ")
788 && line.contains(" line ")
789 && let Some(line_start) = line.find(" line ")
790 {
791 let after_line = &line[line_start + 6..];
792 if let Some(colon_pos) = after_line.find(':') {
793 return after_line[..colon_pos].trim().parse::<usize>().ok();
794 }
795 }
796 None
797 }
798
799 fn parse_shellcheck_message(
801 &self,
802 line: &str,
803 tool_id: &str,
804 code_block_start_line: usize,
805 line_num: usize,
806 ) -> Option<CodeBlockDiagnostic> {
807 let sc_pos = line.find("SC")?;
808 let after_sc = &line[sc_pos + 2..];
809 let code_len = after_sc.chars().take_while(|c| c.is_ascii_digit()).count();
810 if code_len == 0 {
811 return None;
812 }
813 let after_code = &after_sc[code_len..];
814 let sev_start = after_code.find('(')? + 1;
815 let sev_end = after_code[sev_start..].find(')')? + sev_start;
816 let sev = after_code[sev_start..sev_end].trim().to_lowercase();
817 let message_start = after_code.find("):")? + 2;
818 let message = Self::strip_fixable_markers(after_code[message_start..].trim());
819 if message.is_empty() {
820 return None;
821 }
822
823 let severity = match sev.as_str() {
824 "error" => DiagnosticSeverity::Error,
825 "warning" | "warn" => DiagnosticSeverity::Warning,
826 "info" | "style" => DiagnosticSeverity::Info,
827 _ => self.infer_severity(&message),
828 };
829
830 Some(CodeBlockDiagnostic {
831 file_line: code_block_start_line + line_num,
832 column: None,
833 message,
834 severity,
835 tool: tool_id.to_string(),
836 code_block_start: code_block_start_line,
837 })
838 }
839
840 fn infer_severity(&self, message: &str) -> DiagnosticSeverity {
842 let lower = message.to_lowercase();
843 if lower.contains("error")
844 || lower.starts_with("e") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
845 || lower.starts_with("f") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
846 {
847 DiagnosticSeverity::Error
848 } else if lower.contains("warning")
849 || lower.contains("warn")
850 || lower.starts_with("w") && lower.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
851 {
852 DiagnosticSeverity::Warning
853 } else {
854 DiagnosticSeverity::Info
855 }
856 }
857
858 fn strip_fixable_markers(message: &str) -> String {
865 message
866 .replace(" [*]", "")
867 .replace("[*] ", "")
868 .replace("[*]", "")
869 .replace(" (fixable)", "")
870 .replace("(fixable) ", "")
871 .replace("(fixable)", "")
872 .replace(" [fix available]", "")
873 .replace("[fix available] ", "")
874 .replace("[fix available]", "")
875 .replace(" [autofix]", "")
876 .replace("[autofix] ", "")
877 .replace("[autofix]", "")
878 .trim()
879 .to_string()
880 }
881}
882
/// Fence metadata captured when a fenced block starts in `extract_code_blocks`,
/// held until the block's end event supplies the remaining positions.
struct FencedCodeBlockBuilder {
    /// 0-based line index of the opening fence.
    start_line: usize,
    /// Byte offset where the block content begins.
    content_start: usize,
    /// First whitespace-separated token of the info string.
    language: String,
    /// Full info string of the opening fence.
    info_string: String,
    /// Fence character (`'~'` or `` '`' ``).
    fence_char: char,
    /// Number of fence characters at the start of the trimmed fence line.
    fence_length: usize,
    /// Byte length of the fence line's leading whitespace.
    indent: usize,
    /// The fence line's leading whitespace.
    indent_prefix: String,
}
894
895#[cfg(test)]
896mod tests {
897 use super::*;
898
899 fn default_config() -> CodeBlockToolsConfig {
900 CodeBlockToolsConfig::default()
901 }
902
903 #[test]
904 fn test_extract_code_blocks() {
905 let config = default_config();
906 let processor = CodeBlockToolProcessor::new(&config);
907
908 let content = r#"# Example
909
910```python
911def hello():
912 print("Hello")
913```
914
915Some text
916
917```rust
918fn main() {}
919```
920"#;
921
922 let blocks = processor.extract_code_blocks(content);
923
924 assert_eq!(blocks.len(), 2);
925
926 assert_eq!(blocks[0].language, "python");
927 assert_eq!(blocks[0].fence_char, '`');
928 assert_eq!(blocks[0].fence_length, 3);
929 assert_eq!(blocks[0].start_line, 2);
930 assert_eq!(blocks[0].indent, 0);
931 assert_eq!(blocks[0].indent_prefix, "");
932
933 assert_eq!(blocks[1].language, "rust");
934 assert_eq!(blocks[1].fence_char, '`');
935 assert_eq!(blocks[1].fence_length, 3);
936 }
937
938 #[test]
939 fn test_extract_code_blocks_with_info_string() {
940 let config = default_config();
941 let processor = CodeBlockToolProcessor::new(&config);
942
943 let content = "```python title=\"example.py\"\ncode\n```";
944 let blocks = processor.extract_code_blocks(content);
945
946 assert_eq!(blocks.len(), 1);
947 assert_eq!(blocks[0].language, "python");
948 assert_eq!(blocks[0].info_string, "python title=\"example.py\"");
949 }
950
951 #[test]
952 fn test_extract_code_blocks_tilde_fence() {
953 let config = default_config();
954 let processor = CodeBlockToolProcessor::new(&config);
955
956 let content = "~~~bash\necho hello\n~~~";
957 let blocks = processor.extract_code_blocks(content);
958
959 assert_eq!(blocks.len(), 1);
960 assert_eq!(blocks[0].language, "bash");
961 assert_eq!(blocks[0].fence_char, '~');
962 assert_eq!(blocks[0].fence_length, 3);
963 assert_eq!(blocks[0].indent_prefix, "");
964 }
965
966 #[test]
967 fn test_extract_code_blocks_with_indent_prefix() {
968 let config = default_config();
969 let processor = CodeBlockToolProcessor::new(&config);
970
971 let content = " - item\n ```python\n print('hi')\n ```";
972 let blocks = processor.extract_code_blocks(content);
973
974 assert_eq!(blocks.len(), 1);
975 assert_eq!(blocks[0].indent_prefix, " ");
976 }
977
978 #[test]
979 fn test_extract_code_blocks_no_language() {
980 let config = default_config();
981 let processor = CodeBlockToolProcessor::new(&config);
982
983 let content = "```\nplain code\n```";
984 let blocks = processor.extract_code_blocks(content);
985
986 assert_eq!(blocks.len(), 1);
987 assert_eq!(blocks[0].language, "");
988 }
989
990 #[test]
991 fn test_resolve_language_linguist() {
992 let mut config = default_config();
993 config.normalize_language = NormalizeLanguage::Linguist;
994 let processor = CodeBlockToolProcessor::new(&config);
995
996 assert_eq!(processor.resolve_language("py"), "python");
997 assert_eq!(processor.resolve_language("bash"), "shell");
998 assert_eq!(processor.resolve_language("js"), "javascript");
999 }
1000
1001 #[test]
1002 fn test_resolve_language_exact() {
1003 let mut config = default_config();
1004 config.normalize_language = NormalizeLanguage::Exact;
1005 let processor = CodeBlockToolProcessor::new(&config);
1006
1007 assert_eq!(processor.resolve_language("py"), "py");
1008 assert_eq!(processor.resolve_language("BASH"), "bash");
1009 }
1010
1011 #[test]
1012 fn test_resolve_language_user_alias_override() {
1013 let mut config = default_config();
1014 config.language_aliases.insert("py".to_string(), "python".to_string());
1015 config.normalize_language = NormalizeLanguage::Exact;
1016 let processor = CodeBlockToolProcessor::new(&config);
1017
1018 assert_eq!(processor.resolve_language("PY"), "python");
1019 }
1020
1021 #[test]
1022 fn test_indent_strip_and_reapply_roundtrip() {
1023 let config = default_config();
1024 let processor = CodeBlockToolProcessor::new(&config);
1025
1026 let raw = " def hello():\n print('hi')";
1027 let stripped = processor.strip_indent_from_block(raw, " ");
1028 assert_eq!(stripped, "def hello():\n print('hi')");
1029
1030 let reapplied = processor.apply_indent_to_block(&stripped, " ");
1031 assert_eq!(reapplied, raw);
1032 }
1033
1034 #[test]
1035 fn test_infer_severity() {
1036 let config = default_config();
1037 let processor = CodeBlockToolProcessor::new(&config);
1038
1039 assert_eq!(
1040 processor.infer_severity("E501 line too long"),
1041 DiagnosticSeverity::Error
1042 );
1043 assert_eq!(
1044 processor.infer_severity("W291 trailing whitespace"),
1045 DiagnosticSeverity::Warning
1046 );
1047 assert_eq!(
1048 processor.infer_severity("error: something failed"),
1049 DiagnosticSeverity::Error
1050 );
1051 assert_eq!(
1052 processor.infer_severity("warning: unused variable"),
1053 DiagnosticSeverity::Warning
1054 );
1055 assert_eq!(
1056 processor.infer_severity("note: consider using"),
1057 DiagnosticSeverity::Info
1058 );
1059 }
1060
1061 #[test]
1062 fn test_parse_standard_format_windows_path() {
1063 let config = default_config();
1064 let processor = CodeBlockToolProcessor::new(&config);
1065
1066 let output = ToolOutput {
1067 stdout: "C:\\path\\file.py:2:5: E123 message".to_string(),
1068 stderr: String::new(),
1069 exit_code: 1,
1070 success: false,
1071 };
1072
1073 let diags = processor.parse_tool_output(&output, "ruff:check", 10);
1074 assert_eq!(diags.len(), 1);
1075 assert_eq!(diags[0].file_line, 12);
1076 assert_eq!(diags[0].column, Some(5));
1077 assert_eq!(diags[0].message, "E123 message");
1078 }
1079
1080 #[test]
1081 fn test_parse_eslint_severity() {
1082 let config = default_config();
1083 let processor = CodeBlockToolProcessor::new(&config);
1084
1085 let output = ToolOutput {
1086 stdout: "1:2 error Unexpected token".to_string(),
1087 stderr: String::new(),
1088 exit_code: 1,
1089 success: false,
1090 };
1091
1092 let diags = processor.parse_tool_output(&output, "eslint", 5);
1093 assert_eq!(diags.len(), 1);
1094 assert_eq!(diags[0].file_line, 6);
1095 assert_eq!(diags[0].column, Some(2));
1096 assert_eq!(diags[0].severity, DiagnosticSeverity::Error);
1097 assert_eq!(diags[0].message, "Unexpected token");
1098 }
1099
1100 #[test]
1101 fn test_parse_shellcheck_multiline() {
1102 let config = default_config();
1103 let processor = CodeBlockToolProcessor::new(&config);
1104
1105 let output = ToolOutput {
1106 stdout: "In - line 3:\necho $var\n ^-- SC2086 (info): Double quote to prevent globbing".to_string(),
1107 stderr: String::new(),
1108 exit_code: 1,
1109 success: false,
1110 };
1111
1112 let diags = processor.parse_tool_output(&output, "shellcheck", 10);
1113 assert_eq!(diags.len(), 1);
1114 assert_eq!(diags[0].file_line, 13);
1115 assert_eq!(diags[0].severity, DiagnosticSeverity::Info);
1116 assert_eq!(diags[0].message, "Double quote to prevent globbing");
1117 }
1118
1119 #[test]
1120 fn test_lint_no_config() {
1121 let config = default_config();
1122 let processor = CodeBlockToolProcessor::new(&config);
1123
1124 let content = "```python\nprint('hello')\n```";
1125 let result = processor.lint(content);
1126
1127 assert!(result.is_ok());
1129 assert!(result.unwrap().is_empty());
1130 }
1131
1132 #[test]
1133 fn test_format_no_config() {
1134 let config = default_config();
1135 let processor = CodeBlockToolProcessor::new(&config);
1136
1137 let content = "```python\nprint('hello')\n```";
1138 let result = processor.format(content);
1139
1140 assert!(result.is_ok());
1142 let output = result.unwrap();
1143 assert_eq!(output.content, content);
1144 assert!(!output.had_errors);
1145 assert!(output.error_messages.is_empty());
1146 }
1147
1148 #[test]
1149 fn test_lint_on_missing_language_definition_fail() {
1150 let mut config = default_config();
1151 config.on_missing_language_definition = OnMissing::Fail;
1152 let processor = CodeBlockToolProcessor::new(&config);
1153
1154 let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1155 let result = processor.lint(content);
1156
1157 assert!(result.is_ok());
1159 let diagnostics = result.unwrap();
1160 assert_eq!(diagnostics.len(), 2);
1161 assert!(diagnostics[0].message.contains("No lint tools configured"));
1162 assert!(diagnostics[0].message.contains("python"));
1163 assert!(diagnostics[1].message.contains("javascript"));
1164 }
1165
1166 #[test]
1167 fn test_lint_on_missing_language_definition_fail_fast() {
1168 let mut config = default_config();
1169 config.on_missing_language_definition = OnMissing::FailFast;
1170 let processor = CodeBlockToolProcessor::new(&config);
1171
1172 let content = "```python\nprint('hello')\n```\n\n```javascript\nconsole.log('hi');\n```";
1173 let result = processor.lint(content);
1174
1175 assert!(result.is_err());
1177 let err = result.unwrap_err();
1178 assert!(matches!(err, ProcessorError::NoToolsConfigured { .. }));
1179 }
1180
1181 #[test]
1182 fn test_format_on_missing_language_definition_fail() {
1183 let mut config = default_config();
1184 config.on_missing_language_definition = OnMissing::Fail;
1185 let processor = CodeBlockToolProcessor::new(&config);
1186
1187 let content = "```python\nprint('hello')\n```";
1188 let result = processor.format(content);
1189
1190 assert!(result.is_ok());
1192 let output = result.unwrap();
1193 assert_eq!(output.content, content); assert!(output.had_errors);
1195 assert!(!output.error_messages.is_empty());
1196 assert!(output.error_messages[0].contains("No format tools configured"));
1197 }
1198
/// Formatting with `OnMissing::FailFast` for missing language definitions must
/// return `ProcessorError::NoToolsConfigured` instead of an output value.
#[test]
fn test_format_on_missing_language_definition_fail_fast() {
    let mut cfg = default_config();
    cfg.on_missing_language_definition = OnMissing::FailFast;

    let markdown = "```python\nprint('hello')\n```";
    let tool_processor = CodeBlockToolProcessor::new(&cfg);

    // Only an `Err` carrying the `NoToolsConfigured` variant is acceptable.
    match tool_processor.format(markdown) {
        Err(ProcessorError::NoToolsConfigured { .. }) => {}
        other => panic!("expected Err(NoToolsConfigured), got {:?}", other),
    }
}
1213
/// With `OnMissing::Fail` for missing tool binaries, linting returns `Ok` and
/// reports the unavailable binary as a single diagnostic.
#[test]
fn test_lint_on_missing_tool_binary_fail() {
    use super::super::config::{LanguageToolConfig, ToolDefinition};

    let mut cfg = default_config();
    cfg.on_missing_tool_binary = OnMissing::Fail;

    // Define a tool whose command is not expected to resolve to any binary...
    cfg.tools.insert(
        String::from("nonexistent-linter"),
        ToolDefinition {
            command: vec![String::from("nonexistent-binary-xyz123")],
            ..Default::default()
        },
    );
    // ...and register it as the linter for python blocks.
    cfg.languages.insert(
        String::from("python"),
        LanguageToolConfig {
            lint: vec![String::from("nonexistent-linter")],
            ..Default::default()
        },
    );

    let tool_processor = CodeBlockToolProcessor::new(&cfg);
    let markdown = "```python\nprint('hello')\n```";

    let diagnostics = tool_processor
        .lint(markdown)
        .expect("lint should return Ok when the policy is OnMissing::Fail");

    // Exactly one diagnostic, describing the missing binary.
    assert_eq!(diagnostics.len(), 1);
    let only = &diagnostics[0];
    assert!(only.message.contains("not found in PATH"));
}
1245
/// With `OnMissing::FailFast` for missing tool binaries, linting must return
/// `ProcessorError::ToolBinaryNotFound`.
#[test]
fn test_lint_on_missing_tool_binary_fail_fast() {
    use super::super::config::{LanguageToolConfig, ToolDefinition};

    let mut cfg = default_config();
    cfg.on_missing_tool_binary = OnMissing::FailFast;

    // Define a tool whose command is not expected to resolve to any binary...
    cfg.tools.insert(
        String::from("nonexistent-linter"),
        ToolDefinition {
            command: vec![String::from("nonexistent-binary-xyz123")],
            ..Default::default()
        },
    );
    // ...and register it as the linter for python blocks.
    cfg.languages.insert(
        String::from("python"),
        LanguageToolConfig {
            lint: vec![String::from("nonexistent-linter")],
            ..Default::default()
        },
    );

    let tool_processor = CodeBlockToolProcessor::new(&cfg);
    let markdown = "```python\nprint('hello')\n```";

    // Only an `Err` carrying the `ToolBinaryNotFound` variant is acceptable.
    match tool_processor.lint(markdown) {
        Err(ProcessorError::ToolBinaryNotFound { .. }) => {}
        other => panic!("expected Err(ToolBinaryNotFound), got {:?}", other),
    }
}
1276
/// With `OnMissing::Fail` for missing tool binaries, formatting returns `Ok`,
/// leaves the content unchanged, and records the missing binary as an error.
#[test]
fn test_format_on_missing_tool_binary_fail() {
    use super::super::config::{LanguageToolConfig, ToolDefinition};

    let mut cfg = default_config();
    cfg.on_missing_tool_binary = OnMissing::Fail;

    // Define a tool whose command is not expected to resolve to any binary...
    cfg.tools.insert(
        String::from("nonexistent-formatter"),
        ToolDefinition {
            command: vec![String::from("nonexistent-binary-xyz123")],
            ..Default::default()
        },
    );
    // ...and register it as the formatter for python blocks.
    cfg.languages.insert(
        String::from("python"),
        LanguageToolConfig {
            format: vec![String::from("nonexistent-formatter")],
            ..Default::default()
        },
    );

    let tool_processor = CodeBlockToolProcessor::new(&cfg);
    let markdown = "```python\nprint('hello')\n```";

    let formatted = tool_processor
        .format(markdown)
        .expect("format should return Ok when the policy is OnMissing::Fail");

    // The document must come back exactly as it went in.
    assert_eq!(formatted.content, markdown);
    assert!(formatted.had_errors);

    // At least one message is recorded and the first one names the cause.
    let messages = &formatted.error_messages;
    assert!(!messages.is_empty());
    assert!(messages[0].contains("not found in PATH"));
}
1310
/// With `OnMissing::FailFast` for missing tool binaries, formatting must
/// return `ProcessorError::ToolBinaryNotFound`.
#[test]
fn test_format_on_missing_tool_binary_fail_fast() {
    use super::super::config::{LanguageToolConfig, ToolDefinition};

    let mut cfg = default_config();
    cfg.on_missing_tool_binary = OnMissing::FailFast;

    // Define a tool whose command is not expected to resolve to any binary...
    cfg.tools.insert(
        String::from("nonexistent-formatter"),
        ToolDefinition {
            command: vec![String::from("nonexistent-binary-xyz123")],
            ..Default::default()
        },
    );
    // ...and register it as the formatter for python blocks.
    cfg.languages.insert(
        String::from("python"),
        LanguageToolConfig {
            format: vec![String::from("nonexistent-formatter")],
            ..Default::default()
        },
    );

    let tool_processor = CodeBlockToolProcessor::new(&cfg);
    let markdown = "```python\nprint('hello')\n```";

    // Only an `Err` carrying the `ToolBinaryNotFound` variant is acceptable.
    match tool_processor.format(markdown) {
        Err(ProcessorError::ToolBinaryNotFound { .. }) => {}
        other => panic!("expected Err(ToolBinaryNotFound), got {:?}", other),
    }
}
1341
/// Linting a markdown block whose only lint tool is the built-in
/// `RUMDL_BUILTIN_TOOL` must succeed and yield no diagnostics.
///
/// `OnMissing::Fail` is enabled for missing language definitions, presumably
/// so that any "missing definition" report would surface as a diagnostic and
/// make the final emptiness check fail.
#[test]
fn test_lint_rumdl_builtin_skipped_for_markdown() {
    let markdown_tools = LanguageToolConfig {
        lint: vec![String::from(RUMDL_BUILTIN_TOOL)],
        format: Vec::new(),
        on_error: None,
    };

    let mut cfg = default_config();
    cfg.on_missing_language_definition = OnMissing::Fail;
    cfg.languages.insert(String::from("markdown"), markdown_tools);

    let tool_processor = CodeBlockToolProcessor::new(&cfg);
    let doc = "```markdown\n# Hello\n```";

    let diagnostics = tool_processor
        .lint(doc)
        .expect("lint should return Ok for a markdown block using the built-in tool");
    assert!(diagnostics.is_empty());
}
1365
/// Formatting a markdown block whose only format tool is the built-in
/// `RUMDL_BUILTIN_TOOL` must succeed, leave the content unchanged, and
/// report no errors.
#[test]
fn test_format_rumdl_builtin_skipped_for_markdown() {
    let markdown_tools = LanguageToolConfig {
        lint: Vec::new(),
        format: vec![String::from(RUMDL_BUILTIN_TOOL)],
        on_error: None,
    };

    let mut cfg = default_config();
    cfg.languages.insert(String::from("markdown"), markdown_tools);

    let tool_processor = CodeBlockToolProcessor::new(&cfg);
    let doc = "```markdown\n# Hello\n```";

    let formatted = tool_processor
        .format(doc)
        .expect("format should return Ok for a markdown block using the built-in tool");

    // Nothing was rewritten and nothing was flagged.
    assert_eq!(formatted.content, doc);
    assert!(!formatted.had_errors);
}
1389
/// `is_markdown_language` accepts "markdown" and "md" in any of the tested
/// letter cases, and rejects other language names and the empty string.
#[test]
fn test_is_markdown_language() {
    // Spellings that must be recognised as markdown.
    for accepted in ["markdown", "Markdown", "MARKDOWN", "md", "MD"] {
        assert!(
            is_markdown_language(accepted),
            "{:?} should be treated as markdown",
            accepted
        );
    }

    // Names that must not be recognised, including the empty info string.
    for rejected in ["python", "rust", ""] {
        assert!(
            !is_markdown_language(rejected),
            "{:?} should not be treated as markdown",
            rejected
        );
    }
}
1402}