probe_code/extract/
formatter.rs

1//! Functions for formatting and printing extraction results.
2//!
3//! This module provides functions for formatting and printing extraction results
4//! in various formats (terminal, markdown, plain, json, xml, color).
5
6use anyhow::Result;
7use probe_code::models::SearchResult;
8use probe_code::search::search_tokens::count_tokens;
9use serde::Serialize;
10use std::fmt::Write as FmtWrite;
11use std::path::Path;
12
13/// A single internal function that handles both dry-run and non-dry-run formatting.
14///
15/// # Arguments
16///
17/// * `results` - The search results to format
18/// * `format` - The output format (terminal, markdown, plain, json, or color)
19/// * `original_input` - Optional original user input
20/// * `system_prompt` - Optional system prompt for LLM models
21/// * `user_instructions` - Optional user instructions for LLM models
22/// * `is_dry_run` - Whether this is a dry-run request (only file names/line numbers)
23fn format_extraction_internal(
24    results: &[SearchResult],
25    format: &str,
26    original_input: Option<&str>,
27    system_prompt: Option<&str>,
28    user_instructions: Option<&str>,
29    is_dry_run: bool,
30) -> Result<String> {
31    let mut output = String::new();
32
33    match format {
34        // ---------------------------------------
35        // JSON output
36        // ---------------------------------------
37        "json" => {
38            if is_dry_run {
39                // DRY-RUN JSON structure
40                #[derive(Serialize)]
41                struct JsonDryRunResult<'a> {
42                    file: &'a str,
43                    #[serde(serialize_with = "serialize_lines_as_array")]
44                    lines: (usize, usize),
45                    node_type: &'a str,
46                }
47
48                // Helper function to serialize lines as an array
49                fn serialize_lines_as_array<S>(
50                    lines: &(usize, usize),
51                    serializer: S,
52                ) -> std::result::Result<S::Ok, S::Error>
53                where
54                    S: serde::Serializer,
55                {
56                    use serde::ser::SerializeSeq;
57                    let mut seq = serializer.serialize_seq(Some(2))?;
58                    seq.serialize_element(&lines.0)?;
59                    seq.serialize_element(&lines.1)?;
60                    seq.end()
61                }
62
63                let json_results: Vec<JsonDryRunResult> = results
64                    .iter()
65                    .map(|r| JsonDryRunResult {
66                        file: &r.file,
67                        lines: r.lines,
68                        node_type: &r.node_type,
69                    })
70                    .collect();
71
72                // Create a wrapper object with results and summary
73                let mut wrapper = serde_json::json!({
74                    "results": json_results,
75                    "summary": {
76                        "count": results.len(),
77                    }
78                });
79
80                // Add system prompt, user instructions, and original_input if provided
81                if let Some(prompt) = system_prompt {
82                    wrapper["system_prompt"] = serde_json::Value::String(prompt.to_string());
83                }
84
85                if let Some(instructions) = user_instructions {
86                    wrapper["user_instructions"] =
87                        serde_json::Value::String(instructions.to_string());
88                }
89
90                if let Some(input) = original_input {
91                    wrapper["original_input"] = serde_json::Value::String(input.to_string());
92                }
93
94                write!(output, "{}", serde_json::to_string_pretty(&wrapper)?)?;
95            } else {
96                // NON-DRY-RUN JSON structure
97                #[derive(Serialize)]
98                struct JsonResult<'a> {
99                    file: &'a str,
100                    #[serde(serialize_with = "serialize_lines_as_array")]
101                    lines: (usize, usize),
102                    node_type: &'a str,
103                    code: &'a str,
104                    #[serde(skip_serializing_if = "Option::is_none")]
105                    original_input: Option<&'a str>,
106                }
107
108                // Helper function to serialize lines as an array
109                fn serialize_lines_as_array<S>(
110                    lines: &(usize, usize),
111                    serializer: S,
112                ) -> std::result::Result<S::Ok, S::Error>
113                where
114                    S: serde::Serializer,
115                {
116                    use serde::ser::SerializeSeq;
117                    let mut seq = serializer.serialize_seq(Some(2))?;
118                    seq.serialize_element(&lines.0)?;
119                    seq.serialize_element(&lines.1)?;
120                    seq.end()
121                }
122
123                let json_results: Vec<JsonResult> = results
124                    .iter()
125                    .map(|r| JsonResult {
126                        file: &r.file,
127                        lines: r.lines,
128                        node_type: &r.node_type,
129                        code: &r.code,
130                        // We no longer put original_input per result. If you truly need it,
131                        // you can uncomment the line below, but it's typically at the root.
132                        // original_input: r.original_input.as_deref(),
133                        original_input: None,
134                    })
135                    .collect();
136
137                // Create a wrapper object with results and summary
138                let mut wrapper = serde_json::json!({
139                    "results": json_results,
140                    "summary": {
141                        "count": results.len(),
142                        "total_bytes": results.iter().map(|r| r.code.len()).sum::<usize>(),
143                        "total_tokens": results.iter().map(|r| count_tokens(&r.code)).sum::<usize>(),
144                    }
145                });
146
147                // Add system prompt, user instructions, and original_input if provided
148                if let Some(input) = original_input {
149                    wrapper["original_input"] = serde_json::Value::String(input.to_string());
150                }
151
152                if let Some(prompt) = system_prompt {
153                    wrapper["system_prompt"] = serde_json::Value::String(prompt.to_string());
154                }
155
156                if let Some(instructions) = user_instructions {
157                    wrapper["user_instructions"] =
158                        serde_json::Value::String(instructions.to_string());
159                }
160
161                write!(output, "{}", serde_json::to_string_pretty(&wrapper)?)?;
162            }
163        }
164
165        // ---------------------------------------
166        // XML output
167        // ---------------------------------------
168        "xml" => {
169            // XML declaration
170            writeln!(output, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
171            // Open the root tag
172            writeln!(output, "<probe_results>")?;
173
174            if is_dry_run {
175                // DRY-RUN: no code, just file/lines/node_type
176                for result in results {
177                    writeln!(output, "  <result>")?;
178                    writeln!(output, "    <file>{}</file>", escape_xml(&result.file))?;
179
180                    if result.node_type != "file" {
181                        writeln!(output, "    <lines>")?;
182                        writeln!(output, "      <start>{}</start>", result.lines.0)?;
183                        writeln!(output, "      <end>{}</end>", result.lines.1)?;
184                        writeln!(output, "    </lines>")?;
185                    }
186
187                    if result.node_type != "file" && result.node_type != "context" {
188                        writeln!(
189                            output,
190                            "    <node_type>{}</node_type>",
191                            escape_xml(&result.node_type)
192                        )?;
193                    }
194
195                    writeln!(output, "  </result>")?;
196                }
197                // Summary
198                writeln!(output, "  <summary>")?;
199                writeln!(output, "    <count>{}</count>", results.len())?;
200                writeln!(output, "  </summary>")?;
201            } else {
202                // NON-DRY-RUN: includes code
203                for result in results {
204                    writeln!(output, "  <result>")?;
205                    writeln!(output, "    <file>{}</file>", escape_xml(&result.file))?;
206
207                    if result.node_type != "file" {
208                        writeln!(output, "    <lines>")?;
209                        writeln!(output, "      <start>{}</start>", result.lines.0)?;
210                        writeln!(output, "      <end>{}</end>", result.lines.1)?;
211                        writeln!(output, "    </lines>")?;
212                    }
213
214                    if result.node_type != "file" && result.node_type != "context" {
215                        writeln!(output, "    <node_type>{}</node_type>", &result.node_type)?;
216                    }
217
218                    // Use CDATA to preserve formatting and special characters
219                    writeln!(output, "    <code><![CDATA[{}]]></code>", &result.code)?;
220
221                    writeln!(output, "  </result>")?;
222                }
223
224                // Summary
225                writeln!(output, "  <summary>")?;
226                writeln!(output, "    <count>{}</count>", results.len())?;
227                writeln!(
228                    output,
229                    "    <total_bytes>{}</total_bytes>",
230                    results.iter().map(|r| r.code.len()).sum::<usize>()
231                )?;
232                writeln!(
233                    output,
234                    "    <total_tokens>{}</total_tokens>",
235                    results.iter().map(|r| count_tokens(&r.code)).sum::<usize>()
236                )?;
237                writeln!(output, "  </summary>")?;
238            }
239
240            // Add original_input, system_prompt, and user_instructions inside the root element
241            if let Some(input) = original_input {
242                writeln!(
243                    output,
244                    "  <original_input><![CDATA[{input}]]></original_input>"
245                )?;
246            }
247
248            if let Some(prompt) = system_prompt {
249                writeln!(
250                    output,
251                    "  <system_prompt><![CDATA[{prompt}]]></system_prompt>"
252                )?;
253            }
254
255            if let Some(instructions) = user_instructions {
256                writeln!(
257                    output,
258                    "  <user_instructions><![CDATA[{instructions}]]></user_instructions>"
259                )?;
260            }
261
262            // Close the root tag
263            writeln!(output, "</probe_results>")?;
264        }
265
266        // ---------------------------------------
267        // All other formats (terminal, markdown, plain, color)
268        // ---------------------------------------
269        _ => {
270            use colored::*;
271
272            // If there are no results
273            if results.is_empty() {
274                writeln!(output, "{}", "No results found.".yellow().bold())?;
275            } else {
276                // For each result, we either skip the code if is_dry_run, or include it otherwise.
277                for result in results {
278                    // Common: show file (with format-specific prefix)
279                    if format == "markdown" {
280                        writeln!(output, "## File: {}", result.file.yellow())?;
281                    } else {
282                        writeln!(output, "File: {}", result.file.yellow())?;
283                    }
284
285                    // Show lines if not a full file
286                    if result.node_type != "file" {
287                        if format == "markdown" {
288                            writeln!(output, "### Lines: {}-{}", result.lines.0, result.lines.1)?;
289                        } else {
290                            writeln!(output, "Lines: {}-{}", result.lines.0, result.lines.1)?;
291                        }
292                    }
293
294                    // Show node type if not file/context
295                    if result.node_type != "file" && result.node_type != "context" {
296                        if format == "markdown" {
297                            writeln!(output, "### Type: {}", result.node_type.cyan())?;
298                        } else {
299                            writeln!(output, "Type: {}", result.node_type.cyan())?;
300                        }
301                    }
302
303                    // In dry-run, we do NOT print the code
304                    if !is_dry_run {
305                        // Attempt a basic "highlight" approach by checking file extension
306                        let extension = Path::new(&result.file)
307                            .extension()
308                            .and_then(|ext| ext.to_str())
309                            .unwrap_or("");
310                        let language = get_language_from_extension(extension);
311
312                        match format {
313                            "markdown" => {
314                                if !language.is_empty() {
315                                    writeln!(output, "```{language}")?;
316                                } else {
317                                    writeln!(output, "```")?;
318                                }
319                                writeln!(output, "{}", result.code)?;
320                                writeln!(output, "```")?;
321                            }
322                            "plain" => {
323                                writeln!(output)?;
324                                writeln!(output, "{}", result.code)?;
325                                writeln!(output)?;
326                                writeln!(output, "----------------------------------------")?;
327                                writeln!(output)?;
328                            }
329                            "color" => {
330                                if !language.is_empty() {
331                                    writeln!(output, "```{language}")?;
332                                } else {
333                                    writeln!(output, "```")?;
334                                }
335                                writeln!(output, "{}", result.code)?;
336                                writeln!(output, "```")?;
337                            }
338                            // "terminal" or anything else not covered
339                            _ => {
340                                if !language.is_empty() {
341                                    writeln!(output, "```{language}")?;
342                                } else {
343                                    writeln!(output, "```")?;
344                                }
345                                writeln!(output, "{}", result.code)?;
346                                writeln!(output, "```")?;
347                            }
348                        }
349                    }
350
351                    writeln!(output)?;
352                }
353            }
354
355            // Now, print the root-level data (system prompt, user instructions, original input)
356            if let Some(input) = original_input {
357                writeln!(output, "{}", "Original Input:".yellow().bold())?;
358                writeln!(output, "{input}")?;
359            }
360            if let Some(prompt) = system_prompt {
361                writeln!(output)?;
362                writeln!(output, "{}", "System Prompt:".yellow().bold())?;
363                writeln!(output, "{prompt}")?;
364            }
365            if let Some(instructions) = user_instructions {
366                writeln!(output)?;
367                writeln!(output, "{}", "User Instructions:".yellow().bold())?;
368                writeln!(output, "{instructions}")?;
369            }
370
371            // Summaries for non-JSON/XML:
372            if !["json", "xml"].contains(&format) && !results.is_empty() {
373                writeln!(output)?;
374                if is_dry_run {
375                    writeln!(
376                        output,
377                        "{} {} {}",
378                        "Would extract".green().bold(),
379                        results.len(),
380                        if results.len() == 1 {
381                            "result"
382                        } else {
383                            "results"
384                        }
385                    )?;
386                } else {
387                    writeln!(
388                        output,
389                        "{} {} {}",
390                        "Extracted".green().bold(),
391                        results.len(),
392                        if results.len() == 1 {
393                            "result"
394                        } else {
395                            "results"
396                        }
397                    )?;
398
399                    let total_bytes: usize = results.iter().map(|r| r.code.len()).sum();
400                    let total_tokens: usize = results.iter().map(|r| count_tokens(&r.code)).sum();
401                    writeln!(output, "Total bytes returned: {total_bytes}")?;
402                    writeln!(output, "Total tokens returned: {total_tokens}")?;
403                }
404            }
405        }
406    }
407
408    Ok(output)
409}
410
411/// Format the extraction results for dry-run mode (only file names and line numbers)
412///
413/// # Arguments
414///
415/// * `results` - The search results to format
416/// * `format` - The output format (terminal, markdown, plain, json, or color)
417/// * `system_prompt` - Optional system prompt for LLM models
418/// * `user_instructions` - Optional user instructions for LLM models
419pub fn format_extraction_dry_run(
420    results: &[SearchResult],
421    format: &str,
422    original_input: Option<&str>,
423    system_prompt: Option<&str>,
424    user_instructions: Option<&str>,
425) -> Result<String> {
426    format_extraction_internal(
427        results,
428        format,
429        original_input,
430        system_prompt,
431        user_instructions,
432        true, // is_dry_run
433    )
434}
435
436/// Format the extraction results in the specified format and return as a string
437///
438/// # Arguments
439///
440/// * `results` - The search results to format
441/// * `format` - The output format (terminal, markdown, plain, json, or color)
442/// * `system_prompt` - Optional system prompt for LLM models
443/// * `user_instructions` - Optional user instructions for LLM models
444pub fn format_extraction_results(
445    results: &[SearchResult],
446    format: &str,
447    original_input: Option<&str>,
448    system_prompt: Option<&str>,
449    user_instructions: Option<&str>,
450) -> Result<String> {
451    format_extraction_internal(
452        results,
453        format,
454        original_input,
455        system_prompt,
456        user_instructions,
457        false, // is_dry_run
458    )
459}
460
461/// Format and print the extraction results in the specified format
462///
463/// # Arguments
464///
465/// * `results` - The search results to format and print
466/// * `format` - The output format (terminal, markdown, plain, json, or color)
467/// * `system_prompt` - Optional system prompt for LLM models
468/// * `user_instructions` - Optional user instructions for LLM models
469#[allow(dead_code)]
470pub fn format_and_print_extraction_results(
471    results: &[SearchResult],
472    format: &str,
473    original_input: Option<&str>,
474    system_prompt: Option<&str>,
475    user_instructions: Option<&str>,
476) -> Result<()> {
477    let output = format_extraction_results(
478        results,
479        format,
480        original_input,
481        system_prompt,
482        user_instructions,
483    )?;
484    println!("{output}");
485    Ok(())
486}
487
/// Escape the five XML special characters (`&`, `<`, `>`, `"`, `'`) in `s`,
/// returning a new string with each replaced by its predefined entity.
fn escape_xml(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
496
/// Map a file extension (without the leading dot) to the language name used for
/// syntax highlighting.
///
/// The match is case-sensitive; an unrecognized extension yields an empty string.
pub fn get_language_from_extension(extension: &str) -> &'static str {
    // Each entry pairs the extensions of one language with its highlight name.
    const LANGUAGES: &[(&[&str], &str)] = &[
        (&["rs"], "rust"),
        (&["py"], "python"),
        (&["js"], "javascript"),
        (&["ts"], "typescript"),
        (&["go"], "go"),
        (&["c", "h"], "c"),
        (&["cpp", "cc", "cxx", "hpp"], "cpp"),
        (&["java"], "java"),
        (&["rb"], "ruby"),
        (&["php"], "php"),
        (&["sh"], "bash"),
        (&["md"], "markdown"),
        (&["json"], "json"),
        (&["yaml", "yml"], "yaml"),
        (&["html"], "html"),
        (&["css"], "css"),
        (&["sql"], "sql"),
        (&["kt", "kts"], "kotlin"),
        (&["swift"], "swift"),
        (&["scala"], "scala"),
        (&["dart"], "dart"),
        (&["ex", "exs"], "elixir"),
        (&["hs"], "haskell"),
        (&["clj"], "clojure"),
        (&["lua"], "lua"),
        (&["r"], "r"),
        (&["pl", "pm"], "perl"),
        (&["proto"], "protobuf"),
    ];

    LANGUAGES
        .iter()
        .find(|(extensions, _)| extensions.iter().any(|&known| known == extension))
        .map_or("", |&(_, language)| language)
}