infiniloom_engine/output/
mod.rs

1//! Output formatters for different LLM models
2//!
3//! This module provides formatters that can output in different formats:
4//! - XML (Claude-optimized)
5//! - Markdown (GPT-optimized)
6//! - TOON (Token-Oriented Object Notation - most compact)
7//! - JSON, YAML, Plain text
8//!
9//! Formatters support both in-memory (`format()`) and streaming (`format_to_writer()`)
10//! modes. Use streaming for large repositories to reduce memory usage.
11//!
12//! The [`escaping`] submodule provides text escaping utilities for XML, YAML,
13//! and other formats.
14
15pub mod escaping;
16mod markdown;
17mod toon;
18mod xml;
19
20use crate::repomap::RepoMap;
21use crate::types::{Repository, TokenizerModel};
22use std::io::{self, Write};
23
24pub use markdown::MarkdownFormatter;
25pub use toon::ToonFormatter;
26pub use xml::XmlFormatter;
27
// `StreamingFormatter` is defined further down in this module, so it needs no re-export here.
29
/// Selects which output format a formatter should produce.
///
/// [`OutputFormat::Xml`] is the default variant.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum OutputFormat {
    /// XML output (Claude-optimized); the default format
    #[default]
    Xml,
    /// Markdown output (GPT-optimized)
    Markdown,
    /// Generic JSON output
    Json,
    /// YAML output (Gemini)
    Yaml,
    /// TOON (Token-Oriented Object Notation) output, the most token-efficient option
    Toon,
    /// Plain text output without any markup
    Plain,
}
47
48/// Output formatter trait for in-memory formatting.
49///
50/// Use `format()` for formatting with a repository map (includes symbol rankings),
51/// or `format_repo()` for formatting just the repository files.
52///
53/// For large repositories, prefer the `StreamingFormatter` trait to reduce memory usage.
54pub trait Formatter {
55    /// Format repository with map to string.
56    ///
57    /// The output includes both the repository files and the symbol rankings
58    /// from the repository map.
59    #[must_use]
60    fn format(&self, repo: &Repository, map: &RepoMap) -> String;
61
62    /// Format repository only to string (without map metadata).
63    #[must_use]
64    fn format_repo(&self, repo: &Repository) -> String;
65
66    /// Get format name.
67    fn name(&self) -> &'static str;
68}
69
70/// Streaming formatter trait for low-memory output
71///
72/// Implement this trait to enable streaming output directly to files,
73/// stdout, or network sockets without building intermediate strings.
74pub trait StreamingFormatter {
75    /// Stream repository with map to writer
76    ///
77    /// # Example
78    /// ```ignore
79    /// use std::io::BufWriter;
80    /// use std::fs::File;
81    ///
82    /// let file = File::create("output.xml")?;
83    /// let mut writer = BufWriter::new(file);
84    /// formatter.format_to_writer(&repo, &map, &mut writer)?;
85    /// ```
86    fn format_to_writer<W: Write>(
87        &self,
88        repo: &Repository,
89        map: &RepoMap,
90        writer: &mut W,
91    ) -> io::Result<()>;
92
93    /// Stream repository only to writer
94    fn format_repo_to_writer<W: Write>(&self, repo: &Repository, writer: &mut W) -> io::Result<()>;
95}
96
97/// Output formatter factory
98pub struct OutputFormatter;
99
100impl OutputFormatter {
101    /// Create Claude-optimized XML formatter
102    pub fn claude() -> XmlFormatter {
103        XmlFormatter::new(true).with_model(TokenizerModel::Claude)
104    }
105
106    /// Create GPT-optimized Markdown formatter
107    pub fn gpt() -> MarkdownFormatter {
108        MarkdownFormatter::new().with_model(TokenizerModel::Claude)
109    }
110
111    /// Create JSON formatter
112    pub fn json() -> JsonFormatter {
113        JsonFormatter
114    }
115
116    /// Create YAML formatter (Gemini)
117    pub fn gemini() -> YamlFormatter {
118        YamlFormatter::new(TokenizerModel::Gemini)
119    }
120
121    /// Create formatter by format type
122    pub fn by_format(format: OutputFormat) -> Box<dyn Formatter> {
123        Self::by_format_with_options(format, true)
124    }
125
126    /// Create formatter by format type with line numbers option
127    pub fn by_format_with_options(format: OutputFormat, line_numbers: bool) -> Box<dyn Formatter> {
128        Self::by_format_with_all_options(format, line_numbers, true)
129    }
130
131    /// Create formatter by format type with all options
132    pub fn by_format_with_all_options(
133        format: OutputFormat,
134        line_numbers: bool,
135        show_file_index: bool,
136    ) -> Box<dyn Formatter> {
137        let model = Self::default_model_for_format(format);
138        Self::by_format_with_all_options_and_model(format, line_numbers, show_file_index, model)
139    }
140
141    /// Create formatter by format type with model override
142    pub fn by_format_with_model(format: OutputFormat, model: TokenizerModel) -> Box<dyn Formatter> {
143        Self::by_format_with_all_options_and_model(format, true, true, model)
144    }
145
146    /// Create formatter by format type with all options and model override
147    pub fn by_format_with_all_options_and_model(
148        format: OutputFormat,
149        line_numbers: bool,
150        show_file_index: bool,
151        model: TokenizerModel,
152    ) -> Box<dyn Formatter> {
153        match format {
154            OutputFormat::Xml => Box::new(
155                XmlFormatter::new(true)
156                    .with_line_numbers(line_numbers)
157                    .with_file_index(show_file_index)
158                    .with_model(model),
159            ),
160            OutputFormat::Markdown => Box::new(
161                MarkdownFormatter::new()
162                    .with_line_numbers(line_numbers)
163                    .with_model(model),
164            ),
165            OutputFormat::Json => Box::new(JsonFormatter),
166            OutputFormat::Yaml => Box::new(YamlFormatter::new(model)),
167            OutputFormat::Toon => Box::new(
168                ToonFormatter::new()
169                    .with_line_numbers(line_numbers)
170                    .with_file_index(show_file_index)
171                    .with_model(model),
172            ),
173            OutputFormat::Plain => Box::new(
174                PlainFormatter::new()
175                    .with_line_numbers(line_numbers)
176                    .with_model(model),
177            ),
178        }
179    }
180
181    /// Create TOON formatter (most token-efficient)
182    pub fn toon() -> ToonFormatter {
183        ToonFormatter::new().with_model(TokenizerModel::Claude)
184    }
185
186    fn default_model_for_format(format: OutputFormat) -> TokenizerModel {
187        match format {
188            OutputFormat::Yaml => TokenizerModel::Gemini,
189            _ => TokenizerModel::Claude,
190        }
191    }
192}
193
194/// JSON formatter
195pub struct JsonFormatter;
196
197/// Output structure for JSON format with repository map
198#[derive(serde::Serialize)]
199struct JsonOutput<'a> {
200    repository: &'a Repository,
201    map: &'a RepoMap,
202}
203
204/// Output structure for JSON format without map (repo-only view)
205#[derive(serde::Serialize)]
206struct JsonRepoOutput<'a> {
207    repository: &'a Repository,
208}
209
210impl Formatter for JsonFormatter {
211    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
212        serde_json::to_string_pretty(&JsonOutput { repository: repo, map }).unwrap_or_default()
213    }
214
215    fn format_repo(&self, repo: &Repository) -> String {
216        // Use consistent structure with 'repository' key wrapper for API consistency
217        serde_json::to_string_pretty(&JsonRepoOutput { repository: repo }).unwrap_or_default()
218    }
219
220    fn name(&self) -> &'static str {
221        "json"
222    }
223}
224
225/// Plain text formatter (simple, no markup)
226pub struct PlainFormatter {
227    /// Include line numbers in code
228    include_line_numbers: bool,
229    /// Token model for counts in output
230    token_model: TokenizerModel,
231}
232
233impl PlainFormatter {
234    /// Create a new plain formatter
235    pub fn new() -> Self {
236        Self { include_line_numbers: true, token_model: TokenizerModel::Claude }
237    }
238
239    /// Set line numbers option
240    pub fn with_line_numbers(mut self, enabled: bool) -> Self {
241        self.include_line_numbers = enabled;
242        self
243    }
244
245    /// Set token model for token counts in output
246    pub fn with_model(mut self, model: TokenizerModel) -> Self {
247        self.token_model = model;
248        self
249    }
250}
251
252impl Default for PlainFormatter {
253    fn default() -> Self {
254        Self::new()
255    }
256}
257
258impl Formatter for PlainFormatter {
259    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
260        let mut output = String::new();
261
262        // Header
263        output.push_str(&format!("Repository: {}\n", repo.name));
264        output.push_str(&format!(
265            "Files: {} | Lines: {} | Tokens: {}\n",
266            repo.metadata.total_files,
267            repo.metadata.total_lines,
268            repo.metadata.total_tokens.get(self.token_model)
269        ));
270        output.push_str(&"=".repeat(60));
271        output.push('\n');
272        output.push('\n');
273
274        // Repository map summary
275        output.push_str("REPOSITORY MAP\n");
276        output.push_str(&"-".repeat(40));
277        output.push('\n');
278        output.push_str(&map.summary);
279        output.push_str("\n\n");
280
281        // Directory structure
282        if let Some(structure) = &repo.metadata.directory_structure {
283            output.push_str("DIRECTORY STRUCTURE\n");
284            output.push_str(&"-".repeat(40));
285            output.push('\n');
286            output.push_str(structure);
287            output.push_str("\n\n");
288        }
289
290        // Files
291        output.push_str("FILES\n");
292        output.push_str(&"=".repeat(60));
293        output.push('\n');
294
295        for file in &repo.files {
296            output.push('\n');
297            output.push_str(&format!("File: {}\n", file.relative_path));
298            if let Some(lang) = &file.language {
299                output.push_str(&format!("Language: {}\n", lang));
300            }
301            output.push_str(&format!("Tokens: {}\n", file.token_count.get(self.token_model)));
302            output.push_str(&"-".repeat(40));
303            output.push('\n');
304
305            if let Some(content) = &file.content {
306                if self.include_line_numbers {
307                    for (i, line) in content.lines().enumerate() {
308                        output.push_str(&format!("{:4} {}\n", i + 1, line));
309                    }
310                } else {
311                    output.push_str(content);
312                    if !content.ends_with('\n') {
313                        output.push('\n');
314                    }
315                }
316            }
317            output.push_str(&"-".repeat(40));
318            output.push('\n');
319        }
320
321        output
322    }
323
324    fn format_repo(&self, repo: &Repository) -> String {
325        let mut output = String::new();
326        for file in &repo.files {
327            output.push_str(&format!("=== {} ===\n", file.relative_path));
328            if let Some(content) = &file.content {
329                if self.include_line_numbers {
330                    for (i, line) in content.lines().enumerate() {
331                        output.push_str(&format!("{:4} {}\n", i + 1, line));
332                    }
333                } else {
334                    output.push_str(content);
335                    if !content.ends_with('\n') {
336                        output.push('\n');
337                    }
338                }
339            }
340            output.push('\n');
341        }
342        output
343    }
344
345    fn name(&self) -> &'static str {
346        "plain"
347    }
348}
349
350/// YAML formatter (Gemini-optimized)
351pub struct YamlFormatter {
352    token_model: TokenizerModel,
353}
354
355impl YamlFormatter {
356    /// Create a new YAML formatter with the specified token model
357    pub fn new(model: TokenizerModel) -> Self {
358        Self { token_model: model }
359    }
360}
361
362impl Formatter for YamlFormatter {
363    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
364        let mut output = String::new();
365
366        // YAML header
367        output.push_str("---\n");
368        output.push_str("# Repository Context for Gemini\n");
369        output.push_str("# Note: Query should be at the END of this context\n\n");
370
371        // Metadata
372        output.push_str("metadata:\n");
373        output.push_str(&format!("  name: {}\n", repo.name));
374        output.push_str(&format!("  files: {}\n", repo.metadata.total_files));
375        output.push_str(&format!("  lines: {}\n", repo.metadata.total_lines));
376        output
377            .push_str(&format!("  tokens: {}\n", repo.metadata.total_tokens.get(self.token_model)));
378        output.push('\n');
379
380        // Languages
381        output.push_str("languages:\n");
382        for lang in &repo.metadata.languages {
383            output.push_str(&format!(
384                "  - name: {}\n    files: {}\n    percentage: {:.1}%\n",
385                lang.language, lang.files, lang.percentage
386            ));
387        }
388        output.push('\n');
389
390        // Repository map
391        output.push_str("repository_map:\n");
392        output.push_str(&format!("  summary: |\n    {}\n", map.summary.replace('\n', "\n    ")));
393        output.push_str("  key_symbols:\n");
394        for sym in &map.key_symbols {
395            output.push_str(&format!(
396                "    - name: {}\n      type: {}\n      file: {}\n      rank: {}\n",
397                sym.name, sym.kind, sym.file, sym.rank
398            ));
399            if let Some(ref summary) = sym.summary {
400                output.push_str(&format!("      summary: {}\n", summary));
401            }
402        }
403        output.push('\n');
404
405        // Files
406        output.push_str("files:\n");
407        for file in &repo.files {
408            output.push_str(&format!("  - path: {}\n", file.relative_path));
409            if let Some(lang) = &file.language {
410                output.push_str(&format!("    language: {}\n", lang));
411            }
412            output.push_str(&format!("    tokens: {}\n", file.token_count.get(self.token_model)));
413
414            if let Some(content) = &file.content {
415                output.push_str("    content: |\n");
416                for line in content.lines() {
417                    output.push_str(&format!("      {}\n", line));
418                }
419            }
420        }
421
422        // Query placeholder at end (Gemini best practice)
423        output.push_str("\n# --- INSERT YOUR QUERY BELOW THIS LINE ---\n");
424        output.push_str("query: |\n");
425        output.push_str("  [Your question about this repository]\n");
426
427        output
428    }
429
430    fn format_repo(&self, repo: &Repository) -> String {
431        serde_yaml::to_string(repo).unwrap_or_default()
432    }
433
434    fn name(&self) -> &'static str {
435        "yaml"
436    }
437}
438
#[cfg(test)]
// Fixtures build owned strings from literals with `.to_string()`.
#[allow(clippy::str_to_string)]
mod tests {
    use super::*;
    use crate::repomap::RepoMapGenerator;
    use crate::types::{LanguageStats, RepoFile, RepoMetadata, TokenCounts};

    /// Token counts shared by the sample file and the repository totals.
    fn sample_token_counts() -> TokenCounts {
        TokenCounts {
            o200k: 48,
            cl100k: 49,
            claude: 50,
            gemini: 47,
            llama: 46,
            mistral: 46,
            deepseek: 46,
            qwen: 46,
            cohere: 47,
            grok: 46,
        }
    }

    /// One-file Python repository used as the fixture for every test.
    fn create_test_repo() -> Repository {
        let main_file = RepoFile {
            path: "/tmp/test/main.py".into(),
            relative_path: "main.py".to_string(),
            language: Some("python".to_string()),
            size_bytes: 100,
            token_count: sample_token_counts(),
            symbols: Vec::new(),
            importance: 0.8,
            content: Some("def main():\n    print('hello')".to_string()),
        };

        let python_stats = LanguageStats {
            language: "Python".to_string(),
            files: 1,
            lines: 2,
            percentage: 100.0,
        };

        let metadata = RepoMetadata {
            total_files: 1,
            total_lines: 2,
            total_tokens: sample_token_counts(),
            languages: vec![python_stats],
            framework: None,
            description: None,
            branch: None,
            commit: None,
            directory_structure: Some("main.py\n".to_string()),
            external_dependencies: vec!["requests".to_string()],
            git_history: None,
        };

        Repository {
            name: "test".to_string(),
            path: "/tmp/test".into(),
            files: vec![main_file],
            metadata,
        }
    }

    /// Format the fixture repository (with a freshly generated map) using `formatter`.
    fn render_fixture(formatter: &impl Formatter) -> String {
        let repo = create_test_repo();
        let map = RepoMapGenerator::new(1000).generate(&repo);
        formatter.format(&repo, &map)
    }

    #[test]
    fn test_json_formatter() {
        let output = render_fixture(&OutputFormatter::json());

        assert!(output.contains("\"name\": \"test\""));
        assert!(output.contains("\"files\""));
    }

    #[test]
    fn test_yaml_formatter() {
        let output = render_fixture(&OutputFormatter::gemini());

        assert!(output.contains("name: test"));
        assert!(output.contains("# --- INSERT YOUR QUERY"));
    }
}