infiniloom_engine/output/
mod.rs

1//! Output formatters for different LLM models
2//!
3//! This module provides formatters that can output in different formats:
4//! - XML (Claude-optimized)
5//! - Markdown (GPT-optimized)
6//! - TOON (Token-Oriented Object Notation - most compact)
7//! - JSON, YAML, Plain text
8//!
9//! Formatters support both in-memory (`format()`) and streaming (`format_to_writer()`)
10//! modes. Use streaming for large repositories to reduce memory usage.
11
12mod markdown;
13mod toon;
14mod xml;
15
16use crate::repomap::RepoMap;
17use crate::types::{Repository, TokenizerModel};
18use std::io::{self, Write};
19
20pub use markdown::MarkdownFormatter;
21pub use toon::ToonFormatter;
22pub use xml::XmlFormatter;
23
// `StreamingFormatter` is declared further down in this module, so it needs no re-export here.
25
/// Selects the serialization used when rendering repository context.
///
/// [`OutputFormat::Xml`] is the default variant.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum OutputFormat {
    /// XML output, tuned for Claude.
    #[default]
    Xml,
    /// Markdown output, tuned for GPT models.
    Markdown,
    /// Generic JSON output.
    Json,
    /// YAML output, aimed at Gemini.
    Yaml,
    /// TOON (Token-Oriented Object Notation), the most token-efficient format.
    Toon,
    /// Plain text without any markup.
    Plain,
}
43
44/// Output formatter trait for in-memory formatting.
45///
46/// Use `format()` for formatting with a repository map (includes symbol rankings),
47/// or `format_repo()` for formatting just the repository files.
48///
49/// For large repositories, prefer the `StreamingFormatter` trait to reduce memory usage.
50pub trait Formatter {
51    /// Format repository with map to string.
52    ///
53    /// The output includes both the repository files and the symbol rankings
54    /// from the repository map.
55    #[must_use]
56    fn format(&self, repo: &Repository, map: &RepoMap) -> String;
57
58    /// Format repository only to string (without map metadata).
59    #[must_use]
60    fn format_repo(&self, repo: &Repository) -> String;
61
62    /// Get format name.
63    fn name(&self) -> &'static str;
64}
65
66/// Streaming formatter trait for low-memory output
67///
68/// Implement this trait to enable streaming output directly to files,
69/// stdout, or network sockets without building intermediate strings.
70pub trait StreamingFormatter {
71    /// Stream repository with map to writer
72    ///
73    /// # Example
74    /// ```ignore
75    /// use std::io::BufWriter;
76    /// use std::fs::File;
77    ///
78    /// let file = File::create("output.xml")?;
79    /// let mut writer = BufWriter::new(file);
80    /// formatter.format_to_writer(&repo, &map, &mut writer)?;
81    /// ```
82    fn format_to_writer<W: Write>(
83        &self,
84        repo: &Repository,
85        map: &RepoMap,
86        writer: &mut W,
87    ) -> io::Result<()>;
88
89    /// Stream repository only to writer
90    fn format_repo_to_writer<W: Write>(&self, repo: &Repository, writer: &mut W) -> io::Result<()>;
91}
92
93/// Output formatter factory
94pub struct OutputFormatter;
95
96impl OutputFormatter {
97    /// Create Claude-optimized XML formatter
98    pub fn claude() -> XmlFormatter {
99        XmlFormatter::new(true).with_model(TokenizerModel::Claude)
100    }
101
102    /// Create GPT-optimized Markdown formatter
103    pub fn gpt() -> MarkdownFormatter {
104        MarkdownFormatter::new().with_model(TokenizerModel::Claude)
105    }
106
107    /// Create JSON formatter
108    pub fn json() -> JsonFormatter {
109        JsonFormatter
110    }
111
112    /// Create YAML formatter (Gemini)
113    pub fn gemini() -> YamlFormatter {
114        YamlFormatter::new(TokenizerModel::Gemini)
115    }
116
117    /// Create formatter by format type
118    pub fn by_format(format: OutputFormat) -> Box<dyn Formatter> {
119        Self::by_format_with_options(format, true)
120    }
121
122    /// Create formatter by format type with line numbers option
123    pub fn by_format_with_options(format: OutputFormat, line_numbers: bool) -> Box<dyn Formatter> {
124        Self::by_format_with_all_options(format, line_numbers, true)
125    }
126
127    /// Create formatter by format type with all options
128    pub fn by_format_with_all_options(
129        format: OutputFormat,
130        line_numbers: bool,
131        show_file_index: bool,
132    ) -> Box<dyn Formatter> {
133        let model = Self::default_model_for_format(format);
134        Self::by_format_with_all_options_and_model(format, line_numbers, show_file_index, model)
135    }
136
137    /// Create formatter by format type with model override
138    pub fn by_format_with_model(format: OutputFormat, model: TokenizerModel) -> Box<dyn Formatter> {
139        Self::by_format_with_all_options_and_model(format, true, true, model)
140    }
141
142    /// Create formatter by format type with all options and model override
143    pub fn by_format_with_all_options_and_model(
144        format: OutputFormat,
145        line_numbers: bool,
146        show_file_index: bool,
147        model: TokenizerModel,
148    ) -> Box<dyn Formatter> {
149        match format {
150            OutputFormat::Xml => Box::new(
151                XmlFormatter::new(true)
152                    .with_line_numbers(line_numbers)
153                    .with_file_index(show_file_index)
154                    .with_model(model),
155            ),
156            OutputFormat::Markdown => Box::new(
157                MarkdownFormatter::new()
158                    .with_line_numbers(line_numbers)
159                    .with_model(model),
160            ),
161            OutputFormat::Json => Box::new(JsonFormatter),
162            OutputFormat::Yaml => Box::new(YamlFormatter::new(model)),
163            OutputFormat::Toon => Box::new(
164                ToonFormatter::new()
165                    .with_line_numbers(line_numbers)
166                    .with_file_index(show_file_index)
167                    .with_model(model),
168            ),
169            OutputFormat::Plain => Box::new(
170                PlainFormatter::new()
171                    .with_line_numbers(line_numbers)
172                    .with_model(model),
173            ),
174        }
175    }
176
177    /// Create TOON formatter (most token-efficient)
178    pub fn toon() -> ToonFormatter {
179        ToonFormatter::new().with_model(TokenizerModel::Claude)
180    }
181
182    fn default_model_for_format(format: OutputFormat) -> TokenizerModel {
183        match format {
184            OutputFormat::Yaml => TokenizerModel::Gemini,
185            _ => TokenizerModel::Claude,
186        }
187    }
188}
189
190/// JSON formatter
191pub struct JsonFormatter;
192
193/// Output structure for JSON format with repository map
194#[derive(serde::Serialize)]
195struct JsonOutput<'a> {
196    repository: &'a Repository,
197    map: &'a RepoMap,
198}
199
200/// Output structure for JSON format without map (repo-only view)
201#[derive(serde::Serialize)]
202struct JsonRepoOutput<'a> {
203    repository: &'a Repository,
204}
205
206impl Formatter for JsonFormatter {
207    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
208        serde_json::to_string_pretty(&JsonOutput { repository: repo, map }).unwrap_or_default()
209    }
210
211    fn format_repo(&self, repo: &Repository) -> String {
212        // Use consistent structure with 'repository' key wrapper for API consistency
213        serde_json::to_string_pretty(&JsonRepoOutput { repository: repo }).unwrap_or_default()
214    }
215
216    fn name(&self) -> &'static str {
217        "json"
218    }
219}
220
221/// Plain text formatter (simple, no markup)
222pub struct PlainFormatter {
223    /// Include line numbers in code
224    include_line_numbers: bool,
225    /// Token model for counts in output
226    token_model: TokenizerModel,
227}
228
229impl PlainFormatter {
230    /// Create a new plain formatter
231    pub fn new() -> Self {
232        Self { include_line_numbers: true, token_model: TokenizerModel::Claude }
233    }
234
235    /// Set line numbers option
236    pub fn with_line_numbers(mut self, enabled: bool) -> Self {
237        self.include_line_numbers = enabled;
238        self
239    }
240
241    /// Set token model for token counts in output
242    pub fn with_model(mut self, model: TokenizerModel) -> Self {
243        self.token_model = model;
244        self
245    }
246}
247
248impl Default for PlainFormatter {
249    fn default() -> Self {
250        Self::new()
251    }
252}
253
254impl Formatter for PlainFormatter {
255    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
256        let mut output = String::new();
257
258        // Header
259        output.push_str(&format!("Repository: {}\n", repo.name));
260        output.push_str(&format!(
261            "Files: {} | Lines: {} | Tokens: {}\n",
262            repo.metadata.total_files,
263            repo.metadata.total_lines,
264            repo.metadata.total_tokens.get(self.token_model)
265        ));
266        output.push_str(&"=".repeat(60));
267        output.push('\n');
268        output.push('\n');
269
270        // Repository map summary
271        output.push_str("REPOSITORY MAP\n");
272        output.push_str(&"-".repeat(40));
273        output.push('\n');
274        output.push_str(&map.summary);
275        output.push_str("\n\n");
276
277        // Directory structure
278        if let Some(structure) = &repo.metadata.directory_structure {
279            output.push_str("DIRECTORY STRUCTURE\n");
280            output.push_str(&"-".repeat(40));
281            output.push('\n');
282            output.push_str(structure);
283            output.push_str("\n\n");
284        }
285
286        // Files
287        output.push_str("FILES\n");
288        output.push_str(&"=".repeat(60));
289        output.push('\n');
290
291        for file in &repo.files {
292            output.push('\n');
293            output.push_str(&format!("File: {}\n", file.relative_path));
294            if let Some(lang) = &file.language {
295                output.push_str(&format!("Language: {}\n", lang));
296            }
297            output.push_str(&format!("Tokens: {}\n", file.token_count.get(self.token_model)));
298            output.push_str(&"-".repeat(40));
299            output.push('\n');
300
301            if let Some(content) = &file.content {
302                if self.include_line_numbers {
303                    for (i, line) in content.lines().enumerate() {
304                        output.push_str(&format!("{:4} {}\n", i + 1, line));
305                    }
306                } else {
307                    output.push_str(content);
308                    if !content.ends_with('\n') {
309                        output.push('\n');
310                    }
311                }
312            }
313            output.push_str(&"-".repeat(40));
314            output.push('\n');
315        }
316
317        output
318    }
319
320    fn format_repo(&self, repo: &Repository) -> String {
321        let mut output = String::new();
322        for file in &repo.files {
323            output.push_str(&format!("=== {} ===\n", file.relative_path));
324            if let Some(content) = &file.content {
325                if self.include_line_numbers {
326                    for (i, line) in content.lines().enumerate() {
327                        output.push_str(&format!("{:4} {}\n", i + 1, line));
328                    }
329                } else {
330                    output.push_str(content);
331                    if !content.ends_with('\n') {
332                        output.push('\n');
333                    }
334                }
335            }
336            output.push('\n');
337        }
338        output
339    }
340
341    fn name(&self) -> &'static str {
342        "plain"
343    }
344}
345
346/// YAML formatter (Gemini-optimized)
347pub struct YamlFormatter {
348    token_model: TokenizerModel,
349}
350
351impl YamlFormatter {
352    /// Create a new YAML formatter with the specified token model
353    pub fn new(model: TokenizerModel) -> Self {
354        Self { token_model: model }
355    }
356}
357
358impl Formatter for YamlFormatter {
359    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
360        let mut output = String::new();
361
362        // YAML header
363        output.push_str("---\n");
364        output.push_str("# Repository Context for Gemini\n");
365        output.push_str("# Note: Query should be at the END of this context\n\n");
366
367        // Metadata
368        output.push_str("metadata:\n");
369        output.push_str(&format!("  name: {}\n", repo.name));
370        output.push_str(&format!("  files: {}\n", repo.metadata.total_files));
371        output.push_str(&format!("  lines: {}\n", repo.metadata.total_lines));
372        output
373            .push_str(&format!("  tokens: {}\n", repo.metadata.total_tokens.get(self.token_model)));
374        output.push('\n');
375
376        // Languages
377        output.push_str("languages:\n");
378        for lang in &repo.metadata.languages {
379            output.push_str(&format!(
380                "  - name: {}\n    files: {}\n    percentage: {:.1}%\n",
381                lang.language, lang.files, lang.percentage
382            ));
383        }
384        output.push('\n');
385
386        // Repository map
387        output.push_str("repository_map:\n");
388        output.push_str(&format!("  summary: |\n    {}\n", map.summary.replace('\n', "\n    ")));
389        output.push_str("  key_symbols:\n");
390        for sym in &map.key_symbols {
391            output.push_str(&format!(
392                "    - name: {}\n      type: {}\n      file: {}\n      rank: {}\n",
393                sym.name, sym.kind, sym.file, sym.rank
394            ));
395            if let Some(ref summary) = sym.summary {
396                output.push_str(&format!("      summary: {}\n", summary));
397            }
398        }
399        output.push('\n');
400
401        // Files
402        output.push_str("files:\n");
403        for file in &repo.files {
404            output.push_str(&format!("  - path: {}\n", file.relative_path));
405            if let Some(lang) = &file.language {
406                output.push_str(&format!("    language: {}\n", lang));
407            }
408            output.push_str(&format!("    tokens: {}\n", file.token_count.get(self.token_model)));
409
410            if let Some(content) = &file.content {
411                output.push_str("    content: |\n");
412                for line in content.lines() {
413                    output.push_str(&format!("      {}\n", line));
414                }
415            }
416        }
417
418        // Query placeholder at end (Gemini best practice)
419        output.push_str("\n# --- INSERT YOUR QUERY BELOW THIS LINE ---\n");
420        output.push_str("query: |\n");
421        output.push_str("  [Your question about this repository]\n");
422
423        output
424    }
425
426    fn format_repo(&self, repo: &Repository) -> String {
427        serde_yaml::to_string(repo).unwrap_or_default()
428    }
429
430    fn name(&self) -> &'static str {
431        "yaml"
432    }
433}
434
#[cfg(test)]
// The fixtures below build owned strings by calling `.to_string()` on literals.
#[allow(clippy::str_to_string)]
mod tests {
    use super::*;
    use crate::repomap::RepoMapGenerator;
    use crate::types::{LanguageStats, RepoFile, RepoMetadata, TokenCounts};

    /// Token counts shared by the fixture's single file and its repository totals.
    fn sample_token_counts() -> TokenCounts {
        TokenCounts {
            o200k: 48,
            cl100k: 49,
            claude: 50,
            gemini: 47,
            llama: 46,
            mistral: 46,
            deepseek: 46,
            qwen: 46,
            cohere: 47,
            grok: 46,
        }
    }

    /// Builds a one-file Python repository named `test`.
    fn create_test_repo() -> Repository {
        let main_py = RepoFile {
            path: "/tmp/test/main.py".into(),
            relative_path: "main.py".to_string(),
            language: Some("python".to_string()),
            size_bytes: 100,
            token_count: sample_token_counts(),
            symbols: Vec::new(),
            importance: 0.8,
            content: Some("def main():\n    print('hello')".to_string()),
        };

        let python_stats = LanguageStats {
            language: "Python".to_string(),
            files: 1,
            lines: 2,
            percentage: 100.0,
        };

        let metadata = RepoMetadata {
            total_files: 1,
            total_lines: 2,
            total_tokens: sample_token_counts(),
            languages: vec![python_stats],
            framework: None,
            description: None,
            branch: None,
            commit: None,
            directory_structure: Some("main.py\n".to_string()),
            external_dependencies: vec!["requests".to_string()],
            git_history: None,
        };

        Repository {
            name: "test".to_string(),
            path: "/tmp/test".into(),
            files: vec![main_py],
            metadata,
        }
    }

    /// The JSON output carries the repository name and a `files` key.
    #[test]
    fn test_json_formatter() {
        let repo = create_test_repo();
        let map = RepoMapGenerator::new(1000).generate(&repo);

        let rendered = OutputFormatter::json().format(&repo, &map);

        assert!(rendered.contains("\"name\": \"test\""));
        assert!(rendered.contains("\"files\""));
    }

    /// The YAML output carries the repository name and the query placeholder.
    #[test]
    fn test_yaml_formatter() {
        let repo = create_test_repo();
        let map = RepoMapGenerator::new(1000).generate(&repo);

        let rendered = OutputFormatter::gemini().format(&repo, &map);

        assert!(rendered.contains("name: test"));
        assert!(rendered.contains("# --- INSERT YOUR QUERY"));
    }
}