infiniloom_engine/output/
markdown.rs

1//! GPT-optimized Markdown output formatter
2//!
3//! Supports both in-memory (`format()`) and streaming (`format_to_writer()`) modes.
4
5use crate::output::{Formatter, StreamingFormatter};
6use crate::repomap::RepoMap;
7use crate::types::{Repository, TokenizerModel};
8use std::io::{self, Write};
9
10/// Markdown formatter optimized for GPT
11pub struct MarkdownFormatter {
12    /// Include overview tables
13    include_tables: bool,
14    /// Include Mermaid diagrams
15    include_mermaid: bool,
16    /// Include file tree
17    include_tree: bool,
18    /// Include line numbers in code
19    include_line_numbers: bool,
20    /// Token model for counts in output
21    token_model: TokenizerModel,
22}
23
24impl MarkdownFormatter {
25    /// Create a new Markdown formatter
26    pub fn new() -> Self {
27        Self {
28            include_tables: true,
29            include_mermaid: true,
30            include_tree: true,
31            include_line_numbers: true,
32            token_model: TokenizerModel::Claude,
33        }
34    }
35
36    /// Set tables option
37    pub fn with_tables(mut self, enabled: bool) -> Self {
38        self.include_tables = enabled;
39        self
40    }
41
42    /// Set Mermaid option
43    pub fn with_mermaid(mut self, enabled: bool) -> Self {
44        self.include_mermaid = enabled;
45        self
46    }
47
48    /// Set line numbers option
49    pub fn with_line_numbers(mut self, enabled: bool) -> Self {
50        self.include_line_numbers = enabled;
51        self
52    }
53
54    /// Set token model for token counts in output
55    pub fn with_model(mut self, model: TokenizerModel) -> Self {
56        self.token_model = model;
57        self
58    }
59
60    /// Estimate output size for pre-allocation
61    fn estimate_output_size(repo: &Repository) -> usize {
62        let base = 1000;
63        let files = repo.files.len() * 400;
64        let content: usize = repo
65            .files
66            .iter()
67            .filter_map(|f| f.content.as_ref())
68            .map(|c| c.len())
69            .sum();
70        base + files + content
71    }
72
73    // =========================================================================
74    // Streaming methods (write to impl std::io::Write)
75    // =========================================================================
76
77    fn stream_header<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
78        writeln!(w, "# Repository: {}", repo.name)?;
79        writeln!(w)?;
80        writeln!(
81            w,
82            "> **Files**: {} | **Lines**: {} | **Tokens**: {}",
83            repo.metadata.total_files,
84            repo.metadata.total_lines,
85            repo.metadata.total_tokens.get(self.token_model)
86        )?;
87        writeln!(w)
88    }
89
90    fn stream_overview<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
91        if !self.include_tables {
92            return Ok(());
93        }
94
95        writeln!(w, "## Overview")?;
96        writeln!(w)?;
97        writeln!(w, "| Metric | Value |")?;
98        writeln!(w, "|--------|-------|")?;
99        writeln!(w, "| Files | {} |", repo.metadata.total_files)?;
100        writeln!(w, "| Lines | {} |", repo.metadata.total_lines)?;
101
102        if let Some(lang) = repo.metadata.languages.first() {
103            writeln!(w, "| Primary Language | {} |", lang.language)?;
104        }
105        if let Some(framework) = &repo.metadata.framework {
106            writeln!(w, "| Framework | {} |", framework)?;
107        }
108        writeln!(w)?;
109
110        if repo.metadata.languages.len() > 1 {
111            writeln!(w, "### Languages")?;
112            writeln!(w)?;
113            writeln!(w, "| Language | Files | Percentage |")?;
114            writeln!(w, "|----------|-------|------------|")?;
115            for lang in &repo.metadata.languages {
116                writeln!(w, "| {} | {} | {:.1}% |", lang.language, lang.files, lang.percentage)?;
117            }
118            writeln!(w)?;
119        }
120        Ok(())
121    }
122
123    fn stream_repomap<W: Write>(&self, w: &mut W, map: &RepoMap) -> io::Result<()> {
124        writeln!(w, "## Repository Map")?;
125        writeln!(w)?;
126        writeln!(w, "{}", map.summary)?;
127        writeln!(w)?;
128
129        writeln!(w, "### Key Symbols")?;
130        writeln!(w)?;
131        writeln!(w, "| Rank | Symbol | Type | File | Line | Summary |")?;
132        writeln!(w, "|------|--------|------|------|------|---------|")?;
133        for sym in map.key_symbols.iter().take(15) {
134            let summary = sym
135                .summary
136                .as_deref()
137                .map(escape_markdown_cell)
138                .unwrap_or_default();
139            writeln!(
140                w,
141                "| {} | `{}` | {} | {} | {} | {} |",
142                sym.rank, sym.name, sym.kind, sym.file, sym.line, summary
143            )?;
144        }
145        writeln!(w)?;
146
147        if self.include_mermaid && !map.module_graph.edges.is_empty() {
148            writeln!(w, "### Module Dependencies")?;
149            writeln!(w)?;
150            writeln!(w, "```mermaid")?;
151            writeln!(w, "graph LR")?;
152            for edge in &map.module_graph.edges {
153                let sanitize_id = |s: &str| -> String {
154                    s.chars()
155                        .map(|c| if c == '-' || c == '.' { '_' } else { c })
156                        .collect()
157                };
158                let from_id = sanitize_id(&edge.from);
159                let to_id = sanitize_id(&edge.to);
160                writeln!(w, "    {}[\"{}\"] --> {}[\"{}\"]", from_id, edge.from, to_id, edge.to)?;
161            }
162            writeln!(w, "```")?;
163            writeln!(w)?;
164        }
165        Ok(())
166    }
167
168    fn stream_structure<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
169        if !self.include_tree {
170            return Ok(());
171        }
172
173        writeln!(w, "## Project Structure")?;
174        writeln!(w)?;
175        writeln!(w, "```")?;
176
177        let mut paths: Vec<_> = repo
178            .files
179            .iter()
180            .map(|f| f.relative_path.as_str())
181            .collect();
182        paths.sort();
183
184        let mut prev_parts: Vec<&str> = Vec::new();
185        for path in paths {
186            let parts: Vec<_> = path.split('/').collect();
187            let mut common = 0;
188            for (i, part) in parts.iter().enumerate() {
189                if i < prev_parts.len() && prev_parts[i] == *part {
190                    common = i + 1;
191                } else {
192                    break;
193                }
194            }
195            for (i, part) in parts.iter().enumerate().skip(common) {
196                let indent = "  ".repeat(i);
197                let prefix = if i == parts.len() - 1 {
198                    "📄 "
199                } else {
200                    "📁 "
201                };
202                writeln!(w, "{}{}{}", indent, prefix, part)?;
203            }
204            prev_parts = parts;
205        }
206
207        writeln!(w, "```")?;
208        writeln!(w)
209    }
210
211    fn stream_files<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
212        writeln!(w, "## Files")?;
213        writeln!(w)?;
214
215        for file in &repo.files {
216            if let Some(content) = &file.content {
217                writeln!(w, "### {}", file.relative_path)?;
218                writeln!(w)?;
219                writeln!(
220                    w,
221                    "> **Tokens**: {} | **Language**: {}",
222                    file.token_count.get(self.token_model),
223                    file.language.as_deref().unwrap_or("unknown")
224                )?;
225                writeln!(w)?;
226
227                let lang = file.language.as_deref().unwrap_or("");
228                writeln!(w, "```{}", lang)?;
229                if self.include_line_numbers {
230                    // Check if content has embedded line numbers (format: "N:content")
231                    // This preserves original line numbers when content has been compressed
232                    let first_line = content.lines().next().unwrap_or("");
233                    let has_embedded_line_nums = first_line.contains(':')
234                        && first_line
235                            .split(':')
236                            .next()
237                            .map(|s| s.parse::<u32>().is_ok())
238                            .unwrap_or(false);
239
240                    if has_embedded_line_nums {
241                        // Content has embedded line numbers - parse and output
242                        for line in content.lines() {
243                            if let Some((num_str, rest)) = line.split_once(':') {
244                                if let Ok(line_num) = num_str.parse::<u32>() {
245                                    writeln!(w, "{:4} {}", line_num, rest)?;
246                                } else {
247                                    // Fallback for malformed lines
248                                    writeln!(w, "     {}", line)?;
249                                }
250                            } else {
251                                writeln!(w, "     {}", line)?;
252                            }
253                        }
254                    } else {
255                        // No embedded line numbers - use sequential (uncompressed content)
256                        for (i, line) in content.lines().enumerate() {
257                            writeln!(w, "{:4} {}", i + 1, line)?;
258                        }
259                    }
260                } else {
261                    writeln!(w, "{}", content)?;
262                }
263                writeln!(w, "```")?;
264                writeln!(w)?;
265            }
266        }
267        Ok(())
268    }
269}
270
271impl Default for MarkdownFormatter {
272    fn default() -> Self {
273        Self::new()
274    }
275}
276
277impl Formatter for MarkdownFormatter {
278    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
279        // Use streaming internally for consistency
280        let mut output = Vec::with_capacity(Self::estimate_output_size(repo));
281        // Vec<u8> write cannot fail, ignore result
282        drop(self.format_to_writer(repo, map, &mut output));
283        // Use lossy conversion to handle any edge cases with invalid UTF-8
284        String::from_utf8(output)
285            .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
286    }
287
288    fn format_repo(&self, repo: &Repository) -> String {
289        let mut output = Vec::with_capacity(Self::estimate_output_size(repo));
290        // Vec<u8> write cannot fail, ignore result
291        drop(self.format_repo_to_writer(repo, &mut output));
292        // Use lossy conversion to handle any edge cases with invalid UTF-8
293        String::from_utf8(output)
294            .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
295    }
296
297    fn name(&self) -> &'static str {
298        "markdown"
299    }
300}
301
302impl StreamingFormatter for MarkdownFormatter {
303    fn format_to_writer<W: Write>(
304        &self,
305        repo: &Repository,
306        map: &RepoMap,
307        writer: &mut W,
308    ) -> io::Result<()> {
309        self.stream_header(writer, repo)?;
310        self.stream_overview(writer, repo)?;
311        self.stream_repomap(writer, map)?;
312        self.stream_structure(writer, repo)?;
313        self.stream_files(writer, repo)?;
314        Ok(())
315    }
316
317    fn format_repo_to_writer<W: Write>(&self, repo: &Repository, writer: &mut W) -> io::Result<()> {
318        self.stream_header(writer, repo)?;
319        self.stream_overview(writer, repo)?;
320        self.stream_structure(writer, repo)?;
321        self.stream_files(writer, repo)?;
322        Ok(())
323    }
324}
325
/// Makes `text` safe to place inside a single Markdown table cell.
///
/// Pipes are backslash-escaped so they do not end the cell, line breaks
/// (`\n`, `\r\n` and lone `\r`) become a single space each so the row stays on
/// one line, and surrounding whitespace is trimmed.
fn escape_markdown_cell(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '|' => escaped.push_str("\\|"),
            // The '\n' of a CRLF pair emits the space; skip the '\r' so the
            // pair does not turn into two spaces.
            '\r' if chars.peek() == Some(&'\n') => {}
            '\r' | '\n' => escaped.push(' '),
            other => escaped.push(other),
        }
    }
    escaped.trim().to_owned()
}
332
#[cfg(test)]
mod tests {
    use super::*;
    use crate::repomap::RepoMapGenerator;
    use crate::types::{LanguageStats, RepoFile, RepoMetadata, TokenCounts};

    /// Token counts used for both the sample file and the repository totals.
    fn sample_token_counts() -> TokenCounts {
        TokenCounts {
            o200k: 48,
            cl100k: 49,
            claude: 50,
            gemini: 47,
            llama: 46,
            mistral: 46,
            deepseek: 46,
            qwen: 46,
            cohere: 47,
            grok: 46,
        }
    }

    /// A one-file Python repository with content, used as the formatter input.
    fn create_test_repo() -> Repository {
        let main_py = RepoFile {
            path: "/tmp/test/main.py".into(),
            relative_path: "main.py".to_owned(),
            language: Some("python".to_owned()),
            size_bytes: 100,
            token_count: sample_token_counts(),
            symbols: Vec::new(),
            importance: 0.8,
            content: Some("def main():\n    print('hello')".to_owned()),
        };

        let python_stats = LanguageStats {
            language: "Python".to_owned(),
            files: 1,
            lines: 2,
            percentage: 100.0,
        };

        let metadata = RepoMetadata {
            total_files: 1,
            total_lines: 2,
            total_tokens: sample_token_counts(),
            languages: vec![python_stats],
            framework: None,
            description: None,
            branch: None,
            commit: None,
            directory_structure: None,
            external_dependencies: Vec::new(),
            git_history: None,
        };

        Repository {
            name: "test".to_owned(),
            path: "/tmp/test".into(),
            files: vec![main_py],
            metadata,
        }
    }

    /// The default formatter emits the title, the overview section and a
    /// fenced block tagged with the file's language.
    #[test]
    fn test_markdown_output() {
        let repo = create_test_repo();
        let map = RepoMapGenerator::new(1000).generate(&repo);

        let output = MarkdownFormatter::new().format(&repo, &map);

        for expected in ["# Repository: test", "## Overview", "```python"] {
            assert!(output.contains(expected));
        }
    }
}