infiniloom_engine/output/
markdown.rs

1//! GPT-optimized Markdown output formatter
2//!
3//! Supports both in-memory (`format()`) and streaming (`format_to_writer()`) modes.
4
5use crate::output::{Formatter, StreamingFormatter};
6use crate::repomap::RepoMap;
7use crate::types::{Repository, TokenizerModel};
8use std::io::{self, Write};
9
10/// Markdown formatter optimized for GPT
11pub struct MarkdownFormatter {
12    /// Include overview tables
13    include_tables: bool,
14    /// Include Mermaid diagrams
15    include_mermaid: bool,
16    /// Include file tree
17    include_tree: bool,
18    /// Include line numbers in code
19    include_line_numbers: bool,
20    /// Token model for counts in output
21    token_model: TokenizerModel,
22}
23
24impl MarkdownFormatter {
25    /// Create a new Markdown formatter
26    pub fn new() -> Self {
27        Self {
28            include_tables: true,
29            include_mermaid: true,
30            include_tree: true,
31            include_line_numbers: true,
32            token_model: TokenizerModel::Claude,
33        }
34    }
35
36    /// Set tables option
37    pub fn with_tables(mut self, enabled: bool) -> Self {
38        self.include_tables = enabled;
39        self
40    }
41
42    /// Set Mermaid option
43    pub fn with_mermaid(mut self, enabled: bool) -> Self {
44        self.include_mermaid = enabled;
45        self
46    }
47
48    /// Set line numbers option
49    pub fn with_line_numbers(mut self, enabled: bool) -> Self {
50        self.include_line_numbers = enabled;
51        self
52    }
53
54    /// Set token model for token counts in output
55    pub fn with_model(mut self, model: TokenizerModel) -> Self {
56        self.token_model = model;
57        self
58    }
59
60    /// Estimate output size for pre-allocation
61    fn estimate_output_size(repo: &Repository) -> usize {
62        let base = 1000;
63        let files = repo.files.len() * 400;
64        let content: usize = repo
65            .files
66            .iter()
67            .filter_map(|f| f.content.as_ref())
68            .map(|c| c.len())
69            .sum();
70        base + files + content
71    }
72
73    // =========================================================================
74    // Streaming methods (write to impl std::io::Write)
75    // =========================================================================
76
77    fn stream_header<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
78        writeln!(w, "# Repository: {}", repo.name)?;
79        writeln!(w)?;
80        writeln!(
81            w,
82            "> **Files**: {} | **Lines**: {} | **Tokens**: {}",
83            repo.metadata.total_files,
84            repo.metadata.total_lines,
85            repo.metadata.total_tokens.get(self.token_model)
86        )?;
87        writeln!(w)
88    }
89
90    fn stream_overview<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
91        if !self.include_tables {
92            return Ok(());
93        }
94
95        writeln!(w, "## Overview")?;
96        writeln!(w)?;
97        writeln!(w, "| Metric | Value |")?;
98        writeln!(w, "|--------|-------|")?;
99        writeln!(w, "| Files | {} |", repo.metadata.total_files)?;
100        writeln!(w, "| Lines | {} |", repo.metadata.total_lines)?;
101
102        if let Some(lang) = repo.metadata.languages.first() {
103            writeln!(w, "| Primary Language | {} |", lang.language)?;
104        }
105        if let Some(framework) = &repo.metadata.framework {
106            writeln!(w, "| Framework | {} |", framework)?;
107        }
108        writeln!(w)?;
109
110        if repo.metadata.languages.len() > 1 {
111            writeln!(w, "### Languages")?;
112            writeln!(w)?;
113            writeln!(w, "| Language | Files | Percentage |")?;
114            writeln!(w, "|----------|-------|------------|")?;
115            for lang in &repo.metadata.languages {
116                writeln!(w, "| {} | {} | {:.1}% |", lang.language, lang.files, lang.percentage)?;
117            }
118            writeln!(w)?;
119        }
120        Ok(())
121    }
122
123    fn stream_repomap<W: Write>(&self, w: &mut W, map: &RepoMap) -> io::Result<()> {
124        writeln!(w, "## Repository Map")?;
125        writeln!(w)?;
126        writeln!(w, "{}", map.summary)?;
127        writeln!(w)?;
128
129        writeln!(w, "### Key Symbols")?;
130        writeln!(w)?;
131        writeln!(w, "| Rank | Symbol | Type | File | Line | Summary |")?;
132        writeln!(w, "|------|--------|------|------|------|---------|")?;
133        for sym in map.key_symbols.iter().take(15) {
134            let summary = sym
135                .summary
136                .as_deref()
137                .map(escape_markdown_cell)
138                .unwrap_or_default();
139            writeln!(
140                w,
141                "| {} | `{}` | {} | {} | {} | {} |",
142                sym.rank, sym.name, sym.kind, sym.file, sym.line, summary
143            )?;
144        }
145        writeln!(w)?;
146
147        if self.include_mermaid && !map.module_graph.edges.is_empty() {
148            writeln!(w, "### Module Dependencies")?;
149            writeln!(w)?;
150            writeln!(w, "```mermaid")?;
151            writeln!(w, "graph LR")?;
152            for edge in &map.module_graph.edges {
153                let sanitize_id = |s: &str| -> String {
154                    s.chars()
155                        .map(|c| if c == '-' || c == '.' { '_' } else { c })
156                        .collect()
157                };
158                let from_id = sanitize_id(&edge.from);
159                let to_id = sanitize_id(&edge.to);
160                writeln!(w, "    {}[\"{}\"] --> {}[\"{}\"]", from_id, edge.from, to_id, edge.to)?;
161            }
162            writeln!(w, "```")?;
163            writeln!(w)?;
164        }
165        Ok(())
166    }
167
168    fn stream_structure<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
169        if !self.include_tree {
170            return Ok(());
171        }
172
173        writeln!(w, "## Project Structure")?;
174        writeln!(w)?;
175        writeln!(w, "```")?;
176
177        let mut paths: Vec<_> = repo
178            .files
179            .iter()
180            .map(|f| f.relative_path.as_str())
181            .collect();
182        paths.sort();
183
184        let mut prev_parts: Vec<&str> = Vec::new();
185        for path in paths {
186            let parts: Vec<_> = path.split('/').collect();
187            let mut common = 0;
188            for (i, part) in parts.iter().enumerate() {
189                if i < prev_parts.len() && prev_parts[i] == *part {
190                    common = i + 1;
191                } else {
192                    break;
193                }
194            }
195            for (i, part) in parts.iter().enumerate().skip(common) {
196                let indent = "  ".repeat(i);
197                let prefix = if i == parts.len() - 1 {
198                    "📄 "
199                } else {
200                    "📁 "
201                };
202                writeln!(w, "{}{}{}", indent, prefix, part)?;
203            }
204            prev_parts = parts;
205        }
206
207        writeln!(w, "```")?;
208        writeln!(w)
209    }
210
211    fn stream_files<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
212        writeln!(w, "## Files")?;
213        writeln!(w)?;
214
215        for file in &repo.files {
216            if let Some(content) = &file.content {
217                writeln!(w, "### {}", file.relative_path)?;
218                writeln!(w)?;
219                writeln!(
220                    w,
221                    "> **Tokens**: {} | **Language**: {}",
222                    file.token_count.get(self.token_model),
223                    file.language.as_deref().unwrap_or("unknown")
224                )?;
225                writeln!(w)?;
226
227                let lang = file.language.as_deref().unwrap_or("");
228                writeln!(w, "```{}", lang)?;
229                if self.include_line_numbers {
230                    // Check if content has embedded line numbers (format: "N:content")
231                    // This preserves original line numbers when content has been compressed
232                    let first_line = content.lines().next().unwrap_or("");
233                    let has_embedded_line_nums = first_line.contains(':')
234                        && first_line
235                            .split(':')
236                            .next()
237                            .map(|s| s.parse::<u32>().is_ok())
238                            .unwrap_or(false);
239
240                    if has_embedded_line_nums {
241                        // Content has embedded line numbers - parse and output
242                        for line in content.lines() {
243                            if let Some((num_str, rest)) = line.split_once(':') {
244                                if let Ok(line_num) = num_str.parse::<u32>() {
245                                    writeln!(w, "{:4} {}", line_num, rest)?;
246                                } else {
247                                    // Fallback for malformed lines
248                                    writeln!(w, "     {}", line)?;
249                                }
250                            } else {
251                                writeln!(w, "     {}", line)?;
252                            }
253                        }
254                    } else {
255                        // No embedded line numbers - use sequential (uncompressed content)
256                        for (i, line) in content.lines().enumerate() {
257                            writeln!(w, "{:4} {}", i + 1, line)?;
258                        }
259                    }
260                } else {
261                    writeln!(w, "{}", content)?;
262                }
263                writeln!(w, "```")?;
264                writeln!(w)?;
265            }
266        }
267        Ok(())
268    }
269}
270
271impl Default for MarkdownFormatter {
272    fn default() -> Self {
273        Self::new()
274    }
275}
276
277impl Formatter for MarkdownFormatter {
278    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
279        // Use streaming internally for consistency
280        let mut output = Vec::with_capacity(Self::estimate_output_size(repo));
281        // Vec<u8> write cannot fail, ignore result
282        drop(self.format_to_writer(repo, map, &mut output));
283        // Use lossy conversion to handle any edge cases with invalid UTF-8
284        String::from_utf8(output)
285            .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
286    }
287
288    fn format_repo(&self, repo: &Repository) -> String {
289        let mut output = Vec::with_capacity(Self::estimate_output_size(repo));
290        // Vec<u8> write cannot fail, ignore result
291        drop(self.format_repo_to_writer(repo, &mut output));
292        // Use lossy conversion to handle any edge cases with invalid UTF-8
293        String::from_utf8(output)
294            .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
295    }
296
297    fn name(&self) -> &'static str {
298        "markdown"
299    }
300}
301
302impl StreamingFormatter for MarkdownFormatter {
303    fn format_to_writer<W: Write>(
304        &self,
305        repo: &Repository,
306        map: &RepoMap,
307        writer: &mut W,
308    ) -> io::Result<()> {
309        self.stream_header(writer, repo)?;
310        self.stream_overview(writer, repo)?;
311        self.stream_repomap(writer, map)?;
312        self.stream_structure(writer, repo)?;
313        self.stream_files(writer, repo)?;
314        Ok(())
315    }
316
317    fn format_repo_to_writer<W: Write>(&self, repo: &Repository, writer: &mut W) -> io::Result<()> {
318        self.stream_header(writer, repo)?;
319        self.stream_overview(writer, repo)?;
320        self.stream_structure(writer, repo)?;
321        self.stream_files(writer, repo)?;
322        Ok(())
323    }
324}
325
/// Make `text` safe to place inside a single Markdown table cell
///
/// Pipes are backslash-escaped so they are not read as column separators, every
/// line break (`\n`, `\r\n`, or a bare `\r`) is replaced by one space so the row
/// stays on one line, and surrounding whitespace is trimmed.
fn escape_markdown_cell(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '|' => escaped.push_str("\\|"),
            '\r' => {
                // Treat CRLF as a single line break so it yields one space, not two
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                escaped.push(' ');
            }
            '\n' => escaped.push(' '),
            other => escaped.push(other),
        }
    }
    escaped.trim().to_owned()
}
332
333#[cfg(test)]
334#[allow(clippy::str_to_string)]
335mod tests {
336    use super::*;
337    use crate::repomap::{
338        FileIndexEntry, ModuleEdge, ModuleGraph, ModuleNode, RankedSymbol, RepoMap,
339        RepoMapGenerator,
340    };
341    use crate::types::{LanguageStats, RepoFile, RepoMetadata, TokenCounts};
342
343    fn create_test_repo() -> Repository {
344        Repository {
345            name: "test".to_string(),
346            path: "/tmp/test".into(),
347            files: vec![RepoFile {
348                path: "/tmp/test/main.py".into(),
349                relative_path: "main.py".to_string(),
350                language: Some("python".to_string()),
351                size_bytes: 100,
352                token_count: TokenCounts {
353                    o200k: 48,
354                    cl100k: 49,
355                    claude: 50,
356                    gemini: 47,
357                    llama: 46,
358                    mistral: 46,
359                    deepseek: 46,
360                    qwen: 46,
361                    cohere: 47,
362                    grok: 46,
363                },
364                symbols: Vec::new(),
365                importance: 0.8,
366                content: Some("def main():\n    print('hello')".to_string()),
367            }],
368            metadata: RepoMetadata {
369                total_files: 1,
370                total_lines: 2,
371                total_tokens: TokenCounts {
372                    o200k: 48,
373                    cl100k: 49,
374                    claude: 50,
375                    gemini: 47,
376                    llama: 46,
377                    mistral: 46,
378                    deepseek: 46,
379                    qwen: 46,
380                    cohere: 47,
381                    grok: 46,
382                },
383                languages: vec![LanguageStats {
384                    language: "Python".to_string(),
385                    files: 1,
386                    lines: 2,
387                    percentage: 100.0,
388                }],
389                framework: None,
390                description: None,
391                branch: None,
392                commit: None,
393                directory_structure: None,
394                external_dependencies: vec![],
395                git_history: None,
396            },
397        }
398    }
399
400    fn create_multi_language_repo() -> Repository {
401        Repository {
402            name: "multi-lang".to_string(),
403            path: "/tmp/multi".into(),
404            files: vec![
405                RepoFile {
406                    path: "/tmp/multi/src/main.rs".into(),
407                    relative_path: "src/main.rs".to_string(),
408                    language: Some("rust".to_string()),
409                    size_bytes: 200,
410                    token_count: TokenCounts::default(),
411                    symbols: Vec::new(),
412                    importance: 0.9,
413                    content: Some("fn main() {\n    println!(\"hello\");\n}".to_string()),
414                },
415                RepoFile {
416                    path: "/tmp/multi/src/lib.rs".into(),
417                    relative_path: "src/lib.rs".to_string(),
418                    language: Some("rust".to_string()),
419                    size_bytes: 150,
420                    token_count: TokenCounts::default(),
421                    symbols: Vec::new(),
422                    importance: 0.8,
423                    content: Some("pub mod utils;".to_string()),
424                },
425                RepoFile {
426                    path: "/tmp/multi/tests/test.py".into(),
427                    relative_path: "tests/test.py".to_string(),
428                    language: Some("python".to_string()),
429                    size_bytes: 100,
430                    token_count: TokenCounts::default(),
431                    symbols: Vec::new(),
432                    importance: 0.5,
433                    content: Some("def test_it(): pass".to_string()),
434                },
435            ],
436            metadata: RepoMetadata {
437                total_files: 3,
438                total_lines: 5,
439                total_tokens: TokenCounts::default(),
440                languages: vec![
441                    LanguageStats {
442                        language: "Rust".to_string(),
443                        files: 2,
444                        lines: 4,
445                        percentage: 66.7,
446                    },
447                    LanguageStats {
448                        language: "Python".to_string(),
449                        files: 1,
450                        lines: 1,
451                        percentage: 33.3,
452                    },
453                ],
454                framework: Some("Actix".to_string()),
455                description: Some("Test project".to_string()),
456                branch: Some("main".to_string()),
457                commit: Some("abc123".to_string()),
458                directory_structure: None,
459                external_dependencies: vec!["tokio".to_string()],
460                git_history: None,
461            },
462        }
463    }
464
465    fn create_test_map() -> RepoMap {
466        RepoMap {
467            summary: "Test repository with 1 key symbol".to_string(),
468            key_symbols: vec![RankedSymbol {
469                rank: 1,
470                name: "main".to_string(),
471                kind: "function".to_string(),
472                file: "main.py".to_string(),
473                line: 1,
474                signature: None,
475                summary: Some("Entry point".to_string()),
476                references: 0,
477                importance: 0.95,
478            }],
479            module_graph: ModuleGraph {
480                nodes: vec![ModuleNode { name: "main".to_string(), files: 1, tokens: 50 }],
481                edges: vec![],
482            },
483            file_index: vec![FileIndexEntry {
484                path: "main.py".to_string(),
485                tokens: 50,
486                importance: "high".to_string(),
487                summary: None,
488            }],
489            token_count: 50,
490        }
491    }
492
493    fn create_map_with_mermaid() -> RepoMap {
494        RepoMap {
495            summary: "Test with dependencies".to_string(),
496            key_symbols: vec![
497                RankedSymbol {
498                    rank: 1,
499                    name: "main".to_string(),
500                    kind: "function".to_string(),
501                    file: "main.rs".to_string(),
502                    line: 1,
503                    signature: Some("fn main()".to_string()),
504                    summary: Some("Entry | point".to_string()),
505                    references: 5,
506                    importance: 0.95,
507                },
508                RankedSymbol {
509                    rank: 2,
510                    name: "helper".to_string(),
511                    kind: "function".to_string(),
512                    file: "lib.rs".to_string(),
513                    line: 5,
514                    signature: None,
515                    summary: None,
516                    references: 2,
517                    importance: 0.7,
518                },
519            ],
520            module_graph: ModuleGraph {
521                nodes: vec![
522                    ModuleNode { name: "main".to_string(), files: 1, tokens: 100 },
523                    ModuleNode { name: "lib".to_string(), files: 1, tokens: 80 },
524                ],
525                edges: vec![ModuleEdge {
526                    from: "main-mod".to_string(),
527                    to: "lib.rs".to_string(),
528                    weight: 1,
529                }],
530            },
531            file_index: vec![
532                FileIndexEntry {
533                    path: "main.rs".to_string(),
534                    tokens: 100,
535                    importance: "critical".to_string(),
536                    summary: None,
537                },
538                FileIndexEntry {
539                    path: "lib.rs".to_string(),
540                    tokens: 80,
541                    importance: "high".to_string(),
542                    summary: None,
543                },
544            ],
545            token_count: 100,
546        }
547    }
548
549    #[test]
550    fn test_markdown_output() {
551        let repo = create_test_repo();
552        let map = RepoMapGenerator::new(1000).generate(&repo);
553
554        let formatter = MarkdownFormatter::new();
555        let output = formatter.format(&repo, &map);
556
557        assert!(output.contains("# Repository: test"));
558        assert!(output.contains("## Overview"));
559        assert!(output.contains("```python"));
560    }
561
562    #[test]
563    fn test_markdown_default() {
564        let formatter = MarkdownFormatter::default();
565        assert_eq!(formatter.name(), "markdown");
566    }
567
568    #[test]
569    fn test_builder_with_tables() {
570        let formatter = MarkdownFormatter::new().with_tables(false);
571        let repo = create_test_repo();
572        let map = create_test_map();
573        let output = formatter.format(&repo, &map);
574        assert!(!output.contains("## Overview"));
575        assert!(!output.contains("| Metric | Value |"));
576    }
577
578    #[test]
579    fn test_builder_with_mermaid_disabled() {
580        let formatter = MarkdownFormatter::new().with_mermaid(false);
581        let repo = create_multi_language_repo();
582        let map = create_map_with_mermaid();
583        let output = formatter.format(&repo, &map);
584        assert!(!output.contains("```mermaid"));
585    }
586
587    #[test]
588    fn test_builder_with_mermaid_enabled() {
589        let formatter = MarkdownFormatter::new().with_mermaid(true);
590        let repo = create_multi_language_repo();
591        let map = create_map_with_mermaid();
592        let output = formatter.format(&repo, &map);
593        assert!(output.contains("```mermaid"));
594        assert!(output.contains("graph LR"));
595        // Check ID sanitization (- and . replaced with _)
596        assert!(output.contains("main_mod"));
597        assert!(output.contains("lib_rs"));
598    }
599
600    #[test]
601    fn test_builder_with_line_numbers_disabled() {
602        let formatter = MarkdownFormatter::new().with_line_numbers(false);
603        let repo = create_test_repo();
604        let map = create_test_map();
605        let output = formatter.format(&repo, &map);
606        // Should NOT have line numbers like "   1 def main():"
607        assert!(!output.contains("   1 def main"));
608        // Should have raw content
609        assert!(output.contains("def main():"));
610    }
611
612    #[test]
613    fn test_builder_with_model() {
614        let formatter = MarkdownFormatter::new().with_model(TokenizerModel::Gpt4o);
615        let repo = create_test_repo();
616        let map = create_test_map();
617        let output = formatter.format(&repo, &map);
618        // GPT-4o uses o200k encoding, which is 48 in our test data
619        assert!(output.contains("**Tokens**: 48"));
620    }
621
622    #[test]
623    fn test_estimate_output_size() {
624        let repo = create_test_repo();
625        let size = MarkdownFormatter::estimate_output_size(&repo);
626        // base (1000) + files (1 * 400) + content length (~30)
627        assert!(size > 1000);
628        assert!(size < 2000);
629    }
630
631    #[test]
632    fn test_stream_header() {
633        let formatter = MarkdownFormatter::new();
634        let repo = create_test_repo();
635        let mut buf = Vec::new();
636        formatter.stream_header(&mut buf, &repo).unwrap();
637        let output = String::from_utf8(buf).unwrap();
638        assert!(output.contains("# Repository: test"));
639        assert!(output.contains("**Files**: 1"));
640        assert!(output.contains("**Lines**: 2"));
641        assert!(output.contains("**Tokens**: 50")); // Claude tokens
642    }
643
644    #[test]
645    fn test_stream_overview_with_framework() {
646        let formatter = MarkdownFormatter::new();
647        let repo = create_multi_language_repo();
648        let mut buf = Vec::new();
649        formatter.stream_overview(&mut buf, &repo).unwrap();
650        let output = String::from_utf8(buf).unwrap();
651        assert!(output.contains("| Framework | Actix |"));
652        assert!(output.contains("| Primary Language | Rust |"));
653    }
654
655    #[test]
656    fn test_stream_overview_multiple_languages() {
657        let formatter = MarkdownFormatter::new();
658        let repo = create_multi_language_repo();
659        let mut buf = Vec::new();
660        formatter.stream_overview(&mut buf, &repo).unwrap();
661        let output = String::from_utf8(buf).unwrap();
662        assert!(output.contains("### Languages"));
663        assert!(output.contains("| Rust | 2 | 66.7% |"));
664        assert!(output.contains("| Python | 1 | 33.3% |"));
665    }
666
667    #[test]
668    fn test_stream_overview_disabled() {
669        let formatter = MarkdownFormatter::new().with_tables(false);
670        let repo = create_test_repo();
671        let mut buf = Vec::new();
672        formatter.stream_overview(&mut buf, &repo).unwrap();
673        let output = String::from_utf8(buf).unwrap();
674        assert!(output.is_empty());
675    }
676
677    #[test]
678    fn test_stream_repomap() {
679        let formatter = MarkdownFormatter::new();
680        let map = create_test_map();
681        let mut buf = Vec::new();
682        formatter.stream_repomap(&mut buf, &map).unwrap();
683        let output = String::from_utf8(buf).unwrap();
684        assert!(output.contains("## Repository Map"));
685        assert!(output.contains("### Key Symbols"));
686        assert!(output.contains("| 1 | `main` | function | main.py | 1 | Entry point |"));
687    }
688
689    #[test]
690    fn test_stream_repomap_escapes_pipe_in_summary() {
691        let formatter = MarkdownFormatter::new();
692        let map = create_map_with_mermaid();
693        let mut buf = Vec::new();
694        formatter.stream_repomap(&mut buf, &map).unwrap();
695        let output = String::from_utf8(buf).unwrap();
696        // Pipe should be escaped
697        assert!(output.contains("Entry \\| point"));
698    }
699
700    #[test]
701    fn test_stream_structure() {
702        let formatter = MarkdownFormatter::new();
703        let repo = create_multi_language_repo();
704        let mut buf = Vec::new();
705        formatter.stream_structure(&mut buf, &repo).unwrap();
706        let output = String::from_utf8(buf).unwrap();
707        assert!(output.contains("## Project Structure"));
708        assert!(output.contains("```"));
709    }
710
711    #[test]
712    fn test_stream_structure_disabled() {
713        // Create a formatter with tree disabled by modifying internal state
714        let mut formatter = MarkdownFormatter::new();
715        formatter.include_tree = false;
716        let repo = create_test_repo();
717        let mut buf = Vec::new();
718        formatter.stream_structure(&mut buf, &repo).unwrap();
719        let output = String::from_utf8(buf).unwrap();
720        assert!(output.is_empty());
721    }
722
723    #[test]
724    fn test_stream_files_with_line_numbers() {
725        let formatter = MarkdownFormatter::new().with_line_numbers(true);
726        let repo = create_test_repo();
727        let mut buf = Vec::new();
728        formatter.stream_files(&mut buf, &repo).unwrap();
729        let output = String::from_utf8(buf).unwrap();
730        assert!(output.contains("### main.py"));
731        assert!(output.contains("**Tokens**: 50"));
732        assert!(output.contains("**Language**: python"));
733        // Line numbers should be present
734        assert!(output.contains("   1 def main():"));
735        assert!(output.contains("   2     print('hello')"));
736    }
737
738    #[test]
739    fn test_stream_files_without_line_numbers() {
740        let formatter = MarkdownFormatter::new().with_line_numbers(false);
741        let repo = create_test_repo();
742        let mut buf = Vec::new();
743        formatter.stream_files(&mut buf, &repo).unwrap();
744        let output = String::from_utf8(buf).unwrap();
745        // Should have raw content without line numbers
746        assert!(output.contains("def main():\n    print('hello')"));
747    }
748
749    #[test]
750    fn test_stream_files_with_embedded_line_numbers() {
751        let mut repo = create_test_repo();
752        // Set content with embedded line numbers (compressed format)
753        repo.files[0].content = Some("1:def main():\n5:    print('hello')".to_string());
754        let formatter = MarkdownFormatter::new().with_line_numbers(true);
755        let mut buf = Vec::new();
756        formatter.stream_files(&mut buf, &repo).unwrap();
757        let output = String::from_utf8(buf).unwrap();
758        // Should parse and display original line numbers
759        assert!(output.contains("   1 def main():"));
760        assert!(output.contains("   5     print('hello')"));
761    }
762
763    #[test]
764    fn test_stream_files_with_malformed_embedded_line_numbers() {
765        let mut repo = create_test_repo();
766        // Malformed embedded line numbers
767        repo.files[0].content = Some("abc:def main():\nno_colon_here".to_string());
768        let formatter = MarkdownFormatter::new().with_line_numbers(true);
769        let mut buf = Vec::new();
770        formatter.stream_files(&mut buf, &repo).unwrap();
771        let output = String::from_utf8(buf).unwrap();
772        // Should handle gracefully - use sequential numbers since first line doesn't parse
773        assert!(output.contains("   1 abc:def main():"));
774    }
775
776    #[test]
777    fn test_stream_files_with_no_content() {
778        let mut repo = create_test_repo();
779        repo.files[0].content = None;
780        let formatter = MarkdownFormatter::new();
781        let mut buf = Vec::new();
782        formatter.stream_files(&mut buf, &repo).unwrap();
783        let output = String::from_utf8(buf).unwrap();
784        // Should still have ## Files header but no file content
785        assert!(output.contains("## Files"));
786        assert!(!output.contains("### main.py"));
787    }
788
789    #[test]
790    fn test_stream_files_unknown_language() {
791        let mut repo = create_test_repo();
792        repo.files[0].language = None;
793        let formatter = MarkdownFormatter::new();
794        let mut buf = Vec::new();
795        formatter.stream_files(&mut buf, &repo).unwrap();
796        let output = String::from_utf8(buf).unwrap();
797        assert!(output.contains("**Language**: unknown"));
798    }
799
800    #[test]
801    fn test_format_repo_without_map() {
802        let formatter = MarkdownFormatter::new();
803        let repo = create_test_repo();
804        let output = formatter.format_repo(&repo);
805        assert!(output.contains("# Repository: test"));
806        assert!(output.contains("## Overview"));
807        // Should NOT have repomap section
808        assert!(!output.contains("## Repository Map"));
809    }
810
811    #[test]
812    fn test_streaming_formatter_trait() {
813        let formatter = MarkdownFormatter::new();
814        let repo = create_test_repo();
815        let map = create_test_map();
816        let mut buf = Vec::new();
817        formatter.format_to_writer(&repo, &map, &mut buf).unwrap();
818        let output = String::from_utf8(buf).unwrap();
819        assert!(output.contains("# Repository: test"));
820        assert!(output.contains("## Repository Map"));
821    }
822
823    #[test]
824    fn test_streaming_formatter_repo_only() {
825        let formatter = MarkdownFormatter::new();
826        let repo = create_test_repo();
827        let mut buf = Vec::new();
828        formatter.format_repo_to_writer(&repo, &mut buf).unwrap();
829        let output = String::from_utf8(buf).unwrap();
830        assert!(output.contains("# Repository: test"));
831        assert!(!output.contains("## Repository Map"));
832    }
833
834    #[test]
835    fn test_escape_markdown_cell() {
836        assert_eq!(escape_markdown_cell("hello"), "hello");
837        assert_eq!(escape_markdown_cell("a|b"), "a\\|b");
838        assert_eq!(escape_markdown_cell("line1\nline2"), "line1 line2");
839        assert_eq!(escape_markdown_cell("  spaced  "), "spaced");
840        assert_eq!(escape_markdown_cell("a|b\nc|d"), "a\\|b c\\|d");
841    }
842
843    #[test]
844    fn test_escape_markdown_cell_complex() {
845        // Multiple pipes and newlines
846        let input = "col1|col2|col3\nrow1|row2|row3";
847        let expected = "col1\\|col2\\|col3 row1\\|row2\\|row3";
848        assert_eq!(escape_markdown_cell(input), expected);
849    }
850
851    #[test]
852    fn test_full_format_with_all_features() {
853        let formatter = MarkdownFormatter::new()
854            .with_tables(true)
855            .with_mermaid(true)
856            .with_line_numbers(true)
857            .with_model(TokenizerModel::Claude);
858        let repo = create_multi_language_repo();
859        let map = create_map_with_mermaid();
860        let output = formatter.format(&repo, &map);
861
862        // All sections present
863        assert!(output.contains("# Repository: multi-lang"));
864        assert!(output.contains("## Overview"));
865        assert!(output.contains("## Repository Map"));
866        assert!(output.contains("## Project Structure"));
867        assert!(output.contains("## Files"));
868        assert!(output.contains("```mermaid"));
869    }
870
871    #[test]
872    fn test_format_with_empty_repo() {
873        let repo = Repository {
874            name: "empty".to_string(),
875            path: "/tmp/empty".into(),
876            files: vec![],
877            metadata: RepoMetadata::default(),
878        };
879        let map = RepoMap {
880            summary: "Empty repository".to_string(),
881            key_symbols: vec![],
882            module_graph: ModuleGraph { nodes: vec![], edges: vec![] },
883            file_index: vec![],
884            token_count: 0,
885        };
886        let formatter = MarkdownFormatter::new();
887        let output = formatter.format(&repo, &map);
888        assert!(output.contains("# Repository: empty"));
889        // Should not fail
890    }
891
892    #[test]
893    fn test_estimate_output_size_empty_repo() {
894        let repo = Repository {
895            name: "empty".to_string(),
896            path: "/tmp/empty".into(),
897            files: vec![],
898            metadata: RepoMetadata::default(),
899        };
900        let size = MarkdownFormatter::estimate_output_size(&repo);
901        assert_eq!(size, 1000); // Just base size
902    }
903
904    #[test]
905    fn test_estimate_output_size_with_content() {
906        let mut repo = create_test_repo();
907        repo.files[0].content = Some("x".repeat(5000));
908        let size = MarkdownFormatter::estimate_output_size(&repo);
909        // base (1000) + files (1 * 400) + content (5000)
910        assert_eq!(size, 6400);
911    }
912
913    #[test]
914    fn test_structure_nested_paths() {
915        let repo = Repository {
916            name: "nested".to_string(),
917            path: "/tmp/nested".into(),
918            files: vec![
919                RepoFile::new("/tmp/nested/src/a/b/c.rs", "src/a/b/c.rs"),
920                RepoFile::new("/tmp/nested/src/a/b/d.rs", "src/a/b/d.rs"),
921                RepoFile::new("/tmp/nested/src/a/e.rs", "src/a/e.rs"),
922                RepoFile::new("/tmp/nested/tests/test.rs", "tests/test.rs"),
923            ],
924            metadata: RepoMetadata::default(),
925        };
926        let formatter = MarkdownFormatter::new();
927        let mut buf = Vec::new();
928        formatter.stream_structure(&mut buf, &repo).unwrap();
929        let output = String::from_utf8(buf).unwrap();
930        // Should show directory structure
931        assert!(output.contains("src"));
932        assert!(output.contains("tests"));
933    }
934
935    #[test]
936    fn test_name_method() {
937        let formatter = MarkdownFormatter::new();
938        assert_eq!(formatter.name(), "markdown");
939    }
940}