Skip to main content

infiniloom_engine/output/
markdown.rs

//! GPT-optimized Markdown output formatter.
//!
//! Supports both in-memory (`format()`) and streaming (`format_to_writer()`) modes.
4
5use crate::output::{Formatter, StreamingFormatter};
6use crate::repomap::RepoMap;
7use crate::types::{Repository, TokenizerModel};
8use std::io::{self, Write};
9
10/// Markdown formatter optimized for GPT
11pub struct MarkdownFormatter {
12    /// Include overview tables
13    include_tables: bool,
14    /// Include Mermaid diagrams
15    include_mermaid: bool,
16    /// Include file tree
17    include_tree: bool,
18    /// Include line numbers in code
19    include_line_numbers: bool,
20    /// Token model for counts in output
21    token_model: TokenizerModel,
22}
23
24impl MarkdownFormatter {
25    /// Create a new Markdown formatter
26    pub fn new() -> Self {
27        Self {
28            include_tables: true,
29            include_mermaid: true,
30            include_tree: true,
31            include_line_numbers: true,
32            token_model: TokenizerModel::Claude,
33        }
34    }
35
36    /// Set tables option
37    pub fn with_tables(mut self, enabled: bool) -> Self {
38        self.include_tables = enabled;
39        self
40    }
41
42    /// Set Mermaid option
43    pub fn with_mermaid(mut self, enabled: bool) -> Self {
44        self.include_mermaid = enabled;
45        self
46    }
47
48    /// Set line numbers option
49    pub fn with_line_numbers(mut self, enabled: bool) -> Self {
50        self.include_line_numbers = enabled;
51        self
52    }
53
54    /// Set token model for token counts in output
55    pub fn with_model(mut self, model: TokenizerModel) -> Self {
56        self.token_model = model;
57        self
58    }
59
60    /// Estimate output size for pre-allocation
61    fn estimate_output_size(repo: &Repository) -> usize {
62        let base = 1000;
63        let files = repo.files.len() * 400;
64        let content: usize = repo
65            .files
66            .iter()
67            .filter_map(|f| f.content.as_ref())
68            .map(|c| c.len())
69            .sum();
70        base + files + content
71    }
72
73    // =========================================================================
74    // Streaming methods (write to impl std::io::Write)
75    // =========================================================================
76
77    fn stream_header<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
78        writeln!(w, "# Repository: {}", repo.name)?;
79        writeln!(w)?;
80        writeln!(
81            w,
82            "> **Files**: {} | **Lines**: {} | **Tokens**: {}",
83            repo.metadata.total_files,
84            repo.metadata.total_lines,
85            repo.metadata.total_tokens.get(self.token_model)
86        )?;
87        writeln!(w)
88    }
89
90    fn stream_overview<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
91        if !self.include_tables {
92            return Ok(());
93        }
94
95        writeln!(w, "## Overview")?;
96        writeln!(w)?;
97        writeln!(w, "| Metric | Value |")?;
98        writeln!(w, "|--------|-------|")?;
99        writeln!(w, "| Files | {} |", repo.metadata.total_files)?;
100        writeln!(w, "| Lines | {} |", repo.metadata.total_lines)?;
101
102        if let Some(lang) = repo.metadata.languages.first() {
103            writeln!(w, "| Primary Language | {} |", lang.language)?;
104        }
105        if let Some(framework) = &repo.metadata.framework {
106            writeln!(w, "| Framework | {} |", framework)?;
107        }
108        writeln!(w)?;
109
110        if repo.metadata.languages.len() > 1 {
111            writeln!(w, "### Languages")?;
112            writeln!(w)?;
113            writeln!(w, "| Language | Files | Percentage |")?;
114            writeln!(w, "|----------|-------|------------|")?;
115            for lang in &repo.metadata.languages {
116                writeln!(w, "| {} | {} | {:.1}% |", lang.language, lang.files, lang.percentage)?;
117            }
118            writeln!(w)?;
119        }
120        Ok(())
121    }
122
123    fn stream_repomap<W: Write>(&self, w: &mut W, map: &RepoMap) -> io::Result<()> {
124        writeln!(w, "## Repository Map")?;
125        writeln!(w)?;
126        writeln!(w, "{}", map.summary)?;
127        writeln!(w)?;
128
129        writeln!(w, "### Key Symbols")?;
130        writeln!(w)?;
131        writeln!(w, "| Rank | Symbol | Type | File | Line | Summary |")?;
132        writeln!(w, "|------|--------|------|------|------|---------|")?;
133        for sym in map.key_symbols.iter().take(15) {
134            let summary = sym
135                .summary
136                .as_deref()
137                .map(escape_markdown_cell)
138                .unwrap_or_default();
139            writeln!(
140                w,
141                "| {} | `{}` | {} | {} | {} | {} |",
142                sym.rank, sym.name, sym.kind, sym.file, sym.line, summary
143            )?;
144        }
145        writeln!(w)?;
146
147        if self.include_mermaid && !map.module_graph.edges.is_empty() {
148            writeln!(w, "### Module Dependencies")?;
149            writeln!(w)?;
150            writeln!(w, "```mermaid")?;
151            writeln!(w, "graph LR")?;
152            for edge in &map.module_graph.edges {
153                let sanitize_id = |s: &str| -> String {
154                    s.chars()
155                        .map(|c| if c == '-' || c == '.' { '_' } else { c })
156                        .collect()
157                };
158                let from_id = sanitize_id(&edge.from);
159                let to_id = sanitize_id(&edge.to);
160                writeln!(w, "    {}[\"{}\"] --> {}[\"{}\"]", from_id, edge.from, to_id, edge.to)?;
161            }
162            writeln!(w, "```")?;
163            writeln!(w)?;
164        }
165        Ok(())
166    }
167
168    fn stream_structure<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
169        if !self.include_tree {
170            return Ok(());
171        }
172
173        writeln!(w, "## Project Structure")?;
174        writeln!(w)?;
175        writeln!(w, "```")?;
176
177        let mut paths: Vec<_> = repo
178            .files
179            .iter()
180            .map(|f| f.relative_path.as_str())
181            .collect();
182        paths.sort();
183
184        let mut prev_parts: Vec<&str> = Vec::new();
185        for path in paths {
186            let parts: Vec<_> = path.split('/').collect();
187            let mut common = 0;
188            for (i, part) in parts.iter().enumerate() {
189                if i < prev_parts.len() && prev_parts[i] == *part {
190                    common = i + 1;
191                } else {
192                    break;
193                }
194            }
195            for (i, part) in parts.iter().enumerate().skip(common) {
196                let indent = "  ".repeat(i);
197                let prefix = if i == parts.len() - 1 {
198                    "📄 "
199                } else {
200                    "📁 "
201                };
202                writeln!(w, "{}{}{}", indent, prefix, part)?;
203            }
204            prev_parts = parts;
205        }
206
207        writeln!(w, "```")?;
208        writeln!(w)
209    }
210
211    fn stream_files<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
212        writeln!(w, "## Files")?;
213        writeln!(w)?;
214
215        for file in &repo.files {
216            if let Some(content) = &file.content {
217                writeln!(w, "### {}", file.relative_path)?;
218                writeln!(w)?;
219                writeln!(
220                    w,
221                    "> **Tokens**: {} | **Language**: {}",
222                    file.token_count.get(self.token_model),
223                    file.language.as_deref().unwrap_or("unknown")
224                )?;
225                writeln!(w)?;
226
227                let lang = file.language.as_deref().unwrap_or("");
228                writeln!(w, "```{}", lang)?;
229                if self.include_line_numbers {
230                    // Check if content has embedded line numbers (format: "N:content")
231                    // This preserves original line numbers when content has been compressed
232                    let first_line = content.lines().next().unwrap_or("");
233                    let has_embedded_line_nums = first_line.contains(':')
234                        && first_line
235                            .split(':')
236                            .next()
237                            .is_some_and(|s| s.parse::<u32>().is_ok());
238
239                    if has_embedded_line_nums {
240                        // Content has embedded line numbers - parse and output
241                        for line in content.lines() {
242                            if let Some((num_str, rest)) = line.split_once(':') {
243                                if let Ok(line_num) = num_str.parse::<u32>() {
244                                    writeln!(w, "{:4} {}", line_num, rest)?;
245                                } else {
246                                    // Fallback for malformed lines
247                                    writeln!(w, "     {}", line)?;
248                                }
249                            } else {
250                                writeln!(w, "     {}", line)?;
251                            }
252                        }
253                    } else {
254                        // No embedded line numbers - use sequential (uncompressed content)
255                        for (i, line) in content.lines().enumerate() {
256                            writeln!(w, "{:4} {}", i + 1, line)?;
257                        }
258                    }
259                } else {
260                    writeln!(w, "{}", content)?;
261                }
262                writeln!(w, "```")?;
263                writeln!(w)?;
264            }
265        }
266        Ok(())
267    }
268}
269
270impl Default for MarkdownFormatter {
271    fn default() -> Self {
272        Self::new()
273    }
274}
275
276impl Formatter for MarkdownFormatter {
277    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
278        // Use streaming internally for consistency
279        let mut output = Vec::with_capacity(Self::estimate_output_size(repo));
280        // Vec<u8> write cannot fail, ignore result
281        drop(self.format_to_writer(repo, map, &mut output));
282        // Use lossy conversion to handle any edge cases with invalid UTF-8
283        String::from_utf8(output)
284            .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
285    }
286
287    fn format_repo(&self, repo: &Repository) -> String {
288        let mut output = Vec::with_capacity(Self::estimate_output_size(repo));
289        // Vec<u8> write cannot fail, ignore result
290        drop(self.format_repo_to_writer(repo, &mut output));
291        // Use lossy conversion to handle any edge cases with invalid UTF-8
292        String::from_utf8(output)
293            .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
294    }
295
296    fn name(&self) -> &'static str {
297        "markdown"
298    }
299}
300
301impl StreamingFormatter for MarkdownFormatter {
302    fn format_to_writer<W: Write>(
303        &self,
304        repo: &Repository,
305        map: &RepoMap,
306        writer: &mut W,
307    ) -> io::Result<()> {
308        self.stream_header(writer, repo)?;
309        self.stream_overview(writer, repo)?;
310        self.stream_repomap(writer, map)?;
311        self.stream_structure(writer, repo)?;
312        self.stream_files(writer, repo)?;
313        Ok(())
314    }
315
316    fn format_repo_to_writer<W: Write>(&self, repo: &Repository, writer: &mut W) -> io::Result<()> {
317        self.stream_header(writer, repo)?;
318        self.stream_overview(writer, repo)?;
319        self.stream_structure(writer, repo)?;
320        self.stream_files(writer, repo)?;
321        Ok(())
322    }
323}
324
/// Make `text` safe to place inside a single Markdown table cell.
///
/// Surrounding whitespace is trimmed, every `|` is backslash-escaped so it is
/// not read as a column separator, and each line break (`\n`, `\r\n` or a
/// lone `\r`) is replaced by one space so the cell stays on a single row.
fn escape_markdown_cell(text: &str) -> String {
    // Trimming first is equivalent to trimming last: line breaks at either
    // end are whitespace, and escaping never produces whitespace at the edges.
    let trimmed = text.trim();
    let mut escaped = String::with_capacity(trimmed.len());
    let mut chars = trimmed.chars().peekable();

    while let Some(ch) = chars.next() {
        match ch {
            '|' => escaped.push_str("\\|"),
            '\r' => {
                // Treat CRLF as one line break, not two.
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                escaped.push(' ');
            }
            '\n' => escaped.push(' '),
            other => escaped.push(other),
        }
    }
    escaped
}
331
332#[cfg(test)]
333#[allow(clippy::str_to_string)]
334mod tests {
335    use super::*;
336    use crate::repomap::{
337        FileIndexEntry, ModuleEdge, ModuleGraph, ModuleNode, RankedSymbol, RepoMap,
338        RepoMapGenerator,
339    };
340    use crate::types::{LanguageStats, RepoFile, RepoMetadata, TokenCounts};
341
342    fn create_test_repo() -> Repository {
343        Repository {
344            name: "test".to_string(),
345            path: "/tmp/test".into(),
346            files: vec![RepoFile {
347                path: "/tmp/test/main.py".into(),
348                relative_path: "main.py".to_string(),
349                language: Some("python".to_string()),
350                size_bytes: 100,
351                token_count: TokenCounts {
352                    o200k: 48,
353                    cl100k: 49,
354                    claude: 50,
355                    gemini: 47,
356                    llama: 46,
357                    mistral: 46,
358                    deepseek: 46,
359                    qwen: 46,
360                    cohere: 47,
361                    grok: 46,
362                },
363                symbols: Vec::new(),
364                importance: 0.8,
365                content: Some("def main():\n    print('hello')".to_string()),
366            }],
367            metadata: RepoMetadata {
368                total_files: 1,
369                total_lines: 2,
370                total_tokens: TokenCounts {
371                    o200k: 48,
372                    cl100k: 49,
373                    claude: 50,
374                    gemini: 47,
375                    llama: 46,
376                    mistral: 46,
377                    deepseek: 46,
378                    qwen: 46,
379                    cohere: 47,
380                    grok: 46,
381                },
382                languages: vec![LanguageStats {
383                    language: "Python".to_string(),
384                    files: 1,
385                    lines: 2,
386                    percentage: 100.0,
387                }],
388                framework: None,
389                description: None,
390                branch: None,
391                commit: None,
392                directory_structure: None,
393                external_dependencies: vec![],
394                git_history: None,
395            },
396        }
397    }
398
399    fn create_multi_language_repo() -> Repository {
400        Repository {
401            name: "multi-lang".to_string(),
402            path: "/tmp/multi".into(),
403            files: vec![
404                RepoFile {
405                    path: "/tmp/multi/src/main.rs".into(),
406                    relative_path: "src/main.rs".to_string(),
407                    language: Some("rust".to_string()),
408                    size_bytes: 200,
409                    token_count: TokenCounts::default(),
410                    symbols: Vec::new(),
411                    importance: 0.9,
412                    content: Some("fn main() {\n    println!(\"hello\");\n}".to_string()),
413                },
414                RepoFile {
415                    path: "/tmp/multi/src/lib.rs".into(),
416                    relative_path: "src/lib.rs".to_string(),
417                    language: Some("rust".to_string()),
418                    size_bytes: 150,
419                    token_count: TokenCounts::default(),
420                    symbols: Vec::new(),
421                    importance: 0.8,
422                    content: Some("pub mod utils;".to_string()),
423                },
424                RepoFile {
425                    path: "/tmp/multi/tests/test.py".into(),
426                    relative_path: "tests/test.py".to_string(),
427                    language: Some("python".to_string()),
428                    size_bytes: 100,
429                    token_count: TokenCounts::default(),
430                    symbols: Vec::new(),
431                    importance: 0.5,
432                    content: Some("def test_it(): pass".to_string()),
433                },
434            ],
435            metadata: RepoMetadata {
436                total_files: 3,
437                total_lines: 5,
438                total_tokens: TokenCounts::default(),
439                languages: vec![
440                    LanguageStats {
441                        language: "Rust".to_string(),
442                        files: 2,
443                        lines: 4,
444                        percentage: 66.7,
445                    },
446                    LanguageStats {
447                        language: "Python".to_string(),
448                        files: 1,
449                        lines: 1,
450                        percentage: 33.3,
451                    },
452                ],
453                framework: Some("Actix".to_string()),
454                description: Some("Test project".to_string()),
455                branch: Some("main".to_string()),
456                commit: Some("abc123".to_string()),
457                directory_structure: None,
458                external_dependencies: vec!["tokio".to_string()],
459                git_history: None,
460            },
461        }
462    }
463
464    fn create_test_map() -> RepoMap {
465        RepoMap {
466            summary: "Test repository with 1 key symbol".to_string(),
467            key_symbols: vec![RankedSymbol {
468                rank: 1,
469                name: "main".to_string(),
470                kind: "function".to_string(),
471                file: "main.py".to_string(),
472                line: 1,
473                signature: None,
474                summary: Some("Entry point".to_string()),
475                references: 0,
476                importance: 0.95,
477            }],
478            module_graph: ModuleGraph {
479                nodes: vec![ModuleNode { name: "main".to_string(), files: 1, tokens: 50 }],
480                edges: vec![],
481            },
482            file_index: vec![FileIndexEntry {
483                path: "main.py".to_string(),
484                tokens: 50,
485                importance: "high".to_string(),
486                summary: None,
487            }],
488            token_count: 50,
489        }
490    }
491
492    fn create_map_with_mermaid() -> RepoMap {
493        RepoMap {
494            summary: "Test with dependencies".to_string(),
495            key_symbols: vec![
496                RankedSymbol {
497                    rank: 1,
498                    name: "main".to_string(),
499                    kind: "function".to_string(),
500                    file: "main.rs".to_string(),
501                    line: 1,
502                    signature: Some("fn main()".to_string()),
503                    summary: Some("Entry | point".to_string()),
504                    references: 5,
505                    importance: 0.95,
506                },
507                RankedSymbol {
508                    rank: 2,
509                    name: "helper".to_string(),
510                    kind: "function".to_string(),
511                    file: "lib.rs".to_string(),
512                    line: 5,
513                    signature: None,
514                    summary: None,
515                    references: 2,
516                    importance: 0.7,
517                },
518            ],
519            module_graph: ModuleGraph {
520                nodes: vec![
521                    ModuleNode { name: "main".to_string(), files: 1, tokens: 100 },
522                    ModuleNode { name: "lib".to_string(), files: 1, tokens: 80 },
523                ],
524                edges: vec![ModuleEdge {
525                    from: "main-mod".to_string(),
526                    to: "lib.rs".to_string(),
527                    weight: 1,
528                }],
529            },
530            file_index: vec![
531                FileIndexEntry {
532                    path: "main.rs".to_string(),
533                    tokens: 100,
534                    importance: "critical".to_string(),
535                    summary: None,
536                },
537                FileIndexEntry {
538                    path: "lib.rs".to_string(),
539                    tokens: 80,
540                    importance: "high".to_string(),
541                    summary: None,
542                },
543            ],
544            token_count: 100,
545        }
546    }
547
548    #[test]
549    fn test_markdown_output() {
550        let repo = create_test_repo();
551        let map = RepoMapGenerator::new(1000).generate(&repo);
552
553        let formatter = MarkdownFormatter::new();
554        let output = formatter.format(&repo, &map);
555
556        assert!(output.contains("# Repository: test"));
557        assert!(output.contains("## Overview"));
558        assert!(output.contains("```python"));
559    }
560
561    #[test]
562    fn test_markdown_default() {
563        let formatter = MarkdownFormatter::default();
564        assert_eq!(formatter.name(), "markdown");
565    }
566
567    #[test]
568    fn test_builder_with_tables() {
569        let formatter = MarkdownFormatter::new().with_tables(false);
570        let repo = create_test_repo();
571        let map = create_test_map();
572        let output = formatter.format(&repo, &map);
573        assert!(!output.contains("## Overview"));
574        assert!(!output.contains("| Metric | Value |"));
575    }
576
577    #[test]
578    fn test_builder_with_mermaid_disabled() {
579        let formatter = MarkdownFormatter::new().with_mermaid(false);
580        let repo = create_multi_language_repo();
581        let map = create_map_with_mermaid();
582        let output = formatter.format(&repo, &map);
583        assert!(!output.contains("```mermaid"));
584    }
585
586    #[test]
587    fn test_builder_with_mermaid_enabled() {
588        let formatter = MarkdownFormatter::new().with_mermaid(true);
589        let repo = create_multi_language_repo();
590        let map = create_map_with_mermaid();
591        let output = formatter.format(&repo, &map);
592        assert!(output.contains("```mermaid"));
593        assert!(output.contains("graph LR"));
594        // Check ID sanitization (- and . replaced with _)
595        assert!(output.contains("main_mod"));
596        assert!(output.contains("lib_rs"));
597    }
598
599    #[test]
600    fn test_builder_with_line_numbers_disabled() {
601        let formatter = MarkdownFormatter::new().with_line_numbers(false);
602        let repo = create_test_repo();
603        let map = create_test_map();
604        let output = formatter.format(&repo, &map);
605        // Should NOT have line numbers like "   1 def main():"
606        assert!(!output.contains("   1 def main"));
607        // Should have raw content
608        assert!(output.contains("def main():"));
609    }
610
611    #[test]
612    fn test_builder_with_model() {
613        let formatter = MarkdownFormatter::new().with_model(TokenizerModel::Gpt4o);
614        let repo = create_test_repo();
615        let map = create_test_map();
616        let output = formatter.format(&repo, &map);
617        // GPT-4o uses o200k encoding, which is 48 in our test data
618        assert!(output.contains("**Tokens**: 48"));
619    }
620
621    #[test]
622    fn test_estimate_output_size() {
623        let repo = create_test_repo();
624        let size = MarkdownFormatter::estimate_output_size(&repo);
625        // base (1000) + files (1 * 400) + content length (~30)
626        assert!(size > 1000);
627        assert!(size < 2000);
628    }
629
630    #[test]
631    fn test_stream_header() {
632        let formatter = MarkdownFormatter::new();
633        let repo = create_test_repo();
634        let mut buf = Vec::new();
635        formatter.stream_header(&mut buf, &repo).unwrap();
636        let output = String::from_utf8(buf).unwrap();
637        assert!(output.contains("# Repository: test"));
638        assert!(output.contains("**Files**: 1"));
639        assert!(output.contains("**Lines**: 2"));
640        assert!(output.contains("**Tokens**: 50")); // Claude tokens
641    }
642
643    #[test]
644    fn test_stream_overview_with_framework() {
645        let formatter = MarkdownFormatter::new();
646        let repo = create_multi_language_repo();
647        let mut buf = Vec::new();
648        formatter.stream_overview(&mut buf, &repo).unwrap();
649        let output = String::from_utf8(buf).unwrap();
650        assert!(output.contains("| Framework | Actix |"));
651        assert!(output.contains("| Primary Language | Rust |"));
652    }
653
654    #[test]
655    fn test_stream_overview_multiple_languages() {
656        let formatter = MarkdownFormatter::new();
657        let repo = create_multi_language_repo();
658        let mut buf = Vec::new();
659        formatter.stream_overview(&mut buf, &repo).unwrap();
660        let output = String::from_utf8(buf).unwrap();
661        assert!(output.contains("### Languages"));
662        assert!(output.contains("| Rust | 2 | 66.7% |"));
663        assert!(output.contains("| Python | 1 | 33.3% |"));
664    }
665
666    #[test]
667    fn test_stream_overview_disabled() {
668        let formatter = MarkdownFormatter::new().with_tables(false);
669        let repo = create_test_repo();
670        let mut buf = Vec::new();
671        formatter.stream_overview(&mut buf, &repo).unwrap();
672        let output = String::from_utf8(buf).unwrap();
673        assert!(output.is_empty());
674    }
675
676    #[test]
677    fn test_stream_repomap() {
678        let formatter = MarkdownFormatter::new();
679        let map = create_test_map();
680        let mut buf = Vec::new();
681        formatter.stream_repomap(&mut buf, &map).unwrap();
682        let output = String::from_utf8(buf).unwrap();
683        assert!(output.contains("## Repository Map"));
684        assert!(output.contains("### Key Symbols"));
685        assert!(output.contains("| 1 | `main` | function | main.py | 1 | Entry point |"));
686    }
687
688    #[test]
689    fn test_stream_repomap_escapes_pipe_in_summary() {
690        let formatter = MarkdownFormatter::new();
691        let map = create_map_with_mermaid();
692        let mut buf = Vec::new();
693        formatter.stream_repomap(&mut buf, &map).unwrap();
694        let output = String::from_utf8(buf).unwrap();
695        // Pipe should be escaped
696        assert!(output.contains("Entry \\| point"));
697    }
698
699    #[test]
700    fn test_stream_structure() {
701        let formatter = MarkdownFormatter::new();
702        let repo = create_multi_language_repo();
703        let mut buf = Vec::new();
704        formatter.stream_structure(&mut buf, &repo).unwrap();
705        let output = String::from_utf8(buf).unwrap();
706        assert!(output.contains("## Project Structure"));
707        assert!(output.contains("```"));
708    }
709
710    #[test]
711    fn test_stream_structure_disabled() {
712        // Create a formatter with tree disabled by modifying internal state
713        let mut formatter = MarkdownFormatter::new();
714        formatter.include_tree = false;
715        let repo = create_test_repo();
716        let mut buf = Vec::new();
717        formatter.stream_structure(&mut buf, &repo).unwrap();
718        let output = String::from_utf8(buf).unwrap();
719        assert!(output.is_empty());
720    }
721
722    #[test]
723    fn test_stream_files_with_line_numbers() {
724        let formatter = MarkdownFormatter::new().with_line_numbers(true);
725        let repo = create_test_repo();
726        let mut buf = Vec::new();
727        formatter.stream_files(&mut buf, &repo).unwrap();
728        let output = String::from_utf8(buf).unwrap();
729        assert!(output.contains("### main.py"));
730        assert!(output.contains("**Tokens**: 50"));
731        assert!(output.contains("**Language**: python"));
732        // Line numbers should be present
733        assert!(output.contains("   1 def main():"));
734        assert!(output.contains("   2     print('hello')"));
735    }
736
737    #[test]
738    fn test_stream_files_without_line_numbers() {
739        let formatter = MarkdownFormatter::new().with_line_numbers(false);
740        let repo = create_test_repo();
741        let mut buf = Vec::new();
742        formatter.stream_files(&mut buf, &repo).unwrap();
743        let output = String::from_utf8(buf).unwrap();
744        // Should have raw content without line numbers
745        assert!(output.contains("def main():\n    print('hello')"));
746    }
747
748    #[test]
749    fn test_stream_files_with_embedded_line_numbers() {
750        let mut repo = create_test_repo();
751        // Set content with embedded line numbers (compressed format)
752        repo.files[0].content = Some("1:def main():\n5:    print('hello')".to_string());
753        let formatter = MarkdownFormatter::new().with_line_numbers(true);
754        let mut buf = Vec::new();
755        formatter.stream_files(&mut buf, &repo).unwrap();
756        let output = String::from_utf8(buf).unwrap();
757        // Should parse and display original line numbers
758        assert!(output.contains("   1 def main():"));
759        assert!(output.contains("   5     print('hello')"));
760    }
761
762    #[test]
763    fn test_stream_files_with_malformed_embedded_line_numbers() {
764        let mut repo = create_test_repo();
765        // Malformed embedded line numbers
766        repo.files[0].content = Some("abc:def main():\nno_colon_here".to_string());
767        let formatter = MarkdownFormatter::new().with_line_numbers(true);
768        let mut buf = Vec::new();
769        formatter.stream_files(&mut buf, &repo).unwrap();
770        let output = String::from_utf8(buf).unwrap();
771        // Should handle gracefully - use sequential numbers since first line doesn't parse
772        assert!(output.contains("   1 abc:def main():"));
773    }
774
775    #[test]
776    fn test_stream_files_with_no_content() {
777        let mut repo = create_test_repo();
778        repo.files[0].content = None;
779        let formatter = MarkdownFormatter::new();
780        let mut buf = Vec::new();
781        formatter.stream_files(&mut buf, &repo).unwrap();
782        let output = String::from_utf8(buf).unwrap();
783        // Should still have ## Files header but no file content
784        assert!(output.contains("## Files"));
785        assert!(!output.contains("### main.py"));
786    }
787
788    #[test]
789    fn test_stream_files_unknown_language() {
790        let mut repo = create_test_repo();
791        repo.files[0].language = None;
792        let formatter = MarkdownFormatter::new();
793        let mut buf = Vec::new();
794        formatter.stream_files(&mut buf, &repo).unwrap();
795        let output = String::from_utf8(buf).unwrap();
796        assert!(output.contains("**Language**: unknown"));
797    }
798
799    #[test]
800    fn test_format_repo_without_map() {
801        let formatter = MarkdownFormatter::new();
802        let repo = create_test_repo();
803        let output = formatter.format_repo(&repo);
804        assert!(output.contains("# Repository: test"));
805        assert!(output.contains("## Overview"));
806        // Should NOT have repomap section
807        assert!(!output.contains("## Repository Map"));
808    }
809
810    #[test]
811    fn test_streaming_formatter_trait() {
812        let formatter = MarkdownFormatter::new();
813        let repo = create_test_repo();
814        let map = create_test_map();
815        let mut buf = Vec::new();
816        formatter.format_to_writer(&repo, &map, &mut buf).unwrap();
817        let output = String::from_utf8(buf).unwrap();
818        assert!(output.contains("# Repository: test"));
819        assert!(output.contains("## Repository Map"));
820    }
821
822    #[test]
823    fn test_streaming_formatter_repo_only() {
824        let formatter = MarkdownFormatter::new();
825        let repo = create_test_repo();
826        let mut buf = Vec::new();
827        formatter.format_repo_to_writer(&repo, &mut buf).unwrap();
828        let output = String::from_utf8(buf).unwrap();
829        assert!(output.contains("# Repository: test"));
830        assert!(!output.contains("## Repository Map"));
831    }
832
833    #[test]
834    fn test_escape_markdown_cell() {
835        assert_eq!(escape_markdown_cell("hello"), "hello");
836        assert_eq!(escape_markdown_cell("a|b"), "a\\|b");
837        assert_eq!(escape_markdown_cell("line1\nline2"), "line1 line2");
838        assert_eq!(escape_markdown_cell("  spaced  "), "spaced");
839        assert_eq!(escape_markdown_cell("a|b\nc|d"), "a\\|b c\\|d");
840    }
841
842    #[test]
843    fn test_escape_markdown_cell_complex() {
844        // Multiple pipes and newlines
845        let input = "col1|col2|col3\nrow1|row2|row3";
846        let expected = "col1\\|col2\\|col3 row1\\|row2\\|row3";
847        assert_eq!(escape_markdown_cell(input), expected);
848    }
849
850    #[test]
851    fn test_full_format_with_all_features() {
852        let formatter = MarkdownFormatter::new()
853            .with_tables(true)
854            .with_mermaid(true)
855            .with_line_numbers(true)
856            .with_model(TokenizerModel::Claude);
857        let repo = create_multi_language_repo();
858        let map = create_map_with_mermaid();
859        let output = formatter.format(&repo, &map);
860
861        // All sections present
862        assert!(output.contains("# Repository: multi-lang"));
863        assert!(output.contains("## Overview"));
864        assert!(output.contains("## Repository Map"));
865        assert!(output.contains("## Project Structure"));
866        assert!(output.contains("## Files"));
867        assert!(output.contains("```mermaid"));
868    }
869
870    #[test]
871    fn test_format_with_empty_repo() {
872        let repo = Repository {
873            name: "empty".to_string(),
874            path: "/tmp/empty".into(),
875            files: vec![],
876            metadata: RepoMetadata::default(),
877        };
878        let map = RepoMap {
879            summary: "Empty repository".to_string(),
880            key_symbols: vec![],
881            module_graph: ModuleGraph { nodes: vec![], edges: vec![] },
882            file_index: vec![],
883            token_count: 0,
884        };
885        let formatter = MarkdownFormatter::new();
886        let output = formatter.format(&repo, &map);
887        assert!(output.contains("# Repository: empty"));
888        // Should not fail
889    }
890
891    #[test]
892    fn test_estimate_output_size_empty_repo() {
893        let repo = Repository {
894            name: "empty".to_string(),
895            path: "/tmp/empty".into(),
896            files: vec![],
897            metadata: RepoMetadata::default(),
898        };
899        let size = MarkdownFormatter::estimate_output_size(&repo);
900        assert_eq!(size, 1000); // Just base size
901    }
902
903    #[test]
904    fn test_estimate_output_size_with_content() {
905        let mut repo = create_test_repo();
906        repo.files[0].content = Some("x".repeat(5000));
907        let size = MarkdownFormatter::estimate_output_size(&repo);
908        // base (1000) + files (1 * 400) + content (5000)
909        assert_eq!(size, 6400);
910    }
911
912    #[test]
913    fn test_structure_nested_paths() {
914        let repo = Repository {
915            name: "nested".to_string(),
916            path: "/tmp/nested".into(),
917            files: vec![
918                RepoFile::new("/tmp/nested/src/a/b/c.rs", "src/a/b/c.rs"),
919                RepoFile::new("/tmp/nested/src/a/b/d.rs", "src/a/b/d.rs"),
920                RepoFile::new("/tmp/nested/src/a/e.rs", "src/a/e.rs"),
921                RepoFile::new("/tmp/nested/tests/test.rs", "tests/test.rs"),
922            ],
923            metadata: RepoMetadata::default(),
924        };
925        let formatter = MarkdownFormatter::new();
926        let mut buf = Vec::new();
927        formatter.stream_structure(&mut buf, &repo).unwrap();
928        let output = String::from_utf8(buf).unwrap();
929        // Should show directory structure
930        assert!(output.contains("src"));
931        assert!(output.contains("tests"));
932    }
933
934    #[test]
935    fn test_name_method() {
936        let formatter = MarkdownFormatter::new();
937        assert_eq!(formatter.name(), "markdown");
938    }
939}