infiniloom_engine/output/
markdown.rs

1//! GPT-optimized Markdown output formatter
2//!
3//! Supports both in-memory (`format()`) and streaming (`format_to_writer()`) modes.
4
5use crate::output::{Formatter, StreamingFormatter};
6use crate::repomap::RepoMap;
7use crate::types::{Repository, TokenizerModel};
8use std::io::{self, Write};
9
10/// Markdown formatter optimized for GPT
11pub struct MarkdownFormatter {
12    /// Include overview tables
13    include_tables: bool,
14    /// Include Mermaid diagrams
15    include_mermaid: bool,
16    /// Include file tree
17    include_tree: bool,
18    /// Include line numbers in code
19    include_line_numbers: bool,
20    /// Token model for counts in output
21    token_model: TokenizerModel,
22}
23
24impl MarkdownFormatter {
25    /// Create a new Markdown formatter
26    pub fn new() -> Self {
27        Self {
28            include_tables: true,
29            include_mermaid: true,
30            include_tree: true,
31            include_line_numbers: true,
32            token_model: TokenizerModel::Claude,
33        }
34    }
35
36    /// Set tables option
37    pub fn with_tables(mut self, enabled: bool) -> Self {
38        self.include_tables = enabled;
39        self
40    }
41
42    /// Set Mermaid option
43    pub fn with_mermaid(mut self, enabled: bool) -> Self {
44        self.include_mermaid = enabled;
45        self
46    }
47
48    /// Set line numbers option
49    pub fn with_line_numbers(mut self, enabled: bool) -> Self {
50        self.include_line_numbers = enabled;
51        self
52    }
53
54    /// Set token model for token counts in output
55    pub fn with_model(mut self, model: TokenizerModel) -> Self {
56        self.token_model = model;
57        self
58    }
59
60    /// Estimate output size for pre-allocation
61    fn estimate_output_size(repo: &Repository) -> usize {
62        let base = 1000;
63        let files = repo.files.len() * 400;
64        let content: usize = repo
65            .files
66            .iter()
67            .filter_map(|f| f.content.as_ref())
68            .map(|c| c.len())
69            .sum();
70        base + files + content
71    }
72
73    // =========================================================================
74    // Streaming methods (write to impl std::io::Write)
75    // =========================================================================
76
77    fn stream_header<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
78        writeln!(w, "# Repository: {}", repo.name)?;
79        writeln!(w)?;
80        writeln!(
81            w,
82            "> **Files**: {} | **Lines**: {} | **Tokens**: {}",
83            repo.metadata.total_files,
84            repo.metadata.total_lines,
85            repo.metadata.total_tokens.get(self.token_model)
86        )?;
87        writeln!(w)
88    }
89
90    fn stream_overview<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
91        if !self.include_tables {
92            return Ok(());
93        }
94
95        writeln!(w, "## Overview")?;
96        writeln!(w)?;
97        writeln!(w, "| Metric | Value |")?;
98        writeln!(w, "|--------|-------|")?;
99        writeln!(w, "| Files | {} |", repo.metadata.total_files)?;
100        writeln!(w, "| Lines | {} |", repo.metadata.total_lines)?;
101
102        if let Some(lang) = repo.metadata.languages.first() {
103            writeln!(w, "| Primary Language | {} |", lang.language)?;
104        }
105        if let Some(framework) = &repo.metadata.framework {
106            writeln!(w, "| Framework | {} |", framework)?;
107        }
108        writeln!(w)?;
109
110        if repo.metadata.languages.len() > 1 {
111            writeln!(w, "### Languages")?;
112            writeln!(w)?;
113            writeln!(w, "| Language | Files | Percentage |")?;
114            writeln!(w, "|----------|-------|------------|")?;
115            for lang in &repo.metadata.languages {
116                writeln!(w, "| {} | {} | {:.1}% |", lang.language, lang.files, lang.percentage)?;
117            }
118            writeln!(w)?;
119        }
120        Ok(())
121    }
122
123    fn stream_repomap<W: Write>(&self, w: &mut W, map: &RepoMap) -> io::Result<()> {
124        writeln!(w, "## Repository Map")?;
125        writeln!(w)?;
126        writeln!(w, "{}", map.summary)?;
127        writeln!(w)?;
128
129        writeln!(w, "### Key Symbols")?;
130        writeln!(w)?;
131        writeln!(w, "| Rank | Symbol | Type | File | Line | Summary |")?;
132        writeln!(w, "|------|--------|------|------|------|---------|")?;
133        for sym in map.key_symbols.iter().take(15) {
134            let summary = sym
135                .summary
136                .as_deref()
137                .map(escape_markdown_cell)
138                .unwrap_or_default();
139            writeln!(
140                w,
141                "| {} | `{}` | {} | {} | {} | {} |",
142                sym.rank, sym.name, sym.kind, sym.file, sym.line, summary
143            )?;
144        }
145        writeln!(w)?;
146
147        if self.include_mermaid && !map.module_graph.edges.is_empty() {
148            writeln!(w, "### Module Dependencies")?;
149            writeln!(w)?;
150            writeln!(w, "```mermaid")?;
151            writeln!(w, "graph LR")?;
152            for edge in &map.module_graph.edges {
153                let sanitize_id = |s: &str| -> String {
154                    s.chars()
155                        .map(|c| if c == '-' || c == '.' { '_' } else { c })
156                        .collect()
157                };
158                let from_id = sanitize_id(&edge.from);
159                let to_id = sanitize_id(&edge.to);
160                writeln!(w, "    {}[\"{}\"] --> {}[\"{}\"]", from_id, edge.from, to_id, edge.to)?;
161            }
162            writeln!(w, "```")?;
163            writeln!(w)?;
164        }
165        Ok(())
166    }
167
168    fn stream_structure<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
169        if !self.include_tree {
170            return Ok(());
171        }
172
173        writeln!(w, "## Project Structure")?;
174        writeln!(w)?;
175        writeln!(w, "```")?;
176
177        let mut paths: Vec<_> = repo
178            .files
179            .iter()
180            .map(|f| f.relative_path.as_str())
181            .collect();
182        paths.sort();
183
184        let mut prev_parts: Vec<&str> = Vec::new();
185        for path in paths {
186            let parts: Vec<_> = path.split('/').collect();
187            let mut common = 0;
188            for (i, part) in parts.iter().enumerate() {
189                if i < prev_parts.len() && prev_parts[i] == *part {
190                    common = i + 1;
191                } else {
192                    break;
193                }
194            }
195            for (i, part) in parts.iter().enumerate().skip(common) {
196                let indent = "  ".repeat(i);
197                let prefix = if i == parts.len() - 1 {
198                    "📄 "
199                } else {
200                    "📁 "
201                };
202                writeln!(w, "{}{}{}", indent, prefix, part)?;
203            }
204            prev_parts = parts;
205        }
206
207        writeln!(w, "```")?;
208        writeln!(w)
209    }
210
211    fn stream_files<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
212        writeln!(w, "## Files")?;
213        writeln!(w)?;
214
215        for file in &repo.files {
216            if let Some(content) = &file.content {
217                writeln!(w, "### {}", file.relative_path)?;
218                writeln!(w)?;
219                writeln!(
220                    w,
221                    "> **Tokens**: {} | **Language**: {}",
222                    file.token_count.get(self.token_model),
223                    file.language.as_deref().unwrap_or("unknown")
224                )?;
225                writeln!(w)?;
226
227                let lang = file.language.as_deref().unwrap_or("");
228                writeln!(w, "```{}", lang)?;
229                if self.include_line_numbers {
230                    // Check if content has embedded line numbers (format: "N:content")
231                    // This preserves original line numbers when content has been compressed
232                    let first_line = content.lines().next().unwrap_or("");
233                    let has_embedded_line_nums = first_line.contains(':')
234                        && first_line
235                            .split(':')
236                            .next()
237                            .map(|s| s.parse::<u32>().is_ok())
238                            .unwrap_or(false);
239
240                    if has_embedded_line_nums {
241                        // Content has embedded line numbers - parse and output
242                        for line in content.lines() {
243                            if let Some((num_str, rest)) = line.split_once(':') {
244                                if let Ok(line_num) = num_str.parse::<u32>() {
245                                    writeln!(w, "{:4} {}", line_num, rest)?;
246                                } else {
247                                    // Fallback for malformed lines
248                                    writeln!(w, "     {}", line)?;
249                                }
250                            } else {
251                                writeln!(w, "     {}", line)?;
252                            }
253                        }
254                    } else {
255                        // No embedded line numbers - use sequential (uncompressed content)
256                        for (i, line) in content.lines().enumerate() {
257                            writeln!(w, "{:4} {}", i + 1, line)?;
258                        }
259                    }
260                } else {
261                    writeln!(w, "{}", content)?;
262                }
263                writeln!(w, "```")?;
264                writeln!(w)?;
265            }
266        }
267        Ok(())
268    }
269}
270
271impl Default for MarkdownFormatter {
272    fn default() -> Self {
273        Self::new()
274    }
275}
276
277impl Formatter for MarkdownFormatter {
278    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
279        // Use streaming internally for consistency
280        let mut output = Vec::with_capacity(Self::estimate_output_size(repo));
281        // Vec<u8> write cannot fail, ignore result
282        drop(self.format_to_writer(repo, map, &mut output));
283        // Use lossy conversion to handle any edge cases with invalid UTF-8
284        String::from_utf8(output)
285            .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
286    }
287
288    fn format_repo(&self, repo: &Repository) -> String {
289        let mut output = Vec::with_capacity(Self::estimate_output_size(repo));
290        // Vec<u8> write cannot fail, ignore result
291        drop(self.format_repo_to_writer(repo, &mut output));
292        // Use lossy conversion to handle any edge cases with invalid UTF-8
293        String::from_utf8(output)
294            .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
295    }
296
297    fn name(&self) -> &'static str {
298        "markdown"
299    }
300}
301
302impl StreamingFormatter for MarkdownFormatter {
303    fn format_to_writer<W: Write>(
304        &self,
305        repo: &Repository,
306        map: &RepoMap,
307        writer: &mut W,
308    ) -> io::Result<()> {
309        self.stream_header(writer, repo)?;
310        self.stream_overview(writer, repo)?;
311        self.stream_repomap(writer, map)?;
312        self.stream_structure(writer, repo)?;
313        self.stream_files(writer, repo)?;
314        Ok(())
315    }
316
317    fn format_repo_to_writer<W: Write>(&self, repo: &Repository, writer: &mut W) -> io::Result<()> {
318        self.stream_header(writer, repo)?;
319        self.stream_overview(writer, repo)?;
320        self.stream_structure(writer, repo)?;
321        self.stream_files(writer, repo)?;
322        Ok(())
323    }
324}
325
/// Make `text` safe to place inside a single Markdown table cell.
///
/// Surrounding whitespace is trimmed, every `|` is written as `\|`, and each
/// line break — `\n`, `\r\n` or a lone `\r` — becomes a single space so the
/// cell stays on one row and no stray carriage return is left behind.
fn escape_markdown_cell(text: &str) -> String {
    let trimmed = text.trim();
    let mut escaped = String::with_capacity(trimmed.len());
    let mut chars = trimmed.chars().peekable();

    while let Some(ch) = chars.next() {
        match ch {
            '|' => escaped.push_str("\\|"),
            '\r' => {
                // Treat CRLF as one line break, not two.
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                escaped.push(' ');
            }
            '\n' => escaped.push(' '),
            other => escaped.push(other),
        }
    }
    escaped
}
332
333#[cfg(test)]
334#[allow(clippy::str_to_string)]
335mod tests {
336    use super::*;
337    use crate::repomap::{FileIndexEntry, ModuleEdge, ModuleGraph, ModuleNode, RankedSymbol, RepoMap, RepoMapGenerator};
338    use crate::types::{LanguageStats, RepoFile, RepoMetadata, TokenCounts};
339
340    fn create_test_repo() -> Repository {
341        Repository {
342            name: "test".to_string(),
343            path: "/tmp/test".into(),
344            files: vec![RepoFile {
345                path: "/tmp/test/main.py".into(),
346                relative_path: "main.py".to_string(),
347                language: Some("python".to_string()),
348                size_bytes: 100,
349                token_count: TokenCounts {
350                    o200k: 48,
351                    cl100k: 49,
352                    claude: 50,
353                    gemini: 47,
354                    llama: 46,
355                    mistral: 46,
356                    deepseek: 46,
357                    qwen: 46,
358                    cohere: 47,
359                    grok: 46,
360                },
361                symbols: Vec::new(),
362                importance: 0.8,
363                content: Some("def main():\n    print('hello')".to_string()),
364            }],
365            metadata: RepoMetadata {
366                total_files: 1,
367                total_lines: 2,
368                total_tokens: TokenCounts {
369                    o200k: 48,
370                    cl100k: 49,
371                    claude: 50,
372                    gemini: 47,
373                    llama: 46,
374                    mistral: 46,
375                    deepseek: 46,
376                    qwen: 46,
377                    cohere: 47,
378                    grok: 46,
379                },
380                languages: vec![LanguageStats {
381                    language: "Python".to_string(),
382                    files: 1,
383                    lines: 2,
384                    percentage: 100.0,
385                }],
386                framework: None,
387                description: None,
388                branch: None,
389                commit: None,
390                directory_structure: None,
391                external_dependencies: vec![],
392                git_history: None,
393            },
394        }
395    }
396
397    fn create_multi_language_repo() -> Repository {
398        Repository {
399            name: "multi-lang".to_string(),
400            path: "/tmp/multi".into(),
401            files: vec![
402                RepoFile {
403                    path: "/tmp/multi/src/main.rs".into(),
404                    relative_path: "src/main.rs".to_string(),
405                    language: Some("rust".to_string()),
406                    size_bytes: 200,
407                    token_count: TokenCounts::default(),
408                    symbols: Vec::new(),
409                    importance: 0.9,
410                    content: Some("fn main() {\n    println!(\"hello\");\n}".to_string()),
411                },
412                RepoFile {
413                    path: "/tmp/multi/src/lib.rs".into(),
414                    relative_path: "src/lib.rs".to_string(),
415                    language: Some("rust".to_string()),
416                    size_bytes: 150,
417                    token_count: TokenCounts::default(),
418                    symbols: Vec::new(),
419                    importance: 0.8,
420                    content: Some("pub mod utils;".to_string()),
421                },
422                RepoFile {
423                    path: "/tmp/multi/tests/test.py".into(),
424                    relative_path: "tests/test.py".to_string(),
425                    language: Some("python".to_string()),
426                    size_bytes: 100,
427                    token_count: TokenCounts::default(),
428                    symbols: Vec::new(),
429                    importance: 0.5,
430                    content: Some("def test_it(): pass".to_string()),
431                },
432            ],
433            metadata: RepoMetadata {
434                total_files: 3,
435                total_lines: 5,
436                total_tokens: TokenCounts::default(),
437                languages: vec![
438                    LanguageStats {
439                        language: "Rust".to_string(),
440                        files: 2,
441                        lines: 4,
442                        percentage: 66.7,
443                    },
444                    LanguageStats {
445                        language: "Python".to_string(),
446                        files: 1,
447                        lines: 1,
448                        percentage: 33.3,
449                    },
450                ],
451                framework: Some("Actix".to_string()),
452                description: Some("Test project".to_string()),
453                branch: Some("main".to_string()),
454                commit: Some("abc123".to_string()),
455                directory_structure: None,
456                external_dependencies: vec!["tokio".to_string()],
457                git_history: None,
458            },
459        }
460    }
461
462    fn create_test_map() -> RepoMap {
463        RepoMap {
464            summary: "Test repository with 1 key symbol".to_string(),
465            key_symbols: vec![RankedSymbol {
466                rank: 1,
467                name: "main".to_string(),
468                kind: "function".to_string(),
469                file: "main.py".to_string(),
470                line: 1,
471                signature: None,
472                summary: Some("Entry point".to_string()),
473                references: 0,
474                importance: 0.95,
475            }],
476            module_graph: ModuleGraph {
477                nodes: vec![ModuleNode {
478                    name: "main".to_string(),
479                    files: 1,
480                    tokens: 50,
481                }],
482                edges: vec![],
483            },
484            file_index: vec![FileIndexEntry {
485                path: "main.py".to_string(),
486                tokens: 50,
487                importance: "high".to_string(),
488                summary: None,
489            }],
490            token_count: 50,
491        }
492    }
493
494    fn create_map_with_mermaid() -> RepoMap {
495        RepoMap {
496            summary: "Test with dependencies".to_string(),
497            key_symbols: vec![
498                RankedSymbol {
499                    rank: 1,
500                    name: "main".to_string(),
501                    kind: "function".to_string(),
502                    file: "main.rs".to_string(),
503                    line: 1,
504                    signature: Some("fn main()".to_string()),
505                    summary: Some("Entry | point".to_string()),
506                    references: 5,
507                    importance: 0.95,
508                },
509                RankedSymbol {
510                    rank: 2,
511                    name: "helper".to_string(),
512                    kind: "function".to_string(),
513                    file: "lib.rs".to_string(),
514                    line: 5,
515                    signature: None,
516                    summary: None,
517                    references: 2,
518                    importance: 0.7,
519                },
520            ],
521            module_graph: ModuleGraph {
522                nodes: vec![
523                    ModuleNode {
524                        name: "main".to_string(),
525                        files: 1,
526                        tokens: 100,
527                    },
528                    ModuleNode {
529                        name: "lib".to_string(),
530                        files: 1,
531                        tokens: 80,
532                    },
533                ],
534                edges: vec![ModuleEdge {
535                    from: "main-mod".to_string(),
536                    to: "lib.rs".to_string(),
537                    weight: 1,
538                }],
539            },
540            file_index: vec![
541                FileIndexEntry {
542                    path: "main.rs".to_string(),
543                    tokens: 100,
544                    importance: "critical".to_string(),
545                    summary: None,
546                },
547                FileIndexEntry {
548                    path: "lib.rs".to_string(),
549                    tokens: 80,
550                    importance: "high".to_string(),
551                    summary: None,
552                },
553            ],
554            token_count: 100,
555        }
556    }
557
558    #[test]
559    fn test_markdown_output() {
560        let repo = create_test_repo();
561        let map = RepoMapGenerator::new(1000).generate(&repo);
562
563        let formatter = MarkdownFormatter::new();
564        let output = formatter.format(&repo, &map);
565
566        assert!(output.contains("# Repository: test"));
567        assert!(output.contains("## Overview"));
568        assert!(output.contains("```python"));
569    }
570
571    #[test]
572    fn test_markdown_default() {
573        let formatter = MarkdownFormatter::default();
574        assert_eq!(formatter.name(), "markdown");
575    }
576
577    #[test]
578    fn test_builder_with_tables() {
579        let formatter = MarkdownFormatter::new().with_tables(false);
580        let repo = create_test_repo();
581        let map = create_test_map();
582        let output = formatter.format(&repo, &map);
583        assert!(!output.contains("## Overview"));
584        assert!(!output.contains("| Metric | Value |"));
585    }
586
587    #[test]
588    fn test_builder_with_mermaid_disabled() {
589        let formatter = MarkdownFormatter::new().with_mermaid(false);
590        let repo = create_multi_language_repo();
591        let map = create_map_with_mermaid();
592        let output = formatter.format(&repo, &map);
593        assert!(!output.contains("```mermaid"));
594    }
595
596    #[test]
597    fn test_builder_with_mermaid_enabled() {
598        let formatter = MarkdownFormatter::new().with_mermaid(true);
599        let repo = create_multi_language_repo();
600        let map = create_map_with_mermaid();
601        let output = formatter.format(&repo, &map);
602        assert!(output.contains("```mermaid"));
603        assert!(output.contains("graph LR"));
604        // Check ID sanitization (- and . replaced with _)
605        assert!(output.contains("main_mod"));
606        assert!(output.contains("lib_rs"));
607    }
608
609    #[test]
610    fn test_builder_with_line_numbers_disabled() {
611        let formatter = MarkdownFormatter::new().with_line_numbers(false);
612        let repo = create_test_repo();
613        let map = create_test_map();
614        let output = formatter.format(&repo, &map);
615        // Should NOT have line numbers like "   1 def main():"
616        assert!(!output.contains("   1 def main"));
617        // Should have raw content
618        assert!(output.contains("def main():"));
619    }
620
621    #[test]
622    fn test_builder_with_model() {
623        let formatter = MarkdownFormatter::new().with_model(TokenizerModel::Gpt4o);
624        let repo = create_test_repo();
625        let map = create_test_map();
626        let output = formatter.format(&repo, &map);
627        // GPT-4o uses o200k encoding, which is 48 in our test data
628        assert!(output.contains("**Tokens**: 48"));
629    }
630
631    #[test]
632    fn test_estimate_output_size() {
633        let repo = create_test_repo();
634        let size = MarkdownFormatter::estimate_output_size(&repo);
635        // base (1000) + files (1 * 400) + content length (~30)
636        assert!(size > 1000);
637        assert!(size < 2000);
638    }
639
640    #[test]
641    fn test_stream_header() {
642        let formatter = MarkdownFormatter::new();
643        let repo = create_test_repo();
644        let mut buf = Vec::new();
645        formatter.stream_header(&mut buf, &repo).unwrap();
646        let output = String::from_utf8(buf).unwrap();
647        assert!(output.contains("# Repository: test"));
648        assert!(output.contains("**Files**: 1"));
649        assert!(output.contains("**Lines**: 2"));
650        assert!(output.contains("**Tokens**: 50")); // Claude tokens
651    }
652
653    #[test]
654    fn test_stream_overview_with_framework() {
655        let formatter = MarkdownFormatter::new();
656        let repo = create_multi_language_repo();
657        let mut buf = Vec::new();
658        formatter.stream_overview(&mut buf, &repo).unwrap();
659        let output = String::from_utf8(buf).unwrap();
660        assert!(output.contains("| Framework | Actix |"));
661        assert!(output.contains("| Primary Language | Rust |"));
662    }
663
664    #[test]
665    fn test_stream_overview_multiple_languages() {
666        let formatter = MarkdownFormatter::new();
667        let repo = create_multi_language_repo();
668        let mut buf = Vec::new();
669        formatter.stream_overview(&mut buf, &repo).unwrap();
670        let output = String::from_utf8(buf).unwrap();
671        assert!(output.contains("### Languages"));
672        assert!(output.contains("| Rust | 2 | 66.7% |"));
673        assert!(output.contains("| Python | 1 | 33.3% |"));
674    }
675
676    #[test]
677    fn test_stream_overview_disabled() {
678        let formatter = MarkdownFormatter::new().with_tables(false);
679        let repo = create_test_repo();
680        let mut buf = Vec::new();
681        formatter.stream_overview(&mut buf, &repo).unwrap();
682        let output = String::from_utf8(buf).unwrap();
683        assert!(output.is_empty());
684    }
685
686    #[test]
687    fn test_stream_repomap() {
688        let formatter = MarkdownFormatter::new();
689        let map = create_test_map();
690        let mut buf = Vec::new();
691        formatter.stream_repomap(&mut buf, &map).unwrap();
692        let output = String::from_utf8(buf).unwrap();
693        assert!(output.contains("## Repository Map"));
694        assert!(output.contains("### Key Symbols"));
695        assert!(output.contains("| 1 | `main` | function | main.py | 1 | Entry point |"));
696    }
697
698    #[test]
699    fn test_stream_repomap_escapes_pipe_in_summary() {
700        let formatter = MarkdownFormatter::new();
701        let map = create_map_with_mermaid();
702        let mut buf = Vec::new();
703        formatter.stream_repomap(&mut buf, &map).unwrap();
704        let output = String::from_utf8(buf).unwrap();
705        // Pipe should be escaped
706        assert!(output.contains("Entry \\| point"));
707    }
708
709    #[test]
710    fn test_stream_structure() {
711        let formatter = MarkdownFormatter::new();
712        let repo = create_multi_language_repo();
713        let mut buf = Vec::new();
714        formatter.stream_structure(&mut buf, &repo).unwrap();
715        let output = String::from_utf8(buf).unwrap();
716        assert!(output.contains("## Project Structure"));
717        assert!(output.contains("```"));
718    }
719
720    #[test]
721    fn test_stream_structure_disabled() {
722        // Create a formatter with tree disabled by modifying internal state
723        let mut formatter = MarkdownFormatter::new();
724        formatter.include_tree = false;
725        let repo = create_test_repo();
726        let mut buf = Vec::new();
727        formatter.stream_structure(&mut buf, &repo).unwrap();
728        let output = String::from_utf8(buf).unwrap();
729        assert!(output.is_empty());
730    }
731
732    #[test]
733    fn test_stream_files_with_line_numbers() {
734        let formatter = MarkdownFormatter::new().with_line_numbers(true);
735        let repo = create_test_repo();
736        let mut buf = Vec::new();
737        formatter.stream_files(&mut buf, &repo).unwrap();
738        let output = String::from_utf8(buf).unwrap();
739        assert!(output.contains("### main.py"));
740        assert!(output.contains("**Tokens**: 50"));
741        assert!(output.contains("**Language**: python"));
742        // Line numbers should be present
743        assert!(output.contains("   1 def main():"));
744        assert!(output.contains("   2     print('hello')"));
745    }
746
747    #[test]
748    fn test_stream_files_without_line_numbers() {
749        let formatter = MarkdownFormatter::new().with_line_numbers(false);
750        let repo = create_test_repo();
751        let mut buf = Vec::new();
752        formatter.stream_files(&mut buf, &repo).unwrap();
753        let output = String::from_utf8(buf).unwrap();
754        // Should have raw content without line numbers
755        assert!(output.contains("def main():\n    print('hello')"));
756    }
757
758    #[test]
759    fn test_stream_files_with_embedded_line_numbers() {
760        let mut repo = create_test_repo();
761        // Set content with embedded line numbers (compressed format)
762        repo.files[0].content = Some("1:def main():\n5:    print('hello')".to_string());
763        let formatter = MarkdownFormatter::new().with_line_numbers(true);
764        let mut buf = Vec::new();
765        formatter.stream_files(&mut buf, &repo).unwrap();
766        let output = String::from_utf8(buf).unwrap();
767        // Should parse and display original line numbers
768        assert!(output.contains("   1 def main():"));
769        assert!(output.contains("   5     print('hello')"));
770    }
771
772    #[test]
773    fn test_stream_files_with_malformed_embedded_line_numbers() {
774        let mut repo = create_test_repo();
775        // Malformed embedded line numbers
776        repo.files[0].content = Some("abc:def main():\nno_colon_here".to_string());
777        let formatter = MarkdownFormatter::new().with_line_numbers(true);
778        let mut buf = Vec::new();
779        formatter.stream_files(&mut buf, &repo).unwrap();
780        let output = String::from_utf8(buf).unwrap();
781        // Should handle gracefully - use sequential numbers since first line doesn't parse
782        assert!(output.contains("   1 abc:def main():"));
783    }
784
785    #[test]
786    fn test_stream_files_with_no_content() {
787        let mut repo = create_test_repo();
788        repo.files[0].content = None;
789        let formatter = MarkdownFormatter::new();
790        let mut buf = Vec::new();
791        formatter.stream_files(&mut buf, &repo).unwrap();
792        let output = String::from_utf8(buf).unwrap();
793        // Should still have ## Files header but no file content
794        assert!(output.contains("## Files"));
795        assert!(!output.contains("### main.py"));
796    }
797
798    #[test]
799    fn test_stream_files_unknown_language() {
800        let mut repo = create_test_repo();
801        repo.files[0].language = None;
802        let formatter = MarkdownFormatter::new();
803        let mut buf = Vec::new();
804        formatter.stream_files(&mut buf, &repo).unwrap();
805        let output = String::from_utf8(buf).unwrap();
806        assert!(output.contains("**Language**: unknown"));
807    }
808
809    #[test]
810    fn test_format_repo_without_map() {
811        let formatter = MarkdownFormatter::new();
812        let repo = create_test_repo();
813        let output = formatter.format_repo(&repo);
814        assert!(output.contains("# Repository: test"));
815        assert!(output.contains("## Overview"));
816        // Should NOT have repomap section
817        assert!(!output.contains("## Repository Map"));
818    }
819
820    #[test]
821    fn test_streaming_formatter_trait() {
822        let formatter = MarkdownFormatter::new();
823        let repo = create_test_repo();
824        let map = create_test_map();
825        let mut buf = Vec::new();
826        formatter.format_to_writer(&repo, &map, &mut buf).unwrap();
827        let output = String::from_utf8(buf).unwrap();
828        assert!(output.contains("# Repository: test"));
829        assert!(output.contains("## Repository Map"));
830    }
831
832    #[test]
833    fn test_streaming_formatter_repo_only() {
834        let formatter = MarkdownFormatter::new();
835        let repo = create_test_repo();
836        let mut buf = Vec::new();
837        formatter.format_repo_to_writer(&repo, &mut buf).unwrap();
838        let output = String::from_utf8(buf).unwrap();
839        assert!(output.contains("# Repository: test"));
840        assert!(!output.contains("## Repository Map"));
841    }
842
843    #[test]
844    fn test_escape_markdown_cell() {
845        assert_eq!(escape_markdown_cell("hello"), "hello");
846        assert_eq!(escape_markdown_cell("a|b"), "a\\|b");
847        assert_eq!(escape_markdown_cell("line1\nline2"), "line1 line2");
848        assert_eq!(escape_markdown_cell("  spaced  "), "spaced");
849        assert_eq!(escape_markdown_cell("a|b\nc|d"), "a\\|b c\\|d");
850    }
851
852    #[test]
853    fn test_escape_markdown_cell_complex() {
854        // Multiple pipes and newlines
855        let input = "col1|col2|col3\nrow1|row2|row3";
856        let expected = "col1\\|col2\\|col3 row1\\|row2\\|row3";
857        assert_eq!(escape_markdown_cell(input), expected);
858    }
859
860    #[test]
861    fn test_full_format_with_all_features() {
862        let formatter = MarkdownFormatter::new()
863            .with_tables(true)
864            .with_mermaid(true)
865            .with_line_numbers(true)
866            .with_model(TokenizerModel::Claude);
867        let repo = create_multi_language_repo();
868        let map = create_map_with_mermaid();
869        let output = formatter.format(&repo, &map);
870
871        // All sections present
872        assert!(output.contains("# Repository: multi-lang"));
873        assert!(output.contains("## Overview"));
874        assert!(output.contains("## Repository Map"));
875        assert!(output.contains("## Project Structure"));
876        assert!(output.contains("## Files"));
877        assert!(output.contains("```mermaid"));
878    }
879
880    #[test]
881    fn test_format_with_empty_repo() {
882        let repo = Repository {
883            name: "empty".to_string(),
884            path: "/tmp/empty".into(),
885            files: vec![],
886            metadata: RepoMetadata::default(),
887        };
888        let map = RepoMap {
889            summary: "Empty repository".to_string(),
890            key_symbols: vec![],
891            module_graph: ModuleGraph {
892                nodes: vec![],
893                edges: vec![],
894            },
895            file_index: vec![],
896            token_count: 0,
897        };
898        let formatter = MarkdownFormatter::new();
899        let output = formatter.format(&repo, &map);
900        assert!(output.contains("# Repository: empty"));
901        // Should not fail
902    }
903
904    #[test]
905    fn test_estimate_output_size_empty_repo() {
906        let repo = Repository {
907            name: "empty".to_string(),
908            path: "/tmp/empty".into(),
909            files: vec![],
910            metadata: RepoMetadata::default(),
911        };
912        let size = MarkdownFormatter::estimate_output_size(&repo);
913        assert_eq!(size, 1000); // Just base size
914    }
915
916    #[test]
917    fn test_estimate_output_size_with_content() {
918        let mut repo = create_test_repo();
919        repo.files[0].content = Some("x".repeat(5000));
920        let size = MarkdownFormatter::estimate_output_size(&repo);
921        // base (1000) + files (1 * 400) + content (5000)
922        assert_eq!(size, 6400);
923    }
924
925    #[test]
926    fn test_structure_nested_paths() {
927        let repo = Repository {
928            name: "nested".to_string(),
929            path: "/tmp/nested".into(),
930            files: vec![
931                RepoFile::new("/tmp/nested/src/a/b/c.rs", "src/a/b/c.rs"),
932                RepoFile::new("/tmp/nested/src/a/b/d.rs", "src/a/b/d.rs"),
933                RepoFile::new("/tmp/nested/src/a/e.rs", "src/a/e.rs"),
934                RepoFile::new("/tmp/nested/tests/test.rs", "tests/test.rs"),
935            ],
936            metadata: RepoMetadata::default(),
937        };
938        let formatter = MarkdownFormatter::new();
939        let mut buf = Vec::new();
940        formatter.stream_structure(&mut buf, &repo).unwrap();
941        let output = String::from_utf8(buf).unwrap();
942        // Should show directory structure
943        assert!(output.contains("src"));
944        assert!(output.contains("tests"));
945    }
946
947    #[test]
948    fn test_name_method() {
949        let formatter = MarkdownFormatter::new();
950        assert_eq!(formatter.name(), "markdown");
951    }
952}