Skip to main content

infiniloom_engine/output/
markdown.rs

1//! GPT-optimized Markdown output formatter
2//!
3//! Supports both in-memory (`format()`) and streaming (`format_to_writer()`) modes.
4
5use crate::output::{Formatter, StreamingFormatter};
6use crate::repomap::RepoMap;
7use crate::types::{Repository, TokenizerModel};
8use std::io::{self, Write};
9
10/// Markdown formatter optimized for GPT
11pub struct MarkdownFormatter {
12    /// Include overview tables
13    include_tables: bool,
14    /// Include Mermaid diagrams
15    include_mermaid: bool,
16    /// Include file tree
17    include_tree: bool,
18    /// Include line numbers in code
19    include_line_numbers: bool,
20    /// Token model for counts in output
21    token_model: TokenizerModel,
22}
23
24impl MarkdownFormatter {
25    /// Create a new Markdown formatter
26    pub fn new() -> Self {
27        Self {
28            include_tables: true,
29            include_mermaid: true,
30            include_tree: true,
31            include_line_numbers: true,
32            token_model: TokenizerModel::Claude,
33        }
34    }
35
36    /// Set tables option
37    pub fn with_tables(mut self, enabled: bool) -> Self {
38        self.include_tables = enabled;
39        self
40    }
41
42    /// Set Mermaid option
43    pub fn with_mermaid(mut self, enabled: bool) -> Self {
44        self.include_mermaid = enabled;
45        self
46    }
47
48    /// Set line numbers option
49    pub fn with_line_numbers(mut self, enabled: bool) -> Self {
50        self.include_line_numbers = enabled;
51        self
52    }
53
54    /// Set token model for token counts in output
55    pub fn with_model(mut self, model: TokenizerModel) -> Self {
56        self.token_model = model;
57        self
58    }
59
60    /// Estimate output size for pre-allocation
61    fn estimate_output_size(repo: &Repository) -> usize {
62        let base = 1000;
63        let files = repo.files.len() * 400;
64        let content: usize = repo
65            .files
66            .iter()
67            .filter_map(|f| f.content.as_ref())
68            .map(|c| c.len())
69            .sum();
70        base + files + content
71    }
72
73    // =========================================================================
74    // Streaming methods (write to impl std::io::Write)
75    // =========================================================================
76
77    fn stream_header<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
78        writeln!(w, "# Repository: {}", repo.name)?;
79        writeln!(w)?;
80        writeln!(
81            w,
82            "> **Files**: {} | **Lines**: {} | **Tokens**: {}",
83            repo.metadata.total_files,
84            repo.metadata.total_lines,
85            repo.metadata.total_tokens.get(self.token_model)
86        )?;
87        writeln!(w)
88    }
89
90    fn stream_overview<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
91        if !self.include_tables {
92            return Ok(());
93        }
94
95        writeln!(w, "## Overview")?;
96        writeln!(w)?;
97        writeln!(w, "| Metric | Value |")?;
98        writeln!(w, "|--------|-------|")?;
99        writeln!(w, "| Files | {} |", repo.metadata.total_files)?;
100        writeln!(w, "| Lines | {} |", repo.metadata.total_lines)?;
101
102        if let Some(lang) = repo.metadata.languages.first() {
103            writeln!(w, "| Primary Language | {} |", lang.language)?;
104        }
105        if let Some(framework) = &repo.metadata.framework {
106            writeln!(w, "| Framework | {} |", framework)?;
107        }
108        writeln!(w)?;
109
110        if repo.metadata.languages.len() > 1 {
111            writeln!(w, "### Languages")?;
112            writeln!(w)?;
113            writeln!(w, "| Language | Files | Percentage |")?;
114            writeln!(w, "|----------|-------|------------|")?;
115            for lang in &repo.metadata.languages {
116                writeln!(w, "| {} | {} | {:.1}% |", lang.language, lang.files, lang.percentage)?;
117            }
118            writeln!(w)?;
119        }
120        Ok(())
121    }
122
123    fn stream_repomap<W: Write>(&self, w: &mut W, map: &RepoMap) -> io::Result<()> {
124        writeln!(w, "## Repository Map")?;
125        writeln!(w)?;
126        writeln!(w, "{}", map.summary)?;
127        writeln!(w)?;
128
129        writeln!(w, "### Key Symbols")?;
130        writeln!(w)?;
131        writeln!(w, "| Rank | Symbol | Type | File | Line | Summary |")?;
132        writeln!(w, "|------|--------|------|------|------|---------|")?;
133        for sym in map.key_symbols.iter().take(15) {
134            let summary = sym
135                .summary
136                .as_deref()
137                .map(escape_markdown_cell)
138                .unwrap_or_default();
139            writeln!(
140                w,
141                "| {} | `{}` | {} | {} | {} | {} |",
142                sym.rank, sym.name, sym.kind, sym.file, sym.line, summary
143            )?;
144        }
145        writeln!(w)?;
146
147        if self.include_mermaid && !map.module_graph.edges.is_empty() {
148            writeln!(w, "### Module Dependencies")?;
149            writeln!(w)?;
150            writeln!(w, "```mermaid")?;
151            writeln!(w, "graph LR")?;
152            for edge in &map.module_graph.edges {
153                let sanitize_id = |s: &str| -> String {
154                    s.chars()
155                        .map(|c| if c == '-' || c == '.' { '_' } else { c })
156                        .collect()
157                };
158                let from_id = sanitize_id(&edge.from);
159                let to_id = sanitize_id(&edge.to);
160                writeln!(w, "    {}[\"{}\"] --> {}[\"{}\"]", from_id, edge.from, to_id, edge.to)?;
161            }
162            writeln!(w, "```")?;
163            writeln!(w)?;
164        }
165        Ok(())
166    }
167
168    fn stream_structure<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
169        if !self.include_tree {
170            return Ok(());
171        }
172
173        writeln!(w, "## Project Structure")?;
174        writeln!(w)?;
175        writeln!(w, "```")?;
176
177        let mut paths: Vec<_> = repo
178            .files
179            .iter()
180            .map(|f| f.relative_path.as_str())
181            .collect();
182        paths.sort();
183
184        let mut prev_parts: Vec<&str> = Vec::new();
185        for path in paths {
186            let parts: Vec<_> = path.split('/').collect();
187            let mut common = 0;
188            for (i, part) in parts.iter().enumerate() {
189                if i < prev_parts.len() && prev_parts[i] == *part {
190                    common = i + 1;
191                } else {
192                    break;
193                }
194            }
195            for (i, part) in parts.iter().enumerate().skip(common) {
196                let indent = "  ".repeat(i);
197                let prefix = if i == parts.len() - 1 {
198                    "\u{1f4c4} "
199                } else {
200                    "\u{1f4c1} "
201                };
202                writeln!(w, "{}{}{}", indent, prefix, part)?;
203            }
204            prev_parts = parts;
205        }
206
207        writeln!(w, "```")?;
208        writeln!(w)
209    }
210
211    fn stream_files<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
212        writeln!(w, "## Files")?;
213        writeln!(w)?;
214
215        for file in &repo.files {
216            if let Some(content) = &file.content {
217                writeln!(w, "### {}", file.relative_path)?;
218                writeln!(w)?;
219                writeln!(
220                    w,
221                    "> **Tokens**: {} | **Language**: {}",
222                    file.token_count.get(self.token_model),
223                    file.language.as_deref().unwrap_or("unknown")
224                )?;
225                writeln!(w)?;
226
227                let lang = file.language.as_deref().unwrap_or("");
228                let fence = code_fence(content);
229                writeln!(w, "{}{}", fence, lang)?;
230                if self.include_line_numbers {
231                    // Check if content has embedded line numbers (format: "N:content")
232                    // This preserves original line numbers when content has been compressed
233                    let first_line = content.lines().next().unwrap_or("");
234                    let has_embedded_line_nums = first_line.contains(':')
235                        && first_line
236                            .split(':')
237                            .next()
238                            .is_some_and(|s| s.parse::<u32>().is_ok());
239
240                    if has_embedded_line_nums {
241                        // Content has embedded line numbers - parse and output
242                        for line in content.lines() {
243                            if let Some((num_str, rest)) = line.split_once(':') {
244                                if let Ok(line_num) = num_str.parse::<u32>() {
245                                    writeln!(w, "{:4} {}", line_num, rest)?;
246                                } else {
247                                    // Fallback for malformed lines
248                                    writeln!(w, "     {}", line)?;
249                                }
250                            } else {
251                                writeln!(w, "     {}", line)?;
252                            }
253                        }
254                    } else {
255                        // No embedded line numbers - use sequential (uncompressed content)
256                        for (i, line) in content.lines().enumerate() {
257                            writeln!(w, "{:4} {}", i + 1, line)?;
258                        }
259                    }
260                } else {
261                    writeln!(w, "{}", content)?;
262                }
263                writeln!(w, "{}", fence)?;
264                writeln!(w)?;
265            }
266        }
267        Ok(())
268    }
269}
270
271impl Default for MarkdownFormatter {
272    fn default() -> Self {
273        Self::new()
274    }
275}
276
277impl Formatter for MarkdownFormatter {
278    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
279        // Use streaming internally for consistency
280        let mut output = Vec::with_capacity(Self::estimate_output_size(repo));
281        // Vec<u8> write cannot fail, ignore result
282        drop(self.format_to_writer(repo, map, &mut output));
283        // Use lossy conversion to handle any edge cases with invalid UTF-8
284        String::from_utf8(output)
285            .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
286    }
287
288    fn format_repo(&self, repo: &Repository) -> String {
289        let mut output = Vec::with_capacity(Self::estimate_output_size(repo));
290        // Vec<u8> write cannot fail, ignore result
291        drop(self.format_repo_to_writer(repo, &mut output));
292        // Use lossy conversion to handle any edge cases with invalid UTF-8
293        String::from_utf8(output)
294            .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
295    }
296
297    fn name(&self) -> &'static str {
298        "markdown"
299    }
300}
301
302impl StreamingFormatter for MarkdownFormatter {
303    fn format_to_writer<W: Write>(
304        &self,
305        repo: &Repository,
306        map: &RepoMap,
307        writer: &mut W,
308    ) -> io::Result<()> {
309        self.stream_header(writer, repo)?;
310        self.stream_overview(writer, repo)?;
311        self.stream_repomap(writer, map)?;
312        self.stream_structure(writer, repo)?;
313        self.stream_files(writer, repo)?;
314        Ok(())
315    }
316
317    fn format_repo_to_writer<W: Write>(&self, repo: &Repository, writer: &mut W) -> io::Result<()> {
318        self.stream_header(writer, repo)?;
319        self.stream_overview(writer, repo)?;
320        self.stream_structure(writer, repo)?;
321        self.stream_files(writer, repo)?;
322        Ok(())
323    }
324}
325
/// Build a backtick fence long enough to enclose `content` safely.
///
/// The fence is three backticks unless the content itself contains a run of
/// three or more, in which case it is one backtick longer than the longest
/// run, so no line of the content can be read as the closing fence.
fn code_fence(content: &str) -> String {
    const MIN_FENCE_LEN: usize = 3;

    let mut longest_run = 0usize;
    let mut current_run = 0usize;
    for byte in content.bytes() {
        if byte == b'`' {
            current_run += 1;
            longest_run = longest_run.max(current_run);
        } else {
            current_run = 0;
        }
    }

    let fence_len = MIN_FENCE_LEN.max(longest_run + 1);
    "`".repeat(fence_len)
}
344
/// Make `text` safe to place inside a single Markdown table cell.
///
/// Pipes are backslash-escaped so they are not read as column separators.
/// Every line ending (`\r\n`, `\n` or a lone `\r`) becomes one space, because
/// any of them would end the table row early. Leading and trailing whitespace
/// is trimmed from the result.
fn escape_markdown_cell(text: &str) -> String {
    text.replace('|', "\\|")
        // Collapse CRLF first so it yields one space rather than two.
        .replace("\r\n", " ")
        .replace(['\n', '\r'], " ")
        .trim()
        .to_owned()
}
351
352#[cfg(test)]
353#[allow(clippy::str_to_string)]
354mod tests {
355    use super::*;
356    use crate::repomap::{
357        FileIndexEntry, ModuleEdge, ModuleGraph, ModuleNode, RankedSymbol, RepoMap,
358        RepoMapGenerator,
359    };
360    use crate::types::{LanguageStats, RepoFile, RepoMetadata, TokenCounts};
361
362    fn create_test_repo() -> Repository {
363        Repository {
364            name: "test".to_string(),
365            path: "/tmp/test".into(),
366            files: vec![RepoFile {
367                path: "/tmp/test/main.py".into(),
368                relative_path: "main.py".to_string(),
369                language: Some("python".to_string()),
370                size_bytes: 100,
371                token_count: TokenCounts {
372                    o200k: 48,
373                    cl100k: 49,
374                    claude: 50,
375                    gemini: 47,
376                    llama: 46,
377                    mistral: 46,
378                    deepseek: 46,
379                    qwen: 46,
380                    cohere: 47,
381                    grok: 46,
382                },
383                symbols: Vec::new(),
384                importance: 0.8,
385                content: Some("def main():\n    print('hello')".to_string()),
386            }],
387            metadata: RepoMetadata {
388                total_files: 1,
389                total_lines: 2,
390                total_tokens: TokenCounts {
391                    o200k: 48,
392                    cl100k: 49,
393                    claude: 50,
394                    gemini: 47,
395                    llama: 46,
396                    mistral: 46,
397                    deepseek: 46,
398                    qwen: 46,
399                    cohere: 47,
400                    grok: 46,
401                },
402                languages: vec![LanguageStats {
403                    language: "Python".to_string(),
404                    files: 1,
405                    lines: 2,
406                    percentage: 100.0,
407                }],
408                framework: None,
409                description: None,
410                branch: None,
411                commit: None,
412                directory_structure: None,
413                external_dependencies: vec![],
414                git_history: None,
415            },
416        }
417    }
418
419    fn create_multi_language_repo() -> Repository {
420        Repository {
421            name: "multi-lang".to_string(),
422            path: "/tmp/multi".into(),
423            files: vec![
424                RepoFile {
425                    path: "/tmp/multi/src/main.rs".into(),
426                    relative_path: "src/main.rs".to_string(),
427                    language: Some("rust".to_string()),
428                    size_bytes: 200,
429                    token_count: TokenCounts::default(),
430                    symbols: Vec::new(),
431                    importance: 0.9,
432                    content: Some("fn main() {\n    println!(\"hello\");\n}".to_string()),
433                },
434                RepoFile {
435                    path: "/tmp/multi/src/lib.rs".into(),
436                    relative_path: "src/lib.rs".to_string(),
437                    language: Some("rust".to_string()),
438                    size_bytes: 150,
439                    token_count: TokenCounts::default(),
440                    symbols: Vec::new(),
441                    importance: 0.8,
442                    content: Some("pub mod utils;".to_string()),
443                },
444                RepoFile {
445                    path: "/tmp/multi/tests/test.py".into(),
446                    relative_path: "tests/test.py".to_string(),
447                    language: Some("python".to_string()),
448                    size_bytes: 100,
449                    token_count: TokenCounts::default(),
450                    symbols: Vec::new(),
451                    importance: 0.5,
452                    content: Some("def test_it(): pass".to_string()),
453                },
454            ],
455            metadata: RepoMetadata {
456                total_files: 3,
457                total_lines: 5,
458                total_tokens: TokenCounts::default(),
459                languages: vec![
460                    LanguageStats {
461                        language: "Rust".to_string(),
462                        files: 2,
463                        lines: 4,
464                        percentage: 66.7,
465                    },
466                    LanguageStats {
467                        language: "Python".to_string(),
468                        files: 1,
469                        lines: 1,
470                        percentage: 33.3,
471                    },
472                ],
473                framework: Some("Actix".to_string()),
474                description: Some("Test project".to_string()),
475                branch: Some("main".to_string()),
476                commit: Some("abc123".to_string()),
477                directory_structure: None,
478                external_dependencies: vec!["tokio".to_string()],
479                git_history: None,
480            },
481        }
482    }
483
484    fn create_test_map() -> RepoMap {
485        RepoMap {
486            summary: "Test repository with 1 key symbol".to_string(),
487            key_symbols: vec![RankedSymbol {
488                rank: 1,
489                name: "main".to_string(),
490                kind: "function".to_string(),
491                file: "main.py".to_string(),
492                line: 1,
493                signature: None,
494                summary: Some("Entry point".to_string()),
495                references: 0,
496                importance: 0.95,
497            }],
498            module_graph: ModuleGraph {
499                nodes: vec![ModuleNode { name: "main".to_string(), files: 1, tokens: 50 }],
500                edges: vec![],
501            },
502            file_index: vec![FileIndexEntry {
503                path: "main.py".to_string(),
504                tokens: 50,
505                importance: "high".to_string(),
506                summary: None,
507            }],
508            token_count: 50,
509        }
510    }
511
512    fn create_map_with_mermaid() -> RepoMap {
513        RepoMap {
514            summary: "Test with dependencies".to_string(),
515            key_symbols: vec![
516                RankedSymbol {
517                    rank: 1,
518                    name: "main".to_string(),
519                    kind: "function".to_string(),
520                    file: "main.rs".to_string(),
521                    line: 1,
522                    signature: Some("fn main()".to_string()),
523                    summary: Some("Entry | point".to_string()),
524                    references: 5,
525                    importance: 0.95,
526                },
527                RankedSymbol {
528                    rank: 2,
529                    name: "helper".to_string(),
530                    kind: "function".to_string(),
531                    file: "lib.rs".to_string(),
532                    line: 5,
533                    signature: None,
534                    summary: None,
535                    references: 2,
536                    importance: 0.7,
537                },
538            ],
539            module_graph: ModuleGraph {
540                nodes: vec![
541                    ModuleNode { name: "main".to_string(), files: 1, tokens: 100 },
542                    ModuleNode { name: "lib".to_string(), files: 1, tokens: 80 },
543                ],
544                edges: vec![ModuleEdge {
545                    from: "main-mod".to_string(),
546                    to: "lib.rs".to_string(),
547                    weight: 1,
548                }],
549            },
550            file_index: vec![
551                FileIndexEntry {
552                    path: "main.rs".to_string(),
553                    tokens: 100,
554                    importance: "critical".to_string(),
555                    summary: None,
556                },
557                FileIndexEntry {
558                    path: "lib.rs".to_string(),
559                    tokens: 80,
560                    importance: "high".to_string(),
561                    summary: None,
562                },
563            ],
564            token_count: 100,
565        }
566    }
567
568    #[test]
569    fn test_markdown_output() {
570        let repo = create_test_repo();
571        let map = RepoMapGenerator::new(1000).generate(&repo);
572
573        let formatter = MarkdownFormatter::new();
574        let output = formatter.format(&repo, &map);
575
576        assert!(output.contains("# Repository: test"));
577        assert!(output.contains("## Overview"));
578        assert!(output.contains("```python"));
579    }
580
581    #[test]
582    fn test_markdown_default() {
583        let formatter = MarkdownFormatter::default();
584        assert_eq!(formatter.name(), "markdown");
585    }
586
587    #[test]
588    fn test_builder_with_tables() {
589        let formatter = MarkdownFormatter::new().with_tables(false);
590        let repo = create_test_repo();
591        let map = create_test_map();
592        let output = formatter.format(&repo, &map);
593        assert!(!output.contains("## Overview"));
594        assert!(!output.contains("| Metric | Value |"));
595    }
596
597    #[test]
598    fn test_builder_with_mermaid_disabled() {
599        let formatter = MarkdownFormatter::new().with_mermaid(false);
600        let repo = create_multi_language_repo();
601        let map = create_map_with_mermaid();
602        let output = formatter.format(&repo, &map);
603        assert!(!output.contains("```mermaid"));
604    }
605
606    #[test]
607    fn test_builder_with_mermaid_enabled() {
608        let formatter = MarkdownFormatter::new().with_mermaid(true);
609        let repo = create_multi_language_repo();
610        let map = create_map_with_mermaid();
611        let output = formatter.format(&repo, &map);
612        assert!(output.contains("```mermaid"));
613        assert!(output.contains("graph LR"));
614        // Check ID sanitization (- and . replaced with _)
615        assert!(output.contains("main_mod"));
616        assert!(output.contains("lib_rs"));
617    }
618
619    #[test]
620    fn test_builder_with_line_numbers_disabled() {
621        let formatter = MarkdownFormatter::new().with_line_numbers(false);
622        let repo = create_test_repo();
623        let map = create_test_map();
624        let output = formatter.format(&repo, &map);
625        // Should NOT have line numbers like "   1 def main():"
626        assert!(!output.contains("   1 def main"));
627        // Should have raw content
628        assert!(output.contains("def main():"));
629    }
630
631    #[test]
632    fn test_builder_with_model() {
633        let formatter = MarkdownFormatter::new().with_model(TokenizerModel::Gpt4o);
634        let repo = create_test_repo();
635        let map = create_test_map();
636        let output = formatter.format(&repo, &map);
637        // GPT-4o uses o200k encoding, which is 48 in our test data
638        assert!(output.contains("**Tokens**: 48"));
639    }
640
641    #[test]
642    fn test_estimate_output_size() {
643        let repo = create_test_repo();
644        let size = MarkdownFormatter::estimate_output_size(&repo);
645        // base (1000) + files (1 * 400) + content length (~30)
646        assert!(size > 1000);
647        assert!(size < 2000);
648    }
649
650    #[test]
651    fn test_stream_header() {
652        let formatter = MarkdownFormatter::new();
653        let repo = create_test_repo();
654        let mut buf = Vec::new();
655        formatter.stream_header(&mut buf, &repo).unwrap();
656        let output = String::from_utf8(buf).unwrap();
657        assert!(output.contains("# Repository: test"));
658        assert!(output.contains("**Files**: 1"));
659        assert!(output.contains("**Lines**: 2"));
660        assert!(output.contains("**Tokens**: 50")); // Claude tokens
661    }
662
663    #[test]
664    fn test_stream_overview_with_framework() {
665        let formatter = MarkdownFormatter::new();
666        let repo = create_multi_language_repo();
667        let mut buf = Vec::new();
668        formatter.stream_overview(&mut buf, &repo).unwrap();
669        let output = String::from_utf8(buf).unwrap();
670        assert!(output.contains("| Framework | Actix |"));
671        assert!(output.contains("| Primary Language | Rust |"));
672    }
673
674    #[test]
675    fn test_stream_overview_multiple_languages() {
676        let formatter = MarkdownFormatter::new();
677        let repo = create_multi_language_repo();
678        let mut buf = Vec::new();
679        formatter.stream_overview(&mut buf, &repo).unwrap();
680        let output = String::from_utf8(buf).unwrap();
681        assert!(output.contains("### Languages"));
682        assert!(output.contains("| Rust | 2 | 66.7% |"));
683        assert!(output.contains("| Python | 1 | 33.3% |"));
684    }
685
686    #[test]
687    fn test_stream_overview_disabled() {
688        let formatter = MarkdownFormatter::new().with_tables(false);
689        let repo = create_test_repo();
690        let mut buf = Vec::new();
691        formatter.stream_overview(&mut buf, &repo).unwrap();
692        let output = String::from_utf8(buf).unwrap();
693        assert!(output.is_empty());
694    }
695
696    #[test]
697    fn test_stream_repomap() {
698        let formatter = MarkdownFormatter::new();
699        let map = create_test_map();
700        let mut buf = Vec::new();
701        formatter.stream_repomap(&mut buf, &map).unwrap();
702        let output = String::from_utf8(buf).unwrap();
703        assert!(output.contains("## Repository Map"));
704        assert!(output.contains("### Key Symbols"));
705        assert!(output.contains("| 1 | `main` | function | main.py | 1 | Entry point |"));
706    }
707
708    #[test]
709    fn test_stream_repomap_escapes_pipe_in_summary() {
710        let formatter = MarkdownFormatter::new();
711        let map = create_map_with_mermaid();
712        let mut buf = Vec::new();
713        formatter.stream_repomap(&mut buf, &map).unwrap();
714        let output = String::from_utf8(buf).unwrap();
715        // Pipe should be escaped
716        assert!(output.contains("Entry \\| point"));
717    }
718
719    #[test]
720    fn test_stream_structure() {
721        let formatter = MarkdownFormatter::new();
722        let repo = create_multi_language_repo();
723        let mut buf = Vec::new();
724        formatter.stream_structure(&mut buf, &repo).unwrap();
725        let output = String::from_utf8(buf).unwrap();
726        assert!(output.contains("## Project Structure"));
727        assert!(output.contains("```"));
728    }
729
730    #[test]
731    fn test_stream_structure_disabled() {
732        // Create a formatter with tree disabled by modifying internal state
733        let mut formatter = MarkdownFormatter::new();
734        formatter.include_tree = false;
735        let repo = create_test_repo();
736        let mut buf = Vec::new();
737        formatter.stream_structure(&mut buf, &repo).unwrap();
738        let output = String::from_utf8(buf).unwrap();
739        assert!(output.is_empty());
740    }
741
742    #[test]
743    fn test_stream_files_with_line_numbers() {
744        let formatter = MarkdownFormatter::new().with_line_numbers(true);
745        let repo = create_test_repo();
746        let mut buf = Vec::new();
747        formatter.stream_files(&mut buf, &repo).unwrap();
748        let output = String::from_utf8(buf).unwrap();
749        assert!(output.contains("### main.py"));
750        assert!(output.contains("**Tokens**: 50"));
751        assert!(output.contains("**Language**: python"));
752        // Line numbers should be present
753        assert!(output.contains("   1 def main():"));
754        assert!(output.contains("   2     print('hello')"));
755    }
756
757    #[test]
758    fn test_stream_files_without_line_numbers() {
759        let formatter = MarkdownFormatter::new().with_line_numbers(false);
760        let repo = create_test_repo();
761        let mut buf = Vec::new();
762        formatter.stream_files(&mut buf, &repo).unwrap();
763        let output = String::from_utf8(buf).unwrap();
764        // Should have raw content without line numbers
765        assert!(output.contains("def main():\n    print('hello')"));
766    }
767
768    #[test]
769    fn test_stream_files_with_embedded_line_numbers() {
770        let mut repo = create_test_repo();
771        // Set content with embedded line numbers (compressed format)
772        repo.files[0].content = Some("1:def main():\n5:    print('hello')".to_string());
773        let formatter = MarkdownFormatter::new().with_line_numbers(true);
774        let mut buf = Vec::new();
775        formatter.stream_files(&mut buf, &repo).unwrap();
776        let output = String::from_utf8(buf).unwrap();
777        // Should parse and display original line numbers
778        assert!(output.contains("   1 def main():"));
779        assert!(output.contains("   5     print('hello')"));
780    }
781
782    #[test]
783    fn test_stream_files_with_malformed_embedded_line_numbers() {
784        let mut repo = create_test_repo();
785        // Malformed embedded line numbers
786        repo.files[0].content = Some("abc:def main():\nno_colon_here".to_string());
787        let formatter = MarkdownFormatter::new().with_line_numbers(true);
788        let mut buf = Vec::new();
789        formatter.stream_files(&mut buf, &repo).unwrap();
790        let output = String::from_utf8(buf).unwrap();
791        // Should handle gracefully - use sequential numbers since first line doesn't parse
792        assert!(output.contains("   1 abc:def main():"));
793    }
794
795    #[test]
796    fn test_stream_files_with_no_content() {
797        let mut repo = create_test_repo();
798        repo.files[0].content = None;
799        let formatter = MarkdownFormatter::new();
800        let mut buf = Vec::new();
801        formatter.stream_files(&mut buf, &repo).unwrap();
802        let output = String::from_utf8(buf).unwrap();
803        // Should still have ## Files header but no file content
804        assert!(output.contains("## Files"));
805        assert!(!output.contains("### main.py"));
806    }
807
808    #[test]
809    fn test_stream_files_unknown_language() {
810        let mut repo = create_test_repo();
811        repo.files[0].language = None;
812        let formatter = MarkdownFormatter::new();
813        let mut buf = Vec::new();
814        formatter.stream_files(&mut buf, &repo).unwrap();
815        let output = String::from_utf8(buf).unwrap();
816        assert!(output.contains("**Language**: unknown"));
817    }
818
819    #[test]
820    fn test_format_repo_without_map() {
821        let formatter = MarkdownFormatter::new();
822        let repo = create_test_repo();
823        let output = formatter.format_repo(&repo);
824        assert!(output.contains("# Repository: test"));
825        assert!(output.contains("## Overview"));
826        // Should NOT have repomap section
827        assert!(!output.contains("## Repository Map"));
828    }
829
830    #[test]
831    fn test_streaming_formatter_trait() {
832        let formatter = MarkdownFormatter::new();
833        let repo = create_test_repo();
834        let map = create_test_map();
835        let mut buf = Vec::new();
836        formatter.format_to_writer(&repo, &map, &mut buf).unwrap();
837        let output = String::from_utf8(buf).unwrap();
838        assert!(output.contains("# Repository: test"));
839        assert!(output.contains("## Repository Map"));
840    }
841
842    #[test]
843    fn test_streaming_formatter_repo_only() {
844        let formatter = MarkdownFormatter::new();
845        let repo = create_test_repo();
846        let mut buf = Vec::new();
847        formatter.format_repo_to_writer(&repo, &mut buf).unwrap();
848        let output = String::from_utf8(buf).unwrap();
849        assert!(output.contains("# Repository: test"));
850        assert!(!output.contains("## Repository Map"));
851    }
852
853    #[test]
854    fn test_escape_markdown_cell() {
855        assert_eq!(escape_markdown_cell("hello"), "hello");
856        assert_eq!(escape_markdown_cell("a|b"), "a\\|b");
857        assert_eq!(escape_markdown_cell("line1\nline2"), "line1 line2");
858        assert_eq!(escape_markdown_cell("  spaced  "), "spaced");
859        assert_eq!(escape_markdown_cell("a|b\nc|d"), "a\\|b c\\|d");
860    }
861
862    #[test]
863    fn test_escape_markdown_cell_complex() {
864        // Multiple pipes and newlines
865        let input = "col1|col2|col3\nrow1|row2|row3";
866        let expected = "col1\\|col2\\|col3 row1\\|row2\\|row3";
867        assert_eq!(escape_markdown_cell(input), expected);
868    }
869
870    #[test]
871    fn test_full_format_with_all_features() {
872        let formatter = MarkdownFormatter::new()
873            .with_tables(true)
874            .with_mermaid(true)
875            .with_line_numbers(true)
876            .with_model(TokenizerModel::Claude);
877        let repo = create_multi_language_repo();
878        let map = create_map_with_mermaid();
879        let output = formatter.format(&repo, &map);
880
881        // All sections present
882        assert!(output.contains("# Repository: multi-lang"));
883        assert!(output.contains("## Overview"));
884        assert!(output.contains("## Repository Map"));
885        assert!(output.contains("## Project Structure"));
886        assert!(output.contains("## Files"));
887        assert!(output.contains("```mermaid"));
888    }
889
890    #[test]
891    fn test_format_with_empty_repo() {
892        let repo = Repository {
893            name: "empty".to_string(),
894            path: "/tmp/empty".into(),
895            files: vec![],
896            metadata: RepoMetadata::default(),
897        };
898        let map = RepoMap {
899            summary: "Empty repository".to_string(),
900            key_symbols: vec![],
901            module_graph: ModuleGraph { nodes: vec![], edges: vec![] },
902            file_index: vec![],
903            token_count: 0,
904        };
905        let formatter = MarkdownFormatter::new();
906        let output = formatter.format(&repo, &map);
907        assert!(output.contains("# Repository: empty"));
908        // Should not fail
909    }
910
911    #[test]
912    fn test_estimate_output_size_empty_repo() {
913        let repo = Repository {
914            name: "empty".to_string(),
915            path: "/tmp/empty".into(),
916            files: vec![],
917            metadata: RepoMetadata::default(),
918        };
919        let size = MarkdownFormatter::estimate_output_size(&repo);
920        assert_eq!(size, 1000); // Just base size
921    }
922
923    #[test]
924    fn test_estimate_output_size_with_content() {
925        let mut repo = create_test_repo();
926        repo.files[0].content = Some("x".repeat(5000));
927        let size = MarkdownFormatter::estimate_output_size(&repo);
928        // base (1000) + files (1 * 400) + content (5000)
929        assert_eq!(size, 6400);
930    }
931
932    #[test]
933    fn test_structure_nested_paths() {
934        let repo = Repository {
935            name: "nested".to_string(),
936            path: "/tmp/nested".into(),
937            files: vec![
938                RepoFile::new("/tmp/nested/src/a/b/c.rs", "src/a/b/c.rs"),
939                RepoFile::new("/tmp/nested/src/a/b/d.rs", "src/a/b/d.rs"),
940                RepoFile::new("/tmp/nested/src/a/e.rs", "src/a/e.rs"),
941                RepoFile::new("/tmp/nested/tests/test.rs", "tests/test.rs"),
942            ],
943            metadata: RepoMetadata::default(),
944        };
945        let formatter = MarkdownFormatter::new();
946        let mut buf = Vec::new();
947        formatter.stream_structure(&mut buf, &repo).unwrap();
948        let output = String::from_utf8(buf).unwrap();
949        // Should show directory structure
950        assert!(output.contains("src"));
951        assert!(output.contains("tests"));
952    }
953
954    #[test]
955    fn test_name_method() {
956        let formatter = MarkdownFormatter::new();
957        assert_eq!(formatter.name(), "markdown");
958    }
959
960    #[test]
961    fn test_code_fence_no_backticks() {
962        assert_eq!(code_fence("hello world"), "```");
963    }
964
965    #[test]
966    fn test_code_fence_with_triple_backticks() {
967        // Content has ``` so fence must use 4 backticks
968        assert_eq!(code_fence("some ```code``` here"), "````");
969    }
970
971    #[test]
972    fn test_code_fence_with_longer_backtick_run() {
973        // Content has ````` (5 backticks) so fence must use 6
974        assert_eq!(code_fence("a`````b"), "``````");
975    }
976
977    #[test]
978    fn test_stream_files_content_with_triple_backticks() {
979        let mut repo = create_test_repo();
980        repo.files[0].content = Some("# Example\n```python\nprint('hi')\n```\n".to_string());
981        repo.files[0].language = Some("markdown".to_string());
982        let formatter = MarkdownFormatter::new().with_line_numbers(false);
983        let mut buf = Vec::new();
984        formatter.stream_files(&mut buf, &repo).unwrap();
985        let output = String::from_utf8(buf).unwrap();
986        // The outer fence must use 4+ backticks to avoid breaking
987        assert!(output.contains("````markdown"));
988        assert!(output.contains("\n````\n"));
989        // The inner triple backticks in the content must appear intact
990        assert!(output.contains("```python"));
991    }
992}