Skip to main content

flat/
output.rs

1use crate::filters::SkipReason;
2use crate::tokens::is_prose_extension;
3use std::collections::HashMap;
4use std::io::Write;
5
/// Running counters collected while files are processed.
///
/// The counters are updated through the `add_*` methods and rendered as a
/// `<summary>` block by [`Statistics::format_summary`].
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Statistics {
    /// Number of files recorded so far, included and skipped alike.
    pub total_files: usize,
    /// Number of files recorded as included.
    pub included_files: usize,
    /// Skip counts keyed by the skip reason's `to_string()` text.
    pub skipped_by_reason: HashMap<String, usize>,
    /// Included-file counts keyed by extension; files without an extension
    /// are filed under the literal key `"no extension"`.
    pub included_by_extension: HashMap<String, usize>,
    /// Total output bytes accounted for so far.
    pub output_size: usize,
    /// Bytes from prose files (md, txt, rst, etc.).
    pub prose_bytes: usize,
    /// Bytes from code files; bytes of unknown type are also counted here.
    pub code_bytes: usize,
    /// Number of files recorded as compressed.
    pub compressed_files: usize,
    /// Token budget, when one is in effect.
    pub token_budget: Option<usize>,
    /// Tokens counted against the budget.
    pub tokens_used: usize,
    /// Entries left out because of the budget (presumably file paths — only
    /// the number of entries is reported in the summary).
    pub excluded_by_budget: Vec<String>,
}
20
21impl Statistics {
22    pub fn new() -> Self {
23        Self::default()
24    }
25
26    pub fn add_included(&mut self, extension: Option<&str>) {
27        self.total_files += 1;
28        self.included_files += 1;
29        let ext = extension.unwrap_or("no extension").to_string();
30        *self.included_by_extension.entry(ext).or_insert(0) += 1;
31    }
32
33    pub fn add_file_size_estimate(&mut self, file_size: u64, path_length: usize, extension: Option<&str>) {
34        // Estimate XML overhead:
35        // - Opening tag: <file path="..."> + newline = ~15 + path_length bytes
36        // - Closing tag: </file>\n\n = 9 bytes
37        // - Potential newline after content = 1 byte
38        let overhead = 25 + path_length;
39        let total_bytes = file_size as usize + overhead;
40        self.output_size += total_bytes;
41
42        // Track by content type
43        let ext_str = extension.unwrap_or("");
44        if is_prose_extension(ext_str) {
45            self.prose_bytes += total_bytes;
46        } else {
47            self.code_bytes += total_bytes;
48        }
49    }
50
51    pub fn add_compressed(&mut self) {
52        self.compressed_files += 1;
53    }
54
55    pub fn add_skipped(&mut self, reason: SkipReason) {
56        self.total_files += 1;
57        *self
58            .skipped_by_reason
59            .entry(reason.to_string())
60            .or_insert(0) += 1;
61    }
62
63    pub fn add_output_bytes(&mut self, bytes: usize) {
64        self.output_size += bytes;
65        // Conservative: treat all as code (higher token estimate) when type unknown
66        self.code_bytes += bytes;
67    }
68
69    pub fn total_skipped(&self) -> usize {
70        self.skipped_by_reason.values().sum()
71    }
72
73    pub fn estimated_tokens(&self) -> usize {
74        // Conservative estimation per PDR spec:
75        // - Code files: bytes / 3 (~3.0 chars/token)
76        // - Prose files: bytes / 4 (~4.0 chars/token)
77        let code_tokens = self.code_bytes / 3;
78        let prose_tokens = self.prose_bytes / 4;
79        code_tokens + prose_tokens
80    }
81
82    fn format_bytes(bytes: usize) -> String {
83        const KB: usize = 1024;
84        const MB: usize = KB * 1024;
85
86        if bytes >= MB {
87            format!("{:.2} MB", bytes as f64 / MB as f64)
88        } else if bytes >= KB {
89            format!("{:.2} KB", bytes as f64 / KB as f64)
90        } else {
91            format!("{} bytes", bytes)
92        }
93    }
94
95    fn format_tokens(tokens: usize) -> String {
96        if tokens >= 10_000 {
97            // Use k suffix for 10k and above
98            if tokens >= 1_000_000 {
99                format!("{:.1}M", tokens as f64 / 1_000_000.0)
100            } else {
101                format!("{:.1}k", tokens as f64 / 1_000.0)
102            }
103        } else if tokens >= 1_000 {
104            // Use commas for thousands (manual formatting)
105            let s = tokens.to_string();
106            let mut result = String::new();
107            for (i, c) in s.chars().rev().enumerate() {
108                if i > 0 && i % 3 == 0 {
109                    result.push(',');
110                }
111                result.push(c);
112            }
113            result.chars().rev().collect()
114        } else {
115            // No formatting for small numbers
116            tokens.to_string()
117        }
118    }
119
120    pub fn format_summary(&self) -> String {
121        let mut summary = format!(
122            "<summary>\nTotal files: {}\nIncluded: {}",
123            self.total_files, self.included_files
124        );
125
126        // Add extension breakdown for included files
127        if !self.included_by_extension.is_empty() {
128            let mut extensions: Vec<_> = self.included_by_extension.iter().collect();
129            extensions.sort_by(|(a_ext, a_count), (b_ext, b_count)| {
130                b_count.cmp(a_count).then_with(|| a_ext.cmp(b_ext))
131            });
132
133            let ext_str = extensions
134                .iter()
135                .map(|(ext, count)| {
136                    if *ext == "no extension" {
137                        format!("{} without extension", count)
138                    } else {
139                        format!("{} .{}", count, ext)
140                    }
141                })
142                .collect::<Vec<_>>()
143                .join(", ");
144
145            summary.push_str(&format!(" ({})", ext_str));
146        }
147
148        summary.push('\n');
149
150        if self.compressed_files > 0 {
151            summary.push_str(&format!("Compressed: {} files\n", self.compressed_files));
152        }
153
154        if self.total_skipped() > 0 {
155            summary.push_str(&format!("Skipped: {}", self.total_skipped()));
156
157            let mut reasons: Vec<_> = self.skipped_by_reason.iter().collect();
158            reasons.sort_by(|(a_reason, a_count), (b_reason, b_count)| {
159                b_count.cmp(a_count).then_with(|| a_reason.cmp(b_reason))
160            });
161
162            let reason_str = reasons
163                .iter()
164                .map(|(reason, count)| format!("{} {}", count, reason))
165                .collect::<Vec<_>>()
166                .join(", ");
167
168            summary.push_str(&format!(" ({})", reason_str));
169            summary.push('\n');
170        }
171
172        // Add token budget info
173        if let Some(budget) = self.token_budget {
174            summary.push_str(&format!(
175                "Token budget: {} / {} used\n",
176                Self::format_tokens(self.tokens_used),
177                Self::format_tokens(budget)
178            ));
179            if !self.excluded_by_budget.is_empty() {
180                summary.push_str(&format!(
181                    "Excluded by budget: {} files\n",
182                    self.excluded_by_budget.len()
183                ));
184            }
185        }
186
187        // Add output size (skip token estimate when budget is active to avoid confusion)
188        if self.output_size > 0 {
189            if self.token_budget.is_some() {
190                summary.push_str(&format!(
191                    "Output size: {}\n",
192                    Self::format_bytes(self.output_size),
193                ));
194            } else {
195                summary.push_str(&format!(
196                    "Output size: {} (~{} tokens)\n",
197                    Self::format_bytes(self.output_size),
198                    Self::format_tokens(self.estimated_tokens())
199                ));
200            }
201        }
202
203        summary.push_str("</summary>\n");
204        summary
205    }
206}
207
/// Writes the generated output to a boxed sink while counting the bytes
/// handed to it.
pub struct OutputWriter {
    /// Destination for all output.
    writer: Box<dyn Write>,
    /// Running total of bytes successfully passed to `writer`.
    bytes_written: usize,
}
212
213impl OutputWriter {
214    pub fn new(writer: Box<dyn Write>) -> Self {
215        Self {
216            writer,
217            bytes_written: 0,
218        }
219    }
220
221    pub fn bytes_written(&self) -> usize {
222        self.bytes_written
223    }
224
225    pub fn write_file_content(&mut self, path: &str, content: &str) -> std::io::Result<()> {
226        self.write_file_content_with_mode(path, content, None)
227    }
228
229    pub fn write_file_content_with_mode(
230        &mut self,
231        path: &str,
232        content: &str,
233        mode: Option<&str>,
234    ) -> std::io::Result<()> {
235        let escaped_path = escape_xml(path);
236        let opening_tag = match mode {
237            Some(m) => format!("<file path=\"{}\" mode=\"{}\">\n", escaped_path, m),
238            None => format!("<file path=\"{}\">\n", escaped_path),
239        };
240        self.writer.write_all(opening_tag.as_bytes())?;
241        self.bytes_written += opening_tag.len();
242
243        self.writer.write_all(content.as_bytes())?;
244        self.bytes_written += content.len();
245
246        if !content.ends_with('\n') {
247            self.writer.write_all(b"\n")?;
248            self.bytes_written += 1;
249        }
250
251        self.writer.write_all(b"</file>\n\n")?;
252        self.bytes_written += 9; // "</file>\n\n"
253
254        Ok(())
255    }
256
257    pub fn write_summary(&mut self, stats: &Statistics) -> std::io::Result<()> {
258        let summary = stats.format_summary();
259        self.writer.write_all(summary.as_bytes())?;
260        self.bytes_written += summary.len();
261
262        self.writer.write_all(b"\n")?;
263        self.bytes_written += 1;
264
265        Ok(())
266    }
267
268    pub fn write_file_path(&mut self, path: &str) -> std::io::Result<()> {
269        let line = format!("{}\n", path);
270        self.writer.write_all(line.as_bytes())?;
271        self.bytes_written += line.len();
272        Ok(())
273    }
274}
275
/// Returns `s` with the five XML special characters replaced by entities:
/// `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`, `'` → `&apos;`.
///
/// Every other character is copied through unchanged. An `&` that already
/// starts an entity in the input is escaped again.
fn escape_xml(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Included and skipped files both count toward the total, and included
    /// files are tallied per extension.
    #[test]
    fn test_statistics() {
        let mut stats = Statistics::new();
        stats.add_included(Some("rs"));
        stats.add_included(Some("toml"));
        stats.add_skipped(SkipReason::Binary);
        stats.add_skipped(SkipReason::Secret);
        stats.add_skipped(SkipReason::Binary);

        assert_eq!(stats.total_files, 5);
        assert_eq!(stats.included_files, 2);
        assert_eq!(stats.total_skipped(), 3);
        assert_eq!(stats.included_by_extension.get("rs"), Some(&1));
        assert_eq!(stats.included_by_extension.get("toml"), Some(&1));
    }

    /// Each of the five XML special characters is replaced by its entity and
    /// everything else is left alone.
    #[test]
    fn test_escape_xml() {
        assert_eq!(escape_xml(""), "");
        assert_eq!(escape_xml("hello"), "hello");
        assert_eq!(escape_xml("<tag>"), "&lt;tag&gt;");
        assert_eq!(escape_xml("a & b"), "a &amp; b");
        assert_eq!(escape_xml("\"quoted\""), "&quot;quoted&quot;");
        assert_eq!(escape_xml("it's"), "it&apos;s");
        // An ampersand that already starts an entity is escaped again.
        assert_eq!(escape_xml("&amp;"), "&amp;amp;");
    }
}