1use crate::filters::SkipReason;
2use crate::tokens::is_prose_extension;
3use std::collections::HashMap;
4use std::io::Write;
5
/// Running tallies gathered while files are processed; rendered by `format_summary`.
#[derive(Debug, Default)]
pub struct Statistics {
    /// Files counted so far, whether they were included or skipped.
    pub total_files: usize,
    /// Files counted as included.
    pub included_files: usize,
    /// Skip counts keyed by the textual form of the skip reason.
    pub skipped_by_reason: HashMap<String, usize>,
    /// Included-file counts keyed by extension (`"no extension"` when there is none).
    pub included_by_extension: HashMap<String, usize>,
    /// Accumulated output size in bytes.
    pub output_size: usize,
    /// Share of the accumulated bytes attributed to prose files.
    pub prose_bytes: usize,
    /// Share of the accumulated bytes attributed to everything that is not prose.
    pub code_bytes: usize,
    /// Number of files recorded as compressed.
    pub compressed_files: usize,
    /// Optional token budget; when set, the summary reports usage against it.
    pub token_budget: Option<usize>,
    /// Tokens consumed so far, reported against `token_budget`.
    pub tokens_used: usize,
    /// Entries left out because of the budget (presumably file paths — only the
    /// count is reported in the summary).
    pub excluded_by_budget: Vec<String>,
}
20
21impl Statistics {
22 pub fn new() -> Self {
23 Self::default()
24 }
25
26 pub fn add_included(&mut self, extension: Option<&str>) {
27 self.total_files += 1;
28 self.included_files += 1;
29 let ext = extension.unwrap_or("no extension").to_string();
30 *self.included_by_extension.entry(ext).or_insert(0) += 1;
31 }
32
33 pub fn add_file_size_estimate(&mut self, file_size: u64, path_length: usize, extension: Option<&str>) {
34 let overhead = 25 + path_length;
39 let total_bytes = file_size as usize + overhead;
40 self.output_size += total_bytes;
41
42 let ext_str = extension.unwrap_or("");
44 if is_prose_extension(ext_str) {
45 self.prose_bytes += total_bytes;
46 } else {
47 self.code_bytes += total_bytes;
48 }
49 }
50
51 pub fn add_compressed(&mut self) {
52 self.compressed_files += 1;
53 }
54
55 pub fn add_skipped(&mut self, reason: SkipReason) {
56 self.total_files += 1;
57 *self
58 .skipped_by_reason
59 .entry(reason.to_string())
60 .or_insert(0) += 1;
61 }
62
63 pub fn add_output_bytes(&mut self, bytes: usize) {
64 self.output_size += bytes;
65 self.code_bytes += bytes;
67 }
68
69 pub fn total_skipped(&self) -> usize {
70 self.skipped_by_reason.values().sum()
71 }
72
73 pub fn estimated_tokens(&self) -> usize {
74 let code_tokens = self.code_bytes / 3;
78 let prose_tokens = self.prose_bytes / 4;
79 code_tokens + prose_tokens
80 }
81
82 fn format_bytes(bytes: usize) -> String {
83 const KB: usize = 1024;
84 const MB: usize = KB * 1024;
85
86 if bytes >= MB {
87 format!("{:.2} MB", bytes as f64 / MB as f64)
88 } else if bytes >= KB {
89 format!("{:.2} KB", bytes as f64 / KB as f64)
90 } else {
91 format!("{} bytes", bytes)
92 }
93 }
94
95 fn format_tokens(tokens: usize) -> String {
96 if tokens >= 10_000 {
97 if tokens >= 1_000_000 {
99 format!("{:.1}M", tokens as f64 / 1_000_000.0)
100 } else {
101 format!("{:.1}k", tokens as f64 / 1_000.0)
102 }
103 } else if tokens >= 1_000 {
104 let s = tokens.to_string();
106 let mut result = String::new();
107 for (i, c) in s.chars().rev().enumerate() {
108 if i > 0 && i % 3 == 0 {
109 result.push(',');
110 }
111 result.push(c);
112 }
113 result.chars().rev().collect()
114 } else {
115 tokens.to_string()
117 }
118 }
119
120 pub fn format_summary(&self) -> String {
121 let mut summary = format!(
122 "<summary>\nTotal files: {}\nIncluded: {}",
123 self.total_files, self.included_files
124 );
125
126 if !self.included_by_extension.is_empty() {
128 let mut extensions: Vec<_> = self.included_by_extension.iter().collect();
129 extensions.sort_by(|(a_ext, a_count), (b_ext, b_count)| {
130 b_count.cmp(a_count).then_with(|| a_ext.cmp(b_ext))
131 });
132
133 let ext_str = extensions
134 .iter()
135 .map(|(ext, count)| {
136 if *ext == "no extension" {
137 format!("{} without extension", count)
138 } else {
139 format!("{} .{}", count, ext)
140 }
141 })
142 .collect::<Vec<_>>()
143 .join(", ");
144
145 summary.push_str(&format!(" ({})", ext_str));
146 }
147
148 summary.push('\n');
149
150 if self.compressed_files > 0 {
151 summary.push_str(&format!("Compressed: {} files\n", self.compressed_files));
152 }
153
154 if self.total_skipped() > 0 {
155 summary.push_str(&format!("Skipped: {}", self.total_skipped()));
156
157 let mut reasons: Vec<_> = self.skipped_by_reason.iter().collect();
158 reasons.sort_by(|(a_reason, a_count), (b_reason, b_count)| {
159 b_count.cmp(a_count).then_with(|| a_reason.cmp(b_reason))
160 });
161
162 let reason_str = reasons
163 .iter()
164 .map(|(reason, count)| format!("{} {}", count, reason))
165 .collect::<Vec<_>>()
166 .join(", ");
167
168 summary.push_str(&format!(" ({})", reason_str));
169 summary.push('\n');
170 }
171
172 if let Some(budget) = self.token_budget {
174 summary.push_str(&format!(
175 "Token budget: {} / {} used\n",
176 Self::format_tokens(self.tokens_used),
177 Self::format_tokens(budget)
178 ));
179 if !self.excluded_by_budget.is_empty() {
180 summary.push_str(&format!(
181 "Excluded by budget: {} files\n",
182 self.excluded_by_budget.len()
183 ));
184 }
185 }
186
187 if self.output_size > 0 {
189 if self.token_budget.is_some() {
190 summary.push_str(&format!(
191 "Output size: {}\n",
192 Self::format_bytes(self.output_size),
193 ));
194 } else {
195 summary.push_str(&format!(
196 "Output size: {} (~{} tokens)\n",
197 Self::format_bytes(self.output_size),
198 Self::format_tokens(self.estimated_tokens())
199 ));
200 }
201 }
202
203 summary.push_str("</summary>\n");
204 summary
205 }
206}
207
/// Output sink that keeps a running count of the bytes it has written.
pub struct OutputWriter {
    /// Destination that receives every emitted byte.
    writer: Box<dyn Write>,
    /// Total number of bytes written to `writer` so far.
    bytes_written: usize,
}
212
213impl OutputWriter {
214 pub fn new(writer: Box<dyn Write>) -> Self {
215 Self {
216 writer,
217 bytes_written: 0,
218 }
219 }
220
221 pub fn bytes_written(&self) -> usize {
222 self.bytes_written
223 }
224
225 pub fn write_file_content(&mut self, path: &str, content: &str) -> std::io::Result<()> {
226 self.write_file_content_with_mode(path, content, None)
227 }
228
229 pub fn write_file_content_with_mode(
230 &mut self,
231 path: &str,
232 content: &str,
233 mode: Option<&str>,
234 ) -> std::io::Result<()> {
235 let escaped_path = escape_xml(path);
236 let opening_tag = match mode {
237 Some(m) => format!("<file path=\"{}\" mode=\"{}\">\n", escaped_path, m),
238 None => format!("<file path=\"{}\">\n", escaped_path),
239 };
240 self.writer.write_all(opening_tag.as_bytes())?;
241 self.bytes_written += opening_tag.len();
242
243 self.writer.write_all(content.as_bytes())?;
244 self.bytes_written += content.len();
245
246 if !content.ends_with('\n') {
247 self.writer.write_all(b"\n")?;
248 self.bytes_written += 1;
249 }
250
251 self.writer.write_all(b"</file>\n\n")?;
252 self.bytes_written += 9; Ok(())
255 }
256
257 pub fn write_summary(&mut self, stats: &Statistics) -> std::io::Result<()> {
258 let summary = stats.format_summary();
259 self.writer.write_all(summary.as_bytes())?;
260 self.bytes_written += summary.len();
261
262 self.writer.write_all(b"\n")?;
263 self.bytes_written += 1;
264
265 Ok(())
266 }
267
268 pub fn write_file_path(&mut self, path: &str) -> std::io::Result<()> {
269 let line = format!("{}\n", path);
270 self.writer.write_all(line.as_bytes())?;
271 self.bytes_written += line.len();
272 Ok(())
273 }
274}
275
/// Escapes the five characters that are special in XML text and attribute
/// values (`&`, `<`, `>`, `"`, `'`) using the predefined XML entities.
///
/// The input is processed in a single pass, so an ampersand produced by one
/// replacement is never escaped a second time.
fn escape_xml(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Included and skipped files both count toward the total; extensions are tallied.
    #[test]
    fn test_statistics() {
        let mut stats = Statistics::new();
        stats.add_included(Some("rs"));
        stats.add_included(Some("toml"));
        stats.add_skipped(SkipReason::Binary);
        stats.add_skipped(SkipReason::Secret);
        stats.add_skipped(SkipReason::Binary);

        assert_eq!(stats.total_files, 5);
        assert_eq!(stats.included_files, 2);
        assert_eq!(stats.total_skipped(), 3);
        assert_eq!(stats.included_by_extension.get("rs"), Some(&1));
        assert_eq!(stats.included_by_extension.get("toml"), Some(&1));
    }

    /// Reserved XML characters are replaced by their entities; plain text is untouched.
    #[test]
    fn test_escape_xml() {
        assert_eq!(escape_xml("hello"), "hello");
        assert_eq!(escape_xml("<tag>"), "&lt;tag&gt;");
        assert_eq!(escape_xml("a & b"), "a &amp; b");
        assert_eq!(escape_xml("\"quoted\""), "&quot;quoted&quot;");
    }
}