1#![forbid(unsafe_code)]
2
3mod priority;
4pub mod render;
5pub mod tokenize;
6pub mod truncate;
7
8use camino::{Utf8Path, Utf8PathBuf};
9use std::time::SystemTime;
10
11use priority::prioritize;
12use tokenize::TokenCounter;
13use truncate::truncate_file;
14
15#[derive(Debug, thiserror::Error)]
17pub enum PackError {
18 #[error("empty scope: no files to pack")]
20 EmptyScope,
21 #[error("token budget too small: {0} bytes minimum required")]
24 BudgetTooSmall(usize),
25 #[error("IO error: {0}")]
27 Io(String),
28}
29
/// Convenience alias for results whose error type is [`PackError`].
pub type Result<T> = std::result::Result<T, PackError>;
32
33pub enum PackScope {
39 All,
40 Paths(Vec<Utf8PathBuf>),
41 Symbol(String),
42}
43
/// Output format of the packed document.
///
/// The enum is fieldless, so it is `Copy` and comparable; callers can pass it
/// around by value without hand-written duplication.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackFormat {
    /// Render through `render::xml::render_xml`.
    Xml,
    /// Render through `render::markdown::render_markdown`.
    Markdown,
}
52
/// Switches for optional file categories.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackInclude {
    /// When `false`, files recognised by `is_test_file` are dropped before packing.
    pub tests: bool,
    /// When `false`, files recognised by `is_doc_file` are dropped before packing.
    pub docs: bool,
}
61
/// Everything a [`Packer`] needs to know to produce one packed document.
pub struct PackRequest {
    /// Which files to consider; forwarded to [`PackContext::list_files`].
    pub scope: PackScope,
    /// Output format; selects the renderer and the per-file overhead estimate.
    pub format: PackFormat,
    /// Upper bound, in tokens, used when distributing content across files.
    pub token_budget: usize,
    /// Whether test files and doc files are kept or filtered out.
    pub include: PackInclude,
}
70
/// Outcome of a successful pack.
#[derive(Debug)]
pub struct PackResult {
    /// Format the content was rendered in (same as the request's format).
    pub format: PackFormat,
    /// The rendered document.
    pub content: String,
    /// Token count of `content` as measured by the packer's token counter.
    pub token_count: usize,
    /// Files packed without truncation, in packing order.
    pub files_included: Vec<Utf8PathBuf>,
    /// Files packed in truncated form, in packing order.
    pub files_truncated: Vec<Utf8PathBuf>,
    /// Prioritized files that do not appear in the output at all.
    pub files_omitted: Vec<Utf8PathBuf>,
}
86
/// Source of files and file metadata for a pack operation.
///
/// The packer in this file calls `list_files` and `read` directly and hands
/// the whole context to `priority::prioritize`.
pub trait PackContext {
    /// Returns the candidate files for `scope`.
    fn list_files(&self, scope: &PackScope) -> Vec<Utf8PathBuf>;
    /// Returns the text content of `file`, or an error if it cannot be read.
    fn read(&self, file: &Utf8Path) -> Result<String>;
    /// Returns the modification time of `file`, if one is available.
    // NOTE(review): not called in this file; presumably consumed by
    // prioritization -- confirm.
    fn modified(&self, file: &Utf8Path) -> Option<SystemTime>;
    /// Returns an incoming-edge count for `file`.
    // NOTE(review): presumably the number of inbound references to the file,
    // used as a priority signal -- confirm against `priority`.
    fn in_edges(&self, file: &Utf8Path) -> Result<usize>;
}
103
/// Turns a [`PackRequest`] into a rendered [`PackResult`].
pub trait Packer {
    /// Packs the files described by `req`, reading them through `ctx`.
    ///
    /// # Errors
    ///
    /// Returns a [`PackError`] when packing cannot be completed.
    fn pack(&self, req: &PackRequest, ctx: &dyn PackContext) -> Result<PackResult>;
}
110
/// The [`Packer`] implementation provided by this file.
pub struct DefaultPacker {
    /// Counts tokens for budgeting file content and for the final output.
    counter: TokenCounter,
}
117
118impl DefaultPacker {
119 pub fn new() -> Result<Self> {
121 Ok(Self {
122 counter: TokenCounter::new()?,
123 })
124 }
125}
126
127impl Packer for DefaultPacker {
128 fn pack(&self, req: &PackRequest, ctx: &dyn PackContext) -> Result<PackResult> {
129 let mut files = ctx.list_files(&req.scope);
130
131 if !req.include.tests {
132 files.retain(|f| !is_test_file(f));
133 }
134 if !req.include.docs {
135 files.retain(|f| !is_doc_file(f));
136 }
137
138 if files.is_empty() {
139 return Err(PackError::EmptyScope);
140 }
141
142 let ordered = prioritize(&files, ctx);
143
144 let (file_entries, _budget_used) =
145 self.read_with_budget(&ordered, req.token_budget, &req.format, ctx)?;
146
147 let mut files_included = Vec::new();
148 let mut files_truncated = Vec::new();
149 for (path, _, is_truncated, _) in &file_entries {
150 if *is_truncated {
151 files_truncated.push(path.clone());
152 } else {
153 files_included.push(path.clone());
154 }
155 }
156
157 let included_set: std::collections::HashSet<_> =
158 file_entries.iter().map(|(p, _, _, _)| p).collect();
159 let files_omitted: Vec<_> = ordered
160 .iter()
161 .filter(|p| !included_set.contains(p))
162 .cloned()
163 .collect();
164
165 let file_refs: Vec<(Utf8PathBuf, &str, bool, usize)> = file_entries
166 .iter()
167 .map(|(p, c, t, n)| (p.clone(), c.as_str(), *t, *n))
168 .collect();
169
170 let content = match req.format {
171 PackFormat::Xml => render::xml::render_xml(&file_refs, "repository"),
172 PackFormat::Markdown => render::markdown::render_markdown(&file_refs, "repository"),
173 };
174
175 let token_count = self.counter.count(&content);
176
177 Ok(PackResult {
178 format: match req.format {
179 PackFormat::Xml => PackFormat::Xml,
180 PackFormat::Markdown => PackFormat::Markdown,
181 },
182 content,
183 token_count,
184 files_included,
185 files_truncated,
186 files_omitted,
187 })
188 }
189}
190
/// One packed file: `(path, content, is_truncated, token_count)`.
///
/// `content` is the text that gets rendered (already truncated when the flag
/// is set) and `token_count` is the token count of that text.
type FileEntry = (Utf8PathBuf, String, bool, usize);
195
196impl DefaultPacker {
197 fn read_with_budget(
198 &self,
199 ordered: &[Utf8PathBuf],
200 budget: usize,
201 format: &PackFormat,
202 ctx: &dyn PackContext,
203 ) -> Result<(Vec<FileEntry>, usize)> {
204 let overhead_per_file: usize = match format {
205 PackFormat::Xml => 120,
206 PackFormat::Markdown => 80,
207 };
208
209 let total_overhead = overhead_per_file.saturating_mul(ordered.len());
210 if total_overhead >= budget {
211 return Err(PackError::BudgetTooSmall(total_overhead));
212 }
213
214 let mut remaining_budget = budget.saturating_sub(total_overhead);
215 let mut entries: Vec<(Utf8PathBuf, String, bool, usize)> = Vec::new();
216 let file_count = ordered.len();
217
218 for (idx, file) in ordered.iter().enumerate() {
219 if remaining_budget == 0 {
220 break;
221 }
222
223 let remaining_files = file_count.saturating_sub(entries.len());
224 let per_file = remaining_budget / remaining_files.max(1);
225 if per_file == 0 {
226 break;
227 }
228
229 let content = match ctx.read(file) {
230 Ok(c) => c,
231 Err(_) => continue,
232 };
233
234 let full_count = self.counter.count(&content);
235
236 if full_count <= per_file {
237 remaining_budget = remaining_budget.saturating_sub(full_count);
238 entries.push((file.clone(), content, false, full_count));
239 } else {
240 let (truncated, trunc_count) = truncate_file(&content, per_file, &self.counter);
241 remaining_budget = remaining_budget.saturating_sub(trunc_count);
242 if trunc_count > 0 {
244 entries.push((file.clone(), truncated, true, trunc_count));
245 }
246 }
248
249 let _ = idx;
251 }
252
253 let total_used = entries.iter().map(|(_, _, _, c)| c).sum::<usize>()
254 + overhead_per_file.saturating_mul(entries.len());
255
256 Ok((entries, total_used))
257 }
258}
259
260fn is_test_file(path: &Utf8Path) -> bool {
263 let file_name = path.file_name().unwrap_or("");
264 if file_name.ends_with("_test.rs")
266 || file_name.ends_with("_test.ts")
267 || file_name.ends_with("_test.tsx")
268 || file_name.ends_with("_test.js")
269 || file_name.ends_with("_test.jsx")
270 || file_name.ends_with("_test.py")
271 || file_name.ends_with("_spec.ts")
272 || file_name.ends_with("_spec.js")
273 || file_name.ends_with("test.py")
274 {
275 return true;
276 }
277 let path_str = path.as_str();
279 if path_str.contains("/test/")
280 || path_str.contains("/tests/")
281 || path_str.starts_with("test/")
282 || path_str.starts_with("tests/")
283 || path_str.contains("/__tests__/")
284 || path_str.contains("/spec/")
285 {
286 return true;
287 }
288 false
289}
290
291fn is_doc_file(path: &Utf8Path) -> bool {
294 path.extension() == Some("md")
295}
296
297#[cfg(test)]
300mod test_util;
301
302#[cfg(test)]
303mod snapshot_tests;
304
#[cfg(test)]
// Tests unwrap freely: a panic is exactly the failure signal wanted here.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::test_util::*;
    use std::collections::HashMap;

    /// Builds a whole-repository request with the given format, budget and
    /// include switches.
    fn request(format: PackFormat, token_budget: usize, tests: bool, docs: bool) -> PackRequest {
        PackRequest {
            scope: PackScope::All,
            format,
            token_budget,
            include: PackInclude { tests, docs },
        }
    }

    /// Index of `name` within `result.files_included`; panics when absent.
    fn included_index(result: &PackResult, name: &str) -> usize {
        result
            .files_included
            .iter()
            .position(|p| p.as_str() == name)
            .unwrap()
    }

    #[test]
    fn empty_scope_errors() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::new());
        let req = request(PackFormat::Xml, 1000, false, false);

        let err = packer.pack(&req, &ctx).unwrap_err();
        assert!(
            matches!(err, PackError::EmptyScope),
            "expected EmptyScope, got {err:?}"
        );
    }

    #[test]
    fn budget_too_small_errors() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([(
            path("src/main.rs"),
            "fn main() { println!(\"hello\"); }".to_string(),
        )]));
        // A budget of 10 is below the fixed per-file overhead.
        let req = request(PackFormat::Xml, 10, false, false);

        let err = packer.pack(&req, &ctx).unwrap_err();
        // Pin the variant so an unrelated failure cannot satisfy the test.
        assert!(
            matches!(err, PackError::BudgetTooSmall(_)),
            "expected BudgetTooSmall, got {err:?}"
        );
    }

    #[test]
    fn pack_single_file_in_full() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([(
            path("src/main.rs"),
            "fn main() {}".to_string(),
        )]));
        let req = request(PackFormat::Xml, 500, false, false);

        let result = packer.pack(&req, &ctx).unwrap();
        assert_eq!(result.files_included.len(), 1);
        assert_eq!(result.files_truncated.len(), 0);
        assert_eq!(result.files_omitted.len(), 0);
        assert!(result.token_count > 0);
        assert!(result.content.contains("fn main() {}"));
    }

    #[test]
    fn pack_multiple_files_orders_by_priority() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([
            (path("src/utils.rs"), "// utils".to_string()),
            (path("src/lib.rs"), "// lib".to_string()),
            (path("README.md"), "# Readme".to_string()),
        ]));
        let req = request(PackFormat::Xml, 2000, true, true);

        let result = packer.pack(&req, &ctx).unwrap();
        let lib_pos = included_index(&result, "src/lib.rs");
        let readme_pos = included_index(&result, "README.md");
        let utils_pos = included_index(&result, "src/utils.rs");
        assert!(lib_pos < readme_pos);
        assert!(readme_pos < utils_pos);
    }

    #[test]
    fn pack_excludes_tests_when_flag_false() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([
            (path("src/lib.rs"), "// lib".to_string()),
            (path("src/lib_test.rs"), "// test".to_string()),
            (path("tests/integration.rs"), "// integration".to_string()),
        ]));
        let req = request(PackFormat::Xml, 2000, false, true);

        let result = packer.pack(&req, &ctx).unwrap();
        assert_eq!(result.files_included.len(), 1);
        assert_eq!(result.files_included[0].as_str(), "src/lib.rs");
    }

    #[test]
    fn pack_includes_tests_when_flag_true() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([
            (path("src/lib.rs"), "// lib".to_string()),
            (path("src/lib_test.rs"), "// test".to_string()),
        ]));
        let req = request(PackFormat::Xml, 2000, true, true);

        let result = packer.pack(&req, &ctx).unwrap();
        assert_eq!(result.files_included.len(), 2);
    }

    #[test]
    fn pack_truncates_when_budget_tight() {
        let packer = DefaultPacker::new().unwrap();
        // 200 identical lines: far more content than a 200-token budget allows.
        let big_content = vec!["fn unique_word_"; 200].join("\n");
        let ctx =
            TestContext::with_content_files(HashMap::from([(path("src/big.rs"), big_content)]));
        let req = request(PackFormat::Xml, 200, false, false);

        let result = packer.pack(&req, &ctx).unwrap();
        assert_eq!(result.files_included.len(), 0);
        // The single file is either truncated or dropped, never both.
        assert_eq!(
            result.files_truncated.len() + result.files_omitted.len(),
            1
        );
        if !result.files_truncated.is_empty() {
            assert!(result.content.contains("[truncated"));
        }
    }

    #[test]
    fn markdown_format_output() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([(
            path("src/lib.rs"),
            "pub fn add(a: i32, b: i32) -> i32 { a + b }".to_string(),
        )]));
        let req = request(PackFormat::Markdown, 2000, false, false);

        let result = packer.pack(&req, &ctx).unwrap();
        assert!(result.content.starts_with("# Repository:"));
        assert!(result.content.contains("```rust"));
        assert!(result.content.contains("## File:"));
    }

    #[test]
    fn pack_result_is_deterministic() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([
            (path("a.rs"), "// a".to_string()),
            (path("b.rs"), "// b".to_string()),
        ]));
        let req = request(PackFormat::Xml, 2000, false, false);

        let first = packer.pack(&req, &ctx).unwrap();
        let second = packer.pack(&req, &ctx).unwrap();
        assert_eq!(first.content, second.content);
        assert_eq!(first.token_count, second.token_count);
    }

    #[test]
    fn file_content_is_preserved_in_output() {
        let packer = DefaultPacker::new().unwrap();
        let content = "fn hello() -> &'static str { \"world\" }";
        let ctx = TestContext::with_content_files(HashMap::from([(
            path("src/greeting.rs"),
            content.to_string(),
        )]));
        let req = request(PackFormat::Markdown, 2000, false, false);

        let result = packer.pack(&req, &ctx).unwrap();
        assert!(result.content.contains(content));
    }
}