#![forbid(unsafe_code)]
mod priority;
pub mod render;
pub mod tokenize;
pub mod truncate;
use camino::{Utf8Path, Utf8PathBuf};
use std::time::SystemTime;
use priority::prioritize;
use tokenize::TokenCounter;
use truncate::truncate_file;
#[derive(Debug, thiserror::Error)]
pub enum PackError {
#[error("empty scope: no files to pack")]
EmptyScope,
#[error("token budget too small: {0} bytes minimum required")]
BudgetTooSmall(usize),
#[error("IO error: {0}")]
Io(String),
}
/// Convenience alias for results whose error type is [`PackError`].
pub type Result<T> = std::result::Result<T, PackError>;
pub enum PackScope {
All,
Paths(Vec<Utf8PathBuf>),
Symbol(String),
}
#[derive(Debug)]
pub enum PackFormat {
Xml,
Markdown,
}
pub struct PackInclude {
pub tests: bool,
pub docs: bool,
}
pub struct PackRequest {
pub scope: PackScope,
pub format: PackFormat,
pub token_budget: usize,
pub include: PackInclude,
}
/// Outcome of a pack operation: the rendered document plus per-file bookkeeping.
#[derive(Debug)]
pub struct PackResult {
/// Format the content was rendered in (mirrors the request's format).
pub format: PackFormat,
/// The rendered document text.
pub content: String,
/// Token count of `content`, measured on the final rendered text.
pub token_count: usize,
/// Files whose content was packed without truncation.
pub files_included: Vec<Utf8PathBuf>,
/// Files whose content was packed in truncated form.
pub files_truncated: Vec<Utf8PathBuf>,
/// Candidate files that do not appear in the output at all.
pub files_omitted: Vec<Utf8PathBuf>,
}
/// Source of file listings, contents and metadata consumed by a [`Packer`].
pub trait PackContext {
/// Lists the candidate files for `scope`.
fn list_files(&self, scope: &PackScope) -> Vec<Utf8PathBuf>;
/// Returns the text of `file`, or an error when it cannot be provided.
fn read(&self, file: &Utf8Path) -> Result<String>;
/// Returns a timestamp for `file` when one is available.
/// NOTE(review): presumably the last-modified time — confirm against implementors.
fn modified(&self, file: &Utf8Path) -> Option<SystemTime>;
/// Returns a count associated with `file`.
/// NOTE(review): presumably the number of incoming references from other files — confirm against implementors.
fn in_edges(&self, file: &Utf8Path) -> Result<usize>;
}
/// Turns a [`PackRequest`] into a rendered [`PackResult`].
pub trait Packer {
/// Packs the files selected by `req`, using `ctx` to list and read them.
fn pack(&self, req: &PackRequest, ctx: &dyn PackContext) -> Result<PackResult>;
}
/// [`Packer`] implementation that fits files into a token budget.
pub struct DefaultPacker {
/// Token counter used to measure file contents and the rendered output.
counter: TokenCounter,
}
impl DefaultPacker {
    /// Creates a packer backed by a freshly constructed [`TokenCounter`].
    ///
    /// # Errors
    ///
    /// Propagates any error produced by `TokenCounter::new`.
    pub fn new() -> Result<Self> {
        let counter = TokenCounter::new()?;
        Ok(Self { counter })
    }
}
impl Packer for DefaultPacker {
    /// Packs the files selected by `req` into one rendered document.
    ///
    /// The candidate list from `ctx` is filtered by the request's include
    /// flags, ordered by priority, read within the token budget, and finally
    /// rendered in the requested format.
    ///
    /// # Errors
    ///
    /// Returns [`PackError::EmptyScope`] when no file survives filtering, and
    /// forwards any error raised while reading files within the budget.
    fn pack(&self, req: &PackRequest, ctx: &dyn PackContext) -> Result<PackResult> {
        let keep_tests = req.include.tests;
        let keep_docs = req.include.docs;
        let candidates: Vec<Utf8PathBuf> = ctx
            .list_files(&req.scope)
            .into_iter()
            .filter(|f| (keep_tests || !is_test_file(f)) && (keep_docs || !is_doc_file(f)))
            .collect();
        if candidates.is_empty() {
            return Err(PackError::EmptyScope);
        }

        let ordered = prioritize(&candidates, ctx);
        let (entries, _) =
            self.read_with_budget(&ordered, req.token_budget, &req.format, ctx)?;

        // Split the packed entries by their truncation flag, keeping priority order.
        let paths_with_flag = |truncated: bool| -> Vec<Utf8PathBuf> {
            entries
                .iter()
                .filter(|entry| entry.2 == truncated)
                .map(|entry| entry.0.clone())
                .collect()
        };
        let files_included = paths_with_flag(false);
        let files_truncated = paths_with_flag(true);

        // Anything that was a candidate but produced no entry counts as omitted.
        let packed: std::collections::HashSet<_> =
            entries.iter().map(|entry| &entry.0).collect();
        let files_omitted: Vec<_> = ordered
            .iter()
            .filter(|p| !packed.contains(p))
            .cloned()
            .collect();

        let render_input: Vec<(Utf8PathBuf, &str, bool, usize)> = entries
            .iter()
            .map(|(path, text, truncated, tokens)| {
                (path.clone(), text.as_str(), *truncated, *tokens)
            })
            .collect();
        let (content, format) = match req.format {
            PackFormat::Xml => (
                render::xml::render_xml(&render_input, "repository"),
                PackFormat::Xml,
            ),
            PackFormat::Markdown => (
                render::markdown::render_markdown(&render_input, "repository"),
                PackFormat::Markdown,
            ),
        };

        // The reported count is measured on the final rendered text.
        let token_count = self.counter.count(&content);
        Ok(PackResult {
            format,
            content,
            token_count,
            files_included,
            files_truncated,
            files_omitted,
        })
    }
}
/// One packed file: `(path, content, was_truncated, token_count)`.
type FileEntry = (Utf8PathBuf, String, bool, usize);
impl DefaultPacker {
    /// Reads `ordered` files in turn, giving each an even share of what is left of `budget`.
    ///
    /// A fixed per-file overhead (depending on `format`) is reserved for every
    /// candidate up front. The rest of the budget is then split evenly among the
    /// files that have not been visited yet, so budget left unused by small or
    /// skipped files flows to the files that follow. A file that fits its share
    /// is kept whole; otherwise it is truncated to the share via `truncate_file`.
    ///
    /// Files that cannot be read, or whose truncation yields zero tokens, are
    /// skipped and produce no entry.
    ///
    /// Returns the packed entries together with the tokens they use (content
    /// tokens plus per-file overhead of the entries actually produced).
    ///
    /// # Errors
    ///
    /// Returns [`PackError::BudgetTooSmall`] carrying the smallest accepted
    /// budget when `budget` does not exceed the reserved overhead.
    fn read_with_budget(
        &self,
        ordered: &[Utf8PathBuf],
        budget: usize,
        format: &PackFormat,
        ctx: &dyn PackContext,
    ) -> Result<(Vec<FileEntry>, usize)> {
        let overhead_per_file: usize = match format {
            PackFormat::Xml => 120,
            PackFormat::Markdown => 80,
        };
        let file_count = ordered.len();
        let total_overhead = overhead_per_file.saturating_mul(file_count);
        if total_overhead >= budget {
            // A budget equal to the overhead is rejected too, so the smallest
            // budget that passes this check is one token above it.
            return Err(PackError::BudgetTooSmall(total_overhead.saturating_add(1)));
        }

        let mut remaining_budget = budget - total_overhead;
        let mut entries: Vec<FileEntry> = Vec::with_capacity(file_count);
        for (idx, file) in ordered.iter().enumerate() {
            if remaining_budget == 0 {
                break;
            }
            // Count the files still ahead by position rather than by
            // `entries.len()`: a skipped file must not keep claiming a share.
            // `idx < file_count`, so this is always at least 1.
            let remaining_files = file_count - idx;
            let per_file = remaining_budget / remaining_files;
            if per_file == 0 {
                break;
            }

            // Best effort: an unreadable file is skipped, not treated as fatal.
            let Ok(content) = ctx.read(file) else {
                continue;
            };

            let full_count = self.counter.count(&content);
            if full_count <= per_file {
                remaining_budget = remaining_budget.saturating_sub(full_count);
                entries.push((file.clone(), content, false, full_count));
            } else {
                let (truncated, trunc_count) = truncate_file(&content, per_file, &self.counter);
                remaining_budget = remaining_budget.saturating_sub(trunc_count);
                if trunc_count > 0 {
                    entries.push((file.clone(), truncated, true, trunc_count));
                }
            }
        }

        let content_tokens: usize = entries.iter().map(|(_, _, _, tokens)| tokens).sum();
        let total_used = content_tokens + overhead_per_file.saturating_mul(entries.len());
        Ok((entries, total_used))
    }
}
/// Returns `true` when `path` looks like a test file.
///
/// A path qualifies when either:
/// - its file name is exactly `test.py` or ends with one of the known test
///   suffixes (`_test.rs`, `_spec.ts`, ...), or
/// - any directory on the path is named `test`, `tests`, `__tests__` or `spec`,
///   regardless of whether that directory is at the root or nested.
///
/// Names that merely end in `test.py` without a separator (`latest.py`,
/// `contest.py`) are not treated as tests.
fn is_test_file(path: &Utf8Path) -> bool {
    const TEST_SUFFIXES: [&str; 8] = [
        "_test.rs",
        "_test.ts",
        "_test.tsx",
        "_test.js",
        "_test.jsx",
        "_test.py",
        "_spec.ts",
        "_spec.js",
    ];
    const TEST_DIRS: [&str; 4] = ["test", "tests", "__tests__", "spec"];

    let file_name = path.file_name().unwrap_or("");
    if file_name == "test.py"
        || TEST_SUFFIXES
            .iter()
            .any(|suffix| file_name.ends_with(*suffix))
    {
        return true;
    }

    // Every `/`-separated segment except the last one names a directory.
    let mut segments = path.as_str().split('/');
    segments.next_back();
    segments.any(|dir| TEST_DIRS.contains(&dir))
}
/// Returns `true` when `path` has the extension `md` (compared case-sensitively).
fn is_doc_file(path: &Utf8Path) -> bool {
    matches!(path.extension(), Some("md"))
}
#[cfg(test)]
mod test_util;
#[cfg(test)]
mod snapshot_tests;
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::test_util::*;
    use std::collections::HashMap;

    /// Builds a request over the whole repository with the given format,
    /// token budget and include flags.
    fn all_scope_request(
        format: PackFormat,
        token_budget: usize,
        tests: bool,
        docs: bool,
    ) -> PackRequest {
        PackRequest {
            scope: PackScope::All,
            format,
            token_budget,
            include: PackInclude { tests, docs },
        }
    }

    /// An empty candidate list is reported as `EmptyScope`.
    #[test]
    fn empty_scope_errors() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::new());
        let req = all_scope_request(PackFormat::Xml, 1000, false, false);

        let result = packer.pack(&req, &ctx);

        assert!(
            matches!(&result, Err(PackError::EmptyScope)),
            "expected EmptyScope, got {result:?}"
        );
    }

    /// A budget below the per-file overhead is rejected with `BudgetTooSmall`,
    /// not with some unrelated error.
    #[test]
    fn budget_too_small_errors() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([(
            path("src/main.rs"),
            "fn main() { println!(\"hello\"); }".to_string(),
        )]));
        let req = all_scope_request(PackFormat::Xml, 10, false, false);

        let result = packer.pack(&req, &ctx);

        assert!(
            matches!(&result, Err(PackError::BudgetTooSmall(_))),
            "expected BudgetTooSmall, got {result:?}"
        );
    }

    /// A small file with a generous budget is packed whole.
    #[test]
    fn pack_single_file_in_full() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([(
            path("src/main.rs"),
            "fn main() {}".to_string(),
        )]));
        let req = all_scope_request(PackFormat::Xml, 500, false, false);

        let result = packer.pack(&req, &ctx).unwrap();

        assert_eq!(result.files_included.len(), 1);
        assert_eq!(result.files_truncated.len(), 0);
        assert_eq!(result.files_omitted.len(), 0);
        assert!(result.token_count > 0);
        assert!(result.content.contains("fn main() {}"));
    }

    /// Included files are listed in priority order: lib, then readme, then utils.
    #[test]
    fn pack_multiple_files_orders_by_priority() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([
            (path("src/utils.rs"), "// utils".to_string()),
            (path("src/lib.rs"), "// lib".to_string()),
            (path("README.md"), "# Readme".to_string()),
        ]));
        let req = all_scope_request(PackFormat::Xml, 2000, true, true);

        let result = packer.pack(&req, &ctx).unwrap();

        let position_of = |name: &str| {
            result
                .files_included
                .iter()
                .position(|p| p.as_str() == name)
                .unwrap()
        };
        let lib_pos = position_of("src/lib.rs");
        let readme_pos = position_of("README.md");
        let utils_pos = position_of("src/utils.rs");
        assert!(lib_pos < readme_pos);
        assert!(readme_pos < utils_pos);
    }

    /// With `tests: false`, both suffix-named and directory-located tests are dropped.
    #[test]
    fn pack_excludes_tests_when_flag_false() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([
            (path("src/lib.rs"), "// lib".to_string()),
            (path("src/lib_test.rs"), "// test".to_string()),
            (path("tests/integration.rs"), "// integration".to_string()),
        ]));
        let req = all_scope_request(PackFormat::Xml, 2000, false, true);

        let result = packer.pack(&req, &ctx).unwrap();

        assert_eq!(result.files_included.len(), 1);
        assert_eq!(result.files_included[0].as_str(), "src/lib.rs");
    }

    /// With `tests: true`, test files are packed alongside regular sources.
    #[test]
    fn pack_includes_tests_when_flag_true() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([
            (path("src/lib.rs"), "// lib".to_string()),
            (path("src/lib_test.rs"), "// test".to_string()),
        ]));
        let req = all_scope_request(PackFormat::Xml, 2000, true, true);

        let result = packer.pack(&req, &ctx).unwrap();

        assert_eq!(result.files_included.len(), 2);
    }

    /// A file larger than the budget is never reported as fully included.
    #[test]
    fn pack_truncates_when_budget_tight() {
        let packer = DefaultPacker::new().unwrap();
        let big_content = vec!["fn unique_word_"; 200].join("\n");
        let ctx = TestContext::with_content_files(HashMap::from([(
            path("src/big.rs"),
            big_content,
        )]));
        let req = all_scope_request(PackFormat::Xml, 200, false, false);

        let result = packer.pack(&req, &ctx).unwrap();

        assert_eq!(result.files_included.len(), 0);
        assert_eq!(
            result.files_truncated.len() + result.files_omitted.len(),
            1
        );
        if !result.files_truncated.is_empty() {
            assert!(result.content.contains("[truncated"));
        }
    }

    /// Markdown output carries the expected headings and a fenced code block.
    #[test]
    fn markdown_format_output() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([(
            path("src/lib.rs"),
            "pub fn add(a: i32, b: i32) -> i32 { a + b }".to_string(),
        )]));
        let req = all_scope_request(PackFormat::Markdown, 2000, false, false);

        let result = packer.pack(&req, &ctx).unwrap();

        assert!(result.content.starts_with("# Repository:"));
        assert!(result.content.contains("```rust"));
        assert!(result.content.contains("## File:"));
    }

    /// Packing the same request twice yields identical output.
    #[test]
    fn pack_result_is_deterministic() {
        let packer = DefaultPacker::new().unwrap();
        let ctx = TestContext::with_content_files(HashMap::from([
            (path("a.rs"), "// a".to_string()),
            (path("b.rs"), "// b".to_string()),
        ]));
        let req = all_scope_request(PackFormat::Xml, 2000, false, false);

        let r1 = packer.pack(&req, &ctx).unwrap();
        let r2 = packer.pack(&req, &ctx).unwrap();

        assert_eq!(r1.content, r2.content);
        assert_eq!(r1.token_count, r2.token_count);
    }

    /// File text appears verbatim in the rendered Markdown.
    #[test]
    fn file_content_is_preserved_in_output() {
        let packer = DefaultPacker::new().unwrap();
        let content = "fn hello() -> &'static str { \"world\" }";
        let ctx = TestContext::with_content_files(HashMap::from([(
            path("src/greeting.rs"),
            content.to_string(),
        )]));
        let req = all_scope_request(PackFormat::Markdown, 2000, false, false);

        let result = packer.pack(&req, &ctx).unwrap();

        assert!(result.content.contains(content));
    }
}