extern crate anyhow;
extern crate ignore;
extern crate tempfile;
extern crate tiktoken_rs;
use anyhow::Context;
use ignore::{overrides::OverrideBuilder, WalkBuilder};
use std::io::{BufWriter, Write};
use std::path::Path;
use tempfile::NamedTempFile;
use tiktoken_rs::o200k_base;
/// Selects which ignore rules are applied while walking the directory tree.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GitignoreMode {
    /// Honor the standard ignore sources: `.ignore` files, `.gitignore` files,
    /// the global gitignore and `.git/info/exclude`. A git repository is not
    /// required for these rules to apply.
    Auto,
    /// Turn off all standard ignore sources and apply only the rules found in
    /// the ignore file at the given path.
    Path(std::path::PathBuf),
    /// Turn off all standard ignore sources.
    Disabled,
}
pub fn write_directory_contents_yaml(
directory: &Path,
output: &Path,
ignore_patterns: &[String],
gitignore_mode: GitignoreMode,
dumpignore_path: Option<&Path>,
) -> anyhow::Result<()> {
let bpe = o200k_base().context("Failed to load BPE tokenizer")?;
let absolute_directory = directory
.canonicalize()
.context("Failed to get absolute path")?;
let mut walker = WalkBuilder::new(&absolute_directory);
if let Some(dumpignore) = dumpignore_path {
walker.add_custom_ignore_filename(dumpignore);
}
if !ignore_patterns.is_empty() {
let mut override_builder = OverrideBuilder::new(&absolute_directory);
for pattern in ignore_patterns {
let ignore_pattern = if pattern.starts_with('!') {
pattern.to_string()
} else {
format!("!{}", pattern)
};
if let Err(err) = override_builder.add(&ignore_pattern) {
log::warn!("Failed to add ignore pattern '{}': {}", pattern, err);
}
}
if let Ok(overrides) = override_builder.build() {
walker.overrides(overrides);
}
}
match gitignore_mode {
GitignoreMode::Auto => {
walker.require_git(false); walker.ignore(true); walker.git_ignore(true); walker.git_global(true); walker.git_exclude(true); }
GitignoreMode::Path(ref p) => {
walker.require_git(false);
walker.ignore(false);
walker.git_ignore(false);
walker.git_global(false);
walker.git_exclude(false);
walker.add_ignore(p);
}
GitignoreMode::Disabled => {
walker.require_git(false);
walker.ignore(false);
walker.git_ignore(false);
walker.git_global(false);
walker.git_exclude(false);
}
}
walker.follow_links(false);
let output_abs = if output.is_absolute() {
output.to_path_buf()
} else {
absolute_directory.join(output)
};
let output_abs = output_abs.canonicalize().unwrap_or(output_abs);
walker.filter_entry(move |entry| entry.path() != output_abs);
let walker_iter = walker.build();
let tmp = NamedTempFile::new().context("Failed to create temporary file")?;
let mut writer = BufWriter::new(tmp.reopen()?);
let project_name = absolute_directory
.file_name()
.map_or("project", |s| s.to_str().unwrap());
writeln!(writer, "project: {}", project_name)?;
writeln!(writer, "files:")?;
let mut file_count = 0;
let mut total_characters = 0;
let mut total_tokens = 0;
for entry in walker_iter {
let entry = entry.context("Failed to read directory entry")?;
if entry.file_type().is_some_and(|ft| ft.is_file()) {
let relative_path = entry
.path()
.strip_prefix(&absolute_directory)
.context("Failed to get relative path")?;
let relative_path_str = relative_path.to_string_lossy();
let metadata = entry.metadata().context("Failed to get metadata")?;
let size_bytes = metadata.len();
let size_str = if size_bytes < 1024 {
format!("{} B", size_bytes)
} else {
let kb = size_bytes as f64 / 1024.0;
format!("{:.1} KB", kb)
};
let (lines, tokens, content) = match std::fs::read_to_string(entry.path()) {
Ok(text) => {
let line_count = text.lines().count();
let tokens = bpe.encode_with_special_tokens(&text);
(line_count, tokens.len(), text)
}
Err(_) => (
0,
0,
format!("Binary or inaccessible file: {}", entry.path().display()),
),
};
file_count += 1;
total_characters += content.chars().count();
total_tokens += tokens;
writeln!(writer, " - path: {:?}", relative_path_str)?;
writeln!(writer, " size: \"{}\"", size_str)?;
writeln!(writer, " lines: {}", lines)?;
writeln!(writer, " tokens: {}", tokens)?;
writeln!(writer, " content: |")?;
for line in content.lines() {
writeln!(writer, " {}", line)?;
}
}
}
if let Some(parent) = output.parent() {
std::fs::create_dir_all(parent).ok();
}
tmp.persist(output).map_err(|e| anyhow::anyhow!(e.error))?;
writer.flush()?;
log::info!(
"Processed {} files with {} total characters and {} total tokens.",
file_count,
total_characters,
total_tokens
);
Ok(())
}