use crate::configuration::Code2PromptConfig;
use crate::file_processor;
use crate::filter::{build_globset, should_include_file};
use crate::sort::{FileSortMethod, sort_files, sort_tree};
use crate::tokenizer::count_tokens;
use crate::util::strip_utf8_bom;
use anyhow::Result;
use content_inspector::{ContentType, inspect};
use ignore::WalkBuilder;
use log::debug;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::fs;
use std::io::Read;
use std::path::{Path, PathBuf};
use termtree::Tree;
/// Small, serializable summary of a filesystem entry's type flags.
///
/// Built from a [`std::fs::Metadata`] via the `From<&std::fs::Metadata>`
/// conversion so the flags can be stored on a [`FileEntry`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct EntryMetadata {
/// `true` when the source metadata reports a directory.
pub is_dir: bool,
/// `true` when the source metadata reports a symbolic link.
pub is_symlink: bool,
}
impl From<&std::fs::Metadata> for EntryMetadata {
fn from(meta: &std::fs::Metadata) -> Self {
Self {
is_dir: meta.is_dir(),
is_symlink: meta.is_symlink(),
}
}
}
/// One processed file, ready to be handed to the prompt renderer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
/// Path of the file as a string; absolute or relative to the traversal
/// root depending on the `absolute_path` configuration flag.
pub path: String,
/// File extension without the leading dot, or an empty string when the
/// path has no (UTF-8) extension.
pub extension: String,
/// Processed file content after it has been passed through
/// `wrap_code_block` (optional line numbers and code fence).
pub code: String,
/// Token count computed on the processed content before it is wrapped.
pub token_count: usize,
/// Directory / symlink flags of the underlying filesystem entry.
pub metadata: EntryMetadata,
/// Modification time in whole seconds since the Unix epoch. Only populated
/// when files are sorted by date; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub mod_time: Option<u64>,
}
/// A file selected during discovery that still has to be read and processed.
#[derive(Debug, Clone)]
struct FileToProcess {
/// Path of the entry as yielded by the directory walker.
absolute_path: PathBuf,
/// The same path with the canonical traversal root stripped.
relative_path: PathBuf,
/// Metadata captured from the walker entry at discovery time.
metadata: std::fs::Metadata,
}
/// Walks the configured directory and returns the rendered directory tree
/// together with the processed file entries.
///
/// The work happens in three stages: discovery (walk + filtering), parallel
/// processing of the selected files, and final sorting/assembly.
///
/// # Errors
///
/// Propagates any error returned by one of the three stages.
pub fn traverse_directory(
    config: &Code2PromptConfig,
    selection_engine: Option<&mut crate::selection::SelectionEngine>,
) -> Result<(String, Vec<FileEntry>)> {
    let (tree, pending) = discover_files(config, selection_engine)?;
    let mut entries = process_files_parallel(pending, config)?;
    assemble_results(tree, entries.as_mut_slice(), config)
}
/// Walks the configured root and decides, for every entry, whether it shows
/// up in the directory tree and whether it is queued for processing.
///
/// An entry is "matched" either by the selection engine (when one is given)
/// or by the include/exclude glob sets. Matched entries always appear in the
/// tree; unmatched ones appear only when `full_directory_tree` is set.
/// Only matched entries that are files and whose metadata can be read are
/// queued.
///
/// # Errors
///
/// Fails when the configured path cannot be canonicalized. Entries the
/// walker fails to yield are skipped.
fn discover_files(
    config: &Code2PromptConfig,
    mut selection_engine: Option<&mut crate::selection::SelectionEngine>,
) -> Result<(Tree<String>, Vec<FileToProcess>)> {
    let canonical_root_path = config.path.canonicalize()?;
    let root_label = display_name(&canonical_root_path);

    let include_globset = build_globset(&config.include_patterns);
    let exclude_globset = build_globset(&config.exclude_patterns);

    let entries = WalkBuilder::new(&canonical_root_path)
        .hidden(!config.hidden)
        .git_ignore(!config.no_ignore)
        .follow_links(config.follow_symlinks)
        .build()
        .filter_map(|entry| entry.ok());

    let mut tree = Tree::new(root_label);
    let mut files_to_process = Vec::new();

    for entry in entries {
        let path = entry.path();
        // Entries that are not below the canonical root are ignored.
        let Ok(relative_path) = path.strip_prefix(&canonical_root_path) else {
            continue;
        };

        let entry_match = match selection_engine.as_mut() {
            Some(engine) => engine.is_selected(relative_path),
            None => should_include_file(relative_path, &include_globset, &exclude_globset),
        };

        if config.full_directory_tree || entry_match {
            // Descend from the root one path component at a time, creating
            // any missing node on the way.
            let mut node = &mut tree;
            for component in relative_path.components() {
                let label = component.as_os_str().to_string_lossy().to_string();
                let existing = node.leaves.iter().position(|child| child.root == label);
                let index = match existing {
                    Some(index) => index,
                    None => {
                        node.leaves.push(Tree::new(label));
                        node.leaves.len() - 1
                    }
                };
                node = &mut node.leaves[index];
            }
        }

        if !(path.is_file() && entry_match) {
            continue;
        }
        if let Ok(metadata) = entry.metadata() {
            files_to_process.push(FileToProcess {
                absolute_path: path.to_path_buf(),
                relative_path: relative_path.to_path_buf(),
                metadata,
            });
        }
    }

    Ok((tree, files_to_process))
}
/// Processes every discovered file in parallel and returns the entries that
/// survived processing.
///
/// Files for which `process_single_file` returns `None` (binary, unreadable,
/// empty, or invalid UTF-8) are dropped. The relative order of the remaining
/// entries matches the order of `files_to_process`.
///
/// # Errors
///
/// Currently never fails; the `Result` is kept for the callers' sake.
fn process_files_parallel(
    files_to_process: Vec<FileToProcess>,
    config: &Code2PromptConfig,
) -> Result<Vec<FileEntry>> {
    // Filter while collecting instead of materializing a `Vec<Option<_>>`
    // first and flattening it into a second vector afterwards.
    let files: Vec<FileEntry> = files_to_process
        .par_iter()
        .filter_map(|file_info| process_single_file(file_info, config))
        .collect();
    Ok(files)
}
/// Reads a file into memory, bailing out early when it looks binary.
///
/// The first `SAMPLE_SIZE` bytes (or fewer for short files) are read and
/// inspected; if they are classified as binary, `Ok(None)` is returned without
/// reading the rest of the file. Otherwise the remainder is read and the full
/// content is returned.
///
/// `file_size` is used purely as a capacity hint. The caller's metadata can
/// disagree with the bytes that are actually readable (the file changed after
/// the walk, or the metadata describes a symlink rather than its target), so
/// the amount of data read is never derived from it. Trusting it would
/// truncate the content or fail with `UnexpectedEof`.
///
/// # Errors
///
/// Returns any I/O error raised while opening or reading the file.
fn read_file_with_binary_check(path: &Path, file_size: u64) -> std::io::Result<Option<Vec<u8>>> {
    const SAMPLE_SIZE: u64 = 8192;
    // Upper bound on the up-front allocation so a bogus size cannot make us
    // reserve an arbitrary amount of memory.
    const MAX_PREALLOC: u64 = 10 * 1024 * 1024;

    let mut file = fs::File::open(path)?;
    let capacity = usize::try_from(file_size.min(MAX_PREALLOC)).unwrap_or(0);
    let mut buffer = Vec::with_capacity(capacity);

    // Read at most SAMPLE_SIZE bytes; stops cleanly at EOF for short files.
    Read::by_ref(&mut file)
        .take(SAMPLE_SIZE)
        .read_to_end(&mut buffer)?;

    if inspect(&buffer) == ContentType::BINARY {
        return Ok(None);
    }

    // Append whatever is left (a no-op when the sample already hit EOF).
    file.read_to_end(&mut buffer)?;
    Ok(Some(buffer))
}
/// Reads, processes and packages a single discovered file.
///
/// Returns `None` (after logging at debug level) when the file is binary,
/// cannot be read, or its processed content is blank or contains the Unicode
/// replacement character. When the extension-specific processor fails, the
/// raw bytes are decoded lossily and used instead.
///
/// The token count is computed on the processed content before it is wrapped
/// into a code block. The modification time is only looked up when files are
/// sorted by date.
fn process_single_file(file_info: &FileToProcess, config: &Code2PromptConfig) -> Option<FileEntry> {
    let path = &file_info.absolute_path;
    let relative_path = &file_info.relative_path;
    let metadata = &file_info.metadata;

    let code_bytes = match read_file_with_binary_check(path, metadata.len()) {
        Ok(Some(bytes)) => bytes,
        Ok(None) => {
            debug!("Skipped binary file: {}", path.display());
            return None;
        }
        Err(e) => {
            debug!("Failed to read file {}: {}", path.display(), e);
            return None;
        }
    };
    let clean_bytes = strip_utf8_bom(&code_bytes);

    let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
    let processor = file_processor::get_processor_for_extension(extension);
    let code = match processor.process(clean_bytes, path) {
        Ok(processed) => processed,
        Err(e) => {
            log::warn!(
                "File processing failed for {}: {}. Using raw text fallback.",
                path.display(),
                e
            );
            String::from_utf8_lossy(clean_bytes).into_owned()
        }
    };

    // Reject unusable content before doing any formatting work on it.
    if code.trim().is_empty() || code.contains(char::REPLACEMENT_CHARACTER) {
        debug!("Excluded file (empty or invalid UTF-8): {}", path.display());
        return None;
    }

    let code_block = wrap_code_block(&code, extension, config.line_numbers, config.no_codeblock);

    let file_path = if config.absolute_path {
        path.to_string_lossy().into_owned()
    } else {
        relative_path.to_string_lossy().into_owned()
    };

    let token_count = count_tokens(&code, &config.encoding);

    // The modification time is only needed for date-based sorting.
    let sorts_by_date = matches!(
        config.sort_method,
        Some(FileSortMethod::DateAsc | FileSortMethod::DateDesc)
    );
    let mod_time = if sorts_by_date {
        metadata
            .modified()
            .ok()
            .and_then(|mtime| mtime.duration_since(std::time::SystemTime::UNIX_EPOCH).ok())
            .map(|elapsed| elapsed.as_secs())
    } else {
        None
    };

    debug!(target: "included_files", "Included file: {}", file_path);
    Some(FileEntry {
        path: file_path,
        extension: extension.to_string(),
        code: code_block,
        token_count,
        metadata: EntryMetadata::from(metadata),
        mod_time,
    })
}
/// Sorts the tree and the file entries with the configured sort method and
/// returns the rendered tree alongside an owned copy of the sorted entries.
///
/// `files` is sorted in place; the returned vector is a clone of that slice.
fn assemble_results(
    mut tree: Tree<String>,
    files: &mut [FileEntry],
    config: &Code2PromptConfig,
) -> Result<(String, Vec<FileEntry>)> {
    let sort_method = config.sort_method;
    sort_tree(&mut tree, sort_method);
    sort_files(files, sort_method);

    let rendered_tree = tree.to_string();
    let sorted_files = files.to_vec();
    Ok((rendered_tree, sorted_files))
}
/// Returns a human-readable name for `p`.
///
/// Uses the final path component when there is one. Otherwise falls back to
/// the name of the current working directory, and finally to `"."` when that
/// cannot be determined either. Non-UTF-8 names are converted lossily.
pub fn display_name<P: AsRef<Path>>(p: P) -> String {
    p.as_ref()
        .file_name()
        .map(|name| name.to_string_lossy().into_owned())
        .or_else(|| {
            let cwd = std::env::current_dir().ok()?;
            cwd.file_name()
                .map(|name| name.to_string_lossy().into_owned())
        })
        .unwrap_or_else(|| ".".to_string())
}
/// Formats file content for inclusion in the prompt.
///
/// * `line_numbers` – prefix every line with a right-aligned, 1-based line
///   number (width 4) followed by `" | "`; each numbered line ends in `\n`.
/// * `no_codeblock` – return the (optionally numbered) content as is instead
///   of wrapping it in a triple-backtick fence tagged with `extension`.
pub fn wrap_code_block(
    code: &str,
    extension: &str,
    line_numbers: bool,
    no_codeblock: bool,
) -> String {
    let body: String = if line_numbers {
        code.lines()
            .enumerate()
            .map(|(index, line)| format!("{:4} | {}\n", index + 1, line))
            .collect()
    } else {
        code.to_string()
    };

    if no_codeblock {
        return body;
    }

    let delimiter = "`".repeat(3);
    format!("{}{}\n{}\n{}", delimiter, extension, body, delimiter)
}