use anyhow::{Context, Result};
use git2::Repository;
use std::collections::HashSet;
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use tracing::{info, info_span, warn};
/// A discovered git repository paired with the absolute path of its
/// working-tree root.
pub struct RepositoryContext {
/// Handle to the underlying libgit2 repository.
pub repo: Repository,
/// Absolute path of the repository's working directory root
/// (guaranteed non-bare by `discover_repository`).
pub root: PathBuf,
}
/// Human-readable summary of the working directory state, produced by
/// `get_working_dir_info`.
pub struct WorkingDirInfo {
/// Current branch shorthand (e.g. "main"), or "detached HEAD" when HEAD
/// has no symbolic shorthand, or "unknown" when HEAD cannot be resolved.
pub branch_name: String,
/// Either "N files with changes" or "clean working directory".
pub status_summary: String,
}
/// Locates the git repository containing `repo_path` (searching upward from
/// it, like `git rev-parse`) and returns it together with the absolute path
/// of its working-tree root.
///
/// # Errors
///
/// Fails when no repository can be discovered from `repo_path`, or when the
/// discovered repository is bare (it then has no working tree to flatten).
#[tracing::instrument(level = "info", skip(repo_path), fields(repo_path=%repo_path.display()), err)]
pub fn discover_repository(repo_path: &Path) -> Result<RepositoryContext> {
    // `#[tracing::instrument]` already wraps this function in its own span;
    // the previous manually-entered `info_span!` duplicated that and has
    // been removed.
    info!("discovering git repository");
    let repo = Repository::discover(repo_path).with_context(|| {
        format!(
            "Failed to discover repository from '{}'",
            repo_path.display()
        )
    })?;
    // `workdir()` is `None` exactly for bare repositories.
    let repo_root = repo
        .workdir()
        .context("Repository is bare, cannot flatten.")?
        .to_path_buf();
    info!(repo_root=%repo_root.display(), "repository discovered successfully");
    Ok(RepositoryContext {
        repo,
        root: repo_root,
    })
}
/// Summarizes the repository's current branch and whether the working
/// directory has uncommitted changes.
///
/// The branch name is HEAD's shorthand (e.g. "main"); "detached HEAD" when
/// HEAD has no symbolic shorthand, "unknown" when HEAD cannot be resolved
/// (e.g. an unborn branch in a fresh repository).
///
/// # Errors
///
/// Fails only when git status information cannot be read.
#[tracing::instrument(level = "info", skip(repo), err)]
pub fn get_working_dir_info(repo: &Repository) -> Result<WorkingDirInfo> {
    // `#[tracing::instrument]` already opens a span for this call; the
    // previous manual `info_span!` was redundant and has been removed.
    info!("analyzing working directory state");
    let branch_name = match repo.head() {
        Ok(head) => head
            .shorthand()
            .map(str::to_string)
            .unwrap_or_else(|| "detached HEAD".to_string()),
        Err(_) => "unknown".to_string(),
    };
    // Count status entries that carry any flag and are not ignored.
    let statuses = repo.statuses(None)?;
    let change_count = statuses
        .iter()
        .filter(|entry| {
            let flags = entry.status();
            !flags.is_ignored() && !flags.is_empty()
        })
        .count();
    let status_summary = if change_count > 0 {
        format!("{change_count} files with changes")
    } else {
        "clean working directory".to_string()
    };
    info!(branch=%branch_name, changes=%change_count, "working directory analyzed");
    Ok(WorkingDirInfo {
        branch_name,
        status_summary,
    })
}
/// Walks the working directory (or only `filter_paths`, when non-empty) and
/// writes every non-ignored file to `writer`. Returns the number of files
/// written.
///
/// Relative filter paths are resolved against the repository root; absolute
/// ones are used as-is. Filter paths that do not exist are logged and
/// skipped rather than treated as errors.
///
/// # Errors
///
/// Fails when the repository is bare, when git status cannot be read, or
/// when a walked directory/file cannot be read or the writer fails.
#[tracing::instrument(level = "info", skip(repo, writer), fields(filter_count=filter_paths.len()), err)]
pub fn walk_and_write_working_dir<W: Write>(
    repo: &Repository,
    filter_paths: &[PathBuf],
    writer: &mut W,
) -> Result<usize> {
    // `#[tracing::instrument]` already opens a span for this call; the
    // previous manual `info_span!` duplicated it and has been removed.
    info!("starting working directory walk");
    let repo_root = repo
        .workdir()
        .context("Repository has no working directory")?;
    // Partition everything git reported into "ignored" and "everything else"
    // so the walk can later filter by ignore status.
    // NOTE(review): default status options may not enumerate every file
    // inside ignored directories individually, in which case files under an
    // ignored directory would still be walked and included. Confirm the
    // intended behavior; explicit `git2::StatusOptions` may be needed.
    let statuses = repo.statuses(None)?;
    let mut git_tracked_files = HashSet::new();
    let mut ignored_files = HashSet::new();
    for status in statuses.iter() {
        if let Some(path) = status.path() {
            let file_path = repo_root.join(path);
            if status.status().is_ignored() {
                ignored_files.insert(file_path);
            } else {
                git_tracked_files.insert(file_path);
            }
        }
    }
    // No filters means "walk the whole tree from the repo root".
    let paths_to_walk = if filter_paths.is_empty() {
        vec![repo_root.to_path_buf()]
    } else {
        filter_paths
            .iter()
            .map(|p| {
                if p.is_absolute() {
                    p.clone()
                } else {
                    repo_root.join(p)
                }
            })
            .collect()
    };
    let mut file_count = 0;
    for start_path in paths_to_walk {
        if !start_path.exists() {
            warn!(path=%start_path.display(), "filter path does not exist, skipping");
            continue;
        }
        file_count += walk_directory_recursive(
            &start_path,
            repo_root,
            &git_tracked_files,
            &ignored_files,
            writer,
        )?;
    }
    info!(
        files_processed = file_count,
        "working directory walk completed successfully"
    );
    Ok(file_count)
}
/// Recursively walks `dir_path`, writing every includable file to `writer`
/// and returning how many files were written. `.git` directories are never
/// entered. `dir_path` may also be a single file (filter paths can point at
/// files directly).
fn walk_directory_recursive<W: Write>(
    dir_path: &Path,
    repo_root: &Path,
    git_tracked_files: &HashSet<PathBuf>,
    ignored_files: &HashSet<PathBuf>,
    writer: &mut W,
) -> Result<usize> {
    // Base case: a plain file is processed (or filtered out) directly.
    if dir_path.is_file() {
        return if should_include_file(dir_path, git_tracked_files, ignored_files)? {
            process_file(dir_path, repo_root, writer)?;
            Ok(1)
        } else {
            Ok(0)
        };
    }
    let dir_iter = fs::read_dir(dir_path)
        .with_context(|| format!("Failed to read directory: {}", dir_path.display()))?;
    let mut written = 0;
    for dir_entry in dir_iter {
        let child = dir_entry?.path();
        // Never descend into the repository metadata directory.
        if matches!(child.file_name(), Some(name) if name == ".git") {
            continue;
        }
        if child.is_file() {
            if should_include_file(&child, git_tracked_files, ignored_files)? {
                process_file(&child, repo_root, writer)?;
                written += 1;
            }
        } else if child.is_dir() {
            written += walk_directory_recursive(
                &child,
                repo_root,
                git_tracked_files,
                ignored_files,
                writer,
            )?;
        }
    }
    Ok(written)
}
/// Decides whether a file found during the working-directory walk should be
/// written to the output: a file is excluded exactly when git reported it as
/// ignored; everything else (tracked or untracked) is included.
///
/// The original expression `tracked || !ignored` was a tautology after the
/// early ignored-return — the tracked-set lookup could never change the
/// result — so it has been simplified to the equivalent single ignore test.
/// The tracked-set parameter is kept (underscored) so call sites and the
/// signature stay unchanged.
fn should_include_file(
    file_path: &Path,
    _git_tracked_files: &HashSet<PathBuf>,
    ignored_files: &HashSet<PathBuf>,
) -> Result<bool> {
    Ok(!ignored_files.contains(file_path))
}
/// Writes a single file to `writer`: a header line with the repo-relative
/// path, then either the raw file bytes (text) or a placeholder (binary),
/// followed by a blank separator line.
fn process_file<W: Write>(file_path: &Path, repo_root: &Path, writer: &mut W) -> Result<()> {
    let span = info_span!("repository.process_file", file_path=%file_path.display());
    let _guard = span.enter();
    // Header shows the path relative to the repo root when possible; paths
    // outside the root fall back to their original form.
    let display_path = file_path.strip_prefix(repo_root).unwrap_or(file_path);
    writeln!(writer, "--- File: {} ---", display_path.display())
        .context("Failed to write file header")?;
    let bytes = fs::read(file_path)
        .with_context(|| format!("Failed to read file: {}", file_path.display()))?;
    if is_binary_content(&bytes) {
        writeln!(writer, "[Binary file: content not included]\n")
            .context("Failed to write binary file placeholder")?;
    } else {
        writer
            .write_all(&bytes)
            .context("Failed to write file content")?;
        writeln!(writer, "\n").context("Failed to write trailing newline")?;
    }
    info!(file_path=%display_path.display(), "file processed successfully");
    Ok(())
}
/// Heuristic binary sniff: content is treated as binary when a NUL byte
/// appears anywhere within its first 8192 bytes.
fn is_binary_content(content: &[u8]) -> bool {
    content.iter().take(8192).any(|&byte| byte == 0)
}