use std::{
fs,
path::{absolute, Component, Path, PathBuf},
};
use crate::error::{AppError, Result};
use super::run_git;
/// A target file located inside a Git repository, as produced by
/// `resolve_target_file`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct TargetFile {
/// Canonicalized top-level directory of the repository that contains the target.
pub(crate) repo_root: PathBuf,
/// Path of the target relative to `repo_root`.
pub(crate) repo_path: PathBuf,
}
/// Resolves a user-supplied path to the repository that contains it and to the
/// path of the target relative to that repository's root.
///
/// A relative `file_path` is interpreted against `invocation_dir`. The target
/// does not have to exist: the repository is located from the nearest existing
/// ancestor directory, and the non-existing tail is normalized lexically. A
/// symlink in the final path component is not followed.
///
/// # Errors
///
/// Returns an error when
/// - the path contains more `..` components than components preceding them,
/// - the path cannot be made absolute,
/// - the target (before or after normalization) is an existing directory,
/// - no existing ancestor directory is found,
/// - the repository root cannot be determined, or
/// - the normalized target does not lie inside the repository root.
pub(crate) fn resolve_target_file(invocation_dir: &Path, file_path: &Path) -> Result<TargetFile> {
    let input_path = if file_path.is_absolute() {
        file_path.to_path_buf()
    } else {
        invocation_dir.join(file_path)
    };
    validate_no_traversal_above_root(&input_path)?;

    let absolute_path = absolute(&input_path)
        .map_err(|err| AppError::io("failed to resolve absolute path", err))?;

    // Cheap early rejection of obvious directory targets before git is invoked.
    reject_existing_directory(&absolute_path, file_path)?;

    let search_dir = existing_search_dir(&absolute_path)?;
    let repo_root = find_repo_root(&search_dir)?;
    let normalized_path = normalize_path_from_existing_ancestor(&absolute_path, &search_dir)?;

    // The first check looks at the path as typed. Where `absolute` keeps `..`
    // components, a path such as `dir/missing/..` cannot be inspected (the
    // lookup reports "not found") although it normalizes to the existing
    // directory `dir`, or even to the repository root itself. Checking the
    // normalized path closes that gap and guarantees a non-directory target.
    reject_existing_directory(&normalized_path, file_path)?;

    let repo_path = normalized_path
        .strip_prefix(&repo_root)
        .map_err(|_| {
            AppError::message(format!(
                "target file {} is not inside repository {}",
                file_path.display(),
                repo_root.display()
            ))
        })?
        .to_path_buf();

    Ok(TargetFile {
        repo_root,
        repo_path,
    })
}
/// Fails when `path` is an existing directory.
///
/// `display_path` is the path shown in the error message; `path` is the one
/// that is actually inspected. Inspection failures are propagated unchanged.
fn reject_existing_directory(path: &Path, display_path: &Path) -> Result<()> {
    let is_directory = is_directory_entry(path)?;
    if !is_directory {
        return Ok(());
    }

    let message = format!(
        "target path {} is not a file",
        display_path.display()
    );
    Err(AppError::message(message))
}
/// Returns the nearest existing directory from which the repository lookup for
/// `absolute_path` can start.
///
/// If `absolute_path` itself is a directory it is returned as is; otherwise
/// the search starts at its parent and walks upwards until a directory is
/// found.
///
/// # Errors
///
/// Fails when `absolute_path` cannot be inspected, has no parent, or has no
/// existing ancestor directory at all.
fn existing_search_dir(absolute_path: &Path) -> Result<PathBuf> {
    let mut candidate = if is_directory_entry(absolute_path)? {
        absolute_path.to_path_buf()
    } else {
        absolute_path
            .parent()
            .ok_or_else(|| {
                AppError::message(format!(
                    "target path {} has no parent",
                    absolute_path.display()
                ))
            })?
            .to_path_buf()
    };

    // `is_dir` rather than `exists`: the result is used as a working directory
    // for git, so an ancestor that exists but is a regular file must be
    // skipped just like a missing one.
    while !candidate.is_dir() {
        if !candidate.pop() {
            return Err(AppError::message(format!(
                "no existing parent directory for target path {}",
                absolute_path.display()
            )));
        }
    }

    Ok(candidate)
}
/// Reports whether `path` itself is a directory.
///
/// The lookup uses `symlink_metadata`, so a symlink in the final component is
/// classified as a symlink (never as a directory), whatever it points to. A
/// missing path yields `Ok(false)`; every other I/O failure is returned as an
/// error.
fn is_directory_entry(path: &Path) -> Result<bool> {
    fs::symlink_metadata(path)
        .map(|entry| entry.file_type().is_dir())
        .or_else(|err| {
            if err.kind() == std::io::ErrorKind::NotFound {
                return Ok(false);
            }
            Err(AppError::io(
                format!("failed to inspect target path {}", path.display()),
                err,
            ))
        })
}
/// Determines the canonical top-level directory of the Git repository that
/// contains `search_dir`, using `git rev-parse --show-toplevel`.
///
/// # Errors
///
/// Fails when the git invocation fails, when it prints an empty path, or when
/// the printed path cannot be canonicalized.
fn find_repo_root(search_dir: &Path) -> Result<PathBuf> {
    let output = run_git(
        search_dir,
        "git rev-parse",
        &["rev-parse", "--show-toplevel"],
    )?;
    // NOTE(review): the lossy conversion replaces non-UTF-8 bytes, so a
    // repository path that is not valid UTF-8 will fail to canonicalize below.
    let stdout = String::from_utf8_lossy(&output.stdout);
    let printed: &str = &stdout;

    // git terminates the path with a single line break. Strip exactly that and
    // nothing else: a blanket `trim()` would also eat whitespace that is part
    // of the directory name (for example a repository directory named "repo ").
    let repo_root = printed
        .strip_suffix("\r\n")
        .or_else(|| printed.strip_suffix('\n'))
        .unwrap_or(printed);

    if repo_root.is_empty() {
        return Err(AppError::message(
            "git rev-parse returned an empty repository root",
        ));
    }

    fs::canonicalize(repo_root).map_err(|err| {
        AppError::io(
            format!("failed to canonicalize repository root {repo_root}"),
            err,
        )
    })
}
/// Builds a normalized form of `path` by canonicalizing its existing ancestor
/// `existing` and appending the lexically normalized rest of `path`.
///
/// Only `existing` is resolved through the filesystem; the remaining
/// components have `.` and `..` collapsed textually, so they need not exist
/// and a symlink in the final component is not followed.
///
/// # Errors
///
/// Fails when `existing` is not a leading part of `path`, when the remainder
/// climbs above `existing`, or when `existing` cannot be canonicalized.
fn normalize_path_from_existing_ancestor(path: &Path, existing: &Path) -> Result<PathBuf> {
    // A mismatch used to be papered over with an empty remainder, which
    // silently returned the ancestor directory as if it were the target.
    let remainder = path.strip_prefix(existing).map_err(|_| {
        AppError::message(format!(
            "path {} is not inside existing directory {}",
            path.display(),
            existing.display()
        ))
    })?;
    let remainder = normalize_relative_components(remainder)?;

    let canonical_existing = fs::canonicalize(existing).map_err(|err| {
        AppError::io(
            format!("failed to canonicalize path {}", existing.display()),
            err,
        )
    })?;

    // Joining an empty path would append a trailing separator, so the
    // ancestor is returned untouched in that case.
    Ok(if remainder.as_os_str().is_empty() {
        canonical_existing
    } else {
        canonical_existing.join(remainder)
    })
}
/// Collapses `.` and `..` components of a relative path purely lexically.
///
/// # Errors
///
/// Fails when a `..` would climb above the start of `path`, or when `path`
/// contains a root or prefix component (that is, it is not relative).
fn normalize_relative_components(path: &Path) -> Result<PathBuf> {
    let mut resolved = PathBuf::new();

    for part in path.components() {
        match part {
            Component::Normal(name) => resolved.push(name),
            Component::CurDir => continue,
            // `pop` reports `false` when nothing is left to remove.
            Component::ParentDir if resolved.pop() => continue,
            Component::ParentDir => {
                return Err(AppError::message(format!(
                    "path {} traverses above existing parent",
                    path.display()
                )));
            }
            Component::RootDir | Component::Prefix(_) => {
                return Err(AppError::message(format!(
                    "path {} must be relative",
                    path.display()
                )));
            }
        }
    }

    Ok(resolved)
}
/// Checks that no `..` component of `path` climbs above the point where the
/// path starts.
///
/// The check is lexical: a running depth is raised by every normal component
/// and lowered by every `..`; a `..` seen at depth zero is an error. Prefix,
/// root and `.` components leave the depth unchanged.
fn validate_no_traversal_above_root(path: &Path) -> Result<()> {
    path.components()
        .try_fold(0usize, |depth, part| match part {
            Component::Normal(_) => Ok(depth + 1),
            Component::ParentDir => depth.checked_sub(1).ok_or_else(|| {
                AppError::message(format!(
                    "path {} traverses above filesystem root",
                    path.display()
                ))
            }),
            Component::Prefix(_) | Component::RootDir | Component::CurDir => Ok(depth),
        })
        .map(|_| ())
}
#[cfg(test)]
mod tests {
use std::{
fs,
path::{Path, PathBuf},
};
use super::*;
use crate::git::{load_commits, load_diff, DiffLineKind};
mod fixture {
use std::{
env, fs,
path::{Path, PathBuf},
process::{self, Command},
sync::atomic::{AtomicU64, Ordering},
time::{SystemTime, UNIX_EPOCH},
};
/// Scratch directory under the system temp dir that is removed again on drop.
pub(super) struct TempDir(PathBuf);

/// Per-process sequence number mixed into every directory name so that
/// directories created within the same nanosecond still get distinct names.
static NEXT_TEMP_ID: AtomicU64 = AtomicU64::new(0);

impl TempDir {
    /// Creates a fresh directory whose name combines the process id, the
    /// current time in nanoseconds and a sequence number.
    pub(super) fn new() -> Self {
        let id = NEXT_TEMP_ID.fetch_add(1, Ordering::Relaxed);
        let since_epoch = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("system time should be after epoch");
        let nanos = since_epoch.as_nanos();

        let dir_name = format!(
            "git-file-history-rs-test-{}-{nanos}-{id}",
            process::id()
        );
        let root = env::temp_dir().join(dir_name);
        fs::create_dir_all(&root).expect("temp dir should be created");
        Self(root)
    }

    /// Location of the directory on disk.
    pub(super) fn path(&self) -> &Path {
        self.0.as_path()
    }
}

impl Drop for TempDir {
    fn drop(&mut self) {
        // Best effort: a failed cleanup must not panic while dropping.
        fs::remove_dir_all(&self.0).ok();
    }
}
pub(super) struct GitRepo {
_temp: Option<TempDir>,
path: PathBuf,
}
impl GitRepo {
pub(super) fn new() -> Self {
let temp = TempDir::new();
let path = temp.path().to_path_buf();
let repo = Self {
_temp: Some(temp),
path,
};
repo.init(&["init"]);
repo.configure_identity();
repo
}
pub(super) fn new_sha256_when_supported() -> Option<Self> {
let temp = TempDir::new();
let path = temp.path().to_path_buf();
let repo = Self {
_temp: Some(temp),
path,
};
if !repo.try_init(&["init", "--object-format=sha256"]) {
return None;
}
repo.configure_identity();
Some(repo)
}
pub(super) fn init_at(path: PathBuf) -> Self {
fs::create_dir_all(&path).expect("temp repo should be created");
let repo = Self { _temp: None, path };
repo.init(&["init"]);
repo.configure_identity();
repo
}
pub(super) fn path(&self) -> &Path {
&self.path
}
pub(super) fn commit_file(&self, path: &Path, contents: &str, message: &str) {
if let Some(parent) = path
.parent()
.filter(|parent| !parent.as_os_str().is_empty())
{
fs::create_dir_all(self.path.join(parent))
.expect("parent dir should be created");
}
fs::write(self.path.join(path), contents).expect("file should be written");
self.add_and_commit(".", message);
}
pub(super) fn add_and_commit(&self, pathspec: &str, message: &str) {
self.run(&["add", pathspec]);
self.run(&["commit", "-m", message]);
}
fn configure_identity(&self) {
self.run(&["config", "user.name", "Test User"]);
self.run(&["config", "user.email", "test@example.com"]);
self.run(&["config", "commit.gpgsign", "false"]);
}
fn init(&self, args: &[&str]) {
assert!(self.try_init(args), "git {args:?} failed");
}
fn try_init(&self, args: &[&str]) -> bool {
Command::new("git")
.current_dir(&self.path)
.args(args)
.output()
.expect("git should run")
.status
.success()
}
fn run(&self, args: &[&str]) {
let output = Command::new("git")
.current_dir(&self.path)
.args(args)
.output()
.expect("git should run");
assert!(
output.status.success(),
"git {:?} failed\nstdout: {}\nstderr: {}",
args,
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
);
}
}
}
// Exercises `load_commits` and `load_diff` against a repository created with
// `--object-format=sha256`. Returns early without asserting anything when that
// `git init` invocation does not succeed.
#[test]
fn loads_commits_and_diff_from_sha256_git_repo_when_supported() {
let Some(repo) = fixture::GitRepo::new_sha256_when_supported() else {
return;
};
let file_path = PathBuf::from("file.txt");
repo.commit_file(&file_path, "one\n", "initial commit");
repo.commit_file(&file_path, "one\ntwo\n", "second commit");
let commits = load_commits(repo.path(), &file_path).expect("commits should load");
assert_eq!(commits.len(), 2);
// 64 characters matches a hex-encoded SHA-256 object id.
assert_eq!(commits[0].hash.len(), 64);
// The diff of the first listed commit must contain the line the second commit added.
let diff = load_diff(repo.path(), &commits[0].hash, &file_path).expect("diff should load");
assert!(diff.iter().any(|line| line.text == "+two"));
}
// Exercises `load_commits` and `load_diff` against a default-format repository,
// using a file name that contains spaces.
#[test]
fn loads_commits_and_diff_from_git_repo() {
let repo = fixture::GitRepo::new();
let file_path = PathBuf::from("file with spaces.txt");
repo.commit_file(&file_path, "one\n", "initial commit");
repo.commit_file(&file_path, "one\ntwo\n", "second commit");
let commits = load_commits(repo.path(), &file_path).expect("commits should load");
assert_eq!(commits.len(), 2);
// 40 characters matches a hex-encoded SHA-1 object id.
assert_eq!(commits[0].hash.len(), 40);
// The most recent commit is expected first.
assert_eq!(commits[0].subject, "second commit");
let diff = load_diff(repo.path(), &commits[0].hash, &file_path).expect("diff should load");
assert!(diff.iter().any(|line| line.text == "+two"));
assert!(diff.iter().any(|line| line.kind == DiffLineKind::Hunk));
}
// Resolves a file while the invocation directory is the parent of the
// repository, i.e. outside of it, and checks that the result can be fed to
// `load_commits`.
#[test]
fn resolves_target_file_repo_from_parent_directory() {
let outer = fixture::TempDir::new();
let repo = fixture::GitRepo::init_at(outer.path().join("repo"));
let file_path = PathBuf::from("nested/file.txt");
repo.commit_file(&file_path, "one\n", "initial commit");
let target = resolve_target_file(outer.path(), Path::new("repo/nested/file.txt"))
.expect("target should resolve from outside repo");
// The resolved root is compared against the canonicalized repository path.
assert_eq!(target.repo_root, fs::canonicalize(repo.path()).unwrap());
assert_eq!(target.repo_path, file_path);
let commits = load_commits(&target.repo_root, &target.repo_path).expect("commits load");
assert_eq!(commits.len(), 1);
}
// Unix only: a committed symlink whose target lies outside the repository must
// resolve to the link's own repo-relative path rather than to its target.
#[test]
#[cfg(unix)]
fn resolves_target_file_without_following_final_symlink() {
let outer = fixture::TempDir::new();
let repo = fixture::GitRepo::init_at(outer.path().join("repo"));
// The link target is a sibling of the repository directory, not inside it.
let outside_target = outer.path().join("outside.txt");
fs::write(&outside_target, "outside\n").expect("outside file should be written");
symlink_file(&outside_target, &repo.path().join("link.txt"));
repo.add_and_commit("link.txt", "add symlink");
let target = resolve_target_file(outer.path(), Path::new("repo/link.txt"))
.expect("symlink target should resolve as repo path");
assert_eq!(target.repo_root, fs::canonicalize(repo.path()).unwrap());
assert_eq!(target.repo_path, PathBuf::from("link.txt"));
let commits = load_commits(&target.repo_root, &target.repo_path).expect("commits load");
assert_eq!(commits.len(), 1);
let diff = load_diff(&target.repo_root, &commits[0].hash, &target.repo_path)
.expect("symlink diff should load");
assert!(diff.iter().any(|line| line.text.contains("link.txt")));
}
// Unix only: a committed symlink that points at a directory is accepted as a
// target (it is not rejected as a directory) and resolves to the link's own
// repo-relative path.
#[test]
#[cfg(unix)]
fn resolves_symlink_to_directory_as_repo_path() {
let outer = fixture::TempDir::new();
let repo = fixture::GitRepo::init_at(outer.path().join("repo"));
// The linked directory is a sibling of the repository directory, not inside it.
let outside_target = outer.path().join("outside-dir");
fs::create_dir_all(&outside_target).expect("outside dir should be created");
symlink_dir(&outside_target, &repo.path().join("link-dir"));
repo.add_and_commit("link-dir", "add directory symlink");
let target = resolve_target_file(outer.path(), Path::new("repo/link-dir"))
.expect("symlink target should resolve as repo path");
assert_eq!(target.repo_root, fs::canonicalize(repo.path()).unwrap());
assert_eq!(target.repo_path, PathBuf::from("link-dir"));
let commits = load_commits(&target.repo_root, &target.repo_path).expect("commits load");
assert_eq!(commits.len(), 1);
}
// A file literally named `:(top)secret.txt` must be handled as that file name:
// its history and diff must not pick up the separate `secret.txt`, and
// `secret.txt` must keep its own single commit.
#[test]
fn treats_pathspec_magic_as_literal_filename() {
let repo = fixture::GitRepo::new();
repo.commit_file(
Path::new("secret.txt"),
"real secret\n",
"add normal secret",
);
let literal_pathspec = PathBuf::from(":(top)secret.txt");
repo.commit_file(&literal_pathspec, "literal\n", "add literal pathspec");
let commits = load_commits(repo.path(), &literal_pathspec).expect("commits should load");
let secret_commits =
load_commits(repo.path(), Path::new("secret.txt")).expect("commits should load");
let diff =
load_diff(repo.path(), &commits[0].hash, &literal_pathspec).expect("diff should load");
// Each file has exactly one commit, and each history holds its own commit only.
assert_eq!(commits.len(), 1);
assert_eq!(secret_commits.len(), 1);
assert_eq!(commits[0].subject, "add literal pathspec");
assert_eq!(secret_commits[0].subject, "add normal secret");
assert!(diff
.iter()
.any(|line| line.text.contains(":(top)secret.txt")));
// No diff line may mention `secret.txt` other than as part of the literal name.
assert!(!diff.iter().any(
|line| line.text.contains("secret.txt") && !line.text.contains(":(top)secret.txt")
));
}
// A target below a directory that does not exist still resolves, anchored at
// the nearest directory that does exist.
#[test]
fn resolves_missing_file_with_existing_parent_directory() {
    let outer = fixture::TempDir::new();
    let repo = fixture::GitRepo::init_at(outer.path().join("repo"));
    let nested_dir = repo.path().join("nested");
    fs::create_dir_all(nested_dir).expect("nested dir should be created");

    let requested = Path::new("repo/nested/missing/deleted.txt");
    let resolved = resolve_target_file(outer.path(), requested)
        .expect("missing file should resolve through existing parent");

    let expected_root = fs::canonicalize(repo.path()).unwrap();
    let expected_path = PathBuf::from("nested/missing/deleted.txt");
    assert_eq!(resolved.repo_root, expected_root);
    assert_eq!(resolved.repo_path, expected_path);
}
// Pointing at an existing directory is an error that says so.
#[test]
fn rejects_existing_directory_targets() {
    let outer = fixture::TempDir::new();
    let repo = fixture::GitRepo::init_at(outer.path().join("repo"));
    let nested_dir = repo.path().join("nested");
    fs::create_dir_all(nested_dir).expect("nested dir should be created");

    let outcome = resolve_target_file(outer.path(), Path::new("repo/nested"));

    let message = outcome
        .expect_err("directory target should fail")
        .to_string();
    assert!(message.contains("is not a file"));
}
// `..` that steps out of and back into an existing directory is collapsed
// before the repo-relative path is computed.
#[test]
fn resolves_missing_file_with_dot_dot_components() {
    let outer = fixture::TempDir::new();
    let repo = fixture::GitRepo::init_at(outer.path().join("repo"));
    let nested_dir = repo.path().join("nested");
    fs::create_dir_all(nested_dir).expect("nested dir should be created");

    let requested = Path::new("repo/nested/../nested/missing/deleted.txt");
    let resolved = resolve_target_file(outer.path(), requested)
        .expect("missing file should resolve through dot-dot components");

    let expected_root = fs::canonicalize(repo.path()).unwrap();
    let expected_path = PathBuf::from("nested/missing/deleted.txt");
    assert_eq!(resolved.repo_root, expected_root);
    assert_eq!(resolved.repo_path, expected_path);
}
// A `..` that cancels a component which does not exist on disk must still be
// collapsed, leaving only the part of the path below the repository root.
#[test]
fn resolves_missing_file_when_missing_component_precedes_dot_dot() {
    let outer = fixture::TempDir::new();
    let repo = fixture::GitRepo::init_at(outer.path().join("repo"));
    let nested_dir = repo.path().join("nested");
    fs::create_dir_all(nested_dir).expect("nested dir should be created");

    let requested = Path::new("repo/missing/../nested/deleted.txt");
    let resolved = resolve_target_file(outer.path(), requested)
        .expect("missing file should resolve through missing dot-dot component");

    let expected_root = fs::canonicalize(repo.path()).unwrap();
    assert_eq!(resolved.repo_root, expected_root);
    assert_eq!(resolved.repo_path, PathBuf::from("nested/deleted.txt"));
}
// `..` directly below the root has nothing left to pop and must be rejected.
#[test]
fn rejects_paths_that_traverse_above_root() {
    let escaping = Path::new("/../../repo/file");

    let message = validate_no_traversal_above_root(escaping)
        .expect_err("path should not traverse above root")
        .to_string();

    assert!(message.contains("traverses above filesystem root"));
}
// Creates a symlink at `link` that points to the file `original`; panics on failure.
#[cfg(unix)]
fn symlink_file(original: &Path, link: &Path) {
    use std::os::unix::fs::symlink;

    symlink(original, link).expect("symlink should be created");
}
// Creates a symlink at `link` that points to the directory `original`; panics on failure.
#[cfg(unix)]
fn symlink_dir(original: &Path, link: &Path) {
    use std::os::unix::fs::symlink;

    symlink(original, link).expect("symlink should be created");
}
}