use std::fs::{self, File};
use std::io::{BufReader, Read, Seek, SeekFrom};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
use gix_hash::ObjectId;
#[cfg_attr(not(test), allow(unused_imports))]
use gix_pack::Find;
use gix_pack::data::output::bytes::FromEntriesIter;
use gix_pack::data::output::count::objects::ObjectExpansion;
use gix_pack::data::output::{count, entry};
use tempfile::{NamedTempFile, TempDir};
use crate::git::{PeeledTip, Sha};
use super::PackchainError;
use super::schema::Sha40;
/// Number of bytes at the very end of a pack file that `build_pack` reads back
/// and interprets as an object id (the pack is written with `gix_hash::Kind::Sha1`).
const PACK_TRAILER_LEN: usize = 20;
/// Number of bytes budgeted for the pack header when computing `PACK_MIN_LEN`.
/// NOTE(review): 12 presumably corresponds to git's pack header (signature,
/// version, object count) — confirm against the pack-format specification.
const PACK_HEADER_LEN: u64 = 12;
/// Smallest length a written pack may have (header plus trailer); `build_pack`
/// rejects anything shorter before it seeks to the trailer.
const PACK_MIN_LEN: u64 = PACK_HEADER_LEN + PACK_TRAILER_LEN as u64;
/// Description of a pack produced by `build_pack`, together with its index.
pub(crate) struct BuiltPack {
/// Final location of the pack file: `<out_dir>/<content_sha>.pack`.
pub(crate) pack_path: PathBuf,
/// Final location of the index derived from the pack: `<out_dir>/<content_sha>.idx`.
pub(crate) idx_path: PathBuf,
/// The pack's last `PACK_TRAILER_LEN` bytes converted to a `Sha40`; also used
/// as the stem of both file names above.
pub(crate) content_sha: Sha40,
/// Size of the pack file in bytes, taken from file metadata after the write was synced.
pub(crate) pack_bytes: u64,
}
/// Builds a self-contained pack for `peeled` inside `out_dir`.
///
/// The repository at `repo_dir` is opened and the set of input objects is
/// chosen by the kind of tip:
///
/// * commit — every commit yielded by a revision walk starting at the tip,
///   expanded with `ObjectExpansion::TreeContents`;
/// * tree — the ids returned by `enumerate_tree_closure`, packed as-is;
/// * blob — only the blob itself, packed as-is.
///
/// In all cases the tip's `tag_chain` is forwarded to `build_pack`.
///
/// # Errors
/// Fails when the repository cannot be opened, when collecting the input
/// objects fails, or when `build_pack` fails.
pub(crate) fn build_baseline_pack(
    repo_dir: &Path,
    peeled: PeeledTip,
    out_dir: &Path,
) -> Result<BuiltPack, PackchainError> {
    let repo = gix::open(repo_dir).map_err(crate::git::GitError::from)?;

    // Reduce each tip kind to the inputs `build_pack` needs, so the actual
    // pack construction is invoked from a single place.
    let (input_oids, expansion, tag_chain) = match peeled {
        PeeledTip::Commit { commit, tag_chain } => (
            collect_commits_baseline(&repo, *commit.as_object_id())?,
            ObjectExpansion::TreeContents,
            tag_chain,
        ),
        PeeledTip::Tree { tree, tag_chain } => (
            super::git::enumerate_tree_closure(&repo, tree).map_err(PackchainError::Git)?,
            ObjectExpansion::AsIs,
            tag_chain,
        ),
        PeeledTip::Blob { blob, tag_chain } => (vec![blob], ObjectExpansion::AsIs, tag_chain),
    };

    build_pack(&repo, input_oids, expansion, &tag_chain, out_dir)
}
/// Builds a pack in `out_dir` for the step from `prior_commit` to `local_commit`.
///
/// The input commits come from a revision walk that starts at `local_commit`
/// with `prior_commit` hidden; they are expanded with
/// `ObjectExpansion::TreeAdditionsComparedToAncestor`. The ids in `tag_chain`
/// are forwarded to `build_pack` unchanged.
///
/// # Errors
/// Fails when the repository at `repo_dir` cannot be opened, when the
/// revision walk fails, or when `build_pack` fails (which includes the case
/// where there is nothing to pack).
pub(crate) fn build_incremental_pack(
    repo_dir: &Path,
    prior_commit: Sha,
    local_commit: Sha,
    tag_chain: &[ObjectId],
    out_dir: &Path,
) -> Result<BuiltPack, PackchainError> {
    let repo = gix::open(repo_dir).map_err(crate::git::GitError::from)?;

    let walk_start = *local_commit.as_object_id();
    let walk_boundary = *prior_commit.as_object_id();
    let new_commits = collect_commits_incremental(&repo, walk_start, walk_boundary)?;

    let expansion = ObjectExpansion::TreeAdditionsComparedToAncestor;
    build_pack(&repo, new_commits, expansion, tag_chain, out_dir)
}
/// Returns the id of every commit yielded by a revision walk starting at `tip`,
/// in the order the walk produces them.
///
/// # Errors
/// Any failure to start the walk, or any failing step of it, is reported as
/// `PackchainError::PackBuild` carrying the underlying error's message; the
/// walk stops at the first failing step.
fn collect_commits_baseline(
    repo: &gix::Repository,
    tip: ObjectId,
) -> Result<Vec<ObjectId>, PackchainError> {
    let walk = match repo.rev_walk([tip]).all() {
        Ok(walk) => walk,
        Err(e) => return Err(PackchainError::PackBuild(e.to_string())),
    };

    let mut commit_ids = Vec::new();
    for step in walk {
        match step {
            Ok(info) => commit_ids.push(info.id),
            Err(e) => return Err(PackchainError::PackBuild(e.to_string())),
        }
    }
    Ok(commit_ids)
}
/// Returns the ids of the commits yielded by a revision walk that starts at
/// `local_tip` and has `prior_tip` marked as hidden.
///
/// # Errors
/// Failures to start the walk and failures of individual steps are both
/// mapped to `PackchainError::PackBuild` with the underlying message; the
/// first failing step aborts the collection.
fn collect_commits_incremental(
    repo: &gix::Repository,
    local_tip: ObjectId,
    prior_tip: ObjectId,
) -> Result<Vec<ObjectId>, PackchainError> {
    let walk = repo
        .rev_walk([local_tip])
        .with_hidden([prior_tip])
        .all()
        .map_err(|e| PackchainError::PackBuild(e.to_string()))?;

    let mut new_commit_ids = Vec::new();
    for step in walk {
        let info = step.map_err(|e| PackchainError::PackBuild(e.to_string()))?;
        new_commit_ids.push(info.id);
    }
    Ok(new_commit_ids)
}
fn build_pack(
repo: &gix::Repository,
input_oids: Vec<ObjectId>,
expansion: ObjectExpansion,
tag_chain: &[ObjectId],
out_dir: &Path,
) -> Result<BuiltPack, PackchainError> {
let mut odb = repo.objects.clone().into_inner();
odb.prevent_pack_unload();
let (mut counts, _stats) = count::objects(
odb.clone(),
Box::new(
input_oids
.into_iter()
.map(Ok::<_, Box<dyn std::error::Error + Send + Sync + 'static>>),
),
&gix::progress::Discard,
&AtomicBool::new(false),
count::objects::Options {
input_object_expansion: expansion,
thread_limit: Some(1),
..Default::default()
},
)
.map_err(|e| PackchainError::PackBuild(e.to_string()))?;
counts.extend(
crate::bundle::count_objects_as_is(odb.clone(), tag_chain)
.map_err(|e| PackchainError::PackBuild(e.to_string()))?,
);
if counts.is_empty() {
return Err(PackchainError::PackBuild(
"no objects to pack — caller should have short-circuited on prior.tip == local_tip"
.into(),
));
}
let num_entries = u32::try_from(counts.len())
.map_err(|_| PackchainError::PackBuild("too many objects for a single pack".into()))?;
let entries_iter = entry::iter_from_counts(
counts,
odb,
Box::new(gix::progress::Discard),
entry::iter_from_counts::Options {
thread_limit: Some(1),
..Default::default()
},
)
.map(|r| r.map(|(_, entries)| entries));
let mut pack_tmp = NamedTempFile::new_in(out_dir)?;
{
let pack_iter = FromEntriesIter::new(
entries_iter,
pack_tmp.as_file_mut(),
num_entries,
gix_pack::data::Version::V2,
gix_hash::Kind::Sha1,
);
for r in pack_iter {
r.map_err(|e| PackchainError::PackBuild(e.to_string()))?;
}
}
pack_tmp.as_file().sync_all()?;
let pack_bytes = pack_tmp.as_file().metadata()?.len();
if pack_bytes < PACK_MIN_LEN {
return Err(PackchainError::PackTrailer(format!(
"pack file shorter than {PACK_MIN_LEN} bytes (got {pack_bytes})",
)));
}
let mut trailer = [0u8; PACK_TRAILER_LEN];
pack_tmp
.as_file_mut()
.seek(SeekFrom::Start(pack_bytes - PACK_TRAILER_LEN as u64))?;
pack_tmp.as_file_mut().read_exact(&mut trailer)?;
let trailer_oid = ObjectId::from(trailer);
let content_sha = Sha40::from_oid(&trailer_oid)?;
let pack_path = out_dir.join(format!("{}.pack", content_sha.as_str()));
pack_tmp
.persist(&pack_path)
.map_err(|e| PackchainError::Io(e.error))?;
let idx_path = derive_idx(&pack_path, out_dir, &content_sha)?;
Ok(BuiltPack {
pack_path,
idx_path,
content_sha,
pack_bytes,
})
}
/// Produces `<out_dir>/<content_sha>.idx` for the pack at `pack_path`.
///
/// The pack is streamed through `gix_pack::Bundle::write_to_directory` into a
/// temporary directory created inside `out_dir`; the index it emits is then
/// renamed to its final location. The temporary directory is discarded when
/// this function returns.
///
/// # Errors
/// Propagates I/O failures (temp dir creation, opening the pack, renaming the
/// index) and failures of `write_to_directory`. Returns
/// `PackchainError::PackBuild` when no index path is reported.
fn derive_idx(
    pack_path: &Path,
    out_dir: &Path,
    content_sha: &Sha40,
) -> Result<PathBuf, PackchainError> {
    let staging = TempDir::new_in(out_dir)?;
    let mut pack_reader = BufReader::new(File::open(pack_path)?);

    let write_opts = gix_pack::bundle::write::Options {
        object_hash: gix_hash::Kind::Sha1,
        ..Default::default()
    };
    let written = gix_pack::Bundle::write_to_directory(
        &mut pack_reader,
        Some(staging.path()),
        &mut gix::progress::Discard,
        &AtomicBool::new(false),
        None::<gix::odb::Handle>,
        write_opts,
    )?;

    let staged_idx = match written.index_path {
        Some(path) => path,
        None => {
            return Err(PackchainError::PackBuild(
                "gix_pack did not emit an .idx path".into(),
            ))
        }
    };

    let final_idx = out_dir.join(format!("{}.idx", content_sha.as_str()));
    fs::rename(&staged_idx, &final_idx)?;

    // Removing the keep file is best effort; a failure here is ignored.
    if let Some(keep_path) = written.keep_path {
        let _ = fs::remove_file(keep_path);
    }
    Ok(final_idx)
}
#[cfg(test)]
mod tests {
    //! Tests for baseline and incremental pack building. Each test builds a
    //! small repository in a temporary directory, builds a pack from it, and
    //! inspects either the pack index or a fresh repository the pack was
    //! installed into.

    use super::*;
    use gix::actor::SignatureRef;
    use gix::bstr::BStr;
    use std::sync::atomic::AtomicBool;
    use tempfile::TempDir;

    /// Fixed author/committer identity used for every fixture commit and tag.
    fn signature() -> SignatureRef<'static> {
        SignatureRef {
            name: BStr::new("Tester"),
            email: BStr::new("t@example.com"),
            time: "0 +0000",
        }
    }

    /// Shorthand for a commit-kind `PeeledTip`.
    fn commit_tip(commit: Sha, tag_chain: Vec<ObjectId>) -> PeeledTip {
        PeeledTip::Commit { commit, tag_chain }
    }

    /// Creates a repository with two commits on `refs/heads/main`.
    ///
    /// The first commit has a tree with `a.txt`; the second (child of the
    /// first) replaces `a.txt` with a new blob and adds `b.txt`.
    /// Returns the repository directory and the two commit ids in order.
    fn fixture_two_commits() -> (TempDir, Sha, Sha) {
        let tmp = TempDir::new().unwrap();
        let repo = gix::init(tmp.path()).unwrap();

        let blob1 = repo.write_blob(b"v1").unwrap().detach();
        let tree1 = repo
            .write_object(&gix::objs::Tree {
                entries: vec![gix::objs::tree::Entry {
                    mode: gix::objs::tree::EntryKind::Blob.into(),
                    filename: "a.txt".into(),
                    oid: blob1,
                }],
            })
            .unwrap()
            .detach();
        let c1 = repo
            .commit_as(
                signature(),
                signature(),
                "refs/heads/main",
                "first",
                tree1,
                std::iter::empty::<ObjectId>(),
            )
            .unwrap()
            .detach();

        let blob2 = repo.write_blob(b"v2 plus more").unwrap().detach();
        let blob_b = repo.write_blob(b"new file b").unwrap().detach();
        let tree2 = repo
            .write_object(&gix::objs::Tree {
                entries: vec![
                    gix::objs::tree::Entry {
                        mode: gix::objs::tree::EntryKind::Blob.into(),
                        filename: "a.txt".into(),
                        oid: blob2,
                    },
                    gix::objs::tree::Entry {
                        mode: gix::objs::tree::EntryKind::Blob.into(),
                        filename: "b.txt".into(),
                        oid: blob_b,
                    },
                ],
            })
            .unwrap()
            .detach();
        let c2 = repo
            .commit_as(
                signature(),
                signature(),
                "refs/heads/main",
                "second",
                tree2,
                std::iter::once(c1),
            )
            .unwrap()
            .detach();

        (tmp, Sha::from_object_id(c1), Sha::from_object_id(c2))
    }

    /// Creates a repository with one root commit whose tree holds a single blob `x`.
    fn fixture_single_commit() -> (TempDir, Sha) {
        let tmp = TempDir::new().unwrap();
        let repo = gix::init(tmp.path()).unwrap();

        let blob = repo.write_blob(b"only").unwrap().detach();
        let tree = repo
            .write_object(&gix::objs::Tree {
                entries: vec![gix::objs::tree::Entry {
                    mode: gix::objs::tree::EntryKind::Blob.into(),
                    filename: "x".into(),
                    oid: blob,
                }],
            })
            .unwrap()
            .detach();
        let c = repo
            .commit_as(
                signature(),
                signature(),
                "refs/heads/main",
                "only",
                tree,
                std::iter::empty::<ObjectId>(),
            )
            .unwrap()
            .detach();

        (tmp, Sha::from_object_id(c))
    }

    /// A single-commit repository yields a pack and index on disk with sane metadata.
    #[test]
    fn build_baseline_pack_handles_single_commit_repo() {
        let (repo_dir, tip) = fixture_single_commit();
        let out = TempDir::new().unwrap();
        let built = build_baseline_pack(repo_dir.path(), commit_tip(tip, vec![]), out.path())
            .expect("build");
        assert!(built.pack_path.exists());
        assert!(built.idx_path.exists());
        assert!(built.pack_bytes >= PACK_MIN_LEN);
        assert_eq!(built.content_sha.as_str().len(), 40);
    }

    /// `content_sha` equals the hex form of the pack file's trailing bytes.
    #[test]
    fn build_baseline_pack_content_sha_matches_trailer() {
        let (repo_dir, tip) = fixture_single_commit();
        let out = TempDir::new().unwrap();
        let built = build_baseline_pack(repo_dir.path(), commit_tip(tip, vec![]), out.path())
            .expect("build");
        let pack_bytes = std::fs::read(&built.pack_path).unwrap();
        assert!(pack_bytes.len() >= PACK_TRAILER_LEN);
        let trailer_start = pack_bytes.len() - PACK_TRAILER_LEN;
        let trailer = &pack_bytes[trailer_start..];
        let oid = ObjectId::try_from(trailer).unwrap();
        assert_eq!(built.content_sha.as_str(), oid.to_string());
    }

    /// A baseline pack installs into an empty repository and makes both commits available.
    #[test]
    fn build_baseline_pack_round_trips_via_bundle_write_to_directory() {
        let (repo_dir, c1, c2) = fixture_two_commits();
        let out = TempDir::new().unwrap();
        let built = build_baseline_pack(repo_dir.path(), commit_tip(c2, vec![]), out.path())
            .expect("build");
        // Shared helper performs the `Bundle::write_to_directory` install.
        let (_dst_dir, dst_repo) = install_into_fresh_repo(&built.pack_path);
        let odb = dst_repo.objects.clone().into_inner();
        assert!(odb.contains(c1.as_object_id()), "c1 must be reachable");
        assert!(odb.contains(c2.as_object_id()), "c2 must be reachable");
    }

    /// The incremental pack's object count is smaller than a full pack of the
    /// second commit would be.
    #[test]
    fn build_incremental_pack_only_emits_new_objects() {
        let (repo_dir, c1, c2) = fixture_two_commits();
        let out_baseline = TempDir::new().unwrap();
        let baseline =
            build_baseline_pack(repo_dir.path(), commit_tip(c1, vec![]), out_baseline.path())
                .expect("baseline");
        let out_incr = TempDir::new().unwrap();
        let incr = build_incremental_pack(repo_dir.path(), c1, c2, &[], out_incr.path())
            .expect("incremental");
        let baseline_idx =
            gix_pack::index::File::at(&baseline.idx_path, gix_hash::Kind::Sha1).unwrap();
        let incr_idx = gix_pack::index::File::at(&incr.idx_path, gix_hash::Kind::Sha1).unwrap();
        assert_eq!(
            baseline_idx.num_objects(),
            3,
            "baseline at c1: commit + tree + blob",
        );
        assert_eq!(
            incr_idx.num_objects(),
            6,
            "incremental c1→c2 must omit c1's blob (= 6 objects, not the 7 a TreeContents pack of c2 would produce)",
        );
    }

    /// Installing the baseline followed by the incremental pack makes both
    /// commits and the blob that only the first commit references available.
    #[test]
    fn baseline_plus_incremental_install_reaches_full_history() {
        let (repo_dir, c1, c2) = fixture_two_commits();
        let out_baseline = TempDir::new().unwrap();
        let baseline =
            build_baseline_pack(repo_dir.path(), commit_tip(c1, vec![]), out_baseline.path())
                .expect("baseline");
        let out_incr = TempDir::new().unwrap();
        let incr = build_incremental_pack(repo_dir.path(), c1, c2, &[], out_incr.path())
            .expect("incremental");

        // Look up the blob that belongs to c1's tree only.
        let c1_tree_id = root_tree_of(c1, repo_dir.path());
        let src = gix::open(repo_dir.path()).unwrap();
        let blob1 = {
            let tree = src
                .find_object(c1_tree_id)
                .unwrap()
                .peel_to_kind(gix::object::Kind::Tree)
                .unwrap()
                .into_tree();
            let entry = tree.iter().next().unwrap().unwrap();
            entry.oid().to_owned()
        };

        let dst = TempDir::new().unwrap();
        let dst_repo = gix::init(dst.path()).unwrap();
        let pack_dir = dst_repo.git_dir().join("objects/pack");
        std::fs::create_dir_all(&pack_dir).unwrap();
        // Unlike `install_into_fresh_repo`, each install here is given the
        // destination object database as a lookup handle.
        for pack_path in [&baseline.pack_path, &incr.pack_path] {
            let pf = File::open(pack_path).unwrap();
            let mut r = BufReader::new(pf);
            gix_pack::Bundle::write_to_directory(
                &mut r,
                Some(&pack_dir),
                &mut gix::progress::Discard,
                &AtomicBool::new(false),
                Some(dst_repo.objects.clone().into_inner()),
                gix_pack::bundle::write::Options {
                    object_hash: gix_hash::Kind::Sha1,
                    ..Default::default()
                },
            )
            .unwrap_or_else(|e| panic!("install pack {}: {e}", pack_path.display()));
        }

        let odb = dst_repo.objects.clone().into_inner();
        assert!(odb.contains(c1.as_object_id()), "c1 must be reachable");
        assert!(odb.contains(c2.as_object_id()), "c2 must be reachable");
        assert!(
            odb.contains(&blob1),
            "blob1 (c1-only) must be reachable after installing baseline + incremental",
        );
    }

    /// Writes an annotated tag object named `name` pointing at `target` and returns its id.
    /// No reference is created for the tag.
    fn write_annotated_tag(
        repo: &gix::Repository,
        target: ObjectId,
        target_kind: gix::object::Kind,
        name: &str,
    ) -> ObjectId {
        let tag = gix::objs::Tag {
            target,
            target_kind,
            name: name.into(),
            tagger: Some(signature().to_owned().expect("static signature is valid")),
            message: "release".into(),
            pgp_signature: None,
        };
        repo.write_object(&tag).unwrap().detach()
    }

    /// Initializes an empty repository and installs the pack at `pack_path`
    /// into its `objects/pack` directory.
    ///
    /// The `TempDir` is returned alongside the repository so the directory
    /// outlives the caller's use of the repository.
    fn install_into_fresh_repo(pack_path: &Path) -> (TempDir, gix::Repository) {
        let dst = TempDir::new().unwrap();
        let dst_repo = gix::init(dst.path()).unwrap();
        let pack_dir = dst_repo.git_dir().join("objects/pack");
        std::fs::create_dir_all(&pack_dir).unwrap();
        let pf = File::open(pack_path).unwrap();
        let mut r = BufReader::new(pf);
        gix_pack::Bundle::write_to_directory(
            &mut r,
            Some(&pack_dir),
            &mut gix::progress::Discard,
            &AtomicBool::new(false),
            None::<gix::odb::Handle>,
            gix_pack::bundle::write::Options {
                object_hash: gix_hash::Kind::Sha1,
                ..Default::default()
            },
        )
        .unwrap();
        (dst, dst_repo)
    }

    /// A one-element tag chain puts the tag object into the baseline pack,
    /// and the installed tag still points at the commit.
    #[test]
    fn build_baseline_pack_includes_tag_object_when_tag_chain_nonempty() {
        let (repo_dir, _c1, c2) = fixture_two_commits();
        let repo = gix::open(repo_dir.path()).unwrap();
        let tag_oid =
            write_annotated_tag(&repo, *c2.as_object_id(), gix::object::Kind::Commit, "v1");
        drop(repo);

        let out = TempDir::new().unwrap();
        let built = build_baseline_pack(repo_dir.path(), commit_tip(c2, vec![tag_oid]), out.path())
            .expect("build");
        let (_dst_dir, dst_repo) = install_into_fresh_repo(&built.pack_path);
        let odb = dst_repo.objects.clone().into_inner();
        assert!(odb.contains(&tag_oid), "tag object must be in pack");
        assert!(
            odb.contains(c2.as_object_id()),
            "tag's commit target must also be in pack",
        );
        let tag_obj = dst_repo
            .find_object(tag_oid)
            .expect("find tag")
            .peel_to_kind(gix::object::Kind::Tag)
            .expect("peel to tag");
        let target = tag_obj.into_tag().target_id().expect("decode tag");
        assert_eq!(
            target.detach(),
            *c2.as_object_id(),
            "tag must point at the commit",
        );
    }

    /// A tag that points at another tag: both tag objects end up in the pack.
    #[test]
    fn build_baseline_pack_includes_full_chain_for_tag_of_tag() {
        let (repo_dir, _c1, c2) = fixture_two_commits();
        let repo = gix::open(repo_dir.path()).unwrap();
        let inner = write_annotated_tag(
            &repo,
            *c2.as_object_id(),
            gix::object::Kind::Commit,
            "inner",
        );
        let outer = write_annotated_tag(&repo, inner, gix::object::Kind::Tag, "outer");
        drop(repo);

        let out = TempDir::new().unwrap();
        let built = build_baseline_pack(
            repo_dir.path(),
            commit_tip(c2, vec![outer, inner]),
            out.path(),
        )
        .expect("build");
        let (_dst_dir, dst_repo) = install_into_fresh_repo(&built.pack_path);
        let odb = dst_repo.objects.clone().into_inner();
        assert!(odb.contains(&outer), "outer tag must be in pack");
        assert!(odb.contains(&inner), "inner tag must be in pack");
        assert!(
            odb.contains(c2.as_object_id()),
            "commit target must also be in pack",
        );
    }

    /// With an empty tag chain the baseline pack of the two-commit fixture has 7 objects.
    #[test]
    fn build_baseline_pack_with_empty_tag_chain_emits_same_object_count_as_today() {
        let (repo_dir, _c1, c2) = fixture_two_commits();
        let out = TempDir::new().unwrap();
        let built = build_baseline_pack(repo_dir.path(), commit_tip(c2, vec![]), out.path())
            .expect("build");
        let idx = gix_pack::index::File::at(&built.idx_path, gix_hash::Kind::Sha1).unwrap();
        assert_eq!(
            idx.num_objects(),
            7,
            "branch-tip baseline must emit 7 objects (no tag chain to add)",
        );
    }

    /// A tag passed as the tag chain of an incremental build is part of the resulting pack.
    #[test]
    fn build_incremental_pack_includes_new_tag_chain() {
        let (repo_dir, c1, c2) = fixture_two_commits();
        let repo = gix::open(repo_dir.path()).unwrap();
        let new_tag =
            write_annotated_tag(&repo, *c2.as_object_id(), gix::object::Kind::Commit, "v2");
        drop(repo);

        let out = TempDir::new().unwrap();
        let built = build_incremental_pack(repo_dir.path(), c1, c2, &[new_tag], out.path())
            .expect("incremental");
        let (_dst_dir, dst_repo) = install_into_fresh_repo(&built.pack_path);
        let odb = dst_repo.objects.clone().into_inner();
        assert!(
            odb.contains(&new_tag),
            "new tag must be in incremental pack"
        );
    }

    /// Returns the id of the tree that `commit` points at, read from the repository at `repo_dir`.
    fn root_tree_of(commit: Sha, repo_dir: &Path) -> ObjectId {
        let repo = gix::open(repo_dir).unwrap();
        repo.find_object(*commit.as_object_id())
            .unwrap()
            .peel_to_kind(gix::object::Kind::Commit)
            .unwrap()
            .into_commit()
            .tree_id()
            .unwrap()
            .detach()
    }

    /// A tree tip with a tag: the pack holds the tag, the tree and the tree's entries.
    #[test]
    fn build_baseline_pack_for_tree_tip_includes_tree_closure_and_tag() {
        let (repo_dir, _c1, c2) = fixture_two_commits();
        let root_tree = root_tree_of(c2, repo_dir.path());
        let repo = gix::open(repo_dir.path()).unwrap();
        let tag = write_annotated_tag(&repo, root_tree, gix::object::Kind::Tree, "v1-tree");
        drop(repo);

        let out = TempDir::new().unwrap();
        let peeled = PeeledTip::Tree {
            tree: root_tree,
            tag_chain: vec![tag],
        };
        let built = build_baseline_pack(repo_dir.path(), peeled, out.path()).expect("build");
        let (_dst_dir, dst_repo) = install_into_fresh_repo(&built.pack_path);
        let odb = dst_repo.objects.clone().into_inner();
        assert!(odb.contains(&tag), "tag object must be in pack");
        assert!(odb.contains(&root_tree), "leaf tree must be in pack");

        let src = gix::open(repo_dir.path()).unwrap();
        let tree_obj = src.find_object(root_tree).unwrap().into_tree();
        for entry in tree_obj.iter() {
            let entry = entry.unwrap();
            assert!(
                odb.contains(entry.oid()),
                "blob {:?} must be in pack",
                entry.oid(),
            );
        }
    }

    /// A blob tip with a tag: the pack holds exactly two objects, the blob and the tag.
    #[test]
    fn build_baseline_pack_for_blob_tip_packs_only_blob_and_tag() {
        let (repo_dir, _c1, _c2) = fixture_two_commits();
        let repo = gix::open(repo_dir.path()).unwrap();
        let blob = repo.write_blob(b"leaf").unwrap().detach();
        let tag = write_annotated_tag(&repo, blob, gix::object::Kind::Blob, "v1-blob");
        drop(repo);

        let out = TempDir::new().unwrap();
        let peeled = PeeledTip::Blob {
            blob,
            tag_chain: vec![tag],
        };
        let built = build_baseline_pack(repo_dir.path(), peeled, out.path()).expect("build");
        let idx = gix_pack::index::File::at(&built.idx_path, gix_hash::Kind::Sha1).unwrap();
        assert_eq!(
            idx.num_objects(),
            2,
            "blob-tip pack must contain exactly the blob + the tag",
        );
        let (_dst_dir, dst_repo) = install_into_fresh_repo(&built.pack_path);
        let odb = dst_repo.objects.clone().into_inner();
        assert!(odb.contains(&blob));
        assert!(odb.contains(&tag));
    }

    /// A tree tip with an empty tag chain still packs the tree and its entries.
    #[test]
    fn build_baseline_pack_for_bare_tree_ref_emits_tree_with_empty_chain() {
        let (repo_dir, _c1, c2) = fixture_two_commits();
        let root_tree = root_tree_of(c2, repo_dir.path());

        let out = TempDir::new().unwrap();
        let peeled = PeeledTip::Tree {
            tree: root_tree,
            tag_chain: Vec::new(),
        };
        let built = build_baseline_pack(repo_dir.path(), peeled, out.path()).expect("build");
        let (_dst_dir, dst_repo) = install_into_fresh_repo(&built.pack_path);
        let odb = dst_repo.objects.clone().into_inner();
        assert!(odb.contains(&root_tree));

        let src = gix::open(repo_dir.path()).unwrap();
        let tree_obj = src.find_object(root_tree).unwrap().into_tree();
        for entry in tree_obj.iter() {
            let entry = entry.unwrap();
            assert!(
                odb.contains(entry.oid()),
                "tree blob {:?} must be in pack",
                entry.oid(),
            );
        }
    }
}