#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
use flate2::Compression;
use flate2::read::ZlibDecoder;
use flate2::write::ZlibEncoder;
use flate2::{Decompress, FlushDecompress};
use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
use sley_formats::{Bundle, BundleReference};
use sley_object::{
Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object,
tree_entry_object_type,
};
use sley_pack::{
MultiPackIndex, MultiPackIndexOidLookup, PackBitmapIndex, PackBitmapWriter, PackFile,
PackIndex, PackIndexByteSource, PackIndexEntry, PackIndexViewData, PackInput, PackWrite,
PackWriteOptions,
};
use std::collections::{HashMap, HashSet};
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex, OnceLock};
use std::{env, fs};
// Process-wide atomic counter that starts at zero. Its users are outside this
// part of the file.
// NOTE(review): presumably used to make temporary file names unique — confirm
// at the call sites.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
/// Read access to an object store.
///
/// Only [`ObjectReader::read_object`] is required; every other hook has a
/// default implementation that answers `false`.
pub trait ObjectReader {
/// Loads the object identified by `oid`.
///
/// Callers in this file treat `GitError::NotFound` as "object is absent" and
/// propagate every other error.
fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;
/// Reports whether `oid` is a shallow graft point. [`grafted_parents`]
/// returns no parents for such an object. Defaults to `false`.
fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
false
}
/// Defaults to `false`.
/// NOTE(review): presumably reports whether the store has any shallow graft
/// at all — confirm against the implementors.
fn has_shallow_grafts(&self) -> bool {
false
}
/// Defaults to `false`.
/// NOTE(review): presumably reports whether `oid` is covered by a promisor
/// remote — confirm against the implementors.
fn is_promised_object(&self, _oid: &ObjectId) -> bool {
false
}
}
/// Synthesizes the empty tree object when `oid` is the empty-tree id of `format`.
///
/// Returns `None` for every other id.
fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
    if *oid != ObjectId::empty_tree(format) {
        return None;
    }
    let empty_tree = EncodedObject::new(ObjectType::Tree, Vec::new());
    Some(Arc::new(empty_tree))
}
/// Attaches `oid` and `context` to an error that describes a missing object.
///
/// When `err` carries a not-found kind that maps to a missing-object kind, a
/// new contextual error is built; any other error is returned untouched.
fn with_missing_object_context(
    err: GitError,
    oid: ObjectId,
    context: MissingObjectContext,
) -> GitError {
    let missing_kind = err
        .not_found_kind()
        .and_then(sley_core::NotFoundKind::missing_object_kind);
    if let Some(kind) = missing_kind {
        return GitError::object_kind_not_found_in(oid, kind, context);
    }
    err
}
/// Applies shallow grafts to a commit's parent list.
///
/// Returns `parents` unchanged unless `reader` reports `oid` as a shallow
/// graft, in which case the commit is treated as having no parents.
pub fn grafted_parents<R: ObjectReader + ?Sized>(
    reader: &R,
    oid: &ObjectId,
    parents: Vec<ObjectId>,
) -> Vec<ObjectId> {
    if !reader.is_shallow_graft(oid) {
        return parents;
    }
    Vec::new()
}
/// Write access to an object store.
pub trait ObjectWriter {
/// Stores `object` and returns the id it was written under. Pack unpacking in
/// this file compares that id with the id recorded in the pack.
fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
}
/// Outcome of unpacking or installing the pack carried by a bundle.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BundleUnbundleResult {
/// Ids of the objects that were written or installed from the bundle's pack.
pub written_objects: Vec<ObjectId>,
/// Copy of the references listed in the bundle.
pub references: Vec<BundleReference>,
}
/// Outcome of writing every entry of a pack through an [`ObjectWriter`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackUnpackResult {
/// Ids returned by the writer, in pack entry order.
pub written_objects: Vec<ObjectId>,
}
/// Describes a pack that was installed on disk.
///
/// NOTE(review): the code that fills this struct is outside this part of the
/// file; the field notes below are inferred from the field names and types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackInstallResult {
/// Presumably the pack's base name (for example `pack-<checksum>`) — confirm.
pub pack_name: String,
/// Presumably the location of the installed `.pack` file — confirm.
pub pack_path: PathBuf,
/// Presumably the location of the matching `.idx` file — confirm.
pub index_path: PathBuf,
/// Presumably the `.promisor` marker, when one was written — confirm.
pub promisor_path: Option<PathBuf>,
/// Ids of the objects contained in the pack.
pub object_ids: Vec<ObjectId>,
}
/// Outcome of [`RawPackInstaller::install_raw_pack`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawPackInstallResult {
/// Ids of the objects the installed pack provided.
pub object_ids: Vec<ObjectId>,
}
/// Options forwarded to the pack installation step. The default leaves
/// `promisor` unset.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RawPackInstallOptions {
/// NOTE(review): presumably requests that the installed pack be marked as a
/// promisor pack — the consumer is outside this part of the file; confirm.
pub promisor: bool,
}
/// Destination that can take a complete pack as raw bytes.
pub trait RawPackInstaller {
/// Installs the pack held in `pack_bytes` and reports the ids of the objects
/// it provided.
fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult>;
}
/// Result of resolving an object-id prefix.
///
/// NOTE(review): the resolver itself is outside this part of the file; the
/// variant notes are inferred from the variant names and payloads.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObjectPrefixResolution {
/// No object matched.
Missing,
/// Exactly one object matched.
Unique(ObjectId),
/// Several objects matched; the payload lists the candidates.
Ambiguous(Vec<ObjectId>),
}
/// Storage details of a single object.
///
/// NOTE(review): the producer of this struct is outside this part of the file;
/// the field notes are inferred from the field names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectStorageInfo {
/// Presumably the number of bytes the object occupies on disk — confirm.
pub disk_size: u64,
/// Presumably the id of the delta base; how "no delta base" is encoded is not
/// visible here — confirm.
pub deltabase: ObjectId,
}
impl RawPackInstaller for FileObjectDatabase {
    /// Installs `pack_bytes` through `FileObjectDatabase::install_raw_pack` and
    /// keeps only the object ids of its result.
    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
        // The type-qualified path is deliberate: it is kept exactly as the
        // original call so that method resolution does not change.
        let installed = FileObjectDatabase::install_raw_pack(self, pack_bytes)?;
        let object_ids = installed.object_ids;
        Ok(RawPackInstallResult { object_ids })
    }
}
impl RawPackInstaller for ObjectDatabase {
    /// "Installs" a pack by unpacking every entry into this database and
    /// reporting the ids that were written.
    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
        let unpacked = unpack_packfile_objects(pack_bytes, self.format, self)?;
        let object_ids = unpacked.written_objects;
        Ok(RawPackInstallResult { object_ids })
    }
}
pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
let mut missing = Vec::new();
for prerequisite in &bundle.prerequisites {
match reader.read_object(&prerequisite.oid) {
Ok(object) => {
let actual = object.object_id(bundle.format)?;
if actual != prerequisite.oid {
return Err(GitError::InvalidObject(format!(
"bundle prerequisite {} hashes to {actual}",
prerequisite.oid
)));
}
}
Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
Err(err) => return Err(err),
}
}
if missing.is_empty() {
return Ok(());
}
Err(GitError::object_not_found_in(
missing[0],
MissingObjectContext::PackInstall,
))
}
/// Verifies a bundle's prerequisites and writes every object of its pack
/// through `writer`.
///
/// Returns the written ids together with a copy of the bundle's references.
pub fn unbundle_objects<R, W>(
    bundle: &Bundle,
    prerequisite_reader: &R,
    writer: &mut W,
) -> Result<BundleUnbundleResult>
where
    R: ObjectReader,
    W: ObjectWriter,
{
    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
    let unpacked = write_pack_objects(PackFile::parse_bundle(bundle)?, writer, "bundle")?;
    Ok(BundleUnbundleResult {
        written_objects: unpacked.written_objects,
        references: bundle.references.clone(),
    })
}
pub fn install_bundle_pack<R>(
bundle: &Bundle,
prerequisite_reader: &R,
destination: &impl RawPackInstaller,
) -> Result<BundleUnbundleResult>
where
R: ObjectReader,
{
verify_bundle_prerequisites(bundle, prerequisite_reader)?;
let install = destination.install_raw_pack(&bundle.pack)?;
Ok(BundleUnbundleResult {
written_objects: install.object_ids,
references: bundle.references.clone(),
})
}
/// Parses `pack_bytes` as a pack of the given `format` and writes each entry
/// through `writer`.
pub fn unpack_packfile_objects<W>(
    pack_bytes: &[u8],
    format: ObjectFormat,
    writer: &W,
) -> Result<PackUnpackResult>
where
    W: ObjectWriter,
{
    write_pack_objects(PackFile::parse(pack_bytes, format)?, writer, "pack")
}
/// Writes every entry of `pack` through `writer`, in pack order.
///
/// `source` only labels the error message. The first write failure, or the
/// first entry whose written id differs from the id recorded in the pack,
/// aborts the run; later entries are not written.
fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
where
    W: ObjectWriter,
{
    let mut written_objects = Vec::with_capacity(pack.entries.len());
    for pack_entry in pack.entries {
        let expected = pack_entry.entry.oid;
        let actual = writer.write_object(pack_entry.object)?;
        if actual == expected {
            written_objects.push(actual);
            continue;
        }
        return Err(GitError::InvalidObject(format!(
            "{source} object id mismatch: expected {expected}, wrote {actual}"
        )));
    }
    Ok(PackUnpackResult { written_objects })
}
/// Collects the ids of all objects reachable from `starts`, with nothing
/// excluded.
pub fn collect_reachable_object_ids<R, I>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
{
    let nothing_excluded = HashSet::new();
    walk_reachable_objects(reader, format, starts, &nothing_excluded, |_, _| {})
}
/// Collects the ids of objects reachable from `starts`, passing `cut` to the
/// underlying walk and excluding nothing.
///
/// NOTE(review): the exact meaning of `cut` is defined by
/// `walk_reachable_objects_with_cut`, which is outside this part of the file.
pub fn collect_reachable_object_ids_with_cut<R, I>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    cut: &HashSet<ObjectId>,
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
{
    let nothing_excluded = HashSet::new();
    walk_reachable_objects_with_cut(reader, format, starts, &nothing_excluded, cut, |_, _| {})
}
/// Collects the ids of objects reachable from `starts`, passing `excluded` to
/// the underlying walk.
pub fn collect_reachable_object_ids_excluding<R, I>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
{
    // The walk reports every visited object to a callback; ids are all that is
    // needed here, so the callback does nothing.
    let ignore_objects = |_, _| {};
    walk_reachable_objects(reader, format, starts, excluded, ignore_objects)
}
/// Collects the objects (not just the ids) that the reachability walk visits
/// from `starts`, in visit order.
pub fn collect_reachable_objects<R, I>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
) -> Result<Vec<Arc<EncodedObject>>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
{
    let mut visited: Vec<Arc<EncodedObject>> = Vec::new();
    // The id set returned by the walk is not needed; only the callback output is.
    let _ids = walk_reachable_objects(reader, format, starts, excluded, |_, object| {
        visited.push(Arc::clone(object));
    })?;
    Ok(visited)
}
/// An object paired with its already-known id, queued for writing into a pack.
#[derive(Debug, Clone)]
struct ReachablePackObject {
/// Id of `object`, as reported by the walk or listing that produced it.
oid: ObjectId,
/// Shared handle to the encoded object.
object: Arc<EncodedObject>,
}
/// Walks the objects reachable from `starts` and records each visited object
/// together with its id, in visit order.
fn collect_reachable_pack_objects<R, I>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
) -> Result<Vec<ReachablePackObject>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
{
    let mut visited: Vec<ReachablePackObject> = Vec::new();
    let _ids = walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
        let object = Arc::clone(object);
        visited.push(ReachablePackObject { oid: *oid, object });
    })?;
    Ok(visited)
}
/// Builds the borrowed `PackInput` view the pack writer expects, one per
/// object and in the same order.
fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
    let mut inputs = Vec::with_capacity(objects.len());
    for item in objects {
        inputs.push(PackInput {
            oid: &item.oid,
            object: &item.object,
        });
    }
    inputs
}
/// Packs everything reachable from `starts` in `source` and installs the pack
/// into `destination`, excluding nothing.
///
/// Returns `Ok(None)` when no object was reachable.
pub fn install_reachable_pack<I>(
    source: &impl ObjectReader,
    destination: &impl RawPackInstaller,
    format: ObjectFormat,
    starts: I,
) -> Result<Option<RawPackInstallResult>>
where
    I: IntoIterator<Item = ObjectId>,
{
    let nothing_excluded = HashSet::new();
    install_reachable_pack_excluding(source, destination, format, starts, &nothing_excluded)
}
/// Packs the objects reachable from `starts` (honouring `excluded`) and
/// installs the resulting pack bytes into `destination`.
///
/// Returns `Ok(None)` when there was nothing to pack.
pub fn install_reachable_pack_excluding<I>(
    source: &impl ObjectReader,
    destination: &impl RawPackInstaller,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
) -> Result<Option<RawPackInstallResult>>
where
    I: IntoIterator<Item = ObjectId>,
{
    let Some(built) = build_reachable_pack(source, format, starts, excluded)? else {
        return Ok(None);
    };
    let installed = destination.install_raw_pack(&built.pack)?;
    Ok(Some(installed))
}
/// Builds an in-memory pack of the objects reachable from `starts`.
///
/// Returns `Ok(None)` when the walk yields no objects, so callers never see an
/// empty pack.
pub fn build_reachable_pack<R, I>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
) -> Result<Option<PackWrite>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
{
    let reachable = collect_reachable_pack_objects(reader, format, starts, excluded)?;
    if reachable.is_empty() {
        return Ok(None);
    }
    let inputs = pack_inputs(&reachable);
    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
    Ok(Some(written))
}
pub fn build_and_install_reachable_pack<R, I>(
source: &R,
destination: &FileObjectDatabase,
format: ObjectFormat,
starts: I,
excluded: &HashSet<ObjectId>,
options: RawPackInstallOptions,
) -> Result<Option<PackInstallResult>>
where
R: ObjectReader,
I: IntoIterator<Item = ObjectId>,
{
build_and_install_reachable_pack_filtered(
source,
destination,
format,
starts,
excluded,
options,
None,
None,
)
}
/// Object filter applied by [`build_and_install_reachable_pack_filtered`]
/// after the reachability walk. Objects named directly in `starts` are always
/// kept, whatever the filter.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PackObjectFilter {
/// Drop every blob.
BlobNone,
/// Keep only blobs whose body is strictly shorter than the given byte count.
BlobLimit(u64),
/// Drop every blob, and keep only trees whose recorded depth is strictly less
/// than the given value. Depths come from `collect_tree_filter_depths`, where
/// a commit's root tree has depth 0.
TreeDepth(u32),
/// Keep only blobs whose full path inside some reachable commit's tree is
/// exactly one of the listed paths.
SparsePathSet(Vec<String>),
}
#[allow(clippy::too_many_arguments)]
pub fn build_and_install_reachable_pack_filtered<R, I>(
source: &R,
destination: &FileObjectDatabase,
format: ObjectFormat,
starts: I,
excluded: &HashSet<ObjectId>,
options: RawPackInstallOptions,
filter: Option<PackObjectFilter>,
unpack_limit: Option<usize>,
) -> Result<Option<PackInstallResult>>
where
R: ObjectReader,
I: IntoIterator<Item = ObjectId>,
{
let starts: Vec<ObjectId> = starts.into_iter().collect();
let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
match filter {
Some(PackObjectFilter::BlobNone) => {
objects.retain(|entry| {
entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
});
}
Some(PackObjectFilter::BlobLimit(limit)) => {
objects.retain(|entry| {
entry.object.object_type != ObjectType::Blob
|| wanted.contains(&entry.oid)
|| (entry.object.body.len() as u64) < limit
});
}
Some(PackObjectFilter::TreeDepth(depth)) => {
let tree_depths = collect_tree_filter_depths(source, format, &objects)?;
objects.retain(|entry| {
if wanted.contains(&entry.oid) {
return true;
}
match entry.object.object_type {
ObjectType::Blob => false,
ObjectType::Tree => tree_depths
.get(&entry.oid)
.is_some_and(|tree_depth| *tree_depth < depth),
_ => true,
}
});
}
Some(PackObjectFilter::SparsePathSet(paths)) => {
let allowed_blobs = collect_sparse_filter_blobs(source, format, &objects, &paths)?;
objects.retain(|entry| {
entry.object.object_type != ObjectType::Blob
|| wanted.contains(&entry.oid)
|| allowed_blobs.contains(&entry.oid)
});
}
None => {}
}
if objects.is_empty() {
return Ok(None);
}
if let Some(limit) = unpack_limit
&& objects.len() < limit
{
for entry in &objects {
destination.loose().write_object((*entry.object).clone())?;
}
return Ok(None);
}
let inputs = pack_inputs(&objects);
let pack = PackFile::write_packed_with_known_ids(&inputs, format)?;
trace_packfile(&pack.pack)?;
destination
.install_generated_pack_unchecked(&pack, options)
.map(Some)
}
/// Dumps `pack` to the file named by the `GIT_TRACE_PACKFILE` environment
/// variable.
///
/// Does nothing when the variable is unset or empty; a failed write is
/// returned as an error.
fn trace_packfile(pack: &[u8]) -> Result<()> {
    match env::var_os("GIT_TRACE_PACKFILE") {
        Some(path) if !path.is_empty() => {
            fs::write(path, pack)?;
            Ok(())
        }
        _ => Ok(()),
    }
}
/// Computes, for every tree in `objects` that is reachable from a commit in
/// `objects`, the smallest depth at which it occurs.
///
/// A commit's root tree has depth 0 and each level of nesting adds one. Trees
/// that are not part of `objects` are neither recorded nor descended into.
fn collect_tree_filter_depths<R>(
    reader: &R,
    format: ObjectFormat,
    objects: &[ReachablePackObject],
) -> Result<HashMap<ObjectId, u32>>
where
    R: ObjectReader,
{
    let available: HashSet<ObjectId> = objects.iter().map(|entry| entry.oid).collect();

    // Seed the work list with the root tree of every commit.
    let mut pending: Vec<(ObjectId, u32)> = Vec::new();
    let commits = objects
        .iter()
        .filter(|entry| entry.object.object_type == ObjectType::Commit);
    for commit_entry in commits {
        let root = Commit::parse(format, &commit_entry.object.body)?.tree;
        if available.contains(&root) {
            pending.push((root, 0u32));
        }
    }

    let mut depths = HashMap::new();
    while let Some((tree_oid, depth)) = pending.pop() {
        // Revisit a tree only when this route reaches it at a smaller depth.
        if matches!(depths.get(&tree_oid), Some(known) if *known <= depth) {
            continue;
        }
        depths.insert(tree_oid, depth);
        let tree = reader.read_object(&tree_oid)?;
        if tree.object_type != ObjectType::Tree {
            continue;
        }
        let child_depth = depth.saturating_add(1);
        for child in TreeEntries::new(format, &tree.body) {
            let child = child?;
            let is_subtree = tree_entry_object_type(child.mode) == ObjectType::Tree;
            if is_subtree && available.contains(&child.oid) {
                pending.push((child.oid, child_depth));
            }
        }
    }
    Ok(depths)
}
/// Collects the ids of blobs whose full path, in the tree of any commit found
/// in `objects`, is exactly one of `paths`.
///
/// Paths are compared as `/`-joined entry names relative to the commit's root
/// tree; entry names that are not valid UTF-8 are compared after lossy
/// conversion.
fn collect_sparse_filter_blobs<R>(
    reader: &R,
    format: ObjectFormat,
    objects: &[ReachablePackObject],
    paths: &[String],
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
{
    let wanted_paths: HashSet<&str> = paths.iter().map(String::as_str).collect();
    let mut allowed = HashSet::new();
    // Shared across commits so that an unchanged (tree, path) pair is walked
    // only once for the whole history.
    let mut seen_trees: HashSet<(ObjectId, String)> = HashSet::new();
    for entry in objects {
        if entry.object.object_type != ObjectType::Commit {
            continue;
        }
        let commit = Commit::parse(format, &entry.object.body)?;
        collect_sparse_tree_blobs(
            reader,
            format,
            &commit.tree,
            "",
            &wanted_paths,
            &mut seen_trees,
            &mut allowed,
        )?;
    }
    Ok(allowed)
}

/// Recursively walks the tree `tree_oid`, mounted at `prefix`, and adds to
/// `allowed` every non-tree entry whose full path is in `wanted_paths`.
///
/// `seen_trees` is keyed on the tree id *and* the prefix it was reached under.
/// Keying on the id alone would skip a tree that was first seen under a
/// non-matching directory and later reappears (a duplicated or renamed
/// directory) under a matching one, wrongly dropping its wanted blobs.
///
/// An object that is not a tree is ignored rather than reported as an error.
fn collect_sparse_tree_blobs<R>(
    reader: &R,
    format: ObjectFormat,
    tree_oid: &ObjectId,
    prefix: &str,
    wanted_paths: &HashSet<&str>,
    seen_trees: &mut HashSet<(ObjectId, String)>,
    allowed: &mut HashSet<ObjectId>,
) -> Result<()>
where
    R: ObjectReader,
{
    if !seen_trees.insert((*tree_oid, prefix.to_owned())) {
        return Ok(());
    }
    let tree = reader.read_object(tree_oid)?;
    if tree.object_type != ObjectType::Tree {
        return Ok(());
    }
    for entry in TreeEntries::new(format, &tree.body) {
        let entry = entry?;
        let name = String::from_utf8_lossy(entry.name);
        let path = if prefix.is_empty() {
            name.into_owned()
        } else {
            format!("{prefix}/{name}")
        };
        if tree_entry_object_type(entry.mode) == ObjectType::Tree {
            collect_sparse_tree_blobs(
                reader,
                format,
                &entry.oid,
                &path,
                wanted_paths,
                seen_trees,
                allowed,
            )?;
        } else if wanted_paths.contains(path.as_str()) {
            allowed.insert(entry.oid);
        }
    }
    Ok(())
}
/// Single-pack convenience wrapper around
/// [`assemble_pack_with_verbatim_reuses`].
pub fn assemble_pack_with_verbatim_reuse(
    format: ObjectFormat,
    reused_pack_bytes: &[u8],
    appended: &[PackInput<'_>],
) -> Result<(Vec<u8>, u32)> {
    let reused_packs = [reused_pack_bytes];
    assemble_pack_with_verbatim_reuses(format, &reused_packs, appended)
}
/// Builds a version-2 pack by copying the entry sections of existing packs
/// verbatim and appending `appended` as undeltified entries.
///
/// Each reused pack must be a complete pack: a 12-byte header (`PACK`,
/// version 2, object count) followed by its entries and a trailing checksum of
/// `format.raw_len()` bytes. Only the bytes between header and checksum are
/// copied; the copied entries are not inspected.
///
/// Returns the assembled pack bytes (with a fresh trailing checksum) and the
/// number of objects that came from the reused packs.
///
/// # Errors
///
/// * `GitError::InvalidFormat` when a reused pack is too short, lacks the
///   `PACK` signature, or when the total object count does not fit in `u32`.
/// * `GitError::Unsupported` when a reused pack is not version 2.
/// * Any error from compressing an appended object or computing the checksum.
pub fn assemble_pack_with_verbatim_reuses(
    format: ObjectFormat,
    reused_packs: &[&[u8]],
    appended: &[PackInput<'_>],
) -> Result<(Vec<u8>, u32)> {
    let hash_len = format.raw_len();
    let mut reused_count = 0u32;
    // Capacity is only a hint, so it saturates instead of overflowing.
    let mut capacity = (12 + hash_len).saturating_add(appended.len().saturating_mul(64));
    for reused_pack_bytes in reused_packs {
        if reused_pack_bytes.len() < 12 + hash_len {
            return Err(GitError::InvalidFormat("reused pack too short".into()));
        }
        if &reused_pack_bytes[..4] != b"PACK" {
            return Err(GitError::InvalidFormat(
                "reused pack has no signature".into(),
            ));
        }
        let version = u32::from_be_bytes([
            reused_pack_bytes[4],
            reused_pack_bytes[5],
            reused_pack_bytes[6],
            reused_pack_bytes[7],
        ]);
        if version != 2 {
            return Err(GitError::Unsupported(format!(
                "reused pack version {version}"
            )));
        }
        let count = u32::from_be_bytes([
            reused_pack_bytes[8],
            reused_pack_bytes[9],
            reused_pack_bytes[10],
            reused_pack_bytes[11],
        ]);
        reused_count = reused_count
            .checked_add(count)
            .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
        capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
    }
    // A plain `as u32` cast would silently truncate and put a wrong object
    // count into the header; reject oversized inputs instead.
    let appended_count = u32::try_from(appended.len())
        .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
    let total = reused_count
        .checked_add(appended_count)
        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
    let mut out = Vec::with_capacity(capacity);
    out.extend_from_slice(b"PACK");
    out.extend_from_slice(&2u32.to_be_bytes());
    out.extend_from_slice(&total.to_be_bytes());
    for reused_pack_bytes in reused_packs {
        // The length check above guarantees this range is in bounds.
        out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
    }
    for input in appended {
        write_undeltified_pack_entry(&mut out, input.object)?;
    }
    let checksum = sley_core::digest_bytes(format, &out)?;
    out.extend_from_slice(checksum.as_bytes());
    Ok((out, reused_count))
}
/// Builds a version-2 pack from already-encoded pack entries plus `appended`
/// objects written as undeltified entries.
///
/// Every slice in `reused_entries` is copied as-is and counted as exactly one
/// object; the slices are not validated here.
///
/// Returns the assembled pack bytes (header, entries, trailing checksum) and
/// the number of reused entries.
///
/// # Errors
///
/// `GitError::InvalidFormat` when the object count does not fit in `u32`, plus
/// any error from compressing an appended object or computing the checksum.
pub fn assemble_pack_with_verbatim_entries(
    format: ObjectFormat,
    reused_entries: &[&[u8]],
    appended: &[PackInput<'_>],
) -> Result<(Vec<u8>, u32)> {
    let too_many = || GitError::InvalidFormat("too many pack objects".into());
    let reused_count = u32::try_from(reused_entries.len()).map_err(|_| too_many())?;
    // A plain `as u32` cast would silently truncate and put a wrong object
    // count into the header; reject oversized inputs instead.
    let appended_count = u32::try_from(appended.len()).map_err(|_| too_many())?;
    let total = reused_count
        .checked_add(appended_count)
        .ok_or_else(too_many)?;
    // Capacity is only a hint, so it saturates instead of overflowing.
    let mut capacity =
        (12 + format.raw_len()).saturating_add(appended.len().saturating_mul(64));
    for entry in reused_entries {
        capacity = capacity.saturating_add(entry.len());
    }
    let mut out = Vec::with_capacity(capacity);
    out.extend_from_slice(b"PACK");
    out.extend_from_slice(&2u32.to_be_bytes());
    out.extend_from_slice(&total.to_be_bytes());
    for entry in reused_entries {
        out.extend_from_slice(entry);
    }
    for input in appended {
        write_undeltified_pack_entry(&mut out, input.object)?;
    }
    let checksum = sley_core::digest_bytes(format, &out)?;
    out.extend_from_slice(checksum.as_bytes());
    Ok((out, reused_count))
}
/// Appends one non-delta pack entry for `object` to `out`.
///
/// The entry is a variable-length header followed by the zlib-compressed
/// body. The first header byte holds the type code in bits 4-6 and the low
/// four bits of the body length; each following byte holds seven more length
/// bits. The high bit of a byte signals that another header byte follows.
///
/// If compression fails, the header bytes already pushed stay in `out`.
fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
    let type_bits: u8 = match object.object_type {
        ObjectType::Commit => 1,
        ObjectType::Tree => 2,
        ObjectType::Blob => 3,
        ObjectType::Tag => 4,
    };
    let size = object.body.len() as u64;
    let mut current = (type_bits << 4) | (size & 0x0f) as u8;
    let mut remaining = size >> 4;
    loop {
        if remaining == 0 {
            // Last header byte: continuation bit stays clear.
            out.push(current);
            break;
        }
        out.push(current | 0x80);
        current = (remaining & 0x7f) as u8;
        remaining >>= 7;
    }

    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(&object.body)?;
    let compressed = encoder.finish()?;
    out.extend_from_slice(&compressed);
    Ok(())
}
/// A freshly generated pack plus the bookkeeping needed to install it and to
/// prune what it replaces.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackResult {
/// Bytes of the new pack file.
pub pack: Vec<u8>,
/// Bytes of the matching pack index.
pub idx: Vec<u8>,
/// Number of entries the pack writer reported.
pub object_count: usize,
/// Existing pack files that the new pack makes obsolete.
pub obsolete_packs: Vec<PathBuf>,
/// Loose object ids that are also contained in the new pack, sorted by their
/// raw bytes.
pub packed_loose: Vec<ObjectId>,
// Pack stems to keep; handed to `prune_obsolete_pack_paths` at install time.
retained_pack_stems: Vec<String>,
// Checksum reported by the pack writer; the install step validates the pack
// and the index against it and derives the pack file name from it.
pack_checksum: ObjectId,
// Index entries reported by the pack writer; the install step compares them
// with the parsed `idx`.
index_entries: Vec<PackIndexEntry>,
}
/// Options for [`repack_reachable_objects_with_options`]. The default leaves
/// both flags `false` and `keep_pack_stems` empty.
#[derive(Debug, Clone, Default)]
pub struct RepackOptions {
/// When set, the object database is opened without alternates and packed
/// objects of alternates are not pulled into the new pack.
pub local: bool,
/// When unset, packs that have a `.keep` file are retained and the objects
/// listed in retained packs are left out of the new pack. When set, nothing
/// is left out on account of retained packs.
pub pack_kept_objects: bool,
/// Pack stems (file name without extension) that are always retained.
pub keep_pack_stems: HashSet<String>,
}
/// Repacks the objects reachable from `roots` using the default
/// [`RepackOptions`].
pub fn repack_reachable_objects(
    git_dir: &Path,
    format: ObjectFormat,
    roots: &[ObjectId],
) -> Result<Option<RepackResult>> {
    let default_options = RepackOptions::default();
    repack_reachable_objects_with_options(git_dir, format, roots, &default_options)
}
/// Builds one new pack that contains every object reachable from `roots`.
///
/// The walk follows commit parents (respecting shallow grafts), commit trees,
/// tree entries other than gitlinks, and tag targets. Objects that cannot be
/// found are skipped silently. Unless `options.pack_kept_objects` is set,
/// objects already stored in retained packs are walked but not packed again.
/// Unless `options.local` is set, every object packed in an alternate object
/// directory is added as well.
///
/// Nothing is written to disk here; the result is meant to be installed with
/// [`install_repack_result`]. Returns `Ok(None)` when there is nothing to pack.
pub fn repack_reachable_objects_with_options(
    git_dir: &Path,
    format: ObjectFormat,
    roots: &[ObjectId],
    options: &RepackOptions,
) -> Result<Option<RepackResult>> {
    let objects_dir = repository_objects_dir(git_dir);
    let pack_dir = objects_dir.join("pack");
    let database = if options.local {
        FileObjectDatabase::without_alternates(objects_dir.clone(), format)
    } else {
        FileObjectDatabase::new(objects_dir.clone(), format)
    };
    let retained_pack_stems = repack_retained_pack_stems(
        &pack_dir,
        &options.keep_pack_stems,
        !options.pack_kept_objects,
    )?;
    let excluded_oids = if options.pack_kept_objects {
        HashSet::new()
    } else {
        pack_oids_for_stems(&pack_dir, format, &retained_pack_stems)?
    };

    let mut seen: HashSet<ObjectId> = HashSet::new();
    let mut objects: Vec<ReachablePackObject> = Vec::new();
    let mut pending: Vec<ObjectId> = roots.to_vec();
    while let Some(oid) = pending.pop() {
        if !seen.insert(oid) {
            continue;
        }
        let object = match database.read_object(&oid) {
            Ok(object) => object,
            // Missing objects are tolerated; the walk simply stops there.
            Err(GitError::NotFound(_)) => continue,
            Err(err) => return Err(err),
        };
        match object.object_type {
            ObjectType::Commit => {
                let commit = Commit::parse_ref(format, &object.body)?;
                pending.extend(grafted_parents(&database, &oid, commit.parents));
                pending.push(commit.tree);
            }
            ObjectType::Tree => {
                for entry in TreeEntries::new(format, &object.body) {
                    let entry = entry?;
                    if !entry.is_gitlink() {
                        pending.push(entry.oid);
                    }
                }
            }
            ObjectType::Tag => {
                let tag = Tag::parse_ref(format, &object.body)?;
                pending.push(tag.object);
            }
            ObjectType::Blob => {}
        }
        // Excluded objects are still traversed above so that objects reachable
        // only through them are found.
        if !excluded_oids.contains(&oid) {
            objects.push(ReachablePackObject { oid, object });
        }
    }

    if !options.local {
        for (alternate, oid) in alternate_packed_object_ids(&objects_dir, format)? {
            if excluded_oids.contains(&oid) || !seen.insert(oid) {
                continue;
            }
            let alternate_db = FileObjectDatabase::without_alternates(alternate, format);
            match alternate_db.read_object(&oid) {
                Ok(object) => objects.push(ReachablePackObject { oid, object }),
                Err(GitError::NotFound(_)) => {}
                Err(err) => return Err(err),
            }
        }
    }
    if objects.is_empty() {
        return Ok(None);
    }

    let inputs = pack_inputs(&objects);
    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
    let object_count = written.entries.len();
    // Every existing pack except one that already carries the new pack's name
    // becomes obsolete.
    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
    let obsolete_packs = existing_pack_files(&pack_dir)?
        .into_iter()
        .filter(|path| {
            path.file_name().and_then(|name| name.to_str()) != Some(new_pack_file_name.as_str())
        })
        .collect();
    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
        .into_iter()
        .filter(|oid| packed_oid_set.contains(oid))
        .collect();
    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));

    // The writer's output is consumed here, so its parts are moved rather
    // than cloned.
    Ok(Some(RepackResult {
        pack: written.pack,
        idx: written.index,
        object_count,
        obsolete_packs,
        packed_loose,
        retained_pack_stems,
        pack_checksum: written.checksum,
        index_entries: written.entries,
    }))
}
/// Determines which pack stems a repack must leave alone.
///
/// The result always contains `explicit`. When `keep_dot_keep` is set, it also
/// contains the stem of every pack in `pack_dir` that has a sibling `.keep`
/// file. The list is returned sorted and free of duplicates.
fn repack_retained_pack_stems(
    pack_dir: &Path,
    explicit: &HashSet<String>,
    keep_dot_keep: bool,
) -> Result<Vec<String>> {
    let mut retained: HashSet<String> = explicit.iter().cloned().collect();
    if keep_dot_keep {
        for pack_path in existing_pack_files(pack_dir)? {
            if !pack_path.with_extension("keep").exists() {
                continue;
            }
            // Packs whose stem is not valid UTF-8 cannot be named and are skipped.
            if let Some(stem) = pack_path.file_stem().and_then(|stem| stem.to_str()) {
                retained.insert(stem.to_string());
            }
        }
    }
    let mut ordered: Vec<String> = retained.into_iter().collect();
    ordered.sort();
    Ok(ordered)
}
/// Collects the ids of all objects indexed by the packs in `pack_dir` whose
/// stem is listed in `stems`.
///
/// Packs without an `.idx` file are skipped. With an empty `stems` list the
/// directory is not read at all.
fn pack_oids_for_stems(
    pack_dir: &Path,
    format: ObjectFormat,
    stems: &[String],
) -> Result<HashSet<ObjectId>> {
    let wanted: HashSet<&str> = stems.iter().map(String::as_str).collect();
    let mut oids = HashSet::new();
    if wanted.is_empty() {
        return Ok(oids);
    }
    for pack_path in existing_pack_files(pack_dir)? {
        let is_wanted = pack_path
            .file_stem()
            .and_then(|stem| stem.to_str())
            .is_some_and(|stem| wanted.contains(stem));
        if !is_wanted {
            continue;
        }
        let index_path = pack_path.with_extension("idx");
        if !index_path.exists() {
            continue;
        }
        let index_bytes = fs::read(index_path)?;
        let index = PackIndex::parse(&index_bytes, format)?;
        for entry in index.entries {
            oids.insert(entry.oid);
        }
    }
    Ok(oids)
}
/// Lists every packed object of every alternate object directory of
/// `objects_dir` as `(alternate directory, object id)` pairs.
///
/// Pairs are sorted by directory and then by the id's raw bytes, which makes
/// the result deterministic. Ids are de-duplicated per alternate only.
fn alternate_packed_object_ids(
    objects_dir: &Path,
    format: ObjectFormat,
) -> Result<Vec<(PathBuf, ObjectId)>> {
    let mut pairs: Vec<(PathBuf, ObjectId)> = Vec::new();
    for alternate in alternate_object_dirs(objects_dir) {
        let mut packed = HashSet::new();
        collect_packed_object_ids(&alternate.join("pack"), format, &mut packed)?;
        for oid in packed {
            pairs.push((alternate.clone(), oid));
        }
    }
    pairs.sort_by(|(left_dir, left_oid), (right_dir, right_oid)| {
        left_dir
            .cmp(right_dir)
            .then(left_oid.as_bytes().cmp(right_oid.as_bytes()))
    });
    Ok(pairs)
}
/// Builds one new pack that contains every object listed by
/// `object_ids_in_objects_dir`, whether or not it is reachable.
///
/// Every existing pack except one already carrying the new pack's name is
/// reported as obsolete, and no pack is retained. Nothing is written to disk
/// here. Returns `Ok(None)` when the object directory lists no objects.
pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
    let objects_dir = repository_objects_dir(git_dir);
    let database = FileObjectDatabase::new(objects_dir.clone(), format);
    let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
    if all_oids.is_empty() {
        return Ok(None);
    }

    let mut to_pack = Vec::with_capacity(all_oids.len());
    for oid in &all_oids {
        let object = database.read_object(oid)?;
        to_pack.push(ReachablePackObject { oid: *oid, object });
    }
    let inputs = pack_inputs(&to_pack);
    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
    let object_count = written.entries.len();

    let replacement_name = format!("pack-{}.pack", written.checksum.to_hex());
    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
        .into_iter()
        .filter(|path| {
            let file_name = path.file_name().and_then(|name| name.to_str());
            file_name != Some(replacement_name.as_str())
        })
        .collect();

    let newly_packed: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
        .into_iter()
        .filter(|oid| newly_packed.contains(oid))
        .collect();
    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));

    Ok(Some(RepackResult {
        pack: written.pack,
        idx: written.index,
        object_count,
        obsolete_packs,
        packed_loose,
        retained_pack_stems: Vec::new(),
        pack_checksum: written.checksum,
        index_entries: written.entries,
    }))
}
/// Builds one new pack that contains every loose object of the repository.
///
/// Existing packs are left untouched: no pack is reported as obsolete and none
/// is retained. Nothing is written to disk here. Returns `Ok(None)` when there
/// are no loose objects.
pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
    let objects_dir = repository_objects_dir(git_dir);
    let database = FileObjectDatabase::new(objects_dir.clone(), format);
    let loose_oids = loose_object_ids(&objects_dir, format)?;
    if loose_oids.is_empty() {
        return Ok(None);
    }
    let mut objects = Vec::with_capacity(loose_oids.len());
    for oid in &loose_oids {
        objects.push(ReachablePackObject {
            oid: *oid,
            object: database.read_object(oid)?,
        });
    }
    let inputs = pack_inputs(&objects);
    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
    let object_count = written.entries.len();
    // Only ids the writer actually emitted are reported as packed.
    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
    let mut packed_loose: Vec<ObjectId> = loose_oids
        .into_iter()
        .filter(|oid| packed_oid_set.contains(oid))
        .collect();
    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
    // The writer's output is consumed here, so its parts are moved rather
    // than cloned.
    Ok(Some(RepackResult {
        pack: written.pack,
        idx: written.index,
        object_count,
        obsolete_packs: Vec::new(),
        packed_loose,
        retained_pack_stems: Vec::new(),
        pack_checksum: written.checksum,
        index_entries: written.entries,
    }))
}
/// One existing pack considered by the geometric repack.
#[derive(Debug, Clone)]
struct GeometryPack {
/// Location of the `.pack` file.
pack_path: PathBuf,
/// Object ids listed in the pack's index.
oids: Vec<ObjectId>,
/// Weight used for the geometric progression: the number of indexed objects.
weight: u64,
/// Whether a sibling `.promisor` file exists.
is_promisor: bool,
}
/// Outcome of [`repack_geometric`].
#[derive(Debug, Clone)]
pub struct GeometricRepackResult {
/// The new pack, or `None` when no object needed to be rolled up.
pub result: Option<RepackResult>,
/// Paths of the existing packs whose objects were rolled into the new pack.
pub rolled_up_packs: Vec<PathBuf>,
}
/// Lists the packs that may take part in a geometric repack, lightest first.
///
/// A pack is skipped when it has a sibling `.mtimes` or `.keep` file, when its
/// stem is not valid UTF-8 or is listed in `kept_pack_stems`, or when it has
/// no `.idx` file. The weight of a pack is the number of objects in its index;
/// ties are ordered by pack path.
fn collect_geometry_packs(
    objects_dir: &Path,
    format: ObjectFormat,
    kept_pack_stems: &HashSet<String>,
) -> Result<Vec<GeometryPack>> {
    let pack_dir = objects_dir.join("pack");
    let mut packs = Vec::new();
    for pack_path in existing_pack_files(&pack_dir)? {
        let has_sibling = |extension: &str| pack_path.with_extension(extension).exists();
        if has_sibling("mtimes") || has_sibling("keep") {
            continue;
        }
        let skip_by_stem = match pack_path.file_stem().and_then(|stem| stem.to_str()) {
            Some(stem) => kept_pack_stems.contains(stem),
            None => true,
        };
        if skip_by_stem {
            continue;
        }
        let index_path = pack_path.with_extension("idx");
        if !index_path.exists() {
            continue;
        }
        let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
        let oids: Vec<ObjectId> = index.entries.iter().map(|entry| entry.oid).collect();
        let is_promisor = has_sibling("promisor");
        packs.push(GeometryPack {
            weight: oids.len() as u64,
            pack_path,
            oids,
            is_promisor,
        });
    }
    packs.sort_by(|left, right| {
        (left.weight, &left.pack_path).cmp(&(right.weight, &right.pack_path))
    });
    Ok(packs)
}
/// Finds how many of the lightest packs must be rolled up so that the
/// remaining packs form a geometric progression with ratio `split_factor`.
///
/// `packs` must be sorted by ascending weight. The return value `split` means
/// that `packs[..split]` should be combined and `packs[split..]` kept; `0`
/// means the progression already holds.
fn compute_geometry_split(packs: &[GeometryPack], split_factor: u64) -> usize {
    if packs.is_empty() {
        return 0;
    }

    // Scan from the heaviest pack downwards for the first neighbouring pair
    // that breaks the progression (heavier < factor * lighter).
    let violation = (1..packs.len())
        .rev()
        .find(|&idx| packs[idx].weight < split_factor.saturating_mul(packs[idx - 1].weight));
    // The heavier pack of the violating pair is rolled up as well.
    let initial_split = match violation {
        Some(idx) => idx + 1,
        None => 0,
    };

    // Combining packs yields a heavier pack, which may in turn be too close to
    // the next kept pack; absorb kept packs until the progression holds again.
    let mut rolled_weight: u64 = packs[..initial_split].iter().map(|p| p.weight).sum();
    let absorbed = packs[initial_split..]
        .iter()
        .take_while(|pack| {
            let too_close = pack.weight < split_factor.saturating_mul(rolled_weight);
            if too_close {
                rolled_weight = rolled_weight.saturating_add(pack.weight);
            }
            too_close
        })
        .count();
    initial_split + absorbed
}
/// Builds a pack that restores a geometric progression of pack sizes.
///
/// Non-promisor packs that are eligible (see `collect_geometry_packs`) are
/// split by `compute_geometry_split`: the lighter packs before the split are
/// rolled up together with all loose objects, while the heavier packs after
/// the split are kept. Objects already stored in a kept pack are not packed
/// again.
///
/// Nothing is written to disk here. When no object needs packing the result
/// carries `None` and an empty list of rolled-up packs.
pub fn repack_geometric(
    git_dir: &Path,
    format: ObjectFormat,
    split_factor: u64,
    kept_pack_stems: &HashSet<String>,
) -> Result<GeometricRepackResult> {
    let objects_dir = repository_objects_dir(git_dir);
    let database = FileObjectDatabase::new(objects_dir.clone(), format);
    let all_packs = collect_geometry_packs(&objects_dir, format, kept_pack_stems)?;
    let packs: Vec<GeometryPack> = all_packs
        .into_iter()
        .filter(|pack| !pack.is_promisor)
        .collect();
    let split = compute_geometry_split(&packs, split_factor);
    let loose_oids = loose_object_ids(&objects_dir, format)?;

    // Objects of the packs that stay are excluded from the new pack.
    let mut excluded_oids: HashSet<ObjectId> = HashSet::new();
    for pack in &packs[split..] {
        excluded_oids.extend(pack.oids.iter().copied());
    }

    // Rolled-up packs first, then loose objects; each id at most once.
    let mut included: Vec<ObjectId> = Vec::new();
    let mut seen: HashSet<ObjectId> = HashSet::new();
    let candidates = packs[..split]
        .iter()
        .flat_map(|pack| pack.oids.iter())
        .chain(&loose_oids);
    for oid in candidates {
        if !excluded_oids.contains(oid) && seen.insert(*oid) {
            included.push(*oid);
        }
    }
    if included.is_empty() {
        return Ok(GeometricRepackResult {
            result: None,
            rolled_up_packs: Vec::new(),
        });
    }
    // Sorting makes the pack input order independent of discovery order.
    included.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));

    let mut objects = Vec::with_capacity(included.len());
    for oid in &included {
        objects.push(ReachablePackObject {
            oid: *oid,
            object: database.read_object(oid)?,
        });
    }
    let inputs = pack_inputs(&objects);
    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
    let object_count = written.entries.len();
    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
    let mut packed_loose: Vec<ObjectId> = loose_oids
        .into_iter()
        .filter(|oid| packed_oid_set.contains(oid))
        .collect();
    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
    let rolled_up_packs: Vec<PathBuf> = packs[..split]
        .iter()
        .map(|pack| pack.pack_path.clone())
        .collect();

    // The writer's output is consumed here, so its parts are moved rather
    // than cloned.
    Ok(GeometricRepackResult {
        result: Some(RepackResult {
            pack: written.pack,
            idx: written.index,
            object_count,
            obsolete_packs: rolled_up_packs.clone(),
            packed_loose,
            retained_pack_stems: Vec::new(),
            pack_checksum: written.checksum,
            index_entries: written.entries,
        }),
        rolled_up_packs,
    })
}
pub fn install_repack_result(
git_dir: &Path,
format: ObjectFormat,
result: &RepackResult,
prune: bool,
) -> Result<()> {
install_repack_result_with_bitmap(git_dir, format, result, prune, None)
}
/// Writes a repack result into the repository's pack directory and optionally
/// prunes what it replaces.
///
/// Steps, in order:
/// 1. Validate the pack bytes against the recorded checksum, and check that
///    the index names the same checksum and lists the same entries as the
///    pack writer reported.
/// 2. Write `pack-<checksum>.pack`, then `.rev`, then `.idx`.
/// 3. When `bitmap_tips` is given and a bitmap can be built, replace
///    `pack-<checksum>.bitmap`.
/// 4. When `prune` is set, remove obsolete packs (subject to the retained
///    stems) and the loose objects that the new pack now contains.
///
/// # Errors
///
/// `GitError::InvalidFormat` when the index does not match the pack, plus any
/// validation, parsing, or I/O error from the helpers called here.
pub fn install_repack_result_with_bitmap(
git_dir: &Path,
format: ObjectFormat,
result: &RepackResult,
prune: bool,
bitmap_tips: Option<&HashSet<ObjectId>>,
) -> Result<()> {
let objects_dir = repository_objects_dir(git_dir);
let pack_dir = objects_dir.join("pack");
fs::create_dir_all(&pack_dir)?;
// Refuse to install anything whose pack, index and writer bookkeeping disagree.
validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
let parsed_index = PackIndex::parse(&result.idx, format)?;
if parsed_index.pack_checksum != result.pack_checksum {
return Err(GitError::InvalidFormat(
"repack index checksum does not match the new pack".into(),
));
}
if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
return Err(GitError::InvalidFormat(
"repack index does not match the new pack contents".into(),
));
}
// All files of the new pack share the `pack-<checksum>` base name.
let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
let reverse_index = sley_pack::PackReverseIndex::write(
format,
&sley_pack::pack_order_index_positions(&parsed_index.entries),
&result.pack_checksum,
)?;
// NOTE(review): the index is written last, presumably so that readers which
// discover packs through `.idx` files never see an incomplete pack — confirm.
write_pack_component(&new_pack_path, &result.pack)?;
write_pack_component(&new_rev_path, &reverse_index)?;
write_pack_component(&new_index_path, &result.idx)?;
if let Some(tips) = bitmap_tips {
let database = FileObjectDatabase::new(objects_dir.clone(), format);
if let Some(bitmap) = build_pack_bitmap(
&database,
format,
&result.index_entries,
&result.pack_checksum,
tips,
)? {
let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
// Any previous bitmap of the same name is removed before the new one is written.
remove_file_if_exists(&bitmap_path)?;
write_pack_component(&bitmap_path, &bitmap)?;
}
}
if !prune {
return Ok(());
}
// Ids present in the new pack; passed to the loose-object pruning below.
let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
prune_obsolete_pack_paths(
&objects_dir,
format,
&result.obsolete_packs,
&new_pack_path,
&result.retained_pack_stems,
)?;
prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
Ok(())
}
/// Installs the pack produced by a geometric repack and, when `prune` is set,
/// removes the packs that were rolled up into it.
///
/// Does nothing when `geometric.result` is `None`. Otherwise the new pack is
/// validated against its checksum and index exactly as in
/// `install_repack_result_with_bitmap`, then written as `.pack`, `.rev`, and
/// `.idx` (in that order). A `.bitmap` is written when `bitmap_tips` is `Some`
/// and `build_pack_bitmap` yields one.
///
/// When pruning, a rolled-up pack is left alone if it is the pack just written
/// or if it has a `.keep` sidecar. Only the stems of packs that were actually
/// removed are handed to `prune_stale_multi_pack_index`, so a multi-pack-index
/// that references only surviving packs is not deleted.
///
/// # Errors
///
/// Returns `GitError::InvalidFormat` when the index does not agree with the
/// pack, and propagates any validation, I/O, or bitmap-building error.
pub fn install_geometric_repack_result(
    git_dir: &Path,
    format: ObjectFormat,
    geometric: &GeometricRepackResult,
    prune: bool,
    bitmap_tips: Option<&HashSet<ObjectId>>,
) -> Result<()> {
    let Some(result) = geometric.result.as_ref() else {
        return Ok(());
    };
    let objects_dir = repository_objects_dir(git_dir);
    let pack_dir = objects_dir.join("pack");
    fs::create_dir_all(&pack_dir)?;

    // Validate the pack/index pair before writing anything.
    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
    let parsed_index = PackIndex::parse(&result.idx, format)?;
    if parsed_index.pack_checksum != result.pack_checksum {
        return Err(GitError::InvalidFormat(
            "repack index checksum does not match the new pack".into(),
        ));
    }
    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
        return Err(GitError::InvalidFormat(
            "repack index does not match the new pack contents".into(),
        ));
    }

    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
    let reverse_index = sley_pack::PackReverseIndex::write(
        format,
        &sley_pack::pack_order_index_positions(&parsed_index.entries),
        &result.pack_checksum,
    )?;
    write_pack_component(&new_pack_path, &result.pack)?;
    write_pack_component(&new_rev_path, &reverse_index)?;
    write_pack_component(&new_index_path, &result.idx)?;

    if let Some(tips) = bitmap_tips {
        let database = FileObjectDatabase::new(objects_dir.clone(), format);
        if let Some(bitmap) = build_pack_bitmap(
            &database,
            format,
            &result.index_entries,
            &result.pack_checksum,
            tips,
        )? {
            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
            remove_file_if_exists(&bitmap_path)?;
            write_pack_component(&bitmap_path, &bitmap)?;
        }
    }

    if !prune {
        return Ok(());
    }

    // Track only the packs we really delete: packs skipped below still exist,
    // so a multi-pack-index that references them remains valid.
    let mut removed_stems: HashSet<String> = HashSet::new();
    for pack_path in &geometric.rolled_up_packs {
        if *pack_path == new_pack_path {
            continue;
        }
        if pack_path.with_extension("keep").exists() {
            continue;
        }
        remove_file_if_exists(pack_path)?;
        remove_file_if_exists(&pack_path.with_extension("idx"))?;
        for ext in ["rev", "mtimes", "bitmap", "promisor"] {
            remove_file_if_exists(&pack_path.with_extension(ext))?;
        }
        if let Some(stem) = pack_path.file_stem() {
            removed_stems.insert(stem.to_string_lossy().into_owned());
        }
    }

    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
    Ok(())
}
/// Checks that `pack` is a plausibly formed pack whose trailer and computed
/// digest both equal `expected`.
///
/// `context` is a short label (e.g. `"repack"`) that prefixes every error
/// message.
///
/// # Errors
///
/// * `GitError::InvalidObjectId` when `expected` uses a different object
///   format than `format`.
/// * `GitError::InvalidFormat` when the pack is shorter than 12 bytes plus one
///   hash, lacks the `PACK` signature, or its checksum does not match.
fn validate_pack_checksum(
    pack: &[u8],
    format: ObjectFormat,
    expected: &ObjectId,
    context: &str,
) -> Result<()> {
    if expected.format() != format {
        return Err(GitError::InvalidObjectId(format!(
            "{context} checksum format does not match object format"
        )));
    }

    let hash_len = format.raw_len();
    if pack.len() < 12 + hash_len {
        return Err(GitError::InvalidFormat(format!(
            "{context} pack file too short"
        )));
    }
    if !pack.starts_with(b"PACK") {
        return Err(GitError::InvalidFormat(format!(
            "{context} pack file missing PACK signature"
        )));
    }

    // The final `hash_len` bytes are the trailer; everything before is hashed.
    let (content, trailer_bytes) = pack.split_at(pack.len() - hash_len);
    let computed = sley_core::digest_bytes(format, content)?;
    let trailer = ObjectId::from_raw(format, trailer_bytes)?;
    if &computed != expected || trailer != *expected {
        return Err(GitError::InvalidFormat(format!(
            "{context} pack checksum does not match generated pack"
        )));
    }
    Ok(())
}
/// Returns the modification time of `path` as whole seconds since the Unix
/// epoch.
///
/// Yields `0` when the metadata cannot be read, the platform reports no
/// modification time, or the time lies before the epoch. Times too large for
/// a `u32` saturate at `u32::MAX` instead of silently wrapping around to a
/// small (old-looking) value.
fn path_mtime_secs(path: &Path) -> u32 {
    fs::metadata(path)
        .and_then(|metadata| metadata.modified())
        .ok()
        .and_then(|time| time.duration_since(std::time::UNIX_EPOCH).ok())
        .map_or(0, |elapsed| {
            u32::try_from(elapsed.as_secs()).unwrap_or(u32::MAX)
        })
}
/// In-memory components of a cruft pack, as assembled by `build_cruft_pack`
/// and written to disk by `install_cruft_repack_result`.
#[derive(Debug, Clone)]
pub struct CruftPack {
/// Pack bytes (installed with the `.pack` extension).
pub pack: Vec<u8>,
/// Pack index bytes (installed with the `.idx` extension).
pub idx: Vec<u8>,
/// Reverse index bytes (installed with the `.rev` extension).
pub rev: Vec<u8>,
/// Per-object mtime table bytes (installed with the `.mtimes` extension).
pub mtimes: Vec<u8>,
/// Checksum of the pack; also used to form the `pack-<hex>` file stem.
pub checksum: ObjectId,
/// Ids of the objects stored in the pack, ordered by raw id bytes.
pub oids: Vec<ObjectId>,
}
/// Outcome of `repack_cruft_with_options`: the packs to install plus the
/// existing packs that become obsolete once they are installed.
#[derive(Debug, Clone)]
pub struct CruftRepackResult {
/// Pack holding the objects reachable from the roots; `None` when there was
/// nothing to pack.
pub reachable: Option<RepackResult>,
/// Pack holding the surviving unreachable objects; `None` when there are none.
pub cruft: Option<CruftPack>,
/// Existing non-retained, non-`.keep` packs that have no `.mtimes` sidecar.
pub obsolete_packs: Vec<PathBuf>,
/// Existing non-retained, non-`.keep` packs that do have a `.mtimes` sidecar.
pub obsolete_cruft_packs: Vec<PathBuf>,
// Stems of packs that must survive pruning; checked again at install time.
retained_pack_stems: Vec<String>,
}
/// Public entry point for [`object_mtimes_on_disk`]; forwards both arguments
/// unchanged and returns its result.
pub fn object_mtimes_on_disk_pub(
objects_dir: &Path,
format: ObjectFormat,
) -> Result<HashMap<ObjectId, u32>> {
object_mtimes_on_disk(objects_dir, format)
}
/// Collects the newest known mtime (seconds since the Unix epoch) for every
/// object found under `objects_dir`, packed or loose.
///
/// For packed objects the per-object value from a readable, parseable
/// `.mtimes` sidecar is used when available; otherwise the mtime of the
/// `.pack` file itself stands in. Index files without a matching `.pack` are
/// ignored. Loose objects use the mtime of their own file. When an object is
/// seen more than once, the largest mtime wins.
///
/// # Errors
///
/// Propagates directory-entry, index read/parse, and loose-object enumeration
/// errors. A missing or unreadable pack directory is not an error.
fn object_mtimes_on_disk(
    objects_dir: &Path,
    format: ObjectFormat,
) -> Result<HashMap<ObjectId, u32>> {
    let mut newest: HashMap<ObjectId, u32> = HashMap::new();
    let mut note = |oid: ObjectId, seen: u32| {
        let slot = newest.entry(oid).or_insert(seen);
        *slot = (*slot).max(seen);
    };

    if let Ok(listing) = fs::read_dir(objects_dir.join("pack")) {
        let mut index_files: Vec<PathBuf> = Vec::new();
        for dir_entry in listing {
            let candidate = dir_entry?.path();
            if matches!(
                candidate.extension().and_then(|ext| ext.to_str()),
                Some("idx")
            ) {
                index_files.push(candidate);
            }
        }
        index_files.sort();

        for index_file in &index_files {
            let pack_file = index_file.with_extension("pack");
            if !pack_file.exists() {
                continue;
            }
            let index = PackIndex::parse(&fs::read(index_file)?, format)?;

            // A missing or malformed `.mtimes` file simply means "no per-object data".
            let per_object: Option<Vec<u32>> =
                match fs::read(index_file.with_extension("mtimes")) {
                    Ok(bytes) => {
                        sley_pack::PackMtimes::parse(&bytes, format, index.entries.len())
                            .ok()
                            .map(|parsed| parsed.mtimes)
                    }
                    Err(_) => None,
                };
            let fallback = path_mtime_secs(&pack_file);

            for (position, entry) in index.entries.iter().enumerate() {
                let stamp = per_object
                    .as_deref()
                    .and_then(|table| table.get(position))
                    .copied()
                    .unwrap_or(fallback);
                note(entry.oid, stamp);
            }
        }
    }

    let loose = LooseObjectStore::new(objects_dir.to_path_buf(), format);
    for oid in loose_object_ids(objects_dir, format)? {
        let object_file = loose.object_path(&oid)?;
        note(oid, path_mtime_secs(&object_file));
    }
    Ok(newest)
}
/// Public entry point for [`build_cruft_pack`]; forwards all arguments
/// unchanged and returns its result.
pub fn build_cruft_pack_pub(
database: &FileObjectDatabase,
format: ObjectFormat,
survivors: &HashMap<ObjectId, u32>,
) -> Result<Option<CruftPack>> {
build_cruft_pack(database, format, survivors)
}
/// Packs the given surviving objects, keyed by id with their mtime, into a
/// cruft pack with its index, reverse index, and mtimes table.
///
/// Objects are read from `database` in raw-id order; objects that are reported
/// as not found are skipped. Returns `Ok(None)` when `survivors` is empty or
/// none of the objects could be read.
///
/// # Errors
///
/// Propagates any read error other than "not found", and any error from the
/// pack, reverse-index, or mtimes writers.
fn build_cruft_pack(
    database: &FileObjectDatabase,
    format: ObjectFormat,
    survivors: &HashMap<ObjectId, u32>,
) -> Result<Option<CruftPack>> {
    if survivors.is_empty() {
        return Ok(None);
    }

    let mut candidates: Vec<(ObjectId, u32)> = survivors
        .iter()
        .map(|(oid, mtime)| (*oid, *mtime))
        .collect();
    candidates.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));

    // Keep each id next to its loaded object so the pack inputs can borrow both.
    let mut loaded: Vec<(ObjectId, Arc<EncodedObject>)> = Vec::with_capacity(candidates.len());
    let mut recorded_mtime: HashMap<ObjectId, u32> = HashMap::with_capacity(candidates.len());
    for (oid, mtime) in candidates {
        let object = match database.read_object(&oid) {
            Ok(object) => object,
            Err(GitError::NotFound(_)) => continue,
            Err(err) => return Err(err),
        };
        loaded.push((oid, object));
        recorded_mtime.insert(oid, mtime);
    }
    if loaded.is_empty() {
        return Ok(None);
    }

    let inputs: Vec<PackInput<'_>> = loaded
        .iter()
        .map(|(oid, object)| PackInput {
            oid,
            object: object.as_ref(),
        })
        .collect();
    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;

    // The mtimes table and the id list both follow raw-id order.
    let mut by_oid: Vec<&sley_pack::PackIndexEntry> = written.entries.iter().collect();
    by_oid.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
    let mut mtimes_table: Vec<u32> = Vec::with_capacity(by_oid.len());
    let mut oids: Vec<ObjectId> = Vec::with_capacity(by_oid.len());
    for entry in &by_oid {
        mtimes_table.push(recorded_mtime.get(&entry.oid).copied().unwrap_or(0));
        oids.push(entry.oid);
    }

    let positions = sley_pack::pack_order_index_positions(&written.entries);
    let rev = sley_pack::PackReverseIndex::write(format, &positions, &written.checksum)?;
    let mtimes = sley_pack::PackMtimes::write(format, &mtimes_table, &written.checksum)?;

    Ok(Some(CruftPack {
        pack: written.pack,
        idx: written.index,
        rev,
        mtimes,
        checksum: written.checksum,
        oids,
    }))
}
/// Plans a cruft repack using default [`RepackOptions`].
///
/// Forwards to [`repack_cruft_with_options`] with `&RepackOptions::default()`.
pub fn repack_cruft(
git_dir: &Path,
format: ObjectFormat,
roots: &[ObjectId],
cruft_expiration: Option<u32>,
) -> Result<CruftRepackResult> {
repack_cruft_with_options(
git_dir,
format,
roots,
cruft_expiration,
&RepackOptions::default(),
)
}
/// Plans a cruft repack: one pack for objects reachable from `roots` and one
/// cruft pack for the remaining on-disk objects.
///
/// Nothing is written to the pack directory here; the returned
/// [`CruftRepackResult`] carries the pack bytes and the lists of existing
/// packs that become obsolete.
///
/// * `cruft_expiration` — when `Some`, unreachable objects are filtered through
///   `rescue_and_expire_cruft_objects` with this threshold before the cruft
///   pack is built.
/// * `options` — supplies the retained pack stems and whether objects living in
///   retained packs are packed again (`pack_kept_objects`).
///
/// # Errors
///
/// Propagates errors from reachability collection, object reads other than
/// "not found", pack writing, and directory enumeration.
pub fn repack_cruft_with_options(
git_dir: &Path,
format: ObjectFormat,
roots: &[ObjectId],
cruft_expiration: Option<u32>,
options: &RepackOptions,
) -> Result<CruftRepackResult> {
let objects_dir = repository_objects_dir(git_dir);
let database = FileObjectDatabase::new(objects_dir.clone(), format);
let pack_dir = objects_dir.join("pack");
let retained_pack_stems = repack_retained_pack_stems(
&pack_dir,
&options.keep_pack_stems,
!options.pack_kept_objects,
)?;
// Objects already held by retained packs are left out of both new packs,
// unless the caller asked for kept objects to be packed again.
let excluded_oids = if options.pack_kept_objects {
HashSet::new()
} else {
pack_oids_for_stems(&pack_dir, format, &retained_pack_stems)?
};
let mut reachable_ids = collect_reachable_object_ids(&database, format, roots.iter().copied())?;
reachable_ids.retain(|oid| !excluded_oids.contains(oid));
let reachable_result = if reachable_ids.is_empty() {
None
} else {
// Read the reachable objects in raw-id order.
let mut ids: Vec<ObjectId> = reachable_ids.iter().copied().collect();
ids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
let mut objects = Vec::with_capacity(ids.len());
for oid in &ids {
match database.read_object(oid) {
Ok(object) => objects.push(ReachablePackObject { oid: *oid, object }),
// Objects reported as not found are skipped rather than failing the repack.
Err(GitError::NotFound(_)) => {}
Err(err) => return Err(err),
}
}
if objects.is_empty() {
None
} else {
let inputs = pack_inputs(&objects);
let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
// Loose objects that ended up in the new pack can be pruned at install time.
let packed_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
.into_iter()
.filter(|oid| packed_set.contains(oid))
.collect();
packed_loose.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
Some(RepackResult {
pack: written.pack,
idx: written.index,
object_count: written.entries.len(),
// Obsolete and retained packs are tracked on the CruftRepackResult instead.
obsolete_packs: Vec::new(),
packed_loose,
retained_pack_stems: Vec::new(),
pack_checksum: written.checksum,
index_entries: written.entries,
})
}
};
// Every on-disk object that is neither reachable nor excluded is a cruft candidate.
let mut survivors: HashMap<ObjectId, u32> = object_mtimes_on_disk(&objects_dir, format)?
.into_iter()
.filter(|(oid, _)| !reachable_ids.contains(oid) && !excluded_oids.contains(oid))
.collect();
if let Some(expiration) = cruft_expiration {
rescue_and_expire_cruft_objects(&database, format, &mut survivors, expiration)?;
}
let cruft = build_cruft_pack(&database, format, &survivors)?;
// Classify the existing packs: retained and `.keep` packs stay; the rest are
// obsolete, split by whether they carry a `.mtimes` sidecar.
let mut obsolete_packs = Vec::new();
let mut obsolete_cruft_packs = Vec::new();
for pack_path in existing_pack_files(&pack_dir)? {
if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
&& retained_pack_stems.iter().any(|retained| retained == stem)
{
continue;
}
if pack_path.with_extension("keep").exists() {
continue;
}
if pack_path.with_extension("mtimes").exists() {
obsolete_cruft_packs.push(pack_path);
} else {
obsolete_packs.push(pack_path);
}
}
Ok(CruftRepackResult {
reachable: reachable_result,
cruft,
obsolete_packs,
obsolete_cruft_packs,
retained_pack_stems,
})
}
/// Drops expired objects from `survivors`, except those still referenced by a
/// non-expired survivor.
///
/// An object counts as recent when its mtime is strictly greater than
/// `expiration`. Starting from the recent objects, commit parents and trees,
/// tree entries (gitlinks excluded), and tag targets are followed. Everything
/// reached that way is kept even if its own mtime is at or below the
/// threshold. Objects that cannot be read or parsed are simply not followed.
fn rescue_and_expire_cruft_objects(
    database: &FileObjectDatabase,
    format: ObjectFormat,
    survivors: &mut HashMap<ObjectId, u32>,
    expiration: u32,
) -> Result<()> {
    // Seed the walk with every survivor that has not expired yet.
    let mut worklist: Vec<ObjectId> = survivors
        .iter()
        .filter(|(_, mtime)| **mtime > expiration)
        .map(|(oid, _)| *oid)
        .collect();
    let mut rescued: HashSet<ObjectId> = HashSet::new();

    while let Some(oid) = worklist.pop() {
        if !rescued.insert(oid) {
            continue;
        }
        let object = match database.read_object(&oid) {
            Ok(object) => object,
            Err(_) => continue,
        };
        match object.object_type {
            ObjectType::Commit => {
                if let Ok(commit) = Commit::parse_ref(format, &object.body) {
                    worklist.extend(commit.parents.iter().copied());
                    worklist.push(commit.tree);
                }
            }
            ObjectType::Tree => {
                worklist.extend(
                    TreeEntries::new(format, &object.body)
                        .flatten()
                        .filter(|entry| !entry.is_gitlink())
                        .map(|entry| entry.oid),
                );
            }
            ObjectType::Tag => {
                if let Ok(tag) = Tag::parse_ref(format, &object.body) {
                    worklist.push(tag.object);
                }
            }
            ObjectType::Blob => {}
        }
    }

    survivors.retain(|oid, mtime| *mtime > expiration || rescued.contains(oid));
    Ok(())
}
/// Writes the packs of a [`CruftRepackResult`] into `<objects>/pack` and, when
/// `prune` is set, removes obsolete packs and loose objects that are now packed.
///
/// The reachable pack is written as `.pack`, `.rev`, `.idx`; the cruft pack as
/// `.pack`, `.rev`, `.mtimes`, `.idx`. During pruning a pack is left alone if
/// it is one of the packs just written, if its stem is retained, or if it has a
/// `.keep` sidecar.
///
/// NOTE(review): unlike `install_repack_result_with_bitmap`, this function does
/// not call `validate_pack_checksum` or compare the index against the writer's
/// entries before writing — confirm whether that is intentional.
///
/// # Errors
///
/// Propagates index parsing, reverse-index writing, and file-system errors.
pub fn install_cruft_repack_result(
git_dir: &Path,
format: ObjectFormat,
result: &CruftRepackResult,
prune: bool,
) -> Result<()> {
let objects_dir = repository_objects_dir(git_dir);
let pack_dir = objects_dir.join("pack");
fs::create_dir_all(&pack_dir)?;
// File names of the packs about to be written, so pruning never deletes them.
let new_reachable_name = result
.reachable
.as_ref()
.map(|r| format!("pack-{}.pack", r.pack_checksum.to_hex()));
let new_cruft_name = result
.cruft
.as_ref()
.map(|c| format!("pack-{}.pack", c.checksum.to_hex()));
if let Some(reachable) = result.reachable.as_ref() {
// The reverse index is derived here from the parsed index entries.
let parsed_index = PackIndex::parse(&reachable.idx, format)?;
let pack_name = format!("pack-{}", reachable.pack_checksum.to_hex());
let reverse_index = sley_pack::PackReverseIndex::write(
format,
&sley_pack::pack_order_index_positions(&parsed_index.entries),
&reachable.pack_checksum,
)?;
write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &reachable.pack)?;
write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &reverse_index)?;
write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &reachable.idx)?;
}
if let Some(cruft) = result.cruft.as_ref() {
// The cruft pack already carries its reverse index and mtimes table.
let pack_name = format!("pack-{}", cruft.checksum.to_hex());
write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &cruft.pack)?;
write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &cruft.rev)?;
write_pack_component(&pack_dir.join(format!("{pack_name}.mtimes")), &cruft.mtimes)?;
write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &cruft.idx)?;
}
if !prune {
return Ok(());
}
// Ids stored in either new pack; only loose copies of these may be deleted.
let mut present: HashSet<ObjectId> = HashSet::new();
if let Some(reachable) = result.reachable.as_ref() {
present.extend(reachable.index_entries.iter().map(|e| e.oid));
}
if let Some(cruft) = result.cruft.as_ref() {
present.extend(cruft.oids.iter().copied());
}
// Stems of packs actually deleted, used to decide whether the MIDX is stale.
let mut removed_stems: HashSet<String> = HashSet::new();
for pack_path in result
.obsolete_packs
.iter()
.chain(result.obsolete_cruft_packs.iter())
{
let file_name = pack_path.file_name().and_then(|n| n.to_str());
if file_name == new_reachable_name.as_deref() || file_name == new_cruft_name.as_deref() {
continue;
}
if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
&& result
.retained_pack_stems
.iter()
.any(|retained| retained == stem)
{
continue;
}
if pack_path.with_extension("keep").exists() {
continue;
}
if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) {
removed_stems.insert(stem.to_string());
}
remove_file_if_exists(pack_path)?;
remove_file_if_exists(&pack_path.with_extension("idx"))?;
// NOTE(review): `.promisor` sidecars are removed here, whereas
// `prune_pack_paths_matching` skips packs that have one — verify.
for ext in ["rev", "mtimes", "bitmap", "promisor"] {
remove_file_if_exists(&pack_path.with_extension(ext))?;
}
}
let loose_now_packed: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
.into_iter()
.filter(|oid| present.contains(oid))
.collect();
prune_loose_objects(&objects_dir, format, loose_now_packed.iter(), &present)?;
prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
Ok(())
}
/// Reports whether the parsed index entries describe exactly the entries the
/// pack writer produced.
///
/// `writer_entries` is compared in raw-id order against `parsed` position by
/// position; id, CRC32, and offset must all agree. Lists of different length
/// never match.
fn pack_index_entries_match_writer(
    parsed: &[PackIndexEntry],
    writer_entries: &[PackIndexEntry],
) -> bool {
    if parsed.len() != writer_entries.len() {
        return false;
    }

    let mut expected: Vec<&PackIndexEntry> = writer_entries.iter().collect();
    expected.sort_by(|a, b| a.oid.as_bytes().cmp(b.oid.as_bytes()));

    for (found, wanted) in parsed.iter().zip(expected) {
        let same = found.oid == wanted.oid
            && found.crc32 == wanted.crc32
            && found.offset == wanted.offset;
        if !same {
            return false;
        }
    }
    true
}
/// Lists the loose objects that are not reachable from `roots` and, when
/// `delete` is set, removes their files.
///
/// The returned ids are ordered by raw id bytes. A file that has already
/// disappeared is not treated as an error.
///
/// # Errors
///
/// Propagates reachability and enumeration errors; any removal failure other
/// than "not found" is returned as `GitError::Io`.
pub fn prune_unreachable_loose<I>(
    git_dir: &Path,
    format: ObjectFormat,
    roots: I,
    delete: bool,
) -> Result<Vec<ObjectId>>
where
    I: IntoIterator<Item = ObjectId>,
{
    let objects_dir = repository_objects_dir(git_dir);
    let database = FileObjectDatabase::new(objects_dir.clone(), format);
    let reachable = collect_reachable_object_ids(&database, format, roots)?;
    let store = LooseObjectStore::new(objects_dir.clone(), format);

    let mut unreachable: Vec<ObjectId> = Vec::new();
    for oid in loose_object_ids(&objects_dir, format)? {
        if !reachable.contains(&oid) {
            unreachable.push(oid);
        }
    }
    unreachable.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));

    if delete {
        for oid in &unreachable {
            let object_file = store.object_path(oid)?;
            if let Err(err) = fs::remove_file(&object_file) {
                // Someone else may have removed the file already; that is fine.
                if err.kind() != std::io::ErrorKind::NotFound {
                    return Err(GitError::Io(err.to_string()));
                }
            }
        }
    }
    Ok(unreachable)
}
/// Returns the ids of all loose objects under `objects_dir`, ordered by raw
/// id bytes.
fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
    let mut sorted: Vec<ObjectId> = loose_object_id_set(objects_dir, format)?
        .into_iter()
        .collect();
    sorted.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
    Ok(sorted)
}
/// Returns the ids of all loose objects under `objects_dir` as an unordered
/// set, filled in by `collect_loose_object_ids`.
fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
let mut oids = HashSet::new();
collect_loose_object_ids(objects_dir, format, &mut oids)?;
Ok(oids)
}
/// Lists the regular files in `pack_dir` whose extension is `pack`, sorted by
/// path. A missing directory yields an empty list.
fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
    let mut found: Vec<PathBuf> = Vec::new();
    if !pack_dir.exists() {
        return Ok(found);
    }
    for dir_entry in fs::read_dir(pack_dir)? {
        let candidate = dir_entry?.path();
        let has_pack_ext = matches!(
            candidate.extension().and_then(|ext| ext.to_str()),
            Some("pack")
        );
        if has_pack_ext && candidate.is_file() {
            found.push(candidate);
        }
    }
    found.sort();
    Ok(found)
}
/// Removes every pack in `packs` that `prune_pack_paths_matching` considers
/// removable.
///
/// Forwards to [`prune_pack_paths_matching`] with a predicate that accepts
/// every pack, so only that function's built-in exclusions apply.
fn prune_obsolete_pack_paths(
objects_dir: &Path,
format: ObjectFormat,
packs: &[PathBuf],
keep: &Path,
retained_pack_stems: &[String],
) -> Result<()> {
prune_pack_paths_matching(objects_dir, format, packs.iter(), keep, retained_pack_stems, |_| Ok(true))
}
/// Deletes the packs in `packs` that pass every exclusion and for which
/// `should_prune` returns `true`, then drops a multi-pack-index that still
/// references any of them.
///
/// A pack is skipped when it is `keep` itself, shares `keep`'s file stem, has
/// no file stem, has a stem listed in `retained_pack_stems`, or has a `.keep`
/// or `.promisor` sidecar. For each removed pack the `.idx`, `.rev`, `.mtimes`,
/// and `.bitmap` companions are removed as well.
///
/// # Errors
///
/// Propagates errors from `should_prune`, file removal, and the
/// multi-pack-index check.
fn prune_pack_paths_matching<'a>(
    objects_dir: &Path,
    format: ObjectFormat,
    packs: impl IntoIterator<Item = &'a PathBuf>,
    keep: &Path,
    retained_pack_stems: &[String],
    mut should_prune: impl FnMut(&Path) -> Result<bool>,
) -> Result<()> {
    let pack_dir = objects_dir.join("pack");
    let protected_stem = keep.file_stem();
    let retained: HashSet<&str> = retained_pack_stems.iter().map(String::as_str).collect();
    let mut removed_stems: HashSet<String> = HashSet::new();

    for candidate in packs {
        if candidate == keep {
            continue;
        }
        let Some(stem) = candidate.file_stem() else {
            continue;
        };
        if protected_stem == Some(stem) {
            continue;
        }
        if matches!(stem.to_str(), Some(name) if retained.contains(name)) {
            continue;
        }
        let pinned = candidate.with_extension("keep").exists()
            || candidate.with_extension("promisor").exists();
        // The predicate is consulted last, only for packs that passed every
        // built-in exclusion.
        if pinned || !should_prune(candidate)? {
            continue;
        }

        remove_file_if_exists(candidate)?;
        for ext in ["idx", "rev", "mtimes", "bitmap"] {
            remove_file_if_exists(&candidate.with_extension(ext))?;
        }
        removed_stems.insert(stem.to_string_lossy().into_owned());
    }

    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
    Ok(())
}
/// Deletes `<pack_dir>/multi-pack-index` when it names any pack whose stem is
/// in `removed_stems`.
///
/// Pack names in the index are compared after stripping a trailing `.idx`.
/// Nothing happens when `removed_stems` is empty or the file does not exist.
///
/// # Errors
///
/// Propagates read, parse, and removal errors.
fn prune_stale_multi_pack_index(
    pack_dir: &Path,
    format: ObjectFormat,
    removed_stems: &HashSet<String>,
) -> Result<()> {
    let midx_path = pack_dir.join("multi-pack-index");
    if removed_stems.is_empty() || !midx_path.exists() {
        return Ok(());
    }

    let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
    for name in &midx.pack_names {
        let stem = name.strip_suffix(".idx").unwrap_or(name);
        if removed_stems.contains(stem) {
            // One stale reference is enough to invalidate the whole index.
            return remove_file_if_exists(&midx_path);
        }
    }
    Ok(())
}
/// Removes the loose-object files of every candidate id that is also in
/// `present`; candidates missing from `present` are left untouched.
///
/// # Errors
///
/// Propagates path-resolution and file-removal errors.
fn prune_loose_objects<'a, I>(
    objects_dir: &Path,
    format: ObjectFormat,
    candidates: I,
    present: &HashSet<ObjectId>,
) -> Result<()>
where
    I: IntoIterator<Item = &'a ObjectId>,
{
    let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
    for oid in candidates.into_iter().filter(|oid| present.contains(*oid)) {
        let object_file = store.object_path(oid)?;
        remove_file_if_exists(&object_file)?;
    }
    Ok(())
}
/// How a delta entry in a pack names its base object, as decoded by
/// `pack_entry_delta_base`.
enum PackDeltaBase {
/// Base identified by its absolute offset in the same pack (entry type 6).
Offset(u64),
/// Base identified by its object id (entry type 7).
Ref(ObjectId),
}
/// Result of `scan_pack_index_offsets` for a single pack entry.
struct PackIndexOffsetInfo {
/// Offset at which the entry's data ends: the next larger entry offset, or the
/// trailer offset when there is none.
end_offset: u64,
/// Id of the entry located at the requested delta-base offset, if one was
/// requested.
delta_base_oid: Option<ObjectId>,
}
/// Scans the index once to find where the entry at `target_offset` ends and,
/// optionally, which object sits at `delta_base_offset`.
///
/// The end offset is the smallest entry offset greater than `target_offset`,
/// falling back to `trailer_offset`. If more than one entry claims
/// `target_offset`, the end offset is reported as `target_offset` itself.
///
/// # Errors
///
/// Returns `GitError::InvalidFormat` when no entry has `target_offset`, or
/// when `delta_base_offset` is given but no entry is located there.
fn scan_pack_index_offsets(
    index: &PackIndex,
    target_offset: u64,
    trailer_offset: u64,
    delta_base_offset: Option<u64>,
) -> Result<PackIndexOffsetInfo> {
    let mut matches = 0usize;
    let mut successor: Option<u64> = None;
    let mut base_oid = None;

    for entry in &index.entries {
        match entry.offset.cmp(&target_offset) {
            std::cmp::Ordering::Equal => matches += 1,
            std::cmp::Ordering::Greater => {
                // Keep the smallest offset seen so far beyond the target.
                successor = Some(match successor {
                    Some(current) if current <= entry.offset => current,
                    _ => entry.offset,
                });
            }
            std::cmp::Ordering::Less => {}
        }
        if delta_base_offset == Some(entry.offset) {
            base_oid = Some(entry.oid);
        }
    }

    if matches == 0 {
        return Err(GitError::InvalidFormat(format!(
            "pack index offset {target_offset} not found"
        )));
    }
    if let (Some(offset), None) = (delta_base_offset, base_oid.as_ref()) {
        return Err(GitError::InvalidFormat(format!(
            "ofs-delta base offset {offset} not found"
        )));
    }

    let end_offset = if matches > 1 {
        target_offset
    } else {
        successor.unwrap_or(trailer_offset)
    };
    Ok(PackIndexOffsetInfo {
        end_offset,
        delta_base_oid: base_oid,
    })
}
/// Decodes the header of the pack entry at `entry_offset` and returns the
/// delta base it names, or `None` when the entry is not a delta.
///
/// The entry type is taken from bits 4..=6 of the first header byte; the
/// remaining size bytes are skipped. Type 6 carries a relative base offset,
/// type 7 a base object id.
///
/// # Errors
///
/// Returns `GitError::InvalidFormat` when the offset does not fit in `usize`
/// or the entry is truncated, and propagates base-decoding errors.
fn pack_entry_delta_base(
    format: ObjectFormat,
    pack: &[u8],
    entry_offset: u64,
) -> Result<Option<PackDeltaBase>> {
    let mut cursor = usize::try_from(entry_offset)
        .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;

    let header = pack_next_byte(pack, &mut cursor)?;
    // Skip the continuation bytes of the variable-length size field.
    let mut last = header;
    while last & 0x80 != 0 {
        last = pack_next_byte(pack, &mut cursor)?;
    }

    let base = match (header >> 4) & 0x07 {
        6 => Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
            pack,
            &mut cursor,
            entry_offset,
        )?)),
        7 => Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
            format,
            pack,
            &mut cursor,
        )?)),
        _ => None,
    };
    Ok(base)
}
/// Reads a raw object id of `format`'s length from `pack` at `*cursor` and
/// advances the cursor past it.
///
/// # Errors
///
/// Returns `GitError::InvalidFormat` when fewer bytes than one raw id remain,
/// and propagates id-construction errors. The cursor is left untouched on
/// failure.
fn parse_ref_delta_base_oid(
    format: ObjectFormat,
    pack: &[u8],
    cursor: &mut usize,
) -> Result<ObjectId> {
    let start = *cursor;
    let end = start + format.raw_len();
    let Some(raw) = pack.get(start..end) else {
        return Err(GitError::InvalidFormat(
            "truncated ref-delta base object id".into(),
        ));
    };
    let oid = ObjectId::from_raw(format, raw)?;
    *cursor = end;
    Ok(oid)
}
/// Decodes the variable-length relative offset of an ofs-delta entry and
/// returns the absolute offset of its base (`entry_offset - relative`).
///
/// Each byte contributes its low seven bits; a set high bit means another
/// byte follows, and every continuation adds one before shifting the
/// accumulated value left by seven bits.
///
/// # Errors
///
/// Returns `GitError::InvalidFormat` when the encoding is truncated, when the
/// accumulated offset does not fit in a `u64`, or when the relative offset is
/// larger than `entry_offset`.
fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
    let mut byte = pack_next_byte(pack, cursor)?;
    let mut relative = u64::from(byte & 0x7f);
    while byte & 0x80 != 0 {
        byte = pack_next_byte(pack, cursor)?;
        relative = relative
            .checked_add(1)
            // `checked_shl` only rejects shift amounts >= 64 and silently drops
            // high bits; multiplying by 2^7 actually detects the overflow.
            .and_then(|value| value.checked_mul(1 << 7))
            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
    }
    entry_offset
        .checked_sub(relative)
        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
}
/// Returns the byte at `*cursor` and advances the cursor by one.
///
/// # Errors
///
/// Returns `GitError::InvalidFormat` when the cursor is at or past the end of
/// `pack`; the cursor is not moved in that case.
fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
    match pack.get(*cursor) {
        Some(&byte) => {
            *cursor += 1;
            Ok(byte)
        }
        None => Err(GitError::InvalidFormat("truncated pack entry".into())),
    }
}
/// Returns the null object id for `format`.
///
/// The body cannot fail; it always yields `Ok`.
fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
Ok(ObjectId::null(format))
}
/// Removes the file at `path`, treating "file not found" as success.
///
/// # Errors
///
/// Any other removal failure is returned as `GitError::Io` carrying the
/// error's message.
fn remove_file_if_exists(path: &Path) -> Result<()> {
    if let Err(err) = fs::remove_file(path) {
        if err.kind() != std::io::ErrorKind::NotFound {
            return Err(GitError::Io(err.to_string()));
        }
    }
    Ok(())
}
/// Walks every object reachable from `starts`, calling `visit` on each, and
/// returns the set of visited ids.
///
/// Forwards to [`walk_reachable_objects_with_cut`] with an empty cut set, so
/// no commit has its parent traversal suppressed.
fn walk_reachable_objects<R, I, F>(
reader: &R,
format: ObjectFormat,
starts: I,
excluded: &HashSet<ObjectId>,
visit: F,
) -> Result<HashSet<ObjectId>>
where
R: ObjectReader,
I: IntoIterator<Item = ObjectId>,
F: FnMut(&ObjectId, &Arc<EncodedObject>),
{
walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
}
/// Depth-first walk over the object graph from `starts`, calling `visit` once
/// per object and returning the set of visited ids.
///
/// * `excluded` — ids that are neither visited nor traversed.
/// * `cut` — commits that are visited and whose tree is traversed, but whose
///   parents are not followed.
///
/// Commit parents are passed through `grafted_parents` before being followed.
/// Gitlink tree entries are skipped.
///
/// # Errors
///
/// A read failure is passed through `with_missing_object_context` with
/// `MissingObjectContext::Traversal`. Commit, tree-entry, and tag parse errors
/// are propagated.
fn walk_reachable_objects_with_cut<R, I, F>(
reader: &R,
format: ObjectFormat,
starts: I,
excluded: &HashSet<ObjectId>,
cut: &HashSet<ObjectId>,
mut visit: F,
) -> Result<HashSet<ObjectId>>
where
R: ObjectReader,
I: IntoIterator<Item = ObjectId>,
F: FnMut(&ObjectId, &Arc<EncodedObject>),
{
let mut seen = HashSet::new();
let mut pending = Vec::new();
// Each start is drained completely before the next one is pushed; `seen` is
// shared across starts, so no object is visited twice.
for start in starts {
pending.push(start);
while let Some(oid) = pending.pop() {
if excluded.contains(&oid) {
continue;
}
if !seen.insert(oid) {
continue;
}
let object = reader.read_object(&oid).map_err(|err| {
with_missing_object_context(err, oid, MissingObjectContext::Traversal)
})?;
match object.object_type {
ObjectType::Commit => {
// Parse before visiting so a malformed commit is never reported to `visit`.
let (tree, parents) = {
let commit = Commit::parse_ref(format, &object.body)?;
(commit.tree, commit.parents)
};
visit(&oid, &object);
if !cut.contains(&oid) {
// Pushed in reverse so the first parent is popped first among the parents.
for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
pending.push(parent);
}
}
// The tree is pushed last, so it is processed before any parent.
pending.push(tree);
}
ObjectType::Tree => {
// Collect children first so a malformed entry aborts before `visit` runs.
let mut child_oids = Vec::new();
for entry in TreeEntries::new(format, &object.body) {
let entry = entry?;
if entry.is_gitlink() {
continue;
}
child_oids.push(entry.oid);
}
visit(&oid, &object);
// Reversed so children are popped in their original tree order.
pending.extend(child_oids.into_iter().rev());
}
ObjectType::Tag => {
let target = {
let tag = Tag::parse_ref(format, &object.body)?;
tag.object
};
visit(&oid, &object);
pending.push(target);
}
ObjectType::Blob => visit(&oid, &object),
}
}
}
Ok(seen)
}
/// Reports whether bit `position` is set in `words` (64 bits per word, least
/// significant bit first). Positions beyond the slice read as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    let mask = 1u64 << (position % 64);
    match words.get((position / 64) as usize) {
        Some(word) => word & mask != 0,
        None => false,
    }
}
/// Sets bit `position` in `words` (64 bits per word, least significant bit
/// first). Positions beyond the slice are silently ignored.
fn bitset_set(words: &mut [u64], position: u32) {
    if let Some(word) = words.get_mut((position / 64) as usize) {
        *word |= 1u64 << (position % 64);
    }
}
/// ORs `other` into `acc` word by word. Only the words both slices have in
/// common are touched.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    acc.iter_mut()
        .zip(other)
        .for_each(|(target, source)| *target |= *source);
}
/// Returns the positions of all set bits in `words`, in ascending order
/// (64 bits per word, least significant bit first).
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    words
        .iter()
        .enumerate()
        .flat_map(|(word_index, &word)| {
            (0..64u32)
                .filter(move |bit| word & (1u64 << bit) != 0)
                .map(move |bit| word_index as u32 * 64 + bit)
        })
        .collect()
}
/// Extracts the timestamp from an identity line by taking the second-to-last
/// space-separated field and parsing it as a decimal `i64`.
///
/// Returns `0` when the field is missing, is not valid UTF-8, or does not
/// parse as an integer.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    // Splitting from the right yields the last field first; the field at
    // index 1 is the one just before it.
    let seconds_field = identity.rsplitn(3, |byte| *byte == b' ').nth(1);
    let parsed = seconds_field
        .and_then(|raw| std::str::from_utf8(raw).ok())
        .and_then(|text| text.parse::<i64>().ok());
    match parsed {
        Some(seconds) => seconds,
        None => 0,
    }
}
/// Returns how many commits to skip ahead from position `idx` before the next
/// bitmap candidate.
///
/// * `idx <= 100`: `0` (no skipping).
/// * `100 < idx <= 20000`: `idx - 100`, capped at `100`.
/// * `idx > 20000`: `idx - 20000`, clamped to `100..=5000`.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    match idx {
        within_must if within_must <= MUST_REGION => 0,
        within_min if within_min <= MIN_REGION => (within_min - MUST_REGION).min(MIN_COMMITS),
        beyond => (beyond - MIN_REGION).clamp(MIN_COMMITS, MAX_COMMITS),
    }
}
/// Builds a reachability bitmap for a single pack.
///
/// Bit positions follow pack order: the index entries are ordered by their
/// offset in the pack (ties keep their input order) and handed to
/// `build_reachability_bitmap`. Returns `Ok(None)` when that function declines
/// to produce a bitmap.
pub fn build_pack_bitmap(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    index_entries: &[PackIndexEntry],
    pack_checksum: &ObjectId,
    preferred_tips: &HashSet<ObjectId>,
) -> Result<Option<Vec<u8>>> {
    let mut in_pack_order: Vec<&PackIndexEntry> = index_entries.iter().collect();
    in_pack_order.sort_by_key(|entry| entry.offset);
    let bit_order: Vec<ObjectId> = in_pack_order.iter().map(|entry| entry.oid).collect();
    build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
}
/// Builds a reachability bitmap over the objects of a multi-pack-index.
///
/// Bit positions follow an order in which all objects of `preferred_pack` come
/// first, then the remaining objects by pack id, each group ordered by offset
/// (ties keep their input order). The ordered ids are handed to
/// `build_reachability_bitmap`.
pub fn build_midx_bitmap(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    midx_entries: &[sley_pack::MultiPackIndexEntry],
    midx_checksum: &ObjectId,
    preferred_pack: u32,
    preferred_tips: &HashSet<ObjectId>,
) -> Result<Option<Vec<u8>>> {
    let mut ordered: Vec<&sley_pack::MultiPackIndexEntry> = midx_entries.iter().collect();
    ordered.sort_by_key(|entry| {
        // `false` sorts before `true`, which puts the preferred pack first.
        let not_preferred = entry.pack_int_id != preferred_pack;
        (not_preferred, entry.pack_int_id, entry.offset)
    });
    let bit_order: Vec<ObjectId> = ordered.iter().map(|entry| entry.oid).collect();
    build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
}
/// Counts the "maximal" commits among `selected`, as reported in the
/// `num_maximal_commits` trace statistic.
///
/// Each selected commit owns one bit. Bit masks are propagated along
/// first-parent links (after grafts) from children to parents; a commit is
/// counted when it is still flagged maximal at the moment it is processed.
///
/// # Errors
///
/// Propagates object read and commit parse errors for the selected commits and
/// their first-parent ancestors.
fn bitmap_num_maximal_commits(
db: &FileObjectDatabase,
format: ObjectFormat,
selected: &[ObjectId],
) -> Result<usize> {
// Map every selected commit and each of its first-parent ancestors to its
// first parent (or `None` for a root or grafted commit).
let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
let mut stack: Vec<ObjectId> = selected.to_vec();
while let Some(oid) = stack.pop() {
if first_parent.contains_key(&oid) {
continue;
}
let object = db.read_object(&oid)?;
let commit = Commit::parse_ref(format, &object.body)?;
let parent = grafted_parents(db, &oid, commit.parents).first().copied();
first_parent.insert(oid, parent);
if let Some(parent) = parent {
stack.push(parent);
}
}
// Number of recorded commits that name each commit as their first parent.
let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
for parent in first_parent.values().flatten() {
*pending_children.entry(*parent).or_default() += 1;
}
let word_count = selected.len().div_ceil(64);
struct MaximalEnt {
mask: Vec<u64>,
maximal: bool,
}
// Seed each selected commit with its own bit and mark it maximal.
let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
for (bit, oid) in selected.iter().enumerate() {
let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
mask: vec![0u64; word_count],
maximal: true,
});
ent.mask[bit / 64] |= 1u64 << (bit % 64);
ent.maximal = true;
}
// Start from commits without recorded children; a parent is queued only
// once all of its recorded children have been processed.
let mut queue: Vec<ObjectId> = first_parent
.keys()
.filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
.copied()
.collect();
let mut num_maximal = 0usize;
while let Some(oid) = queue.pop() {
if let Some(ent) = ents.remove(&oid) {
if ent.maximal {
num_maximal += 1;
}
if let Some(Some(parent)) = first_parent.get(&oid) {
match ents.entry(*parent) {
// Parent had no mask yet: it inherits the child's mask and is not maximal.
std::collections::hash_map::Entry::Vacant(vacant) => {
vacant.insert(MaximalEnt {
mask: ent.mask.clone(),
maximal: false,
});
}
std::collections::hash_map::Entry::Occupied(mut occupied) => {
let parent_ent = occupied.get_mut();
// Does the child carry bits the parent lacks?
let c_not_p = ent
.mask
.iter()
.zip(&parent_ent.mask)
.any(|(child, parent)| child & !parent != 0);
if c_not_p {
// Did the parent carry bits the child lacks (checked before merging)?
let p_not_c = parent_ent
.mask
.iter()
.zip(&ent.mask)
.any(|(parent, child)| parent & !child != 0);
for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
*parent |= child;
}
parent_ent.maximal = p_not_c;
}
}
}
}
}
if let Some(Some(parent)) = first_parent.get(&oid)
&& let Some(remaining) = pending_children.get_mut(parent)
{
*remaining -= 1;
if *remaining == 0 {
queue.push(*parent);
}
}
}
Ok(num_maximal)
}
/// Builds the serialized reachability bitmap for the objects in `bit_order`.
///
/// `bit_order[i]` is the object assigned bit `i`. A subset of the commits is
/// selected, the set of objects reachable from each selected commit is
/// computed as a bitset over `bit_order`, and the result is serialized by
/// `PackBitmapWriter` using `checksum`.
///
/// Returns `Ok(None)` when `bit_order` is empty or has more than `u32::MAX`
/// entries, or when a reachable object is not part of `bit_order` (a warning
/// is printed to stderr in that case).
///
/// # Errors
///
/// Propagates object read, commit parse, tree walk, and bitmap writer errors.
fn build_reachability_bitmap(
db: &FileObjectDatabase,
format: ObjectFormat,
checksum: &ObjectId,
bit_order: &[ObjectId],
preferred_tips: &HashSet<ObjectId>,
) -> Result<Option<Vec<u8>>> {
if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
return Ok(None);
}
let object_count = bit_order.len();
// `index_position[bit]` is the rank of that object when all ids are sorted
// by raw bytes.
let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
oid_sorted.sort_by(|&left, &right| {
bit_order[left as usize]
.as_bytes()
.cmp(bit_order[right as usize].as_bytes())
});
let mut index_position = vec![0u32; object_count];
for (position, &slot) in oid_sorted.iter().enumerate() {
index_position[slot as usize] = position as u32;
}
// Reverse lookup from object id to its bit position.
let mut oid_to_pack = HashMap::with_capacity(object_count);
for (pack_pos, oid) in bit_order.iter().enumerate() {
oid_to_pack.insert(*oid, pack_pos as u32);
}
let mut object_types = Vec::with_capacity(object_count);
struct IndexedCommit {
oid: ObjectId,
pack_pos: u32,
index_pos: u32,
date: i64,
parent_count: usize,
}
// Record the type of every object and gather metadata for each commit.
let mut indexed_commits = Vec::new();
for (pack_pos, oid) in bit_order.iter().enumerate() {
// Use the header when available; otherwise fall back to reading the whole object.
let object_type = match db.read_object_header(oid)? {
Some((object_type, _)) => object_type,
None => db.read_object(oid)?.object_type,
};
object_types.push(object_type);
if object_type == ObjectType::Commit {
let object = db.read_object(oid)?;
let commit = Commit::parse_ref(format, &object.body)?;
indexed_commits.push(IndexedCommit {
oid: *oid,
pack_pos: pack_pos as u32,
index_pos: index_position[pack_pos],
date: commit_identity_timestamp(commit.committer),
parent_count: grafted_parents(db, oid, commit.parents).len(),
});
}
}
// Newest committer timestamp first.
indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
let mut selected: Vec<&IndexedCommit> = Vec::new();
let commit_count = indexed_commits.len() as u32;
if commit_count < 100 {
// With fewer than 100 commits, every commit is selected.
selected.extend(indexed_commits.iter());
} else {
let mut i = 0u32;
loop {
let next = bitmap_next_commit_index(i);
if i + next >= commit_count {
break;
}
// Default to the commit at the end of the window; within the window the
// first preferred tip wins outright, otherwise the last commit with two
// or more parents is chosen.
let mut chosen = &indexed_commits[(i + next) as usize];
if next > 0 {
for j in 0..=next {
let candidate = &indexed_commits[(i + j) as usize];
if preferred_tips.contains(&candidate.oid) {
chosen = candidate;
break;
}
if candidate.parent_count >= 2 {
chosen = candidate;
}
}
}
selected.push(chosen);
i += next + 1;
}
}
// Statistics are computed and emitted only when GIT_TRACE2_EVENT is set.
if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
}
let word_count = object_count.div_ceil(64);
// Finished bitsets of selected commits, reused when a later walk reaches them.
let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
// Iterate in reverse selection order (oldest committer date first) so that
// bitsets of older commits are already memoised when newer ones reach them.
for commit in selected.iter().rev() {
let mut acc = vec![0u64; word_count];
let mut pending = vec![commit.oid];
while let Some(oid) = pending.pop() {
let Some(&pack_pos) = oid_to_pack.get(&oid) else {
eprintln!(
"warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
);
return Ok(None);
};
if bitset_get(&acc, pack_pos) {
continue;
}
// A memoised commit contributes its whole closure at once.
if let Some(stored) = memo.get(&oid) {
bitset_or(&mut acc, stored);
continue;
}
bitset_set(&mut acc, pack_pos);
let object = db.read_object(&oid)?;
let tree = {
let parsed = Commit::parse_ref(format, &object.body)?;
pending.extend(grafted_parents(db, &oid, parsed.parents));
parsed.tree
};
// `false` means the tree walk hit an object outside `bit_order`.
if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
return Ok(None);
}
}
memo.insert(commit.oid, Arc::new(acc));
}
// Emit the bitmaps in selection order.
let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
for commit in &selected {
let words = match memo.get(&commit.oid) {
Some(words) => words,
None => continue,
};
writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
}
writer.write().map(Some)
}
/// Sets the bits for `tree` and everything reachable beneath it in `acc`.
///
/// Bit positions are looked up in `oid_to_pack`. Gitlink entries are skipped,
/// sub-trees are visited recursively, and every other entry has its bit set
/// directly. A tree whose bit is already set is treated as fully visited.
///
/// Returns `Ok(true)` once the closure is marked, or `Ok(false)` (after printing a
/// warning to stderr) as soon as an object is absent from `oid_to_pack`.
///
/// # Errors
///
/// Propagates failures from reading the tree object or decoding its entries.
fn bitmap_mark_tree(
    db: &impl ObjectReader,
    format: ObjectFormat,
    tree: &ObjectId,
    oid_to_pack: &HashMap<ObjectId, u32>,
    acc: &mut [u64],
) -> Result<bool> {
    // Single place that emits the "closure is incomplete" diagnostic.
    let warn_missing = |missing: &ObjectId| {
        eprintln!(
            "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {missing} is missing)"
        );
    };

    let tree_pos = match oid_to_pack.get(tree) {
        Some(&position) => position,
        None => {
            warn_missing(tree);
            return Ok(false);
        }
    };
    if bitset_get(acc, tree_pos) {
        return Ok(true);
    }
    bitset_set(acc, tree_pos);

    let object = db.read_object(tree)?;
    for entry in TreeEntries::new(format, &object.body) {
        let entry = entry?;
        if entry.is_gitlink() {
            continue;
        }
        if !entry.is_tree() {
            match oid_to_pack.get(&entry.oid) {
                Some(&blob_pos) => {
                    bitset_set(acc, blob_pos);
                }
                None => {
                    warn_missing(&entry.oid);
                    return Ok(false);
                }
            }
            continue;
        }
        let complete = bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)?;
        if !complete {
            return Ok(false);
        }
    }
    Ok(true)
}
/// A pack (or multi-pack) bitmap index expanded into plain `u64` word vectors.
///
/// All bitmaps share one position space: position `p` corresponds to
/// `pack_to_oid[p]`, and `oid_to_pack` is the inverse mapping.
pub struct LoadedPackBitmap {
/// Number of objects covered by the bitmap.
object_count: u32,
/// Object id -> bitmap position.
oid_to_pack: HashMap<ObjectId, u32>,
/// Bitmap position -> object id.
pack_to_oid: Vec<ObjectId>,
/// Expanded (XOR-resolved) bitmap for every commit that has a stored entry.
commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
/// Type bitmap selecting commit objects.
commits: Vec<u64>,
/// Type bitmap selecting tree objects.
trees: Vec<u64>,
/// Type bitmap selecting blob objects.
blobs: Vec<u64>,
/// Type bitmap selecting tag objects.
tags: Vec<u64>,
}
impl LoadedPackBitmap {
    /// Number of objects covered by this bitmap.
    pub fn object_count(&self) -> u32 {
        self.object_count
    }

    /// Bitmap position of `oid`, or `None` when the object is not covered.
    pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
        let position = self.oid_to_pack.get(oid)?;
        Some(*position)
    }

    /// Object id stored at bitmap `position`, if the position is in range.
    pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
        let slot = position as usize;
        self.pack_to_oid.get(slot)
    }

    /// Expanded bitmap stored for commit `oid`, if one exists.
    pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
        let words = self.commit_words.get(oid)?;
        Some(words)
    }

    /// Iterates over every commit that has a stored bitmap (in map order).
    pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
        self.commit_words.iter().map(|(oid, _)| oid)
    }

    /// Words of the type bitmap selecting objects of `object_type`.
    pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
        let words: &Vec<u64> = match object_type {
            ObjectType::Commit => &self.commits,
            ObjectType::Tree => &self.trees,
            ObjectType::Blob => &self.blobs,
            ObjectType::Tag => &self.tags,
        };
        words.as_slice()
    }

    /// Number of 64-bit words needed to hold one bit per object.
    fn word_count(&self) -> usize {
        let objects = self.object_count as usize;
        objects.div_ceil(64)
    }
}
/// Loads a usable bitmap index from `<objects_dir>/pack`, if there is one.
///
/// A multi-pack-index bitmap is tried first. Otherwise every `pack-*.bitmap` file is
/// tried in sorted path order and the first one that loads wins; files that fail to
/// load or do not match their index are skipped silently.
///
/// Returns `Ok(None)` when the pack directory does not exist or no bitmap is usable.
///
/// # Errors
///
/// Propagates errors from the multi-pack-index attempt and from listing the pack
/// directory.
pub fn load_pack_bitmap(
    objects_dir: &Path,
    format: ObjectFormat,
) -> Result<Option<LoadedPackBitmap>> {
    let pack_dir = objects_dir.join("pack");
    if !pack_dir.exists() {
        return Ok(None);
    }
    if let Some(from_midx) = load_midx_bitmap(&pack_dir, format)? {
        return Ok(Some(from_midx));
    }

    let mut candidates = Vec::new();
    for dir_entry in fs::read_dir(&pack_dir)? {
        let candidate = dir_entry?.path();
        let has_bitmap_extension =
            candidate.extension().and_then(|ext| ext.to_str()) == Some("bitmap");
        let has_pack_prefix = candidate
            .file_name()
            .and_then(|name| name.to_str())
            .is_some_and(|name| name.starts_with("pack-"));
        if has_bitmap_extension && has_pack_prefix {
            candidates.push(candidate);
        }
    }
    candidates.sort();

    // Both "not usable" and "failed to load" simply move on to the next candidate.
    let loaded = candidates
        .iter()
        .find_map(|candidate| load_pack_bitmap_file(candidate, format).ok().flatten());
    Ok(loaded)
}
/// Attempts to load the bitmap that accompanies `<pack_dir>/multi-pack-index`.
///
/// Every failure along the way (missing or unreadable files, parse errors, checksum
/// mismatch, out-of-range reverse-index entries) yields `Ok(None)` so the caller can
/// fall back to another bitmap. A wrongly sized reverse-index chunk additionally
/// prints an error and a warning to stderr.
///
/// The reverse index comes from the chunk embedded in the multi-pack-index unless
/// the chunk is absent or `GIT_TEST_MIDX_READ_RIDX` is `0`/`false`; in that case the
/// separate `multi-pack-index-<checksum>.rev` file is read instead.
fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
    const BAD_RIDX: &str = "multi-pack-index reverse-index chunk is the wrong size";
    let report_bad_ridx = || {
        eprintln!("error: {}", BAD_RIDX);
        eprintln!("warning: multi-pack bitmap is missing required reverse index");
    };

    let midx_path = pack_dir.join("multi-pack-index");
    if !midx_path.exists() {
        return Ok(None);
    }
    let midx_bytes = match fs::read(&midx_path) {
        Ok(bytes) => bytes,
        Err(_) => return Ok(None),
    };
    if midx_has_bad_ridx_chunk(&midx_bytes, format) {
        report_bad_ridx();
        return Ok(None);
    }
    let midx = match MultiPackIndex::parse(&midx_bytes, format) {
        Ok(midx) => midx,
        Err(GitError::InvalidFormat(message)) if message == BAD_RIDX => {
            report_bad_ridx();
            return Ok(None);
        }
        Err(_) => return Ok(None),
    };

    let checksum_hex = midx.checksum.to_hex();
    let bitmap_path = pack_dir.join(format!("multi-pack-index-{}.bitmap", checksum_hex));
    if !bitmap_path.exists() {
        return Ok(None);
    }
    let object_count = midx.objects.len();

    // Unset (or unreadable) means "use the embedded chunk".
    let read_ridx_chunk = match env::var("GIT_TEST_MIDX_READ_RIDX") {
        Ok(value) => !(value == "0" || value.eq_ignore_ascii_case("false")),
        Err(_) => true,
    };
    let reverse_index: Vec<u32> = match &midx.reverse_index {
        Some(chunk) if read_ridx_chunk => {
            sley_core::trace2::data("load_midx_revindex", "source", "midx");
            chunk.clone()
        }
        _ => {
            let rev_path = pack_dir.join(format!("multi-pack-index-{}.rev", checksum_hex));
            let rev_bytes = match fs::read(&rev_path) {
                Ok(bytes) => bytes,
                Err(_) => return Ok(None),
            };
            let parsed_rev =
                match sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count) {
                    Ok(parsed_rev) => parsed_rev,
                    Err(_) => return Ok(None),
                };
            sley_core::trace2::data("load_midx_revindex", "source", "rev");
            parsed_rev.positions
        }
    };

    let bitmap_bytes = match fs::read(&bitmap_path) {
        Ok(bytes) => bytes,
        Err(_) => return Ok(None),
    };
    let Ok(parsed) = PackBitmapIndex::parse(&bitmap_bytes, format, object_count) else {
        return Ok(None);
    };
    if parsed.pack_checksum != midx.checksum {
        return Ok(None);
    }

    // Bitmap position -> object id, following the reverse index.
    let mut pack_to_oid = Vec::with_capacity(object_count);
    for &midx_pos in &reverse_index {
        match midx.objects.get(midx_pos as usize) {
            Some(entry) => pack_to_oid.push(entry.oid),
            None => return Ok(None),
        }
    }
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    oid_to_pack.extend(
        pack_to_oid
            .iter()
            .enumerate()
            .map(|(pack_pos, oid)| (*oid, pack_pos as u32)),
    );

    let assembled =
        assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
            midx.objects.get(position).map(|entry| entry.oid)
        });
    Ok(assembled.ok())
}
/// Reports whether a multi-pack-index carries a `RIDX` chunk of the wrong size.
///
/// The chunk table is decoded by hand: 12-byte rows of a 4-byte id followed by a
/// big-endian 8-byte offset, starting at byte 12, with the row count taken from
/// byte 6 plus one terminating row. The object count is read from the last four
/// bytes of the `OIDF` chunk, and the `RIDX` chunk is considered bad when its
/// length differs from `4 * object_count`.
///
/// Anything else that looks wrong (short input, bad signature, inconsistent chunk
/// offsets, missing `OIDF`/`RIDX`, `OIDF` not exactly 1024 bytes) returns `false`.
fn midx_has_bad_ridx_chunk(bytes: &[u8], format: ObjectFormat) -> bool {
    const HEADER_LEN: usize = 12;
    const ROW_LEN: usize = 12;

    let hash_len = format.raw_len();
    let too_short = bytes.len() < HEADER_LEN + ROW_LEN + hash_len;
    if too_short || &bytes[..4] != b"MIDX" {
        return false;
    }
    // Chunks may not extend into the trailing checksum.
    let payload_end = bytes.len().saturating_sub(hash_len);

    let chunk_count = usize::from(bytes[6]);
    let Some(table_end) = (chunk_count + 1)
        .checked_mul(ROW_LEN)
        .and_then(|table_len| HEADER_LEN.checked_add(table_len))
        .filter(|&end| end <= payload_end)
    else {
        return false;
    };

    let entries: Vec<([u8; 4], usize)> = bytes[HEADER_LEN..table_end]
        .chunks_exact(ROW_LEN)
        .map(|row| {
            let mut id = [0u8; 4];
            id.copy_from_slice(&row[..4]);
            let mut raw_offset = [0u8; 8];
            raw_offset.copy_from_slice(&row[4..]);
            (id, u64::from_be_bytes(raw_offset) as usize)
        })
        .collect();

    // A chunk spans from its own offset to the offset of the following row.
    let mut oidf = None;
    let mut ridx = None;
    for pair in entries.windows(2) {
        let (id, start) = pair[0];
        let end = pair[1].1;
        if end < start || end > payload_end {
            return false;
        }
        if &id == b"OIDF" {
            oidf = Some((start, end));
        } else if &id == b"RIDX" {
            ridx = Some((start, end));
        }
    }
    let (Some((oidf_start, oidf_end)), Some((ridx_start, ridx_end))) = (oidf, ridx) else {
        return false;
    };
    if oidf_end.saturating_sub(oidf_start) != 256 * 4 {
        return false;
    }

    let mut raw_count = [0u8; 4];
    raw_count.copy_from_slice(&bytes[oidf_end - 4..oidf_end]);
    let object_count = u32::from_be_bytes(raw_count) as usize;
    ridx_end.saturating_sub(ridx_start) != object_count.saturating_mul(4)
}
fn load_pack_bitmap_file(
bitmap_path: &Path,
format: ObjectFormat,
) -> Result<Option<LoadedPackBitmap>> {
let index_path = bitmap_path.with_extension("idx");
if !index_path.exists() {
return Ok(None);
}
let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
let object_count = index.entries.len();
let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
if parsed.pack_checksum != index.pack_checksum {
return Ok(None);
}
let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
let mut pack_to_oid = Vec::with_capacity(object_count);
for index_pos in &pack_order {
pack_to_oid.push(index.entries[*index_pos as usize].oid);
}
let mut oid_to_pack = HashMap::with_capacity(object_count);
for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
oid_to_pack.insert(*oid, pack_pos as u32);
}
assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
index.entries.get(position).map(|entry| entry.oid)
})
.map(Some)
}
/// Expands a parsed bitmap index into a [`LoadedPackBitmap`].
///
/// Every stored bitmap is decoded into exactly `object_count.div_ceil(64)` words.
/// Entries with a non-zero `xor_offset` are XOR-ed against the already resolved
/// entry that many positions earlier. `lookup_oid` maps an entry's
/// `object_position` to the commit it describes.
///
/// # Errors
///
/// Returns `GitError::InvalidFormat` when an entry's XOR offset points before the
/// first entry or when `lookup_oid` cannot resolve an entry position, and
/// propagates any error from decoding a compressed bitmap.
fn assemble_loaded_bitmap(
    parsed: PackBitmapIndex,
    object_count: usize,
    pack_to_oid: Vec<ObjectId>,
    oid_to_pack: HashMap<ObjectId, u32>,
    lookup_oid: impl Fn(usize) -> Option<ObjectId>,
) -> Result<LoadedPackBitmap> {
    let word_count = object_count.div_ceil(64);
    let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
        let mut words = bitmap.to_words()?;
        words.resize(word_count, 0);
        Ok(words)
    };
    let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
    let mut commit_words = HashMap::with_capacity(parsed.entries.len());
    for (entry_index, entry) in parsed.entries.iter().enumerate() {
        let mut words = expand(&entry.bitmap)?;
        if entry.xor_offset > 0 {
            // A malformed file can claim a base before the first entry; report it
            // as a format error instead of panicking on the subtraction or index.
            let base = entry_index
                .checked_sub(entry.xor_offset as usize)
                .and_then(|base_index| resolved.get(base_index))
                .ok_or_else(|| {
                    GitError::InvalidFormat("bitmap entry xor offset out of range".into())
                })?;
            for (dst, src) in words.iter_mut().zip(base.iter()) {
                *dst ^= *src;
            }
        }
        let words = Arc::new(words);
        resolved.push(Arc::clone(&words));
        let commit_oid = lookup_oid(entry.object_position as usize)
            .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
        commit_words.insert(commit_oid, words);
    }
    Ok(LoadedPackBitmap {
        object_count: object_count as u32,
        oid_to_pack,
        pack_to_oid,
        commit_words,
        commits: expand(&parsed.type_bitmaps.commits)?,
        trees: expand(&parsed.type_bitmaps.trees)?,
        blobs: expand(&parsed.type_bitmaps.blobs)?,
        tags: expand(&parsed.type_bitmaps.tags)?,
    })
}
/// Outcome of a bitmap-assisted reachability walk.
pub struct BitmapWalkResult {
/// One bit per bitmap position; set bits are objects reached by the walk.
pub words: Vec<u64>,
/// Reached objects that have no position in the bitmap, with their types.
pub extended: Vec<(ObjectId, ObjectType)>,
}
impl BitmapWalkResult {
    /// Removes everything contained in `haves` from this result.
    ///
    /// Bits set in `haves.words` are cleared from `self.words` (word by word, up to
    /// the shorter of the two), and objects listed in `haves.extended` are dropped
    /// from `self.extended`.
    pub fn subtract(&mut self, haves: &BitmapWalkResult) {
        self.words
            .iter_mut()
            .zip(&haves.words)
            .for_each(|(word, have)| *word &= !*have);

        let excluded: HashSet<ObjectId> = haves.extended.iter().map(|entry| entry.0).collect();
        self.extended.retain(|entry| !excluded.contains(&entry.0));
    }
}
/// Computes the objects reachable from `roots`, using stored bitmaps where possible.
///
/// Each root is peeled through tags (every tag is marked) until a commit, tree or
/// blob is found. Trees are marked with their whole closure, blobs are marked
/// directly, and commits are walked: a commit with a stored bitmap contributes that
/// bitmap wholesale and is not descended into; any other commit is marked, its
/// (graft-aware) parents are queued and, when `include_objects` is set, its tree
/// closure is marked too.
///
/// Objects without a bitmap position are collected in the result's `extended` list.
///
/// # Errors
///
/// Propagates object read and parse failures.
pub fn bitmap_reachable(
    bitmap: &LoadedPackBitmap,
    db: &impl ObjectReader,
    format: ObjectFormat,
    roots: &[ObjectId],
    include_objects: bool,
) -> Result<BitmapWalkResult> {
    let mut walk = BitmapFillWalk {
        bitmap,
        words: vec![0u64; bitmap.word_count()],
        extended: Vec::new(),
        extended_seen: HashSet::new(),
    };

    // Phase 1: peel roots and seed the commit stack.
    let mut commit_stack: Vec<ObjectId> = Vec::new();
    for root in roots {
        let mut current = *root;
        loop {
            let object = db.read_object(&current)?;
            match object.object_type {
                ObjectType::Tag => {
                    walk.mark(&current, ObjectType::Tag);
                    let tag = Tag::parse_ref(format, &object.body)?;
                    current = tag.object;
                    continue;
                }
                ObjectType::Commit => commit_stack.push(current),
                ObjectType::Tree => walk.mark_tree_closure(db, format, &current)?,
                ObjectType::Blob => {
                    walk.mark(&current, ObjectType::Blob);
                }
            }
            break;
        }
    }

    // Phase 2: walk commits, short-circuiting on stored bitmaps.
    while let Some(oid) = commit_stack.pop() {
        match bitmap.pack_position(&oid) {
            Some(position) => {
                if bitset_get(&walk.words, position) {
                    continue;
                }
                if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
                    bitset_or(&mut walk.words, stored);
                    continue;
                }
                bitset_set(&mut walk.words, position);
            }
            None => {
                if !walk.extended_seen.insert(oid) {
                    continue;
                }
                walk.extended.push((oid, ObjectType::Commit));
            }
        }
        let object = db.read_object(&oid)?;
        let commit = Commit::parse_ref(format, &object.body)?;
        commit_stack.extend(grafted_parents(db, &oid, commit.parents));
        if include_objects {
            walk.mark_tree_closure(db, format, &commit.tree)?;
        }
    }

    let BitmapFillWalk {
        words, extended, ..
    } = walk;
    Ok(BitmapWalkResult { words, extended })
}
/// Mutable state shared by the marking helpers of a bitmap reachability walk.
struct BitmapFillWalk<'a> {
/// Bitmap providing the object id -> position mapping.
bitmap: &'a LoadedPackBitmap,
/// Result bits, one per bitmap position.
words: Vec<u64>,
/// Marked objects that have no bitmap position, in discovery order.
extended: Vec<(ObjectId, ObjectType)>,
/// Ids already recorded in `extended`, used to avoid duplicates.
extended_seen: HashSet<ObjectId>,
}
impl BitmapFillWalk<'_> {
    /// Marks `oid` as reached and reports whether it was newly marked.
    ///
    /// Objects with a bitmap position get their bit set; all others are appended to
    /// `extended` (once) with the given `object_type`.
    fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
        match self.bitmap.pack_position(oid) {
            Some(position) if bitset_get(&self.words, position) => false,
            Some(position) => {
                bitset_set(&mut self.words, position);
                true
            }
            None => {
                let newly_seen = self.extended_seen.insert(*oid);
                if newly_seen {
                    self.extended.push((*oid, object_type));
                }
                newly_seen
            }
        }
    }

    /// Marks `tree` and, if it was not marked before, everything beneath it.
    ///
    /// Gitlink entries are skipped, sub-trees are visited recursively and all other
    /// entries are marked as blobs.
    ///
    /// # Errors
    ///
    /// Propagates failures from reading a tree or decoding its entries.
    fn mark_tree_closure(
        &mut self,
        db: &impl ObjectReader,
        format: ObjectFormat,
        tree: &ObjectId,
    ) -> Result<()> {
        let newly_marked = self.mark(tree, ObjectType::Tree);
        if !newly_marked {
            return Ok(());
        }
        let object = db.read_object(tree)?;
        for entry in TreeEntries::new(format, &object.body) {
            let entry = entry?;
            if entry.is_gitlink() {
                continue;
            }
            if !entry.is_tree() {
                self.mark(&entry.oid, ObjectType::Blob);
                continue;
            }
            self.mark_tree_closure(db, format, &entry.oid)?;
        }
        Ok(())
    }
}
/// In-memory object store keyed by object id.
#[derive(Debug)]
pub struct ObjectDatabase {
/// Hash format used to compute object ids.
format: ObjectFormat,
/// Stored objects, guarded by a mutex so `&self` methods can insert.
objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
/// Flag set through `with_promisor`; not read by the methods visible in this
/// part of the file. NOTE(review): confirm where it is consumed.
promisor: bool,
}
impl ObjectDatabase {
    /// Creates an empty store for objects hashed with `format`.
    pub fn new(format: ObjectFormat) -> Self {
        let objects = Mutex::new(HashMap::new());
        Self {
            format,
            objects,
            promisor: false,
        }
    }

    /// Builder-style setter for the promisor flag.
    pub fn with_promisor(mut self, promisor: bool) -> Self {
        self.promisor = promisor;
        self
    }

    /// Reports whether `oid` is stored; a poisoned lock counts as "not stored".
    pub fn contains(&self, oid: &ObjectId) -> bool {
        match self.objects.lock() {
            Ok(objects) => objects.contains_key(oid),
            Err(_) => false,
        }
    }

    /// Re-hashes the object stored under `oid` and checks that the id matches.
    ///
    /// # Errors
    ///
    /// Fails when the object cannot be read or hashed, and returns
    /// `GitError::InvalidObject` when the recomputed id differs from `oid`.
    pub fn validate(&self, oid: &ObjectId) -> Result<()> {
        let actual = self.read_object(oid)?.object_id(self.format)?;
        if actual != *oid {
            return Err(GitError::InvalidObject(format!(
                "object id mismatch: expected {oid}, got {actual}"
            )));
        }
        Ok(())
    }
}
impl ObjectReader for ObjectDatabase {
    /// Returns the stored object for `oid`, falling back to the implied empty tree.
    ///
    /// # Errors
    ///
    /// Returns a "not found" error (read context) when the object is neither stored
    /// nor the empty tree.
    ///
    /// NOTE(review): a poisoned lock is also reported as "not found", whereas
    /// `write_object` reports it as an I/O error — confirm this is intended.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
        let not_found = || GitError::object_not_found_in(*oid, MissingObjectContext::Read);
        let Ok(objects) = self.objects.lock() else {
            return Err(not_found());
        };
        let outcome = match objects.get(oid) {
            Some(stored) => Ok(Arc::clone(stored)),
            None => implied_empty_tree_object(self.format, oid).ok_or_else(not_found),
        };
        outcome
    }
}
impl ObjectWriter for ObjectDatabase {
    /// Stores `object` under its computed id and returns that id.
    ///
    /// An object that is already present is left untouched.
    ///
    /// # Errors
    ///
    /// Fails when the id cannot be computed or the internal lock is poisoned.
    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
        let oid = object.object_id(self.format)?;
        let mut objects = self
            .objects
            .lock()
            .map_err(|_| GitError::Io("object cache lock poisoned".into()))?;
        if let std::collections::hash_map::Entry::Vacant(slot) = objects.entry(oid) {
            slot.insert(Arc::new(object));
        }
        Ok(oid)
    }
}
/// An alternate object store location.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alternate {
/// Filesystem path of the alternate (presumably an objects directory — confirm
/// against the code that constructs it).
pub path: std::path::PathBuf,
}
/// Settings describing how a partial clone treats objects it does not hold.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartialClonePolicy {
/// Name of the promisor remote, if one is configured.
pub promisor_remote: Option<String>,
/// Whether promised-but-missing objects are tolerated (semantics are defined by
/// the consumers of this struct, which are not visible here).
pub allow_missing_promised_objects: bool,
}
/// Shared, mutex-guarded map from pack path to that pack's loaded contents.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;
/// Contents of a pack file, either memory-mapped or read onto the heap.
#[derive(Debug)]
enum PackData {
/// Memory-mapped file (only with the `mmap` feature).
#[cfg(feature = "mmap")]
Mapped(sley_mmap::MappedFile),
/// Entire file read into memory.
Heap(Vec<u8>),
}
/// Lets either storage variant be used directly as a byte slice.
impl std::ops::Deref for PackData {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        match self {
            Self::Heap(bytes) => bytes.as_slice(),
            #[cfg(feature = "mmap")]
            Self::Mapped(mapped) => mapped,
        }
    }
}
/// Loads a pack file, preferring a memory map and falling back to a heap read.
///
/// # Errors
///
/// Returns the read error when the fallback read fails; a mapping failure alone is
/// not an error.
#[cfg(feature = "mmap")]
fn load_pack_data(pack_path: &Path) -> Result<PackData> {
    if let Ok(mapped) = sley_mmap::MappedFile::open_pack(pack_path) {
        return Ok(PackData::Mapped(mapped));
    }
    let bytes = fs::read(pack_path)?;
    Ok(PackData::Heap(bytes))
}

/// Loads a pack file by reading it fully into memory.
///
/// # Errors
///
/// Returns the read error when the file cannot be read.
#[cfg(not(feature = "mmap"))]
fn load_pack_data(pack_path: &Path) -> Result<PackData> {
    let bytes = fs::read(pack_path)?;
    Ok(PackData::Heap(bytes))
}
/// Loads a pack index as a shared byte source, preferring a memory map.
///
/// NOTE(review): the mapping goes through `MappedFile::open_pack` even though the
/// path is an index file — confirm that opener is appropriate for `.idx` files.
///
/// # Errors
///
/// Returns the read error when the heap fallback fails.
#[cfg(feature = "mmap")]
fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
    if let Ok(mapped) = sley_mmap::MappedFile::open_pack(index_path) {
        return Ok(Arc::new(mapped));
    }
    let bytes = fs::read(index_path)?;
    Ok(Arc::new(bytes))
}

/// Loads a pack index as a shared byte source by reading it into memory.
///
/// # Errors
///
/// Returns the read error when the file cannot be read.
#[cfg(not(feature = "mmap"))]
fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
    let bytes = fs::read(index_path)?;
    Ok(Arc::new(bytes))
}
/// Loads a multi-pack-index as a shared byte source, preferring a memory map.
///
/// # Errors
///
/// Returns the read error when the heap fallback fails.
#[cfg(feature = "mmap")]
fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
    if let Ok(mapped) = sley_mmap::MappedFile::open_multi_pack_index(midx_path) {
        return Ok(Arc::new(mapped));
    }
    let bytes = fs::read(midx_path)?;
    Ok(Arc::new(bytes))
}

/// Loads a multi-pack-index as a shared byte source by reading it into memory.
///
/// # Errors
///
/// Returns the read error when the file cannot be read.
#[cfg(not(feature = "mmap"))]
fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
    let bytes = fs::read(midx_path)?;
    Ok(Arc::new(bytes))
}
/// Shared LRU cache of decoded objects keyed by object id.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;
/// Per-pack (keyed by pack path) LRU caches keyed by pack offset.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;
/// Cache from pack offset to an `(ObjectType, u64)` header pair.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;
/// Per-pack (keyed by pack path) header caches.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;
/// Default byte budget of the decoded-object cache (96 MiB).
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;
/// Default byte budget of a delta-base cache (96 MiB).
const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
/// Estimated cache cost of `object`: its body length plus a fixed 64 bytes,
/// saturating instead of overflowing.
fn cached_object_cost(object: &EncodedObject) -> usize {
    const FIXED_COST: usize = 64;
    let body_len = object.body.len();
    FIXED_COST.saturating_add(body_len)
}
/// Reads a byte budget from the environment variable `var`.
///
/// The value is trimmed and parsed as `usize`; `default` is returned when the
/// variable is unset, not valid Unicode, or does not parse.
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
/// Byte budget of the decoded-object cache.
///
/// Resolved once per process from `SLEY_OBJECT_CACHE_BYTES`, defaulting to
/// `DEFAULT_OBJECT_CACHE_BYTES`; later changes to the variable are not observed.
fn object_cache_budget() -> usize {
    static BUDGET: OnceLock<usize> = OnceLock::new();
    let resolve = || cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES);
    *BUDGET.get_or_init(resolve)
}
/// Byte budget of a delta-base cache.
///
/// Resolved once per process from `SLEY_DELTA_BASE_CACHE_BYTES`, defaulting to
/// `DEFAULT_DELTA_BASE_CACHE_BYTES`; later changes to the variable are not observed.
fn delta_base_cache_budget() -> usize {
    static BUDGET: OnceLock<usize> = OnceLock::new();
    let resolve = || {
        cache_budget_from_env("SLEY_DELTA_BASE_CACHE_BYTES", DEFAULT_DELTA_BASE_CACHE_BYTES)
    };
    *BUDGET.get_or_init(resolve)
}
/// Reports whether `SLEY_VERIFY_READS` requests read verification.
///
/// The variable is consulted once per process. It counts as enabled when it is set
/// to anything other than an empty (after trimming) value or `0`.
fn verify_reads_enabled() -> bool {
    static VERIFY: OnceLock<bool> = OnceLock::new();
    *VERIFY.get_or_init(|| {
        let Ok(raw) = env::var("SLEY_VERIFY_READS") else {
            return false;
        };
        let setting = raw.trim();
        !(setting.is_empty() || setting == "0")
    })
}
/// Byte-budgeted cache of encoded objects keyed by `K`.
///
/// Entries form a doubly linked list threaded through `map` by key: `head` is the
/// next entry to evict and `tail` the most recently inserted or touched one.
#[derive(Debug)]
struct LruCache<K: std::hash::Hash + Eq + Clone> {
/// Maximum total cost (see `cached_object_cost`) the cache may hold.
budget: usize,
/// Sum of the costs of all entries currently stored.
used: usize,
/// Entry storage, including each entry's list links.
map: HashMap<K, LruEntry<K>>,
/// Front of the list (eviction end), `None` when empty.
head: Option<K>,
/// Back of the list (most recently used end), `None` when empty.
tail: Option<K>,
}
/// One cached object together with its neighbours in the LRU list.
#[derive(Debug)]
struct LruEntry<K> {
/// The cached object.
object: Arc<EncodedObject>,
/// Key of the neighbour towards `head`, `None` for the first entry.
prev: Option<K>,
/// Key of the neighbour towards `tail`, `None` for the last entry.
next: Option<K>,
}
impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
/// Creates an empty cache that may hold up to `budget` cost units.
fn new(budget: usize) -> Self {
Self {
budget,
used: 0,
map: HashMap::new(),
head: None,
tail: None,
}
}
/// Returns the object stored under `key` and moves it to the back of the list.
fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
let object = Arc::clone(&self.map.get(key)?.object);
self.touch(key);
Some(object)
}
/// Moves `key` to the back of the list; a no-op when it is already the tail or
/// not present.
fn touch(&mut self, key: &K) {
if self.tail.as_ref() == Some(key) {
return;
}
if self.map.contains_key(key) {
self.detach(key);
self.attach_back(key.clone());
}
}
/// Removes `key` (if present), releasing its cost and unlinking it from the list.
fn remove(&mut self, key: &K) {
if let Some(entry) = self.map.get(key) {
self.used = self.used.saturating_sub(cached_object_cost(&entry.object));
}
// Unlink while the entry is still in the map: `detach` reads its links there.
self.detach(key);
self.map.remove(key);
}
/// Unlinks `key` from the list without removing it from `map`.
///
/// The entry's own links are cleared and its former neighbours (or `head`/`tail`)
/// are joined together.
fn detach(&mut self, key: &K) {
let Some((prev, next)) = self.map.get_mut(key).map(|entry| {
let prev = entry.prev.take();
let next = entry.next.take();
(prev, next)
}) else {
return;
};
match &prev {
Some(prev_key) => {
if let Some(prev_entry) = self.map.get_mut(prev_key) {
prev_entry.next = next.clone();
}
}
// No predecessor: the entry was the head.
None => self.head = next.clone(),
}
match &next {
Some(next_key) => {
if let Some(next_entry) = self.map.get_mut(next_key) {
next_entry.prev = prev.clone();
}
}
// No successor: the entry was the tail.
None => self.tail = prev.clone(),
}
}
/// Links `key` (which must already be in `map` and unlinked) as the new tail.
fn attach_back(&mut self, key: K) {
let previous_tail = self.tail.replace(key.clone());
match previous_tail {
Some(tail_key) => {
if let Some(tail_entry) = self.map.get_mut(&tail_key) {
tail_entry.next = Some(key.clone());
}
if let Some(entry) = self.map.get_mut(&key) {
entry.prev = Some(tail_key);
entry.next = None;
}
}
// The list was empty, so the entry becomes both head and tail.
None => {
self.head = Some(key.clone());
if let Some(entry) = self.map.get_mut(&key) {
entry.prev = None;
entry.next = None;
}
}
}
}
/// Drops every entry and resets the accounting.
fn clear(&mut self) {
self.map.clear();
self.head = None;
self.tail = None;
self.used = 0;
}
/// Inserts or replaces the object stored under `key`, then evicts from the head
/// until the total cost fits the budget again.
///
/// Nothing is stored when the budget is zero. An object whose cost alone exceeds
/// the budget is not stored either, and any existing entry for `key` is removed.
fn put(&mut self, key: K, object: Arc<EncodedObject>) {
if self.budget == 0 {
return;
}
let cost = cached_object_cost(&object);
if cost > self.budget {
self.remove(&key);
return;
}
if let Some(entry) = self.map.get_mut(&key) {
// Replacement: swap the payload, adjust the accounting, refresh recency.
let previous = std::mem::replace(&mut entry.object, object);
self.used = self
.used
.saturating_sub(cached_object_cost(&previous))
.saturating_add(cost);
self.touch(&key);
} else {
self.used = self.used.saturating_add(cost);
self.map.insert(
key.clone(),
LruEntry {
object,
prev: None,
next: None,
},
);
self.attach_back(key);
}
// Evict least recently used entries until the budget is respected.
while self.used > self.budget {
let Some(evicted) = self.head.clone() else {
break;
};
self.remove(&evicted);
}
}
}
/// LRU cache keyed by object id.
type LruObjectCache = LruCache<ObjectId>;
/// LRU cache keyed by a `u64` offset.
type LruOffsetCache = LruCache<u64>;
/// Exposes a shared offset-keyed LRU cache through `sley_pack::PackDeltaCache`.
struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);

impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
    /// Looks up `offset`; a poisoned lock behaves like a cache miss.
    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
        let mut cache = self.0.lock().ok()?;
        let cached = cache.get(&offset);
        cached
    }

    /// Stores `object` under `offset`; silently skipped when the lock is poisoned.
    fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
        match self.0.lock() {
            Ok(mut cache) => cache.put(offset, object),
            Err(_) => {}
        }
    }
}
/// Exposes a shared header cache through `sley_pack::HeaderTypeCache`.
struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);

impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
    /// Looks up the header cached for `pack_offset`; a poisoned lock is a miss.
    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
        let cache = self.0.lock().ok()?;
        let header = cache.get(&pack_offset).copied();
        header
    }

    /// Records `header` for `pack_offset`; skipped when the lock is poisoned.
    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
        let Ok(mut cache) = self.0.lock() else {
            return;
        };
        cache.insert(pack_offset, header);
    }
}
/// Shared cache of parsed pack indexes keyed by path.
type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;
/// Shared cache of parsed multi-pack-indexes keyed by path.
type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;
/// Shared cache of multi-pack-index object-id lookup structures keyed by path.
type MultiPackIndexOidLookupCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndexOidLookup>>>>;
/// A pack known to the registry, with lazily populated per-pack caches.
#[derive(Debug)]
struct RegisteredPack {
/// Path of the pack's index file.
idx: PathBuf,
/// Path of the pack file itself.
pack: PathBuf,
/// Parsed index view, filled on first use by `index`.
index: Mutex<Option<Arc<PackIndexViewData>>>,
/// Pack contents, filled on first use by `bytes`.
data: Mutex<Option<Arc<PackData>>>,
/// Offset-keyed object cache for this pack.
delta_cache: Arc<Mutex<LruOffsetCache>>,
/// Offset-keyed header cache for this pack.
header_type_cache: PackHeaderTypeCache,
}
impl RegisteredPack {
    /// Registers a pack by its index and pack paths; nothing is loaded yet.
    fn new(idx: PathBuf, pack: PathBuf) -> Self {
        let delta_cache = Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())));
        let header_type_cache = Arc::new(Mutex::new(HashMap::new()));
        Self {
            idx,
            pack,
            index: Mutex::new(None),
            data: Mutex::new(None),
            delta_cache,
            header_type_cache,
        }
    }

    /// Returns the parsed index view, loading and caching it on first use.
    ///
    /// The index is parsed as a trusted source without checksum verification. A
    /// poisoned cache lock only disables caching; it does not fail the call.
    ///
    /// # Errors
    ///
    /// Propagates failures from loading or parsing the index file.
    fn index(&self, format: ObjectFormat) -> Result<Arc<PackIndexViewData>> {
        if let Ok(cache) = self.index.lock() {
            if let Some(cached) = cache.as_ref() {
                return Ok(Arc::clone(cached));
            }
        }
        let source = load_pack_index_data(&self.idx)?;
        let parsed = PackIndexViewData::parse_trusted_source_without_checksum(source, format)?;
        let parsed = Arc::new(parsed);
        if let Ok(mut cache) = self.index.lock() {
            *cache = Some(Arc::clone(&parsed));
        }
        Ok(parsed)
    }

    /// Returns the pack contents, consulting the per-pack slot, then the shared
    /// `pack_bytes` cache, and finally loading the file.
    ///
    /// Whatever is found or loaded is remembered in the per-pack slot; freshly
    /// loaded data is also published to `pack_bytes`. Poisoned locks are skipped.
    ///
    /// # Errors
    ///
    /// Propagates the failure when the pack file has to be loaded and cannot be.
    fn bytes(&self, pack_bytes: &PackBytesCache) -> Result<Arc<PackData>> {
        if let Ok(local) = self.data.lock() {
            if let Some(cached) = local.as_ref() {
                return Ok(Arc::clone(cached));
            }
        }
        if let Ok(shared) = pack_bytes.lock() {
            if let Some(cached) = shared.get(&self.pack) {
                let cached = Arc::clone(cached);
                if let Ok(mut local) = self.data.lock() {
                    *local = Some(Arc::clone(&cached));
                }
                return Ok(cached);
            }
        }
        let loaded = Arc::new(load_pack_data(&self.pack)?);
        if let Ok(mut local) = self.data.lock() {
            *local = Some(Arc::clone(&loaded));
        }
        if let Ok(mut shared) = pack_bytes.lock() {
            shared.insert(self.pack.clone(), Arc::clone(&loaded));
        }
        Ok(loaded)
    }
}
/// Cheap summary of a pack directory, compared for equality between snapshots.
///
/// NOTE(review): the code that computes these values is outside this part of the
/// file; the field descriptions below follow the field names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PackDirFingerprint {
/// Modification time, when available.
modified: Option<std::time::SystemTime>,
/// Presumably the number of `.idx` files seen.
idx_count: usize,
/// Presumably the number of `.pack` files seen.
pack_count: usize,
}
/// The set of registered packs captured together with a directory fingerprint.
#[derive(Debug)]
struct PackRegistrySnapshot {
/// Fingerprint recorded when this snapshot was created.
fingerprint: PackDirFingerprint,
/// Packs belonging to this snapshot.
packs: Vec<Arc<RegisteredPack>>,
/// Index into `packs` remembered via `remember_hint`, if any.
recent_pack: Mutex<Option<usize>>,
}
impl PackRegistrySnapshot {
    /// Creates a snapshot with no remembered pack hint.
    fn new(fingerprint: PackDirFingerprint, packs: Vec<Arc<RegisteredPack>>) -> Self {
        let recent_pack = Mutex::new(None);
        Self {
            fingerprint,
            packs,
            recent_pack,
        }
    }

    /// Returns the remembered pack index, provided it is still within `packs`.
    ///
    /// A poisoned lock yields `None`.
    fn cached_hint(&self) -> Option<usize> {
        let remembered = *self.recent_pack.lock().ok()?;
        remembered.filter(|&pack_index| pack_index < self.packs.len())
    }

    /// Remembers `pack_index` as the hint; ignored when the lock is poisoned.
    fn remember_hint(&self, pack_index: usize) {
        let Ok(mut hint) = self.recent_pack.lock() else {
            return;
        };
        *hint = Some(pack_index);
    }
}
/// Shared slot holding the current pack registry snapshot, if one has been built.
type PackRegistryCache = Arc<Mutex<Option<Arc<PackRegistrySnapshot>>>>;
/// Location of something inside a pack: which pack, and at which offset.
#[derive(Debug, Clone)]
struct PackLookup {
/// Path of the pack file.
pack: PathBuf,
/// The registered pack, when the lookup came from the registry; `None` when it
/// was built from a bare path.
registered: Option<Arc<RegisteredPack>>,
/// Offset within the pack (presumably the object's byte offset — confirm).
offset: u64,
}
impl PackLookup {
    /// Builds a lookup that refers to a registered pack.
    fn from_registered(pack: Arc<RegisteredPack>, offset: u64) -> Self {
        let pack_path = pack.pack.clone();
        Self {
            pack: pack_path,
            registered: Some(pack),
            offset,
        }
    }

    /// Builds a lookup from a bare pack path, without a registered pack.
    fn from_path(pack: PathBuf, offset: u64) -> Self {
        let registered = None;
        Self {
            pack,
            registered,
            offset,
        }
    }

    /// Path of the pack file.
    fn pack_path(&self) -> &Path {
        self.pack.as_path()
    }

    /// Pack contents, via the registered pack when available and otherwise via the
    /// database's path-keyed cache.
    fn pack_bytes(&self, database: &FileObjectDatabase) -> Result<Arc<PackData>> {
        if let Some(registered) = &self.registered {
            registered.bytes(&database.pack_bytes)
        } else {
            database.cached_pack_bytes(&self.pack)
        }
    }

    /// Parsed pack index, located through the registered pack's index path or by
    /// swapping the pack path's extension for `idx`.
    fn pack_index(&self, database: &FileObjectDatabase) -> Result<Arc<PackIndex>> {
        if let Some(registered) = &self.registered {
            database.cached_pack_index(&registered.idx)
        } else {
            let idx_path = self.pack.with_extension("idx");
            database.cached_pack_index(&idx_path)
        }
    }

    /// Offset-keyed object cache for this pack, if one is available.
    fn delta_cache(&self, database: &FileObjectDatabase) -> Option<Arc<Mutex<LruOffsetCache>>> {
        if let Some(registered) = &self.registered {
            Some(Arc::clone(&registered.delta_cache))
        } else {
            database.pack_delta_cache(&self.pack)
        }
    }

    /// Offset-keyed header cache for this pack, if one is available.
    fn header_type_cache(&self, database: &FileObjectDatabase) -> Option<PackHeaderTypeCache> {
        if let Some(registered) = &self.registered {
            Some(Arc::clone(&registered.header_type_cache))
        } else {
            database.pack_header_type_cache(&self.pack)
        }
    }
}
/// Object database backed by an objects directory on disk.
///
/// The type is `Clone`; the `Arc`-wrapped caches below are shared between clones.
#[derive(Debug, Clone)]
pub struct FileObjectDatabase {
/// Loose-object store.
loose: LooseObjectStore,
/// Root objects directory; packs live in its `pack` subdirectory.
objects_dir: PathBuf,
/// Alternate object directories consulted in addition to `objects_dir`.
alternates: Vec<PathBuf>,
/// Hash format of the stored objects.
format: ObjectFormat,
/// Loaded pack contents keyed by pack path.
pack_bytes: PackBytesCache,
/// Parsed pack indexes keyed by path.
pack_indexes: PackIndexCache,
/// Parsed multi-pack-indexes keyed by path.
multi_pack_indexes: MultiPackIndexCache,
/// Multi-pack-index object-id lookup structures keyed by path.
multi_pack_oid_lookups: MultiPackIndexOidLookupCache,
/// Current pack registry snapshot.
pack_registry: PackRegistryCache,
/// LRU cache of decoded objects.
decoded: DecodedObjectCache,
/// Per-pack offset-keyed object caches for packs without a registry entry
/// (see `PackLookup::delta_cache`).
pack_deltas: PackDeltaCaches,
/// Per-pack header caches for packs without a registry entry
/// (see `PackLookup::header_type_cache`).
pack_header_types: PackHeaderTypeCaches,
/// Lazily initialised set of object ids (presumably promised objects — confirm
/// against the code that fills it).
promisor_objects: Arc<OnceLock<HashSet<ObjectId>>>,
/// Lazily initialised set of object ids (presumably shallow graft points —
/// confirm against the code that fills it).
shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
}
/// Stateful helper that answers repeated "is this object present?" queries.
///
/// It keeps the multi-pack-index lookup, the pack registry snapshot and the parsed
/// pack indexes between calls so that consecutive checks can reuse them.
#[derive(Debug)]
pub struct ObjectPresenceChecker {
/// Database whose loose store, packs and alternates are consulted.
db: FileObjectDatabase,
/// `<objects_dir>/pack`.
pack_dir: PathBuf,
/// Multi-pack-index lookup, once prepared (may be absent).
midx: Option<Arc<MultiPackIndexOidLookup>>,
/// Pack registry snapshot, once prepared.
registry: Option<Arc<PackRegistrySnapshot>>,
/// Parsed index per registry pack, filled lazily; parallel to `registry.packs`.
registry_indexes: Vec<Option<Arc<PackIndexViewData>>>,
/// Registry index of the pack that satisfied the most recent hit.
recent_pack: Option<usize>,
/// Whether the multi-pack-index lookup has been prepared.
prepared_packs: bool,
/// Whether the registry snapshot has been prepared.
prepared_registry: bool,
}
impl ObjectPresenceChecker {
    /// Creates a checker for `db`; no pack metadata is loaded until first use.
    fn new(db: FileObjectDatabase) -> Self {
        let pack_dir = db.objects_dir.join("pack");
        Self {
            db,
            pack_dir,
            midx: None,
            registry: None,
            registry_indexes: Vec::new(),
            recent_pack: None,
            prepared_packs: false,
            prepared_registry: false,
        }
    }

    /// Reports whether `oid` exists in the store.
    ///
    /// Lookup order: loose objects, packs using the prepared metadata, packs after a
    /// forced rescan, each alternate (without following its own alternates), and
    /// finally the loose store again after invalidating its cache.
    ///
    /// # Errors
    ///
    /// Returns `GitError::InvalidObjectId` when `oid` uses a different hash format
    /// than the store, and propagates I/O or parse failures from the lookups.
    pub fn contains(&mut self, oid: &ObjectId) -> Result<bool> {
        if oid.format() != self.db.format {
            return Err(GitError::InvalidObjectId(format!(
                "object {oid} uses {}, store uses {}",
                oid.format().name(),
                self.db.format.name()
            )));
        }
        if self.db.loose.exists(oid)? {
            return Ok(true);
        }
        if self.find_packed(oid, false)? {
            return Ok(true);
        }
        // Packs may have appeared since the metadata was prepared; look again.
        if self.find_packed(oid, true)? {
            return Ok(true);
        }
        for alternate in &self.db.alternates {
            if FileObjectDatabase::without_alternates(alternate, self.db.format).contains(oid)? {
                return Ok(true);
            }
        }
        self.db.loose.invalidate_cache();
        self.db.loose.exists(oid)
    }

    /// Looks for `oid` in the multi-pack-index and then in the registered packs.
    fn find_packed(&mut self, oid: &ObjectId, force_rescan: bool) -> Result<bool> {
        self.prepare_packs(force_rescan)?;
        if let Some(midx) = &self.midx
            && midx.contains(oid)
        {
            return Ok(true);
        }
        self.prepare_registry(force_rescan)?;
        self.find_in_registry(oid)
    }

    /// Loads the multi-pack-index lookup once, or again when `force_rescan` is set.
    fn prepare_packs(&mut self, force_rescan: bool) -> Result<()> {
        if self.prepared_packs && !force_rescan {
            return Ok(());
        }
        let midx_path = self.pack_dir.join("multi-pack-index");
        self.midx = self.db.cached_multi_pack_index_oid_lookup(&midx_path)?;
        self.prepared_packs = true;
        Ok(())
    }

    /// Fetches the registry snapshot once, or again when `force_rescan` is set.
    ///
    /// When a different snapshot is returned, the per-pack index slots and the
    /// recent-pack hint are reset because they refer to the old pack list.
    fn prepare_registry(&mut self, force_rescan: bool) -> Result<()> {
        if self.prepared_registry && !force_rescan {
            return Ok(());
        }
        let registry = self.db.cached_pack_registry(&self.pack_dir, force_rescan)?;
        let registry_changed = match self.registry.as_ref() {
            Some(cached) => !Arc::ptr_eq(cached, &registry),
            None => true,
        };
        if registry_changed {
            self.registry_indexes = vec![None; registry.packs.len()];
            self.recent_pack = None;
            self.registry = Some(registry);
        }
        self.prepared_registry = true;
        Ok(())
    }

    /// Searches the registered packs for `oid`, trying the most recent hit first.
    fn find_in_registry(&mut self, oid: &ObjectId) -> Result<bool> {
        let Some(registry) = self.registry.as_ref().map(Arc::clone) else {
            return Ok(false);
        };
        if let Some(pack_index) = self
            .recent_pack
            .filter(|pack_index| *pack_index < registry.packs.len())
        {
            let index = self.registry_index(&registry, pack_index)?;
            if index.find(oid).is_some() {
                return Ok(true);
            }
        }
        for pack_index in 0..registry.packs.len() {
            // Already checked above.
            if Some(pack_index) == self.recent_pack {
                continue;
            }
            let index = self.registry_index(&registry, pack_index)?;
            if index.find(oid).is_some() {
                self.recent_pack = Some(pack_index);
                return Ok(true);
            }
        }
        Ok(false)
    }

    /// Returns the parsed index of pack `pack_index`, loading it on first use.
    ///
    /// The slot vector is rebuilt (and the hint cleared) if its length no longer
    /// matches the registry.
    fn registry_index(
        &mut self,
        registry: &PackRegistrySnapshot,
        pack_index: usize,
    ) -> Result<Arc<PackIndexViewData>> {
        if self.registry_indexes.len() != registry.packs.len() {
            self.registry_indexes = vec![None; registry.packs.len()];
            self.recent_pack = None;
        }
        if let Some(index) = self
            .registry_indexes
            .get(pack_index)
            .and_then(|index| index.as_ref())
        {
            return Ok(Arc::clone(index));
        }
        let index = registry.packs[pack_index].index(self.db.format)?;
        if let Some(slot) = self.registry_indexes.get_mut(pack_index) {
            *slot = Some(Arc::clone(&index));
        }
        Ok(index)
    }
}
/// Parses the object ids listed in `shallow_file`, one per line.
///
/// Lines are trimmed before parsing and lines that are not valid hex ids for
/// `format` are ignored. An unreadable file yields an empty set.
fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
    let mut grafts = HashSet::new();
    if let Ok(contents) = std::fs::read_to_string(shallow_file) {
        for line in contents.lines() {
            if let Ok(oid) = ObjectId::from_hex(format, line.trim()) {
                grafts.insert(oid);
            }
        }
    }
    grafts
}
/// Resolves the objects directory for the repository at `git_dir`.
///
/// `GIT_OBJECT_DIRECTORY` takes precedence when set; otherwise the result is the
/// `objects` directory inside the repository's common directory.
pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    match env::var_os("GIT_OBJECT_DIRECTORY") {
        Some(overridden) => PathBuf::from(overridden),
        None => repository_common_dir(git_dir).join("objects"),
    }
}
/// Resolves the common directory for the repository at `git_dir`.
///
/// Resolution order:
/// 1. `GIT_COMMON_DIR`, used verbatim when set.
/// 2. The path stored in `<git_dir>/commondir` (trimmed); relative paths are joined
///    onto `git_dir`, and the result is canonicalised when possible.
/// 3. `git_dir` itself.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    match env::var_os("GIT_COMMON_DIR") {
        Some(overridden) => PathBuf::from(overridden),
        None => {
            let git_dir = git_dir.as_ref();
            let Ok(contents) = fs::read_to_string(git_dir.join("commondir")) else {
                return git_dir.to_path_buf();
            };
            let recorded = PathBuf::from(contents.trim());
            let common = match recorded.is_absolute() {
                true => recorded,
                false => git_dir.join(recorded),
            };
            // Fall back to the un-canonicalised path if it cannot be resolved.
            match fs::canonicalize(&common) {
                Ok(canonical) => canonical,
                Err(_) => common,
            }
        }
    }
}
/// Lists every object id (loose and packed) in the repository at `git_dir`.
///
/// # Errors
///
/// Propagates failures from scanning the resolved objects directory.
pub fn repository_object_ids(
    git_dir: impl AsRef<Path>,
    format: ObjectFormat,
) -> Result<Vec<ObjectId>> {
    let objects_dir = repository_objects_dir(git_dir);
    object_ids_in_objects_dir(objects_dir, format)
}
/// Lists every object id found in `objects_dir`, loose and packed, without
/// duplicates and sorted by hexadecimal representation.
///
/// # Errors
///
/// Propagates failures from scanning loose objects or reading pack metadata.
pub fn object_ids_in_objects_dir(
    objects_dir: impl AsRef<Path>,
    format: ObjectFormat,
) -> Result<Vec<ObjectId>> {
    let objects_dir = objects_dir.as_ref();
    let mut oids = HashSet::new();
    collect_loose_object_ids(objects_dir, format, &mut oids)?;
    collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
    let mut oids = oids.into_iter().collect::<Vec<_>>();
    // Render each hex key once instead of on every comparison.
    oids.sort_by_cached_key(ObjectId::to_hex);
    Ok(oids)
}
/// Adds the ids of all loose objects under `objects_dir` to `oids`.
///
/// Only two-hex-digit subdirectories are scanned, and within them only regular
/// files whose name is the remaining hex digits of an id for `format`. A missing
/// `objects_dir` is not an error.
///
/// # Errors
///
/// Propagates directory listing and file-type errors, and id parse failures.
fn collect_loose_object_ids(
    objects_dir: &Path,
    format: ObjectFormat,
    oids: &mut HashSet<ObjectId>,
) -> Result<()> {
    if !objects_dir.exists() {
        return Ok(());
    }
    let is_hex = |text: &str| text.bytes().all(|byte| byte.is_ascii_hexdigit());
    let hex_len = format.hex_len();

    for fanout_entry in fs::read_dir(objects_dir)? {
        let fanout_entry = fanout_entry?;
        if !fanout_entry.file_type()?.is_dir() {
            continue;
        }
        let fanout_name = fanout_entry.file_name();
        let fanout = match fanout_name.to_str() {
            Some(fanout) if fanout.len() == 2 && is_hex(fanout) => fanout,
            _ => continue,
        };
        for object_entry in fs::read_dir(fanout_entry.path())? {
            let object_entry = object_entry?;
            if !object_entry.file_type()?.is_file() {
                continue;
            }
            let object_name = object_entry.file_name();
            let suffix = match object_name.to_str() {
                Some(suffix) if suffix.len() + 2 == hex_len && is_hex(suffix) => suffix,
                _ => continue,
            };
            let mut hex = String::with_capacity(hex_len);
            hex.push_str(fanout);
            hex.push_str(suffix);
            oids.insert(ObjectId::from_hex(format, &hex)?);
        }
    }
    Ok(())
}
/// Adds the ids of the loose objects in one fan-out directory to `oids`.
///
/// The directory is `<objects_dir>/<fanout as two lowercase hex digits>`. A
/// missing directory contributes nothing; any other failure to open it is
/// reported as `GitError::Io`. Entries whose names are not the remaining hex
/// digits of an id in `format` are ignored.
fn collect_loose_fanout_object_ids(
    objects_dir: &Path,
    format: ObjectFormat,
    fanout: u8,
    oids: &mut HashSet<ObjectId>,
) -> Result<()> {
    let fanout_hex = format!("{fanout:02x}");
    let listing = match fs::read_dir(objects_dir.join(&fanout_hex)) {
        Ok(listing) => listing,
        Err(err) => {
            return if err.kind() == std::io::ErrorKind::NotFound {
                Ok(())
            } else {
                Err(GitError::Io(err.to_string()))
            };
        }
    };
    let suffix_len = format.hex_len() - 2;
    for object_entry in listing {
        let file_name = object_entry?.file_name();
        let suffix = match file_name.to_str() {
            Some(suffix)
                if suffix.len() == suffix_len
                    && suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) =>
            {
                suffix
            }
            _ => continue,
        };
        let hex = format!("{fanout_hex}{suffix}");
        oids.insert(ObjectId::from_hex(format, &hex)?);
    }
    Ok(())
}
/// Cache of loose-object presence information.
// NOTE(review): judging by the field names, `loaded_fanouts` records which
// fan-out buckets (first id byte) have already been scanned and `objects`
// holds the ids found in them — confirm against the code that fills this cache.
#[derive(Debug, Default)]
struct LoosePresenceCache {
loaded_fanouts: HashSet<u8>,
objects: HashSet<ObjectId>,
}
/// Returns the ids of all objects stored in packs under `<objects_dir>/pack`.
///
/// Loose objects are not included. Errors from reading the pack directory or
/// parsing its indexes are propagated.
pub fn packed_object_ids(
    objects_dir: impl AsRef<Path>,
    format: ObjectFormat,
) -> Result<HashSet<ObjectId>> {
    let pack_dir = objects_dir.as_ref().join("pack");
    let mut packed = HashSet::new();
    collect_packed_object_ids(&pack_dir, format, &mut packed)?;
    Ok(packed)
}
/// Adds the id of every object recorded in `pack_dir` to `oids`.
///
/// Ids come from two sources: the `multi-pack-index` file, when present, and
/// every `*.idx` file that has a sibling `*.pack`. A missing `pack_dir`
/// contributes nothing.
///
/// An `*.idx` that fails to parse is only tolerated when the multi-pack-index
/// lists it by file name; a diagnostic is printed to stderr and the file is
/// skipped. Any other parse failure is returned.
fn collect_packed_object_ids(
    pack_dir: &Path,
    format: ObjectFormat,
    oids: &mut HashSet<ObjectId>,
) -> Result<()> {
    if !pack_dir.exists() {
        return Ok(());
    }
    let mut midx_pack_names = HashSet::new();
    let midx_path = pack_dir.join("multi-pack-index");
    if midx_path.exists() {
        let midx_bytes = fs::read(&midx_path)?;
        let midx = MultiPackIndex::parse_without_checksum(&midx_bytes, format)?;
        midx_pack_names.extend(midx.pack_names.iter().cloned());
        oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
    }
    for dir_entry in fs::read_dir(pack_dir)? {
        let idx_path = dir_entry?.path();
        let is_idx = idx_path.extension().and_then(|ext| ext.to_str()) == Some("idx");
        if !is_idx {
            continue;
        }
        let pack_path = idx_path.with_extension("pack");
        if !pack_path.exists() {
            continue;
        }
        let idx_bytes = fs::read(&idx_path)?;
        let index = match PackIndex::parse(&idx_bytes, format) {
            Ok(index) => index,
            Err(err) => {
                let covered_by_midx = idx_path
                    .file_name()
                    .and_then(|name| name.to_str())
                    .is_some_and(|name| midx_pack_names.contains(name));
                if !covered_by_midx {
                    return Err(err);
                }
                eprintln!(
                    "error: packfile {} index unavailable",
                    pack_path.display()
                );
                continue;
            }
        };
        oids.extend(index.entries.into_iter().map(|entry| entry.oid));
    }
    Ok(())
}
impl FileObjectDatabase {
/// Returns the object format this database was created with.
pub fn object_format(&self) -> ObjectFormat {
    self.format
}
/// Returns the object directory this database reads from and writes to.
pub fn objects_dir(&self) -> &Path {
    self.objects_dir.as_path()
}
pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
let objects_dir = objects_dir.into();
Self {
loose: LooseObjectStore::new(objects_dir.clone(), format),
alternates: alternate_object_dirs(&objects_dir),
objects_dir,
format,
pack_bytes: Arc::new(Mutex::new(HashMap::new())),
pack_indexes: Arc::new(Mutex::new(HashMap::new())),
multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
pack_registry: Arc::new(Mutex::new(None)),
decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
pack_deltas: Arc::new(Mutex::new(HashMap::new())),
pack_header_types: Arc::new(Mutex::new(HashMap::new())),
promisor_objects: Arc::new(OnceLock::new()),
shallow_grafts: Arc::new(std::sync::OnceLock::new()),
}
}
/// Opens a database rooted at `objects_dir` that consults no alternates.
///
/// Every cache starts out empty; the decoded-object cache is sized by
/// `object_cache_budget()`.
fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
    let objects_dir = objects_dir.into();
    let loose = LooseObjectStore::new(objects_dir.clone(), format);
    let decoded = Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget())));
    Self {
        loose,
        alternates: Vec::new(),
        objects_dir,
        format,
        pack_bytes: Arc::default(),
        pack_indexes: Arc::default(),
        multi_pack_indexes: Arc::default(),
        multi_pack_oid_lookups: Arc::default(),
        pack_registry: Arc::default(),
        decoded,
        pack_deltas: Arc::default(),
        pack_header_types: Arc::default(),
        promisor_objects: Arc::default(),
        shallow_grafts: Arc::default(),
    }
}
/// Opens the database belonging to the repository at `git_dir`.
///
/// The object directory is resolved with [`repository_objects_dir`].
pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
    let objects_dir = repository_objects_dir(git_dir);
    Self::new(objects_dir, format)
}
pub fn refresh_read_cache(&self) {
if let Ok(mut cache) = self.pack_registry.lock() {
*cache = None;
}
if let Ok(mut cache) = self.pack_indexes.lock() {
cache.clear();
}
if let Ok(mut cache) = self.multi_pack_indexes.lock() {
cache.clear();
}
if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
cache.clear();
}
if let Ok(mut cache) = self.pack_bytes.lock() {
cache.clear();
}
if let Ok(mut cache) = self.pack_deltas.lock() {
cache.clear();
}
if let Ok(mut cache) = self.pack_header_types.lock() {
cache.clear();
}
if let Ok(mut cache) = self.decoded.lock() {
cache.clear();
}
self.loose.invalidate_cache();
}
pub fn loose(&self) -> &LooseObjectStore {
&self.loose
}
/// Builds an [`ObjectPresenceChecker`] from a clone of this database.
pub fn presence_checker(&self) -> ObjectPresenceChecker {
    let database = self.clone();
    ObjectPresenceChecker::new(database)
}
/// Installs `pack` with the default [`RawPackInstallOptions`].
///
/// See [`Self::install_pack_with_options`] for the validation performed.
pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
    let options = RawPackInstallOptions::default();
    self.install_pack_with_options(pack, options)
}
/// Stores a single blob as a one-object pack and returns its id.
///
/// Nothing is written when the database already contains `oid`. The pack is
/// written with window 0, depth 0 and reordering disabled, at the requested
/// `compression_level`.
///
/// # Errors
///
/// * `GitError::InvalidObject` when `object` is not a blob.
/// * `GitError::InvalidObjectId` when `oid` uses a different object format
///   than this store.
/// * Any error from the presence check, pack writing or pack installation.
pub fn write_blob_as_pack(
    &self,
    oid: ObjectId,
    object: &EncodedObject,
    compression_level: u32,
) -> Result<ObjectId> {
    if object.object_type != ObjectType::Blob {
        return Err(GitError::InvalidObject(
            "write_blob_as_pack requires a blob object".into(),
        ));
    }
    let oid_format = oid.format();
    if oid_format != self.format {
        return Err(GitError::InvalidObjectId(format!(
            "object {oid} uses {}, store uses {}",
            oid_format.name(),
            self.format.name()
        )));
    }
    if !self.contains(&oid)? {
        let single = [PackInput {
            oid: &oid,
            object,
        }];
        let pack_options = PackWriteOptions::new()
            .with_window(0)
            .with_depth(0)
            .with_reorder(false)
            .with_compression_level(compression_level);
        let pack = PackFile::write_packed_with_known_ids_and_options(
            &single,
            self.format,
            &pack_options,
        )?;
        self.install_pack(&pack)?;
    }
    Ok(oid)
}
/// Stores several blobs together in one pack.
///
/// Duplicate ids within `objects` and ids already present in the database are
/// left out; when nothing remains no pack is written. The pack is written
/// with window 0, depth 0 and reordering disabled, at the requested
/// `compression_level`.
///
/// # Errors
///
/// * `GitError::InvalidObject` when any entry is not a blob.
/// * `GitError::InvalidObjectId` when any id uses a different object format
///   than this store.
/// * Any error from the presence checks, pack writing or pack installation.
pub fn write_blobs_as_pack(
    &self,
    objects: &[(ObjectId, EncodedObject)],
    compression_level: u32,
) -> Result<()> {
    let mut seen = HashSet::with_capacity(objects.len());
    let mut pending = Vec::new();
    for (oid, object) in objects {
        if object.object_type != ObjectType::Blob {
            return Err(GitError::InvalidObject(
                "write_blobs_as_pack requires blob objects".into(),
            ));
        }
        let oid_format = oid.format();
        if oid_format != self.format {
            return Err(GitError::InvalidObjectId(format!(
                "object {oid} uses {}, store uses {}",
                oid_format.name(),
                self.format.name()
            )));
        }
        // Only the first occurrence of an id triggers a presence check.
        let first_occurrence = seen.insert(*oid);
        if first_occurrence && !self.contains(oid)? {
            pending.push(PackInput { oid, object });
        }
    }
    if pending.is_empty() {
        return Ok(());
    }
    let pack_options = PackWriteOptions::new()
        .with_window(0)
        .with_depth(0)
        .with_reorder(false)
        .with_compression_level(compression_level);
    let pack =
        PackFile::write_packed_with_known_ids_and_options(&pending, self.format, &pack_options)?;
    self.install_pack(&pack)?;
    Ok(())
}
/// Validates `pack` against a freshly built index and installs it.
///
/// Validation steps, in order:
/// 1. the pack checksum and every entry id must use this store's format;
/// 2. an index rebuilt from the pack bytes and the supplied index must both
///    carry the pack's checksum;
/// 3. the supplied index bytes must equal the rebuilt index bytes.
///
/// The pack and index are then written as `pack-<checksum>.pack` / `.idx`
/// under `<objects_dir>/pack` unless both already exist, and a promisor
/// sidecar is written when `options.promisor` is set. The returned object ids
/// come from the rebuilt index.
pub fn install_pack_with_options(
    &self,
    pack: &PackWrite,
    options: RawPackInstallOptions,
) -> Result<PackInstallResult> {
    let store_format = self.format;
    let checksum_format = pack.checksum.format();
    if checksum_format != store_format {
        return Err(GitError::InvalidObjectId(format!(
            "pack checksum uses {}, store uses {}",
            checksum_format.name(),
            store_format.name()
        )));
    }
    let mismatch = pack
        .entries
        .iter()
        .find(|entry| entry.oid.format() != store_format);
    if let Some(entry) = mismatch {
        return Err(GitError::InvalidObjectId(format!(
            "pack entry {} uses {}, store uses {}",
            entry.oid,
            entry.oid.format().name(),
            store_format.name()
        )));
    }
    let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, store_format)?;
    let parsed_index = PackIndex::parse(&pack.index, store_format)?;
    let checksums_agree = canonical_index.pack_checksum == pack.checksum
        && parsed_index.pack_checksum == pack.checksum;
    if !checksums_agree {
        return Err(GitError::InvalidFormat(
            "pack and index checksums do not match pack write".into(),
        ));
    }
    if pack.index != canonical_index.index {
        return Err(GitError::InvalidFormat(
            "pack index does not match pack contents".into(),
        ));
    }
    let pack_dir = self.objects_dir.join("pack");
    fs::create_dir_all(&pack_dir)?;
    let pack_name = format!("pack-{}", pack.checksum.to_hex());
    let pack_path = pack_dir.join(format!("{pack_name}.pack"));
    let index_path = pack_dir.join(format!("{pack_name}.idx"));
    let already_installed = pack_path.exists() && index_path.exists();
    if !already_installed {
        write_pack_component(&pack_path, &pack.pack)?;
        write_pack_component(&index_path, &pack.index)?;
    }
    let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
    let object_ids = canonical_index
        .entries
        .iter()
        .map(|entry| entry.oid)
        .collect();
    Ok(PackInstallResult {
        pack_name,
        pack_path,
        index_path,
        promisor_path,
        object_ids,
    })
}
/// Installs a pack produced by this crate's writer using the default
/// [`RawPackInstallOptions`].
///
/// See [`Self::install_written_pack_with_options`] for the checks performed.
pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
    let options = RawPackInstallOptions::default();
    self.install_written_pack_with_options(pack, options)
}
/// Installs a writer-generated pack after checksum and entry checks.
///
/// Unlike [`Self::install_pack_with_options`] the index is not rebuilt from
/// the pack bytes. Instead the pack checksum is validated, the supplied index
/// must carry that checksum, and its entries must match the entries recorded
/// by the writer.
pub fn install_written_pack_with_options(
    &self,
    pack: &PackWrite,
    options: RawPackInstallOptions,
) -> Result<PackInstallResult> {
    validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
    let parsed_index = PackIndex::parse(&pack.index, self.format)?;
    let failure = if parsed_index.pack_checksum != pack.checksum {
        Some("pack write index checksum does not match pack")
    } else if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
        Some("pack write index does not match generated entries")
    } else {
        None
    };
    match failure {
        Some(message) => Err(GitError::InvalidFormat(message.into())),
        None => self.install_generated_pack_unchecked(pack, options),
    }
}
/// Writes `pack` and its index into the pack directory without validation.
///
/// Files are named `pack-<checksum>.pack` / `.idx` and are only written when
/// at least one of them is missing. A promisor sidecar is written when
/// `options.promisor` is set. The returned object ids are taken from the
/// writer's entry list.
fn install_generated_pack_unchecked(
    &self,
    pack: &PackWrite,
    options: RawPackInstallOptions,
) -> Result<PackInstallResult> {
    let pack_dir = self.objects_dir.join("pack");
    fs::create_dir_all(&pack_dir)?;
    let pack_name = format!("pack-{}", pack.checksum.to_hex());
    let pack_path = pack_dir.join(format!("{pack_name}.pack"));
    let index_path = pack_dir.join(format!("{pack_name}.idx"));
    let already_installed = pack_path.exists() && index_path.exists();
    if !already_installed {
        write_pack_component(&pack_path, &pack.pack)?;
        write_pack_component(&index_path, &pack.index)?;
    }
    let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
    let object_ids = pack.entries.iter().map(|entry| entry.oid).collect();
    Ok(PackInstallResult {
        pack_name,
        pack_path,
        index_path,
        promisor_path,
        object_ids,
    })
}
/// Installs raw pack bytes with the default [`RawPackInstallOptions`].
///
/// See [`Self::install_raw_pack_with_options`].
pub fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<PackInstallResult> {
    let options = RawPackInstallOptions::default();
    self.install_raw_pack_with_options(pack_bytes, options)
}
/// Builds a v2 index for `pack_bytes` and installs both files.
///
/// The pack is named after the checksum reported by the index builder. Files
/// are only written when at least one of them is missing, and a promisor
/// sidecar is written when `options.promisor` is set. The returned object ids
/// come from the built index.
pub fn install_raw_pack_with_options(
    &self,
    pack_bytes: &[u8],
    options: RawPackInstallOptions,
) -> Result<PackInstallResult> {
    let built = PackIndex::write_v2_for_pack(pack_bytes, self.format)?;
    let pack_dir = self.objects_dir.join("pack");
    fs::create_dir_all(&pack_dir)?;
    let pack_name = format!("pack-{}", built.pack_checksum.to_hex());
    let pack_path = pack_dir.join(format!("{pack_name}.pack"));
    let index_path = pack_dir.join(format!("{pack_name}.idx"));
    let already_installed = pack_path.exists() && index_path.exists();
    if !already_installed {
        write_pack_component(&pack_path, pack_bytes)?;
        write_pack_component(&index_path, &built.index)?;
    }
    let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
    let object_ids = built.entries.iter().map(|entry| entry.oid).collect();
    Ok(PackInstallResult {
        pack_name,
        pack_path,
        index_path,
        promisor_path,
        object_ids,
    })
}
/// Reports whether `oid` exists in this database or any of its alternates.
///
/// Lookup order: loose store, local packs, each alternate (opened without its
/// own alternates). When all of those miss, the loose store's cache is
/// invalidated and the loose store is asked once more.
pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
    if self.loose.exists(oid)? || self.find_pack_containing(oid)?.is_some() {
        return Ok(true);
    }
    for alternate in &self.alternates {
        let alternate_db = Self::without_alternates(alternate, self.format);
        if alternate_db.contains(oid)? {
            return Ok(true);
        }
    }
    // Last resort: the loose cache may be stale, so drop it and retry.
    self.loose.invalidate_cache();
    self.loose.exists(oid)
}
pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
.into_iter()
.collect::<HashSet<_>>();
for alternate in &self.alternates {
oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
}
let mut oids = oids.into_iter().collect::<Vec<_>>();
oids.sort_by_key(ObjectId::to_hex);
Ok(oids)
}
/// Reports the on-disk size and delta base of `oid`, if it is stored.
///
/// Lookup order: loose store, local packs, each alternate (opened without its
/// own alternates), then the loose store again after invalidating its cache.
/// Loose objects report the zero id as their delta base.
pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
    let loose_info = || -> Result<Option<ObjectStorageInfo>> {
        match self.loose.disk_size(oid)? {
            Some(disk_size) => Ok(Some(ObjectStorageInfo {
                disk_size,
                deltabase: zero_oid(self.format)?,
            })),
            None => Ok(None),
        }
    };
    if let Some(info) = loose_info()? {
        return Ok(Some(info));
    }
    if let Some(info) = self.packed_object_storage_info(oid)? {
        return Ok(Some(info));
    }
    for alternate in &self.alternates {
        let alternate_db = Self::without_alternates(alternate, self.format);
        if let Some(info) = alternate_db.object_storage_info(oid)? {
            return Ok(Some(info));
        }
    }
    self.loose.invalidate_cache();
    loose_info()
}
/// Resolves an abbreviated hex id against the objects in this database.
///
/// Returns `Missing` for no match, `Unique` for exactly one and `Ambiguous`
/// (carrying all candidates) otherwise. Prefix validation errors from
/// [`Self::object_ids_with_prefix`] are propagated.
pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
    let mut candidates = self.object_ids_with_prefix(prefix)?;
    if candidates.len() > 1 {
        return Ok(ObjectPrefixResolution::Ambiguous(candidates));
    }
    Ok(match candidates.pop() {
        Some(only) => ObjectPrefixResolution::Unique(only),
        None => ObjectPrefixResolution::Missing,
    })
}
/// Returns every stored object id whose hex form starts with `prefix`.
///
/// The prefix is validated first; matching ignores ASCII case. Results keep
/// the order produced by [`Self::object_ids`].
pub fn object_ids_with_prefix(&self, prefix: &str) -> Result<Vec<ObjectId>> {
    validate_object_id_prefix(self.format, prefix)?;
    let matching = self
        .object_ids()?
        .into_iter()
        .filter(|oid| object_id_matches_prefix(oid, prefix))
        .collect();
    Ok(matching)
}
/// Returns the type and body size of `oid` without requiring a full read.
///
/// Sources are tried in this order: the implied empty tree, the decoded
/// object cache, the loose store, local packs (using the per-pack header-type
/// cache when one is available), each alternate, and finally the loose store
/// again after invalidating its cache. `Ok(None)` means no source knows the
/// object.
pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
    if implied_empty_tree_object(self.format, oid).is_some() {
        return Ok(Some((ObjectType::Tree, 0)));
    }
    let cached = self
        .decoded
        .lock()
        .ok()
        .and_then(|mut cache| cache.get(oid));
    if let Some(object) = cached {
        return Ok(Some((object.object_type, object.body.len() as u64)));
    }
    if let Some(header) = self.loose.read_header(oid)? {
        return Ok(Some(header));
    }
    if let Some(pack_lookup) = self.find_pack_containing(oid)? {
        let bytes = pack_lookup.pack_bytes(self)?;
        let type_cache = pack_lookup.header_type_cache(self);
        // Ref-delta bases are resolved through this same method; only their
        // type is needed.
        let resolve_ref_base = |base: &ObjectId| {
            self.read_object_header(base)
                .map(|header| header.map(|(object_type, _size)| object_type))
        };
        let header = if let Some(cache) = &type_cache {
            let mut adapter = PackHeaderTypeCacheAdapter(cache);
            sley_pack::read_object_header_at_with_cache(
                &bytes,
                pack_lookup.offset,
                self.format,
                resolve_ref_base,
                &mut adapter,
            )?
        } else {
            sley_pack::read_object_header_at(
                &bytes,
                pack_lookup.offset,
                self.format,
                resolve_ref_base,
            )?
        };
        return Ok(Some(header));
    }
    for alternate in &self.alternates {
        let alternate_db = Self::without_alternates(alternate, self.format);
        if let Some(header) = alternate_db.read_object_header(oid)? {
            return Ok(Some(header));
        }
    }
    self.loose.invalidate_cache();
    self.loose.read_header(oid)
}
/// Reads `oid` from the local packs, consulting the decoded cache first.
///
/// Returns `Ok(None)` when no local pack contains the object.
fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
    let cached = self
        .decoded
        .lock()
        .ok()
        .and_then(|mut cache| cache.get(oid));
    if cached.is_some() {
        return Ok(cached);
    }
    match self.find_pack_containing(oid)? {
        Some(pack_lookup) => self
            .read_packed_object_at_lookup(oid, &pack_lookup)
            .map(Some),
        None => Ok(None),
    }
}
/// Decodes the object stored at `pack_lookup` and caches the result.
///
/// The decoded cache is consulted first. Ref-delta bases are resolved through
/// `read_object`, and the per-pack delta cache is used when available. When
/// `verify_reads_enabled()` is true the decoded object is re-hashed and must
/// match `oid`, otherwise `GitError::InvalidObject` is returned.
fn read_packed_object_at_lookup(
    &self,
    oid: &ObjectId,
    pack_lookup: &PackLookup,
) -> Result<Arc<EncodedObject>> {
    let cached = self
        .decoded
        .lock()
        .ok()
        .and_then(|mut cache| cache.get(oid));
    if let Some(object) = cached {
        return Ok(object);
    }
    let bytes = pack_lookup.pack_bytes(self)?;
    let delta_cache = pack_lookup.delta_cache(self);
    let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
    let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
    let object = if let Some(adapter) = &delta_adapter {
        sley_pack::read_object_at_with_cache_arc(
            &bytes,
            pack_lookup.offset,
            self.format,
            resolve_ref_base,
            adapter,
        )?
    } else {
        sley_pack::read_object_at_arc(
            &bytes,
            pack_lookup.offset,
            self.format,
            resolve_ref_base,
        )?
    };
    if verify_reads_enabled() {
        let actual = object.object_id(self.format)?;
        if actual != *oid {
            return Err(GitError::InvalidObject(format!(
                "pack object id mismatch: index says {oid}, decoded {actual}"
            )));
        }
    }
    if let Ok(mut cache) = self.decoded.lock() {
        cache.put(*oid, Arc::clone(&object));
    }
    Ok(object)
}
/// Returns the delta-base cache for `pack_path`, creating it on first use.
///
/// A new cache is sized by `delta_base_cache_budget()`. Returns `None` when
/// the cache map's lock is poisoned.
fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
    let mut caches = self.pack_deltas.lock().ok()?;
    let new_cache = || Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())));
    let cache = caches
        .entry(pack_path.to_path_buf())
        .or_insert_with(new_cache);
    Some(Arc::clone(cache))
}
/// Returns the header-type cache for `pack_path`, creating it on first use.
///
/// Returns `None` when the cache map's lock is poisoned.
fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
    let mut caches = self.pack_header_types.lock().ok()?;
    let key = pack_path.to_path_buf();
    let cache = caches
        .entry(key)
        .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
    Some(Arc::clone(cache))
}
/// Returns the data of the pack at `pack_path`, loading it on a cache miss.
///
/// Freshly loaded data is stored in the cache when its lock can be taken.
fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
    let cached = self
        .pack_bytes
        .lock()
        .ok()
        .and_then(|cache| cache.get(pack_path).cloned());
    if let Some(bytes) = cached {
        return Ok(bytes);
    }
    let loaded = Arc::new(load_pack_data(pack_path)?);
    if let Ok(mut cache) = self.pack_bytes.lock() {
        cache.insert(pack_path.to_path_buf(), Arc::clone(&loaded));
    }
    Ok(loaded)
}
/// Returns the parsed pack index at `index_path`, parsing it on a cache miss.
///
/// A freshly parsed index is stored in the cache when its lock can be taken.
fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
    let cached = self
        .pack_indexes
        .lock()
        .ok()
        .and_then(|cache| cache.get(index_path).cloned());
    if let Some(index) = cached {
        return Ok(index);
    }
    let parsed = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
    if let Ok(mut cache) = self.pack_indexes.lock() {
        cache.insert(index_path.to_path_buf(), Arc::clone(&parsed));
    }
    Ok(parsed)
}
/// Returns the oid lookup table of the multi-pack-index at `midx_path`.
///
/// Returns `Ok(None)` when the file does not exist. A cached table is reused;
/// otherwise the file is loaded, parsed and cached.
///
/// A parse failure whose message starts with `multi-pack-index hash id ` is
/// reported on stderr as a hash-version mismatch and treated as "no
/// multi-pack-index"; every other parse failure is returned.
fn cached_multi_pack_index_oid_lookup(
    &self,
    midx_path: &Path,
) -> Result<Option<Arc<MultiPackIndexOidLookup>>> {
    const HASH_ID_PREFIX: &str = "multi-pack-index hash id ";
    if !midx_path.exists() {
        return Ok(None);
    }
    let cached = self
        .multi_pack_oid_lookups
        .lock()
        .ok()
        .and_then(|cache| cache.get(midx_path).cloned());
    if cached.is_some() {
        return Ok(cached);
    }
    let bytes = load_multi_pack_index_lookup_data(midx_path)?;
    let midx = match MultiPackIndexOidLookup::parse(bytes, self.format) {
        Ok(midx) => Arc::new(midx),
        Err(GitError::InvalidFormat(message)) if message.starts_with(HASH_ID_PREFIX) => {
            // The first word after the prefix is the hash id found in the file.
            let actual = message[HASH_ID_PREFIX.len()..]
                .split_whitespace()
                .next()
                .unwrap_or("0");
            let expected = match self.format {
                ObjectFormat::Sha1 => 1,
                ObjectFormat::Sha256 => 2,
            };
            eprintln!(
                "error: multi-pack-index hash version {actual} does not match version {expected}"
            );
            return Ok(None);
        }
        Err(err) => return Err(err),
    };
    if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
        cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
    }
    Ok(Some(midx))
}
/// Returns the pack registry snapshot for `pack_dir`.
///
/// Without `force_rescan` an already cached snapshot is returned as is.
/// Otherwise the directory is scanned. When the scan has the same fingerprint
/// and the same pack set as the cached snapshot, the cached `Arc` is returned
/// so callers can detect "nothing changed" by pointer equality; otherwise the
/// new snapshot replaces the cached one.
fn cached_pack_registry(
    &self,
    pack_dir: &Path,
    force_rescan: bool,
) -> Result<Arc<PackRegistrySnapshot>> {
    if !force_rescan && let Some(registry) = self.cached_loaded_pack_registry(pack_dir)? {
        return Ok(registry);
    }
    let scanned = Arc::new(scan_pack_registry(pack_dir, self.format)?);
    if let Ok(mut slot) = self.pack_registry.lock() {
        let unchanged = slot
            .as_ref()
            .filter(|existing| {
                existing.fingerprint == scanned.fingerprint
                    && same_registered_pack_set(&existing.packs, &scanned.packs)
            })
            .map(Arc::clone);
        match unchanged {
            Some(existing) => return Ok(existing),
            None => *slot = Some(Arc::clone(&scanned)),
        }
    }
    Ok(scanned)
}
fn find_in_pack_registry(
&self,
registry: Arc<PackRegistrySnapshot>,
oid: &ObjectId,
) -> Result<Option<PackLookup>> {
let hinted_pack_index = registry.cached_hint();
if let Some(pack_index) = hinted_pack_index {
let pack = ®istry.packs[pack_index];
match pack.index(self.format) {
Ok(index) => {
if let Some(entry) = index.find(oid) {
return Ok(Some(PackLookup::from_registered(
Arc::clone(pack),
entry.offset,
)));
}
}
Err(_) => {
eprintln!("error: packfile {} index unavailable", pack.pack.display());
}
}
}
for (pack_index, pack) in registry.packs.iter().enumerate() {
if Some(pack_index) == hinted_pack_index {
continue;
}
let index = match pack.index(self.format) {
Ok(index) => index,
Err(_) => {
eprintln!("error: packfile {} index unavailable", pack.pack.display());
continue;
}
};
if let Some(entry) = index.find(oid) {
registry.remember_hint(pack_index);
return Ok(Some(PackLookup::from_registered(
Arc::clone(pack),
entry.offset,
)));
}
}
Ok(None)
}
/// Tries to read `oid` from any local pack other than the one in `exclude`.
///
/// The pack directory is scanned directly. Unreadable or unparsable indexes,
/// packs that do not list `oid`, and packs that fail to decode it are skipped.
/// Only an error while iterating the directory itself is propagated. Returns
/// `Ok(None)` when no other pack yields the object.
fn read_packed_object_from_other_packs(
    &self,
    oid: &ObjectId,
    exclude: &PackLookup,
) -> Result<Option<Arc<EncodedObject>>> {
    let pack_dir = self.objects_dir.join("pack");
    let entries = match fs::read_dir(&pack_dir) {
        Ok(entries) => entries,
        Err(_) => return Ok(None),
    };
    let excluded_pack = exclude.pack_path().to_path_buf();
    for dir_entry in entries {
        let idx_path = dir_entry?.path();
        if idx_path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
            continue;
        }
        let pack_path = idx_path.with_extension("pack");
        if pack_path == excluded_pack {
            continue;
        }
        let offset = fs::read(&idx_path)
            .ok()
            .and_then(|idx_bytes| PackIndex::parse(&idx_bytes, self.format).ok())
            .and_then(|index| index.find(oid).map(|entry| entry.offset));
        let Some(offset) = offset else {
            continue;
        };
        let candidate = PackLookup::from_path(pack_path, offset);
        if let Ok(object) = self.read_packed_object_at_lookup(oid, &candidate) {
            return Ok(Some(object));
        }
    }
    Ok(None)
}
/// Locates the local pack (and offset) that stores `oid`.
///
/// Lookup proceeds from cheapest to most expensive:
/// 1. an already loaded multi-pack-index lookup table;
/// 2. an already loaded pack registry snapshot;
/// 3. the on-disk multi-pack-index (loaded and cached on demand);
/// 4. the pack registry, scanning the directory if nothing is cached;
/// 5. a forced rescan of the pack directory, searched only when the rescan
///    produced a different snapshot.
///
/// # Errors
///
/// Returns `GitError::InvalidObjectId` when `oid` uses a different object
/// format than this store, and propagates lookup or scan failures.
fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackLookup>> {
    if oid.format() != self.format {
        return Err(GitError::InvalidObjectId(format!(
            "object {oid} uses {}, store uses {}",
            oid.format().name(),
            self.format.name()
        )));
    }
    let pack_dir = self.objects_dir.join("pack");
    if let Some(midx) = self.cached_loaded_multi_pack_index_oid_lookup()
        && let Some(pack_paths) = self.midx_oid_lookup_pack_paths(&pack_dir, &midx, oid)?
    {
        return Ok(Some(pack_paths));
    }
    if let Some(registry) = self.cached_loaded_pack_registry(&pack_dir)?
        && let Some(pack_paths) = self.find_in_pack_registry(registry, oid)?
    {
        return Ok(Some(pack_paths));
    }
    if !pack_dir.exists() {
        return Ok(None);
    }
    if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
        return Ok(Some(pack_paths));
    }
    let registry = self.cached_pack_registry(&pack_dir, false)?;
    if let Some(pack_paths) = self.find_in_pack_registry(Arc::clone(&registry), oid)? {
        return Ok(Some(pack_paths));
    }
    // A pack may have appeared since the snapshot was taken; rescan once. An
    // identical `Arc` means the directory is unchanged and was just searched.
    let refreshed = self.cached_pack_registry(&pack_dir, true)?;
    if Arc::ptr_eq(&registry, &refreshed) {
        return Ok(None);
    }
    self.find_in_pack_registry(refreshed, oid)
}
/// Computes the on-disk size and delta base of a packed object.
///
/// The size is the distance from the object's offset to the end offset
/// reported by `scan_pack_index_offsets`, which is bounded by the start of the
/// pack trailer. The delta base is the referenced id for ref-deltas, the id
/// found at the base offset for ofs-deltas, and the zero id for non-delta
/// entries. Returns `Ok(None)` when no local pack contains `oid`.
fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
    let Some(pack_lookup) = self.find_pack_containing(oid)? else {
        return Ok(None);
    };
    let pack_len = fs::metadata(pack_lookup.pack_path())?.len();
    let Some(trailer_offset) = pack_len.checked_sub(self.format.raw_len() as u64) else {
        return Err(GitError::InvalidFormat(
            "pack file shorter than checksum".into(),
        ));
    };
    let index = pack_lookup.pack_index(self)?;
    let pack = pack_lookup.pack_bytes(self)?;
    let delta_base = pack_entry_delta_base(self.format, &pack, pack_lookup.offset)?;
    let delta_base_offset = if let Some(PackDeltaBase::Offset(offset)) = &delta_base {
        Some(*offset)
    } else {
        None
    };
    let offset_info = scan_pack_index_offsets(
        &index,
        pack_lookup.offset,
        trailer_offset,
        delta_base_offset,
    )?;
    let Some(disk_size) = offset_info.end_offset.checked_sub(pack_lookup.offset) else {
        return Err(GitError::InvalidFormat(
            "pack index offsets are not sorted".into(),
        ));
    };
    let deltabase = match delta_base {
        Some(PackDeltaBase::Ref(base_oid)) => base_oid,
        Some(PackDeltaBase::Offset(_)) => match offset_info.delta_base_oid {
            Some(base_oid) => base_oid,
            None => {
                return Err(GitError::InvalidFormat(
                    "ofs-delta base oid missing from pack index".into(),
                ));
            }
        },
        None => zero_oid(self.format)?,
    };
    Ok(Some(ObjectStorageInfo {
        disk_size,
        deltabase,
    }))
}
/// Looks `oid` up in the `multi-pack-index` file of `pack_dir`.
///
/// Returns `Ok(None)` when there is no usable multi-pack-index or it does not
/// list the object.
fn find_midx_pack_containing(
    &self,
    pack_dir: &Path,
    oid: &ObjectId,
) -> Result<Option<PackLookup>> {
    let midx_path = pack_dir.join("multi-pack-index");
    match self.cached_multi_pack_index_oid_lookup(&midx_path)? {
        Some(midx) => self.midx_oid_lookup_pack_paths(pack_dir, &midx, oid),
        None => Ok(None),
    }
}
/// Translates a multi-pack-index hit for `oid` into a [`PackLookup`].
///
/// A pack name ending in `.idx` is mapped to the matching `.pack` file name;
/// any other name is used unchanged. Returns `Ok(None)` when `midx` does not
/// list `oid`, and `GitError::InvalidFormat` when the entry refers to a pack
/// id with no name in the table.
fn midx_oid_lookup_pack_paths(
    &self,
    pack_dir: &Path,
    midx: &MultiPackIndexOidLookup,
    oid: &ObjectId,
) -> Result<Option<PackLookup>> {
    let Some(entry) = midx.find(oid)? else {
        return Ok(None);
    };
    let Some(pack_name) = midx.pack_name(entry.pack_int_id) else {
        return Err(GitError::InvalidFormat(
            "multi-pack-index object points past pack table".into(),
        ));
    };
    let pack_file_name = match pack_name.strip_suffix(".idx") {
        Some(stem) => format!("{stem}.pack"),
        None => pack_name.to_string(),
    };
    let lookup = PackLookup::from_path(pack_dir.join(pack_file_name), entry.offset);
    Ok(Some(lookup))
}
/// Returns the multi-pack-index lookup table if it is already cached.
///
/// Never touches the file system; returns `None` on a cache miss or when the
/// cache lock is poisoned.
fn cached_loaded_multi_pack_index_oid_lookup(&self) -> Option<Arc<MultiPackIndexOidLookup>> {
    let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
    self.multi_pack_oid_lookups
        .lock()
        .ok()
        .and_then(|cache| cache.get(&midx_path).map(Arc::clone))
}
/// Returns the pack registry snapshot if one is already cached.
///
/// `_pack_dir` is unused. A poisoned lock is reported as "nothing cached",
/// so this currently always returns `Ok`.
fn cached_loaded_pack_registry(
    &self,
    _pack_dir: &Path,
) -> Result<Option<Arc<PackRegistrySnapshot>>> {
    let snapshot = self
        .pack_registry
        .lock()
        .ok()
        .and_then(|cache| cache.as_ref().map(Arc::clone));
    Ok(snapshot)
}
}
/// Checks that `prefix` is a usable abbreviated object id for `format`.
///
/// The prefix must be between 4 and `format.hex_len()` bytes long and consist
/// solely of ASCII hex digits; otherwise `GitError::InvalidObjectId` is
/// returned.
fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
    let max_len = format.hex_len();
    if !(4..=max_len).contains(&prefix.len()) {
        return Err(GitError::InvalidObjectId(format!(
            "expected 4 to {} hex digits for {}, got {}",
            max_len,
            format.name(),
            prefix.len()
        )));
    }
    let first_invalid = prefix.bytes().find(|byte| !byte.is_ascii_hexdigit());
    if first_invalid.is_some() {
        return Err(GitError::InvalidObjectId(format!(
            "non-hex object id prefix {prefix}"
        )));
    }
    Ok(())
}
/// Reports whether the hex form of `oid` starts with `prefix`, ignoring
/// ASCII case.
///
/// Only the overlapping bytes are compared, so a `prefix` longer than the hex
/// id matches when the id's bytes all agree; callers are expected to validate
/// the prefix length beforehand.
fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
    let hex = oid.to_hex();
    hex.bytes()
        .zip(prefix.bytes())
        .all(|(actual, expected)| actual.eq_ignore_ascii_case(&expected))
}
/// Returns the modification time of `pack_dir`, if it can be determined.
///
/// A missing directory, or a platform that cannot report the time, yields
/// `Ok(None)`. Any other metadata failure becomes `GitError::Io`.
fn pack_dir_modified(pack_dir: &Path) -> Result<Option<std::time::SystemTime>> {
    let metadata = match fs::metadata(pack_dir) {
        Ok(metadata) => metadata,
        Err(err) => {
            return if err.kind() == std::io::ErrorKind::NotFound {
                Ok(None)
            } else {
                Err(GitError::Io(err.to_string()))
            };
        }
    };
    Ok(metadata.modified().ok())
}
fn scan_pack_registry(pack_dir: &Path, _format: ObjectFormat) -> Result<PackRegistrySnapshot> {
let modified = pack_dir_modified(pack_dir)?;
let entries = match fs::read_dir(pack_dir) {
Ok(entries) => entries,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
return Ok(PackRegistrySnapshot::new(
PackDirFingerprint {
modified,
idx_count: 0,
pack_count: 0,
},
Vec::new(),
));
}
Err(err) => return Err(GitError::Io(err.to_string())),
};
let mut idx_paths = Vec::new();
let mut idx_count = 0;
let mut pack_count = 0;
for entry in entries {
let entry = entry?;
let path = entry.path();
match path.extension().and_then(|ext| ext.to_str()) {
Some("idx") => {
idx_count += 1;
idx_paths.push(path);
}
Some("pack") => {
pack_count += 1;
}
_ => {}
}
}
let mut packs = Vec::new();
for idx in idx_paths {
let pack = idx.with_extension("pack");
let Ok(metadata) = fs::metadata(&pack) else {
continue;
};
let modified = pack_sort_modified(&metadata);
packs.push((
modified,
metadata.len(),
Arc::new(RegisteredPack::new(idx, pack)),
));
}
packs.sort_by(|left, right| {
right
.0
.cmp(&left.0)
.then_with(|| right.1.cmp(&left.1))
.then_with(|| left.2.idx.cmp(&right.2.idx))
});
let packs = packs.into_iter().map(|(_, _, pack)| pack).collect();
Ok(PackRegistrySnapshot::new(
PackDirFingerprint {
modified,
idx_count,
pack_count,
},
packs,
))
}
/// Converts a file's modification time into a `(seconds, nanoseconds)` key
/// measured from the Unix epoch.
///
/// Returns `(0, 0)` when the time is unavailable or lies before the epoch.
fn pack_sort_modified(metadata: &fs::Metadata) -> (u64, u32) {
    let since_epoch = metadata
        .modified()
        .ok()
        .and_then(|modified| modified.duration_since(std::time::UNIX_EPOCH).ok());
    match since_epoch {
        Some(duration) => (duration.as_secs(), duration.subsec_nanos()),
        None => (0, 0),
    }
}
/// Reports whether two pack lists name the same index/pack paths in the same
/// order.
fn same_registered_pack_set(left: &[Arc<RegisteredPack>], right: &[Arc<RegisteredPack>]) -> bool {
    let left_paths = left.iter().map(|pack| (&pack.idx, &pack.pack));
    let right_paths = right.iter().map(|pack| (&pack.idx, &pack.pack));
    left_paths.eq(right_paths)
}
/// Collects the alternate object directories for `objects_dir`.
///
/// Two sources are consulted, in this order:
/// 1. the `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment variable, split with
///    the platform's path-list rules; empty components are dropped;
/// 2. `<objects_dir>/info/alternates`, one path per line. Blank lines, lines
///    starting with `#` and lines that are not valid UTF-8 are skipped, a
///    trailing `\r` is removed, and relative paths are resolved against
///    `objects_dir`.
///
/// A missing or unreadable alternates file contributes nothing.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    let mut alternates = Vec::new();
    if let Some(value) = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
        // `split_paths` works on the raw OS string, so non-UTF-8 paths survive
        // intact and the platform's list separator is honoured.
        alternates.extend(env::split_paths(&value).filter(|path| !path.as_os_str().is_empty()));
    }
    let alternates_path = objects_dir.join("info").join("alternates");
    if let Ok(contents) = fs::read(&alternates_path) {
        for raw in contents.split(|byte| *byte == b'\n') {
            let line = raw.strip_suffix(b"\r").unwrap_or(raw);
            if line.is_empty() || line.starts_with(b"#") {
                continue;
            }
            let Ok(value) = std::str::from_utf8(line) else {
                continue;
            };
            let path = Path::new(value);
            let absolute = if path.is_absolute() {
                path.to_path_buf()
            } else {
                objects_dir.join(path)
            };
            alternates.push(absolute);
        }
    }
    alternates
}
impl ObjectReader for FileObjectDatabase {
/// Reports whether `oid` belongs to the promised-object set computed by
/// `promisor_objects`.
fn is_promised_object(&self, oid: &ObjectId) -> bool {
    let promised = self.promisor_objects();
    promised.contains(oid)
}
/// Reports whether any shallow graft is recorded for this repository.
///
/// The graft set is loaded once, on first use, from the `shallow` file next
/// to the object directory (`<parent of objects_dir>/shallow`). An object
/// directory without a parent yields an empty set.
fn has_shallow_grafts(&self) -> bool {
    let grafts = self.shallow_grafts.get_or_init(|| {
        self.objects_dir
            .parent()
            .map_or_else(HashSet::new, |git_dir| {
                read_shallow_grafts(&git_dir.join("shallow"), self.format)
            })
    });
    !grafts.is_empty()
}
/// Reports whether `oid` is listed as a shallow graft.
///
/// The graft set is loaded once, on first use, from the `shallow` file next
/// to the object directory (`<parent of objects_dir>/shallow`). An object
/// directory without a parent yields an empty set.
fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
    let grafts = self.shallow_grafts.get_or_init(|| {
        self.objects_dir
            .parent()
            .map_or_else(HashSet::new, |git_dir| {
                read_shallow_grafts(&git_dir.join("shallow"), self.format)
            })
    });
    grafts.contains(oid)
}
/// Reads and decodes the object `oid`.
///
/// Sources are tried in this order:
/// 1. the implied empty tree, which needs no storage;
/// 2. the local pack that lists `oid`, with recovery fallbacks if decoding
///    from that pack fails (see the comments in the body);
/// 3. the loose store;
/// 4. local packs via `read_packed_object`;
/// 5. each alternate, opened without its own alternates;
/// 6. the loose store again after invalidating its cache.
///
/// When every source misses, an earlier non-`NotFound` loose-store error is
/// returned if there was one; otherwise a not-found error with
/// `MissingObjectContext::Read` is returned.
fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
if let Some(object) = implied_empty_tree_object(self.format, oid) {
return Ok(object);
}
if let Some(pack_lookup) = self.find_pack_containing(oid)? {
match self.read_packed_object_at_lookup(oid, &pack_lookup) {
Ok(object) => return Ok(object),
// A not-found from the pack read falls through to the remaining sources.
Err(GitError::NotFound(_)) => {}
Err(packed_err) => {
// The pack lists the object but decoding failed. Try every other copy
// (loose, other local packs, alternates) before surfacing the original
// pack error; errors from the loose and alternate attempts are ignored.
if let Ok(object) = self.loose.read_object(oid) {
return Ok(object);
}
if let Some(object) =
self.read_packed_object_from_other_packs(oid, &pack_lookup)?
{
return Ok(object);
}
for alternate in &self.alternates {
if let Ok(object) =
Self::without_alternates(alternate, self.format).read_object(oid)
{
return Ok(object);
}
}
return Err(packed_err);
}
}
}
// Remember a non-NotFound loose error; it is only reported if every later
// source also fails to produce the object.
let loose_err = match self.loose.read_object(oid) {
Ok(object) => return Ok(object),
Err(GitError::NotFound(_)) => None,
Err(err) => Some(err),
};
if let Some(object) = self.read_packed_object(oid)? {
return Ok(object);
}
// Unlike the recovery path above, a non-NotFound error from an alternate
// aborts the read here.
for alternate in &self.alternates {
match Self::without_alternates(alternate, self.format).read_object(oid) {
Ok(object) => return Ok(object),
Err(GitError::NotFound(_)) => {}
Err(err) => return Err(err),
}
}
// Final attempt: drop the loose store's cache and ask it once more.
self.loose.invalidate_cache();
match self.loose.read_object(oid) {
Ok(object) => return Ok(object),
Err(GitError::NotFound(_)) => {}
Err(err) => return Err(err),
}
if let Some(err) = loose_err {
return Err(err);
}
Err(GitError::object_not_found_in(
*oid,
MissingObjectContext::Read,
))
}
}
impl FileObjectDatabase {
    /// Returns the set of promised object ids, computing it on first use.
    ///
    /// The set starts with every id stored in a promisor pack (an index with
    /// both a `.pack` and a `.promisor` sibling) and is extended with every id
    /// reachable from those objects through the links reported by
    /// `promisor_object_links`. Objects that cannot be read are not followed,
    /// and a failure to enumerate promisor packs yields an empty starting set.
    fn promisor_objects(&self) -> &HashSet<ObjectId> {
        self.promisor_objects.get_or_init(|| {
            let mut promised =
                promisor_pack_object_ids(&self.objects_dir, self.format).unwrap_or_default();
            let mut pending: Vec<ObjectId> = promised.iter().copied().collect();
            while let Some(oid) = pending.pop() {
                if let Ok(object) = self.read_object(&oid) {
                    let links = promisor_object_links(self.format, &object);
                    // Queue only links that were not already known.
                    pending.extend(links.into_iter().filter(|link| promised.insert(*link)));
                }
            }
            promised
        })
    }
}
/// Collects the ids of all objects stored in promisor packs.
///
/// A promisor pack is an `*.idx` file in `<objects_dir>/pack` that has both a
/// `*.pack` and a `*.promisor` sibling. A missing pack directory yields an
/// empty set; I/O and index parse errors are propagated.
fn promisor_pack_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
    let pack_dir = objects_dir.join("pack");
    let mut oids = HashSet::new();
    if !pack_dir.exists() {
        return Ok(oids);
    }
    for dir_entry in fs::read_dir(pack_dir)? {
        let idx_path = dir_entry?.path();
        let is_idx = idx_path.extension().and_then(|ext| ext.to_str()) == Some("idx");
        if !is_idx {
            continue;
        }
        let is_promisor_pack = idx_path.with_extension("pack").exists()
            && idx_path.with_extension("promisor").exists();
        if !is_promisor_pack {
            continue;
        }
        let index = PackIndex::parse(&fs::read(idx_path)?, format)?;
        oids.extend(index.entries.into_iter().map(|entry| entry.oid));
    }
    Ok(oids)
}
/// Returns the object ids that `object` refers to directly.
///
/// * commit: its tree followed by its parents;
/// * tree: the id of every entry that parses;
/// * tag: the tagged object;
/// * blob: nothing.
///
/// A commit or tag that fails to parse yields no links.
fn promisor_object_links(format: ObjectFormat, object: &EncodedObject) -> Vec<ObjectId> {
    let body = &object.body;
    match object.object_type {
        ObjectType::Blob => Vec::new(),
        ObjectType::Tag => Tag::parse_ref(format, body)
            .map(|tag| vec![tag.object])
            .unwrap_or_default(),
        ObjectType::Tree => TreeEntries::new(format, body)
            .filter_map(|entry| Some(entry.ok()?.oid))
            .collect(),
        ObjectType::Commit => Commit::parse_ref(format, body)
            .map(|commit| {
                let mut links = Vec::with_capacity(commit.parents.len() + 1);
                links.push(commit.tree);
                links.extend(commit.parents);
                links
            })
            .unwrap_or_default(),
    }
}
impl ObjectWriter for FileObjectDatabase {
    /// Stores `object` as a loose object unless the database already has it.
    ///
    /// The id is computed from the object in this store's format and returned
    /// in both cases.
    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
        let oid = object.object_id(self.format)?;
        if self.contains(&oid)? {
            Ok(oid)
        } else {
            self.loose.write_object(object)
        }
    }
}
/// Writes `bytes` to `path` via a synced temporary file and a rename.
///
/// Nothing is written when `path` already exists. A failed rename is treated
/// as success when `path` exists afterwards. The temporary file is removed on
/// every failure path.
fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
    if path.exists() {
        return Ok(());
    }
    let Some(dir) = path.parent() else {
        return Err(GitError::InvalidPath(
            "pack component path has no parent".into(),
        ));
    };
    fs::create_dir_all(dir)?;
    let staging = unique_temp_path(dir);
    let stage_and_publish = || -> Result<()> {
        let mut file = fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&staging)?;
        file.write_all(bytes)?;
        file.sync_all()?;
        // Close the handle before the rename.
        drop(file);
        if let Err(err) = fs::rename(&staging, path) {
            if !path.exists() {
                return Err(GitError::Io(err.to_string()));
            }
            // The destination exists despite the failed rename; discard our copy.
            let _ = fs::remove_file(&staging);
        }
        Ok(())
    };
    let outcome = stage_and_publish();
    if outcome.is_err() {
        let _ = fs::remove_file(&staging);
    }
    outcome
}
/// Creates the empty `<pack_name>.promisor` file in `pack_dir` when `promisor`
/// is set and returns its path; returns `None` without touching disk otherwise.
fn write_promisor_pack_sidecar(
    pack_dir: &Path,
    pack_name: &str,
    promisor: bool,
) -> Result<Option<PathBuf>> {
    if promisor {
        let sidecar = pack_dir.join(format!("{pack_name}.promisor"));
        write_pack_component(&sidecar, b"")?;
        Ok(Some(sidecar))
    } else {
        Ok(None)
    }
}
/// Number of inflated bytes inspected when looking for the NUL byte that ends
/// a loose object header; also quoted in the "header too long" error message.
const MAX_LOOSE_HEADER_LEN: usize = 32;
/// Builds the error reported when no header terminator is found within
/// `MAX_LOOSE_HEADER_LEN` bytes of the loose object `oid`.
fn loose_header_too_long(oid: &ObjectId) -> GitError {
    let message = format!("header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes");
    GitError::InvalidObject(message)
}
/// Builds the error reported when the header of the loose object `oid` cannot
/// be unpacked.
fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
    let message = format!("unable to unpack {oid} header");
    GitError::InvalidObject(message)
}
/// Inspects the first two bytes of a zlib stream and names the first header
/// problem found, if any.
///
/// The checks run in this order: header checksum, compression method, window
/// size, preset-dictionary flag. Returns `None` when `input` has fewer than
/// two bytes or when none of the checks fails.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (cmf, flg) = match input {
        [first, second, ..] => (*first, *second),
        _ => return None,
    };
    let header_check = u16::from_be_bytes([cmf, flg]);
    let method = cmf & 0x0f;
    let window_bits = cmf >> 4;
    let needs_dictionary = flg & 0x20 != 0;
    let message = if header_check % 31 != 0 {
        "inflate: data stream error (incorrect header check)"
    } else if method != 8 {
        "inflate: data stream error (unknown compression method)"
    } else if window_bits > 7 {
        "inflate: data stream error (invalid window size)"
    } else if needs_dictionary {
        "inflate: needs dictionary (no message)"
    } else {
        return None;
    };
    Some(message)
}
/// Prints the zlib header diagnostic for `input` to stderr, prefixed with
/// `error: `, when `inflate_header_diagnostic` finds one.
fn emit_inflate_diagnostic(input: &[u8]) {
    match inflate_header_diagnostic(input) {
        Some(diagnostic) => eprintln!("error: {diagnostic}"),
        None => {}
    }
}
/// Outcome of `LooseObjectStore::verify_object` for a loose object file that exists.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectIntegrity {
/// The file inflated and parsed, and its content hashes to the requested id.
Ok,
/// The file inflated and parsed, but its content hashes to `actual` instead
/// of the requested id.
HashMismatch { actual: ObjectId },
/// The file could not be inflated or parsed, had trailing bytes, or held
/// fewer body bytes than its header declares.
Corrupt,
}
/// Store for loose (one file per object) objects under an objects directory.
#[derive(Debug, Clone)]
pub struct LooseObjectStore {
// Directory that holds the two-hex-digit fan-out subdirectories.
objects_dir: PathBuf,
// Hash format every object id handled by this store must use.
format: ObjectFormat,
// Presence cache; held behind `Arc`, so clones of the store share it.
loose_cache: Arc<Mutex<LoosePresenceCache>>,
}
impl LooseObjectStore {
/// Creates a store rooted at `objects_dir` for ids of `format`, with an
/// empty presence cache.
pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
    let objects_dir = objects_dir.into();
    let loose_cache = Arc::new(Mutex::new(LoosePresenceCache::default()));
    Self {
        objects_dir,
        format,
        loose_cache,
    }
}
/// Answers from the presence cache whether `oid` is known as a loose object.
///
/// The fan-out directory selected by the first id byte is scanned the first
/// time it is asked about. Returns `None` when the cache lock cannot be
/// taken or the scan fails, so callers can fall back to the filesystem.
fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
    let mut cache = self.loose_cache.lock().ok()?;
    let fanout = oid.as_bytes()[0];
    let already_loaded = cache.loaded_fanouts.contains(&fanout);
    if !already_loaded {
        let scan = collect_loose_fanout_object_ids(
            &self.objects_dir,
            self.format,
            fanout,
            &mut cache.objects,
        );
        scan.ok()?;
        // Mark the fan-out as loaded only after a successful scan.
        cache.loaded_fanouts.insert(fanout);
    }
    Some(cache.objects.contains(oid))
}
/// Rescans every loose object id, refreshes the presence cache with the
/// result, and returns the ids sorted by their raw bytes.
///
/// When the cache lock cannot be taken, the ids come straight from
/// `loose_object_ids` and the cache is left untouched.
fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
    let Ok(mut cache) = self.loose_cache.lock() else {
        return loose_object_ids(&self.objects_dir, self.format);
    };
    cache.objects = loose_object_id_set(&self.objects_dir, self.format)?;
    // A full scan covers every fan-out directory.
    cache.loaded_fanouts = (0..=u8::MAX).collect();
    let mut sorted: Vec<ObjectId> = cache.objects.iter().copied().collect();
    sorted.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
    Ok(sorted)
}
/// Records `oid` in the presence cache; does nothing when the cache lock
/// cannot be taken.
fn note_loose_write(&self, oid: ObjectId) {
    let Ok(mut cache) = self.loose_cache.lock() else {
        return;
    };
    cache.objects.insert(oid);
}
/// Resets the presence cache to its default state; does nothing when the
/// cache lock cannot be taken.
pub(crate) fn invalidate_cache(&self) {
    let Ok(mut cache) = self.loose_cache.lock() else {
        return;
    };
    *cache = LoosePresenceCache::default();
}
/// Creates a store for the objects directory that `repository_objects_dir`
/// resolves for `git_dir`.
pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
    let objects_dir = repository_objects_dir(git_dir);
    Self::new(objects_dir, format)
}
/// Checks that `oid` uses the same hash format as this store.
///
/// # Errors
///
/// Returns `GitError::InvalidObjectId` naming both formats on a mismatch.
fn validate_oid_format(&self, oid: &ObjectId) -> Result<()> {
    if oid.format() == self.format {
        return Ok(());
    }
    let message = format!(
        "object {oid} uses {}, store uses {}",
        oid.format().name(),
        self.format.name()
    );
    Err(GitError::InvalidObjectId(message))
}
/// Returns the file path for `oid`: a directory named after the first two
/// hex digits, containing a file named after the remaining digits.
///
/// # Errors
///
/// Fails when `oid` does not use the store's hash format.
pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
    self.validate_oid_format(oid)?;
    let hex = oid.to_hex();
    let (fanout, rest) = hex.split_at(2);
    let mut path = self.objects_dir.join(fanout);
    path.push(rest);
    Ok(path)
}
/// Reports whether a loose object file exists for `oid`.
///
/// A cached "absent" answer is returned without touching the filesystem;
/// any other cache answer is confirmed against the object path.
pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
    self.validate_oid_format(oid)?;
    match self.cached_loose_presence(oid) {
        Some(false) => Ok(false),
        _ => Ok(self.object_path(oid)?.exists()),
    }
}
/// Returns the on-disk size in bytes of the loose object file for `oid`,
/// or `None` when the cache reports it absent or the file does not exist.
///
/// # Errors
///
/// Fails on a hash-format mismatch or on any metadata error other than
/// "not found".
pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
    self.validate_oid_format(oid)?;
    if let Some(false) = self.cached_loose_presence(oid) {
        return Ok(None);
    }
    match fs::metadata(self.object_path(oid)?) {
        Ok(metadata) => Ok(Some(metadata.len())),
        Err(err) => match err.kind() {
            std::io::ErrorKind::NotFound => Ok(None),
            _ => Err(GitError::Io(err.to_string())),
        },
    }
}
/// Reads only the `<type> <size>` header of the loose object `oid`.
///
/// Returns `None` when the cache reports the object absent or the file does
/// not exist.
///
/// # Errors
///
/// Fails on a hash-format mismatch, on I/O errors other than "not found",
/// when the header cannot be inflated or is not terminated within the header
/// window, and when the header text is not valid UTF-8, lacks a size, or
/// carries an unparsable type or size.
pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
    self.validate_oid_format(oid)?;
    if let Some(false) = self.cached_loose_presence(oid) {
        return Ok(None);
    }
    let path = self.object_path(oid)?;
    let compressed = match fs::read(&path) {
        Ok(bytes) => bytes,
        Err(err) => {
            return match err.kind() {
                std::io::ErrorKind::NotFound => Ok(None),
                _ => Err(GitError::Io(err.to_string())),
            };
        }
    };
    let header_bytes = match inflate_loose_header(&compressed)? {
        LooseHeader::Ok(bytes) => bytes,
        LooseHeader::Bad => {
            // Only the two zlib header bytes are relevant for the diagnostic.
            emit_inflate_diagnostic(compressed.get(..2).unwrap_or(&compressed));
            return Err(loose_unpack_header_failed(oid));
        }
        LooseHeader::TooLong => return Err(loose_header_too_long(oid)),
    };
    let header = std::str::from_utf8(&header_bytes)
        .map_err(|err| GitError::InvalidObject(err.to_string()))?;
    let Some((kind, size)) = header.split_once(' ') else {
        return Err(GitError::InvalidObject("missing object size".into()));
    };
    let object_type = kind.parse::<ObjectType>()?;
    let Ok(size) = size.parse::<u64>() else {
        return Err(GitError::InvalidObject("invalid object size".into()));
    };
    Ok(Some((object_type, size)))
}
/// Lists every loose object id in the store, refreshing the presence cache
/// along the way.
pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
    let ids = self.loose_object_ids_cached()?;
    Ok(ids)
}
/// Checks the loose object file for `oid` and classifies its integrity.
///
/// Returns `Ok(None)` when the file does not exist. Corruption is reported
/// through the returned `LooseObjectIntegrity` together with `error:` lines
/// printed to stderr that mention `display_path`; the `Err` variant is used
/// for a hash-format mismatch, for I/O failures other than "not found", and
/// for a failure to compute the object's id.
pub fn verify_object(
&self,
oid: &ObjectId,
display_path: &str,
) -> Result<Option<LooseObjectIntegrity>> {
let path = self.object_path(oid)?;
let compressed = match fs::read(&path) {
Ok(compressed) => compressed,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
Err(err) => return Err(GitError::Io(err.to_string())),
};
let mut decoder = ZlibDecoder::new(compressed.as_slice());
let mut framed = Vec::new();
// Inflate failure: the message depends on whether the header terminator was
// already produced before the stream broke.
if decoder.read_to_end(&mut framed).is_err() {
emit_inflate_diagnostic(&compressed);
if framed_loose_header_terminated(&framed) {
eprintln!("error: corrupt loose object '{oid}'");
eprintln!("error: unable to unpack contents of {display_path}");
} else {
eprintln!("error: unable to unpack header of {display_path}");
}
return Ok(Some(LooseObjectIntegrity::Corrupt));
}
// Inflate succeeded but no NUL appears within the header window.
if !framed_loose_header_terminated(&framed) {
eprintln!("error: unable to unpack header of {display_path}");
return Ok(Some(LooseObjectIntegrity::Corrupt));
}
// The decoder consumed fewer bytes than the file holds: trailing garbage.
if (decoder.total_in() as usize) < compressed.len() {
eprintln!("error: garbage at end of loose object '{oid}'");
eprintln!("error: unable to unpack contents of {display_path}");
return Ok(Some(LooseObjectIntegrity::Corrupt));
}
// Fewer body bytes than the header declares means the content is truncated.
if let Some(declared) = loose_header_declared_size(&framed) {
let nul = framed.iter().position(|&b| b == 0).unwrap_or(framed.len());
let body_len = framed.len() - (nul + 1).min(framed.len());
if body_len < declared {
eprintln!("error: corrupt loose object '{oid}'");
eprintln!("error: unable to unpack contents of {display_path}");
return Ok(Some(LooseObjectIntegrity::Corrupt));
}
}
// Parse failure: report an unknown type specifically when the header is
// otherwise well formed, and a generic header error in every other case.
let Ok(object) = parse_framed_object(&framed) else {
if let Some(header) = loose_header_with_unknown_type(&framed) {
eprintln!("error: unable to parse type from header '{header}' of {display_path}");
} else {
eprintln!("error: unable to parse header of {display_path}");
}
return Ok(Some(LooseObjectIntegrity::Corrupt));
};
// Finally compare the content hash with the id the file is stored under.
let actual = object.object_id(self.format)?;
if &actual != oid {
return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
}
Ok(Some(LooseObjectIntegrity::Ok))
}
}
/// Reports whether a NUL byte occurs within the first `MAX_LOOSE_HEADER_LEN`
/// bytes of `framed`.
fn framed_loose_header_terminated(framed: &[u8]) -> bool {
    let window_len = framed.len().min(MAX_LOOSE_HEADER_LEN);
    framed[..window_len].contains(&0)
}
/// Returns the header text of `framed` when the header is well formed apart
/// from naming an unknown object type.
///
/// "Well formed" means: a NUL-terminated UTF-8 header of the form
/// `<type> <size>`, a size written in canonical decimal (digits only, no
/// leading zero unless the size is exactly `0`), and a body whose length
/// equals that size. Returns `None` when any of that does not hold or when
/// the type is a known `ObjectType`.
fn loose_header_with_unknown_type(framed: &[u8]) -> Option<String> {
    let nul = framed.iter().position(|&byte| byte == 0)?;
    let header = std::str::from_utf8(&framed[..nul]).ok()?;
    let (kind, size) = header.split_once(' ')?;
    // `str::parse` would accept a leading `+` and redundant leading zeros;
    // such a size makes the whole header unparsable, not just the type.
    let canonical = size == "0"
        || (!size.is_empty()
            && !size.starts_with('0')
            && size.bytes().all(|byte| byte.is_ascii_digit()));
    if !canonical {
        return None;
    }
    let size: usize = size.parse().ok()?;
    if framed.len() - (nul + 1) != size {
        return None;
    }
    if kind.parse::<ObjectType>().is_ok() {
        return None;
    }
    Some(header.to_string())
}
/// Extracts the body size declared by the `<type> <size>` header of `framed`.
///
/// Returns `None` when there is no NUL terminator, the header is not UTF-8,
/// it has no space separator, or the size is not canonical decimal (digits
/// only, no leading zero unless the size is exactly `0`) that fits in `usize`.
fn loose_header_declared_size(framed: &[u8]) -> Option<usize> {
    let nul = framed.iter().position(|&byte| byte == 0)?;
    let header = std::str::from_utf8(&framed[..nul]).ok()?;
    let (_kind, size) = header.split_once(' ')?;
    // `str::parse` would accept a leading `+` and redundant leading zeros,
    // neither of which is a valid loose header size.
    let canonical = size == "0"
        || (!size.is_empty()
            && !size.starts_with('0')
            && size.bytes().all(|byte| byte.is_ascii_digit()));
    if !canonical {
        return None;
    }
    size.parse::<usize>().ok()
}
/// Result of inflating only the leading header window of a loose object.
enum LooseHeader {
/// The header bytes that precede the NUL terminator.
Ok(Vec<u8>),
/// The compressed stream could not be inflated.
Bad,
/// No NUL terminator was found in the inflated header window.
TooLong,
}
fn inflate_loose_header(compressed: &[u8]) -> Result<LooseHeader> {
let mut out = [0u8; MAX_LOOSE_HEADER_LEN];
let mut decompress = Decompress::new(true);
let status = decompress.decompress(compressed, &mut out, FlushDecompress::None);
let produced = decompress.total_out() as usize;
match status {
Ok(_) => {
let window = &out[..produced.min(MAX_LOOSE_HEADER_LEN)];
match window.iter().position(|&byte| byte == 0) {
Some(nul) => Ok(LooseHeader::Ok(window[..nul].to_vec())),
None => Ok(LooseHeader::TooLong),
}
}
Err(_) => Ok(LooseHeader::Bad),
}
}
impl ObjectReader for LooseObjectStore {
    /// Reads and parses the loose object stored for `oid`.
    ///
    /// # Errors
    ///
    /// Returns a "not found" error when the cache reports the object absent
    /// or the file does not exist, an I/O error for other read failures, and
    /// `GitError::InvalidObject` when the stream cannot be inflated, the
    /// header is not terminated within the header window, or (with read
    /// verification enabled) the content hashes to a different id. Parse
    /// errors from `parse_framed_object` are passed through.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
        self.validate_oid_format(oid)?;
        let missing = || GitError::object_not_found_in(*oid, MissingObjectContext::Read);
        if let Some(false) = self.cached_loose_presence(oid) {
            return Err(missing());
        }
        let path = self.object_path(oid)?;
        let compressed = fs::read(&path).map_err(|err| match err.kind() {
            std::io::ErrorKind::NotFound => missing(),
            _ => GitError::Io(err.to_string()),
        })?;
        let mut framed = Vec::new();
        let inflated = ZlibDecoder::new(compressed.as_slice()).read_to_end(&mut framed);
        let header_terminated = framed_loose_header_terminated(&framed);
        if inflated.is_err() {
            emit_inflate_diagnostic(&compressed);
            // A terminated header means the damage lies in the body.
            return Err(if header_terminated {
                GitError::InvalidObject(format!("corrupt loose object '{oid}'"))
            } else {
                loose_unpack_header_failed(oid)
            });
        }
        if !header_terminated {
            return Err(loose_header_too_long(oid));
        }
        let object = parse_framed_object(&framed)?;
        if verify_reads_enabled() {
            let actual = object.object_id(self.format)?;
            if &actual != oid {
                return Err(GitError::InvalidObject(format!(
                    "loose object {} hashes to {actual}",
                    path.display()
                )));
            }
        }
        Ok(Arc::new(object))
    }
}
impl ObjectWriter for LooseObjectStore {
    /// Writes `object` as a zlib-compressed loose file and returns its id.
    ///
    /// An object whose file already exists is not rewritten. New files are
    /// staged under a temporary name in the fan-out directory and renamed
    /// into place; a failed rename counts as success when the destination
    /// exists afterwards. The id is recorded in the presence cache on success.
    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
        let oid = object.object_id(self.format)?;
        let destination = self.object_path(&oid)?;
        if !destination.exists() {
            let Some(fanout_dir) = destination.parent() else {
                return Err(GitError::InvalidPath(
                    "loose object path has no parent".into(),
                ));
            };
            fs::create_dir_all(fanout_dir)?;
            let staging = unique_temp_path(fanout_dir);
            let stage_and_publish = || -> Result<()> {
                let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
                encoder.write_all(&object.framed_bytes())?;
                let compressed = encoder.finish()?;
                let mut file = fs::OpenOptions::new()
                    .write(true)
                    .create_new(true)
                    .open(&staging)?;
                file.write_all(&compressed)?;
                // Close the handle before the rename.
                drop(file);
                if let Err(err) = fs::rename(&staging, &destination) {
                    if !destination.exists() {
                        return Err(GitError::Io(err.to_string()));
                    }
                    let _ = fs::remove_file(&staging);
                }
                Ok(())
            };
            let outcome = stage_and_publish();
            if outcome.is_err() {
                let _ = fs::remove_file(&staging);
            }
            outcome?;
        }
        self.note_loose_write(oid);
        Ok(oid)
    }
}
/// Builds a temporary file path inside `parent` named
/// `tmp_obj_<process id>_<counter>`, taking the next value of the
/// process-wide `TEMPFILE_COUNTER`.
fn unique_temp_path(parent: &Path) -> PathBuf {
    let sequence = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
    let pid = std::process::id();
    let file_name = format!("tmp_obj_{}_{}", pid, sequence);
    parent.join(file_name)
}
#[cfg(test)]
mod tests {
use super::*;
use sley_core::BString;
use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
use sley_pack::{PackFile, PackWriteOptions};
/// Builds a blob whose body is `len` copies of `byte`.
fn blob_of(byte: u8, len: usize) -> EncodedObject {
    let body: Vec<u8> = std::iter::repeat(byte).take(len).collect();
    EncodedObject::new(ObjectType::Blob, body)
}
/// Builds the same blob as `blob_of`, wrapped in an `Arc` as the caches store it.
fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
    let blob = blob_of(byte, len);
    Arc::new(blob)
}
fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
reader
.read_object(oid)
.expect("test operation should succeed")
.as_ref()
.clone()
}
// The budget fits two 1000-byte blobs plus 8 spare bytes. Key 1 is touched
// before key 3 is inserted, so key 2 is expected to be the one evicted.
#[test]
fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
let one = cached_object_cost(&blob_of(0, 1000));
let mut cache = LruCache::<u32>::new(one * 2 + 8);
cache.put(1, cached_blob_of(b'a', 1000));
cache.put(2, cached_blob_of(b'b', 1000));
assert!(cache.get(&1).is_some());
cache.put(3, cached_blob_of(b'c', 1000));
assert!(cache.get(&1).is_some());
assert!(cache.get(&2).is_none());
assert!(cache.get(&3).is_some());
}
/// A cache created with a zero byte budget must not retain anything.
#[test]
fn lru_cache_zero_budget_is_inert() {
    let mut inert = LruCache::<u32>::new(0);
    inert.put(1, cached_blob_of(b'a', 16));
    let lookup = inert.get(&1);
    assert!(lookup.is_none());
}
// Re-putting key 1 with an object larger than the whole budget must leave no
// entry for key 1 (not even the earlier small one), and the cache must still
// accept a later object that fits.
#[test]
fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
let mut cache = LruCache::<u32>::new(cached_object_cost(&blob_of(0, 100)));
cache.put(1, cached_blob_of(b'a', 50));
assert!(cache.get(&1).is_some());
cache.put(1, cached_blob_of(b'b', 10_000));
assert!(cache.get(&1).is_none());
cache.put(2, cached_blob_of(b'c', 50));
assert!(cache.get(&2).is_some());
}
// Replacing key 2 with a larger object must be charged against the budget:
// afterwards key 2 is still present and key 1 has been evicted to make room.
#[test]
fn lru_cache_replacing_entry_updates_byte_accounting() {
let small = cached_object_cost(&blob_of(0, 500));
let mut cache = LruCache::<u32>::new(small * 2 + 200);
cache.put(1, cached_blob_of(b'a', 500));
cache.put(2, cached_blob_of(b'b', 500));
assert!(cache.get(&1).is_some());
assert!(cache.get(&2).is_some());
cache.put(2, cached_blob_of(b'b', 1000));
assert!(cache.get(&2).is_some());
assert!(cache.get(&1).is_none());
}
/// Writing the blob `hello\n` must yield its well-known SHA-1 id, and the
/// stored object must pass validation.
#[test]
fn write_and_validate_blob() {
    let db = ObjectDatabase::new(ObjectFormat::Sha1);
    let blob = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
    let oid = db
        .write_object(blob)
        .expect("test operation should succeed");
    assert_eq!(oid.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
    db.validate(&oid).expect("test operation should succeed");
}
// Round-trips a blob through a loose store in a fresh temp directory and
// checks that the object file exists at the path the store reports.
#[test]
fn loose_store_writes_and_reads_object() {
let root = std::env::temp_dir().join(format!(
"sley-loose-store-{}-{}",
std::process::id(),
TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
));
let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
let oid = store
.write_object(object.clone())
.expect("test operation should succeed");
assert_eq!(read_object_for_assert(&store, &oid), object);
assert!(
store
.object_path(&oid)
.expect("test operation should succeed")
.exists()
);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// Zeroes the compressed byte at offset 10 of a small loose object and expects
// `read_header` to fail with an "unable to unpack" error naming the object id.
#[test]
fn read_header_detects_corruption_within_gits_header_window() {
let root = temp_root("sley-loose-header-corrupt");
let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
let object = EncodedObject::new(ObjectType::Blob, b"content\n".to_vec());
let oid = store
.write_object(object)
.expect("test operation should succeed");
let path = store
.object_path(&oid)
.expect("test operation should succeed");
let mut bytes = fs::read(&path).expect("test operation should succeed");
bytes[10] = 0;
fs::write(&path, &bytes).expect("test operation should succeed");
// The file was rewritten behind the store's back, so drop cached state.
store.invalidate_cache();
let err = store
.read_header(&oid)
.expect_err("corrupt loose header must fail like git's ULHR_BAD");
let msg = err.to_string();
assert!(
msg.contains("unable to unpack") && msg.contains(&oid.to_hex()),
"expected git's ULHR_BAD message, got: {msg}"
);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// Flips a byte in the middle of the compressed data of a 4096-byte blob and
// expects the header-only read to still report the type and size.
#[test]
fn read_header_ignores_corruption_past_gits_header_window() {
let root = temp_root("sley-loose-header-deep-corrupt");
let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
// Body bytes derived from a multiplicative sequence rather than a constant.
let body: Vec<u8> = (0..4096u32)
.map(|i| (i.wrapping_mul(2654435761)) as u8)
.collect();
let object = EncodedObject::new(ObjectType::Blob, body.clone());
let oid = store
.write_object(object)
.expect("test operation should succeed");
let path = store
.object_path(&oid)
.expect("test operation should succeed");
let mut bytes = fs::read(&path).expect("test operation should succeed");
let deep = bytes.len() / 2;
bytes[deep] ^= 0xff;
fs::write(&path, &bytes).expect("test operation should succeed");
store.invalidate_cache();
let header = store
.read_header(&oid)
.expect("header-only read must still succeed for deep body corruption");
assert_eq!(header, Some((ObjectType::Blob, body.len() as u64)));
fs::remove_dir_all(root).expect("test operation should succeed");
}
// Writes a single-object SHA-1 pack and its index straight into
// `objects/pack` and expects the file database to find and read the object.
#[test]
fn file_database_reads_object_from_pack_index() {
let root = temp_root("sley-file-odb-pack");
let git_dir = root.join(".git");
let pack_dir = git_dir.join("objects").join("pack");
fs::create_dir_all(&pack_dir).expect("test operation should succeed");
let object = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
let oid = object
.object_id(ObjectFormat::Sha1)
.expect("test operation should succeed");
let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
.expect("test operation should succeed");
let pack_name = written.checksum.to_hex();
fs::write(
pack_dir.join(format!("pack-{pack_name}.pack")),
written.pack,
)
.expect("test operation should succeed");
fs::write(
pack_dir.join(format!("pack-{pack_name}.idx")),
written.index,
)
.expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
assert!(db.contains(&oid).expect("test operation should succeed"));
assert_eq!(read_object_for_assert(&db, &oid), object);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// A read that misses, followed by a loose write through the same database,
// must be followed by a successful read of that object.
#[test]
fn file_database_loose_cache_observes_same_process_write_after_miss() {
let root = temp_root("sley-file-odb-loose-cache-write");
let git_dir = root.join(".git");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
let object = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
let oid = object
.object_id(ObjectFormat::Sha1)
.expect("test operation should succeed");
assert!(matches!(db.read_object(&oid), Err(GitError::NotFound(_))));
db.loose()
.write_object(object.clone())
.expect("test operation should succeed");
assert_eq!(read_object_for_assert(&db, &oid), object);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// A presence checker that first reports an object missing must report it
// present after the object is written loose through the same database.
#[test]
fn object_presence_checker_observes_same_process_loose_write_after_miss() {
let root = temp_root("sley-presence-checker-loose-cache-write");
let git_dir = root.join(".git");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
let mut checker = db.presence_checker();
let object = EncodedObject::new(ObjectType::Blob, b"checker loose after miss\n".to_vec());
let oid = object
.object_id(ObjectFormat::Sha1)
.expect("test operation should succeed");
assert!(
!checker
.contains(&oid)
.expect("test operation should succeed")
);
db.loose()
.write_object(object)
.expect("test operation should succeed");
assert!(
checker
.contains(&oid)
.expect("test operation should succeed")
);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// `read_object_header` must report the same type and size as a full read for
// a loose object and for three packed objects, and `None` for a missing id.
#[test]
fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
let root = temp_root("sley-read-object-header");
let git_dir = root.join(".git");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let format = ObjectFormat::Sha1;
let db = FileObjectDatabase::from_git_dir(&git_dir, format);
let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
let loose_oid = db
.write_object(loose.clone())
.expect("test operation should succeed");
// `child` extends `base` with a short tail; the pack is written with
// offset deltas preferred and reordering disabled.
let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
let mut child_body = vec![b'a'; 4096];
child_body.extend_from_slice(b" plus a deltified tail\n");
let child = EncodedObject::new(ObjectType::Blob, child_body);
let commitish =
EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());
let base_oid = base
.object_id(format)
.expect("test operation should succeed");
let child_oid = child
.object_id(format)
.expect("test operation should succeed");
let commit_oid = commitish
.object_id(format)
.expect("test operation should succeed");
let options = PackWriteOptions::new()
.with_prefer_ofs_delta(true)
.with_reorder(false);
let pack = PackFile::write_packed_with_options(
&[base.clone(), child.clone(), commitish.clone()],
format,
&options,
)
.expect("test operation should succeed");
db.install_pack(&pack)
.expect("test operation should succeed");
for (oid, want_type, want_len) in [
(&loose_oid, ObjectType::Blob, loose.body.len()),
(&base_oid, ObjectType::Blob, base.body.len()),
(&child_oid, ObjectType::Blob, child.body.len()),
(&commit_oid, ObjectType::Commit, commitish.body.len()),
] {
assert_eq!(
db.read_object_header(oid)
.expect("test operation should succeed"),
Some((want_type, want_len as u64)),
"header for {oid}"
);
let full = db.read_object(oid).expect("test operation should succeed");
assert_eq!(
db.read_object_header(oid)
.expect("test operation should succeed"),
Some((full.object_type, full.body.len() as u64))
);
}
let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
.expect("test operation should succeed");
assert_eq!(
db.read_object_header(&missing)
.expect("test operation should succeed"),
None
);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// Checks `object_storage_info` for a loose object (disk size equals the file
// size, zero delta base), a packed base (zero delta base), a packed child
// (delta base equals the base id), and a missing id (`None`).
#[test]
fn object_storage_info_reports_loose_packed_and_delta_metadata() {
let root = temp_root("sley-object-storage-info");
let git_dir = root.join(".git");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let format = ObjectFormat::Sha1;
let db = FileObjectDatabase::from_git_dir(&git_dir, format);
let loose = EncodedObject::new(ObjectType::Blob, b"loose storage object\n".to_vec());
let loose_oid = db
.write_object(loose)
.expect("test operation should succeed");
let loose_size = fs::metadata(
db.loose()
.object_path(&loose_oid)
.expect("test operation should succeed"),
)
.expect("test operation should succeed")
.len();
let loose_info = db
.object_storage_info(&loose_oid)
.expect("test operation should succeed")
.expect("test operation should succeed");
assert_eq!(loose_info.disk_size, loose_size);
assert_eq!(
loose_info.deltabase,
zero_oid(format).expect("test operation should succeed")
);
let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
let mut child_body = vec![b'a'; 4096];
child_body.extend_from_slice(b" changed tail\n");
let child = EncodedObject::new(ObjectType::Blob, child_body);
let base_oid = base
.object_id(format)
.expect("test operation should succeed");
let child_oid = child
.object_id(format)
.expect("test operation should succeed");
let options = PackWriteOptions::new()
.with_prefer_ofs_delta(true)
.with_reorder(false);
let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
.expect("test operation should succeed");
db.install_pack(&pack)
.expect("test operation should succeed");
let base_info = db
.object_storage_info(&base_oid)
.expect("test operation should succeed")
.expect("test operation should succeed");
assert!(base_info.disk_size > 0);
assert_eq!(
base_info.deltabase,
zero_oid(format).expect("test operation should succeed")
);
let child_info = db
.object_storage_info(&child_oid)
.expect("test operation should succeed")
.expect("test operation should succeed");
assert!(child_info.disk_size > 0);
assert_eq!(child_info.deltabase, base_oid);
let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
.expect("test operation should succeed");
assert_eq!(
db.object_storage_info(&missing)
.expect("test operation should succeed"),
None
);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// An 8-hex-digit prefix of the only loose object must resolve uniquely, and
// the object must appear in the database's id listing.
#[test]
fn file_database_resolves_unique_loose_object_prefix() {
let root = temp_root("sley-file-odb-prefix-loose");
let git_dir = root.join(".git");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
let object = EncodedObject::new(ObjectType::Blob, b"prefix loose\n".to_vec());
let oid = db
.write_object(object)
.expect("test operation should succeed");
let prefix = &oid.to_hex()[..8];
assert_eq!(
db.resolve_prefix(prefix)
.expect("test operation should succeed"),
ObjectPrefixResolution::Unique(oid)
);
assert!(
db.object_ids()
.expect("test operation should succeed")
.contains(&oid)
);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// An 8-hex-digit prefix of the only packed object must resolve uniquely.
#[test]
fn file_database_resolves_unique_packed_object_prefix() {
let root = temp_root("sley-file-odb-prefix-packed");
let git_dir = root.join(".git");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
let object = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
let oid = object
.object_id(ObjectFormat::Sha1)
.expect("test operation should succeed");
let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
.expect("test operation should succeed");
db.install_pack(&pack)
.expect("test operation should succeed");
let prefix = &oid.to_hex()[..8];
assert_eq!(
db.resolve_prefix(prefix)
.expect("test operation should succeed"),
ObjectPrefixResolution::Unique(oid)
);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// Writes up to 10,000 distinct blobs until two share a 4-hex-digit prefix,
// then expects that prefix to resolve as ambiguous with exactly those two ids.
#[test]
fn file_database_reports_ambiguous_object_prefix() {
let root = temp_root("sley-file-odb-prefix-ambiguous");
let git_dir = root.join(".git");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
let mut seen = HashMap::new();
let (prefix, first, second) = (0..10_000)
.find_map(|idx| {
let object =
EncodedObject::new(ObjectType::Blob, format!("ambiguous {idx}\n").into_bytes());
let oid = db
.write_object(object)
.expect("test operation should succeed");
let prefix = oid.to_hex()[..4].to_string();
// `insert` returns the previous id for this prefix, which signals a collision.
seen.insert(prefix.clone(), oid)
.map(|first| (prefix, first, oid))
})
.expect("test should find a 4-hex collision");
let ObjectPrefixResolution::Ambiguous(mut matches) = db
.resolve_prefix(&prefix)
.expect("test operation should succeed")
else {
panic!("expected ambiguous prefix {prefix}");
};
// Sort both sides so the comparison does not depend on match order.
matches.sort_by_key(ObjectId::to_hex);
let mut expected = vec![first, second];
expected.sort_by_key(ObjectId::to_hex);
assert_eq!(matches, expected);
fs::remove_dir_all(root).expect("test operation should succeed");
}
/// A three-character prefix must be rejected with `GitError::InvalidObjectId`.
#[test]
fn file_database_rejects_too_short_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-short");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let outcome = db.resolve_prefix("abc");
    assert!(matches!(outcome, Err(GitError::InvalidObjectId(_))));
    fs::remove_dir_all(root).expect("test operation should succeed");
}
// SHA-256 variant of the pack-index read test: the pack and index are written
// straight into `objects/pack` and the object is read back.
#[test]
fn file_database_reads_sha256_object_from_pack_index() {
let root = temp_root("sley-file-odb-pack-sha256");
let git_dir = root.join(".git");
let pack_dir = git_dir.join("objects").join("pack");
fs::create_dir_all(&pack_dir).expect("test operation should succeed");
let object = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
let oid = object
.object_id(ObjectFormat::Sha256)
.expect("test operation should succeed");
let written =
PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
.expect("test operation should succeed");
let pack_name = written.checksum.to_hex();
fs::write(
pack_dir.join(format!("pack-{pack_name}.pack")),
written.pack,
)
.expect("test operation should succeed");
fs::write(
pack_dir.join(format!("pack-{pack_name}.idx")),
written.index,
)
.expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
assert!(db.contains(&oid).expect("test operation should succeed"));
assert_eq!(read_object_for_assert(&db, &oid), object);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// `install_pack` must name the pack after its checksum, write the pack and
// index files, create no promisor sidecar and no loose copy, and leave the
// object readable.
#[test]
fn file_database_installs_sha256_pack_without_loose_objects() {
let root = temp_root("sley-file-odb-install-pack");
let git_dir = root.join(".git");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let object = EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
let oid = object
.object_id(ObjectFormat::Sha256)
.expect("test operation should succeed");
let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
.expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
let result = db
.install_pack(&pack)
.expect("test operation should succeed");
assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
assert_eq!(result.object_ids, vec![oid]);
assert!(result.pack_path.exists());
assert!(result.index_path.exists());
assert_eq!(result.promisor_path, None);
assert!(
!db.loose()
.object_path(&oid)
.expect("test operation should succeed")
.exists()
);
assert!(db.contains(&oid).expect("test operation should succeed"));
assert_eq!(read_object_for_assert(&db, &oid), object);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// Same expectations as the `install_pack` test, but the pack is installed
// from its raw bytes through `install_raw_pack`.
#[test]
fn file_database_installs_raw_sha256_pack_without_loose_objects() {
let root = temp_root("sley-file-odb-install-raw-pack");
let git_dir = root.join(".git");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let object = EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
let oid = object
.object_id(ObjectFormat::Sha256)
.expect("test operation should succeed");
let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
.expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
let result = db
.install_raw_pack(&pack.pack)
.expect("test operation should succeed");
assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
assert_eq!(result.object_ids, vec![oid]);
assert!(result.pack_path.exists());
assert!(result.index_path.exists());
assert_eq!(result.promisor_path, None);
assert!(
!db.loose()
.object_path(&oid)
.expect("test operation should succeed")
.exists()
);
assert!(db.contains(&oid).expect("test operation should succeed"));
assert_eq!(read_object_for_assert(&db, &oid), object);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// Rebuilds the index with one entry's CRC32 flipped in its lowest bit and
// expects `install_pack` to reject the resulting pack.
#[test]
fn file_database_rejects_noncanonical_pack_index() {
let root = temp_root("sley-file-odb-install-bad-index");
let git_dir = root.join(".git");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let object = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
.expect("test operation should succeed");
let mut entries = pack.entries.clone();
entries[0].crc32 ^= 1;
let mut bad_pack = pack.clone();
bad_pack.index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack.checksum)
.expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
assert!(db.install_pack(&bad_pack).is_err());
fs::remove_dir_all(root).expect("test operation should succeed");
}
// Installing a raw pack with `promisor: true` must create an empty
// `.promisor` file sharing the pack's file stem, alongside the pack and
// index, without writing a loose copy of the object.
#[test]
fn file_database_installs_raw_promisor_pack_with_sidecar() {
let root = temp_root("sley-file-odb-install-raw-promisor-pack");
let git_dir = root.join(".git");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let object = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
let oid = object
.object_id(ObjectFormat::Sha1)
.expect("test operation should succeed");
let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
.expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
let result = db
.install_raw_pack_with_options(&pack.pack, RawPackInstallOptions { promisor: true })
.expect("test operation should succeed");
let promisor_path = result.promisor_path.expect("promisor sidecar");
assert_eq!(promisor_path.file_stem(), result.pack_path.file_stem());
assert_eq!(
promisor_path.extension().and_then(|ext| ext.to_str()),
Some("promisor")
);
assert!(promisor_path.exists());
assert_eq!(
fs::read(&promisor_path).expect("test operation should succeed"),
b""
);
assert!(result.pack_path.exists());
assert!(result.index_path.exists());
assert!(
!db.loose()
.object_path(&oid)
.expect("test operation should succeed")
.exists()
);
assert_eq!(read_object_for_assert(&db, &oid), object);
fs::remove_dir_all(root).expect("test operation should succeed");
}
/// A linked worktree's admin directory must resolve to the shared common dir and its `objects`.
#[test]
fn repository_objects_dir_uses_linked_worktree_common_dir() {
    let scratch = temp_root("sley-odb-common-dir");
    let common_dir = scratch.join(".git");
    let worktree_admin = common_dir.join("worktrees").join("linked");
    fs::create_dir_all(&worktree_admin).expect("test operation should succeed");
    // `commondir` holds a path relative to the admin directory: two levels up is `.git`.
    fs::write(worktree_admin.join("commondir"), "../..\n")
        .expect("test operation should succeed");

    let expected_common = fs::canonicalize(&common_dir).expect("test operation should succeed");
    let expected_objects = expected_common.join("objects");
    assert_eq!(repository_common_dir(&worktree_admin), expected_common);
    assert_eq!(repository_objects_dir(&worktree_admin), expected_objects);

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// Collects everything reachable from a commit and installs it into a second database as a pack.
#[test]
fn reachable_object_helpers_walk_graph_and_install_pack() {
    let format = ObjectFormat::Sha1;
    let scratch = temp_root("sley-reachable-pack");
    let source_dir = scratch.join("source.git");
    let destination_dir = scratch.join("destination.git");
    for repo_dir in [&source_dir, &destination_dir] {
        fs::create_dir_all(repo_dir.join("objects")).expect("test operation should succeed");
    }
    let source = FileObjectDatabase::from_git_dir(&source_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_dir, format);

    // Build a minimal commit -> tree -> blob chain in the source database.
    let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
    let blob_id = source
        .write_object(blob.clone())
        .expect("test operation should succeed");
    let tree_body = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_id,
        }],
    }
    .write();
    let tree = EncodedObject::new(ObjectType::Tree, tree_body);
    let tree_id = source
        .write_object(tree.clone())
        .expect("test operation should succeed");
    let signature = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_id,
        parents: Vec::new(),
        author: signature.clone(),
        committer: signature,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit = EncodedObject::new(ObjectType::Commit, commit_body);
    let commit_id = source
        .write_object(commit.clone())
        .expect("test operation should succeed");

    // Walking from the commit must surface all three objects.
    let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_id))
        .expect("test operation should succeed");
    for expected_id in [&commit_id, &tree_id, &blob_id] {
        assert!(reachable.contains(expected_id));
    }

    let installed =
        install_reachable_pack(&source, &destination, format, std::iter::once(commit_id))
            .expect("test operation should succeed")
            .expect("reachable pack should be written");
    assert_eq!(installed.object_ids.len(), 3);

    // Each object is present and readable in the destination, but not as a loose file.
    let expectations = [(&commit_id, &commit), (&tree_id, &tree), (&blob_id, &blob)];
    for (oid, expected) in expectations {
        assert!(
            !destination
                .loose()
                .object_path(oid)
                .expect("test operation should succeed")
                .exists()
        );
        assert!(
            destination
                .contains(oid)
                .expect("test operation should succeed")
        );
        assert_eq!(read_object_for_assert(&destination, oid), *expected);
    }

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// With the tree excluded and the commit listed twice as a start, only the commit is returned.
#[test]
fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
    let format = ObjectFormat::Sha1;
    let scratch = temp_root("sley-reachable-exclusions");
    let repo_dir = scratch.join("repo.git");
    fs::create_dir_all(repo_dir.join("objects")).expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&repo_dir, format);

    // Store a commit -> tree -> blob chain.
    let blob = EncodedObject::new(ObjectType::Blob, b"excluded payload\n".to_vec());
    let blob_id = database
        .write_object(blob)
        .expect("test operation should succeed");
    let tree_body = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_id,
        }],
    }
    .write();
    let tree_id = database
        .write_object(EncodedObject::new(ObjectType::Tree, tree_body))
        .expect("test operation should succeed");
    let signature = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_id,
        parents: Vec::new(),
        author: signature.clone(),
        committer: signature,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit_id = database
        .write_object(EncodedObject::new(ObjectType::Commit, commit_body))
        .expect("test operation should succeed");

    // Exclude the tree and start from the same commit twice.
    let excluded = HashSet::from([tree_id]);
    let starts = [commit_id, commit_id];
    let collected = collect_reachable_objects(&database, format, starts, &excluded)
        .expect("test operation should succeed");
    assert_eq!(collected.len(), 1);
    let only_id = collected[0]
        .object_id(format)
        .expect("test operation should succeed");
    assert_eq!(only_id, commit_id);

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// `build_reachable_pack` yields a one-entry pack with no exclusions and `None` when the only
/// start object is excluded.
#[test]
fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
    let format = ObjectFormat::Sha1;
    let scratch = temp_root("sley-build-reachable-pack");
    let repo_dir = scratch.join("repo.git");
    fs::create_dir_all(repo_dir.join("objects")).expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&repo_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"raw reachable pack\n".to_vec());
    let blob_id = database
        .write_object(blob.clone())
        .expect("test operation should succeed");

    // Nothing excluded: the pack holds exactly the blob.
    let no_exclusions = HashSet::new();
    let built = build_reachable_pack(&database, format, std::iter::once(blob_id), &no_exclusions)
        .expect("test operation should succeed")
        .expect("reachable pack should be built");
    assert!(built.pack.starts_with(b"PACK"));
    assert_eq!(built.entries.len(), 1);
    assert_eq!(built.entries[0].oid, blob_id);

    // Feed the pack's own ids back in as starts while excluding the blob: nothing is left to pack.
    let everything_excluded = HashSet::from([blob_id]);
    let starts = built.entries.into_iter().map(|entry| entry.oid);
    let rebuilt = build_reachable_pack(&database, format, starts, &everything_excluded)
        .expect("test operation should succeed");
    assert!(rebuilt.is_none());

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// Traversal follows a tag to its target, and a missing start object yields a typed
/// not-found error carrying the traversal context.
#[test]
fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
    let format = ObjectFormat::Sha1;
    let scratch = temp_root("sley-reachable-tags");
    let repo_dir = scratch.join("repo.git");
    fs::create_dir_all(repo_dir.join("objects")).expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&repo_dir, format);

    // An annotated tag that points straight at a blob.
    let blob = EncodedObject::new(ObjectType::Blob, b"tagged payload\n".to_vec());
    let blob_id = database
        .write_object(blob)
        .expect("test operation should succeed");
    let tag_body = Tag {
        object: blob_id,
        object_type: ObjectType::Blob,
        name: b"v1".to_vec(),
        tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
        message: b"tag message\n".to_vec(),
        raw_body: None,
    }
    .write();
    let tag_id = database
        .write_object(EncodedObject::new(ObjectType::Tag, tag_body))
        .expect("test operation should succeed");

    let reachable = collect_reachable_object_ids(&database, format, std::iter::once(tag_id))
        .expect("test operation should succeed");
    for expected_id in [&tag_id, &blob_id] {
        assert!(reachable.contains(expected_id));
    }

    // Starting from an id that was never written must fail with a typed error.
    let missing = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
        .expect("test operation should succeed");
    let failure = collect_reachable_object_ids(&database, format, std::iter::once(missing))
        .expect_err("missing traversal root should error");
    let not_found = failure.not_found_kind().expect("typed not found");
    assert_eq!(not_found.object_id(), Some(missing));
    assert_eq!(
        not_found.missing_object_context(),
        Some(MissingObjectContext::Traversal)
    );

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// With no start objects, `install_reachable_pack` returns `None` and creates no pack directory.
#[test]
fn install_reachable_pack_empty_starts_create_no_pack() {
    let format = ObjectFormat::Sha1;
    let scratch = temp_root("sley-reachable-empty");
    let source_dir = scratch.join("source.git");
    let destination_dir = scratch.join("destination.git");
    for repo_dir in [&source_dir, &destination_dir] {
        fs::create_dir_all(repo_dir.join("objects")).expect("test operation should succeed");
    }
    let source = FileObjectDatabase::from_git_dir(&source_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_dir, format);

    let no_starts = Vec::<ObjectId>::new();
    let outcome = install_reachable_pack(&source, &destination, format, no_starts)
        .expect("test operation should succeed");
    assert!(outcome.is_none());

    let destination_pack_dir = destination_dir.join("objects").join("pack");
    assert!(!destination_pack_dir.exists());

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// When the only start object is excluded, nothing is installed and no pack directory appears.
#[test]
fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
    let format = ObjectFormat::Sha1;
    let scratch = temp_root("sley-reachable-install-excluding");
    let source_dir = scratch.join("source.git");
    let destination_dir = scratch.join("destination.git");
    for repo_dir in [&source_dir, &destination_dir] {
        fs::create_dir_all(repo_dir.join("objects")).expect("test operation should succeed");
    }
    let source = FileObjectDatabase::from_git_dir(&source_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"excluded install\n".to_vec());
    let blob_id = source
        .write_object(blob)
        .expect("test operation should succeed");

    // The single start is also the single exclusion.
    let excluded = HashSet::from([blob_id]);
    let starts = std::iter::once(blob_id);
    let outcome =
        install_reachable_pack_excluding(&source, &destination, format, starts, &excluded)
            .expect("test operation should succeed");
    assert!(outcome.is_none());

    let destination_pack_dir = destination_dir.join("objects").join("pack");
    assert!(!destination_pack_dir.exists());

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// Building and installing a reachable pack works for SHA-256 object ids.
#[test]
fn install_reachable_pack_supports_sha256() {
    let format = ObjectFormat::Sha256;
    let scratch = temp_root("sley-reachable-pack-sha256");
    let source_dir = scratch.join("source.git");
    let destination_dir = scratch.join("destination.git");
    for repo_dir in [&source_dir, &destination_dir] {
        fs::create_dir_all(repo_dir.join("objects")).expect("test operation should succeed");
    }
    let source = FileObjectDatabase::from_git_dir(&source_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
    let blob_id = source
        .write_object(blob.clone())
        .expect("test operation should succeed");

    // Building alone yields a raw pack whose first entry is the blob.
    let no_exclusions = HashSet::new();
    let built = build_reachable_pack(&source, format, std::iter::once(blob_id), &no_exclusions)
        .expect("test operation should succeed")
        .expect("sha256 reachable pack should be built");
    assert!(built.pack.starts_with(b"PACK"));
    assert_eq!(built.entries[0].oid, blob_id);

    // Installing transfers the blob into the destination without a loose copy.
    let installed =
        install_reachable_pack(&source, &destination, format, std::iter::once(blob_id))
            .expect("test operation should succeed")
            .expect("sha256 reachable pack should be written");
    assert_eq!(installed.object_ids, vec![blob_id]);
    assert!(
        !destination
            .loose()
            .object_path(&blob_id)
            .expect("test operation should succeed")
            .exists()
    );
    assert_eq!(read_object_for_assert(&destination, &blob_id), blob);

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// `install_reachable_pack` works with any `RawPackInstaller`, not only the file database.
#[test]
fn install_helpers_accept_custom_raw_pack_installer() {
    /// Installer double: records each pack it is handed and answers with preset object ids.
    #[derive(Default)]
    struct RecordingInstaller {
        packs: std::cell::RefCell<Vec<Vec<u8>>>,
        installed: std::cell::RefCell<Vec<ObjectId>>,
    }
    impl RawPackInstaller for RecordingInstaller {
        fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
            let received = pack_bytes.to_vec();
            self.packs.borrow_mut().push(received);
            Ok(RawPackInstallResult {
                object_ids: self.installed.borrow().clone(),
            })
        }
    }

    let format = ObjectFormat::Sha1;
    let source = ObjectDatabase::new(format);
    let blob = EncodedObject::new(ObjectType::Blob, b"custom raw installer\n".to_vec());
    let blob_id = source
        .write_object(blob)
        .expect("test operation should succeed");

    // Preload the id the double should report back.
    let installer = RecordingInstaller::default();
    installer.installed.borrow_mut().push(blob_id);

    let outcome = install_reachable_pack(&source, &installer, format, std::iter::once(blob_id))
        .expect("test operation should succeed")
        .expect("custom installer should receive pack");
    assert_eq!(outcome.object_ids, installer.installed.into_inner());

    // Exactly one raw pack reached the installer.
    let received_packs = installer.packs.into_inner();
    assert_eq!(received_packs.len(), 1);
    assert!(received_packs[0].starts_with(b"PACK"));
}
/// Objects in two packs that are described only by a `multi-pack-index` file can be found,
/// prefix-resolved and read.
#[test]
fn file_database_reads_object_from_multi_pack_index() {
    let format = ObjectFormat::Sha1;
    let scratch = temp_root("sley-file-odb-midx");
    let repo_dir = scratch.join(".git");
    let pack_dir = repo_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let blob_a = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
    let blob_b = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
    let id_a = blob_a
        .object_id(format)
        .expect("test operation should succeed");
    let id_b = blob_b
        .object_id(format)
        .expect("test operation should succeed");
    let pack_a = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob_a))
        .expect("test operation should succeed");
    let pack_b = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob_b))
        .expect("test operation should succeed");

    // Only the `.pack` files are written; the multi-pack-index refers to them by `.idx` name.
    let idx_name_a = format!("pack-{}.idx", pack_a.checksum.to_hex());
    let idx_name_b = format!("pack-{}.idx", pack_b.checksum.to_hex());
    let pack_path_a = pack_dir.join(idx_name_a.replace(".idx", ".pack"));
    fs::write(pack_path_a, pack_a.pack).expect("test operation should succeed");
    let pack_path_b = pack_dir.join(idx_name_b.replace(".idx", ".pack"));
    fs::write(pack_path_b, pack_b.pack).expect("test operation should succeed");

    let pack_names = [idx_name_a, idx_name_b];
    let midx_entries = [
        sley_pack::MultiPackIndexEntry {
            oid: id_a,
            pack_int_id: 0,
            offset: pack_a.entries[0].offset,
            force_large_offset: false,
        },
        sley_pack::MultiPackIndexEntry {
            oid: id_b,
            pack_int_id: 1,
            offset: pack_b.entries[0].offset,
            force_large_offset: false,
        },
    ];
    let midx_bytes = MultiPackIndex::write(format, 2, &pack_names, &midx_entries)
        .expect("test operation should succeed");
    fs::write(pack_dir.join("multi-pack-index"), midx_bytes)
        .expect("test operation should succeed");

    let database = FileObjectDatabase::from_git_dir(&repo_dir, format);
    assert!(
        database
            .contains(&id_b)
            .expect("test operation should succeed")
    );
    let resolved = database
        .resolve_prefix(&id_b.to_hex()[..8])
        .expect("test operation should succeed");
    assert_eq!(resolved, ObjectPrefixResolution::Unique(id_b));
    assert_eq!(read_object_for_assert(&database, &id_b), blob_b);
    assert_eq!(read_object_for_assert(&database, &id_a), blob_a);

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// A pack installed after the database has already served reads (and a miss) is still found.
#[test]
fn file_database_finds_pack_added_after_registry_was_cached() {
    let format = ObjectFormat::Sha1;
    let scratch = temp_root("sley-file-odb-pack-added-late");
    let repo_dir = scratch.join(".git");
    fs::create_dir_all(repo_dir.join("objects")).expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&repo_dir, format);

    // Install the first pack and read its object back.
    let early = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
    let early_id = early
        .object_id(format)
        .expect("test operation should succeed");
    let early_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&early))
        .expect("test operation should succeed");
    database
        .install_pack(&early_pack)
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&database, &early_id), early);

    // The second object has not been stored yet, so a read reports NotFound.
    let late = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
    let late_id = late
        .object_id(format)
        .expect("test operation should succeed");
    let premature_read = database.read_object(&late_id);
    assert!(matches!(premature_read, Err(GitError::NotFound(_))));

    // Once its pack is installed, the same database handle must see both objects.
    let late_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&late))
        .expect("test operation should succeed");
    database
        .install_pack(&late_pack)
        .expect("test operation should succeed");
    assert!(
        database
            .contains(&late_id)
            .expect("test operation should succeed")
    );
    assert_eq!(read_object_for_assert(&database, &late_id), late);
    assert_eq!(read_object_for_assert(&database, &early_id), early);

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
// Checks that a presence checker obtained before a second pack is installed reports that
// pack's object once the install has happened. The order of the checker queries relative to
// the two installs is the substance of the test.
#[test]
fn object_presence_checker_finds_pack_added_after_registry_was_cached() {
let root = temp_root("sley-presence-checker-pack-added-late");
let git_dir = root.join(".git");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
// Install a pack holding only the first blob.
let first = EncodedObject::new(ObjectType::Blob, b"checker first late\n".to_vec());
let first_oid = first
.object_id(ObjectFormat::Sha1)
.expect("test operation should succeed");
let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
.expect("test operation should succeed");
db.install_pack(&first_pack)
.expect("test operation should succeed");
// The second blob's id is computed up front, but the blob itself is not stored yet.
let second = EncodedObject::new(ObjectType::Blob, b"checker second late\n".to_vec());
let second_oid = second
.object_id(ObjectFormat::Sha1)
.expect("test operation should succeed");
// The checker is created while only the first pack exists.
let mut checker = db.presence_checker();
assert!(
checker
.contains(&first_oid)
.expect("test operation should succeed")
);
// A miss for the not-yet-installed object.
assert!(
!checker
.contains(&second_oid)
.expect("test operation should succeed")
);
let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
.expect("test operation should succeed");
db.install_pack(&second_pack)
.expect("test operation should succeed");
// The same checker instance must now report the object from the newly installed pack.
assert!(
checker
.contains(&second_oid)
.expect("test operation should succeed")
);
fs::remove_dir_all(root).expect("test operation should succeed");
}
// Observes the pack registry's per-pack `index` and `data` slots across a sequence of
// operations: both start empty, `contains` fills only `index`, a read fills `data`, and
// installing a second pack yields a different registry instance with counts of two.
#[test]
fn file_database_pack_registry_loads_indexes_lazily_and_refreshes_after_count_change() {
let root = temp_root("sley-file-odb-pack-registry-refresh");
let git_dir = root.join(".git");
let pack_dir = git_dir.join("objects").join("pack");
fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
let first = EncodedObject::new(ObjectType::Blob, b"registry first\n".to_vec());
let first_oid = first
.object_id(ObjectFormat::Sha1)
.expect("test operation should succeed");
let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
.expect("test operation should succeed");
db.install_pack(&first_pack)
.expect("test operation should succeed");
// NOTE(review): the boolean argument presumably controls whether a refresh is forced —
// confirm against `cached_pack_registry`.
let first_registry = db
.cached_pack_registry(&pack_dir, false)
.expect("test operation should succeed");
assert_eq!(first_registry.fingerprint.idx_count, 1);
assert_eq!(first_registry.fingerprint.pack_count, 1);
assert_eq!(first_registry.packs.len(), 1);
// Before any lookup, neither the index nor the pack data slot is populated.
assert!(
first_registry.packs[0]
.index
.lock()
.expect("test operation should succeed")
.is_none()
);
assert!(
first_registry.packs[0]
.data
.lock()
.expect("test operation should succeed")
.is_none()
);
// A presence check populates the index slot ...
assert!(
db.contains(&first_oid)
.expect("test operation should succeed")
);
assert!(
first_registry.packs[0]
.index
.lock()
.expect("test operation should succeed")
.is_some()
);
// ... but leaves the pack data slot empty.
assert!(
first_registry.packs[0]
.data
.lock()
.expect("test operation should succeed")
.is_none()
);
// Reading the object populates the pack data slot.
assert_eq!(read_object_for_assert(&db, &first_oid), first);
assert!(
first_registry.packs[0]
.data
.lock()
.expect("test operation should succeed")
.is_some()
);
let second = EncodedObject::new(ObjectType::Blob, b"registry second\n".to_vec());
let second_oid = second
.object_id(ObjectFormat::Sha1)
.expect("test operation should succeed");
let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
.expect("test operation should succeed");
db.install_pack(&second_pack)
.expect("test operation should succeed");
// After the second install the registry handed out is a different `Arc` and its
// fingerprint counts both packs.
let refreshed = db
.cached_pack_registry(&pack_dir, true)
.expect("test operation should succeed");
assert!(!Arc::ptr_eq(&first_registry, &refreshed));
assert_eq!(refreshed.fingerprint.idx_count, 2);
assert_eq!(refreshed.fingerprint.pack_count, 2);
assert_eq!(refreshed.packs.len(), 2);
assert_eq!(read_object_for_assert(&db, &second_oid), second);
fs::remove_dir_all(root).expect("test operation should succeed");
}
/// After reads from two packs and one miss, a third pack installed later is still readable.
#[test]
fn file_database_pack_search_hint_rebuilds_after_pack_added() {
    let format = ObjectFormat::Sha1;
    let scratch = temp_root("sley-file-odb-pack-lookup-added-late");
    let repo_dir = scratch.join(".git");
    fs::create_dir_all(repo_dir.join("objects")).expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&repo_dir, format);

    let blob_a = EncodedObject::new(ObjectType::Blob, b"first lookup\n".to_vec());
    let blob_b = EncodedObject::new(ObjectType::Blob, b"second lookup\n".to_vec());
    let blob_c = EncodedObject::new(ObjectType::Blob, b"third lookup\n".to_vec());
    let id_a = blob_a
        .object_id(format)
        .expect("test operation should succeed");
    let id_b = blob_b
        .object_id(format)
        .expect("test operation should succeed");
    let id_c = blob_c
        .object_id(format)
        .expect("test operation should succeed");

    // Two packs are installed up front and both objects are read.
    let pack_a = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob_a))
        .expect("test operation should succeed");
    let pack_b = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob_b))
        .expect("test operation should succeed");
    database
        .install_pack(&pack_a)
        .expect("test operation should succeed");
    database
        .install_pack(&pack_b)
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&database, &id_a), blob_a);
    assert_eq!(read_object_for_assert(&database, &id_b), blob_b);

    // The third object is not stored yet.
    let premature_read = database.read_object(&id_c);
    assert!(matches!(premature_read, Err(GitError::NotFound(_))));

    // Installing the third pack makes it readable without disturbing earlier lookups.
    let pack_c = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob_c))
        .expect("test operation should succeed");
    database
        .install_pack(&pack_c)
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&database, &id_c), blob_c);
    assert_eq!(read_object_for_assert(&database, &id_a), blob_a);

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// An object that is present in a pack and is also written through the database afterwards
/// reads back with the expected contents.
#[test]
fn file_database_prefers_loose_object_over_packed_object() {
    let scratch = temp_root("sley-file-odb-prefer-loose");
    let repo_dir = scratch.join(".git");
    let pack_dir = repo_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    // Place a pack and its index on disk by hand.
    let blob = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");
    let pack_name = written.checksum.to_hex();
    let pack_path = pack_dir.join(format!("pack-{pack_name}.pack"));
    fs::write(pack_path, written.pack).expect("test operation should succeed");
    let index_path = pack_dir.join(format!("pack-{pack_name}.idx"));
    fs::write(index_path, written.index).expect("test operation should succeed");

    // Write the same object through the database as well, then read it back.
    let database = FileObjectDatabase::from_git_dir(&repo_dir, ObjectFormat::Sha1);
    let blob_id = database
        .write_object(blob.clone())
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&database, &blob_id), blob);

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// Prerequisite verification succeeds when the bundle's prerequisite object is in the database.
#[test]
fn bundle_prerequisite_verification_reads_existing_objects() {
    let format = ObjectFormat::Sha1;
    let database = ObjectDatabase::new(format);
    let base_blob = EncodedObject::new(ObjectType::Blob, b"base\n".to_vec());
    let oid = database
        .write_object(base_blob)
        .expect("test operation should succeed");

    // Header-only bundle: a single `-<oid>` prerequisite line.
    let bundle_bytes = format!("# v2 git bundle\n-{oid} base\n\n").into_bytes();
    let bundle =
        Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    verify_bundle_prerequisites(&bundle, &database).expect("test operation should succeed");
}
/// Prerequisite verification fails when the prerequisite object was never written.
#[test]
fn bundle_prerequisite_verification_reports_missing_objects() {
    let format = ObjectFormat::Sha1;
    let database = ObjectDatabase::new(format);
    // Compute an id for content that is deliberately not stored.
    let missing = sley_core::object_id_for_bytes(format, "blob", b"missing\n")
        .expect("test operation should succeed");

    let bundle_bytes = format!("# v2 git bundle\n-{missing} missing\n\n").into_bytes();
    let bundle =
        Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let verification = verify_bundle_prerequisites(&bundle, &database);
    assert!(verification.is_err());
}
/// Unbundling writes the pack's object into the writer and hands back the bundle's references.
#[test]
fn unbundle_objects_writes_pack_entries_and_returns_refs() {
    let format = ObjectFormat::Sha1;
    let prerequisite_reader = ObjectDatabase::new(format);
    let mut writer = ObjectDatabase::new(format);

    let blob = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
    let oid = blob
        .object_id(format)
        .expect("test operation should succeed");
    let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");

    // Bundle = textual header naming one ref, followed by the raw pack bytes.
    let mut bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(written.pack);
    let bundle =
        Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let outcome = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
        .expect("test operation should succeed");
    assert_eq!(outcome.written_objects, vec![oid]);
    assert_eq!(outcome.references, bundle.references);
    assert_eq!(read_object_for_assert(&writer, &oid), blob);
}
/// Installing a bundle's pack into a file database stores the object in a pack (no loose
/// file) and returns the bundle's references.
#[test]
fn install_bundle_pack_writes_pack_and_returns_refs() {
    let format = ObjectFormat::Sha1;
    let scratch = temp_root("sley-install-bundle-pack");
    let repo_dir = scratch.join(".git");
    fs::create_dir_all(repo_dir.join("objects")).expect("test operation should succeed");
    let prerequisite_reader = ObjectDatabase::new(format);
    let database = FileObjectDatabase::from_git_dir(&repo_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
    let oid = blob
        .object_id(format)
        .expect("test operation should succeed");
    let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");

    // Bundle = textual header naming one ref, followed by the raw pack bytes.
    let mut bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(written.pack);
    let bundle =
        Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let outcome = install_bundle_pack(&bundle, &prerequisite_reader, &database)
        .expect("test operation should succeed");
    assert_eq!(outcome.written_objects, vec![oid]);
    assert_eq!(outcome.references, bundle.references);
    assert!(
        database
            .contains(&oid)
            .expect("test operation should succeed")
    );
    assert_eq!(read_object_for_assert(&database, &oid), blob);
    assert!(
        !database
            .loose()
            .object_path(&oid)
            .expect("test operation should succeed")
            .exists()
    );

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// Unpacking a SHA-256 pack writes its single object into the in-memory database.
#[test]
fn unpack_packfile_objects_writes_sha256_pack_entries() {
    let format = ObjectFormat::Sha256;
    let writer = ObjectDatabase::new(format);

    let blob = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
    let blob_id = blob
        .object_id(format)
        .expect("test operation should succeed");
    let written = PackFile::write_undeltified(std::slice::from_ref(&blob), format)
        .expect("test operation should succeed");

    let outcome = unpack_packfile_objects(&written.pack, format, &writer)
        .expect("test operation should succeed");
    assert_eq!(outcome.written_objects, vec![blob_id]);
    assert_eq!(read_object_for_assert(&writer, &blob_id), blob);
}
/// A bundle with an unmet prerequisite is rejected and its pack object is not written.
#[test]
fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
    let format = ObjectFormat::Sha1;
    let prerequisite_reader = ObjectDatabase::new(format);
    let mut writer = ObjectDatabase::new(format);

    // Id of content that is never stored, used as the prerequisite.
    let missing = sley_core::object_id_for_bytes(format, "blob", b"missing\n")
        .expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
    let oid = blob
        .object_id(format)
        .expect("test operation should succeed");
    let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");

    // Header lists the prerequisite and one ref; the raw pack bytes follow.
    let mut bundle_bytes =
        format!("# v2 git bundle\n-{missing} missing\n{oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(written.pack);
    let bundle =
        Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let outcome = unbundle_objects(&bundle, &prerequisite_reader, &mut writer);
    assert!(outcome.is_err());
    assert!(!writer.contains(&oid));
}
/// Writes a blob holding `payload`, a tree with that blob as `payload.txt`, and a parentless
/// commit of that tree into `db`.
///
/// Returns the `(id, object)` pairs in the order commit, tree, blob.
fn write_commit_graph(
    db: &mut FileObjectDatabase,
    payload: &[u8],
) -> Vec<(ObjectId, EncodedObject)> {
    let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
    let blob_id = db
        .write_object(blob.clone())
        .expect("test operation should succeed");

    let tree_body = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_id,
        }],
    }
    .write();
    let tree = EncodedObject::new(ObjectType::Tree, tree_body);
    let tree_id = db
        .write_object(tree.clone())
        .expect("test operation should succeed");

    // Author and committer share one signature line.
    let signature = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_id,
        parents: Vec::new(),
        author: signature.clone(),
        committer: signature,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit = EncodedObject::new(ObjectType::Commit, commit_body);
    let commit_id = db
        .write_object(commit.clone())
        .expect("test operation should succeed");

    vec![(commit_id, commit), (tree_id, tree), (blob_id, blob)]
}
/// Shared body for the per-format repack tests: one pre-installed pack plus a loose commit
/// graph must come out of `repack_all_objects` as a single pack covering every object.
fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
    let scratch = temp_root("sley-repack-all");
    let repo_dir = scratch.join(".git");
    fs::create_dir_all(repo_dir.join("objects")).expect("test operation should succeed");
    let mut database = FileObjectDatabase::from_git_dir(&repo_dir, format);

    // One blob arrives through an installed pack ...
    let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
    let packed_id = packed_blob
        .object_id(format)
        .expect("test operation should succeed");
    let prior_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
        .expect("test operation should succeed");
    let prior_install = database
        .install_pack(&prior_pack)
        .expect("test operation should succeed");

    // ... and a commit graph is written through the database.
    let loose_graph = write_commit_graph(&mut database, b"repack payload\n");
    let mut expected: HashMap<ObjectId, EncodedObject> = loose_graph.iter().cloned().collect();
    expected.insert(packed_id, packed_blob.clone());

    let repacked = repack_all_objects(&repo_dir, format)
        .expect("test operation should succeed")
        .expect("repository has objects");
    assert_eq!(repacked.object_count, expected.len());

    // Every entry of the new pack is a known object whose id matches its contents.
    let parsed_pack =
        PackFile::parse(&repacked.pack, format).expect("test operation should succeed");
    assert_eq!(parsed_pack.entries.len(), expected.len());
    for packed in &parsed_pack.entries {
        let want = expected
            .get(&packed.entry.oid)
            .expect("packed object was in the repository");
        assert_eq!(&packed.object, want);
        let recomputed_id = packed
            .object
            .object_id(format)
            .expect("test operation should succeed");
        assert_eq!(recomputed_id, packed.entry.oid);
    }

    // The index agrees with the pack.
    let parsed_index =
        PackIndex::parse(&repacked.idx, format).expect("test operation should succeed");
    assert_eq!(parsed_index.pack_checksum, parsed_pack.checksum);
    assert_eq!(parsed_index.entries.len(), expected.len());

    // The earlier pack is reported obsolete; the graph objects are reported as packed loose
    // objects (sorted by hex id), and the already-packed blob is not among them.
    assert_eq!(
        repacked.obsolete_packs,
        vec![prior_install.pack_path.clone()]
    );
    let mut want_loose: Vec<ObjectId> = loose_graph.iter().map(|(oid, _)| *oid).collect();
    want_loose.sort_by_key(|oid| oid.to_hex());
    assert_eq!(repacked.packed_loose, want_loose);
    assert!(!repacked.packed_loose.contains(&packed_id));

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// Runs the shared repack consolidation scenario with SHA-1 object ids.
#[test]
fn repack_all_objects_consolidates_loose_and_pack_sha1() {
    let format = ObjectFormat::Sha1;
    repack_all_objects_consolidates_loose_and_pack(format);
}
/// Runs the shared repack consolidation scenario with SHA-256 object ids.
#[test]
fn repack_all_objects_consolidates_loose_and_pack_sha256() {
    let format = ObjectFormat::Sha256;
    repack_all_objects_consolidates_loose_and_pack(format);
}
/// Repacking a repository with an empty `objects` directory produces no result.
#[test]
fn repack_all_objects_returns_none_for_empty_repository() {
    let scratch = temp_root("sley-repack-empty");
    let repo_dir = scratch.join(".git");
    fs::create_dir_all(repo_dir.join("objects")).expect("test operation should succeed");

    let outcome = repack_all_objects(&repo_dir, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert!(outcome.is_none());

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
/// Installing a repack result with the prune flag set to `false` writes the
/// new `.pack`/`.idx` pair but leaves every object of the commit graph in
/// its loose location and still readable through the database.
#[test]
fn install_repack_result_writes_pack_without_pruning_by_default() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-repack-install-nodelete");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"install no prune\n");

    let repacked = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    install_repack_result(&git_dir, format, &repacked, false)
        .expect("test operation should succeed");

    // The installed files are named after the checksum of the generated pack.
    let parsed = PackFile::parse(&repacked.pack, format).expect("test operation should succeed");
    let pack_dir = git_dir.join("objects").join("pack");
    let installed_pack = pack_dir.join(format!("pack-{}.pack", parsed.checksum.to_hex()));
    let installed_idx = pack_dir.join(format!("pack-{}.idx", parsed.checksum.to_hex()));
    assert!(installed_pack.exists());
    assert!(installed_idx.exists());

    // Nothing was pruned: each graph object is still loose and intact.
    for (oid, object) in &graph {
        let loose_path = db
            .loose()
            .object_path(oid)
            .expect("test operation should succeed");
        assert!(loose_path.exists());
        assert_eq!(read_object_for_assert(&db, oid), *object);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
/// Installing a repack result with the prune flag set to `true` removes the
/// previously installed pack and its index as well as the loose copies of
/// the commit graph, while a freshly opened database can still read every
/// object (including the blob that lived in the old pack).
#[test]
fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-repack-install-prune");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Seed the repository: one blob inside an installed pack, plus a commit
    // graph written through `db`.
    let packed_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
    let old_pack_bytes = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
        .expect("test operation should succeed");
    let old_pack = db
        .install_pack(&old_pack_bytes)
        .expect("test operation should succeed");
    let graph = write_commit_graph(&mut db, b"prune payload\n");

    let repacked = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    let new_pack_checksum = PackFile::parse(&repacked.pack, format)
        .expect("test operation should succeed")
        .checksum;
    install_repack_result(&git_dir, format, &repacked, true)
        .expect("test operation should succeed");

    // The superseded pack and the loose copies are gone ...
    assert!(!old_pack.pack_path.exists());
    assert!(!old_pack.index_path.exists());
    let is_loose = |oid: &ObjectId| {
        db.loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists()
    };
    for (oid, _) in &graph {
        assert!(!is_loose(oid));
    }

    // ... and the consolidated pack has taken their place.
    let new_pack_path = git_dir
        .join("objects")
        .join("pack")
        .join(format!("pack-{}.pack", new_pack_checksum.to_hex()));
    assert!(new_pack_path.exists());

    // A database opened after the install must serve everything.
    let reopened = FileObjectDatabase::from_git_dir(&git_dir, format);
    for (oid, object) in &graph {
        let present = reopened
            .contains(oid)
            .expect("test operation should succeed");
        assert!(present);
        assert_eq!(read_object_for_assert(&reopened, oid), *object);
    }
    let packed_oid = packed_blob
        .object_id(format)
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&reopened, &packed_oid), packed_blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
/// A pack with a `.keep` sidecar and a pack installed with `promisor: true`
/// are both reported in `obsolete_packs`, yet a pruning install must leave
/// the packs, their indexes and their sidecars on disk. The loose copies of
/// the commit graph are still removed.
#[test]
fn install_repack_result_preserves_keep_and_promisor_packs() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-repack-install-keep-promisor");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // First protected pack: a regular install with an empty `.keep` file
    // placed next to the pack.
    let keep_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
    let keep_pack = PackFile::write_undeltified(std::slice::from_ref(&keep_blob), format)
        .expect("test operation should succeed");
    let keep_install = db
        .install_pack(&keep_pack)
        .expect("test operation should succeed");
    let keep_sidecar = keep_install.pack_path.with_extension("keep");
    fs::write(&keep_sidecar, b"").expect("test operation should succeed");

    // Second protected pack: installed with the promisor option, which
    // reports the sidecar path in `promisor_path`.
    let promisor_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
    let promisor_pack =
        PackFile::write_undeltified(std::slice::from_ref(&promisor_blob), format)
            .expect("test operation should succeed");
    let promisor_options = RawPackInstallOptions { promisor: true };
    let promisor_install = db
        .install_pack_with_options(&promisor_pack, promisor_options)
        .expect("test operation should succeed");
    let promisor_sidecar = promisor_install
        .promisor_path
        .clone()
        .expect("promisor sidecar");

    let graph = write_commit_graph(&mut db, b"new consolidated payload\n");
    let repacked = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(repacked.obsolete_packs.contains(&keep_install.pack_path));
    assert!(repacked.obsolete_packs.contains(&promisor_install.pack_path));

    install_repack_result(&git_dir, format, &repacked, true)
        .expect("test operation should succeed");

    let protected_paths = [
        &keep_install.pack_path,
        &keep_install.index_path,
        &keep_sidecar,
        &promisor_install.pack_path,
        &promisor_install.index_path,
        &promisor_sidecar,
    ];
    for path in protected_paths {
        assert!(path.exists(), "{} should be preserved", path.display());
    }

    for (oid, _) in &graph {
        let loose_path = db
            .loose()
            .object_path(oid)
            .expect("test operation should succeed");
        assert!(!loose_path.exists());
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
/// A loose object whose id appears in `packed_loose` but which was written
/// after the repack ran must survive a pruning install and stay readable;
/// the commit-graph objects written before the repack are still removed
/// from loose storage.
#[test]
fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-repack-install-safety");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"safety packed\n");
    let mut repacked = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");

    // Write a blob only after the repack result exists, then append its id
    // to `packed_loose` by hand.
    let stray = EncodedObject::new(ObjectType::Blob, b"never packed\n".to_vec());
    let stray_oid = db
        .write_object(stray.clone())
        .expect("test operation should succeed");
    assert!(!repacked.packed_loose.contains(&stray_oid));
    repacked.packed_loose.push(stray_oid);

    install_repack_result(&git_dir, format, &repacked, true)
        .expect("test operation should succeed");

    let loose_path_of = |oid: &ObjectId| {
        db.loose()
            .object_path(oid)
            .expect("test operation should succeed")
    };
    assert!(loose_path_of(&stray_oid).exists());
    assert_eq!(read_object_for_assert(&db, &stray_oid), stray);
    for (oid, _) in &graph {
        assert!(!loose_path_of(oid).exists());
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
/// Exercises both modes of `prune_unreachable_loose` against one root commit
/// and one dangling blob:
///
/// * with the final flag `false` the dangling id is returned but its loose
///   file stays on disk;
/// * with the final flag `true` the same id is returned and the file is gone.
///
/// In both cases every object of the commit graph must remain loose and
/// readable.
#[test]
fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
    let root = temp_root("sley-prune-unreachable");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"reachable payload\n");
    // NOTE(review): assumes the first entry returned by `write_commit_graph`
    // is the commit — confirm against the helper.
    // `ObjectId` is `Copy` (it is passed by value twice below), so a plain
    // copy replaces the former `.clone()`.
    let commit_oid = graph[0].0;
    let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
    let dangling_oid = db
        .write_object(dangling)
        .expect("test operation should succeed");

    // Report-only pass: the dangling object is listed but not removed.
    let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
        .expect("test operation should succeed");
    assert_eq!(reported, vec![dangling_oid]);
    assert!(
        db.loose()
            .object_path(&dangling_oid)
            .expect("test operation should succeed")
            .exists()
    );

    // Deleting pass: same list, and the loose file disappears.
    let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
        .expect("test operation should succeed");
    assert_eq!(deleted, vec![dangling_oid]);
    assert!(
        !db.loose()
            .object_path(&dangling_oid)
            .expect("test operation should succeed")
            .exists()
    );

    // Everything from the commit graph must be untouched.
    for (oid, object) in &graph {
        assert!(
            db.loose()
                .object_path(oid)
                .expect("test operation should succeed")
                .exists()
        );
        assert_eq!(read_object_for_assert(&db, oid), *object);
    }
    fs::remove_dir_all(root).expect("test operation should succeed");
}
/// A root commit whose tree holds a mode `0o160000` entry pointing at an
/// object id that was never written to this database must not make
/// `prune_unreachable_loose` fail: the call succeeds, returns only the
/// dangling blob, and removes that blob's loose file.
#[test]
fn prune_unreachable_loose_ignores_gitlink_targets() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-prune-gitlink");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Tree with a single entry whose target id is not stored in `db`.
    let submodule_oid = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let gitlink_entry = TreeEntry {
        mode: 0o160000,
        name: BString::from(b"submodule"),
        oid: submodule_oid,
    };
    let tree_payload = Tree {
        entries: vec![gitlink_entry],
    }
    .write();
    let tree_oid = db
        .write_object(EncodedObject::new(ObjectType::Tree, tree_payload))
        .expect("test operation should succeed");

    // Parentless commit pointing at that tree; it is the only prune root.
    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_payload = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"gitlink\n".to_vec(),
    }
    .write();
    let commit_oid = db
        .write_object(EncodedObject::new(ObjectType::Commit, commit_payload))
        .expect("test operation should succeed");

    // Blob that nothing refers to.
    let dangling = EncodedObject::new(ObjectType::Blob, b"dangling with gitlink\n".to_vec());
    let dangling_oid = db
        .write_object(dangling)
        .expect("test operation should succeed");

    let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
        .expect("test operation should succeed");
    assert_eq!(deleted, vec![dangling_oid]);
    let dangling_path = db
        .loose()
        .object_path(&dangling_oid)
        .expect("test operation should succeed");
    assert!(!dangling_path.exists());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
/// Builds a per-test path under the system temporary directory.
///
/// The last component is `{prefix}-{pid}-{n}`, where `pid` is the current
/// process id and `n` is the value obtained by incrementing the file-level
/// `TEMPFILE_COUNTER`. Only the path is computed; nothing is created on disk.
fn temp_root(prefix: &str) -> PathBuf {
    let pid = std::process::id();
    let sequence = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
    let mut path = std::env::temp_dir();
    path.push(format!("{prefix}-{pid}-{sequence}"));
    path
}
}