use crate::{
ArchivePath, ArchiveRead, ArchiveReader, ArchiveWrite, ArchiveWriter, BaleError, EntryKind,
};
use nix::sys::stat::SFlag;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
/// Permission bits OR-ed with `S_IFDIR` when `compact` adds an entry for a
/// parent directory that has no directory entry of its own.
const DEFAULT_DIR_PERM: u32 = 0o755;
/// Scope guard that deletes the file at `path` when it is dropped, unless
/// [`TempFileGuard::persist`] has been called beforehand.
struct TempFileGuard {
    /// Location of the file that is removed on drop.
    path: PathBuf,
    /// Set once the guard has been disarmed; `Drop` then leaves the file alone.
    persist: bool,
}

impl TempFileGuard {
    /// Starts guarding `path`. The guard is armed: dropping it removes the file.
    fn new(path: PathBuf) -> Self {
        TempFileGuard {
            path,
            persist: false,
        }
    }

    /// Disarms the guard so that dropping it no longer touches the file.
    fn persist(&mut self) {
        self.persist = true;
    }
}

impl Drop for TempFileGuard {
    fn drop(&mut self) {
        if self.persist {
            return;
        }
        // Best-effort cleanup: any error from the removal is deliberately ignored.
        let _ = fs::remove_file(&self.path);
    }
}
/// Size and entry accounting returned by [`compact`].
#[derive(Debug, Clone, Copy, Default)]
pub struct CompactStats {
/// Length in bytes of the archive file before compaction.
pub original_size: u64,
/// Length in bytes of the rewritten archive file.
pub compacted_size: u64,
/// Number of entries read from the original archive minus the number of
/// those entries that were kept.
pub entries_removed: usize,
/// `original_size - compacted_size`, saturating at zero when the rewritten
/// file is not smaller.
pub bytes_reclaimed: u64,
}
pub fn compact(path: impl AsRef<Path>) -> Result<CompactStats, BaleError> {
let path = path.as_ref();
let original_size = fs::metadata(path)?.len();
let reader = ArchiveReader::open(path)?;
let alignment = reader.alignment();
let path_size = reader.path_size() as u16;
let mut seen_paths: HashSet<Vec<u8>> = HashSet::new();
let mut entries_to_copy: Vec<_> = Vec::new();
let mut total_entries = 0usize;
for (header, path_bytes) in reader.iter_entries() {
total_entries += 1;
let trimmed: Vec<u8> = path_bytes.iter().copied().take_while(|&b| b != 0).collect();
entries_to_copy.push((header, path_bytes.to_vec(), trimmed));
}
entries_to_copy.reverse();
let mut final_entries: Vec<_> = Vec::new();
for (header, path_bytes, trimmed) in entries_to_copy {
if seen_paths.insert(trimmed) {
final_entries.push((header, path_bytes));
}
}
final_entries.reverse();
let explicit_dirs: HashSet<Vec<u8>> = final_entries
.iter()
.filter(|(header, _)| header.kind() == EntryKind::Directory)
.map(|(_, path_bytes)| {
let trimmed: Vec<u8> = path_bytes.iter().copied().take_while(|&b| b != 0).collect();
if trimmed.ends_with(b"/") {
trimmed[..trimmed.len() - 1].to_vec()
} else {
trimmed
}
})
.collect();
let mut missing_dirs: HashSet<Vec<u8>> = HashSet::new();
for (_, path_bytes) in &final_entries {
let trimmed: Vec<u8> = path_bytes.iter().copied().take_while(|&b| b != 0).collect();
let mut parent = trimmed.as_slice();
while let Some(pos) = parent.iter().rposition(|&b| b == b'/') {
parent = &parent[..pos];
if parent.is_empty() {
break;
}
let parent_vec = parent.to_vec();
if !explicit_dirs.contains(&parent_vec) {
missing_dirs.insert(parent_vec);
}
}
}
final_entries.sort_by(|a, b| a.1.cmp(&b.1));
let entries_removed = total_entries - final_entries.len();
let parent = path.parent().unwrap_or(Path::new("."));
let temp_path = parent.join(format!(
".{}.compact.tmp",
path.file_name()
.and_then(|s| s.to_str())
.unwrap_or("archive")
));
let mut guard = TempFileGuard::new(temp_path.clone());
{
let mut writer = ArchiveWriter::create_with_options(&temp_path, alignment, path_size)?;
let mut missing_dirs_sorted: Vec<_> = missing_dirs.into_iter().collect();
missing_dirs_sorted.sort();
for dir_bytes in &missing_dirs_sorted {
let dir_str = std::str::from_utf8(dir_bytes)?;
writer.add_folder(dir_str, SFlag::S_IFDIR.bits() | DEFAULT_DIR_PERM)?;
}
for (header, path_bytes) in &final_entries {
let data = reader.read_data(header)?;
let archive_path = ArchivePath::from_null_padded_bytes(path_bytes);
let path_str = archive_path.to_str_checked()?;
let mode = header.external_attrs.get() >> 16;
writer.add_entry(path_str, data, mode)?;
}
writer.sync()?;
}
let compacted_size = fs::metadata(&temp_path)?.len();
fs::rename(&temp_path, path)?;
guard.persist();
Ok(CompactStats {
original_size,
compacted_size,
entries_removed,
bytes_reclaimed: original_size.saturating_sub(compacted_size),
})
}
/// Summary returned by [`rename_duplicates`].
#[derive(Debug, Clone, Default)]
pub struct RenameStats {
/// Number of entries that were given a new path; equals `renames.len()`.
pub entries_renamed: usize,
/// One `(original path, new path)` pair per renamed entry, in the order the
/// entries were read from the archive.
pub renames: Vec<(String, String)>,
}
pub fn rename_duplicates(path: impl AsRef<Path>) -> Result<RenameStats, BaleError> {
let path = path.as_ref();
let reader = ArchiveReader::open(path)?;
let alignment = reader.alignment();
let path_size = reader.path_size() as u16;
let mut path_counts: HashMap<String, usize> = HashMap::new();
for (_header, path_bytes) in reader.iter_entries() {
let archive_path = ArchivePath::from_null_padded_bytes(path_bytes);
let path_str = archive_path.to_str_checked()?;
*path_counts.entry(path_str.to_owned()).or_insert(0) += 1;
}
let has_duplicates = path_counts.values().any(|&count| count > 1);
if !has_duplicates {
return Ok(RenameStats::default());
}
let mut path_occurrences: HashMap<String, usize> = HashMap::new();
let mut entries: Vec<_> = Vec::new();
let mut renames: Vec<(String, String)> = Vec::new();
for (header, path_bytes) in reader.iter_entries() {
let archive_path = ArchivePath::from_null_padded_bytes(path_bytes);
let original_path = archive_path.to_str_checked()?.to_owned();
let total_count = path_counts[&original_path];
let occurrence = {
let entry = path_occurrences.entry(original_path.clone()).or_insert(0);
*entry += 1;
*entry
};
let new_path = if total_count > 1 && occurrence < total_count {
let renamed = archive_path.with_suffix(occurrence)?;
if renamed.len() > path_size as usize {
return Err(BaleError::PathTooLong {
path: renamed.to_string(),
max: path_size as usize,
});
}
let renamed_str = renamed.to_string();
renames.push((original_path, renamed_str.clone()));
renamed_str
} else {
original_path
};
entries.push((header, new_path));
}
entries.sort_by(|a, b| a.1.cmp(&b.1));
let parent = path.parent().unwrap_or(Path::new("."));
let temp_path = parent.join(format!(
".{}.rename.tmp",
path.file_name()
.and_then(|s| s.to_str())
.unwrap_or("archive")
));
let mut guard = TempFileGuard::new(temp_path.clone());
{
let mut writer = ArchiveWriter::create_with_options(&temp_path, alignment, path_size)?;
for (header, new_path) in &entries {
let data = reader.read_data(header)?;
let mode = header.external_attrs.get() >> 16;
writer.add_entry(new_path, data, mode)?;
}
writer.sync()?;
}
fs::rename(&temp_path, path)?;
guard.persist();
Ok(RenameStats {
entries_renamed: renames.len(),
renames,
})
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates a temporary directory and the path of an archive inside it.
    /// The returned `TempDir` must stay alive for as long as the path is used.
    fn scratch_archive() -> (TempDir, PathBuf) {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("test.bale");
        (dir, path)
    }

    /// Writes a new archive at `path` holding `entries` (name, data, mode) in
    /// the given order.
    fn write_archive(path: &Path, entries: &[(&str, &[u8], u32)]) {
        let mut writer = ArchiveWriter::create(path).unwrap();
        for &(name, data, mode) in entries {
            writer.add_entry(name, data, mode).unwrap();
        }
        writer.sync().unwrap();
    }

    /// Returns the path of every entry of the archive at `path`, in stored order.
    fn stored_paths(path: &Path) -> Vec<String> {
        let reader = ArchiveReader::open(path).unwrap();
        reader
            .iter_entries()
            .map(|(_, raw)| {
                ArchivePath::from_null_padded_bytes(raw)
                    .to_str_checked()
                    .unwrap()
                    .to_owned()
            })
            .collect()
    }

    /// Asserts that adding suffix `n` to `input` yields `expected`.
    fn assert_suffixed(input: &str, n: usize, expected: &str) {
        let path = ArchivePath::from_bytes(input.as_bytes());
        assert_eq!(path.with_suffix(n).unwrap().as_str(), Some(expected));
    }

    #[test]
    fn compact_empty_archive() {
        let (_dir, path) = scratch_archive();
        write_archive(&path, &[]);

        let stats = compact(&path).unwrap();
        assert_eq!(stats.entries_removed, 0);

        let reader = ArchiveReader::open(&path).unwrap();
        assert_eq!(reader.entry_count(), 0);
    }

    #[test]
    fn compact_removes_duplicates() {
        let (_dir, path) = scratch_archive();
        write_archive(
            &path,
            &[
                ("file.txt", b"original", 0o644),
                ("file.txt", b"updated", 0o644),
                ("other.txt", b"other", 0o644),
            ],
        );
        let original_size = fs::metadata(&path).unwrap().len();

        let stats = compact(&path).unwrap();
        assert_eq!(stats.entries_removed, 1);
        assert!(stats.compacted_size < original_size);

        let reader = ArchiveReader::open(&path).unwrap();
        assert_eq!(reader.entry_count(), 2);
        // The surviving entry is the later of the two duplicates.
        let entry = reader.find_entry("file.txt").unwrap();
        let data = reader.read_data(entry).unwrap();
        assert_eq!(data, b"updated");
    }

    #[test]
    fn compact_sorts_entries() {
        let (_dir, path) = scratch_archive();
        write_archive(
            &path,
            &[
                ("c.txt", b"c", 0o644),
                ("a.txt", b"a", 0o644),
                ("b.txt", b"b", 0o644),
            ],
        );

        compact(&path).unwrap();

        assert_eq!(stored_paths(&path), vec!["a.txt", "b.txt", "c.txt"]);
    }

    #[test]
    fn compact_preserves_mode() {
        let (_dir, path) = scratch_archive();
        write_archive(
            &path,
            &[
                ("exec.sh", b"#!/bin/bash", 0o755),
                ("data.txt", b"data", 0o644),
            ],
        );

        compact(&path).unwrap();

        let reader = ArchiveReader::open(&path).unwrap();
        let exec_entry = reader.find_entry("exec.sh").unwrap();
        let data_entry = reader.find_entry("data.txt").unwrap();
        for &(entry, mode) in &[(exec_entry, 0o755), (data_entry, 0o644)] {
            assert_eq!(entry.external_attrs.get() >> 16, mode);
        }
    }

    #[test]
    fn insert_suffix_with_extension() {
        assert_suffixed("file.txt", 1, "file(1).txt");
        assert_suffixed("image.png", 2, "image(2).png");
        assert_suffixed("archive.tar.gz", 3, "archive.tar(3).gz");
    }

    #[test]
    fn insert_suffix_no_extension() {
        assert_suffixed("README", 1, "README(1)");
        assert_suffixed("Makefile", 5, "Makefile(5)");
    }

    #[test]
    fn insert_suffix_directory_with_dot() {
        // Dots in directory components must not be taken for an extension.
        assert_suffixed("foo.d/bar", 1, "foo.d/bar(1)");
        assert_suffixed("foo.d/bar.txt", 2, "foo.d/bar(2).txt");
        assert_suffixed("a.b/c.d/file.ext", 3, "a.b/c.d/file(3).ext");
    }

    #[test]
    fn rename_duplicates_no_duplicates() {
        let (_dir, path) = scratch_archive();
        write_archive(&path, &[("a.txt", b"a", 0o644), ("b.txt", b"b", 0o644)]);

        let stats = rename_duplicates(&path).unwrap();
        assert_eq!(stats.entries_renamed, 0);
        assert!(stats.renames.is_empty());

        let reader = ArchiveReader::open(&path).unwrap();
        assert_eq!(reader.entry_count(), 2);
    }

    #[test]
    fn rename_duplicates_renames_earlier() {
        let (_dir, path) = scratch_archive();
        write_archive(
            &path,
            &[
                ("file.txt", b"version 1", 0o644),
                ("file.txt", b"version 2", 0o644),
                ("file.txt", b"version 3", 0o644),
            ],
        );

        let stats = rename_duplicates(&path).unwrap();
        assert_eq!(stats.entries_renamed, 2);
        for &renamed in &["file(1).txt", "file(2).txt"] {
            let pair = ("file.txt".to_string(), renamed.to_string());
            assert!(stats.renames.contains(&pair));
        }

        let reader = ArchiveReader::open(&path).unwrap();
        assert_eq!(reader.entry_count(), 3);
        // The newest version keeps the plain name; older ones carry a suffix.
        for &(name, expected) in &[
            ("file.txt", b"version 3"),
            ("file(1).txt", b"version 1"),
            ("file(2).txt", b"version 2"),
        ] {
            let entry = reader.find_entry(name).unwrap();
            let data = reader.read_data(entry).unwrap();
            assert_eq!(data, expected);
        }
    }

    #[test]
    fn rename_duplicates_sorts_entries() {
        let (_dir, path) = scratch_archive();
        write_archive(
            &path,
            &[
                ("z.txt", b"z1", 0o644),
                ("z.txt", b"z2", 0o644),
                ("a.txt", b"a", 0o644),
            ],
        );

        rename_duplicates(&path).unwrap();

        assert_eq!(stored_paths(&path), vec!["a.txt", "z(1).txt", "z.txt"]);
    }
}