use std::collections::{BTreeMap, HashSet};
use std::fs;
use std::io::Cursor;
use std::path::PathBuf;
/// Snapshot of the deduplication statistics gathered by a `CompressionCache`.
#[derive(Debug, Clone, Copy)]
pub struct DedupSummary {
    /// Number of inputs processed: disk-cache hits, fresh compressions and
    /// duplicates added together.
    pub total_files: usize,
    /// Number of distinct blobs that were kept.
    pub unique_blobs: usize,
    /// Number of inputs whose content had already been seen.
    pub duplicate_count: usize,
    /// Sum of the compressed sizes of all duplicates, in bytes.
    pub saved_bytes: usize,
}
/// Identity and size of one compressed blob, as returned by
/// `CompressionCache::compress`.
#[derive(Debug, Clone)]
pub struct BlobInfo {
    /// Hex-encoded BLAKE3 hash of the *uncompressed* input.
    pub hash: String,
    /// Length in bytes of the compressed representation.
    pub compressed_len: usize,
}
/// Compresses byte blobs with zstd, deduplicating identical content in memory
/// and optionally persisting compressed output in an on-disk cache directory.
pub struct CompressionCache {
    /// Directory holding cached `<hash>_<level>.zst` files; `None` means the
    /// on-disk cache is not used.
    cache_dir: Option<PathBuf>,
    /// zstd compression level; also part of every cache file name.
    level: i32,
    /// Cache file names looked up by this instance; `cleanup` keeps these and
    /// removes every other `.zst` file in the cache directory.
    used_files: HashSet<String>,
    /// Number of blobs served from the on-disk cache.
    cache_hits: usize,
    /// Number of blobs that had to be compressed.
    misses: usize,
    /// Number of inputs whose content was already present in `blobs`.
    dedup_hits: usize,
    /// Sum of the compressed sizes of all deduplicated inputs, in bytes.
    dedup_saved_bytes: usize,
    /// Unique compressed blobs keyed by the hex hash of their uncompressed
    /// content.
    blobs: BTreeMap<String, Vec<u8>>,
}
impl CompressionCache {
    /// Creates a cache that compresses at zstd `level`.
    ///
    /// When `cache_dir` is `Some`, the directory is created if necessary and
    /// used to persist compressed blobs. If it cannot be created, a warning is
    /// printed to stderr and the on-disk cache is disabled for this instance;
    /// in-memory deduplication is unaffected.
    pub fn new(cache_dir: Option<PathBuf>, level: i32) -> Self {
        // An unusable directory would make every lookup a miss and every store
        // a silent failure, so drop it up front and say why.
        let cache_dir = cache_dir.filter(|dir| match fs::create_dir_all(dir) {
            Ok(()) => true,
            Err(err) => {
                eprintln!(
                    "typst-bake: Cannot create cache directory {}: {err}; cache disabled",
                    dir.display()
                );
                false
            }
        });
        Self {
            cache_dir,
            level,
            used_files: HashSet::new(),
            cache_hits: 0,
            misses: 0,
            dedup_hits: 0,
            dedup_saved_bytes: 0,
            blobs: BTreeMap::new(),
        }
    }

    /// Compresses `data` and records the result under its content hash.
    ///
    /// If identical content was compressed before by this instance, nothing is
    /// recompressed: the call is counted as a duplicate and the existing
    /// blob's hash and compressed length are returned.
    ///
    /// # Panics
    ///
    /// Panics if zstd compression fails.
    pub fn compress(&mut self, data: &[u8]) -> BlobInfo {
        let hash = blake3::hash(data).to_hex().to_string();
        if let Some(existing) = self.blobs.get(&hash) {
            self.dedup_hits += 1;
            self.dedup_saved_bytes += existing.len();
            return BlobInfo {
                compressed_len: existing.len(),
                hash,
            };
        }
        let compressed = self.load_or_compress(data, &hash);
        let compressed_len = compressed.len();
        self.blobs.insert(hash.clone(), compressed);
        BlobInfo {
            compressed_len,
            hash,
        }
    }

    /// Returns the compressed form of `data`, reading it from the on-disk
    /// cache when possible and storing it there otherwise.
    ///
    /// Exactly one of `cache_hits` / `misses` is incremented per call.
    fn load_or_compress(&mut self, data: &[u8], hash: &str) -> Vec<u8> {
        let Some(cache_dir) = &self.cache_dir else {
            self.misses += 1;
            return self.compress_raw(data);
        };

        let cache_filename = format!("{hash}_{}.zst", self.level);
        let cache_path = cache_dir.join(&cache_filename);
        // Mark the file as in use even on a miss so `cleanup` keeps it.
        self.used_files.insert(cache_filename);

        match fs::read(&cache_path) {
            // A zstd frame always starts with a magic number, so an empty file
            // cannot be valid output; treat it as a miss and overwrite it.
            Ok(cached) if !cached.is_empty() => {
                self.cache_hits += 1;
                return cached;
            }
            _ => {}
        }

        self.misses += 1;
        let compressed = self.compress_raw(data);

        // Write to a temporary file and rename it into place so readers never
        // observe a partially written cache entry. The temporary name carries
        // the hash and level so different blobs never share a temp path.
        let tmp_path = cache_dir.join(format!(
            ".tmp_{hash}_{}_{}",
            self.level,
            std::process::id()
        ));
        let stored =
            fs::write(&tmp_path, &compressed).and_then(|()| fs::rename(&tmp_path, &cache_path));
        if stored.is_err() {
            // Caching is best-effort; just make sure no stray temp file stays.
            let _ = fs::remove_file(&tmp_path);
        }
        compressed
    }

    /// Generates one `static BLOB_<hash>: [u8; N]` item per unique blob,
    /// initialised with the compressed bytes.
    ///
    /// Items are produced in ascending hash order (the map's iteration order).
    pub fn dedup_statics(&self) -> Vec<proc_macro2::TokenStream> {
        self.blobs
            .iter()
            .map(|(hash, data)| {
                let ident = quote::format_ident!("BLOB_{}", hash);
                let len = data.len();
                let bytes_literal = syn::LitByteStr::new(data, proc_macro2::Span::call_site());
                quote::quote! {
                    static #ident: [u8; #len] = *#bytes_literal;
                }
            })
            .collect()
    }

    /// Deletes every `.zst` file in the cache directory that this instance did
    /// not look up.
    ///
    /// Does nothing when the on-disk cache is disabled or the directory cannot
    /// be read. Files whose names are not valid UTF-8 are left alone, and
    /// deletion errors are ignored.
    pub fn cleanup(&self) {
        let Some(cache_dir) = &self.cache_dir else {
            return;
        };
        let Ok(entries) = fs::read_dir(cache_dir) else {
            return;
        };
        for path in entries.filter_map(Result::ok).map(|entry| entry.path()) {
            let is_zst = path.extension().and_then(|ext| ext.to_str()) == Some("zst");
            let is_unused = path
                .file_name()
                .and_then(|name| name.to_str())
                .is_some_and(|name| !self.used_files.contains(name));
            if is_zst && is_unused {
                let _ = fs::remove_file(&path);
            }
        }
    }

    /// Prints a one-line compression summary to stderr, followed by a
    /// deduplication line when at least one duplicate was found.
    pub fn log_summary(&self) {
        let total = self.total_files();
        let unique = self.blobs.len();
        if self.cache_dir.is_some() {
            eprintln!(
                "typst-bake: Compression level {}, {total} files, {unique} unique blobs ({} cached, {} compressed)",
                self.level, self.cache_hits, self.misses
            );
        } else {
            eprintln!(
                "typst-bake: Compression level {}, {total} files, {unique} unique blobs (cache disabled)",
                self.level
            );
        }
        if self.dedup_hits > 0 {
            eprintln!(
                "typst-bake: Dedup: removed {} duplicates, saved {}",
                self.dedup_hits,
                format_size(self.dedup_saved_bytes)
            );
        }
    }

    /// Returns the current deduplication statistics.
    pub fn dedup_summary(&self) -> DedupSummary {
        DedupSummary {
            total_files: self.total_files(),
            unique_blobs: self.blobs.len(),
            duplicate_count: self.dedup_hits,
            saved_bytes: self.dedup_saved_bytes,
        }
    }

    /// Number of `compress` calls so far: each call increments exactly one of
    /// the three counters.
    fn total_files(&self) -> usize {
        self.cache_hits + self.misses + self.dedup_hits
    }

    /// Compresses `data` with zstd at the configured level.
    ///
    /// # Panics
    ///
    /// Panics if the encoder reports an error.
    fn compress_raw(&self, data: &[u8]) -> Vec<u8> {
        zstd::encode_all(Cursor::new(data), self.level).expect("zstd compression failed")
    }
}
/// Renders a byte count for humans: plain bytes below 1 KiB, otherwise KB or
/// MB (binary multiples) with one decimal place.
fn format_size(bytes: usize) -> String {
    const KIB: usize = 1024;
    const MIB: usize = KIB * KIB;
    match bytes {
        b if b < KIB => format!("{b} B"),
        b if b < MIB => format!("{:.1} KB", b as f64 / 1024.0),
        b => format!("{:.1} MB", b as f64 / (1024.0 * 1024.0)),
    }
}