use std::path::Path;
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use zccache_core::NormalizedPath;
use dashmap::DashMap;
use serde::{Deserialize, Serialize};
/// Metadata record for one cached artifact, as held in `ArtifactStore`.
///
/// The record is serde-serializable; `ArtifactStore::flush` writes it with
/// bincode and `ArtifactStore::open` reads it back.
///
/// NOTE(review): bincode presumably encodes struct fields positionally, so
/// reordering or retyping fields would invalidate existing index files —
/// confirm before changing the layout.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArtifactIndex {
/// Names of the outputs (the tests use file names such as `foo.o`).
/// Stored as a shared slice; `new` converts its `Vec<String>` argument.
pub output_names: Arc<[String]>,
/// Sizes of the outputs. Presumably in bytes and index-aligned with
/// `output_names` — nothing in this type enforces equal lengths.
pub output_sizes: Vec<u64>,
/// Captured standard output bytes, behind an `Arc`.
pub stdout: Arc<Vec<u8>>,
/// Captured standard error bytes, behind an `Arc`.
pub stderr: Arc<Vec<u8>>,
/// Exit code recorded for the entry.
pub exit_code: i32,
/// Sum of `output_sizes`, computed once by `new`.
pub total_size: u64,
/// Whole seconds since the Unix epoch at the moment `new` ran
/// (0 if the system clock reported a time before the epoch).
pub stored_at_secs: u64,
}
impl ArtifactIndex {
    /// Builds a metadata record stamped with the current wall-clock time.
    ///
    /// * `output_names` / `output_sizes` — stored as given (names become a
    ///   shared slice); their lengths are not cross-checked.
    /// * `stdout` / `stderr` — anything convertible into `Arc<Vec<u8>>`,
    ///   e.g. a plain `Vec<u8>` or an existing `Arc`.
    /// * `exit_code` — stored verbatim.
    ///
    /// `total_size` is derived as the sum of `output_sizes`, and
    /// `stored_at_secs` as seconds since the Unix epoch.
    pub fn new(
        output_names: Vec<String>,
        output_sizes: Vec<u64>,
        stdout: impl Into<Arc<Vec<u8>>>,
        stderr: impl Into<Arc<Vec<u8>>>,
        exit_code: i32,
    ) -> Self {
        // A clock set before the epoch makes `duration_since` fail; record 0
        // rather than refusing to build the entry.
        let stored_at_secs = match SystemTime::now().duration_since(UNIX_EPOCH) {
            Ok(since_epoch) => since_epoch.as_secs(),
            Err(_) => 0,
        };
        let total_size: u64 = output_sizes.iter().copied().sum();
        let shared_names: Arc<[String]> = output_names.into();
        let stdout: Arc<Vec<u8>> = stdout.into();
        let stderr: Arc<Vec<u8>> = stderr.into();

        Self {
            output_names: shared_names,
            output_sizes,
            stdout,
            stderr,
            exit_code,
            total_size,
            stored_at_secs,
        }
    }
}
/// In-memory artifact index keyed by string, paired with an on-disk file.
///
/// Entries live in a `DashMap`; nothing is written to disk until `flush`
/// is called, and `open` reloads whatever the last flush stored.
pub struct ArtifactStore {
/// File that `open` reads from and `flush` writes to.
path: NormalizedPath,
/// Key -> metadata map holding the current contents of the index.
entries: DashMap<String, ArtifactIndex>,
}
impl ArtifactStore {
    /// Opens the index backed by the file at `path`.
    ///
    /// * File missing: starts empty.
    /// * File present but not decodable by bincode: logs a warning and starts
    ///   empty (the file itself is left untouched until the next `flush`).
    /// * File decodable: every stored `(key, entry)` row is loaded.
    ///
    /// # Errors
    /// Returns any read error other than `NotFound`.
    pub fn open(path: &Path) -> std::io::Result<Self> {
        let entries = DashMap::new();
        match std::fs::read(path) {
            Ok(bytes) => match bincode::deserialize::<Vec<(String, ArtifactIndex)>>(&bytes) {
                Ok(rows) => {
                    let count = rows.len();
                    for (k, v) in rows {
                        entries.insert(k, v);
                    }
                    if count > 0 {
                        tracing::info!(
                            path = %path.display(),
                            loaded = count,
                            "artifact index loaded"
                        );
                    } else {
                        tracing::info!(
                            path = %path.display(),
                            "artifact index loaded as empty (file present, 0 entries)"
                        );
                    }
                }
                // A corrupt blob is treated as a cold cache, not a fatal error.
                Err(e) => {
                    tracing::warn!(
                        path = %path.display(),
                        "artifact index blob is corrupt, starting empty: {e}"
                    );
                }
            },
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                tracing::info!(
                    path = %path.display(),
                    "artifact index file not found, starting empty"
                );
            }
            Err(e) => return Err(e),
        };
        Ok(Self {
            path: NormalizedPath::new(path),
            entries,
        })
    }

    /// Stores a clone of `meta` under `key`, replacing any existing entry.
    pub fn insert(&self, key: &str, meta: &ArtifactIndex) {
        self.entries.insert(key.to_string(), meta.clone());
    }

    /// Inserts every `(key, entry)` pair from `entries`, replacing existing
    /// keys, and returns how many pairs were processed (duplicates included).
    pub fn insert_many<I, K>(&self, entries: I) -> usize
    where
        I: IntoIterator<Item = (K, ArtifactIndex)>,
        K: AsRef<str>,
    {
        let mut count = 0usize;
        for (k, v) in entries {
            self.entries.insert(k.as_ref().to_string(), v);
            count += 1;
        }
        count
    }

    /// Returns a clone of the entry stored under `key`, if any.
    pub fn get(&self, key: &str) -> Option<ArtifactIndex> {
        self.entries.get(key).map(|e| e.value().clone())
    }

    /// Removes `key`; returns `true` if an entry was present.
    pub fn remove(&self, key: &str) -> bool {
        self.entries.remove(key).is_some()
    }

    /// Removes each key in `keys` and returns how many were actually present.
    pub fn remove_batch(&self, keys: &[&str]) -> usize {
        let mut removed = 0usize;
        for key in keys {
            if self.entries.remove(*key).is_some() {
                removed += 1;
            }
        }
        removed
    }

    /// Returns a cloned snapshot of every `(key, entry)` pair, in map
    /// iteration order.
    pub fn load_all(&self) -> Vec<(String, ArtifactIndex)> {
        self.entries
            .iter()
            .map(|e| (e.key().clone(), e.value().clone()))
            .collect()
    }

    /// Number of entries currently in memory.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// `true` when the in-memory index holds no entries.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Drops every in-memory entry and returns the entry count observed just
    /// before clearing. The count and the clear are two separate map calls.
    pub fn clear(&self) -> usize {
        let n = self.entries.len();
        self.entries.clear();
        n
    }

    /// Serializes the current entries with bincode and atomically replaces
    /// the index file with the result.
    ///
    /// The bytes are first written to a hidden sibling temp file and then
    /// renamed over the target (see `write_atomic_durable`). Missing parent
    /// directories are created. On failure the temp file is removed on a
    /// best-effort basis.
    ///
    /// # Errors
    /// Returns serialization failures (wrapped as `io::Error`) and any I/O
    /// error from creating directories, writing, syncing or renaming.
    pub fn flush(&self) -> std::io::Result<()> {
        // Per-process sequence number for temp-file names. The store is shared
        // through `&self`, so two threads may flush at once; with only the
        // PID in the name they would truncate and rename the same temp file.
        static FLUSH_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

        let snapshot = self.load_all();
        let bytes = bincode::serialize(&snapshot)
            .map_err(|e| std::io::Error::other(format!("bincode serialize: {e}")))?;
        if let Some(parent) = self.path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        let name = self
            .path
            .file_name()
            .map(|n| n.to_string_lossy().into_owned())
            .unwrap_or_else(|| "index.bin".into());
        let seq = FLUSH_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        // Keeps the original `.{name}.tmp-{pid}` prefix and appends the
        // sequence number so every flush call gets its own temp file.
        let tmp = self.path.as_path().with_file_name(format!(
            ".{name}.tmp-{}-{seq}",
            std::process::id()
        ));
        let target = self.path.as_path();
        let result = write_atomic_durable(&tmp, target, &bytes);
        match &result {
            Ok(()) => {
                tracing::info!(
                    path = %self.path.display(),
                    count = snapshot.len(),
                    bytes = bytes.len(),
                    "artifact index flushed to disk"
                );
            }
            Err(_) => {
                // Best effort: do not leave a stray temp file behind.
                let _ = std::fs::remove_file(&tmp);
            }
        }
        result
    }
}
/// Writes `bytes` to `target` by way of `tmp`, so readers never observe a
/// partially written `target`.
///
/// Steps: create/truncate `tmp`, write all bytes, `sync_all` the file, close
/// it, rename `tmp` over `target`, then try to `sync_all` the directory that
/// contains `target`.
///
/// The directory sync is best effort: failure to open or sync the directory
/// is ignored.
///
/// # Errors
/// Returns any error from creating, writing or syncing `tmp`, or from the
/// rename. `tmp` is not cleaned up here; callers handle that.
fn write_atomic_durable(tmp: &Path, target: &Path, bytes: &[u8]) -> std::io::Result<()> {
    use std::io::Write;

    // Scoped so the handle is closed before the rename.
    {
        let mut file = std::fs::File::create(tmp)?;
        file.write_all(bytes)?;
        file.sync_all()?;
    }
    std::fs::rename(tmp, target)?;

    if let Some(parent) = target.parent() {
        // `Path::parent` yields an empty path for a bare relative file name
        // such as `index.bin`; opening "" fails, which would silently skip
        // the directory sync. Use the current directory in that case.
        let dir_path = if parent.as_os_str().is_empty() {
            Path::new(".")
        } else {
            parent
        };
        if let Ok(dir) = std::fs::File::open(dir_path) {
            let _ = dir.sync_all();
        }
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Opens a store backed by `index.bin` inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is handed back so the directory outlives the store.
    fn fresh_store() -> (tempfile::TempDir, ArtifactStore) {
        let tmp_dir = tempfile::tempdir().unwrap();
        let index_path = tmp_dir.path().join("index.bin");
        let store = ArtifactStore::open(&index_path).unwrap();
        (tmp_dir, store)
    }

    /// A representative entry: one 1234-unit output, exit code 0.
    fn sample_entry() -> ArtifactIndex {
        let names = vec!["foo.o".to_string()];
        let sizes = vec![1234];
        ArtifactIndex::new(
            names,
            sizes,
            b"compiler stdout".to_vec(),
            b"compiler stderr".to_vec(),
            0,
        )
    }

    /// Opening a path with no file behind it yields an empty store.
    #[test]
    fn open_creates_empty_index() {
        let (_guard, store) = fresh_store();
        assert_eq!(store.len(), 0);
        assert!(store.is_empty());
    }

    /// An inserted entry is returned field-for-field by `get`.
    #[test]
    fn insert_and_get() {
        let (_guard, store) = fresh_store();
        let entry = sample_entry();
        store.insert("abc123", &entry);

        let fetched = store.get("abc123").unwrap();
        assert_eq!(&*fetched.output_names, &["foo.o".to_string()]);
        assert_eq!(fetched.output_sizes, vec![1234]);
        assert_eq!(&*fetched.stdout, b"compiler stdout");
        assert_eq!(fetched.exit_code, 0);
        assert_eq!(fetched.total_size, 1234);
    }

    /// Looking up an unknown key gives `None`.
    #[test]
    fn get_missing_returns_none() {
        let (_guard, store) = fresh_store();
        let lookup = store.get("nonexistent");
        assert!(lookup.is_none());
    }

    /// A second insert under the same key replaces the first.
    #[test]
    fn insert_overwrites() {
        let (_guard, store) = fresh_store();
        let original = ArtifactIndex::new(vec!["a.o".into()], vec![100], vec![], vec![], 0);
        let replacement = ArtifactIndex::new(vec!["b.o".into()], vec![200], vec![], vec![], 1);

        store.insert("key", &original);
        store.insert("key", &replacement);

        assert_eq!(store.len(), 1);
        let current = store.get("key").unwrap();
        assert_eq!(current.exit_code, 1);
    }

    /// `remove` reports whether the key was present.
    #[test]
    fn remove_existing_and_missing() {
        let (_guard, store) = fresh_store();
        store.insert("k", &sample_entry());

        assert!(store.remove("k"));
        assert!(!store.remove("k"));
        assert!(store.get("k").is_none());
    }

    /// `remove_batch` counts only keys that existed.
    #[test]
    fn remove_batch_multiple() {
        let (_guard, store) = fresh_store();
        (0..5).for_each(|i| store.insert(&format!("k{i}"), &sample_entry()));

        let victims = ["k0", "k2", "k4", "missing"];
        let removed = store.remove_batch(&victims);

        assert_eq!(removed, 3);
        assert_eq!(store.len(), 2);
    }

    /// Bulk insert reports its count and all entries are visible afterwards.
    #[test]
    fn insert_many_and_load_all() {
        let (_guard, store) = fresh_store();
        let mut batch: Vec<(String, ArtifactIndex)> = Vec::with_capacity(50);
        for i in 0..50 {
            batch.push((format!("batch-{i:03}"), sample_entry()));
        }

        let inserted = store.insert_many(batch);

        assert_eq!(inserted, 50);
        assert_eq!(store.load_all().len(), 50);
    }

    /// Bulk insert of nothing changes nothing.
    #[test]
    fn insert_many_empty_is_noop() {
        let (_guard, store) = fresh_store();
        let nothing: Vec<(String, ArtifactIndex)> = Vec::new();

        let inserted = store.insert_many(nothing);

        assert_eq!(inserted, 0);
        assert_eq!(store.len(), 0);
    }

    /// `clear` empties the store and reports how many entries it held.
    #[test]
    fn clear_removes_all() {
        let (_guard, store) = fresh_store();
        (0..10).for_each(|i| store.insert(&format!("k{i}"), &sample_entry()));

        let removed = store.clear();

        assert_eq!(removed, 10);
        assert!(store.is_empty());
    }

    /// Entries flushed by one store instance are loaded by the next.
    #[test]
    fn flush_and_reopen_round_trip() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let index_path = tmp_dir.path().join("index.bin");

        // First instance: populate, persist, drop.
        {
            let writer = ArtifactStore::open(&index_path).unwrap();
            writer.insert("persist_test", &sample_entry());
            writer.insert("another", &sample_entry());
            writer.flush().unwrap();
        }

        let reader = ArtifactStore::open(&index_path).unwrap();
        assert_eq!(reader.len(), 2);
        assert!(reader.get("persist_test").is_some());
        assert!(reader.get("another").is_some());
    }

    /// Undecodable file contents are tolerated and produce an empty store.
    #[test]
    fn open_corrupt_file_starts_empty() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let index_path = tmp_dir.path().join("index.bin");
        std::fs::write(&index_path, b"not valid bincode").unwrap();

        let store = ArtifactStore::open(&index_path).unwrap();

        assert_eq!(store.len(), 0);
    }

    /// `flush` creates missing parent directories.
    #[test]
    fn flush_without_parent_dir_creates_it() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let index_path = tmp_dir.path().join("nested/deeply/index.bin");
        let store = ArtifactStore::open(&index_path).unwrap();

        store.insert("k", &sample_entry());
        store.flush().unwrap();

        assert!(index_path.exists());
    }

    /// A second flush with more entries replaces the file with a larger one.
    #[test]
    fn flush_replaces_atomically() {
        let tmp_dir = tempfile::tempdir().unwrap();
        let index_path = tmp_dir.path().join("index.bin");
        let store = ArtifactStore::open(&index_path).unwrap();
        let size_on_disk = || std::fs::metadata(&index_path).unwrap().len();

        store.insert("a", &sample_entry());
        store.flush().unwrap();
        let size_after_first = size_on_disk();

        store.insert("b", &sample_entry());
        store.flush().unwrap();
        let size_after_second = size_on_disk();

        assert!(size_after_second > size_after_first);
    }
}