use std::collections::BTreeSet;
use std::fs;
use anyhow::{Context, Result, bail};
use camino::{Utf8Path, Utf8PathBuf};
use chrono::Utc;
use glob::glob;
use sha2::{Digest, Sha256};
use walkdir::WalkDir;
use crate::model::{
CacheEntry, CacheIndex, CacheMatch, CacheMatchKind, CachePathRecord, Compression, RunnerOs,
};
/// A cache store backed by a directory on disk, together with its loaded index.
pub struct CacheStore {
/// Directory that holds `index.json` and the `entries/<id>/files` trees.
root: Utf8PathBuf,
/// Index read by `open` and written back to `index.json` by `persist`.
index: CacheIndex,
}
/// Input shared by `lookup`, `restore` and `save`.
#[derive(Debug, Clone)]
pub struct CacheRequest {
/// Base directory: relative cache paths are joined onto it and restored files land under it.
pub workspace: Utf8PathBuf,
/// Primary key; tried as an exact match and then as a prefix during lookup.
pub key: String,
/// Fallback keys, each tried (exact, then prefix) after the primary key within a scope.
pub restore_keys: Vec<String>,
/// Path inputs to cache; they are also hashed into the cache version.
pub paths: Vec<String>,
/// Scope a saved entry is recorded under.
pub scope: String,
/// Scopes searched by `lookup`, in the order given.
pub accessible_scopes: Vec<String>,
/// Recorded on saved entries; it is not an input to `cache_version`.
pub runner_os: RunnerOs,
/// Hashed into the cache version and recorded on saved entries.
pub compression: Compression,
/// Hashed into the cache version and recorded on saved entries.
pub enable_cross_os_archive: bool,
}
/// Outcome of `CacheStore::save`.
#[derive(Debug, Clone)]
pub struct SaveResult {
/// The entry that was written; `None` when nothing was saved.
pub entry: Option<CacheEntry>,
/// Per-input inspection results for the requested paths.
pub path_records: Vec<CachePathRecord>,
/// True when an entry with the same scope, key and version already existed.
pub skipped_existing: bool,
}
/// Outcome of `CacheStore::restore`.
#[derive(Debug, Clone)]
pub struct RestoreResult {
/// The matched entry, or `None` on a cache miss.
pub matched: Option<CacheMatch>,
/// Number of files copied into the workspace.
pub restored_files: usize,
/// Total size in bytes of the files copied into the workspace.
pub restored_bytes: u64,
/// Number of stored files skipped because they sit under the `ABSOLUTE_SENTINEL` directory.
pub skipped_absolute_files: usize,
}
/// First path component under which files from outside the workspace are stored inside an
/// entry; `restore` skips (and counts) anything whose relative path starts with it.
pub(crate) const ABSOLUTE_SENTINEL: &str = "absolute";
impl CacheStore {
/// Opens the cache store rooted at `root`, creating the directory when it is missing.
///
/// An existing `index.json` inside the root is read and deserialized; when the file is
/// absent the store starts from `CacheIndex::default()`.
///
/// # Errors
/// Fails if the root directory cannot be created or the index cannot be read or parsed.
pub fn open(root: impl Into<Utf8PathBuf>) -> Result<Self> {
    let root: Utf8PathBuf = root.into();
    fs::create_dir_all(&root).with_context(|| format!("creating cache store {root}"))?;

    let index_path = root.join("index.json");
    if !index_path.exists() {
        // Fresh store: nothing has been persisted yet.
        return Ok(Self {
            root,
            index: CacheIndex::default(),
        });
    }

    let raw = fs::read_to_string(&index_path)
        .with_context(|| format!("reading cache index {index_path}"))?;
    let index = serde_json::from_str(&raw)
        .with_context(|| format!("parsing cache index {index_path}"))?;
    Ok(Self { root, index })
}
pub fn lookup(&self, request: &CacheRequest) -> Option<CacheMatch> {
let version = cache_version(
&request.paths,
request.compression,
request.enable_cross_os_archive,
);
for scope in &request.accessible_scopes {
if let Some(entry) = self.find_exact(scope, &request.key, &version) {
return Some(match_from(entry, CacheMatchKind::ExactKey));
}
if let Some(entry) = self.find_prefix(scope, &request.key, &version) {
return Some(match_from(entry, CacheMatchKind::PrefixKey));
}
for restore_key in &request.restore_keys {
if let Some(entry) = self.find_exact(scope, restore_key, &version) {
return Some(match_from(entry, CacheMatchKind::RestoreExact));
}
if let Some(entry) = self.find_prefix(scope, restore_key, &version) {
return Some(match_from(entry, CacheMatchKind::RestorePrefix));
}
}
}
None
}
/// Restores the best matching cache entry into `request.workspace`.
///
/// The entry is chosen by `lookup`. When `copy_files` is true, every regular file under the
/// entry's `files` directory is copied to the same relative location in the workspace;
/// files stored under the `ABSOLUTE_SENTINEL` directory are not restored and are only
/// counted in `skipped_absolute_files`. On a hit the entry's `last_accessed_at` is set to
/// the current time and the index is persisted, whether or not files were copied.
///
/// Returns a result with `matched: None` and zero counters on a cache miss.
///
/// # Errors
/// Fails if the entry directory cannot be walked, a stored path is not UTF-8, a file or
/// its parent directory cannot be written, or the index cannot be persisted.
pub fn restore(&mut self, request: &CacheRequest, copy_files: bool) -> Result<RestoreResult> {
    let Some(matched) = self.lookup(request) else {
        return Ok(RestoreResult {
            matched: None,
            restored_files: 0,
            restored_bytes: 0,
            skipped_absolute_files: 0,
        });
    };

    let mut restored_files = 0;
    let mut restored_bytes = 0;
    let mut skipped_absolute_files = 0;

    if copy_files {
        let entry_root = self.entry_files_dir(&matched.entry_id);
        if entry_root.exists() {
            for item in WalkDir::new(&entry_root) {
                // A walk error means part of the entry is unreadable; surface it instead of
                // reporting a silently incomplete restore as success.
                let item = item.with_context(|| format!("walking cache entry {entry_root}"))?;
                if !item.file_type().is_file() {
                    continue;
                }
                let src = Utf8PathBuf::from_path_buf(item.path().to_path_buf())
                    .map_err(|_| anyhow::anyhow!("non-UTF-8 cache entry path"))?;
                // Never fall back to the full source path: joining it onto the workspace
                // could make the copy target the cached file itself.
                let rel = src.strip_prefix(&entry_root).with_context(|| {
                    format!("cached file {src} is not inside entry dir {entry_root}")
                })?;
                if matches!(rel.components().next(), Some(first) if first.as_str() == ABSOLUTE_SENTINEL)
                {
                    skipped_absolute_files += 1;
                    continue;
                }
                let dest = request.workspace.join(rel);
                if let Some(parent) = dest.parent() {
                    fs::create_dir_all(parent)
                        .with_context(|| format!("creating restore parent {parent}"))?;
                }
                // `fs::copy` already reports the number of bytes written.
                let bytes = fs::copy(&src, &dest)
                    .with_context(|| format!("restoring cached file {src} to {dest}"))?;
                restored_files += 1;
                restored_bytes += bytes;
            }
        }
    }

    let now = Utc::now();
    if let Some(entry) = self
        .index
        .entries
        .iter_mut()
        .find(|entry| entry.id == matched.entry_id)
    {
        entry.last_accessed_at = now;
    }
    self.persist()?;

    Ok(RestoreResult {
        matched: Some(matched),
        restored_files,
        restored_bytes,
        skipped_absolute_files,
    })
}
pub fn save(&mut self, request: &CacheRequest) -> Result<SaveResult> {
let version = cache_version(
&request.paths,
request.compression,
request.enable_cross_os_archive,
);
if self
.find_exact(&request.scope, &request.key, &version)
.is_some()
{
return Ok(SaveResult {
entry: None,
path_records: inspect_paths(&request.workspace, &request.paths)?,
skipped_existing: true,
});
}
let path_records = inspect_paths(&request.workspace, &request.paths)?;
let files = path_records
.iter()
.map(|record| record.files)
.sum::<usize>();
let bytes = path_records.iter().map(|record| record.bytes).sum::<u64>();
if files == 0 {
return Ok(SaveResult {
entry: None,
path_records,
skipped_existing: false,
});
}
let now = Utc::now();
let id = entry_id(
&request.key,
&version,
&request.scope,
now.timestamp_nanos_opt().unwrap_or_default(),
);
let files_dir = self.entry_files_dir(&id);
fs::create_dir_all(&files_dir)
.with_context(|| format!("creating entry dir {files_dir}"))?;
for record in &path_records {
if !record.exists {
continue;
}
copy_input_to_entry(&request.workspace, &record.resolved, &files_dir)?;
}
let entry = CacheEntry {
id: id.clone(),
key: request.key.clone(),
version,
scope: request.scope.clone(),
paths: request.paths.clone(),
runner_os: request.runner_os,
compression: request.compression,
enable_cross_os_archive: request.enable_cross_os_archive,
created_at: now,
last_accessed_at: now,
files,
bytes,
};
self.index.entries.push(entry.clone());
self.persist()?;
Ok(SaveResult {
entry: Some(entry),
path_records,
skipped_existing: false,
})
}
/// Inspects `paths` relative to `workspace` and returns one record per input.
///
/// Delegates to the module-level `inspect_paths`; it does not read any store state.
pub fn inspect_paths(
&self,
workspace: &Utf8Path,
paths: &[String],
) -> Result<Vec<CachePathRecord>> {
inspect_paths(workspace, paths)
}
/// Writes the in-memory index to `index.json` under the store root.
///
/// The JSON is first written to `index.json.tmp` and then renamed over the real file, so
/// an interrupted write cannot leave a truncated index that `open` would fail to parse.
///
/// # Errors
/// Fails if the index cannot be serialized, or the temporary file cannot be written or
/// renamed into place.
fn persist(&self) -> Result<()> {
    let index_path = self.root.join("index.json");
    let staging_path = self.root.join("index.json.tmp");
    let serialized = serde_json::to_string_pretty(&self.index)?;
    fs::write(&staging_path, serialized)
        .with_context(|| format!("writing cache index {staging_path}"))?;
    // `fs::rename` replaces an existing destination.
    fs::rename(&staging_path, &index_path)
        .with_context(|| format!("writing cache index {index_path}"))
}
/// Iterates over every entry in the index, in stored order.
fn entries(&self) -> impl Iterator<Item = &CacheEntry> {
self.index.entries.iter()
}
/// Returns the most recently created entry whose scope, key and version all equal the
/// arguments, or `None` when there is no such entry.
fn find_exact(&self, scope: &str, key: &str, version: &str) -> Option<&CacheEntry> {
    self.entries()
        .filter(|candidate| candidate.version == version)
        .filter(|candidate| candidate.scope == scope)
        .filter(|candidate| candidate.key == key)
        // Like `max_by_key`, `max_by` keeps the last of several equally new entries.
        .max_by(|left, right| left.created_at.cmp(&right.created_at))
}
/// Returns the most recently created entry in `scope` with the given `version` whose key
/// starts with `prefix`, or `None` when there is no such entry.
fn find_prefix(&self, scope: &str, prefix: &str, version: &str) -> Option<&CacheEntry> {
    self.entries()
        .filter(|candidate| candidate.version == version)
        .filter(|candidate| candidate.scope == scope)
        .filter(|candidate| candidate.key.starts_with(prefix))
        // Like `max_by_key`, `max_by` keeps the last of several equally new entries.
        .max_by(|left, right| left.created_at.cmp(&right.created_at))
}
/// Builds the path `<root>/entries/<id>/files` where an entry's files are stored.
fn entry_files_dir(&self, id: &str) -> Utf8PathBuf {
    let mut dir = self.root.join("entries");
    dir.push(id);
    dir.push("files");
    dir
}
}
/// Derives the 16-hex-character version string for a set of cache inputs.
///
/// The version is a truncated SHA-256 over the compression setting, the cross-OS flag and
/// the path inputs. Paths are normalized (backslashes to `/`, surrounding whitespace
/// removed) and sorted first, so input order does not affect the result.
pub fn cache_version(paths: &[String], compression: Compression, cross_os: bool) -> String {
    let mut normalized: Vec<String> = Vec::with_capacity(paths.len());
    for raw in paths {
        normalized.push(raw.replace('\\', "/").trim().to_owned());
    }
    normalized.sort();

    let mut hasher = Sha256::new();
    // One header line per setting, then one line per path.
    hasher.update(format!(
        "compression={compression:?}\ncross_os={cross_os}\n"
    ));
    for entry in &normalized {
        hasher.update(format!("{entry}\n"));
    }

    let digest = hex::encode(hasher.finalize());
    digest[..16].to_owned()
}
pub fn accessible_scopes(
current_scope: &str,
default_branch: &str,
base_ref: Option<&str>,
) -> Vec<String> {
let mut scopes = Vec::new();
push_unique(&mut scopes, current_scope.to_owned());
if let Some(base_ref) = base_ref {
push_unique(&mut scopes, normalize_ref(base_ref));
}
push_unique(&mut scopes, normalize_ref(default_branch));
scopes
}
/// Turns a branch name or ref into a full ref string.
///
/// Surrounding whitespace is removed. A value that already starts with `refs/` is returned
/// as is; anything else is prefixed with `refs/heads/`.
pub fn normalize_ref(value: &str) -> String {
    let name = value.trim();
    match name.strip_prefix("refs/") {
        Some(_) => name.to_owned(),
        None => ["refs/heads/", name].concat(),
    }
}
/// Returns the cache scope for a ref; this is exactly `normalize_ref(value)`.
pub fn scope_from_ref(value: &str) -> String {
normalize_ref(value)
}
/// Appends `scope` to `scopes` unless an equal string is already present.
fn push_unique(scopes: &mut Vec<String>, scope: String) {
    if scopes.contains(&scope) {
        return;
    }
    scopes.push(scope);
}
fn match_from(entry: &CacheEntry, match_kind: CacheMatchKind) -> CacheMatch {
CacheMatch {
entry_id: entry.id.clone(),
key: entry.key.clone(),
version: entry.version.clone(),
scope: entry.scope.clone(),
match_kind,
created_at: entry.created_at,
}
}
/// Derives a 20-hex-character entry id from the key, version, scope and a nonce.
///
/// The four values are hashed back to back (no separators) with SHA-256 and the hex digest
/// is truncated.
fn entry_id(key: &str, version: &str, scope: &str, nonce: i64) -> String {
    let material = format!("{key}{version}{scope}{nonce}");
    let digest = hex::encode(Sha256::digest(material.as_bytes()));
    digest[..20].to_owned()
}
/// Resolves every path input and reports how many files and bytes it covers.
///
/// One `CachePathRecord` is produced per input, in input order. `resolved` is the expanded
/// form of the input; `files` and `bytes` count each distinct file once, even when several
/// glob matches reach the same file (for example a directory and a file inside it).
/// `exists` is true only when at least one file was found.
///
/// # Errors
/// Fails if an input is empty, `~` cannot be expanded, a glob is invalid, or a matched file
/// cannot be inspected.
fn inspect_paths(workspace: &Utf8Path, paths: &[String]) -> Result<Vec<CachePathRecord>> {
    let mut out = Vec::with_capacity(paths.len());
    for input in paths {
        let expanded = expand_path(workspace, input)?;
        let matches = resolve_matches(workspace, input, &expanded)?;

        let mut files = BTreeSet::new();
        let mut bytes = 0;
        for matched in &matches {
            for file in files_under(matched)? {
                // Size a file only the first time it is seen, so `bytes` stays consistent
                // with the de-duplicated `files` count.
                if files.contains(&file) {
                    continue;
                }
                bytes += fs::metadata(&file)
                    .with_context(|| format!("stat cache path file {file}"))?
                    .len();
                files.insert(file);
            }
        }

        // With no matches the set stays empty, which yields the "missing" record
        // (zero files, zero bytes, `exists: false`).
        out.push(CachePathRecord {
            input: input.clone(),
            resolved: expanded,
            files: files.len(),
            bytes,
            exists: !files.is_empty(),
        });
    }
    Ok(out)
}
/// Resolves one path input to the concrete filesystem paths it names.
///
/// `expanded` must be the result of `expand_path` for `input`. A glob input (per
/// `has_glob`) is matched using `expanded` as the pattern, so `~` expansion, whitespace
/// trimming and workspace anchoring apply to globs exactly as they do to plain paths. A
/// non-glob input yields `expanded` itself when it exists and nothing otherwise.
///
/// The workspace parameter is kept for signature compatibility; `expanded` already carries
/// the workspace prefix for relative inputs.
///
/// # Errors
/// Fails if the glob pattern is invalid, a match cannot be read, or a match is not UTF-8.
fn resolve_matches(
    _workspace: &Utf8Path,
    input: &str,
    expanded: &Utf8Path,
) -> Result<Vec<Utf8PathBuf>> {
    if !has_glob(input) {
        return Ok(if expanded.exists() {
            vec![expanded.to_owned()]
        } else {
            Vec::new()
        });
    }

    // Building the pattern from the raw input would glob `~/dir/*` as
    // `<workspace>/~/dir/*`; the expanded path is already correct.
    let pattern = expanded.as_str();
    let mut matches = Vec::new();
    for item in glob(pattern).with_context(|| format!("reading glob pattern {pattern}"))? {
        let path = item.with_context(|| format!("matching glob pattern {pattern}"))?;
        matches.push(
            Utf8PathBuf::from_path_buf(path)
                .map_err(|_| anyhow::anyhow!("glob produced non-UTF-8 path"))?,
        );
    }
    Ok(matches)
}
/// Lists the regular files reachable from `path`.
///
/// A file yields itself, a directory yields every regular file found by walking it, and
/// anything else yields an empty list. Directory entries that the walk fails to read are
/// skipped rather than reported.
///
/// # Errors
/// Fails if a file found during the walk has a non-UTF-8 path.
fn files_under(path: &Utf8Path) -> Result<Vec<Utf8PathBuf>> {
    if path.is_file() {
        return Ok(vec![path.to_owned()]);
    }
    if !path.is_dir() {
        return Ok(Vec::new());
    }
    WalkDir::new(path)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|item| item.file_type().is_file())
        .map(|item| {
            Utf8PathBuf::from_path_buf(item.path().to_path_buf())
                .map_err(|_| anyhow::anyhow!("non-UTF-8 path under {path}"))
        })
        .collect()
}
/// Copies every file under `source` into the entry directory `files_dir`.
///
/// Files inside `workspace` keep their workspace-relative path; files outside it are
/// stored under the flattened name produced by `sanitize_absolute_path`.
///
/// NOTE(review): a workspace file whose relative path begins with a directory literally
/// named like `ABSOLUTE_SENTINEL` lands in the same subtree as outside-workspace files and
/// would be skipped by `restore` — confirm whether that collision matters.
///
/// # Errors
/// Fails if listing `source`, creating a parent directory, or copying a file fails.
fn copy_input_to_entry(
    workspace: &Utf8Path,
    source: &Utf8Path,
    files_dir: &Utf8Path,
) -> Result<()> {
    for file in files_under(source)? {
        let stored_as = if let Ok(inside) = file.strip_prefix(workspace) {
            inside.to_owned()
        } else {
            sanitize_absolute_path(&file)
        };
        let dest = files_dir.join(stored_as);
        if let Some(parent) = dest.parent() {
            fs::create_dir_all(parent)
                .with_context(|| format!("creating cache parent {parent}"))?;
        }
        fs::copy(&file, &dest).with_context(|| format!("copying cache file {file} to {dest}"))?;
    }
    Ok(())
}
/// Flattens a path into a single file name placed under the `ABSOLUTE_SENTINEL` directory.
///
/// Colons are dropped, backslashes are treated as `/`, leading slashes are removed and
/// every remaining `/` becomes `__`.
fn sanitize_absolute_path(path: &Utf8Path) -> Utf8PathBuf {
    let unified: String = path
        .as_str()
        .chars()
        .filter(|ch| *ch != ':')
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect();
    let flat_name = unified.trim_start_matches('/').replace('/', "__");
    Utf8PathBuf::from(ABSOLUTE_SENTINEL).join(flat_name)
}
/// Expands one path input into the path it refers to.
///
/// The input is trimmed. A leading `~` (alone, or followed by `/` or `\`) is replaced by
/// the value of `HOME`, falling back to `USERPROFILE`. An absolute result is returned as
/// is; a relative one is joined onto `workspace`.
///
/// # Errors
/// Fails if the trimmed input is empty, or if `~` is used and neither `HOME` nor
/// `USERPROFILE` is available.
fn expand_path(workspace: &Utf8Path, input: &str) -> Result<Utf8PathBuf> {
    let trimmed = input.trim();
    if trimmed.is_empty() {
        bail!("cache path cannot be empty");
    }

    // `Some(rest)` only for "~", "~/..." and "~\..."; names such as "~user" stay literal.
    let after_tilde = trimmed
        .strip_prefix('~')
        .filter(|rest| rest.is_empty() || rest.starts_with(['/', '\\']));

    let expanded = match after_tilde {
        Some(rest) => {
            let home = std::env::var("HOME")
                .or_else(|_| std::env::var("USERPROFILE"))
                .context("expanding ~ requires HOME or USERPROFILE")?;
            Utf8PathBuf::from(home).join(rest.trim_start_matches(['/', '\\']))
        }
        None => Utf8PathBuf::from(trimmed),
    };

    Ok(if expanded.is_absolute() {
        expanded
    } else {
        workspace.join(expanded)
    })
}
/// Reports whether `input` contains any of the glob metacharacters `*`, `?` or `[`.
fn has_glob(input: &str) -> bool {
    input.chars().any(|ch| matches!(ch, '*' | '?' | '['))
}