use std::env;
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use super::dfa_assemble::{AssembleOptions, Pattern};
use super::types::{DfaPackFormat, PackedDfa, PatternError};
// Version tag that `hash_key` feeds into the hasher before anything else.
// Changing this string changes every derived key, so entries written under a
// previous tag stop matching (barring hash collisions).
const CACHE_VERSION: &str = "vyre-std.dfa.v2";
/// Returns the location of the cache entry for `patterns` compiled with `options`.
///
/// The file name is the hex key produced by `hash_key` followed by `.vdfa`,
/// placed inside the directory reported by `cache_dir`. This only computes
/// the path; nothing is read from or written to disk.
#[must_use]
#[inline]
pub fn cache_path(patterns: &[Pattern<'_>], options: AssembleOptions) -> PathBuf {
    let file_name = format!("{}.vdfa", hash_key(patterns, options));
    cache_dir().join(file_name)
}
/// Returns the packed DFA for `patterns`, reusing an on-disk cache entry when one can be read.
///
/// When the `VYRE_NO_CACHE` environment variable is set (to any value, even an
/// empty one) the cache is bypassed entirely: nothing is read and nothing is
/// written. Otherwise the entry at `cache_path(patterns, options)` is tried
/// first; if it cannot be read or decoded the DFA is assembled from scratch
/// and then stored on a best-effort basis.
///
/// # Errors
///
/// Returns the `PatternError` produced by `dfa_assemble`. Cache I/O problems
/// are never surfaced: a failed read falls through to recomputation and a
/// failed write is discarded.
#[inline]
pub fn load_or_compute(
    patterns: &[Pattern<'_>],
    options: AssembleOptions,
) -> Result<PackedDfa, PatternError> {
    let caching_disabled = env::var_os("VYRE_NO_CACHE").is_some();
    if caching_disabled {
        return super::dfa_assemble::dfa_assemble(patterns, options);
    }

    let entry_path = cache_path(patterns, options);
    match read_entry(&entry_path) {
        Ok(cached) => Ok(cached),
        Err(_) => {
            let fresh = super::dfa_assemble::dfa_assemble(patterns, options)?;
            // Storing the result is an optimisation only; the freshly built
            // DFA is still returned when the write fails.
            write_entry(&entry_path, &fresh).ok();
            Ok(fresh)
        }
    }
}
/// Deletes every `.vdfa` entry from the cache directory.
///
/// Files whose names do not end in `.vdfa` are left untouched. A cache
/// directory that does not exist counts as already clear.
///
/// # Errors
///
/// Returns an error when the directory cannot be listed. When deleting an
/// entry fails, the remaining entries are still attempted and the first
/// failure is returned afterwards, so `Ok(())` means no matching entry was
/// left behind by this call. An entry that has already disappeared (for
/// example because another process removed it) is not treated as a failure.
#[inline]
pub fn clear() -> std::io::Result<()> {
    let dir = cache_dir();
    let entries = match fs::read_dir(&dir) {
        Ok(entries) => entries,
        // Listing directly (instead of probing with `exists()` first) avoids a
        // race with concurrent removal and keeps permission errors visible.
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
        Err(err) => return Err(err),
    };

    let mut first_failure = None;
    for entry in entries {
        let entry = entry?;
        // Lossy conversion keeps the ASCII suffix intact, so entries with
        // non-UTF-8 names are matched the same way `size()` matches them.
        if !entry.file_name().to_string_lossy().ends_with(".vdfa") {
            continue;
        }
        match fs::remove_file(entry.path()) {
            Ok(()) => {}
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
            Err(err) => {
                if first_failure.is_none() {
                    first_failure = Some(err);
                }
            }
        }
    }

    match first_failure {
        Some(err) => Err(err),
        None => Ok(()),
    }
}
/// Returns the combined size in bytes of all `.vdfa` entries in the cache directory.
///
/// The result is `0` when the directory does not exist or cannot be listed.
/// Directory entries that cannot be read, and entries whose metadata cannot
/// be queried, are skipped rather than reported.
#[must_use]
#[inline]
pub fn size() -> u64 {
    let dir = cache_dir();
    let entries = match dir.exists().then(|| fs::read_dir(&dir)) {
        Some(Ok(entries)) => entries,
        _ => return 0,
    };

    let mut total: u64 = 0;
    for entry in entries.flatten() {
        let is_cache_entry = entry.file_name().to_string_lossy().ends_with(".vdfa");
        if !is_cache_entry {
            continue;
        }
        if let Ok(metadata) = entry.metadata() {
            total += metadata.len();
        }
    }
    total
}
/// Resolves the directory that holds cached `.vdfa` entries.
///
/// Lookup order:
/// 1. `$XDG_CACHE_HOME/vyre/dfa` when the variable holds an absolute path,
/// 2. `$HOME/.cache/vyre/dfa` when `HOME` is set and non-empty,
/// 3. `.vyre-cache/dfa`, relative to the current working directory.
///
/// An empty or relative `XDG_CACHE_HOME` is ignored, as the XDG Base
/// Directory Specification requires; an empty `HOME` is ignored as well.
/// Without these checks an empty variable would silently produce a path
/// relative to whatever the working directory happens to be.
fn cache_dir() -> PathBuf {
    let xdg_root = env::var_os("XDG_CACHE_HOME")
        .map(PathBuf::from)
        // An empty path is not absolute, so this rejects empty values too.
        .filter(|root| root.is_absolute());
    if let Some(root) = xdg_root {
        return root.join("vyre").join("dfa");
    }

    let home = env::var_os("HOME").filter(|home| !home.is_empty());
    if let Some(home) = home {
        return PathBuf::from(home).join(".cache").join("vyre").join("dfa");
    }

    PathBuf::from(".vyre-cache").join("dfa")
}
fn hash_key(patterns: &[Pattern<'_>], options: AssembleOptions) -> String {
let mut hasher = Fnv1a::new();
hasher.update(CACHE_VERSION.as_bytes());
hasher.update(&[format_tag(options.format), options.minimize as u8]);
hasher.update(&(patterns.len() as u64).to_le_bytes());
for pattern in patterns {
match pattern {
Pattern::Literal(bytes) => {
hasher.update(b"lit");
hasher.update(&(bytes.len() as u64).to_le_bytes());
hasher.update(bytes);
}
Pattern::Regex(source) => {
hasher.update(b"rgx");
hasher.update(&(source.len() as u64).to_le_bytes());
hasher.update(source.as_bytes());
}
}
}
format!("{:016x}", hasher.finish())
}
/// Maps a pack format to the single-byte tag used in cache keys and as the
/// first byte of a cache entry.
///
/// The values must stay in step with the tag decoding in `read_entry`
/// (`0` is `Dense`, `1` is `EquivClass`).
fn format_tag(format: DfaPackFormat) -> u8 {
    const DENSE_TAG: u8 = 0;
    const EQUIV_CLASS_TAG: u8 = 1;

    match format {
        DfaPackFormat::EquivClass => EQUIV_CLASS_TAG,
        DfaPackFormat::Dense => DENSE_TAG,
    }
}
/// Incremental 64-bit FNV-1a hasher.
///
/// Each input byte is XOR-ed into the state, which is then multiplied by the
/// FNV prime with wrapping arithmetic. Feeding the same bytes in several
/// `update` calls gives the same result as feeding them in one call.
struct Fnv1a(u64);

impl Fnv1a {
    /// Initial state (the 64-bit FNV offset basis).
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    /// Multiplier applied after each byte (the 64-bit FNV prime).
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    /// Creates a hasher that has absorbed no input yet.
    fn new() -> Self {
        Self(Self::OFFSET_BASIS)
    }

    /// Absorbs `bytes` into the running hash, in order.
    fn update(&mut self, bytes: &[u8]) {
        self.0 = bytes.iter().fold(self.0, |state, &byte| {
            (state ^ u64::from(byte)).wrapping_mul(Self::PRIME)
        });
    }

    /// Returns the hash of everything absorbed so far; the hasher stays usable.
    fn finish(&self) -> u64 {
        self.0
    }
}
/// Reads and decodes the cache entry stored at `path`.
///
/// Entry layout (all integers little-endian):
///
/// | bytes   | field                                   |
/// |---------|-----------------------------------------|
/// | 0       | format tag (`0` Dense, `1` EquivClass)  |
/// | 1..5    | `start` (`u32`)                         |
/// | 5..9    | `state_count` (`u32`)                   |
/// | 9..17   | payload length (`u64`)                  |
/// | 17..    | payload                                 |
///
/// Bytes after the declared payload are ignored.
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be read, and
/// `ErrorKind::InvalidData` when the header is shorter than 17 bytes, the
/// format tag is unknown, or the declared payload length is larger than the
/// data actually present. No file content can make this function panic.
fn read_entry(path: &Path) -> std::io::Result<PackedDfa> {
    fn invalid(message: &'static str) -> std::io::Error {
        std::io::Error::new(std::io::ErrorKind::InvalidData, message)
    }

    let buf = fs::read(path)?;

    // Destructuring the fixed 17-byte header in one pattern gives every field
    // a compile-time-checked width, with no indexing or `unwrap` involved.
    let [tag, s0, s1, s2, s3, c0, c1, c2, c3, l0, l1, l2, l3, l4, l5, l6, l7, body @ ..] =
        buf.as_slice()
    else {
        return Err(invalid("Fix: truncated cache entry"));
    };

    let format = match *tag {
        0 => DfaPackFormat::Dense,
        1 => DfaPackFormat::EquivClass,
        _ => return Err(invalid("Fix: unknown format tag in cache entry")),
    };
    let start = u32::from_le_bytes([*s0, *s1, *s2, *s3]);
    let state_count = u32::from_le_bytes([*c0, *c1, *c2, *c3]);
    let payload_len_u64 = u64::from_le_bytes([*l0, *l1, *l2, *l3, *l4, *l5, *l6, *l7]);
    let payload_len = usize::try_from(payload_len_u64)
        .map_err(|_| invalid("Fix: cache entry payload_len exceeds addressable memory"))?;

    // The length field is untrusted. `get` compares it against the bytes that
    // are really there, so a huge value is rejected instead of overflowing an
    // offset computation such as `17 + payload_len`.
    let payload = body
        .get(..payload_len)
        .ok_or_else(|| invalid("Fix: cache entry payload length mismatch"))?;

    Ok(PackedDfa {
        format,
        state_count,
        start,
        bytes: payload.to_vec(),
    })
}
/// Serialises `packed` to `path`, creating the parent directory if needed.
///
/// The file starts with a 17-byte header — format tag, `start`,
/// `state_count`, and the payload length as a `u64`, all little-endian —
/// followed by the payload bytes. An existing file at `path` is replaced.
///
/// # Errors
///
/// Returns any I/O error from creating the directory, creating the file, or
/// writing to it.
fn write_entry(path: &Path, packed: &PackedDfa) -> std::io::Result<()> {
    if let Some(dir) = path.parent() {
        fs::create_dir_all(dir)?;
    }

    // Widening cast: the payload length always fits in a u64.
    let payload_len = packed.bytes.len() as u64;

    let mut header = [0u8; 17];
    header[0] = format_tag(packed.format);
    header[1..5].copy_from_slice(&packed.start.to_le_bytes());
    header[5..9].copy_from_slice(&packed.state_count.to_le_bytes());
    header[9..17].copy_from_slice(&payload_len.to_le_bytes());

    let mut out = fs::File::create(path)?;
    out.write_all(&header)?;
    out.write_all(&packed.bytes)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pattern::dfa_assemble::{AssembleOptions, Pattern};

    /// Builds a scratch directory path under the system temp dir, tagged with
    /// `label` and the current time in nanoseconds since the Unix epoch (`0`
    /// if the clock reads earlier than the epoch). The directory is not created.
    fn unique_cache_dir(label: &str) -> PathBuf {
        let nanos = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_nanos(),
            Err(_) => 0,
        };
        std::env::temp_dir().join(format!("vyre-cache-test-{label}-{nanos}"))
    }

    /// Hashing the same input twice within one process yields the same key.
    #[test]
    fn hash_key_is_stable_across_runs() {
        let options = AssembleOptions::default();
        let patterns = [Pattern::Literal(b"hello"), Pattern::Regex("[0-9]+")];
        assert_eq!(hash_key(&patterns, options), hash_key(&patterns, options));
    }

    /// Two different literals must not share a key.
    #[test]
    fn hash_key_differs_for_different_patterns() {
        let options = AssembleOptions::default();
        let hello = [Pattern::Literal(b"hello")];
        let world = [Pattern::Literal(b"world")];
        assert_ne!(hash_key(&hello, options), hash_key(&world, options));
    }

    /// The pack format is part of the key.
    #[test]
    fn hash_key_differs_for_different_options() {
        let patterns = [Pattern::Literal(b"hello")];
        let key_with = |format| {
            hash_key(
                &patterns,
                AssembleOptions {
                    format,
                    minimize: true,
                },
            )
        };
        assert_ne!(
            key_with(DfaPackFormat::Dense),
            key_with(DfaPackFormat::EquivClass)
        );
    }

    /// An entry written with `write_entry` decodes to an equal value with `read_entry`.
    #[test]
    fn write_and_read_roundtrip() {
        let scratch = unique_cache_dir("roundtrip");
        fs::create_dir_all(&scratch).unwrap();
        let entry_path = scratch.join("sample.vdfa");

        let original = super::super::dfa_assemble::dfa_assemble(
            &[Pattern::Literal(b"hi")],
            AssembleOptions::default(),
        )
        .unwrap();

        write_entry(&entry_path, &original).unwrap();
        let decoded = read_entry(&entry_path).unwrap();
        assert_eq!(decoded, original);

        // Cleanup is best effort; a leftover temp directory is harmless.
        fs::remove_dir_all(&scratch).ok();
    }
}