use std::collections::{HashMap, VecDeque};
use std::hash::{Hash, Hasher};
use std::mem::{size_of, size_of_val};
use super::{SyntheticConfig, SyntheticGenerator};
use crate::error::Result;
/// Lookup key for a cached generation run.
///
/// Two runs share a key when both their seed hash and their configuration
/// hash are equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct CacheKey {
    /// Hash computed over the seed inputs.
    seed_hash: u64,
    /// Hash computed over the generation configuration.
    config_hash: u64,
}

impl CacheKey {
    /// Builds a key from an already-computed seed hash and configuration hash.
    fn new(seed_hash: u64, config_hash: u64) -> Self {
        CacheKey {
            seed_hash,
            config_hash,
        }
    }
}
/// One stored result together with its bookkeeping data.
#[derive(Debug, Clone)]
struct CacheEntry<T> {
    /// The cached samples.
    samples: Vec<T>,
    /// Size attributed to this entry when it was stored.
    size_bytes: usize,
    /// Access counter; a freshly created entry starts at zero.
    access_count: usize,
}

impl<T> CacheEntry<T> {
    /// Wraps `samples` in a new entry charged with `size_bytes` and no
    /// recorded accesses.
    fn new(samples: Vec<T>, size_bytes: usize) -> Self {
        let access_count = 0;
        CacheEntry {
            samples,
            size_bytes,
            access_count,
        }
    }
}
/// Size-bounded cache of generated samples with least-recently-used eviction.
#[derive(Debug)]
pub struct SyntheticCache<T> {
/// Stored entries, looked up by their `CacheKey`.
cache: HashMap<CacheKey, CacheEntry<T>>,
/// Keys ordered from least recently used (front) to most recently used (back).
lru_order: VecDeque<CacheKey>,
/// Size budget that eviction compares `current_size` against.
max_size: usize,
/// Running total of the `size_bytes` of the stored entries.
current_size: usize,
/// Counters for hits, misses, evictions and generations.
stats: CacheStats,
}
/// Counters describing how a cache has been used.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
    /// Number of lookups that were hits.
    pub hits: usize,
    /// Number of lookups that were misses.
    pub misses: usize,
    /// Number of evictions.
    pub evictions: usize,
    /// Number of generations.
    pub generations: usize,
}

impl CacheStats {
    /// Returns the fraction of lookups that were hits, in `0.0..=1.0`.
    ///
    /// Yields `0.0` when no lookups have been recorded, so the result is
    /// never NaN.
    #[must_use]
    pub fn hit_rate(&self) -> f32 {
        match self.hits + self.misses {
            0 => 0.0,
            lookups => self.hits as f32 / lookups as f32,
        }
    }
}
impl<T: Clone> SyntheticCache<T> {
    /// Creates an empty cache with a budget of `max_size` estimated bytes.
    ///
    /// A budget of `0` is raised to `1`.
    #[must_use]
    pub fn new(max_size: usize) -> Self {
        Self {
            cache: HashMap::new(),
            lru_order: VecDeque::new(),
            max_size: max_size.max(1),
            current_size: 0,
            stats: CacheStats::default(),
        }
    }

    /// Returns the samples cached for `seeds` and `config`, generating and
    /// caching them first when they are not present.
    ///
    /// A hit marks the entry as most recently used. A miss invokes
    /// `generator`, evicts least-recently-used entries until the new entry
    /// fits the budget, stores a copy and returns the fresh samples.
    ///
    /// NOTE(review): an entry whose estimated size alone exceeds the budget
    /// evicts every existing entry and is still stored, so [`Self::size`] can
    /// end up above the budget. Left unchanged because callers may rely on
    /// the result always being cached.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `generator.generate`. The miss and
    /// generation counters have already been incremented at that point.
    pub fn get_or_generate<G>(
        &mut self,
        seeds: &[G::Input],
        config: &SyntheticConfig,
        generator: &G,
    ) -> Result<Vec<T>>
    where
        G: SyntheticGenerator<Output = T>,
        G::Input: Hash,
    {
        let key = Self::compute_key(seeds, config);
        if let Some(cached) = self.lookup(&key) {
            return Ok(cached);
        }

        // `lookup` has already recorded the miss.
        self.stats.generations += 1;
        let samples = generator.generate(seeds, config)?;
        let size_bytes = Self::estimate_size(&samples);
        self.evict_until_fits(size_bytes);
        self.insert(key, samples.clone(), size_bytes);
        Ok(samples)
    }

    /// Reports whether an entry exists for `seeds` and `config`.
    ///
    /// Does not touch the statistics or the recency order.
    #[must_use]
    pub fn contains<I: Hash>(&self, seeds: &[I], config: &SyntheticConfig) -> bool {
        let key = Self::compute_key(seeds, config);
        self.cache.contains_key(&key)
    }

    /// Returns a copy of the samples cached for `seeds` and `config`, if any.
    ///
    /// Records a hit or a miss; a hit also marks the entry as most recently
    /// used.
    #[must_use]
    pub fn get<I: Hash>(&mut self, seeds: &[I], config: &SyntheticConfig) -> Option<Vec<T>> {
        let key = Self::compute_key(seeds, config);
        self.lookup(&key)
    }

    /// Returns the usage counters collected so far.
    #[must_use]
    pub fn stats(&self) -> &CacheStats {
        &self.stats
    }

    /// Returns the summed estimated size of all stored entries, in bytes.
    #[must_use]
    pub fn size(&self) -> usize {
        self.current_size
    }

    /// Returns the number of stored entries.
    #[must_use]
    pub fn len(&self) -> usize {
        self.cache.len()
    }

    /// Returns `true` when no entries are stored.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.cache.is_empty()
    }

    /// Removes every entry and resets the tracked size to zero.
    ///
    /// The usage counters are left untouched.
    pub fn clear(&mut self) {
        self.cache.clear();
        self.lru_order.clear();
        self.current_size = 0;
    }

    /// Shared lookup path for [`Self::get`] and [`Self::get_or_generate`].
    ///
    /// On a hit: bumps the entry's access count and the hit counter, moves
    /// the key to the most-recently-used position and returns a copy of the
    /// samples. On a miss: bumps the miss counter and returns `None`.
    fn lookup(&mut self, key: &CacheKey) -> Option<Vec<T>> {
        // A single `get_mut` replaces the former `contains_key` + `get_mut`
        // + `expect` sequence.
        let samples = match self.cache.get_mut(key) {
            Some(entry) => {
                entry.access_count += 1;
                entry.samples.clone()
            }
            None => {
                self.stats.misses += 1;
                return None;
            }
        };
        self.stats.hits += 1;
        self.update_lru(key);
        Some(samples)
    }

    /// Derives the cache key from the seed slice and the configuration.
    ///
    /// Only `augmentation_ratio`, `quality_threshold`, `diversity_weight`
    /// and `seed` contribute to the configuration hash.
    /// NOTE(review): if `SyntheticConfig` has further fields that influence
    /// generation, they are not part of the key — confirm.
    fn compute_key<I: Hash>(seeds: &[I], config: &SyntheticConfig) -> CacheKey {
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        seeds.hash(&mut hasher);
        let seed_hash = hasher.finish();

        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        // Floats are hashed through their bit patterns.
        config.augmentation_ratio.to_bits().hash(&mut hasher);
        config.quality_threshold.to_bits().hash(&mut hasher);
        config.diversity_weight.to_bits().hash(&mut hasher);
        config.seed.hash(&mut hasher);
        let config_hash = hasher.finish();

        CacheKey::new(seed_hash, config_hash)
    }

    /// Estimates the size charged for storing `samples`.
    ///
    /// The estimate is shallow: the in-place size of the slice elements, plus
    /// the size of a `Vec<T>` value, plus a fixed 64 bytes. Heap data owned
    /// by the elements themselves (for example the contents of a `String`)
    /// is not counted.
    fn estimate_size(samples: &[T]) -> usize {
        size_of_val(samples) + size_of::<Vec<T>>() + 64
    }

    /// Stores `samples` under `key` as the most recently used entry and adds
    /// `size_bytes` to the tracked size.
    ///
    /// Callers are expected to have established that `key` is not present.
    fn insert(&mut self, key: CacheKey, samples: Vec<T>, size_bytes: usize) {
        let entry = CacheEntry::new(samples, size_bytes);
        self.cache.insert(key.clone(), entry);
        self.lru_order.push_back(key);
        self.current_size += size_bytes;
    }

    /// Moves `key` to the most-recently-used end of the recency queue,
    /// appending it when it was not queued yet.
    fn update_lru(&mut self, key: &CacheKey) {
        if let Some(pos) = self.lru_order.iter().position(|k| k == key) {
            self.lru_order.remove(pos);
        }
        self.lru_order.push_back(key.clone());
    }

    /// Evicts least-recently-used entries until `new_size` more bytes fit
    /// within the budget or nothing is left to evict.
    fn evict_until_fits(&mut self, new_size: usize) {
        // Saturating: an overflowing sum must read as "does not fit" rather
        // than panic (debug) or wrap to a small value (release).
        while self.current_size.saturating_add(new_size) > self.max_size
            && !self.lru_order.is_empty()
        {
            self.evict_lru();
        }
    }

    /// Removes the least recently used entry, if any, and records the
    /// eviction.
    fn evict_lru(&mut self) {
        if let Some(key) = self.lru_order.pop_front() {
            if let Some(entry) = self.cache.remove(&key) {
                self.current_size = self.current_size.saturating_sub(entry.size_bytes);
                self.stats.evictions += 1;
            }
        }
    }
}
impl<T: Clone> Default for SyntheticCache<T> {
    /// Builds a cache via `Self::new` with a size limit of 10 MiB.
    fn default() -> Self {
        const DEFAULT_MAX_BYTES: usize = 10 * 1024 * 1024;
        Self::new(DEFAULT_MAX_BYTES)
    }
}
// Unit tests for this module are kept in `cache_tests.rs` and are only
// compiled for test builds.
#[cfg(test)]
#[path = "cache_tests.rs"]
mod tests;