use crate::StarError;
use crate::{StarResult, StarTerm, StarTriple};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PatternKey {
pub subject: Option<StarTerm>,
pub predicate: Option<StarTerm>,
pub object: Option<StarTerm>,
pub quoted_subject: bool,
}
impl PatternKey {
pub fn new(
subject: Option<StarTerm>,
predicate: Option<StarTerm>,
object: Option<StarTerm>,
) -> Self {
let quoted_subject = matches!(&subject, Some(StarTerm::QuotedTriple(_)));
Self {
subject,
predicate,
object,
quoted_subject,
}
}
pub fn bound_count(&self) -> usize {
[
self.subject.is_some(),
self.predicate.is_some(),
self.object.is_some(),
]
.iter()
.filter(|&&b| b)
.count()
}
pub fn nesting_depth(&self) -> usize {
let s_depth = self.subject.as_ref().map(term_nesting_depth).unwrap_or(0);
let p_depth = self.predicate.as_ref().map(term_nesting_depth).unwrap_or(0);
let o_depth = self.object.as_ref().map(term_nesting_depth).unwrap_or(0);
s_depth.max(p_depth).max(o_depth)
}
}
fn term_nesting_depth(t: &StarTerm) -> usize {
match t {
StarTerm::QuotedTriple(qt) => {
1 + term_nesting_depth(&qt.subject)
.max(term_nesting_depth(&qt.predicate))
.max(term_nesting_depth(&qt.object))
}
_ => 0,
}
}
#[derive(Debug, Clone)]
pub struct CacheEntry {
pub result: Vec<StarTriple>,
pub hit_count: u64,
pub inserted_at: Instant,
pub last_accessed: Instant,
pub size_bytes: usize,
pub nesting_depth: usize,
}
impl CacheEntry {
pub fn new(result: Vec<StarTriple>, nesting_depth: usize) -> Self {
let size_bytes = estimate_result_size(&result);
let now = Instant::now();
Self {
result,
hit_count: 0,
inserted_at: now,
last_accessed: now,
size_bytes,
nesting_depth,
}
}
pub fn record_hit(&mut self) {
self.hit_count += 1;
self.last_accessed = Instant::now();
}
pub fn eviction_score(&self, policy: &EvictionPolicy) -> f64 {
let age_secs = self.last_accessed.elapsed().as_secs_f64().max(0.001);
let frequency = self.hit_count as f64;
let recency = 1.0 / age_secs;
let depth = self.nesting_depth as f64;
let size_factor = (self.size_bytes as f64 / 1024.0).max(1.0);
(policy.frequency_weight * frequency
+ policy.recency_weight * recency
+ policy.depth_weight * depth)
/ size_factor
}
}
fn estimate_result_size(triples: &[StarTriple]) -> usize {
triples
.iter()
.map(|t| {
estimate_term_size(&t.subject)
+ estimate_term_size(&t.predicate)
+ estimate_term_size(&t.object)
})
.sum()
}
fn estimate_term_size(t: &StarTerm) -> usize {
match t {
StarTerm::NamedNode(n) => n.iri.len() + 8,
StarTerm::BlankNode(b) => b.id.len() + 8,
StarTerm::Literal(l) => l.value.len() + 32,
StarTerm::QuotedTriple(qt) => {
8 + estimate_term_size(&qt.subject)
+ estimate_term_size(&qt.predicate)
+ estimate_term_size(&qt.object)
}
StarTerm::Variable(v) => v.name.len() + 8,
}
}
#[derive(Debug, Clone)]
pub struct EvictionPolicy {
pub frequency_weight: f64,
pub recency_weight: f64,
pub depth_weight: f64,
pub max_bytes: usize,
pub max_entries: usize,
pub ttl: Option<Duration>,
}
impl Default for EvictionPolicy {
fn default() -> Self {
Self {
frequency_weight: 2.0,
recency_weight: 1.0,
depth_weight: 0.5,
max_bytes: 64 * 1024 * 1024, max_entries: 10_000,
ttl: Some(Duration::from_secs(300)), }
}
}
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
pub total_hits: u64,
pub total_misses: u64,
pub total_inserts: u64,
pub total_evictions: u64,
pub current_entries: usize,
pub current_bytes: usize,
}
impl CacheStats {
pub fn hit_ratio(&self) -> f64 {
let total = self.total_hits + self.total_misses;
if total == 0 {
return 0.0;
}
self.total_hits as f64 / total as f64
}
}
struct CacheInner {
entries: HashMap<PatternKey, CacheEntry>,
policy: EvictionPolicy,
stats: CacheStats,
}
impl CacheInner {
fn new(policy: EvictionPolicy) -> Self {
Self {
entries: HashMap::new(),
policy,
stats: CacheStats::default(),
}
}
fn total_bytes(&self) -> usize {
self.entries.values().map(|e| e.size_bytes).sum()
}
fn evict_if_needed(&mut self) {
if let Some(ttl) = self.policy.ttl {
let to_remove: Vec<PatternKey> = self
.entries
.iter()
.filter(|(_, e)| e.inserted_at.elapsed() > ttl)
.map(|(k, _)| k.clone())
.collect();
for k in to_remove {
self.entries.remove(&k);
self.stats.total_evictions += 1;
}
}
while self.entries.len() > self.policy.max_entries
|| self.total_bytes() > self.policy.max_bytes
{
if self.entries.is_empty() {
break;
}
let victim = self
.entries
.iter()
.min_by(|(_, a), (_, b)| {
a.eviction_score(&self.policy)
.partial_cmp(&b.eviction_score(&self.policy))
.unwrap_or(std::cmp::Ordering::Equal)
})
.map(|(k, _)| k.clone());
if let Some(k) = victim {
self.entries.remove(&k);
self.stats.total_evictions += 1;
} else {
break;
}
}
self.stats.current_entries = self.entries.len();
self.stats.current_bytes = self.total_bytes();
}
}
pub struct PatternCache {
inner: Arc<Mutex<CacheInner>>,
}
impl PatternCache {
pub fn new() -> Self {
Self::with_policy(EvictionPolicy::default())
}
pub fn with_policy(policy: EvictionPolicy) -> Self {
Self {
inner: Arc::new(Mutex::new(CacheInner::new(policy))),
}
}
pub fn get(&self, key: &PatternKey) -> StarResult<Option<Vec<StarTriple>>> {
let mut guard = self
.inner
.lock()
.map_err(|_| StarError::processing_error("PatternCache lock poisoned"))?;
if let Some(entry) = guard.entries.get_mut(key) {
entry.record_hit();
let result = entry.result.clone();
guard.stats.total_hits += 1;
Ok(Some(result))
} else {
guard.stats.total_misses += 1;
Ok(None)
}
}
pub fn insert(&self, key: PatternKey, result: Vec<StarTriple>) -> StarResult<()> {
let depth = key.nesting_depth();
let mut guard = self
.inner
.lock()
.map_err(|_| StarError::processing_error("PatternCache lock poisoned"))?;
let entry = CacheEntry::new(result, depth);
guard.entries.insert(key, entry);
guard.stats.total_inserts += 1;
guard.stats.current_entries = guard.entries.len();
guard.stats.current_bytes = guard.total_bytes();
guard.evict_if_needed();
Ok(())
}
pub fn invalidate(&self, key: &PatternKey) -> StarResult<bool> {
let mut guard = self
.inner
.lock()
.map_err(|_| StarError::processing_error("PatternCache lock poisoned"))?;
let removed = guard.entries.remove(key).is_some();
guard.stats.current_entries = guard.entries.len();
guard.stats.current_bytes = guard.total_bytes();
Ok(removed)
}
pub fn clear(&self) -> StarResult<()> {
let mut guard = self
.inner
.lock()
.map_err(|_| StarError::processing_error("PatternCache lock poisoned"))?;
guard.entries.clear();
guard.stats.current_entries = 0;
guard.stats.current_bytes = 0;
Ok(())
}
pub fn stats(&self) -> StarResult<CacheStats> {
let guard = self
.inner
.lock()
.map_err(|_| StarError::processing_error("PatternCache lock poisoned"))?;
Ok(guard.stats.clone())
}
pub fn len(&self) -> StarResult<usize> {
let guard = self
.inner
.lock()
.map_err(|_| StarError::processing_error("PatternCache lock poisoned"))?;
Ok(guard.entries.len())
}
pub fn is_empty(&self) -> StarResult<bool> {
Ok(self.len()? == 0)
}
pub fn trim(&self) -> StarResult<()> {
let mut guard = self
.inner
.lock()
.map_err(|_| StarError::processing_error("PatternCache lock poisoned"))?;
guard.evict_if_needed();
Ok(())
}
pub fn hot_patterns(&self, n: usize) -> StarResult<Vec<(PatternKey, u64)>> {
let guard = self
.inner
.lock()
.map_err(|_| StarError::processing_error("PatternCache lock poisoned"))?;
let mut scored: Vec<(PatternKey, u64)> = guard
.entries
.iter()
.map(|(k, e)| (k.clone(), e.hit_count))
.collect();
scored.sort_by_key(|b| std::cmp::Reverse(b.1));
scored.truncate(n);
Ok(scored)
}
pub fn prefetch(&self, key: PatternKey, result: Vec<StarTriple>) -> StarResult<()> {
self.insert(key, result)
}
}
impl Default for PatternCache {
fn default() -> Self {
Self::new()
}
}
pub fn cached_pattern_eval<F>(
cache: &PatternCache,
key: PatternKey,
evaluate: F,
) -> StarResult<Vec<StarTriple>>
where
F: FnOnce() -> StarResult<Vec<StarTriple>>,
{
if let Some(cached) = cache.get(&key)? {
return Ok(cached);
}
let result = evaluate()?;
cache.insert(key, result.clone())?;
Ok(result)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{StarTerm, StarTriple};
use std::thread;
fn make_triple(s: &str, p: &str, o: &str) -> StarTriple {
StarTriple::new(
StarTerm::iri(s).unwrap(),
StarTerm::iri(p).unwrap(),
StarTerm::iri(o).unwrap(),
)
}
fn make_key(s: Option<&str>, p: Option<&str>, o: Option<&str>) -> PatternKey {
PatternKey::new(
s.map(|v| StarTerm::iri(v).unwrap()),
p.map(|v| StarTerm::iri(v).unwrap()),
o.map(|v| StarTerm::iri(v).unwrap()),
)
}
fn sample_triples(n: usize) -> Vec<StarTriple> {
(0..n)
.map(|i| {
make_triple(
&format!("http://ex.org/s{i}"),
"http://ex.org/p",
&format!("http://ex.org/o{i}"),
)
})
.collect()
}
#[test]
fn test_pattern_key_bound_count() {
let k0 = make_key(None, None, None);
assert_eq!(k0.bound_count(), 0);
let k1 = make_key(Some("http://ex.org/s"), None, None);
assert_eq!(k1.bound_count(), 1);
let k3 = make_key(
Some("http://ex.org/s"),
Some("http://ex.org/p"),
Some("http://ex.org/o"),
);
assert_eq!(k3.bound_count(), 3);
}
#[test]
fn test_pattern_key_nesting_depth_simple() {
let k = make_key(Some("http://ex.org/s"), None, None);
assert_eq!(k.nesting_depth(), 0);
}
#[test]
fn test_pattern_key_nesting_depth_quoted() {
let inner = StarTriple::new(
StarTerm::iri("http://ex.org/a").unwrap(),
StarTerm::iri("http://ex.org/b").unwrap(),
StarTerm::iri("http://ex.org/c").unwrap(),
);
let k = PatternKey::new(Some(StarTerm::QuotedTriple(Box::new(inner))), None, None);
assert_eq!(k.nesting_depth(), 1);
assert!(k.quoted_subject);
}
#[test]
fn test_pattern_key_equality() {
let k1 = make_key(Some("http://ex.org/s"), None, None);
let k2 = make_key(Some("http://ex.org/s"), None, None);
assert_eq!(k1, k2);
}
#[test]
fn test_pattern_key_inequality() {
let k1 = make_key(Some("http://ex.org/s"), None, None);
let k2 = make_key(Some("http://ex.org/OTHER"), None, None);
assert_ne!(k1, k2);
}
#[test]
fn test_cache_entry_hit_count() {
let triples = sample_triples(3);
let mut entry = CacheEntry::new(triples, 0);
assert_eq!(entry.hit_count, 0);
entry.record_hit();
entry.record_hit();
assert_eq!(entry.hit_count, 2);
}
#[test]
fn test_cache_entry_size_estimate() {
let triples = sample_triples(10);
let entry = CacheEntry::new(triples, 0);
assert!(entry.size_bytes > 0, "Size estimate should be > 0");
}
#[test]
fn test_cache_entry_eviction_score_increases_with_hits() {
let triples = sample_triples(1);
let policy = EvictionPolicy::default();
let entry1 = CacheEntry::new(triples.clone(), 0);
let mut entry2 = CacheEntry::new(triples, 0);
for _ in 0..100 {
entry2.record_hit();
}
let score1 = entry1.eviction_score(&policy);
let score2 = entry2.eviction_score(&policy);
assert!(
score2 > score1,
"Higher hit count should give higher eviction score (harder to evict)"
);
}
#[test]
fn test_cache_entry_eviction_score_depth_increases_score() {
let triples = sample_triples(1);
let policy = EvictionPolicy::default();
let shallow = CacheEntry::new(triples.clone(), 0);
let deep = CacheEntry::new(triples, 5);
let s_shallow = shallow.eviction_score(&policy);
let s_deep = deep.eviction_score(&policy);
assert!(
s_deep > s_shallow,
"Deeper pattern should be harder to evict ({s_shallow} vs {s_deep})"
);
}
#[test]
fn test_cache_miss() {
let cache = PatternCache::new();
let key = make_key(None, None, None);
let result = cache.get(&key).unwrap();
assert!(result.is_none());
}
#[test]
fn test_cache_insert_and_hit() {
let cache = PatternCache::new();
let key = make_key(Some("http://ex.org/s"), None, None);
let triples = sample_triples(5);
cache.insert(key.clone(), triples.clone()).unwrap();
let result = cache.get(&key).unwrap();
assert!(result.is_some());
assert_eq!(result.unwrap().len(), 5);
}
#[test]
fn test_cache_stats_hits_and_misses() {
let cache = PatternCache::new();
let key = make_key(Some("http://ex.org/s"), None, None);
cache.get(&key).unwrap(); cache.insert(key.clone(), sample_triples(3)).unwrap();
cache.get(&key).unwrap(); cache.get(&key).unwrap(); let stats = cache.stats().unwrap();
assert_eq!(stats.total_misses, 1);
assert_eq!(stats.total_hits, 2);
assert!((stats.hit_ratio() - 2.0 / 3.0).abs() < 1e-10);
}
#[test]
fn test_cache_invalidate() {
let cache = PatternCache::new();
let key = make_key(Some("http://ex.org/s"), None, None);
cache.insert(key.clone(), sample_triples(2)).unwrap();
let removed = cache.invalidate(&key).unwrap();
assert!(removed);
let result = cache.get(&key).unwrap();
assert!(result.is_none());
}
#[test]
fn test_cache_invalidate_nonexistent() {
let cache = PatternCache::new();
let key = make_key(Some("http://ex.org/s"), None, None);
let removed = cache.invalidate(&key).unwrap();
assert!(!removed);
}
#[test]
fn test_cache_clear() {
let cache = PatternCache::new();
for i in 0..5 {
let key = make_key(Some(&format!("http://ex.org/s{i}")), None, None);
cache.insert(key, sample_triples(3)).unwrap();
}
assert_eq!(cache.len().unwrap(), 5);
cache.clear().unwrap();
assert_eq!(cache.len().unwrap(), 0);
}
#[test]
fn test_cache_max_entries_eviction() {
let policy = EvictionPolicy {
max_entries: 5,
max_bytes: usize::MAX,
ttl: None,
..Default::default()
};
let cache = PatternCache::with_policy(policy);
for i in 0..10 {
let key = make_key(Some(&format!("http://ex.org/s{i}")), None, None);
cache.insert(key, sample_triples(1)).unwrap();
}
assert!(
cache.len().unwrap() <= 5,
"Cache should not exceed max_entries, got {}",
cache.len().unwrap()
);
let stats = cache.stats().unwrap();
assert!(
stats.total_evictions > 0,
"Some evictions should have occurred"
);
}
#[test]
fn test_cache_ttl_eviction() {
let policy = EvictionPolicy {
ttl: Some(Duration::from_millis(10)), max_entries: 1000,
max_bytes: usize::MAX,
..Default::default()
};
let cache = PatternCache::with_policy(policy);
let key = make_key(Some("http://ex.org/s"), None, None);
cache.insert(key.clone(), sample_triples(3)).unwrap();
assert!(cache.get(&key).unwrap().is_some(), "Should hit before TTL");
thread::sleep(Duration::from_millis(50));
let key2 = make_key(None, Some("http://ex.org/p"), None);
cache.insert(key2, sample_triples(1)).unwrap();
let found = cache.get(&key).unwrap();
let stats = cache.stats().unwrap();
let _ = (found, stats); }
#[test]
fn test_cache_len_and_is_empty() {
let cache = PatternCache::new();
assert!(cache.is_empty().unwrap());
let key = make_key(None, None, None);
cache.insert(key, sample_triples(1)).unwrap();
assert!(!cache.is_empty().unwrap());
assert_eq!(cache.len().unwrap(), 1);
}
#[test]
fn test_cache_hot_patterns() {
let cache = PatternCache::new();
let keys: Vec<PatternKey> = (0..5)
.map(|i| make_key(Some(&format!("http://ex.org/s{i}")), None, None))
.collect();
for k in &keys {
cache.insert(k.clone(), sample_triples(1)).unwrap();
}
for _ in 0..10 {
cache.get(&keys[2]).unwrap();
}
for _ in 0..3 {
cache.get(&keys[0]).unwrap();
}
let hot = cache.hot_patterns(2).unwrap();
assert_eq!(hot.len(), 2);
assert_eq!(hot[0].0, keys[2], "key[2] should be hottest");
}
#[test]
fn test_cache_prefetch() {
let cache = PatternCache::new();
let key = make_key(Some("http://ex.org/s"), None, None);
cache.prefetch(key.clone(), sample_triples(4)).unwrap();
let result = cache.get(&key).unwrap();
assert!(result.is_some());
assert_eq!(result.unwrap().len(), 4);
}
#[test]
fn test_cached_pattern_eval_miss_then_hit() {
let cache = PatternCache::new();
let key = make_key(Some("http://ex.org/s"), None, None);
let mut call_count = 0usize;
let result = cached_pattern_eval(&cache, key.clone(), || {
call_count += 1;
Ok(sample_triples(3))
})
.unwrap();
assert_eq!(result.len(), 3);
assert_eq!(call_count, 1);
let result2 = cached_pattern_eval(&cache, key.clone(), || {
call_count += 1; Ok(sample_triples(99))
})
.unwrap();
assert_eq!(result2.len(), 3, "Should return cached value");
assert_eq!(call_count, 1, "Evaluator should not be called on cache hit");
}
#[test]
fn test_cached_pattern_eval_propagates_error() {
let cache = PatternCache::new();
let key = make_key(None, None, None);
let result = cached_pattern_eval(&cache, key, || {
Err(crate::StarError::processing_error("test error"))
});
assert!(result.is_err());
}
#[test]
fn test_cache_stats_insert_count() {
let cache = PatternCache::new();
for i in 0..7 {
let key = make_key(Some(&format!("http://ex.org/s{i}")), None, None);
cache.insert(key, sample_triples(1)).unwrap();
}
let stats = cache.stats().unwrap();
assert_eq!(stats.total_inserts, 7);
}
#[test]
fn test_eviction_policy_defaults() {
let policy = EvictionPolicy::default();
assert!(policy.max_entries > 0);
assert!(policy.max_bytes > 0);
assert!(policy.ttl.is_some());
}
#[test]
fn test_estimate_result_size_grows_with_triples() {
let small = estimate_result_size(&sample_triples(10));
let large = estimate_result_size(&sample_triples(100));
assert!(
large > small,
"Larger result should have larger size estimate"
);
}
#[test]
fn test_cache_trim_triggers_eviction() {
let policy = EvictionPolicy {
max_entries: 3,
max_bytes: usize::MAX,
ttl: None,
..Default::default()
};
let cache = PatternCache::with_policy(policy);
for i in 0..3 {
let key = make_key(Some(&format!("http://ex.org/s{i}")), None, None);
cache.insert(key, sample_triples(1)).unwrap();
}
assert_eq!(cache.len().unwrap(), 3);
let extra = make_key(Some("http://ex.org/extra"), None, None);
cache.insert(extra, sample_triples(1)).unwrap();
cache.trim().unwrap();
assert!(
cache.len().unwrap() <= 3,
"After trim, entries should not exceed 3"
);
}
}