use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::path::{Path, PathBuf};
use std::time::{Duration, SystemTime};
use tensorlogic_adapters::SymbolTable;
use super::{ClassInfo, PropertyInfo};
/// Parsed schema contents as passed into and returned from the caches in this
/// file: the class map first, the property map second, both keyed by `String`.
type ParsedSchema = (
    indexmap::IndexMap<String, ClassInfo>,
    indexmap::IndexMap<String, PropertyInfo>,
);
/// A cached value together with the bookkeeping used for expiry and eviction.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CacheEntry<T> {
    /// The cached payload.
    value: T,
    /// System time at which the entry was created; `is_expired` measures the
    /// entry's age from this instant.
    created_at: SystemTime,
    /// System time of the most recent `access` call (equal to `created_at`
    /// until the first access).
    last_accessed: SystemTime,
    /// Number of `access` calls made on this entry.
    access_count: usize,
}
impl<T> CacheEntry<T> {
    /// Wraps `value` in a fresh entry: both timestamps are set to the current
    /// system time and the access counter starts at zero.
    fn new(value: T) -> Self {
        let created_at = SystemTime::now();
        Self {
            value,
            created_at,
            last_accessed: created_at,
            access_count: 0,
        }
    }

    /// Records one use of the entry (refreshes `last_accessed`, increments
    /// `access_count`) and returns a reference to the cached value.
    fn access(&mut self) -> &T {
        let touched_at = SystemTime::now();
        self.last_accessed = touched_at;
        self.access_count += 1;
        &self.value
    }

    /// Returns `true` when the entry's age is strictly greater than `ttl`.
    fn is_expired(&self, ttl: Duration) -> bool {
        match self.created_at.elapsed() {
            Ok(age) => age > ttl,
            // `elapsed` fails when `created_at` is later than the current
            // system time; such an entry is treated as not expired.
            Err(_) => false,
        }
    }
}
/// Serializable pairing of the class and property maps of one parsed schema.
/// This is the value type held in the schema cache and the shape written to
/// and read from `schema.json` cache files.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SchemaCacheData {
    /// Class definitions keyed by `String`.
    classes: indexmap::IndexMap<String, ClassInfo>,
    /// Property definitions keyed by `String`.
    properties: indexmap::IndexMap<String, PropertyInfo>,
}
/// In-memory cache of parsed schemas and symbol tables, keyed by a 64-bit hash
/// of the source content, with TTL-based expiry, a size bound and hit/miss
/// counters.
#[derive(Debug)]
pub struct SchemaCache {
    /// Parsed schemas keyed by content hash.
    schemas: HashMap<u64, CacheEntry<SchemaCacheData>>,
    /// Symbol tables keyed by content hash.
    symbol_tables: HashMap<u64, CacheEntry<SymbolTable>>,
    /// Maximum age of an entry before lookups treat it as expired.
    ttl: Duration,
    /// Size bound checked separately against each of the two maps on insert.
    max_size: usize,
    /// Number of lookups that returned a cached value.
    hits: usize,
    /// Number of lookups that returned nothing.
    misses: usize,
}
impl SchemaCache {
pub fn new() -> Self {
Self::with_settings(Duration::from_secs(3600), 100)
}
pub fn with_settings(ttl: Duration, max_size: usize) -> Self {
Self {
schemas: HashMap::new(),
symbol_tables: HashMap::new(),
ttl,
max_size,
hits: 0,
misses: 0,
}
}
fn hash_content(content: &str) -> u64 {
let mut hasher = DefaultHasher::new();
content.hash(&mut hasher);
hasher.finish()
}
pub fn get_schema(&mut self, content: &str) -> Option<ParsedSchema> {
let hash = Self::hash_content(content);
if let Some(entry) = self.schemas.get_mut(&hash) {
if !entry.is_expired(self.ttl) {
self.hits += 1;
let data = entry.access();
return Some((data.classes.clone(), data.properties.clone()));
} else {
self.schemas.remove(&hash);
}
}
self.misses += 1;
None
}
pub fn put_schema(
&mut self,
content: &str,
classes: indexmap::IndexMap<String, ClassInfo>,
properties: indexmap::IndexMap<String, PropertyInfo>,
) {
let hash = Self::hash_content(content);
if self.schemas.len() >= self.max_size {
if let Some(oldest_key) = self.find_oldest_schema() {
self.schemas.remove(&oldest_key);
}
}
self.schemas.insert(
hash,
CacheEntry::new(SchemaCacheData {
classes,
properties,
}),
);
}
pub fn get_symbol_table(&mut self, content: &str) -> Option<SymbolTable> {
let hash = Self::hash_content(content);
if let Some(entry) = self.symbol_tables.get_mut(&hash) {
if !entry.is_expired(self.ttl) {
self.hits += 1;
return Some(entry.access().clone());
} else {
self.symbol_tables.remove(&hash);
}
}
self.misses += 1;
None
}
pub fn put_symbol_table(&mut self, content: &str, table: SymbolTable) {
let hash = Self::hash_content(content);
if self.symbol_tables.len() >= self.max_size {
if let Some(oldest_key) = self.find_oldest_symbol_table() {
self.symbol_tables.remove(&oldest_key);
}
}
self.symbol_tables.insert(hash, CacheEntry::new(table));
}
fn find_oldest_schema(&self) -> Option<u64> {
self.schemas
.iter()
.min_by_key(|(_, entry)| entry.last_accessed)
.map(|(k, _)| *k)
}
fn find_oldest_symbol_table(&self) -> Option<u64> {
self.symbol_tables
.iter()
.min_by_key(|(_, entry)| entry.last_accessed)
.map(|(k, _)| *k)
}
pub fn cleanup_expired(&mut self) {
self.schemas.retain(|_, entry| !entry.is_expired(self.ttl));
self.symbol_tables
.retain(|_, entry| !entry.is_expired(self.ttl));
}
pub fn clear(&mut self) {
self.schemas.clear();
self.symbol_tables.clear();
self.hits = 0;
self.misses = 0;
}
pub fn stats(&self) -> CacheStats {
CacheStats {
schema_entries: self.schemas.len(),
symbol_table_entries: self.symbol_tables.len(),
total_hits: self.hits,
total_misses: self.misses,
hit_rate: if self.hits + self.misses > 0 {
(self.hits as f64) / ((self.hits + self.misses) as f64)
} else {
0.0
},
}
}
}
impl Default for SchemaCache {
fn default() -> Self {
Self::new()
}
}
/// Snapshot of cache occupancy and lookup counters, as produced by
/// `SchemaCache::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheStats {
    /// Number of entries currently in the schema map.
    pub schema_entries: usize,
    /// Number of entries currently in the symbol-table map.
    pub symbol_table_entries: usize,
    /// Lookups that returned a cached value.
    pub total_hits: usize,
    /// Lookups that returned nothing.
    pub total_misses: usize,
    /// Fraction of lookups that were hits, in `0.0..=1.0`; `0.0` when no
    /// lookups have been recorded.
    pub hit_rate: f64,
}
impl std::fmt::Display for CacheStats {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
writeln!(f, "Cache Statistics:")?;
writeln!(f, " Schema entries: {}", self.schema_entries)?;
writeln!(f, " Symbol table entries: {}", self.symbol_table_entries)?;
writeln!(f, " Total hits: {}", self.total_hits)?;
writeln!(f, " Total misses: {}", self.total_misses)?;
writeln!(f, " Hit rate: {:.2}%", self.hit_rate * 100.0)?;
Ok(())
}
}
/// Disk-backed cache: an in-memory [`SchemaCache`] in front of JSON files
/// stored in `cache_dir`.
///
/// Derives `Debug` like the other public types in this file so that values
/// can be logged and embedded in `Debug`-deriving structs.
#[derive(Debug)]
pub struct PersistentCache {
    /// Directory holding the on-disk cache files.
    cache_dir: PathBuf,
    /// In-memory layer consulted before the disk is touched.
    in_memory: SchemaCache,
}
impl PersistentCache {
    /// Opens a persistent cache rooted at `cache_dir`, creating the directory
    /// (and any missing parents) if needed. The in-memory layer starts empty
    /// with the default `SchemaCache` settings.
    ///
    /// # Errors
    ///
    /// Fails if the directory cannot be created.
    pub fn new(cache_dir: impl AsRef<Path>) -> Result<Self> {
        let cache_dir = cache_dir.as_ref().to_path_buf();
        std::fs::create_dir_all(&cache_dir).context("Failed to create cache directory")?;
        Ok(Self {
            cache_dir,
            in_memory: SchemaCache::new(),
        })
    }

    /// Path of the cache file for `content`: the content hash as 16 hex
    /// digits, a dot, then `suffix`, inside the cache directory.
    fn cache_path(&self, content: &str, suffix: &str) -> PathBuf {
        let hash = SchemaCache::hash_content(content);
        self.cache_dir.join(format!("{:016x}.{}", hash, suffix))
    }

    /// Reads a cache file in one step, mapping "file does not exist" to
    /// `Ok(None)`. Attempting the read directly (instead of checking for
    /// existence first) avoids failing when the file disappears between the
    /// check and the read.
    fn read_cache_file(path: &Path) -> Result<Option<String>> {
        match std::fs::read_to_string(path) {
            Ok(json) => Ok(Some(json)),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
            Err(err) => Err(err).context("Failed to read cache file"),
        }
    }

    /// Whether `path` is named like a file written by this cache
    /// (`<stem>.symboltable.json` or `<stem>.schema.json`).
    fn is_cache_file(path: &Path) -> bool {
        path.file_name()
            .and_then(|name| name.to_str())
            .map_or(false, |name| {
                name.ends_with(".symboltable.json") || name.ends_with(".schema.json")
            })
    }

    /// Looks up the symbol table for `content`, first in memory and then on
    /// disk. A table found on disk is copied into the in-memory layer.
    /// Returns `Ok(None)` when neither layer has it. Files on disk are not
    /// subject to the in-memory TTL.
    ///
    /// # Errors
    ///
    /// Fails if an existing cache file cannot be read or deserialized.
    pub fn load_symbol_table(&mut self, content: &str) -> Result<Option<SymbolTable>> {
        if let Some(table) = self.in_memory.get_symbol_table(content) {
            return Ok(Some(table));
        }
        let path = self.cache_path(content, "symboltable.json");
        let json = match Self::read_cache_file(&path)? {
            Some(json) => json,
            None => return Ok(None),
        };
        let table: SymbolTable =
            serde_json::from_str(&json).context("Failed to deserialize SymbolTable")?;
        self.in_memory.put_symbol_table(content, table.clone());
        Ok(Some(table))
    }

    /// Stores `table` for `content` in memory and writes it to disk as
    /// pretty-printed JSON. The in-memory layer is updated first, so it holds
    /// the table even if serialization or the write fails.
    ///
    /// # Errors
    ///
    /// Fails if the table cannot be serialized or the file cannot be written.
    pub fn save_symbol_table(&mut self, content: &str, table: &SymbolTable) -> Result<()> {
        self.in_memory.put_symbol_table(content, table.clone());
        let path = self.cache_path(content, "symboltable.json");
        let json =
            serde_json::to_string_pretty(table).context("Failed to serialize SymbolTable")?;
        std::fs::write(&path, json).context("Failed to write cache file")?;
        Ok(())
    }

    /// Looks up the parsed schema for `content`, first in memory and then on
    /// disk. A schema found on disk is copied into the in-memory layer.
    /// Returns `Ok(None)` when neither layer has it. Files on disk are not
    /// subject to the in-memory TTL.
    ///
    /// # Errors
    ///
    /// Fails if an existing cache file cannot be read or deserialized.
    pub fn load_schema(&mut self, content: &str) -> Result<Option<ParsedSchema>> {
        if let Some(cached) = self.in_memory.get_schema(content) {
            return Ok(Some(cached));
        }
        let path = self.cache_path(content, "schema.json");
        let json = match Self::read_cache_file(&path)? {
            Some(json) => json,
            None => return Ok(None),
        };
        let data: SchemaCacheData =
            serde_json::from_str(&json).context("Failed to deserialize schema")?;
        self.in_memory
            .put_schema(content, data.classes.clone(), data.properties.clone());
        Ok(Some((data.classes, data.properties)))
    }

    /// Stores the parsed schema for `content` in memory and writes it to disk
    /// as pretty-printed JSON. The in-memory layer is updated even if
    /// serialization or the write fails.
    ///
    /// # Errors
    ///
    /// Fails if the schema cannot be serialized or the file cannot be written.
    pub fn save_schema(
        &mut self,
        content: &str,
        classes: &indexmap::IndexMap<String, ClassInfo>,
        properties: &indexmap::IndexMap<String, PropertyInfo>,
    ) -> Result<()> {
        let data = SchemaCacheData {
            classes: classes.clone(),
            properties: properties.clone(),
        };
        // Serialize from the single copy, then move that copy into the
        // in-memory layer; the serialization result is only inspected after
        // the in-memory update so a failure does not skip it.
        let json = serde_json::to_string_pretty(&data).context("Failed to serialize schema");
        self.in_memory
            .put_schema(content, data.classes, data.properties);
        let path = self.cache_path(content, "schema.json");
        std::fs::write(&path, json?).context("Failed to write cache file")?;
        Ok(())
    }

    /// Clears the in-memory layer (including its counters) and deletes the
    /// cache files in the cache directory. Only regular files named like
    /// cache files are removed; subdirectories and unrelated files are left
    /// alone.
    ///
    /// # Errors
    ///
    /// Fails if the directory cannot be listed or a cache file cannot be
    /// removed. The in-memory layer has already been cleared at that point.
    pub fn clear_all(&mut self) -> Result<()> {
        self.in_memory.clear();
        let entries =
            std::fs::read_dir(&self.cache_dir).context("Failed to read cache directory")?;
        for entry in entries {
            let path = entry
                .context("Failed to read cache directory entry")?
                .path();
            if path.is_file() && Self::is_cache_file(&path) {
                std::fs::remove_file(&path).context("Failed to remove cache file")?;
            }
        }
        Ok(())
    }

    /// Statistics of the in-memory layer; disk contents are not counted.
    pub fn stats(&self) -> CacheStats {
        self.in_memory.stats()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;
    use std::time::Duration;

    /// A schema lookup misses before the schema is stored and hits afterwards.
    #[test]
    fn test_schema_cache_basic() {
        let mut cache = SchemaCache::new();
        let content = "@prefix ex: <http://example.org/> .";
        let classes = indexmap::IndexMap::new();
        let properties = indexmap::IndexMap::new();
        assert!(cache.get_schema(content).is_none());
        assert_eq!(cache.stats().total_misses, 1);
        cache.put_schema(content, classes.clone(), properties.clone());
        assert!(cache.get_schema(content).is_some());
        assert_eq!(cache.stats().total_hits, 1);
    }

    /// A symbol table can be stored and retrieved by content.
    #[test]
    fn test_symbol_table_cache() {
        let mut cache = SchemaCache::new();
        let content = "@prefix ex: <http://example.org/> .";
        let table = SymbolTable::new();
        assert!(cache.get_symbol_table(content).is_none());
        cache.put_symbol_table(content, table.clone());
        assert!(cache.get_symbol_table(content).is_some());
    }

    /// An entry is no longer returned once it has outlived the TTL.
    #[test]
    fn test_cache_expiration() {
        let mut cache = SchemaCache::with_settings(Duration::from_millis(100), 10);
        let content = "@prefix ex: <http://example.org/> .";
        let table = SymbolTable::new();
        cache.put_symbol_table(content, table);
        assert!(cache.get_symbol_table(content).is_some());
        thread::sleep(Duration::from_millis(150));
        assert!(cache.get_symbol_table(content).is_none());
    }

    /// Inserting beyond `max_size` keeps the map at the configured bound.
    #[test]
    fn test_cache_eviction() {
        let mut cache = SchemaCache::with_settings(Duration::from_secs(3600), 2);
        let table = SymbolTable::new();
        cache.put_symbol_table("content1", table.clone());
        cache.put_symbol_table("content2", table.clone());
        cache.put_symbol_table("content3", table.clone());
        assert_eq!(cache.stats().symbol_table_entries, 2);
    }

    /// One miss followed by two hits yields a hit rate of two thirds.
    #[test]
    fn test_cache_stats() {
        let mut cache = SchemaCache::new();
        let content = "@prefix ex: <http://example.org/> .";
        let table = SymbolTable::new();
        // Miss: nothing stored yet.
        cache.get_symbol_table(content);
        cache.put_symbol_table(content, table);
        // Two hits.
        cache.get_symbol_table(content);
        cache.get_symbol_table(content);
        let stats = cache.stats();
        assert_eq!(stats.total_hits, 2);
        assert_eq!(stats.total_misses, 1);
        assert!((stats.hit_rate - 0.666).abs() < 0.01);
    }

    /// `clear` drops all entries and resets the counters.
    #[test]
    fn test_cache_clear() {
        let mut cache = SchemaCache::new();
        let content = "@prefix ex: <http://example.org/> .";
        let table = SymbolTable::new();
        cache.put_symbol_table(content, table);
        assert_eq!(cache.stats().symbol_table_entries, 1);
        cache.clear();
        assert_eq!(cache.stats().symbol_table_entries, 0);
        assert_eq!(cache.stats().total_hits, 0);
    }

    /// A saved symbol table can be loaded back through the persistent cache.
    #[test]
    fn test_persistent_cache() -> Result<()> {
        // Use a per-process directory so concurrent test runs on the same
        // machine cannot delete each other's files. `PersistentCache::new`
        // creates the directory itself.
        let temp_dir = std::env::temp_dir().join(format!(
            "tensorlogic_oxirs_test_cache_{}",
            std::process::id()
        ));
        let mut cache = PersistentCache::new(&temp_dir)?;
        let content = "@prefix ex: <http://example.org/> .";
        let table = SymbolTable::new();
        cache.save_symbol_table(content, &table)?;
        let loaded = cache.load_symbol_table(content)?;
        assert!(loaded.is_some());
        cache.clear_all()?;
        std::fs::remove_dir_all(temp_dir)?;
        Ok(())
    }
}