tensorlogic_oxirs_bridge/schema/
cache.rs1use anyhow::{Context, Result};
81use serde::{Deserialize, Serialize};
82use std::collections::hash_map::DefaultHasher;
83use std::collections::HashMap;
84use std::hash::{Hash, Hasher};
85use std::path::{Path, PathBuf};
86use std::time::{Duration, SystemTime};
87use tensorlogic_adapters::SymbolTable;
88
89use super::{ClassInfo, PropertyInfo};
90
/// Owned result of a schema lookup: the class map first, the property map second.
///
/// This is the tuple shape returned by `SchemaCache::get_schema` and
/// `PersistentCache::load_schema`.
type ParsedSchema = (
    indexmap::IndexMap<String, ClassInfo>,
    indexmap::IndexMap<String, PropertyInfo>,
);
96
/// A cached value together with the bookkeeping used for expiry and eviction.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CacheEntry<T> {
    /// The cached payload.
    value: T,
    /// When the entry was created; expiry is measured from this instant.
    created_at: SystemTime,
    /// When the entry was created or last read through `access`; eviction
    /// removes the entry with the smallest value.
    last_accessed: SystemTime,
    /// Number of reads served through `access`.
    access_count: usize,
}
107
108impl<T> CacheEntry<T> {
109 fn new(value: T) -> Self {
110 let now = SystemTime::now();
111 Self {
112 value,
113 created_at: now,
114 last_accessed: now,
115 access_count: 0,
116 }
117 }
118
119 fn access(&mut self) -> &T {
120 self.last_accessed = SystemTime::now();
121 self.access_count += 1;
122 &self.value
123 }
124
125 fn is_expired(&self, ttl: Duration) -> bool {
126 self.created_at
127 .elapsed()
128 .map(|age| age > ttl)
129 .unwrap_or(false)
130 }
131}
132
/// The payload stored for a cached schema; also the structure that
/// `PersistentCache` serializes to and deserializes from its schema files.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SchemaCacheData {
    /// Class definitions keyed by string.
    classes: indexmap::IndexMap<String, ClassInfo>,
    /// Property definitions keyed by string.
    properties: indexmap::IndexMap<String, PropertyInfo>,
}
141
/// In-memory cache for parsed schemas and symbol tables, keyed by a 64-bit
/// hash of the source content, with time-based expiry and a size limit.
#[derive(Debug)]
pub struct SchemaCache {
    /// Cached schemas, keyed by the hash of the content they were parsed from.
    schemas: HashMap<u64, CacheEntry<SchemaCacheData>>,

    /// Cached symbol tables, keyed by the hash of the content they were built from.
    symbol_tables: HashMap<u64, CacheEntry<SymbolTable>>,

    /// Age after which an entry is treated as expired.
    ttl: Duration,

    /// Entry limit, checked against each of the two maps separately on insert.
    max_size: usize,

    /// Number of lookups answered from the cache.
    hits: usize,
    /// Number of lookups that found nothing usable (absent or expired).
    misses: usize,
}
251
252impl SchemaCache {
253 pub fn new() -> Self {
267 Self::with_settings(Duration::from_secs(3600), 100)
268 }
269
270 pub fn with_settings(ttl: Duration, max_size: usize) -> Self {
287 Self {
288 schemas: HashMap::new(),
289 symbol_tables: HashMap::new(),
290 ttl,
291 max_size,
292 hits: 0,
293 misses: 0,
294 }
295 }
296
297 fn hash_content(content: &str) -> u64 {
299 let mut hasher = DefaultHasher::new();
300 content.hash(&mut hasher);
301 hasher.finish()
302 }
303
304 pub fn get_schema(&mut self, content: &str) -> Option<ParsedSchema> {
306 let hash = Self::hash_content(content);
307
308 if let Some(entry) = self.schemas.get_mut(&hash) {
309 if !entry.is_expired(self.ttl) {
310 self.hits += 1;
311 let data = entry.access();
312 return Some((data.classes.clone(), data.properties.clone()));
313 } else {
314 self.schemas.remove(&hash);
316 }
317 }
318
319 self.misses += 1;
320 None
321 }
322
323 pub fn put_schema(
325 &mut self,
326 content: &str,
327 classes: indexmap::IndexMap<String, ClassInfo>,
328 properties: indexmap::IndexMap<String, PropertyInfo>,
329 ) {
330 let hash = Self::hash_content(content);
331
332 if self.schemas.len() >= self.max_size {
334 if let Some(oldest_key) = self.find_oldest_schema() {
335 self.schemas.remove(&oldest_key);
336 }
337 }
338
339 self.schemas.insert(
340 hash,
341 CacheEntry::new(SchemaCacheData {
342 classes,
343 properties,
344 }),
345 );
346 }
347
348 pub fn get_symbol_table(&mut self, content: &str) -> Option<SymbolTable> {
350 let hash = Self::hash_content(content);
351
352 if let Some(entry) = self.symbol_tables.get_mut(&hash) {
353 if !entry.is_expired(self.ttl) {
354 self.hits += 1;
355 return Some(entry.access().clone());
356 } else {
357 self.symbol_tables.remove(&hash);
359 }
360 }
361
362 self.misses += 1;
363 None
364 }
365
366 pub fn put_symbol_table(&mut self, content: &str, table: SymbolTable) {
368 let hash = Self::hash_content(content);
369
370 if self.symbol_tables.len() >= self.max_size {
372 if let Some(oldest_key) = self.find_oldest_symbol_table() {
373 self.symbol_tables.remove(&oldest_key);
374 }
375 }
376
377 self.symbol_tables.insert(hash, CacheEntry::new(table));
378 }
379
380 fn find_oldest_schema(&self) -> Option<u64> {
382 self.schemas
383 .iter()
384 .min_by_key(|(_, entry)| entry.last_accessed)
385 .map(|(k, _)| *k)
386 }
387
388 fn find_oldest_symbol_table(&self) -> Option<u64> {
390 self.symbol_tables
391 .iter()
392 .min_by_key(|(_, entry)| entry.last_accessed)
393 .map(|(k, _)| *k)
394 }
395
396 pub fn cleanup_expired(&mut self) {
398 self.schemas.retain(|_, entry| !entry.is_expired(self.ttl));
399 self.symbol_tables
400 .retain(|_, entry| !entry.is_expired(self.ttl));
401 }
402
403 pub fn clear(&mut self) {
405 self.schemas.clear();
406 self.symbol_tables.clear();
407 self.hits = 0;
408 self.misses = 0;
409 }
410
411 pub fn stats(&self) -> CacheStats {
413 CacheStats {
414 schema_entries: self.schemas.len(),
415 symbol_table_entries: self.symbol_tables.len(),
416 total_hits: self.hits,
417 total_misses: self.misses,
418 hit_rate: if self.hits + self.misses > 0 {
419 (self.hits as f64) / ((self.hits + self.misses) as f64)
420 } else {
421 0.0
422 },
423 }
424 }
425}
426
427impl Default for SchemaCache {
428 fn default() -> Self {
429 Self::new()
430 }
431}
432
/// Snapshot of cache occupancy and lookup counters, as produced by
/// `SchemaCache::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheStats {
    /// Number of schemas currently held in memory.
    pub schema_entries: usize,
    /// Number of symbol tables currently held in memory.
    pub symbol_table_entries: usize,
    /// Lookups answered from the cache.
    pub total_hits: usize,
    /// Lookups that found nothing usable.
    pub total_misses: usize,
    /// `total_hits / (total_hits + total_misses)` as a fraction, or `0.0`
    /// when no lookups have been made.
    pub hit_rate: f64,
}
442
443impl std::fmt::Display for CacheStats {
444 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
445 writeln!(f, "Cache Statistics:")?;
446 writeln!(f, " Schema entries: {}", self.schema_entries)?;
447 writeln!(f, " Symbol table entries: {}", self.symbol_table_entries)?;
448 writeln!(f, " Total hits: {}", self.total_hits)?;
449 writeln!(f, " Total misses: {}", self.total_misses)?;
450 writeln!(f, " Hit rate: {:.2}%", self.hit_rate * 100.0)?;
451 Ok(())
452 }
453}
454
/// A cache that keeps entries in JSON files under a directory and fronts
/// them with an in-memory `SchemaCache`.
pub struct PersistentCache {
    /// Directory that holds the serialized cache files.
    cache_dir: PathBuf,
    /// In-memory layer consulted before the file system.
    in_memory: SchemaCache,
}
460
461impl PersistentCache {
462 pub fn new(cache_dir: impl AsRef<Path>) -> Result<Self> {
464 let cache_dir = cache_dir.as_ref().to_path_buf();
465 std::fs::create_dir_all(&cache_dir).context("Failed to create cache directory")?;
466
467 Ok(Self {
468 cache_dir,
469 in_memory: SchemaCache::new(),
470 })
471 }
472
473 fn cache_path(&self, content: &str, suffix: &str) -> PathBuf {
475 let hash = SchemaCache::hash_content(content);
476 self.cache_dir.join(format!("{:016x}.{}", hash, suffix))
477 }
478
479 pub fn load_symbol_table(&mut self, content: &str) -> Result<Option<SymbolTable>> {
481 if let Some(table) = self.in_memory.get_symbol_table(content) {
483 return Ok(Some(table));
484 }
485
486 let path = self.cache_path(content, "symboltable.json");
488 if path.exists() {
489 let json = std::fs::read_to_string(&path).context("Failed to read cache file")?;
490 let table: SymbolTable =
491 serde_json::from_str(&json).context("Failed to deserialize SymbolTable")?;
492
493 self.in_memory.put_symbol_table(content, table.clone());
495
496 return Ok(Some(table));
497 }
498
499 Ok(None)
500 }
501
502 pub fn save_symbol_table(&mut self, content: &str, table: &SymbolTable) -> Result<()> {
504 self.in_memory.put_symbol_table(content, table.clone());
506
507 let path = self.cache_path(content, "symboltable.json");
509 let json =
510 serde_json::to_string_pretty(table).context("Failed to serialize SymbolTable")?;
511 std::fs::write(&path, json).context("Failed to write cache file")?;
512
513 Ok(())
514 }
515
516 pub fn load_schema(&mut self, content: &str) -> Result<Option<ParsedSchema>> {
518 if let Some(result) = self.in_memory.get_schema(content) {
520 return Ok(Some(result));
521 }
522
523 let path = self.cache_path(content, "schema.json");
525 if path.exists() {
526 let json = std::fs::read_to_string(&path).context("Failed to read cache file")?;
527 let data: SchemaCacheData =
528 serde_json::from_str(&json).context("Failed to deserialize schema")?;
529
530 self.in_memory
532 .put_schema(content, data.classes.clone(), data.properties.clone());
533
534 return Ok(Some((data.classes, data.properties)));
535 }
536
537 Ok(None)
538 }
539
540 pub fn save_schema(
542 &mut self,
543 content: &str,
544 classes: &indexmap::IndexMap<String, ClassInfo>,
545 properties: &indexmap::IndexMap<String, PropertyInfo>,
546 ) -> Result<()> {
547 self.in_memory
549 .put_schema(content, classes.clone(), properties.clone());
550
551 let path = self.cache_path(content, "schema.json");
553 let data = SchemaCacheData {
554 classes: classes.clone(),
555 properties: properties.clone(),
556 };
557 let json = serde_json::to_string_pretty(&data).context("Failed to serialize schema")?;
558 std::fs::write(&path, json).context("Failed to write cache file")?;
559
560 Ok(())
561 }
562
563 pub fn clear_all(&mut self) -> Result<()> {
565 self.in_memory.clear();
566
567 for entry in std::fs::read_dir(&self.cache_dir)? {
568 let entry = entry?;
569 if entry.path().is_file() {
570 std::fs::remove_file(entry.path())?;
571 }
572 }
573
574 Ok(())
575 }
576
577 pub fn stats(&self) -> CacheStats {
579 self.in_memory.stats()
580 }
581}
582
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;
    use std::time::Duration;

    /// A lookup before insertion is a miss; the same lookup afterwards is a hit.
    #[test]
    fn test_schema_cache_basic() {
        let mut cache = SchemaCache::new();

        let content = "@prefix ex: <http://example.org/> .";
        let classes = indexmap::IndexMap::new();
        let properties = indexmap::IndexMap::new();

        // Nothing cached yet.
        assert!(cache.get_schema(content).is_none());
        assert_eq!(cache.stats().total_misses, 1);

        cache.put_schema(content, classes, properties);

        // Now served from the cache.
        assert!(cache.get_schema(content).is_some());
        assert_eq!(cache.stats().total_hits, 1);
    }

    /// Symbol tables follow the same miss-then-hit pattern as schemas.
    #[test]
    fn test_symbol_table_cache() {
        let mut cache = SchemaCache::new();

        let content = "@prefix ex: <http://example.org/> .";
        let table = SymbolTable::new();

        assert!(cache.get_symbol_table(content).is_none());

        cache.put_symbol_table(content, table);

        assert!(cache.get_symbol_table(content).is_some());
    }

    /// An entry is served while younger than the TTL and dropped once older.
    #[test]
    fn test_cache_expiration() {
        let mut cache = SchemaCache::with_settings(Duration::from_millis(100), 10);

        let content = "@prefix ex: <http://example.org/> .";
        let table = SymbolTable::new();

        cache.put_symbol_table(content, table);

        // Still fresh.
        assert!(cache.get_symbol_table(content).is_some());

        // Wait past the 100 ms TTL.
        thread::sleep(Duration::from_millis(150));

        assert!(cache.get_symbol_table(content).is_none());
    }

    /// Inserting a third distinct key into a cache of size two evicts one entry.
    #[test]
    fn test_cache_eviction() {
        let mut cache = SchemaCache::with_settings(Duration::from_secs(3600), 2);

        let table = SymbolTable::new();

        // Fill the cache.
        cache.put_symbol_table("content1", table.clone());
        cache.put_symbol_table("content2", table.clone());

        // One more distinct key forces an eviction.
        cache.put_symbol_table("content3", table);

        assert_eq!(cache.stats().symbol_table_entries, 2);
    }

    /// One miss followed by two hits yields a hit rate of two thirds.
    #[test]
    fn test_cache_stats() {
        let mut cache = SchemaCache::new();

        let content = "@prefix ex: <http://example.org/> .";
        let table = SymbolTable::new();

        cache.get_symbol_table(content); // miss
        cache.put_symbol_table(content, table);
        cache.get_symbol_table(content); // hit
        cache.get_symbol_table(content); // hit

        let stats = cache.stats();
        assert_eq!(stats.total_hits, 2);
        assert_eq!(stats.total_misses, 1);
        assert!((stats.hit_rate - 0.666).abs() < 0.01);
    }

    /// `clear` removes the entries and resets the counters.
    #[test]
    fn test_cache_clear() {
        let mut cache = SchemaCache::new();

        let content = "@prefix ex: <http://example.org/> .";
        let table = SymbolTable::new();

        cache.put_symbol_table(content, table);
        assert_eq!(cache.stats().symbol_table_entries, 1);

        cache.clear();
        assert_eq!(cache.stats().symbol_table_entries, 0);
        assert_eq!(cache.stats().total_hits, 0);
    }

    /// A saved symbol table can be loaded back, and `clear_all` succeeds.
    #[test]
    fn test_persistent_cache() -> Result<()> {
        // Include the process id so concurrent test runs on the same machine
        // cannot delete each other's directory.
        let temp_dir = std::env::temp_dir().join(format!(
            "tensorlogic_oxirs_test_cache_{}",
            std::process::id()
        ));
        std::fs::create_dir_all(&temp_dir)?;

        let mut cache = PersistentCache::new(&temp_dir)?;

        let content = "@prefix ex: <http://example.org/> .";
        let table = SymbolTable::new();

        cache.save_symbol_table(content, &table)?;

        let loaded = cache.load_symbol_table(content)?;
        assert!(loaded.is_some());

        // Clean up both the cache contents and the directory itself.
        cache.clear_all()?;
        std::fs::remove_dir_all(temp_dir)?;

        Ok(())
    }
}