use crate::tdg::storage_backend::{StorageBackend, StorageBackendFactory, StorageConfig};
use crate::tdg::TdgScore;
use anyhow::{anyhow, Result};
use blake3::Hash as Blake3Hash;
use dashmap::DashMap;
use lz4_flex::{compress_prepend_size, decompress_size_prepended};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
/// Identity of an analyzed file: where it lives and what its content hashed to.
///
/// `content_hash` is the key under which `TieredStore::store` files the record
/// in the hot, warm and cold tiers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIdentity {
/// Path associated with the record (the tests use a relative path such as `test.rs`).
pub path: PathBuf,
/// BLAKE3 hash used as the storage key; the tests derive it from the file content.
pub content_hash: Blake3Hash,
/// Size in bytes — presumably of the hashed content (the tests use `content.len()`).
pub size_bytes: u64,
/// Modification time — presumably the file's mtime when analyzed; TODO confirm with the producer.
pub modified_time: SystemTime,
}
/// Per-component detail carried in `FullTdgRecord::components`.
///
/// Nothing in this file populates or interprets these fields beyond
/// serializing them; the notes below are inferred from the field names only.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComponentScores {
/// Named complexity contributions — presumably metric name to value; TODO confirm.
pub complexity_breakdown: HashMap<String, f32>,
/// Presumably identifiers of the places duplicated code was found; TODO confirm.
pub duplication_sources: Vec<String>,
/// Presumably the dependencies that contribute to the coupling score; TODO confirm.
pub coupling_dependencies: Vec<String>,
/// Presumably the items that lack documentation; TODO confirm.
pub doc_missing_items: Vec<String>,
/// Presumably descriptions of consistency violations; TODO confirm.
pub consistency_violations: Vec<String>,
}
/// Structural fingerprint carried in `FullTdgRecord::semantic_sig`.
///
/// This file only stores and serializes the signature; how it is computed is
/// not visible here, so the field notes are hedged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticSignature {
/// Presumably a hash of the AST shape; TODO confirm with the analyzer.
pub ast_structure_hash: u64,
/// Identifier summary; the tests use a comma-separated list such as `test,println`.
pub identifier_pattern: String,
/// Control-flow summary; the tests use a label such as `function_call`.
pub control_flow_pattern: String,
/// Presumably the imports the file depends on; TODO confirm.
pub import_dependencies: Vec<String>,
}
/// Bookkeeping about the analysis run that produced a record.
///
/// `analysis_timestamp` is the only field this file acts on: the store
/// compares its age against the archive threshold to pick a tier.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisMetadata {
/// Version string of the analyzer (the tests use `2.38.0`).
pub analyzer_version: String,
/// Analysis duration in milliseconds, per the field name.
pub analysis_duration_ms: u64,
/// Confidence in the language detection — presumably in `0.0..=1.0`; TODO confirm.
pub language_confidence: f32,
/// When the analysis ran; records older than the archive threshold go to cold storage.
pub analysis_timestamp: SystemTime,
/// Presumably whether the result came from a cache rather than a fresh run; TODO confirm.
pub cache_hit: bool,
}
/// Complete analysis record persisted by `TieredStore`.
///
/// The store serializes this with `bincode` (LZ4-compressed in the warm tier,
/// raw in the cold tier).
// NOTE(review): bincode is a positional format, so reordering or changing
// fields here presumably invalidates previously stored records — confirm
// before editing the layout.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FullTdgRecord {
/// File identity; `identity.content_hash` is the storage key.
pub identity: FileIdentity,
/// Headline score; `grade` and `total` are mirrored into the hot cache.
pub score: TdgScore,
/// Per-component detail behind the score.
pub components: ComponentScores,
/// Structural fingerprint of the analyzed file.
pub semantic_sig: SemanticSignature,
/// Metadata about the analysis run.
pub metadata: AnalysisMetadata,
}
/// Compact, `Copy` summary of a record kept in the in-memory hot tier.
#[derive(Debug, Clone, Copy)]
pub struct HotCacheEntry {
/// Raw 32 bytes of the record's BLAKE3 content hash.
pub content_hash: [u8; 32],
/// The record's grade enum cast to `u8`.
pub grade: u8,
/// The record's total score.
pub total_score: f32,
/// Creation time of this entry in seconds since the Unix epoch; hot-cache cleanup compares against it.
pub timestamp: i64,
}
impl HotCacheEntry {
    /// Summarizes `record` into a hot-cache entry.
    ///
    /// Copies the 32 hash bytes, the grade (as `u8`) and the total score, and
    /// stamps the entry with the current wall-clock time in seconds since the
    /// Unix epoch (0 if the clock reads earlier than the epoch).
    #[must_use]
    pub fn from_record(record: &FullTdgRecord) -> Self {
        let created_at = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map_or(0, |since_epoch| since_epoch.as_secs());
        let score = &record.score;

        Self {
            content_hash: *record.identity.content_hash.as_bytes(),
            grade: score.grade as u8,
            total_score: score.total,
            timestamp: created_at as i64,
        }
    }
}
/// Three-tier store for TDG records, keyed by BLAKE3 content hash.
///
/// * hot — in-memory map of `HotCacheEntry` summaries.
/// * warm — backend holding LZ4-compressed, bincode-encoded records.
/// * cold — backend holding uncompressed bincode records that have been archived.
pub struct TieredStore {
/// In-memory summaries for fast score/grade lookups.
hot: Arc<DashMap<Blake3Hash, HotCacheEntry>>,
/// Backend for recent records (values are LZ4-compressed bincode).
warm_backend: Box<dyn StorageBackend>,
/// Backend for archived records (values are raw bincode).
cold_backend: Box<dyn StorageBackend>,
/// Records whose analysis is older than this many days are archived; the constructors set 30.
archive_after_days: u32,
}
impl TieredStore {
/// Opens a Sled-backed store rooted at `db_path`.
///
/// The warm tier lives in `<db_path>/.pmat/tdg-warm` (128 MB cache,
/// compression on) and the cold tier in `<db_path>/.pmat/tdg-cold`
/// (64 MB cache, compression off).
///
/// # Errors
/// Returns an error if either backend cannot be created.
pub fn new(db_path: impl AsRef<Path>) -> Result<Self> {
    use crate::tdg::storage_backend::StorageBackendType;

    let root = db_path.as_ref();

    let warm_config = StorageConfig {
        backend_type: StorageBackendType::Sled,
        path: Some(root.join(".pmat/tdg-warm")),
        cache_size_mb: Some(128),
        compression: true,
    };
    let cold_config = StorageConfig {
        backend_type: StorageBackendType::Sled,
        path: Some(root.join(".pmat/tdg-cold")),
        cache_size_mb: Some(64),
        compression: false,
    };

    Self::with_config(warm_config, cold_config)
}
/// Builds a store from explicit warm and cold backend configurations.
///
/// The hot cache starts empty and the archive threshold is 30 days.
///
/// # Errors
/// Returns an error if the factory fails to create either backend; the warm
/// backend is created first.
pub fn with_config(warm_config: StorageConfig, cold_config: StorageConfig) -> Result<Self> {
    Ok(Self {
        hot: Arc::new(DashMap::new()),
        warm_backend: StorageBackendFactory::create_from_config(&warm_config)?,
        cold_backend: StorageBackendFactory::create_from_config(&cold_config)?,
        archive_after_days: 30,
    })
}
/// Creates a store whose warm and cold tiers are both in-memory backends.
///
/// The hot cache starts empty and the archive threshold is 30 days.
#[must_use]
pub fn in_memory() -> Self {
    let hot = Arc::new(DashMap::new());
    let warm_backend = StorageBackendFactory::create_in_memory();
    let cold_backend = StorageBackendFactory::create_in_memory();

    Self {
        hot,
        warm_backend,
        cold_backend,
        archive_after_days: 30,
    }
}
/// Persists `record` and then publishes its summary to the hot cache.
///
/// Records younger than the archive threshold are bincode-encoded,
/// LZ4-compressed and written to the warm backend. Records already past the
/// threshold go straight to the cold backend (which also removes any warm
/// copy), skipping the compress-and-write to warm that would be deleted
/// immediately afterwards.
///
/// The hot-cache entry is inserted only after the backend write succeeds, so
/// a failed store never leaves a hot entry with no record behind it.
///
/// # Errors
/// Returns an error if serialization or any backend operation fails; in that
/// case the hot cache is left untouched.
pub async fn store(&self, record: FullTdgRecord) -> Result<()> {
    let hash = record.identity.content_hash;
    // Built up front because `record` may be moved into `archive_to_cold`.
    let hot_entry = HotCacheEntry::from_record(&record);

    if self.should_archive(&record) {
        self.archive_to_cold(record).await?;
    } else {
        let serialized = bincode::serialize(&record)?;
        let compressed = compress_prepend_size(&serialized);
        self.warm_backend.put(hash.as_bytes(), &compressed)?;
    }

    self.hot.insert(hash, hot_entry);
    Ok(())
}
/// Returns a copy of the hot-cache summary stored under `hash`, if any.
#[must_use]
pub fn get_hot(&self, hash: &Blake3Hash) -> Option<HotCacheEntry> {
    let slot = self.hot.get(hash)?;
    Some(*slot.value())
}
/// Loads the full record stored under `hash`.
///
/// The warm backend is consulted first (values there are LZ4-compressed
/// bincode); on a miss the cold backend is tried (raw bincode). Returns
/// `Ok(None)` when neither tier has the key.
///
/// # Errors
/// Returns an error if a backend read, decompression or deserialization fails.
pub async fn retrieve_full(&self, hash: &Blake3Hash) -> Result<Option<FullTdgRecord>> {
    let key = hash.as_bytes();

    if let Some(compressed) = self.warm_backend.get(key)? {
        let raw = decompress_size_prepended(&compressed)?;
        let record = bincode::deserialize(&raw)?;
        return Ok(Some(record));
    }

    match self.cold_backend.get(key)? {
        Some(archived) => Ok(Some(bincode::deserialize(&archived)?)),
        None => Ok(None),
    }
}
/// Reports whether `record`'s analysis is strictly more than
/// `archive_after_days` whole days old.
///
/// A timestamp in the future counts as age zero.
fn should_archive(&self, record: &FullTdgRecord) -> bool {
    const SECONDS_PER_DAY: u64 = 24 * 60 * 60;

    let age_secs = record
        .metadata
        .analysis_timestamp
        .elapsed()
        .map_or(0, |age| age.as_secs());
    let threshold_days = u64::from(self.archive_after_days);

    age_secs / SECONDS_PER_DAY > threshold_days
}
/// Moves `record` into the cold tier.
///
/// The record is written to the cold backend as raw bincode first, and only
/// then is the key deleted from the warm backend.
///
/// # Errors
/// Returns an error if serialization or either backend operation fails.
async fn archive_to_cold(&self, record: FullTdgRecord) -> Result<()> {
    let key = record.identity.content_hash.as_bytes();
    let encoded = bincode::serialize(&record)?;

    self.cold_backend.put(key, &encoded)?;
    self.warm_backend.delete(key)?;
    Ok(())
}
/// Evicts hot-cache entries older than `max_age_seconds` and returns how many
/// were removed.
///
/// An entry is evicted when `now - entry.timestamp` is strictly greater than
/// `max_age_seconds`, with `now` measured in seconds since the Unix epoch.
///
/// `max_age_seconds` values above `i64::MAX` are clamped to `i64::MAX`
/// instead of being reinterpreted as negative by an `as` cast, which would
/// have evicted every entry. The age subtraction saturates, so an extreme
/// timestamp cannot overflow.
#[must_use]
pub fn cleanup_hot_cache(&self, max_age_seconds: u64) -> usize {
    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |since_epoch| {
            i64::try_from(since_epoch.as_secs()).unwrap_or(i64::MAX)
        });
    let max_age = i64::try_from(max_age_seconds).unwrap_or(i64::MAX);

    let mut removed = 0;
    self.hot.retain(|_, entry| {
        let expired = now.saturating_sub(entry.timestamp) > max_age;
        if expired {
            removed += 1;
        }
        !expired
    });
    removed
}
/// Copies every entry from the current warm and cold backends into newly
/// created ones, then switches the store over to them.
///
/// Keys and values are copied byte-for-byte, warm tier first. The hot cache
/// is left as-is. The old backends are replaced only after every entry has
/// been copied.
///
/// # Errors
/// Returns an error if a new backend cannot be created, if an old backend
/// cannot be iterated, or if reading or writing any entry fails. On error the
/// store keeps its original backends. A failure to start iteration used to
/// be ignored, which swapped in empty backends and silently lost the data.
pub async fn migrate_backend(
    &mut self,
    new_warm_config: StorageConfig,
    new_cold_config: StorageConfig,
) -> Result<()> {
    let new_warm = StorageBackendFactory::create_from_config(&new_warm_config)?;
    let new_cold = StorageBackendFactory::create_from_config(&new_cold_config)?;

    for (source, target) in [
        (&self.warm_backend, &new_warm),
        (&self.cold_backend, &new_cold),
    ] {
        for entry in source.iter()? {
            let (key, value) = entry?;
            target.put(&key, &value)?;
        }
    }

    self.warm_backend = new_warm;
    self.cold_backend = new_cold;
    Ok(())
}
/// Flushes the warm backend and then the cold backend.
///
/// # Errors
/// Returns the first flush error; the cold backend is not flushed if the
/// warm flush fails.
pub fn flush(&self) -> Result<()> {
    for backend in [&self.warm_backend, &self.cold_backend] {
        backend.flush()?;
    }
    Ok(())
}
/// Collects a snapshot of entry counts and per-backend statistics.
///
/// Warm and cold counts come from each backend's `"entry_count"` stat and
/// fall back to 0 when the key is missing or not a number. `total_entries`
/// is the plain sum of the three tier counts, and `hot_memory_kb` is
/// `hot_entries * size_of::<HotCacheEntry>() / 1024` (integer division).
///
// NOTE(review): `compression_ratio` (0.33) and both backend names ("sled")
// are constants here, not measured or looked up — they will be reported even
// for in-memory or other backends. Also, `store` writes each record to the
// hot tier *and* a backend, so `total_entries` presumably counts such
// records twice — confirm intent.
#[must_use]
pub fn get_statistics(&self) -> StorageStatistics {
    let entry_count = |stats: &HashMap<String, String>| -> usize {
        stats
            .get("entry_count")
            .and_then(|raw| raw.parse().ok())
            .unwrap_or(0)
    };

    let hot_entries = self.hot.len();
    let warm_stats = self.warm_backend.get_stats();
    let cold_stats = self.cold_backend.get_stats();
    let warm_entries = entry_count(&warm_stats);
    let cold_entries = entry_count(&cold_stats);

    let mut backend_stats = HashMap::new();
    backend_stats.insert("warm".to_string(), warm_stats);
    backend_stats.insert("cold".to_string(), cold_stats);

    StorageStatistics {
        hot_entries,
        warm_entries,
        cold_entries,
        total_entries: hot_entries + warm_entries + cold_entries,
        hot_memory_kb: hot_entries * std::mem::size_of::<HotCacheEntry>() / 1024,
        compression_ratio: 0.33,
        warm_backend: "sled".to_string(),
        cold_backend: "sled".to_string(),
        backend_stats,
    }
}
}
/// Snapshot of tier sizes and backend statistics for a tiered store.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageStatistics {
/// Number of entries in the in-memory hot cache.
pub hot_entries: usize,
/// Entry count reported for the warm tier.
pub warm_entries: usize,
/// Entry count reported for the cold tier.
pub cold_entries: usize,
/// Combined entry count across tiers.
pub total_entries: usize,
/// Estimated hot-cache size in kilobytes.
pub hot_memory_kb: usize,
/// Compression ratio as a fraction; `format_diagnostic` multiplies it by 100 for display.
pub compression_ratio: f32,
/// Display name of the warm backend.
pub warm_backend: String,
/// Display name of the cold backend.
pub cold_backend: String,
/// Raw per-backend statistics, keyed by tier name and then by stat name.
pub backend_stats: HashMap<String, HashMap<String, String>>,
}
impl StorageStatistics {
    /// Renders the statistics as a six-line, human-readable report.
    ///
    /// The first line is a heading, followed by one line each for the hot,
    /// warm and cold tiers, the total, and the compression ratio shown as a
    /// percentage with one decimal place. There is no trailing newline.
    #[must_use]
    pub fn format_diagnostic(&self) -> String {
        [
            "Storage Tiers:".to_string(),
            format!(
                "- Hot (memory): {} entries, {} KB",
                self.hot_entries, self.hot_memory_kb
            ),
            format!(
                "- Warm ({} backend): {} entries",
                self.warm_backend, self.warm_entries
            ),
            format!(
                "- Cold ({} backend): {} entries",
                self.cold_backend, self.cold_entries
            ),
            format!("- Total: {} entries", self.total_entries),
            format!("- Compression ratio: {:.1}%", self.compression_ratio * 100.0),
        ]
        .join("\n")
    }
}
/// Convenience constructors for [`TieredStore`].
pub struct TieredStorageFactory;

impl TieredStorageFactory {
    /// Opens a Sled-backed store rooted at the current user's home directory.
    ///
    /// # Errors
    /// Returns an error if the home directory cannot be determined or the
    /// store cannot be opened.
    pub fn create_default() -> Result<TieredStore> {
        match dirs::home_dir() {
            Some(home) => TieredStore::new(home),
            None => Err(anyhow!("Could not find home directory")),
        }
    }

    /// Opens a Sled-backed store rooted at `path`.
    ///
    /// # Errors
    /// Returns an error if the store cannot be opened.
    pub fn create_at_path(path: impl AsRef<Path>) -> Result<TieredStore> {
        TieredStore::new(path.as_ref())
    }

    /// Creates a store backed entirely by in-memory backends.
    #[must_use]
    pub fn create_in_memory() -> TieredStore {
        TieredStore::in_memory()
    }

    /// Opens a RocksDB-backed store rooted at `path`.
    ///
    /// The warm tier lives in `<path>/.pmat/tdg-warm-rocks` (256 MB cache,
    /// compression on) and the cold tier in `<path>/.pmat/tdg-cold-rocks`
    /// (128 MB cache, compression off).
    ///
    /// # Errors
    /// Returns an error if either backend cannot be created.
    #[cfg(feature = "rocksdb-backend")]
    pub fn create_with_rocksdb(path: impl AsRef<Path>) -> Result<TieredStore> {
        use crate::tdg::storage_backend::StorageBackendType;

        let root = path.as_ref();
        TieredStore::with_config(
            StorageConfig {
                backend_type: StorageBackendType::RocksDb,
                path: Some(root.join(".pmat/tdg-warm-rocks")),
                cache_size_mb: Some(256),
                compression: true,
            },
            StorageConfig {
                backend_type: StorageBackendType::RocksDb,
                path: Some(root.join(".pmat/tdg-cold-rocks")),
                cache_size_mb: Some(128),
                compression: false,
            },
        )
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::tdg::language_simple::Language;
use crate::tdg::Grade;
use tempfile::TempDir;
/// Builds a fully populated record for a one-line Rust snippet, with the
/// modification and analysis timestamps both set to "now".
fn create_test_record() -> FullTdgRecord {
    let content = b"fn test() { println!(\"hello\"); }";
    let source_path = PathBuf::from("test.rs");

    let identity = FileIdentity {
        path: source_path.clone(),
        content_hash: blake3::hash(content),
        size_bytes: content.len() as u64,
        modified_time: SystemTime::now(),
    };

    let score = TdgScore {
        structural_complexity: 20.0,
        semantic_complexity: 18.0,
        duplication_ratio: 19.0,
        coupling_score: 14.0,
        doc_coverage: 9.0,
        consistency_score: 8.0,
        entropy_score: 16.0,
        total: 88.0,
        grade: Grade::AMinus,
        confidence: 0.95,
        language: Language::Rust,
        file_path: Some(source_path),
        penalties_applied: vec![],
    };

    let components = ComponentScores {
        complexity_breakdown: HashMap::new(),
        duplication_sources: vec![],
        coupling_dependencies: vec![],
        doc_missing_items: vec![],
        consistency_violations: vec![],
    };

    let semantic_sig = SemanticSignature {
        ast_structure_hash: 123_456_789,
        identifier_pattern: String::from("test,println"),
        control_flow_pattern: String::from("function_call"),
        import_dependencies: vec![],
    };

    let metadata = AnalysisMetadata {
        analyzer_version: String::from("2.38.0"),
        analysis_duration_ms: 5,
        language_confidence: 1.0,
        analysis_timestamp: SystemTime::now(),
        cache_hit: false,
    };

    FullTdgRecord {
        identity,
        score,
        components,
        semantic_sig,
        metadata,
    }
}
#[tokio::test]
async fn test_tiered_storage_creation() {
let temp_dir = TempDir::new().unwrap();
let storage = TieredStore::new(temp_dir.path()).unwrap();
let stats = storage.get_statistics();
assert_eq!(stats.hot_entries, 0);
assert_eq!(stats.warm_entries, 0);
assert_eq!(stats.cold_entries, 0);
}
#[tokio::test]
async fn test_in_memory_storage() {
let storage = TieredStore::in_memory();
let record = create_test_record();
let hash = record.identity.content_hash;
storage.store(record.clone()).await.unwrap();
let hot_entry = storage.get_hot(&hash).unwrap();
assert_eq!(hot_entry.total_score, 88.0);
assert_eq!(hot_entry.grade, Grade::AMinus as u8);
let retrieved = storage.retrieve_full(&hash).await.unwrap().unwrap();
assert_eq!(retrieved.score.total, record.score.total);
assert_eq!(retrieved.identity.path, record.identity.path);
}
/// Storing into an on-disk store populates the hot cache and round-trips the
/// full record.
#[tokio::test]
async fn test_store_and_retrieve() {
    let workspace = TempDir::new().unwrap();
    let tiers = TieredStore::new(workspace.path()).unwrap();
    let original = create_test_record();
    let key = original.identity.content_hash;

    tiers.store(original.clone()).await.unwrap();

    let cached = tiers.get_hot(&key).unwrap();
    assert_eq!(cached.total_score, 88.0);
    assert_eq!(cached.grade, Grade::AMinus as u8);

    let loaded = tiers.retrieve_full(&key).await.unwrap().unwrap();
    assert_eq!(loaded.score.total, original.score.total);
    assert_eq!(loaded.identity.path, original.identity.path);
}
/// After a store and flush, the reported compression ratio lies strictly
/// between 0 and 1.
// NOTE(review): `get_statistics` currently reports a constant ratio, so this
// checks that constant rather than any measured compression.
#[tokio::test]
async fn test_compression() {
    let workspace = TempDir::new().unwrap();
    let tiers = TieredStore::new(workspace.path()).unwrap();

    tiers.store(create_test_record()).await.unwrap();
    tiers.flush().unwrap();

    let ratio = tiers.get_statistics().compression_ratio;
    assert!(ratio > 0.0);
    assert!(ratio < 1.0);
}
/// An entry stamped one hour ago is evicted by a cleanup with a 30-minute
/// maximum age.
#[test]
fn test_hot_cache_cleanup() {
    let storage = TieredStore::in_memory();

    let now_secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs() as i64;
    let stale_key = blake3::hash(b"old content");
    storage.hot.insert(
        stale_key,
        HotCacheEntry {
            content_hash: *stale_key.as_bytes(),
            grade: Grade::B as u8,
            total_score: 75.0,
            timestamp: now_secs - 3600,
        },
    );

    let removed = storage.cleanup_hot_cache(1800);

    assert_eq!(removed, 1);
    assert!(storage.hot.is_empty());
}
#[tokio::test]
async fn test_backend_migration() {
use crate::tdg::storage_backend::StorageBackendType;
let temp_dir = TempDir::new().unwrap();
let mut storage = TieredStore::new(temp_dir.path()).unwrap();
let record1 = create_test_record();
let record2 = create_test_record();
storage.store(record1.clone()).await.unwrap();
storage.store(record2.clone()).await.unwrap();
let new_warm = StorageConfig {
backend_type: StorageBackendType::InMemory,
path: None,
cache_size_mb: None,
compression: true,
};
let new_cold = StorageConfig {
backend_type: StorageBackendType::InMemory,
path: None,
cache_size_mb: None,
compression: false,
};
storage.migrate_backend(new_warm, new_cold).await.unwrap();
let retrieved = storage
.retrieve_full(&record1.identity.content_hash)
.await
.unwrap();
assert!(retrieved.is_some());
}
}
// Placeholder property tests.
// NOTE(review): neither property calls anything from this module — the first
// asserts `true` for any string, the second asserts that a value drawn from
// `0..1000` is below 1001 — so they always pass and cover no storage code.
#[cfg(test)]
mod property_tests {
use proptest::prelude::*;
proptest! {
// Always passes: the generated string is ignored.
#[test]
fn basic_property_stability(_input in ".*") {
prop_assert!(true);
}
// Always passes: the bound follows directly from the generator's range.
#[test]
fn module_consistency_check(_x in 0u32..1000) {
prop_assert!(_x < 1001);
}
}
}