scribe_scaling/caching.rs

//! Intelligent caching system with persistent storage for scaling results.

use crate::engine::ProcessingResult;
use crate::error::{ScalingError, ScalingResult};
use crate::ScalingConfig;
use blake3::Hasher;
use lru::LruCache;
use serde::{Deserialize, Serialize};
use std::fs;
use std::num::NonZeroUsize;
use std::path::{Path, PathBuf};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use walkdir::WalkDir;

/// Configuration for caching system.
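///
/// # Examples
///
/// A minimal construction sketch. Field values are illustrative, and the
/// `scribe_scaling::caching` import path assumes this module is exported
/// publicly under that name:
///
/// ```no_run
/// use scribe_scaling::caching::CacheConfig;
/// use std::path::PathBuf;
///
/// let config = CacheConfig {
///     enable_persistent_cache: true,
///     memory_cache_size: 64,
///     compression_enabled: false,
///     cache_dir: Some(PathBuf::from("target/scribe-cache")),
///     cache_ttl: 86_400, // one day
/// };
/// ```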
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheConfig {
    /// Whether to enable persistent caching
    pub enable_persistent_cache: bool,

    /// Size of in-memory cache (number of repository entries to keep)
    pub memory_cache_size: usize,

    /// Whether to enable compression for cached data (not yet honored by
    /// the JSON store in this module)
    pub compression_enabled: bool,

    /// Directory for cache storage (None = use project-local default)
    pub cache_dir: Option<PathBuf>,

    /// Time-to-live for cache entries in seconds (0 = never expire)
    #[serde(default = "CacheConfig::default_ttl")]
    pub cache_ttl: u64,
}

impl CacheConfig {
    /// Default TTL of one hour, used by serde when `cache_ttl` is absent.
    fn default_ttl() -> u64 {
        3600
    }

    /// Cache directory, falling back to a project-local default.
    fn resolved_dir(&self) -> PathBuf {
        self.cache_dir
            .clone()
            .unwrap_or_else(|| PathBuf::from(".scribe-cache"))
    }

    /// Full path of the JSON snapshot inside the cache directory.
    fn cache_file_path(&self) -> PathBuf {
        self.resolved_dir().join("scaling-cache.json")
    }
}

impl Default for CacheConfig {
    fn default() -> Self {
        Self {
            enable_persistent_cache: true,
            memory_cache_size: 128,
            compression_enabled: false,
            cache_dir: None,
            cache_ttl: Self::default_ttl(),
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct CachedProcessingResult {
    repo_hash: u64,
    config_hash: String,
    last_updated_epoch: u64,
    result: ProcessingResult,
}

impl CachedProcessingResult {
    fn is_expired(&self, ttl_seconds: u64) -> bool {
        if ttl_seconds == 0 {
            return false;
        }

        let last_updated = UNIX_EPOCH + Duration::from_secs(self.last_updated_epoch);
        match SystemTime::now().duration_since(last_updated) {
            Ok(elapsed) => elapsed.as_secs() > ttl_seconds,
            // A timestamp in the future means the clock moved backwards;
            // treat the entry as expired rather than trusting it.
            Err(_) => true,
        }
    }
}

/// Cache manager responsible for storing recent scaling results.
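///
/// # Examples
///
/// A round-trip sketch. The import paths assume `caching` and `engine` are
/// public modules of this crate, and `obtain_result` is a hypothetical
/// stand-in for an actual engine run:
///
/// ```no_run
/// use scribe_scaling::caching::{CacheConfig, ProcessingCache};
/// # fn obtain_result() -> scribe_scaling::engine::ProcessingResult { unimplemented!() }
///
/// let mut cache = ProcessingCache::new(CacheConfig::default());
/// let (repo_hash, config_hash) = (42u64, "abc123");
///
/// if cache.get(repo_hash, config_hash).is_none() {
///     cache.insert(repo_hash, config_hash, obtain_result());
/// }
/// cache.flush(); // also happens automatically on drop
/// ```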
pub struct ProcessingCache {
    config: CacheConfig,
    enabled: bool,
    entries: LruCache<String, CachedProcessingResult>,
    dirty: bool,
}

impl ProcessingCache {
    /// Create a new processing cache. Loads persistent data if enabled.
    pub fn new(config: CacheConfig) -> Self {
        let enabled = config.memory_cache_size > 0;
        // LruCache requires a non-zero capacity, so clamp to 1 even when the
        // cache is disabled; `enabled` gates all reads and writes anyway.
        let capacity = NonZeroUsize::new(config.memory_cache_size.max(1)).unwrap();
        let mut cache = Self {
            entries: LruCache::new(capacity),
            enabled,
            dirty: false,
            config,
        };

        if cache.config.enable_persistent_cache && cache.enabled {
            cache.load_from_disk();
        }

        cache
    }

    /// Attempt to retrieve a cached processing result when the repository and
    /// configuration hashes match.
    pub fn get(&mut self, repo_hash: u64, config_hash: &str) -> Option<ProcessingResult> {
        if !self.enabled {
            return None;
        }

        let key = Self::make_key(repo_hash, config_hash);
        let ttl = self.config.cache_ttl;

        // Peek first so an expired entry is evicted without being promoted
        // to most-recently-used.
        if let Some(entry) = self.entries.peek(&key) {
            if entry.is_expired(ttl) {
                self.entries.pop(&key);
                self.dirty = true;
                return None;
            }
        }

        self.entries.get(&key).map(|entry| entry.result.clone())
    }

    /// Store a processing result in the cache.
    pub fn insert(&mut self, repo_hash: u64, config_hash: &str, result: ProcessingResult) {
        if !self.enabled {
            return;
        }

        let key = Self::make_key(repo_hash, config_hash);
        let cached = CachedProcessingResult {
            repo_hash,
            config_hash: config_hash.to_string(),
            last_updated_epoch: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs(),
            result,
        };

        self.entries.put(key, cached);
        self.dirty = true;
    }

    /// Persist cache contents to disk if configured.
    pub fn flush(&mut self) {
        if !self.config.enable_persistent_cache || !self.enabled || !self.dirty {
            return;
        }

        let cache_dir = self.config.resolved_dir();
        if let Err(err) = fs::create_dir_all(&cache_dir) {
            if Self::debug_enabled() {
                eprintln!(
                    "⚠️  Failed to create cache directory {}: {}",
                    cache_dir.display(),
                    err
                );
            }
            return;
        }

        let cache_file = self.config.cache_file_path();
        // LruCache iterates most-recently-used first; load_from_disk reverses
        // the snapshot so recency order survives a reload.
        let snapshot: Vec<&CachedProcessingResult> = self.entries.iter().map(|(_, v)| v).collect();

        match serde_json::to_string_pretty(&snapshot) {
            Ok(serialized) => {
                if let Err(err) = fs::write(&cache_file, serialized) {
                    if Self::debug_enabled() {
                        eprintln!(
                            "⚠️  Failed to write cache file {}: {}",
                            cache_file.display(),
                            err
                        );
                    }
                } else {
                    self.dirty = false;
                }
            }
            Err(err) => {
                if Self::debug_enabled() {
                    eprintln!("⚠️  Failed to serialize cache: {}", err);
                }
            }
        }
    }

    fn load_from_disk(&mut self) {
        let cache_file = self.config.cache_file_path();
        if !cache_file.exists() {
            return;
        }

        match fs::read_to_string(&cache_file) {
            Ok(content) => match serde_json::from_str::<Vec<CachedProcessingResult>>(&content) {
                Ok(entries) => {
                    // The snapshot is stored most-recently-used first, so
                    // insert in reverse to rebuild the original recency order.
                    for entry in entries.into_iter().rev() {
                        let key = Self::make_key(entry.repo_hash, &entry.config_hash);
                        self.entries.put(key, entry);
                    }
                    self.dirty = false;
                }
                Err(err) => {
                    if Self::debug_enabled() {
                        eprintln!(
                            "⚠️  Failed to parse cache file {}: {}",
                            cache_file.display(),
                            err
                        );
                    }
                }
            },
            Err(err) => {
                if Self::debug_enabled() {
                    eprintln!(
                        "⚠️  Failed to read cache file {}: {}",
                        cache_file.display(),
                        err
                    );
                }
            }
        }
    }

    fn make_key(repo_hash: u64, config_hash: &str) -> String {
        format!("{}::{}", repo_hash, config_hash)
    }

    /// Opt-in debug logging, shared by the call sites above.
    fn debug_enabled() -> bool {
        std::env::var("SCRIBE_DEBUG").is_ok()
    }
}

impl Drop for ProcessingCache {
    fn drop(&mut self) {
        self.flush();
    }
}

/// Compute a stable hash representing the current repository state.
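///
/// # Examples
///
/// A sketch of typical use; the path and import are illustrative:
///
/// ```no_run
/// use scribe_scaling::caching::compute_repository_hash;
/// use std::path::Path;
///
/// let hash = compute_repository_hash(Path::new("./my-repo")).expect("hashing failed");
/// println!("repository state hash: {:016x}", hash);
/// ```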
pub fn compute_repository_hash(repo_path: &Path) -> ScalingResult<u64> {
    let mut hasher = Hasher::new();

    // Sort by file name so the hash does not depend on the platform's
    // directory-iteration order; symlinks are skipped entirely.
    for entry in WalkDir::new(repo_path)
        .sort_by_file_name()
        .into_iter()
        .filter_entry(|e| e.file_type().is_dir() || e.file_type().is_file())
    {
        let entry = entry.map_err(|err| {
            ScalingError::path(
                "Failed to traverse repository",
                err.path().unwrap_or(repo_path),
            )
        })?;
        if entry.file_type().is_file() {
            let metadata = entry
                .metadata()
                .map_err(|_| ScalingError::path("Failed to read file metadata", entry.path()))?;

            // Hash the path, size, and mtime rather than file contents, so a
            // repository scan stays cheap while still detecting most changes.
            hasher.update(entry.path().to_string_lossy().as_bytes());
            hasher.update(&metadata.len().to_le_bytes());

            if let Ok(modified) = metadata.modified() {
                if let Ok(duration) = modified.duration_since(UNIX_EPOCH) {
                    hasher.update(&duration.as_secs().to_le_bytes());
                    hasher.update(&duration.subsec_nanos().to_le_bytes());
                }
            }
        }
    }

    // Fold the 256-bit blake3 digest down to a u64 key.
    let digest = hasher.finalize();
    let mut bytes = [0u8; 8];
    bytes.copy_from_slice(&digest.as_bytes()[..8]);
    Ok(u64::from_le_bytes(bytes))
}

/// Compute a stable hash for the scaling configuration.
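///
/// # Examples
///
/// A sketch that assumes `ScalingConfig` implements `Default` (adjust to
/// however the config is actually constructed):
///
/// ```ignore
/// use scribe_scaling::{caching::compute_config_hash, ScalingConfig};
///
/// let hash = compute_config_hash(&ScalingConfig::default());
/// assert_eq!(hash.len(), 64); // blake3 digest rendered as hex
/// ```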
pub fn compute_config_hash(config: &ScalingConfig) -> String {
    match serde_json::to_vec(config) {
        Ok(bytes) => {
            let mut hasher = Hasher::new();
            hasher.update(&bytes);
            hasher.finalize().to_hex().to_string()
        }
        // If the config cannot be serialized, fall back to a shared sentinel
        // key; this disables config-based invalidation rather than failing.
        Err(_) => "default".to_string(),
    }
}
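
#[cfg(test)]
mod tests {
    use super::*;

    // A small illustrative test suite: a sketch that only exercises the pure
    // helpers above, so it needs no fixtures or extra dev-dependencies.

    #[test]
    fn make_key_combines_both_hashes() {
        assert_eq!(ProcessingCache::make_key(42, "abc"), "42::abc");
    }

    #[test]
    fn cache_paths_fall_back_to_project_local_default() {
        let config = CacheConfig::default();
        assert_eq!(config.resolved_dir(), PathBuf::from(".scribe-cache"));
        assert_eq!(
            config.cache_file_path(),
            PathBuf::from(".scribe-cache").join("scaling-cache.json")
        );
    }
}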