Skip to main content

dlin_core/parser/
cache.rs

1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3use std::time::SystemTime;
4
5use serde::{Deserialize, Serialize};
6
7use super::jinja::JinjaExtraction;
8
/// Directory name used when the cache is stored under the project directory.
const CACHE_DIR: &str = ".dlin_cache";
/// File name of the serialized extraction cache inside the cache directory.
const CACHE_FILENAME: &str = "extraction_cache.json";
12
/// A single cached extraction entry.
///
/// Alongside the extraction it records the file metadata observed at insert
/// time, so a later lookup can tell whether the file changed since then.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CacheEntry {
    /// File modification time (whole seconds since the UNIX epoch).
    mtime_secs: u64,
    /// File size in bytes (secondary check for same-second modifications).
    file_size: u64,
    /// Extraction result recorded for the file.
    extraction: JinjaExtraction,
}
23
/// On-disk cache for minijinja extraction results.
///
/// This is the JSON document written to and read from the cache file.
#[derive(Debug, Serialize, Deserialize)]
struct CacheFile {
    /// dlin version that created this cache.
    /// If the version changes, all entries are invalidated.
    /// `serde(default)`: a file without this field deserializes to an empty
    /// string instead of failing to parse.
    #[serde(default)]
    version: String,
    /// Hash of the macro prefix used during extraction.
    /// If macros change, all entries are invalidated.
    macro_prefix_hash: u64,
    /// Hash of serialized dbt project vars.
    /// If vars change, all entries are invalidated.
    /// `serde(default)`: a file without this field deserializes to `0`, which
    /// is also the hash used for an empty vars map.
    #[serde(default)]
    vars_hash: u64,
    /// Per-file extraction results keyed by relative path.
    entries: HashMap<String, CacheEntry>,
}
41
42/// In-memory extraction cache that can be loaded from and saved to disk
43pub struct ExtractionCache {
44    version: String,
45    macro_prefix_hash: u64,
46    vars_hash: u64,
47    entries: HashMap<String, CacheEntry>,
48    /// `None` when the cache is disabled (no-op mode).
49    cache_path: Option<PathBuf>,
50    dirty: bool,
51}
52
53impl ExtractionCache {
54    /// Create a no-op cache that never reads from or writes to disk.
55    pub fn disabled() -> Self {
56        Self {
57            version: String::new(),
58            macro_prefix_hash: 0,
59            vars_hash: 0,
60            entries: HashMap::new(),
61            cache_path: None,
62            dirty: false,
63        }
64    }
65
66    /// Load the cache from disk, or create an empty one.
67    /// Entries are discarded when the dlin version, macro prefix hash, or vars
68    /// hash doesn't match.
69    ///
70    /// When `cache_dir` is `None`, the cache is stored under
71    /// `<project_dir>/.dlin_cache/extraction_cache.json`. When `cache_dir` is
72    /// provided, the cache file is placed directly inside it.
73    pub fn load(
74        project_dir: &Path,
75        macro_prefix: &str,
76        vars: &HashMap<String, serde_json::Value>,
77        cache_dir: Option<&Path>,
78    ) -> Self {
79        let cache_path = match cache_dir {
80            Some(dir) => dir.join(CACHE_FILENAME),
81            None => project_dir.join(CACHE_DIR).join(CACHE_FILENAME),
82        };
83        let version = env!("CARGO_PKG_VERSION").to_string();
84        let macro_hash = hash_str(macro_prefix);
85        let vars_hash = hash_vars(vars);
86
87        let entries = std::fs::read_to_string(&cache_path)
88            .ok()
89            .and_then(|content| serde_json::from_str::<CacheFile>(&content).ok())
90            .filter(|cf| {
91                cf.version == version
92                    && cf.macro_prefix_hash == macro_hash
93                    && cf.vars_hash == vars_hash
94            })
95            .map(|cf| cf.entries)
96            .unwrap_or_default();
97
98        Self {
99            version,
100            macro_prefix_hash: macro_hash,
101            vars_hash,
102            entries,
103            cache_path: Some(cache_path),
104            dirty: false,
105        }
106    }
107
108    /// Create an empty cache that ignores any existing on-disk entries but
109    /// still writes results to disk on [`save`](Self::save).
110    /// Used by `--refresh-cache` to rebuild the cache from scratch.
111    pub fn fresh(
112        project_dir: &Path,
113        macro_prefix: &str,
114        vars: &HashMap<String, serde_json::Value>,
115        cache_dir: Option<&Path>,
116    ) -> Self {
117        let cache_path = match cache_dir {
118            Some(dir) => dir.join(CACHE_FILENAME),
119            None => project_dir.join(CACHE_DIR).join(CACHE_FILENAME),
120        };
121        Self {
122            version: env!("CARGO_PKG_VERSION").to_string(),
123            macro_prefix_hash: hash_str(macro_prefix),
124            vars_hash: hash_vars(vars),
125            entries: HashMap::new(),
126            cache_path: Some(cache_path),
127            dirty: false,
128        }
129    }
130
131    /// Look up a cached extraction for the given file path.
132    /// Returns `None` if not cached or if the file has been modified.
133    pub fn get(&self, path: &Path, project_dir: &Path) -> Option<&JinjaExtraction> {
134        let key = relative_key(path, project_dir);
135        let entry = self.entries.get(&key)?;
136        let stat = file_stat(path)?;
137        if entry.mtime_secs == stat.mtime_secs && entry.file_size == stat.file_size {
138            Some(&entry.extraction)
139        } else {
140            None
141        }
142    }
143
144    /// Insert an extraction result into the cache.
145    pub fn insert(&mut self, path: &Path, project_dir: &Path, extraction: &JinjaExtraction) {
146        let key = relative_key(path, project_dir);
147        if let Some(stat) = file_stat(path) {
148            self.entries.insert(
149                key,
150                CacheEntry {
151                    mtime_secs: stat.mtime_secs,
152                    file_size: stat.file_size,
153                    extraction: extraction.clone(),
154                },
155            );
156            self.dirty = true;
157        }
158    }
159
160    /// Save the cache to disk if it has been modified.
161    pub fn save(&self) {
162        let cache_path = match (&self.cache_path, self.dirty) {
163            (Some(p), true) => p,
164            _ => return,
165        };
166        let cf = CacheFile {
167            version: self.version.clone(),
168            macro_prefix_hash: self.macro_prefix_hash,
169            vars_hash: self.vars_hash,
170            entries: self.entries.clone(),
171        };
172        if let Some(parent) = cache_path.parent() {
173            if std::fs::create_dir_all(parent).is_err() {
174                crate::warn!("could not create cache directory: {}", parent.display());
175                return;
176            }
177            // Auto-create .gitignore to prevent accidental commits
178            let gitignore = parent.join(".gitignore");
179            if !gitignore.exists()
180                && let Err(e) = std::fs::write(&gitignore, "# Automatically created by dlin\n*\n")
181            {
182                crate::warn!("could not create {}: {}", gitignore.display(), e);
183            }
184        }
185        match serde_json::to_string(&cf) {
186            Ok(json) => {
187                if let Err(e) = std::fs::write(cache_path, json) {
188                    crate::warn!("could not write cache file {}: {}", cache_path.display(), e);
189                }
190            }
191            Err(e) => {
192                crate::warn!("could not serialize cache: {}", e);
193            }
194        }
195    }
196}
197
/// Hash a string with 64-bit FNV-1a.
///
/// The result depends only on the bytes of `s`, so it is stable across runs
/// and suitable for persisting in the cache file.
pub(crate) fn hash_str(s: &str) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const FNV_PRIME: u64 = 0x100000001b3;

    // FNV-1a: xor the byte in first, then multiply by the prime.
    s.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
207
208/// Hash project vars deterministically (sorted keys → JSON → FNV-1a)
209fn hash_vars(vars: &HashMap<String, serde_json::Value>) -> u64 {
210    if vars.is_empty() {
211        return 0;
212    }
213    let mut keys: Vec<&String> = vars.keys().collect();
214    keys.sort();
215    let mut s = String::new();
216    for k in keys {
217        s.push_str(k);
218        s.push('=');
219        s.push_str(&vars[k].to_string());
220        s.push('\n');
221    }
222    hash_str(&s)
223}
224
/// File metadata relevant for cache invalidation
struct FileStat {
    /// Modification time in whole seconds since the UNIX epoch.
    mtime_secs: u64,
    /// File length in bytes.
    file_size: u64,
}
230
231/// Get file modification time and size from a single stat call
232fn file_stat(path: &Path) -> Option<FileStat> {
233    let meta = std::fs::metadata(path).ok()?;
234    let mtime_secs = meta
235        .modified()
236        .ok()?
237        .duration_since(SystemTime::UNIX_EPOCH)
238        .ok()?
239        .as_secs();
240    Some(FileStat {
241        mtime_secs,
242        file_size: meta.len(),
243    })
244}
245
/// Build the cache key for a file: its path relative to `project_dir`.
///
/// Paths that are not located under `project_dir` are used unchanged. Any
/// non-UTF-8 sequences are replaced lossily.
fn relative_key(path: &Path, project_dir: &Path) -> String {
    let relative = match path.strip_prefix(project_dir) {
        Ok(rest) => rest,
        Err(_) => path,
    };
    relative.to_string_lossy().into_owned()
}
253
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::sql::{RefCall, SqlConfig};
    use std::fs;
    use std::time::Duration;
    use tempfile::{TempDir, tempdir};

    /// Create a temporary project directory containing a single `model.sql`.
    /// The returned `TempDir` must stay alive for the duration of the test.
    fn project_with_model() -> (TempDir, PathBuf) {
        let tmp = tempdir().unwrap();
        let sql_file = tmp.path().join("model.sql");
        fs::write(&sql_file, "SELECT 1").unwrap();
        (tmp, sql_file)
    }

    /// Build an extraction holding exactly one unqualified ref.
    fn extraction_with_ref(name: &str) -> JinjaExtraction {
        JinjaExtraction {
            refs: vec![RefCall {
                package: None,
                name: name.to_string(),
                version: None,
            }],
            sources: vec![],
            config: SqlConfig::default(),
        }
    }

    /// Load the default-location cache, insert a default extraction for
    /// `sql_file`, and save it to disk.
    fn populate(
        project_dir: &Path,
        sql_file: &Path,
        macro_prefix: &str,
        vars: &HashMap<String, serde_json::Value>,
    ) {
        let mut cache = ExtractionCache::load(project_dir, macro_prefix, vars, None);
        cache.insert(sql_file, project_dir, &JinjaExtraction::default());
        cache.save();
    }

    /// Move the file's mtime forward without waiting for the clock to advance.
    fn bump_mtime(path: &Path) {
        let later = fs::metadata(path).unwrap().modified().unwrap() + Duration::from_secs(10);
        fs::File::options()
            .write(true)
            .open(path)
            .unwrap()
            .set_modified(later)
            .unwrap();
    }

    #[test]
    fn test_cache_hit() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut cache = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        assert!(cache.get(&sql_file, project_dir).is_none());

        cache.insert(&sql_file, project_dir, &extraction_with_ref("orders"));
        cache.save();

        // Reload from disk
        let cache2 = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        let hit = cache2.get(&sql_file, project_dir).unwrap();
        assert_eq!(hit.refs.len(), 1);
        assert_eq!(hit.refs[0].name, "orders");
    }

    #[test]
    fn test_cache_invalidated_by_macro_change() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();
        populate(project_dir, &sql_file, "prefix_v1", &HashMap::new());

        // Different macro prefix → cache miss
        let cache2 = ExtractionCache::load(project_dir, "prefix_v2", &HashMap::new(), None);
        assert!(cache2.get(&sql_file, project_dir).is_none());
    }

    #[test]
    fn test_cache_invalidated_by_file_change() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();
        populate(project_dir, &sql_file, "prefix", &HashMap::new());

        // Modify file (change both mtime and size). The mtime is set
        // explicitly so the test does not have to sleep for a full second.
        fs::write(&sql_file, "SELECT 1, 2, 3").unwrap();
        bump_mtime(&sql_file);

        let cache2 = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        assert!(cache2.get(&sql_file, project_dir).is_none());
    }

    #[test]
    fn test_cache_invalidated_by_mtime_change() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();
        populate(project_dir, &sql_file, "prefix", &HashMap::new());

        // Same-length edit: only the modification time can reveal the change.
        fs::write(&sql_file, "SELECT 2").unwrap();
        bump_mtime(&sql_file);

        let cache2 = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        assert!(cache2.get(&sql_file, project_dir).is_none());
    }

    #[test]
    fn test_cache_invalidated_by_size_change() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut cache = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());

        // Tamper with the entry to have the correct mtime but wrong size
        let key = relative_key(&sql_file, project_dir);
        if let Some(entry) = cache.entries.get_mut(&key) {
            entry.file_size += 1;
        }

        assert!(cache.get(&sql_file, project_dir).is_none());
    }

    #[test]
    fn test_gitignore_created_on_save() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();
        populate(project_dir, &sql_file, "prefix", &HashMap::new());

        let gitignore = project_dir.join(".dlin_cache/.gitignore");
        assert!(gitignore.exists());
        let content = fs::read_to_string(&gitignore).unwrap();
        assert!(content.contains("*"));
    }

    #[test]
    fn test_gitignore_not_overwritten() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        // Pre-create .gitignore with custom content
        let dlin_dir = project_dir.join(".dlin_cache");
        fs::create_dir_all(&dlin_dir).unwrap();
        fs::write(dlin_dir.join(".gitignore"), "custom\n").unwrap();

        populate(project_dir, &sql_file, "prefix", &HashMap::new());

        let content = fs::read_to_string(dlin_dir.join(".gitignore")).unwrap();
        assert_eq!(content, "custom\n");
    }

    #[test]
    fn test_custom_cache_dir() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();
        let cache_dir = tmp.path().join("my_cache");

        let mut cache =
            ExtractionCache::load(project_dir, "prefix", &HashMap::new(), Some(&cache_dir));
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());
        cache.save();

        // Cache file should be directly in cache_dir, not nested under .dlin_cache/
        assert!(cache_dir.join(CACHE_FILENAME).exists());
        assert!(!cache_dir.join(CACHE_DIR).exists());
    }

    #[test]
    fn test_cache_invalidated_by_version_change() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();
        populate(project_dir, &sql_file, "prefix", &HashMap::new());

        // Tamper with version in the saved file
        let cache_path = project_dir.join(CACHE_DIR).join(CACHE_FILENAME);
        let content = fs::read_to_string(&cache_path).unwrap();
        let mut cf: CacheFile = serde_json::from_str(&content).unwrap();
        cf.version = "0.0.0-fake".to_string();
        fs::write(&cache_path, serde_json::to_string(&cf).unwrap()).unwrap();

        // Reload → entries should be discarded
        let cache2 = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        assert!(cache2.get(&sql_file, project_dir).is_none());
    }

    #[test]
    fn test_cache_invalidated_by_vars_change() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut vars = HashMap::new();
        vars.insert("schema".to_string(), serde_json::json!("staging"));
        populate(project_dir, &sql_file, "prefix", &vars);

        // Different vars → cache miss
        let mut vars2 = HashMap::new();
        vars2.insert("schema".to_string(), serde_json::json!("production"));
        let cache2 = ExtractionCache::load(project_dir, "prefix", &vars2, None);
        assert!(cache2.get(&sql_file, project_dir).is_none());
    }

    #[test]
    fn test_cache_valid_with_same_vars() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut vars = HashMap::new();
        vars.insert("schema".to_string(), serde_json::json!("staging"));
        populate(project_dir, &sql_file, "prefix", &vars);

        // Same vars → cache hit
        let cache2 = ExtractionCache::load(project_dir, "prefix", &vars, None);
        assert!(cache2.get(&sql_file, project_dir).is_some());
    }

    #[test]
    fn test_fresh_ignores_existing_but_saves() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        // Populate cache
        populate(project_dir, &sql_file, "prefix", &HashMap::new());

        // Fresh cache ignores existing entries
        let mut fresh = ExtractionCache::fresh(project_dir, "prefix", &HashMap::new(), None);
        assert!(fresh.get(&sql_file, project_dir).is_none());

        // But can still save new entries
        fresh.insert(&sql_file, project_dir, &extraction_with_ref("fresh_ref"));
        fresh.save();

        // Reload → new entry is there
        let reloaded = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        let hit = reloaded.get(&sql_file, project_dir).unwrap();
        assert_eq!(hit.refs[0].name, "fresh_ref");
    }
}