Skip to main content

dlin_core/parser/
cache.rs

1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3use std::time::SystemTime;
4
5use serde::{Deserialize, Serialize};
6
7use super::jinja::JinjaExtraction;
8
/// Directory created under the project directory to hold the cache file when
/// no explicit cache directory is supplied
const CACHE_DIR: &str = ".dlin_cache";
/// File name of the serialized extraction cache inside the cache directory
const CACHE_FILENAME: &str = "extraction_cache.json";
12
/// A single cached extraction entry.
///
/// The stored `mtime_secs` and `file_size` are compared against the file's
/// current metadata on lookup; the entry is only served when both match.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CacheEntry {
    /// File modification time (seconds since UNIX epoch)
    mtime_secs: u64,
    /// File size in bytes (secondary check for same-second modifications)
    file_size: u64,
    /// Extraction result
    extraction: JinjaExtraction,
}
23
/// On-disk cache for minijinja extraction results.
///
/// This is the exact JSON shape written by `ExtractionCache::save` and read
/// back by `ExtractionCache::load`.
#[derive(Debug, Serialize, Deserialize)]
struct CacheFile {
    /// dlin version that created this cache.
    /// If the version changes, all entries are invalidated.
    /// A file without this field deserializes to an empty string.
    #[serde(default)]
    version: String,
    /// Hash of the macro prefix used during extraction.
    /// If macros change, all entries are invalidated.
    /// No serde default: a file without this field fails to deserialize.
    macro_prefix_hash: u64,
    /// Hash of serialized dbt project vars.
    /// If vars change, all entries are invalidated.
    /// A file without this field deserializes to `0`, which is also the hash
    /// `hash_vars` produces for an empty vars map.
    #[serde(default)]
    vars_hash: u64,
    /// Per-file extraction results keyed by relative path
    entries: HashMap<String, CacheEntry>,
}
41
42/// In-memory extraction cache that can be loaded from and saved to disk
43pub struct ExtractionCache {
44    version: String,
45    macro_prefix_hash: u64,
46    vars_hash: u64,
47    entries: HashMap<String, CacheEntry>,
48    /// `None` when the cache is disabled (no-op mode).
49    cache_path: Option<PathBuf>,
50    dirty: bool,
51}
52
53impl ExtractionCache {
54    /// Create a no-op cache that never reads from or writes to disk.
55    pub fn disabled() -> Self {
56        Self {
57            version: String::new(),
58            macro_prefix_hash: 0,
59            vars_hash: 0,
60            entries: HashMap::new(),
61            cache_path: None,
62            dirty: false,
63        }
64    }
65
66    /// Load the cache from disk, or create an empty one.
67    /// Entries are discarded when the dlin version, macro prefix hash, or vars
68    /// hash doesn't match.
69    ///
70    /// When `cache_dir` is `None`, the cache is stored under
71    /// `<project_dir>/.dlin_cache/extraction_cache.json`. When `cache_dir` is
72    /// provided, the cache file is placed directly inside it.
73    pub fn load(
74        project_dir: &Path,
75        macro_prefix: &str,
76        vars: &HashMap<String, serde_json::Value>,
77        cache_dir: Option<&Path>,
78    ) -> Self {
79        let cache_path = match cache_dir {
80            Some(dir) => dir.join(CACHE_FILENAME),
81            None => project_dir.join(CACHE_DIR).join(CACHE_FILENAME),
82        };
83        let version = env!("CARGO_PKG_VERSION").to_string();
84        let macro_hash = hash_str(macro_prefix);
85        let vars_hash = hash_vars(vars);
86
87        let entries = std::fs::read_to_string(&cache_path)
88            .ok()
89            .and_then(|content| serde_json::from_str::<CacheFile>(&content).ok())
90            .filter(|cf| {
91                cf.version == version
92                    && cf.macro_prefix_hash == macro_hash
93                    && cf.vars_hash == vars_hash
94            })
95            .map(|cf| cf.entries)
96            .unwrap_or_default();
97
98        Self {
99            version,
100            macro_prefix_hash: macro_hash,
101            vars_hash,
102            entries,
103            cache_path: Some(cache_path),
104            dirty: false,
105        }
106    }
107
108    /// Create an empty cache that ignores any existing on-disk entries but
109    /// still writes results to disk on [`save`](Self::save).
110    /// Used by `--refresh-cache` to rebuild the cache from scratch.
111    pub fn fresh(
112        project_dir: &Path,
113        macro_prefix: &str,
114        vars: &HashMap<String, serde_json::Value>,
115        cache_dir: Option<&Path>,
116    ) -> Self {
117        let cache_path = match cache_dir {
118            Some(dir) => dir.join(CACHE_FILENAME),
119            None => project_dir.join(CACHE_DIR).join(CACHE_FILENAME),
120        };
121        Self {
122            version: env!("CARGO_PKG_VERSION").to_string(),
123            macro_prefix_hash: hash_str(macro_prefix),
124            vars_hash: hash_vars(vars),
125            entries: HashMap::new(),
126            cache_path: Some(cache_path),
127            dirty: false,
128        }
129    }
130
131    /// Look up a cached extraction for the given file path.
132    /// Returns `None` if not cached or if the file has been modified.
133    pub fn get(&self, path: &Path, project_dir: &Path) -> Option<&JinjaExtraction> {
134        let key = relative_key(path, project_dir);
135        let entry = self.entries.get(&key)?;
136        let stat = file_stat(path)?;
137        if entry.mtime_secs == stat.mtime_secs && entry.file_size == stat.file_size {
138            Some(&entry.extraction)
139        } else {
140            None
141        }
142    }
143
144    /// Insert an extraction result into the cache.
145    pub fn insert(&mut self, path: &Path, project_dir: &Path, extraction: &JinjaExtraction) {
146        let key = relative_key(path, project_dir);
147        if let Some(stat) = file_stat(path) {
148            self.entries.insert(
149                key,
150                CacheEntry {
151                    mtime_secs: stat.mtime_secs,
152                    file_size: stat.file_size,
153                    extraction: extraction.clone(),
154                },
155            );
156            self.dirty = true;
157        }
158    }
159
160    /// Save the cache to disk if it has been modified.
161    pub fn save(&self) {
162        let cache_path = match (&self.cache_path, self.dirty) {
163            (Some(p), true) => p,
164            _ => return,
165        };
166        let cf = CacheFile {
167            version: self.version.clone(),
168            macro_prefix_hash: self.macro_prefix_hash,
169            vars_hash: self.vars_hash,
170            entries: self.entries.clone(),
171        };
172        if let Some(parent) = cache_path.parent() {
173            if std::fs::create_dir_all(parent).is_err() {
174                crate::warn!("could not create cache directory: {}", parent.display());
175                return;
176            }
177            // Auto-create .gitignore to prevent accidental commits
178            let gitignore = parent.join(".gitignore");
179            if !gitignore.exists()
180                && let Err(e) = std::fs::write(&gitignore, "# Automatically created by dlin\n*\n")
181            {
182                crate::warn!("could not create {}: {}", gitignore.display(), e);
183            }
184        }
185        match serde_json::to_string(&cf) {
186            Ok(json) => {
187                if let Err(e) = std::fs::write(cache_path, json) {
188                    crate::warn!("could not write cache file {}: {}", cache_path.display(), e);
189                }
190            }
191            Err(e) => {
192                crate::warn!("could not serialize cache: {}", e);
193            }
194        }
195    }
196}
197
/// Hash a string with 64-bit FNV-1a.
///
/// The result depends only on the bytes of `s`, so it is stable across runs
/// and suitable for persisting in the cache file.
pub(crate) fn hash_str(s: &str) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a: xor the byte in first, then multiply by the prime.
    s.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
207
208/// Hash project vars deterministically (sorted keys → JSON → FNV-1a)
209fn hash_vars(vars: &HashMap<String, serde_json::Value>) -> u64 {
210    if vars.is_empty() {
211        return 0;
212    }
213    let mut keys: Vec<&String> = vars.keys().collect();
214    keys.sort();
215    let mut s = String::new();
216    for k in keys {
217        s.push_str(k);
218        s.push('=');
219        s.push_str(&vars[k].to_string());
220        s.push('\n');
221    }
222    hash_str(&s)
223}
224
/// Snapshot of the file attributes compared when validating a cache entry
struct FileStat {
    /// Modification time in whole seconds since the UNIX epoch
    mtime_secs: u64,
    /// Length of the file in bytes
    file_size: u64,
}

/// Read a file's modification time and size from one metadata lookup.
///
/// Returns `None` when the metadata cannot be read, when no modification time
/// is available, or when that time lies before the UNIX epoch.
fn file_stat(path: &Path) -> Option<FileStat> {
    let metadata = std::fs::metadata(path).ok()?;
    let modified = metadata.modified().ok()?;
    let since_epoch = modified.duration_since(SystemTime::UNIX_EPOCH).ok()?;
    Some(FileStat {
        mtime_secs: since_epoch.as_secs(),
        file_size: metadata.len(),
    })
}
245
/// Build the cache key for a file: its path relative to `project_dir`.
///
/// A path that does not start with `project_dir` is used unchanged. Path
/// components that are not valid UTF-8 are converted lossily.
fn relative_key(path: &Path, project_dir: &Path) -> String {
    let relative = path.strip_prefix(project_dir).unwrap_or(path);
    let key = relative.to_string_lossy();
    key.into_owned()
}
253
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::sql::{RefCall, SqlConfig};
    use std::fs;
    use tempfile::tempdir;

    /// Create a temporary project directory containing a single `model.sql`.
    ///
    /// The returned `TempDir` must be kept alive for the duration of the test;
    /// dropping it removes the directory.
    fn project_with_model() -> (tempfile::TempDir, PathBuf) {
        let tmp = tempdir().unwrap();
        let sql_file = tmp.path().join("model.sql");
        fs::write(&sql_file, "SELECT 1").unwrap();
        (tmp, sql_file)
    }

    /// Build an extraction holding exactly one unqualified `ref` call.
    fn extraction_with_ref(name: &str) -> JinjaExtraction {
        JinjaExtraction {
            refs: vec![RefCall {
                package: None,
                name: name.to_string(),
            }],
            sources: vec![],
            config: SqlConfig::default(),
        }
    }

    #[test]
    fn test_cache_hit() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut cache = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        assert!(cache.get(&sql_file, project_dir).is_none());

        cache.insert(&sql_file, project_dir, &extraction_with_ref("orders"));
        cache.save();

        // Reload from disk
        let cache2 = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        let hit = cache2.get(&sql_file, project_dir).unwrap();
        assert_eq!(hit.refs.len(), 1);
        assert_eq!(hit.refs[0].name, "orders");
    }

    #[test]
    fn test_cache_invalidated_by_macro_change() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut cache = ExtractionCache::load(project_dir, "prefix_v1", &HashMap::new(), None);
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());
        cache.save();

        // Different macro prefix → cache miss
        let cache2 = ExtractionCache::load(project_dir, "prefix_v2", &HashMap::new(), None);
        assert!(cache2.get(&sql_file, project_dir).is_none());
    }

    #[test]
    fn test_cache_invalidated_by_file_change() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut cache = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());
        cache.save();

        // Modify file (change both mtime and size). The mtime is pushed
        // forward explicitly so the test does not have to sleep until the
        // wall clock crosses a second boundary.
        let original_mtime = fs::metadata(&sql_file).unwrap().modified().unwrap();
        fs::write(&sql_file, "SELECT 1, 2, 3").unwrap();
        fs::File::options()
            .write(true)
            .open(&sql_file)
            .unwrap()
            .set_modified(original_mtime + std::time::Duration::from_secs(2))
            .unwrap();

        let cache2 = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        assert!(cache2.get(&sql_file, project_dir).is_none());
    }

    #[test]
    fn test_cache_invalidated_by_size_change() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut cache = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());

        // Tamper with the entry to have the correct mtime but wrong size
        let key = relative_key(&sql_file, project_dir);
        if let Some(entry) = cache.entries.get_mut(&key) {
            entry.file_size += 1;
        }

        assert!(cache.get(&sql_file, project_dir).is_none());
    }

    #[test]
    fn test_cache_invalidated_by_mtime_change() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut cache = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());

        // Tamper with the entry to have the correct size but wrong mtime
        let key = relative_key(&sql_file, project_dir);
        if let Some(entry) = cache.entries.get_mut(&key) {
            entry.mtime_secs += 1;
        }

        assert!(cache.get(&sql_file, project_dir).is_none());
    }

    #[test]
    fn test_gitignore_created_on_save() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut cache = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());
        cache.save();

        let gitignore = project_dir.join(".dlin_cache/.gitignore");
        assert!(gitignore.exists());
        let content = fs::read_to_string(&gitignore).unwrap();
        assert!(content.contains("*"));
    }

    #[test]
    fn test_gitignore_not_overwritten() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        // Pre-create .gitignore with custom content
        let dlin_dir = project_dir.join(".dlin_cache");
        fs::create_dir_all(&dlin_dir).unwrap();
        fs::write(dlin_dir.join(".gitignore"), "custom\n").unwrap();

        let mut cache = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());
        cache.save();

        let content = fs::read_to_string(dlin_dir.join(".gitignore")).unwrap();
        assert_eq!(content, "custom\n");
    }

    #[test]
    fn test_custom_cache_dir() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();
        let cache_dir = tmp.path().join("my_cache");

        let mut cache =
            ExtractionCache::load(project_dir, "prefix", &HashMap::new(), Some(&cache_dir));
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());
        cache.save();

        // Cache file should be directly in cache_dir, not nested under .dlin_cache/
        assert!(cache_dir.join(CACHE_FILENAME).exists());
        assert!(!cache_dir.join(CACHE_DIR).exists());
    }

    #[test]
    fn test_cache_invalidated_by_version_change() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut cache = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());
        cache.save();

        // Tamper with version in the saved file
        let cache_path = project_dir.join(CACHE_DIR).join(CACHE_FILENAME);
        let content = fs::read_to_string(&cache_path).unwrap();
        let mut cf: CacheFile = serde_json::from_str(&content).unwrap();
        cf.version = "0.0.0-fake".to_string();
        fs::write(&cache_path, serde_json::to_string(&cf).unwrap()).unwrap();

        // Reload → entries should be discarded
        let cache2 = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        assert!(cache2.get(&sql_file, project_dir).is_none());
    }

    #[test]
    fn test_cache_invalidated_by_vars_change() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut vars = HashMap::new();
        vars.insert("schema".to_string(), serde_json::json!("staging"));

        let mut cache = ExtractionCache::load(project_dir, "prefix", &vars, None);
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());
        cache.save();

        // Different vars → cache miss
        let mut vars2 = HashMap::new();
        vars2.insert("schema".to_string(), serde_json::json!("production"));
        let cache2 = ExtractionCache::load(project_dir, "prefix", &vars2, None);
        assert!(cache2.get(&sql_file, project_dir).is_none());
    }

    #[test]
    fn test_cache_valid_with_same_vars() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        let mut vars = HashMap::new();
        vars.insert("schema".to_string(), serde_json::json!("staging"));

        let mut cache = ExtractionCache::load(project_dir, "prefix", &vars, None);
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());
        cache.save();

        // Same vars → cache hit
        let cache2 = ExtractionCache::load(project_dir, "prefix", &vars, None);
        assert!(cache2.get(&sql_file, project_dir).is_some());
    }

    #[test]
    fn test_fresh_ignores_existing_but_saves() {
        let (tmp, sql_file) = project_with_model();
        let project_dir = tmp.path();

        // Populate cache
        let mut cache = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        cache.insert(&sql_file, project_dir, &JinjaExtraction::default());
        cache.save();

        // Fresh cache ignores existing entries
        let fresh = ExtractionCache::fresh(project_dir, "prefix", &HashMap::new(), None);
        assert!(fresh.get(&sql_file, project_dir).is_none());

        // But can still save new entries
        let mut fresh = ExtractionCache::fresh(project_dir, "prefix", &HashMap::new(), None);
        fresh.insert(&sql_file, project_dir, &extraction_with_ref("fresh_ref"));
        fresh.save();

        // Reload → new entry is there
        let reloaded = ExtractionCache::load(project_dir, "prefix", &HashMap::new(), None);
        let hit = reloaded.get(&sql_file, project_dir).unwrap();
        assert_eq!(hit.refs[0].name, "fresh_ref");
    }
}