Skip to main content

dnx_core/
cache.rs

1use crate::errors::{DnxError, Result};
2use crate::integrity::{integrity_to_path_safe, verify_integrity};
3use flate2::read::GzDecoder;
4use serde::Serialize;
5use std::collections::HashSet;
6use std::fs;
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, RwLock};
9use tar::Archive;
10use walkdir::WalkDir;
11
/// On-disk, content-addressable store for extracted package tarballs.
///
/// Each entry lives at `<root>/content/<path-safe integrity>/package`, and
/// extractions in progress use scratch space under `<root>/tmp`.
#[derive(Clone, Debug)]
pub struct ContentCache {
    /// Base directory that holds the `content/` and `tmp/` subdirectories.
    root: PathBuf,
    /// Path-safe integrity strings known to be present in the cache.
    /// Lets `has()` answer from memory instead of stat-ing the filesystem;
    /// the set sits behind an `Arc`, so clones of the cache share it.
    index: Arc<RwLock<HashSet<String>>>,
}
20
21/// Statistics from cleaning the cache
22#[derive(Debug, Default, Serialize)]
23pub struct CleanStats {
24    pub removed: usize,
25    pub freed_bytes: u64,
26}
27
28impl ContentCache {
29    /// Create a new ContentCache with the given root directory
30    /// If root is None, uses the default cache directory
31    pub fn new(root: Option<PathBuf>) -> Self {
32        let root = root.unwrap_or_else(Self::default_cache_dir);
33
34        let cache = Self {
35            root,
36            index: Arc::new(RwLock::new(HashSet::new())),
37        };
38
39        // Create cache directories if they don't exist
40        fs::create_dir_all(cache.content_dir()).unwrap_or_else(|e| {
41            eprintln!("Warning: Failed to create cache content directory: {}", e)
42        });
43        fs::create_dir_all(cache.tmp_dir())
44            .unwrap_or_else(|e| eprintln!("Warning: Failed to create cache tmp directory: {}", e));
45
46        // Populate in-memory index from existing content directory
47        cache.populate_index();
48
49        cache
50    }
51
52    /// Scan content directory and populate the in-memory index.
53    /// Only indexes entries that have a complete `package/` subdirectory.
54    /// Auto-repairs entries where the tarball extracted to a non-standard root
55    /// (e.g. `react/` instead of `package/`) by renaming to `package/`.
56    fn populate_index(&self) {
57        let content_dir = self.content_dir();
58        if let Ok(entries) = fs::read_dir(&content_dir) {
59            let mut index = self.index.write().unwrap_or_else(|e| e.into_inner());
60            for entry in entries.flatten() {
61                if let Some(name) = entry.file_name().to_str() {
62                    let entry_path = entry.path();
63                    if entry_path.join("package").exists() {
64                        index.insert(name.to_string());
65                    } else if entry_path.is_dir() {
66                        // Try to auto-repair: if there's exactly one subdirectory,
67                        // rename it to "package" (tarball had non-standard root).
68                        if let Ok(sub_entries) = fs::read_dir(&entry_path) {
69                            let dirs: Vec<_> = sub_entries
70                                .filter_map(|e| e.ok())
71                                .filter(|e| e.file_type().map(|t| t.is_dir()).unwrap_or(false))
72                                .collect();
73                            if dirs.len() == 1 {
74                                let target = entry_path.join("package");
75                                if fs::rename(dirs[0].path(), &target).is_ok() {
76                                    index.insert(name.to_string());
77                                }
78                            }
79                        }
80                    }
81                }
82            }
83        }
84    }
85
86    /// Get the default cache directory (~/.dnx/cache)
87    pub fn default_cache_dir() -> PathBuf {
88        dirs::home_dir()
89            .unwrap_or_else(|| std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")))
90            .join(".dnx")
91            .join("cache")
92    }
93
94    /// Get the content directory path
95    pub fn content_dir(&self) -> PathBuf {
96        self.root.join("content")
97    }
98
99    /// Get the temporary directory path
100    pub fn tmp_dir(&self) -> PathBuf {
101        self.root.join("tmp")
102    }
103
104    /// Check if content with the given integrity exists in the cache (O(1) memory lookup)
105    pub fn has(&self, integrity: &str) -> bool {
106        let safe = integrity_to_path_safe(integrity);
107        if let Ok(index) = self.index.read() {
108            index.contains(&safe)
109        } else {
110            // Fallback to filesystem check if lock is poisoned
111            self.content_dir().join(&safe).exists()
112        }
113    }
114
115    /// Get the path to cached content for the given integrity
116    pub fn get_path(&self, integrity: &str) -> PathBuf {
117        self.content_dir()
118            .join(integrity_to_path_safe(integrity))
119            .join("package")
120    }
121
122    /// Store tarball data in the cache after verifying its integrity
123    /// Returns the path to the extracted package directory
124    pub fn store(&self, integrity: &str, tarball_data: &[u8]) -> Result<PathBuf> {
125        // Compute or verify integrity
126        let effective_integrity = if integrity.is_empty() || !integrity.starts_with("sha512-") {
127            // No usable integrity hash - compute SHA-512 from tarball data
128            crate::integrity::compute_sha512(tarball_data)
129        } else {
130            // Verify the integrity of the tarball data
131            if !verify_integrity(tarball_data, integrity)? {
132                return Err(DnxError::Integrity(format!(
133                    "Integrity check failed (expected {})",
134                    integrity
135                )));
136            }
137            integrity.to_string()
138        };
139
140        // If the package directory already exists on disk, return it.
141        // Content-addressed: same integrity hash ⇒ same content, no need to re-extract.
142        let cached_path = self.get_path(&effective_integrity);
143        if cached_path.exists() {
144            // Ensure in-memory index is consistent
145            let safe = integrity_to_path_safe(&effective_integrity);
146            if let Ok(mut index) = self.index.write() {
147                index.insert(safe);
148            }
149            return Ok(cached_path);
150        }
151
152        // Self-heal: remove corrupt cache entry (directory exists but package/ doesn't)
153        let safe_integrity = integrity_to_path_safe(&effective_integrity);
154        let existing_dir = self.content_dir().join(&safe_integrity);
155        if existing_dir.exists() {
156            let _ = fs::remove_dir_all(&existing_dir);
157            if let Ok(mut index) = self.index.write() {
158                index.remove(&safe_integrity);
159            }
160        }
161
162        // Create a temporary directory for extraction
163        let temp_dir = tempfile::tempdir_in(self.tmp_dir())
164            .map_err(|e| DnxError::Cache(format!("Failed to create temp directory: {}", e)))?;
165
166        // Streaming decompress + extract (no intermediate buffer)
167        let decoder = GzDecoder::new(tarball_data);
168        let mut archive = Archive::new(decoder);
169        archive
170            .unpack(temp_dir.path())
171            .map_err(|e| DnxError::Cache(format!("Failed to extract tarball: {}", e)))?;
172
173        // Normalize root directory: npm tarballs usually extract to "package/"
174        // but some packages (e.g. @types/*) use the package name instead.
175        // Rename the root to "package" for consistent cache layout.
176        let package_subdir = temp_dir.path().join("package");
177        if !package_subdir.exists() {
178            // Find the single root directory and rename it to "package"
179            let entries: Vec<_> = fs::read_dir(temp_dir.path())
180                .map_err(|e| DnxError::Cache(format!("Failed to read temp dir: {}", e)))?
181                .filter_map(|e| e.ok())
182                .filter(|e| e.file_type().map(|t| t.is_dir()).unwrap_or(false))
183                .collect();
184
185            if entries.len() == 1 {
186                fs::rename(entries[0].path(), &package_subdir).map_err(|e| {
187                    DnxError::Cache(format!(
188                        "Failed to normalize tarball root '{}' to 'package': {}",
189                        entries[0].file_name().to_string_lossy(),
190                        e
191                    ))
192                })?;
193            } else if entries.is_empty() {
194                return Err(DnxError::Cache(
195                    "Tarball extraction produced no directories".to_string(),
196                ));
197            }
198            // If multiple dirs, leave as-is (unusual but don't break)
199        }
200
201        // Consume TempDir to get a plain PathBuf — prevents the destructor from
202        // removing the directory after we rename/move it to the final location.
203        let temp_path = temp_dir.keep();
204
205        // Prepare the final destination path (reuse safe_integrity from above)
206        let final_path = self.content_dir().join(&safe_integrity);
207
208        // Ensure the parent directory exists
209        if let Some(parent) = final_path.parent() {
210            fs::create_dir_all(parent).map_err(|e| {
211                DnxError::Cache(format!("Failed to create parent directory: {}", e))
212            })?;
213        }
214
215        // Move the temp directory to the final location.
216        // Another process may have stored the same content concurrently —
217        // if the final path appeared while we were extracting, just discard our copy.
218        if final_path.exists() {
219            let _ = fs::remove_dir_all(&temp_path);
220        } else {
221            // On Windows, fs::rename can fail across different drives/volumes
222            // so we need to handle that case with copy+delete
223            let move_result = fs::rename(&temp_path, &final_path);
224            if let Err(ref e) = move_result {
225                // If rename fails, try copy and delete
226                copy_dir_all(&temp_path, &final_path).map_err(|copy_err| {
227                    DnxError::Cache(format!(
228                        "Failed to move cache content (rename error: {}, copy error: {})",
229                        e, copy_err
230                    ))
231                })?;
232            }
233            // Clean up the temp directory (no-op if rename succeeded, removes original if copied)
234            let _ = fs::remove_dir_all(&temp_path);
235        }
236
237        // Update in-memory index
238        if let Ok(mut index) = self.index.write() {
239            index.insert(safe_integrity.clone());
240        }
241
242        // If the original integrity differs from effective (e.g., sha1 → computed sha512),
243        // create an alias so lookups with the original integrity also work.
244        if !integrity.is_empty() && integrity != effective_integrity {
245            let original_safe = integrity_to_path_safe(integrity);
246            if original_safe != safe_integrity {
247                let alias_path = self.content_dir().join(&original_safe);
248                if !alias_path.exists() {
249                    // Create a directory junction/symlink from original → effective
250                    #[cfg(windows)]
251                    {
252                        let _ = junction::create(&final_path, &alias_path);
253                    }
254                    #[cfg(unix)]
255                    {
256                        let _ = std::os::unix::fs::symlink(&final_path, &alias_path);
257                    }
258                    if let Ok(mut index) = self.index.write() {
259                        index.insert(original_safe);
260                    }
261                }
262            }
263        }
264
265        Ok(self.get_path(&effective_integrity))
266    }
267
268    /// Calculate the total disk usage of the cache
269    pub fn disk_usage(&self) -> Result<u64> {
270        let mut total_size = 0u64;
271
272        for entry in WalkDir::new(self.content_dir())
273            .into_iter()
274            .filter_map(|e| e.ok())
275        {
276            if entry.file_type().is_file() {
277                if let Ok(metadata) = entry.metadata() {
278                    total_size += metadata.len();
279                }
280            }
281        }
282
283        Ok(total_size)
284    }
285
286    /// Clean the cache, removing entries not in the keep set
287    /// Returns statistics about the cleaning operation
288    pub fn clean(&self, keep: &HashSet<String>) -> Result<CleanStats> {
289        let mut stats = CleanStats::default();
290        let content_dir = self.content_dir();
291
292        if !content_dir.exists() {
293            return Ok(stats);
294        }
295
296        // Iterate over entries in the content directory
297        let entries = fs::read_dir(&content_dir)
298            .map_err(|e| DnxError::Cache(format!("Failed to read content directory: {}", e)))?;
299
300        for entry in entries {
301            let entry = entry
302                .map_err(|e| DnxError::Cache(format!("Failed to read directory entry: {}", e)))?;
303
304            let path = entry.path();
305
306            // Get the directory name (which is the safe integrity hash)
307            if let Some(dir_name) = path.file_name().and_then(|n| n.to_str()) {
308                // Check if this entry should be kept
309                if !keep.contains(dir_name) {
310                    // Calculate size before removing
311                    let size = calculate_dir_size(&path);
312
313                    // Remove the directory
314                    if let Err(e) = fs::remove_dir_all(&path) {
315                        eprintln!("Warning: Failed to remove {}: {}", path.display(), e);
316                        continue;
317                    }
318
319                    stats.removed += 1;
320                    stats.freed_bytes += size;
321                }
322            }
323        }
324
325        // Also clean up temporary files
326        let tmp_dir = self.tmp_dir();
327        if tmp_dir.exists() {
328            if let Ok(entries) = fs::read_dir(&tmp_dir) {
329                for entry in entries.flatten() {
330                    let path = entry.path();
331                    let size = calculate_dir_size(&path);
332                    if fs::remove_dir_all(&path).is_ok() {
333                        stats.removed += 1;
334                        stats.freed_bytes += size;
335                    }
336                }
337            }
338        }
339
340        Ok(stats)
341    }
342}
343
/// Recursively duplicate the directory tree at `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Directories are
/// descended into; every other entry is copied with `fs::copy`. The first
/// I/O error aborts the copy and is returned, leaving whatever was already
/// copied in place.
fn copy_dir_all(src: &std::path::Path, dst: &std::path::Path) -> std::io::Result<()> {
    fs::create_dir_all(dst)?;

    fs::read_dir(src)?.try_for_each(|child| {
        let child = child?;
        let kind = child.file_type()?;
        let from = child.path();
        let to = dst.join(child.file_name());

        if kind.is_dir() {
            copy_dir_all(&from, &to)
        } else {
            fs::copy(&from, &to).map(|_| ())
        }
    })
}
363
364/// Helper function to calculate the total size of a directory
365fn calculate_dir_size(path: &std::path::Path) -> u64 {
366    let mut total_size = 0u64;
367
368    for entry in WalkDir::new(path).into_iter().filter_map(|e| e.ok()) {
369        if entry.file_type().is_file() {
370            if let Ok(metadata) = entry.metadata() {
371                total_size += metadata.len();
372            }
373        }
374    }
375
376    total_size
377}
378
/// Cache for package build outputs (side effects of install scripts).
///
/// After a package runs its install scripts (preinstall, install,
/// postinstall), selected build artifacts are stored in a directory keyed by
/// (package, version, platform, arch) so they can be restored later instead
/// of rebuilt.
///
/// Derives `Debug` and `Clone` to match `ContentCache`, so the handle can be
/// logged and shared the same way.
#[derive(Debug, Clone)]
pub struct SideEffectsCache {
    /// Directory under which one subdirectory per cache key is kept.
    cache_dir: PathBuf,
}
385
386impl SideEffectsCache {
387    /// Create a new SideEffectsCache, storing cached outputs under the given directory.
388    pub fn new(base_dir: Option<PathBuf>) -> Self {
389        let cache_dir = base_dir.unwrap_or_else(|| {
390            dirs::home_dir()
391                .unwrap_or_else(|| PathBuf::from("."))
392                .join(".dnx")
393                .join("side-effects")
394        });
395        Self { cache_dir }
396    }
397
398    /// Generate a cache key for a package's side effects.
399    fn cache_key(package_name: &str, version: &str) -> String {
400        let platform = std::env::consts::OS;
401        let arch = std::env::consts::ARCH;
402        format!(
403            "{}_{}_{}_{}",
404            package_name.replace(['/', '@'], "_"),
405            version,
406            platform,
407            arch
408        )
409    }
410
411    /// Check if side effects are cached for a given package.
412    pub fn has(&self, package_name: &str, version: &str) -> bool {
413        let key = Self::cache_key(package_name, version);
414        self.cache_dir.join(&key).exists()
415    }
416
417    /// Store the side effects (built output) for a package.
418    /// `package_dir` is the directory of the installed package after scripts ran.
419    pub fn store(&self, package_name: &str, version: &str, package_dir: &Path) -> Result<()> {
420        let key = Self::cache_key(package_name, version);
421        let dest = self.cache_dir.join(&key);
422
423        // Create cache directory
424        std::fs::create_dir_all(&dest).map_err(|e| {
425            DnxError::Cache(format!("Failed to create side-effects cache dir: {}", e))
426        })?;
427
428        // Copy the built files that are typically modified by install scripts
429        // Common patterns: binding.gyp outputs, .node files, build/ directories
430        let side_effect_patterns = ["build", "prebuilds", "Release", "Debug"];
431
432        for entry in std::fs::read_dir(package_dir)
433            .map_err(|e| DnxError::Cache(format!("Failed to read package dir: {}", e)))?
434        {
435            let entry =
436                entry.map_err(|e| DnxError::Cache(format!("Failed to read dir entry: {}", e)))?;
437            let name = entry.file_name();
438            let name_str = name.to_string_lossy();
439
440            // Cache built artifacts and .node files
441            let should_cache = side_effect_patterns.iter().any(|p| *p == name_str.as_ref())
442                || name_str.ends_with(".node");
443
444            if should_cache && entry.path().exists() {
445                let target = dest.join(&name);
446                if entry.path().is_dir() {
447                    copy_dir_all(&entry.path(), &target).map_err(|e| {
448                        DnxError::Cache(format!("Failed to cache side effect: {}", e))
449                    })?;
450                } else {
451                    std::fs::copy(entry.path(), &target).map_err(|e| {
452                        DnxError::Cache(format!("Failed to cache side effect file: {}", e))
453                    })?;
454                }
455            }
456        }
457
458        Ok(())
459    }
460
461    /// Restore cached side effects into a package directory.
462    /// Returns true if side effects were restored, false if not cached.
463    pub fn restore(&self, package_name: &str, version: &str, package_dir: &Path) -> Result<bool> {
464        let key = Self::cache_key(package_name, version);
465        let source = self.cache_dir.join(&key);
466
467        if !source.exists() {
468            return Ok(false);
469        }
470
471        // Check if the cache entry has any files
472        let has_entries = std::fs::read_dir(&source)
473            .map(|mut entries| entries.next().is_some())
474            .unwrap_or(false);
475
476        if !has_entries {
477            return Ok(false);
478        }
479
480        // Copy cached artifacts back into the package directory
481        for entry in std::fs::read_dir(&source)
482            .map_err(|e| DnxError::Cache(format!("Failed to read side-effects cache: {}", e)))?
483        {
484            let entry =
485                entry.map_err(|e| DnxError::Cache(format!("Failed to read cache entry: {}", e)))?;
486            let target = package_dir.join(entry.file_name());
487
488            if entry.path().is_dir() {
489                copy_dir_all(&entry.path(), &target).map_err(|e| {
490                    DnxError::Cache(format!("Failed to restore side effect: {}", e))
491                })?;
492            } else {
493                std::fs::copy(entry.path(), &target).map_err(|e| {
494                    DnxError::Cache(format!("Failed to restore side effect file: {}", e))
495                })?;
496            }
497        }
498
499        Ok(true)
500    }
501
502    /// Clean all cached side effects.
503    pub fn clean(&self) -> Result<usize> {
504        if !self.cache_dir.exists() {
505            return Ok(0);
506        }
507        let mut removed = 0;
508        for entry in std::fs::read_dir(&self.cache_dir)
509            .map_err(|e| DnxError::Cache(format!("Failed to read side-effects cache: {}", e)))?
510            .flatten()
511        {
512            if entry.path().is_dir() {
513                let _ = std::fs::remove_dir_all(entry.path());
514                removed += 1;
515            }
516        }
517        Ok(removed)
518    }
519}
520
#[cfg(test)]
mod tests {
    use super::*;

    /// Create a `ContentCache` inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned alongside the cache so the directory
    /// stays alive for the duration of the test.
    fn scratch_cache() -> (tempfile::TempDir, ContentCache) {
        let dir = tempfile::tempdir().unwrap();
        let cache = ContentCache::new(Some(dir.path().to_path_buf()));
        (dir, cache)
    }

    /// The constructor derives and creates both subdirectories.
    #[test]
    fn test_cache_paths() {
        let (dir, cache) = scratch_cache();

        assert_eq!(cache.content_dir(), dir.path().join("content"));
        assert_eq!(cache.tmp_dir(), dir.path().join("tmp"));
        assert!(cache.content_dir().exists());
        assert!(cache.tmp_dir().exists());
    }

    /// An empty cache knows no integrity hashes.
    #[test]
    fn test_has_returns_false_for_nonexistent() {
        let (_dir, cache) = scratch_cache();

        assert!(!cache.has("sha512-nonexistent"));
    }

    /// `get_path` is `<content>/<safe integrity>/package`.
    #[test]
    fn test_get_path() {
        let (_dir, cache) = scratch_cache();

        let integrity = "sha512-test";
        let expected = cache
            .content_dir()
            .join(integrity_to_path_safe(integrity))
            .join("package");

        assert_eq!(cache.get_path(integrity), expected);
    }

    /// A freshly created cache occupies no space.
    #[test]
    fn test_disk_usage_empty() {
        let (_dir, cache) = scratch_cache();

        let usage = cache.disk_usage().unwrap();
        assert_eq!(usage, 0);
    }

    /// Default statistics start at zero.
    #[test]
    fn test_clean_stats_default() {
        let stats = CleanStats::default();
        assert_eq!(stats.removed, 0);
        assert_eq!(stats.freed_bytes, 0);
    }
}