Skip to main content

uv_cache/
lib.rs

1use std::fmt::{Display, Formatter};
2use std::io;
3use std::io::Write;
4use std::ops::Deref;
5use std::path::{Path, PathBuf};
6use std::str::FromStr;
7use std::sync::Arc;
8
9use rustc_hash::FxHashMap;
10use tracing::{debug, trace, warn};
11
12use uv_cache_info::Timestamp;
13use uv_fs::{LockedFile, LockedFileError, LockedFileMode, Simplified, cachedir, directories};
14use uv_normalize::PackageName;
15use uv_pypi_types::ResolutionMetadata;
16
17pub use crate::by_timestamp::CachedByTimestamp;
18#[cfg(feature = "clap")]
19pub use crate::cli::CacheArgs;
20use crate::removal::Remover;
21pub use crate::removal::{Removal, rm_rf};
22pub use crate::wheel::WheelCache;
23use crate::wheel::WheelCacheKind;
24pub use archive::ArchiveId;
25
26mod archive;
27mod by_timestamp;
28#[cfg(feature = "clap")]
29mod cli;
30mod removal;
31mod wheel;
32
/// The version of the archive bucket.
///
/// Must be kept in-sync with the version in [`CacheBucket::to_str`].
///
/// This value is stamped into every `Link` created via `Link::new`, and the Windows
/// implementation of [`Cache::resolve_link`] treats links carrying a different version as stale.
pub const ARCHIVE_VERSION: u8 = 0;
37
/// Errors raised while initializing the cache or while locking a cache entry or shard.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// An I/O error, forwarded transparently.
    #[error(transparent)]
    Io(#[from] io::Error),
    /// The cache scaffold could not be created at the given root (see [`Cache::init`]).
    #[error("Failed to initialize cache at `{}`", _0.user_display())]
    Init(PathBuf, #[source] io::Error),
    /// The cache root could not be converted into an absolute path.
    #[error("Could not make the path absolute")]
    Absolute(#[source] io::Error),
    /// A file lock could not be acquired.
    #[error("Could not acquire lock")]
    Acquire(#[from] LockedFileError),
}
50
51/// A [`CacheEntry`] which may or may not exist yet.
52#[derive(Debug, Clone)]
53pub struct CacheEntry(PathBuf);
54
55impl CacheEntry {
56    /// Create a new [`CacheEntry`] from a directory and a file name.
57    pub fn new(dir: impl Into<PathBuf>, file: impl AsRef<Path>) -> Self {
58        Self(dir.into().join(file))
59    }
60
61    /// Create a new [`CacheEntry`] from a path.
62    pub fn from_path(path: impl Into<PathBuf>) -> Self {
63        Self(path.into())
64    }
65
66    /// Return the cache entry's parent directory.
67    pub fn shard(&self) -> CacheShard {
68        CacheShard(self.dir().to_path_buf())
69    }
70
71    /// Convert the [`CacheEntry`] into a [`PathBuf`].
72    #[inline]
73    pub fn into_path_buf(self) -> PathBuf {
74        self.0
75    }
76
77    /// Return the path to the [`CacheEntry`].
78    #[inline]
79    pub fn path(&self) -> &Path {
80        &self.0
81    }
82
83    /// Return the cache entry's parent directory.
84    #[inline]
85    pub fn dir(&self) -> &Path {
86        self.0.parent().expect("Cache entry has no parent")
87    }
88
89    /// Create a new [`CacheEntry`] with the given file name.
90    #[must_use]
91    pub fn with_file(&self, file: impl AsRef<Path>) -> Self {
92        Self(self.dir().join(file))
93    }
94
95    /// Acquire the [`CacheEntry`] as an exclusive lock.
96    pub async fn lock(&self) -> Result<LockedFile, Error> {
97        fs_err::create_dir_all(self.dir())?;
98        Ok(LockedFile::acquire(
99            self.path(),
100            LockedFileMode::Exclusive,
101            self.path().display(),
102        )
103        .await?)
104    }
105}
106
107impl AsRef<Path> for CacheEntry {
108    fn as_ref(&self) -> &Path {
109        &self.0
110    }
111}
112
113/// A subdirectory within the cache.
114#[derive(Debug, Clone)]
115pub struct CacheShard(PathBuf);
116
117impl CacheShard {
118    /// Return a [`CacheEntry`] within this shard.
119    pub fn entry(&self, file: impl AsRef<Path>) -> CacheEntry {
120        CacheEntry::new(&self.0, file)
121    }
122
123    /// Return a [`CacheShard`] within this shard.
124    #[must_use]
125    pub fn shard(&self, dir: impl AsRef<Path>) -> Self {
126        Self(self.0.join(dir.as_ref()))
127    }
128
129    /// Acquire the cache entry as an exclusive lock.
130    pub async fn lock(&self) -> Result<LockedFile, Error> {
131        fs_err::create_dir_all(self.as_ref())?;
132        Ok(LockedFile::acquire(
133            self.join(".lock"),
134            LockedFileMode::Exclusive,
135            self.display(),
136        )
137        .await?)
138    }
139
140    /// Return the [`CacheShard`] as a [`PathBuf`].
141    pub fn into_path_buf(self) -> PathBuf {
142        self.0
143    }
144}
145
146impl AsRef<Path> for CacheShard {
147    fn as_ref(&self) -> &Path {
148        &self.0
149    }
150}
151
152impl Deref for CacheShard {
153    type Target = Path;
154
155    fn deref(&self) -> &Self::Target {
156        &self.0
157    }
158}
159
/// The main cache abstraction.
///
/// While the cache is active, it holds a read (shared) lock that prevents cache cleaning.
#[derive(Debug, Clone)]
pub struct Cache {
    /// The cache directory.
    root: PathBuf,
    /// The refresh strategy to use when reading from the cache.
    refresh: Refresh,
    /// A temporary cache directory, if the user requested `--no-cache`.
    ///
    /// Included to ensure that the temporary directory exists for the length of the operation, but
    /// is dropped at the end as appropriate.
    temp_dir: Option<Arc<tempfile::TempDir>>,
    /// Ensure that `uv cache` operations don't remove items from the cache that are used by another
    /// uv process.
    ///
    /// `None` when no lock is held, e.g. before [`Cache::init`] has run or when shared locking is
    /// unsupported; clones of the cache share the same lock through the [`Arc`].
    lock_file: Option<Arc<LockedFile>>,
}
178
179impl Cache {
180    /// A persistent cache directory at `root`.
181    pub fn from_path(root: impl Into<PathBuf>) -> Self {
182        Self {
183            root: root.into(),
184            refresh: Refresh::None(Timestamp::now()),
185            temp_dir: None,
186            lock_file: None,
187        }
188    }
189
190    /// Create a temporary cache directory.
191    pub fn temp() -> Result<Self, io::Error> {
192        let temp_dir = tempfile::tempdir()?;
193        Ok(Self {
194            root: temp_dir.path().to_path_buf(),
195            refresh: Refresh::None(Timestamp::now()),
196            temp_dir: Some(Arc::new(temp_dir)),
197            lock_file: None,
198        })
199    }
200
201    /// Set the [`Refresh`] policy for the cache.
202    #[must_use]
203    pub fn with_refresh(self, refresh: Refresh) -> Self {
204        Self { refresh, ..self }
205    }
206
207    /// Acquire a lock that allows removing entries from the cache.
208    pub async fn with_exclusive_lock(self) -> Result<Self, LockedFileError> {
209        let Self {
210            root,
211            refresh,
212            temp_dir,
213            lock_file,
214        } = self;
215
216        // Release the existing lock, avoid deadlocks from a cloned cache.
217        if let Some(lock_file) = lock_file {
218            drop(
219                Arc::try_unwrap(lock_file).expect(
220                    "cloning the cache before acquiring an exclusive lock causes a deadlock",
221                ),
222            );
223        }
224        let lock_file = LockedFile::acquire(
225            root.join(".lock"),
226            LockedFileMode::Exclusive,
227            root.simplified_display(),
228        )
229        .await?;
230
231        Ok(Self {
232            root,
233            refresh,
234            temp_dir,
235            lock_file: Some(Arc::new(lock_file)),
236        })
237    }
238
239    /// Acquire a lock that allows removing entries from the cache, if available.
240    ///
241    /// If the lock is not immediately available, returns [`Err`] with self.
242    pub fn with_exclusive_lock_no_wait(self) -> Result<Self, Self> {
243        let Self {
244            root,
245            refresh,
246            temp_dir,
247            lock_file,
248        } = self;
249
250        match LockedFile::acquire_no_wait(
251            root.join(".lock"),
252            LockedFileMode::Exclusive,
253            root.simplified_display(),
254        ) {
255            Some(lock_file) => Ok(Self {
256                root,
257                refresh,
258                temp_dir,
259                lock_file: Some(Arc::new(lock_file)),
260            }),
261            None => Err(Self {
262                root,
263                refresh,
264                temp_dir,
265                lock_file,
266            }),
267        }
268    }
269
270    /// Return the root of the cache.
271    pub fn root(&self) -> &Path {
272        &self.root
273    }
274
275    /// The folder for a specific cache bucket
276    pub fn bucket(&self, cache_bucket: CacheBucket) -> PathBuf {
277        self.root.join(cache_bucket.to_str())
278    }
279
280    /// Compute an entry in the cache.
281    pub fn shard(&self, cache_bucket: CacheBucket, dir: impl AsRef<Path>) -> CacheShard {
282        CacheShard(self.bucket(cache_bucket).join(dir.as_ref()))
283    }
284
285    /// Compute an entry in the cache.
286    pub fn entry(
287        &self,
288        cache_bucket: CacheBucket,
289        dir: impl AsRef<Path>,
290        file: impl AsRef<Path>,
291    ) -> CacheEntry {
292        CacheEntry::new(self.bucket(cache_bucket).join(dir), file)
293    }
294
295    /// Return the path to an archive in the cache.
296    pub fn archive(&self, id: &ArchiveId) -> PathBuf {
297        self.bucket(CacheBucket::Archive).join(id)
298    }
299
300    /// Create a temporary directory to be used as a Python virtual environment.
301    pub fn venv_dir(&self) -> io::Result<tempfile::TempDir> {
302        fs_err::create_dir_all(self.bucket(CacheBucket::Builds))?;
303        tempfile::tempdir_in(self.bucket(CacheBucket::Builds))
304    }
305
306    /// Create a temporary directory to be used for executing PEP 517 source distribution builds.
307    pub fn build_dir(&self) -> io::Result<tempfile::TempDir> {
308        fs_err::create_dir_all(self.bucket(CacheBucket::Builds))?;
309        tempfile::tempdir_in(self.bucket(CacheBucket::Builds))
310    }
311
312    /// Returns `true` if a cache entry must be revalidated given the [`Refresh`] policy.
313    pub fn must_revalidate_package(&self, package: &PackageName) -> bool {
314        match &self.refresh {
315            Refresh::None(_) => false,
316            Refresh::All(_) => true,
317            Refresh::Packages(packages, _, _) => packages.contains(package),
318        }
319    }
320
321    /// Returns `true` if a cache entry must be revalidated given the [`Refresh`] policy.
322    pub fn must_revalidate_path(&self, path: &Path) -> bool {
323        match &self.refresh {
324            Refresh::None(_) => false,
325            Refresh::All(_) => true,
326            Refresh::Packages(_, paths, _) => paths
327                .iter()
328                .any(|target| same_file::is_same_file(path, target).unwrap_or(false)),
329        }
330    }
331
332    /// Returns the [`Freshness`] for a cache entry, validating it against the [`Refresh`] policy.
333    ///
334    /// A cache entry is considered fresh if it was created after the cache itself was
335    /// initialized, or if the [`Refresh`] policy does not require revalidation.
336    pub fn freshness(
337        &self,
338        entry: &CacheEntry,
339        package: Option<&PackageName>,
340        path: Option<&Path>,
341    ) -> io::Result<Freshness> {
342        // Grab the cutoff timestamp, if it's relevant.
343        let timestamp = match &self.refresh {
344            Refresh::None(_) => return Ok(Freshness::Fresh),
345            Refresh::All(timestamp) => timestamp,
346            Refresh::Packages(packages, paths, timestamp) => {
347                if package.is_none_or(|package| packages.contains(package))
348                    || path.is_some_and(|path| {
349                        paths
350                            .iter()
351                            .any(|target| same_file::is_same_file(path, target).unwrap_or(false))
352                    })
353                {
354                    timestamp
355                } else {
356                    return Ok(Freshness::Fresh);
357                }
358            }
359        };
360
361        match fs_err::metadata(entry.path()) {
362            Ok(metadata) => {
363                if Timestamp::from_metadata(&metadata) >= *timestamp {
364                    Ok(Freshness::Fresh)
365                } else {
366                    Ok(Freshness::Stale)
367                }
368            }
369            Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(Freshness::Missing),
370            Err(err) => Err(err),
371        }
372    }
373
374    /// Persist a temporary directory to the artifact store, returning its unique ID.
375    pub async fn persist(
376        &self,
377        temp_dir: impl AsRef<Path>,
378        path: impl AsRef<Path>,
379    ) -> io::Result<ArchiveId> {
380        // Create a unique ID for the artifact.
381        // TODO(charlie): Support content-addressed persistence via SHAs.
382        let id = ArchiveId::new();
383
384        // Move the temporary directory into the directory store.
385        let archive_entry = self.entry(CacheBucket::Archive, "", &id);
386        fs_err::create_dir_all(archive_entry.dir())?;
387        uv_fs::rename_with_retry(temp_dir.as_ref(), archive_entry.path()).await?;
388
389        // Create a symlink to the directory store.
390        fs_err::create_dir_all(path.as_ref().parent().expect("Cache entry to have parent"))?;
391        self.create_link(&id, path.as_ref())?;
392
393        Ok(id)
394    }
395
396    /// Returns `true` if the [`Cache`] is temporary.
397    pub fn is_temporary(&self) -> bool {
398        self.temp_dir.is_some()
399    }
400
401    /// Populate the cache scaffold.
402    fn create_base_files(root: &PathBuf) -> io::Result<()> {
403        // Create the cache directory, if it doesn't exist.
404        fs_err::create_dir_all(root)?;
405
406        // Add the CACHEDIR.TAG.
407        cachedir::ensure_tag(root)?;
408
409        // Add the .gitignore.
410        match fs_err::OpenOptions::new()
411            .write(true)
412            .create_new(true)
413            .open(root.join(".gitignore"))
414        {
415            Ok(mut file) => file.write_all(b"*")?,
416            Err(err) if err.kind() == io::ErrorKind::AlreadyExists => (),
417            Err(err) => return Err(err),
418        }
419
420        // Add an empty .gitignore to the build bucket, to ensure that the cache's own .gitignore
421        // doesn't interfere with source distribution builds. Build backends (like hatchling) will
422        // traverse upwards to look for .gitignore files.
423        fs_err::create_dir_all(root.join(CacheBucket::SourceDistributions.to_str()))?;
424        match fs_err::OpenOptions::new()
425            .write(true)
426            .create_new(true)
427            .open(
428                root.join(CacheBucket::SourceDistributions.to_str())
429                    .join(".gitignore"),
430            ) {
431            Ok(_) => {}
432            Err(err) if err.kind() == io::ErrorKind::AlreadyExists => (),
433            Err(err) => return Err(err),
434        }
435
436        // Add a phony .git, if it doesn't exist, to ensure that the cache isn't considered to be
437        // part of a Git repository. (Some packages will include Git metadata (like a hash) in the
438        // built version if they're in a Git repository, but the cache should be viewed as an
439        // isolated store.).
440        // We have to put this below the gitignore. Otherwise, if the build backend uses the rust
441        // ignore crate it will walk up to the top level .gitignore and ignore its python source
442        // files.
443        let phony_git = root
444            .join(CacheBucket::SourceDistributions.to_str())
445            .join(".git");
446        match fs_err::OpenOptions::new()
447            .create(true)
448            .write(true)
449            .open(&phony_git)
450        {
451            Ok(_) => {}
452            // Handle read-only caches including sandboxed environments.
453            Err(err) if err.kind() == io::ErrorKind::ReadOnlyFilesystem => {
454                if !phony_git.exists() {
455                    return Err(err);
456                }
457            }
458            Err(err) => return Err(err),
459        }
460
461        Ok(())
462    }
463
464    /// Initialize the [`Cache`].
465    pub async fn init(self) -> Result<Self, Error> {
466        let root = &self.root;
467
468        Self::create_base_files(root).map_err(|err| Error::Init(root.clone(), err))?;
469
470        // Block cache removal operations from interfering.
471        let lock_file = match LockedFile::acquire(
472            root.join(".lock"),
473            LockedFileMode::Shared,
474            root.simplified_display(),
475        )
476        .await
477        {
478            Ok(lock_file) => Some(Arc::new(lock_file)),
479            Err(err)
480                if err
481                    .as_io_error()
482                    .is_some_and(|err| err.kind() == io::ErrorKind::Unsupported) =>
483            {
484                warn!(
485                    "Shared locking is not supported by the current platform or filesystem, \
486                        reduced parallel process safety with `uv cache clean` and `uv cache prune`."
487                );
488                None
489            }
490            Err(err) => return Err(err.into()),
491        };
492
493        Ok(Self {
494            root: std::path::absolute(root).map_err(Error::Absolute)?,
495            lock_file,
496            ..self
497        })
498    }
499
500    /// Initialize the [`Cache`], assuming that there are no other uv processes running.
501    pub fn init_no_wait(self) -> Result<Option<Self>, Error> {
502        let root = &self.root;
503
504        Self::create_base_files(root).map_err(|err| Error::Init(root.clone(), err))?;
505
506        // Block cache removal operations from interfering.
507        let Some(lock_file) = LockedFile::acquire_no_wait(
508            root.join(".lock"),
509            LockedFileMode::Shared,
510            root.simplified_display(),
511        ) else {
512            return Ok(None);
513        };
514        Ok(Some(Self {
515            root: std::path::absolute(root).map_err(Error::Absolute)?,
516            lock_file: Some(Arc::new(lock_file)),
517            ..self
518        }))
519    }
520
521    /// Clear the cache, removing all entries.
522    pub fn clear(self, reporter: Box<dyn CleanReporter>) -> Result<Removal, io::Error> {
523        // Remove everything but `.lock`, Windows does not allow removal of a locked file
524        let mut removal = Remover::new(reporter).rm_rf(&self.root, true)?;
525        let Self {
526            root, lock_file, ..
527        } = self;
528
529        // Remove the `.lock` file, unlocking it first
530        if let Some(lock) = lock_file {
531            drop(lock);
532            fs_err::remove_file(root.join(".lock"))?;
533        }
534        removal.num_files += 1;
535
536        // Remove the root directory
537        match fs_err::remove_dir(root) {
538            Ok(()) => {
539                removal.num_dirs += 1;
540            }
541            // On Windows, when `--force` is used, the `.lock` file can exist and be unremovable,
542            // so we make this non-fatal
543            Err(err) if err.kind() == io::ErrorKind::DirectoryNotEmpty => {
544                trace!("Failed to remove root cache directory: not empty");
545            }
546            Err(err) => return Err(err),
547        }
548
549        Ok(removal)
550    }
551
552    /// Remove a package from the cache.
553    ///
554    /// Returns the number of entries removed from the cache.
555    pub fn remove(&self, name: &PackageName) -> io::Result<Removal> {
556        // Collect the set of referenced archives.
557        let references = self.find_archive_references()?;
558
559        // Remove any entries for the package from the cache.
560        let mut summary = Removal::default();
561        for bucket in CacheBucket::iter() {
562            summary += bucket.remove(self, name)?;
563        }
564
565        if references.is_empty() {
566            return Ok(summary);
567        }
568
569        // Only remove targets in the archive bucket. Cache entries may contain unexpected links
570        // to paths outside the cache.
571        let archive_root = fs_err::canonicalize(&self.root)?.join(CacheBucket::Archive.to_str());
572
573        // Remove any archives that are no longer referenced.
574        for (target, references) in references {
575            if target.starts_with(&archive_root) && references.iter().all(|path| !path.exists()) {
576                debug!("Removing dangling cache entry: {}", target.display());
577                summary += rm_rf(target)?;
578            }
579        }
580
581        Ok(summary)
582    }
583
584    /// Run the garbage collector on the cache, removing any dangling entries.
585    pub fn prune(&self, ci: bool) -> Result<Removal, io::Error> {
586        let mut summary = Removal::default();
587
588        // First, remove any top-level directories that are unused. These typically represent
589        // outdated cache buckets (e.g., `wheels-v0`, when latest is `wheels-v1`).
590        for entry in fs_err::read_dir(&self.root)? {
591            let entry = entry?;
592            let metadata = entry.metadata()?;
593
594            if entry.file_name() == "CACHEDIR.TAG"
595                || entry.file_name() == ".gitignore"
596                || entry.file_name() == ".git"
597                || entry.file_name() == ".lock"
598            {
599                continue;
600            }
601
602            if metadata.is_dir() {
603                // If the directory is not a cache bucket, remove it.
604                if CacheBucket::iter().all(|bucket| entry.file_name() != bucket.to_str()) {
605                    let path = entry.path();
606                    debug!("Removing dangling cache bucket: {}", path.display());
607                    summary += rm_rf(path)?;
608                }
609            } else {
610                // If the file is not a marker file, remove it.
611                let path = entry.path();
612                debug!("Removing dangling cache bucket: {}", path.display());
613                summary += rm_rf(path)?;
614            }
615        }
616
617        // Second, remove any cached environments. These are never referenced by symlinks, so we can
618        // remove them directly.
619        match fs_err::read_dir(self.bucket(CacheBucket::Environments)) {
620            Ok(entries) => {
621                for entry in entries {
622                    let entry = entry?;
623                    let path = entry.path();
624                    debug!("Removing dangling cache environment: {}", path.display());
625                    summary += rm_rf(path)?;
626                }
627            }
628            Err(err) if err.kind() == io::ErrorKind::NotFound => (),
629            Err(err) => return Err(err),
630        }
631
632        // Third, if enabled, remove all unzipped wheels, leaving only the wheel archives.
633        if ci {
634            // Remove the entire pre-built wheel cache, since every entry is an unzipped wheel.
635            match fs_err::read_dir(self.bucket(CacheBucket::Wheels)) {
636                Ok(entries) => {
637                    for entry in entries {
638                        let entry = entry?;
639                        let path = entry.path();
640                        if path.is_dir() {
641                            debug!("Removing unzipped wheel entry: {}", path.display());
642                            summary += rm_rf(path)?;
643                        }
644                    }
645                }
646                Err(err) if err.kind() == io::ErrorKind::NotFound => (),
647                Err(err) => return Err(err),
648            }
649
650            let source_distributions = self.bucket(CacheBucket::SourceDistributions);
651            if source_distributions.try_exists()? {
652                for entry in walkdir::WalkDir::new(source_distributions) {
653                    let entry = entry?;
654
655                    // If the directory contains a `metadata.msgpack`, then it's a built wheel revision.
656                    if !entry.file_type().is_dir() {
657                        continue;
658                    }
659
660                    if !entry.path().join("metadata.msgpack").exists() {
661                        continue;
662                    }
663
664                    // Remove everything except the built wheel archive and the metadata.
665                    for entry in fs_err::read_dir(entry.path())? {
666                        let entry = entry?;
667                        let path = entry.path();
668
669                        // Retain the resolved metadata (`metadata.msgpack`).
670                        if path
671                            .file_name()
672                            .is_some_and(|file_name| file_name == "metadata.msgpack")
673                        {
674                            continue;
675                        }
676
677                        // Retain any built wheel archives.
678                        if path
679                            .extension()
680                            .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
681                        {
682                            continue;
683                        }
684
685                        debug!("Removing unzipped built wheel entry: {}", path.display());
686                        summary += rm_rf(path)?;
687                    }
688                }
689            }
690        }
691
692        // Fourth, remove any unused archives (by searching for archives that are not symlinked).
693        let references = self.find_archive_references()?;
694
695        match fs_err::read_dir(self.bucket(CacheBucket::Archive)) {
696            Ok(entries) => {
697                for entry in entries {
698                    let entry = entry?;
699                    let path = entry.path();
700                    let target = fs_err::canonicalize(&path)?;
701                    if !references.contains_key(&target) {
702                        debug!("Removing dangling cache archive: {}", path.display());
703                        summary += rm_rf(path)?;
704                    }
705                }
706            }
707            Err(err) if err.kind() == io::ErrorKind::NotFound => (),
708            Err(err) => return Err(err),
709        }
710
711        Ok(summary)
712    }
713
714    /// Find all references to entries in the archive bucket.
715    ///
716    /// Archive entries are often referenced by symlinks in other cache buckets. This method
717    /// searches for all such references.
718    ///
719    /// Returns a map from archive path to paths that reference it.
720    fn find_archive_references(&self) -> Result<FxHashMap<PathBuf, Vec<PathBuf>>, io::Error> {
721        let mut references = FxHashMap::<PathBuf, Vec<PathBuf>>::default();
722        for bucket in [CacheBucket::SourceDistributions, CacheBucket::Wheels] {
723            let bucket_path = self.bucket(bucket);
724            if bucket_path.is_dir() {
725                let walker = walkdir::WalkDir::new(&bucket_path).into_iter();
726                for entry in walker.filter_entry(|entry| {
727                    !(
728                        // As an optimization, ignore any `.lock`, `.whl`, `.msgpack`, `.rev`, or
729                        // `.http` files, along with the `src` directory, which represents the
730                        // unpacked source distribution.
731                        entry.file_name() == "src"
732                            || entry.file_name() == ".lock"
733                            || entry.file_name() == ".gitignore"
734                            || entry.path().extension().is_some_and(|ext| {
735                                ext.eq_ignore_ascii_case("lock")
736                                    || ext.eq_ignore_ascii_case("whl")
737                                    || ext.eq_ignore_ascii_case("http")
738                                    || ext.eq_ignore_ascii_case("rev")
739                                    || ext.eq_ignore_ascii_case("msgpack")
740                            })
741                    )
742                }) {
743                    let entry = entry?;
744
745                    // On Unix, archive references use symlinks.
746                    if cfg!(unix) {
747                        if !entry.file_type().is_symlink() {
748                            continue;
749                        }
750                    }
751
752                    // On Windows, archive references are files containing structured data.
753                    if cfg!(windows) {
754                        if !entry.file_type().is_file() {
755                            continue;
756                        }
757                    }
758
759                    if let Ok(target) = self.resolve_link(entry.path()) {
760                        references
761                            .entry(target)
762                            .or_default()
763                            .push(entry.path().to_path_buf());
764                    }
765                }
766            }
767        }
768        Ok(references)
769    }
770
771    /// Create a link to a directory in the archive bucket.
772    ///
773    /// On Windows, we write structured data ([`Link`]) to a file containing the archive ID and
774    /// version. On Unix, we create a symlink to the target directory.
775    #[cfg(windows)]
776    #[expect(clippy::unused_self)]
777    fn create_link(&self, id: &ArchiveId, dst: impl AsRef<Path>) -> io::Result<()> {
778        // Serialize the link.
779        let link = Link::new(id.clone());
780        let contents = link.to_string();
781
782        // First, attempt to create a file at the location, but fail if it already exists.
783        match fs_err::OpenOptions::new()
784            .write(true)
785            .create_new(true)
786            .open(dst.as_ref())
787        {
788            Ok(mut file) => {
789                // Write the target path to the file.
790                file.write_all(contents.as_bytes())?;
791                Ok(())
792            }
793            Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
794                // Write to a temporary file, then move it into place.
795                let temp_dir = tempfile::tempdir_in(dst.as_ref().parent().unwrap())?;
796                let temp_file = temp_dir.path().join("link");
797                fs_err::write(&temp_file, contents.as_bytes())?;
798
799                // Move the symlink into the target location.
800                fs_err::rename(&temp_file, dst.as_ref())?;
801
802                Ok(())
803            }
804            Err(err) => Err(err),
805        }
806    }
807
808    /// Resolve an archive link, returning the fully-resolved path.
809    ///
810    /// Returns an error if the link target does not exist.
811    #[cfg(windows)]
812    pub fn resolve_link(&self, path: impl AsRef<Path>) -> io::Result<PathBuf> {
813        // Deserialize the link.
814        let contents = fs_err::read_to_string(path.as_ref())?;
815        let link = Link::from_str(&contents)?;
816
817        // Ignore stale links.
818        if link.version != ARCHIVE_VERSION {
819            return Err(io::Error::new(
820                io::ErrorKind::NotFound,
821                "The link target does not exist.",
822            ));
823        }
824
825        // Reconstruct the path.
826        let path = self.archive(&link.id);
827        path.canonicalize()
828    }
829
830    /// Create a link to a directory in the archive bucket.
831    ///
832    /// On Windows, we write structured data ([`Link`]) to a file containing the archive ID and
833    /// version. On Unix, we create a symlink to the target directory.
834    #[cfg(unix)]
835    fn create_link(&self, id: &ArchiveId, dst: impl AsRef<Path>) -> io::Result<()> {
836        let dst = dst.as_ref();
837        let dst_parent = dst.parent().expect("Cache entry to have parent");
838        // Construct the relative link target.
839        let src = uv_fs::relative_to(self.archive(id), dst_parent)?;
840
841        // Attempt to create the symlink directly.
842        match fs_err::os::unix::fs::symlink(&src, dst) {
843            Ok(()) => Ok(()),
844            Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
845                // Create a symlink, using a temporary file to ensure atomicity.
846                let temp_dir = tempfile::tempdir_in(dst_parent)?;
847                let temp_file = temp_dir.path().join("link");
848                fs_err::os::unix::fs::symlink(&src, &temp_file)?;
849
850                // Move the symlink into the target location.
851                fs_err::rename(&temp_file, dst)?;
852
853                Ok(())
854            }
855            Err(err) => Err(err),
856        }
857    }
858
859    /// Resolve an archive link, returning the fully-resolved path.
860    ///
861    /// Returns an error if the link target does not exist.
862    #[cfg(unix)]
863    pub fn resolve_link(&self, path: impl AsRef<Path>) -> io::Result<PathBuf> {
864        path.as_ref().canonicalize()
865    }
866}
867
/// A link to an entry in the archive bucket, identified by the entry's ID and the version of
/// the archive bucket the link was created for.
///
/// The `Display` and `FromStr` implementations serialize a link as `archive-v{version}/{id}`.
#[derive(Debug, Clone)]
// NOTE(review): presumably allowed because the type is only exercised on some platforms
// (the Windows link helpers) and in tests — confirm.
#[allow(unused)]
struct Link {
    /// The unique ID of the entry in the archive bucket.
    id: ArchiveId,
    /// The version of the archive bucket.
    version: u8,
}
877
878#[allow(unused)]
879impl Link {
880    /// Create a new [`Archive`] with the given ID and hashes.
881    fn new(id: ArchiveId) -> Self {
882        Self {
883            id,
884            version: ARCHIVE_VERSION,
885        }
886    }
887}
888
889impl Display for Link {
890    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
891        write!(f, "archive-v{}/{}", self.version, self.id)
892    }
893}
894
895impl FromStr for Link {
896    type Err = io::Error;
897
898    fn from_str(s: &str) -> Result<Self, Self::Err> {
899        let mut parts = s.splitn(2, '/');
900        let version = parts
901            .next()
902            .filter(|s| !s.is_empty())
903            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing version"))?;
904        let id = parts
905            .next()
906            .filter(|s| !s.is_empty())
907            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing ID"))?;
908
909        // Parse the archive version from `archive-v{version}/{id}`.
910        let version = version
911            .strip_prefix("archive-v")
912            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing version prefix"))?;
913        let version = u8::from_str(version).map_err(|err| {
914            io::Error::new(
915                io::ErrorKind::InvalidData,
916                format!("failed to parse version: {err}"),
917            )
918        })?;
919
920        // Parse the ID from `archive-v{version}/{id}`.
921        let id = ArchiveId::from_str(id).map_err(|err| {
922            io::Error::new(
923                io::ErrorKind::InvalidData,
924                format!("failed to parse ID: {err}"),
925            )
926        })?;
927
928        Ok(Self { id, version })
929    }
930}
931
/// Receives progress callbacks while files and directories are being removed.
///
/// Implementations must be `Send + Sync`.
pub trait CleanReporter: Send + Sync {
    /// Called after one file or directory is removed.
    fn on_clean(&self);

    /// Called after all files and directories are removed.
    fn on_complete(&self);
}
939
/// The different kinds of data in the cache are stored in different buckets, which in our case
/// are subdirectories of the cache root.
///
/// NOTE: The directory names and version suffixes used in the examples on the variants below
/// (e.g., `wheel-metadata-v0`, `built-wheels-v4`) are illustrative and do not all match the
/// names returned by `CacheBucket::to_str`, which defines the names actually used on disk.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum CacheBucket {
    /// Wheels (excluding built wheels), alongside their metadata and cache policy.
    ///
    /// There are three kinds of cache entries: Wheel metadata and policy as `MsgPack` files, the
    /// wheels themselves, and the unzipped wheel archives. If a wheel file is over an in-memory
    /// size threshold, we first download the zip file into the cache, then unzip it into a
    /// directory with the same name (exclusive of the `.whl` extension).
    ///
    /// Cache structure:
    ///  * `wheel-metadata-v0/pypi/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
    ///  * `wheel-metadata-v0/<digest(index-url)>/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
    ///  * `wheel-metadata-v0/url/<digest(url)>/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
    ///
    /// See `uv_client::RegistryClient::wheel_metadata` for information on how wheel metadata
    /// is fetched.
    ///
    /// # Example
    ///
    /// Consider the following `requirements.in`:
    /// ```text
    /// # pypi wheel
    /// pandas
    /// # url wheel
    /// flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl
    /// ```
    ///
    /// When we run `pip compile`, it will only fetch and cache the metadata (and cache policy);
    /// it doesn't need the actual wheels yet:
    /// ```text
    /// wheel-v0
    /// ├── pypi
    /// │   ...
    /// │   ├── pandas
    /// │   │   └── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.msgpack
    /// │   ...
    /// └── url
    ///     └── 4b8be67c801a7ecb
    ///         └── flask
    ///             └── flask-3.0.0-py3-none-any.msgpack
    /// ```
    ///
    /// We get the following `requirements.txt` from `pip compile`:
    ///
    /// ```text
    /// [...]
    /// flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl
    /// [...]
    /// pandas==2.1.3
    /// [...]
    /// ```
    ///
    /// If we run `pip sync` on `requirements.txt` on a different machine, it also fetches the
    /// wheels:
    ///
    /// TODO(konstin): This is still wrong, we need to store the cache policy too!
    /// ```text
    /// wheel-v0
    /// ├── pypi
    /// │   ...
    /// │   ├── pandas
    /// │   │   ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    /// │   │   ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64
    /// │   ...
    /// └── url
    ///     └── 4b8be67c801a7ecb
    ///         └── flask
    ///             └── flask-3.0.0-py3-none-any.whl
    ///                 ├── flask
    ///                 │   └── ...
    ///                 └── flask-3.0.0.dist-info
    ///                     └── ...
    /// ```
    ///
    /// If we run first `pip compile` and then `pip sync` on the same machine, we get both:
    ///
    /// ```text
    /// wheels-v0
    /// ├── pypi
    /// │   ├── ...
    /// │   ├── pandas
    /// │   │   ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.msgpack
    /// │   │   ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    /// │   │   └── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64
    /// │   │       ├── pandas
    /// │   │       │   ├── ...
    /// │   │       ├── pandas-2.1.3.dist-info
    /// │   │       │   ├── ...
    /// │   │       └── pandas.libs
    /// │   ├── ...
    /// └── url
    ///     └── 4b8be67c801a7ecb
    ///         └── flask
    ///             ├── flask-3.0.0-py3-none-any.msgpack
    ///             ├── flask-3.0.0-py3-none-any.whl
    ///             └── flask-3.0.0-py3-none-any
    ///                 ├── flask
    ///                 │   └── ...
    ///                 └── flask-3.0.0.dist-info
    ///                     └── ...
    /// ```
    Wheels,
    /// Source distributions, wheels built from source distributions, their extracted metadata, and the
    /// cache policy of the source distribution.
    ///
    /// The structure is similar to that of the `Wheels` bucket, except we have an additional layer
    /// for the source distribution filename and the metadata is at the source distribution-level,
    /// not at the wheel level.
    ///
    /// TODO(konstin): The cache policy should be on the source distribution level, the metadata we
    /// can put next to the wheels as in the `Wheels` bucket.
    ///
    /// The unzipped source distribution is stored in a directory matching the source distribution
    /// archive name.
    ///
    /// Source distributions are built into zipped wheel files (as PEP 517 specifies) and unzipped
    /// lazily before installing. So when resolving, we only build the wheel and store the archive
    /// file in the cache; when installing, we unpack it under the same name (exclusive of the
    /// `.whl` extension). You may find a mix of wheel archive zip files and unzipped wheel
    /// directories in the cache.
    ///
    /// Cache structure:
    ///  * `built-wheels-v0/pypi/foo/34a17436ed1e9669/{manifest.msgpack, metadata.msgpack, foo-1.0.0.zip, foo-1.0.0-py3-none-any.whl, ...other wheels}`
    ///  * `built-wheels-v0/<digest(index-url)>/foo/foo-1.0.0.zip/{manifest.msgpack, metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
    ///  * `built-wheels-v0/url/<digest(url)>/foo/foo-1.0.0.zip/{manifest.msgpack, metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
    ///  * `built-wheels-v0/git/<digest(url)>/<git sha>/foo/foo-1.0.0.zip/{metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
    ///
    /// But the url filename does not need to be a valid source dist filename
    /// (<https://github.com/search?q=path%3A**%2Frequirements.txt+master.zip&type=code>),
    /// so it could also be the following and we have to take any string as filename:
    ///  * `built-wheels-v0/url/<sha256(url)>/master.zip/metadata.msgpack`
    ///
    /// # Example
    ///
    /// The following requirements:
    /// ```text
    /// # git source dist
    /// pydantic-extra-types @ git+https://github.com/pydantic/pydantic-extra-types.git
    /// # pypi source dist
    /// django_allauth==0.51.0
    /// # url source dist
    /// werkzeug @ https://files.pythonhosted.org/packages/0d/cc/ff1904eb5eb4b455e442834dabf9427331ac0fa02853bf83db817a7dd53d/werkzeug-3.0.1.tar.gz
    /// ```
    ///
    /// ...may be cached as:
    /// ```text
    /// built-wheels-v4/
    /// ├── git
    /// │   └── 2122faf3e081fb7a
    /// │       └── 7a2d650a4a7b4d04
    /// │           ├── metadata.msgpack
    /// │           └── pydantic_extra_types-2.9.0-py3-none-any.whl
    /// ├── pypi
    /// │   └── django-allauth
    /// │       └── 0.51.0
    /// │           ├── 0gH-_fwv8tdJ7JwwjJsUc
    /// │           │   ├── django-allauth-0.51.0.tar.gz
    /// │           │   │   └── [UNZIPPED CONTENTS]
    /// │           │   ├── django_allauth-0.51.0-py3-none-any.whl
    /// │           │   └── metadata.msgpack
    /// │           └── revision.http
    /// └── url
    ///     └── 6781bd6440ae72c2
    ///         ├── APYY01rbIfpAo_ij9sCY6
    ///         │   ├── metadata.msgpack
    ///         │   ├── werkzeug-3.0.1-py3-none-any.whl
    ///         │   └── werkzeug-3.0.1.tar.gz
    ///         │       └── [UNZIPPED CONTENTS]
    ///         └── revision.http
    /// ```
    ///
    /// Structurally, the `manifest.msgpack` is empty, and only contains the caching information
    /// needed to invalidate the cache. The `metadata.msgpack` contains the metadata of the source
    /// distribution.
    SourceDistributions,
    /// Flat index responses, a format very similar to the simple metadata API.
    ///
    /// Cache structure:
    ///  * `flat-index-v0/index/<digest(flat_index_url)>.msgpack`
    ///
    /// The response is stored as `Vec<File>`.
    FlatIndex,
    /// Git repositories.
    Git,
    /// Information about an interpreter at a path.
    ///
    /// To avoid caching pyenv shims, bash scripts which may redirect to a new python version
    /// without the shim itself changing, we only cache when the path equals `sys.executable`, i.e.
    /// the path we're running is the python executable itself and not a shim.
    ///
    /// Cache structure: `interpreter-v0/<digest(path)>.msgpack`
    ///
    /// # Example
    ///
    /// The contents of each of the `MsgPack` files has a timestamp field in unix time, the [PEP 508]
    /// markers and some information from the `sys`/`sysconfig` modules.
    ///
    /// ```json
    /// {
    ///   "timestamp": 1698047994491,
    ///   "data": {
    ///     "markers": {
    ///       "implementation_name": "cpython",
    ///       "implementation_version": "3.12.0",
    ///       "os_name": "posix",
    ///       "platform_machine": "x86_64",
    ///       "platform_python_implementation": "CPython",
    ///       "platform_release": "6.5.0-13-generic",
    ///       "platform_system": "Linux",
    ///       "platform_version": "#13-Ubuntu SMP PREEMPT_DYNAMIC Fri Nov  3 12:16:05 UTC 2023",
    ///       "python_full_version": "3.12.0",
    ///       "python_version": "3.12",
    ///       "sys_platform": "linux"
    ///     },
    ///     "base_exec_prefix": "/home/ferris/.pyenv/versions/3.12.0",
    ///     "base_prefix": "/home/ferris/.pyenv/versions/3.12.0",
    ///     "sys_executable": "/home/ferris/projects/uv/.venv/bin/python"
    ///   }
    /// }
    /// ```
    ///
    /// [PEP 508]: https://peps.python.org/pep-0508/#environment-markers
    Interpreter,
    /// Index responses through the simple metadata API.
    ///
    /// Cache structure:
    ///  * `simple-v0/pypi/<package_name>.rkyv`
    ///  * `simple-v0/<digest(index_url)>/<package_name>.rkyv`
    ///
    /// The response is parsed into `uv_client::SimpleDetailMetadata` before storage.
    Simple,
    /// A cache of unzipped wheels, stored as directories. This is used internally within the cache.
    /// When other buckets need to store directories, they should persist them to
    /// [`CacheBucket::Archive`], and then symlink them into the appropriate bucket. This ensures
    /// that cache entries can be atomically replaced and removed, as storing directories in the
    /// other buckets directly would make atomic operations impossible.
    Archive,
    /// Ephemeral virtual environments used to execute PEP 517 builds and other operations.
    Builds,
    /// Reusable virtual environments used to invoke Python tools.
    Environments,
    /// Cached Python downloads.
    Python,
    /// Downloaded tool binaries (e.g., Ruff).
    Binaries,
    /// Cached vulnerability data from [OSV](https://osv.dev/).
    ///
    /// Cache structure:
    ///  * `osv-v0/vulnerability/<vuln_id>.msgpack` — cached full vulnerability records
    Osv,
}
1192
1193impl CacheBucket {
1194    fn to_str(self) -> &'static str {
1195        match self {
1196            // Note that when bumping this, you'll also need to bump it
1197            // in `crates/uv/tests/build/cache_prune.rs`.
1198            Self::SourceDistributions => "sdists-v9",
1199            Self::FlatIndex => "flat-index-v2",
1200            Self::Git => "git-v0",
1201            Self::Interpreter => "interpreter-v4",
1202            // Note that when bumping this, you'll also need to bump it
1203            // in `crates/uv/tests/build/cache_clean.rs`.
1204            Self::Simple => "simple-v21",
1205            // Note that when bumping this, you'll also need to bump it
1206            // in `crates/uv/tests/build/cache_prune.rs`.
1207            Self::Wheels => "wheels-v6",
1208            // Note that when bumping this, you'll also need to bump
1209            // `ARCHIVE_VERSION` in `crates/uv-cache/src/lib.rs`.
1210            Self::Archive => "archive-v0",
1211            Self::Builds => "builds-v0",
1212            Self::Environments => "environments-v2",
1213            Self::Python => "python-v0",
1214            Self::Binaries => "binaries-v0",
1215            Self::Osv => "osv-v0",
1216        }
1217    }
1218
1219    /// Remove a package from the cache bucket.
1220    ///
1221    /// Returns the number of entries removed from the cache.
1222    fn remove(self, cache: &Cache, name: &PackageName) -> Result<Removal, io::Error> {
1223        /// Returns `true` if the [`Path`] represents a built wheel for the given package.
1224        fn is_match(path: &Path, name: &PackageName) -> bool {
1225            let Ok(metadata) = fs_err::read(path.join("metadata.msgpack")) else {
1226                return false;
1227            };
1228            let Ok(metadata) = rmp_serde::from_slice::<ResolutionMetadata>(&metadata) else {
1229                return false;
1230            };
1231            metadata.name == *name
1232        }
1233
1234        let mut summary = Removal::default();
1235        match self {
1236            Self::Wheels => {
1237                // For `pypi` wheels, we expect a directory per package (indexed by name).
1238                let root = cache.bucket(self).join(WheelCacheKind::Pypi);
1239                summary += rm_rf(root.join(name.to_string()))?;
1240
1241                // For alternate indices, we expect a directory for every index (under an `index`
1242                // subdirectory), followed by a directory per package (indexed by name).
1243                let root = cache.bucket(self).join(WheelCacheKind::Index);
1244                for directory in directories(root)? {
1245                    summary += rm_rf(directory.join(name.to_string()))?;
1246                }
1247
1248                // For direct URLs, we expect a directory for every URL, followed by a
1249                // directory per package (indexed by name).
1250                let root = cache.bucket(self).join(WheelCacheKind::Url);
1251                for directory in directories(root)? {
1252                    summary += rm_rf(directory.join(name.to_string()))?;
1253                }
1254            }
1255            Self::SourceDistributions => {
1256                // For `pypi` wheels, we expect a directory per package (indexed by name).
1257                let root = cache.bucket(self).join(WheelCacheKind::Pypi);
1258                summary += rm_rf(root.join(name.to_string()))?;
1259
1260                // For alternate indices, we expect a directory for every index (under an `index`
1261                // subdirectory), followed by a directory per package (indexed by name).
1262                let root = cache.bucket(self).join(WheelCacheKind::Index);
1263                for directory in directories(root)? {
1264                    summary += rm_rf(directory.join(name.to_string()))?;
1265                }
1266
1267                // For direct URLs, we expect a directory for every URL, followed by a
1268                // directory per version. To determine whether the URL is relevant, we need to
1269                // search for a wheel matching the package name.
1270                let root = cache.bucket(self).join(WheelCacheKind::Url);
1271                for url in directories(root)? {
1272                    if directories(&url)?.any(|version| is_match(&version, name)) {
1273                        summary += rm_rf(url)?;
1274                    }
1275                }
1276
1277                // For local dependencies, we expect a directory for every path, followed by a
1278                // directory per version. To determine whether the path is relevant, we need to
1279                // search for a wheel matching the package name.
1280                let root = cache.bucket(self).join(WheelCacheKind::Path);
1281                for path in directories(root)? {
1282                    if directories(&path)?.any(|version| is_match(&version, name)) {
1283                        summary += rm_rf(path)?;
1284                    }
1285                }
1286
1287                // For Git dependencies, we expect a directory for every repository, followed by a
1288                // directory for every SHA. To determine whether the SHA is relevant, we need to
1289                // search for a wheel matching the package name.
1290                let root = cache.bucket(self).join(WheelCacheKind::Git);
1291                for repository in directories(root)? {
1292                    for sha in directories(repository)? {
1293                        if is_match(&sha, name) {
1294                            summary += rm_rf(sha)?;
1295                        }
1296                    }
1297                }
1298            }
1299            Self::Simple => {
1300                // For `pypi` wheels, we expect a rkyv file per package, indexed by name.
1301                let root = cache.bucket(self).join(WheelCacheKind::Pypi);
1302                summary += rm_rf(root.join(format!("{name}.rkyv")))?;
1303
1304                // For alternate indices, we expect a directory for every index (under an `index`
1305                // subdirectory), followed by a directory per package (indexed by name).
1306                let root = cache.bucket(self).join(WheelCacheKind::Index);
1307                for directory in directories(root)? {
1308                    summary += rm_rf(directory.join(format!("{name}.rkyv")))?;
1309                }
1310            }
1311            Self::FlatIndex => {
1312                // We can't know if the flat index includes a package, so we just remove the entire
1313                // cache entry.
1314                let root = cache.bucket(self);
1315                summary += rm_rf(root)?;
1316            }
1317            Self::Git
1318            | Self::Interpreter
1319            | Self::Archive
1320            | Self::Builds
1321            | Self::Environments
1322            | Self::Python
1323            | Self::Binaries
1324            | Self::Osv => {
1325                // Nothing to do.
1326            }
1327        }
1328        Ok(summary)
1329    }
1330
1331    /// Return an iterator over all cache buckets.
1332    fn iter() -> impl Iterator<Item = Self> {
1333        [
1334            Self::Wheels,
1335            Self::SourceDistributions,
1336            Self::FlatIndex,
1337            Self::Git,
1338            Self::Interpreter,
1339            Self::Simple,
1340            Self::Archive,
1341            Self::Builds,
1342            Self::Environments,
1343            Self::Python,
1344            Self::Binaries,
1345            Self::Osv,
1346        ]
1347        .iter()
1348        .copied()
1349    }
1350}
1351
1352impl Display for CacheBucket {
1353    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1354        f.write_str(self.to_str())
1355    }
1356}
1357
/// The freshness of a cache entry: whether it exists and, if so, whether it is fresh or stale
/// according to a [`Refresh`] policy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Freshness {
    /// The cache entry is fresh according to the [`Refresh`] policy.
    Fresh,
    /// The cache entry is stale according to the [`Refresh`] policy.
    Stale,
    /// The cache entry does not exist.
    Missing,
}
1367
1368impl Freshness {
1369    pub const fn is_fresh(self) -> bool {
1370        matches!(self, Self::Fresh)
1371    }
1372}
1373
/// A refresh policy for cache entries.
///
/// Every variant carries a [`Timestamp`]; `Refresh::from_args` sets it to the time at which the
/// policy was created.
#[derive(Debug, Clone)]
pub enum Refresh {
    /// Don't refresh any entries.
    None(Timestamp),
    /// Refresh entries linked to the given packages, if created before the given timestamp.
    ///
    /// NOTE(review): the second field holds a list of paths, presumably entries to refresh by
    /// path rather than by package name — confirm against the code that consumes it.
    Packages(Vec<PackageName>, Vec<Box<Path>>, Timestamp),
    /// Refresh all entries created before the given timestamp.
    All(Timestamp),
}
1384
1385impl Refresh {
1386    /// Determine the refresh strategy to use based on the command-line arguments.
1387    pub fn from_args(refresh: Option<bool>, refresh_package: Vec<PackageName>) -> Self {
1388        let timestamp = Timestamp::now();
1389        match refresh {
1390            Some(true) => Self::All(timestamp),
1391            Some(false) => Self::None(timestamp),
1392            None => {
1393                if refresh_package.is_empty() {
1394                    Self::None(timestamp)
1395                } else {
1396                    Self::Packages(refresh_package, vec![], timestamp)
1397                }
1398            }
1399        }
1400    }
1401
1402    /// Combine two [`Refresh`] policies, taking the "max" of the two policies.
1403    #[must_use]
1404    pub fn combine(self, other: Self) -> Self {
1405        match (self, other) {
1406            // If the policy is `None`, return the existing refresh policy.
1407            // Take the `max` of the two timestamps.
1408            (Self::None(t1), Self::None(t2)) => Self::None(t1.max(t2)),
1409            (Self::None(t1), Self::All(t2)) => Self::All(t1.max(t2)),
1410            (Self::None(t1), Self::Packages(packages, paths, t2)) => {
1411                Self::Packages(packages, paths, t1.max(t2))
1412            }
1413
1414            // If the policy is `All`, refresh all packages.
1415            (Self::All(t1), Self::None(t2) | Self::All(t2) | Self::Packages(.., t2)) => {
1416                Self::All(t1.max(t2))
1417            }
1418
1419            // If the policy is `Packages`, take the "max" of the two policies.
1420            (Self::Packages(packages, paths, t1), Self::None(t2)) => {
1421                Self::Packages(packages, paths, t1.max(t2))
1422            }
1423            (Self::Packages(.., t1), Self::All(t2)) => Self::All(t1.max(t2)),
1424            (Self::Packages(packages1, paths1, t1), Self::Packages(packages2, paths2, t2)) => {
1425                Self::Packages(
1426                    packages1.into_iter().chain(packages2).collect(),
1427                    paths1.into_iter().chain(paths2).collect(),
1428                    t1.max(t2),
1429                )
1430            }
1431        }
1432    }
1433}
1434
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use crate::ArchiveId;

    use super::Link;
    #[cfg(unix)]
    use super::{Cache, CacheBucket};

    /// Create a cache whose `bucket` directory contains a symlink (`escape`) to a directory
    /// outside of the cache, prune the cache, and verify that only the symlink itself was
    /// removed while the directory it points to was left intact.
    ///
    /// The `siblings` buckets are created (empty) alongside `bucket` before pruning, and `ci` is
    /// forwarded to [`Cache::prune`].
    #[cfg(unix)]
    fn assert_prune_removes_only_symlink(bucket: CacheBucket, siblings: &[CacheBucket], ci: bool) {
        let cache_root = tempfile::tempdir().unwrap();
        let victim_root = tempfile::tempdir().unwrap();
        let bucket_dir = cache_root.path().join(bucket.to_str());
        let victim_dir = victim_root.path().join("victim-dir");
        let symlink = bucket_dir.join("escape");

        fs_err::create_dir_all(&bucket_dir).unwrap();
        for sibling in siblings {
            fs_err::create_dir_all(cache_root.path().join(sibling.to_str())).unwrap();
        }
        fs_err::create_dir_all(&victim_dir).unwrap();
        fs_err::write(victim_dir.join("payload.txt"), "payload").unwrap();
        fs_err::os::unix::fs::symlink(&victim_dir, &symlink).unwrap();

        let summary = Cache::from_path(cache_root.path()).prune(ci).unwrap();

        // Exactly one entry (the symlink) is removed, and nothing behind it.
        assert_eq!(summary.num_files, 1);
        assert_eq!(summary.num_dirs, 0);
        assert!(victim_dir.is_dir());
        assert!(victim_dir.join("payload.txt").is_file());
        assert!(fs_err::symlink_metadata(symlink).is_err());
    }

    #[test]
    fn test_link_round_trip() {
        let link = Link::new(ArchiveId::new());
        let serialized = link.to_string();
        let parsed = Link::from_str(&serialized).unwrap();
        assert_eq!(link.id, parsed.id);
        assert_eq!(link.version, parsed.version);
    }

    #[test]
    fn test_link_deserialize() {
        assert!(Link::from_str("archive-v0/foo").is_ok());
        for invalid in ["archive/foo", "v1/foo", "archive-v0/"] {
            assert!(Link::from_str(invalid).is_err());
        }
    }

    #[test]
    #[cfg(unix)]
    fn prune_does_not_follow_environment_symlinks() {
        assert_prune_removes_only_symlink(CacheBucket::Environments, &[], false);
    }

    #[test]
    #[cfg(unix)]
    fn prune_ci_does_not_follow_wheel_symlinks() {
        assert_prune_removes_only_symlink(
            CacheBucket::Wheels,
            &[CacheBucket::SourceDistributions],
            true,
        );
    }

    #[test]
    #[cfg(unix)]
    fn prune_does_not_follow_archive_symlinks() {
        assert_prune_removes_only_symlink(CacheBucket::Archive, &[], false);
    }
}