uv_cache/
lib.rs

1use std::fmt::{Display, Formatter};
2use std::io;
3use std::io::Write;
4use std::ops::Deref;
5use std::path::{Path, PathBuf};
6use std::str::FromStr;
7use std::sync::Arc;
8
9use rustc_hash::FxHashMap;
10use tracing::{debug, trace, warn};
11
12use uv_cache_info::Timestamp;
13use uv_fs::{LockedFile, LockedFileError, LockedFileMode, Simplified, cachedir, directories};
14use uv_normalize::PackageName;
15use uv_pypi_types::ResolutionMetadata;
16
17pub use crate::by_timestamp::CachedByTimestamp;
18#[cfg(feature = "clap")]
19pub use crate::cli::CacheArgs;
20use crate::removal::Remover;
21pub use crate::removal::{Removal, rm_rf};
22pub use crate::wheel::WheelCache;
23use crate::wheel::WheelCacheKind;
24pub use archive::ArchiveId;
25
26mod archive;
27mod by_timestamp;
28#[cfg(feature = "clap")]
29mod cli;
30mod removal;
31mod wheel;
32
/// The version of the archive bucket.
///
/// Must be kept in-sync with the version in [`CacheBucket::to_str`].
///
/// This value is stamped into every `Link` when it is created; on Windows, a link whose recorded
/// version differs from this constant is treated as stale when it is resolved.
pub const ARCHIVE_VERSION: u8 = 0;
37
/// Errors raised while initializing the cache or locking a cache entry or shard.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// An I/O error, reported transparently (the inner error's message is used as-is).
    #[error(transparent)]
    Io(#[from] io::Error),
    /// The cache scaffold could not be created at the given root directory.
    #[error("Failed to initialize cache at `{}`", _0.user_display())]
    Init(PathBuf, #[source] io::Error),
    /// The cache root could not be converted into an absolute path.
    #[error("Could not make the path absolute")]
    Absolute(#[source] io::Error),
    /// A file lock could not be acquired.
    #[error("Could not acquire lock")]
    Acquire(#[from] LockedFileError),
}
50
51/// A [`CacheEntry`] which may or may not exist yet.
52#[derive(Debug, Clone)]
53pub struct CacheEntry(PathBuf);
54
55impl CacheEntry {
56    /// Create a new [`CacheEntry`] from a directory and a file name.
57    pub fn new(dir: impl Into<PathBuf>, file: impl AsRef<Path>) -> Self {
58        Self(dir.into().join(file))
59    }
60
61    /// Create a new [`CacheEntry`] from a path.
62    pub fn from_path(path: impl Into<PathBuf>) -> Self {
63        Self(path.into())
64    }
65
66    /// Return the cache entry's parent directory.
67    pub fn shard(&self) -> CacheShard {
68        CacheShard(self.dir().to_path_buf())
69    }
70
71    /// Convert the [`CacheEntry`] into a [`PathBuf`].
72    #[inline]
73    pub fn into_path_buf(self) -> PathBuf {
74        self.0
75    }
76
77    /// Return the path to the [`CacheEntry`].
78    #[inline]
79    pub fn path(&self) -> &Path {
80        &self.0
81    }
82
83    /// Return the cache entry's parent directory.
84    #[inline]
85    pub fn dir(&self) -> &Path {
86        self.0.parent().expect("Cache entry has no parent")
87    }
88
89    /// Create a new [`CacheEntry`] with the given file name.
90    #[must_use]
91    pub fn with_file(&self, file: impl AsRef<Path>) -> Self {
92        Self(self.dir().join(file))
93    }
94
95    /// Acquire the [`CacheEntry`] as an exclusive lock.
96    pub async fn lock(&self) -> Result<LockedFile, Error> {
97        fs_err::create_dir_all(self.dir())?;
98        Ok(LockedFile::acquire(
99            self.path(),
100            LockedFileMode::Exclusive,
101            self.path().display(),
102        )
103        .await?)
104    }
105}
106
107impl AsRef<Path> for CacheEntry {
108    fn as_ref(&self) -> &Path {
109        &self.0
110    }
111}
112
113/// A subdirectory within the cache.
114#[derive(Debug, Clone)]
115pub struct CacheShard(PathBuf);
116
117impl CacheShard {
118    /// Return a [`CacheEntry`] within this shard.
119    pub fn entry(&self, file: impl AsRef<Path>) -> CacheEntry {
120        CacheEntry::new(&self.0, file)
121    }
122
123    /// Return a [`CacheShard`] within this shard.
124    #[must_use]
125    pub fn shard(&self, dir: impl AsRef<Path>) -> Self {
126        Self(self.0.join(dir.as_ref()))
127    }
128
129    /// Acquire the cache entry as an exclusive lock.
130    pub async fn lock(&self) -> Result<LockedFile, Error> {
131        fs_err::create_dir_all(self.as_ref())?;
132        Ok(LockedFile::acquire(
133            self.join(".lock"),
134            LockedFileMode::Exclusive,
135            self.display(),
136        )
137        .await?)
138    }
139
140    /// Return the [`CacheShard`] as a [`PathBuf`].
141    pub fn into_path_buf(self) -> PathBuf {
142        self.0
143    }
144}
145
146impl AsRef<Path> for CacheShard {
147    fn as_ref(&self) -> &Path {
148        &self.0
149    }
150}
151
152impl Deref for CacheShard {
153    type Target = Path;
154
155    fn deref(&self) -> &Self::Target {
156        &self.0
157    }
158}
159
/// The main cache abstraction.
///
/// While the cache is active, it holds a read (shared) lock that prevents cache cleaning.
///
/// Cloning a [`Cache`] is cheap with respect to the temporary directory and the lock: both are
/// reference-counted, so clones share them rather than duplicating them.
#[derive(Debug, Clone)]
pub struct Cache {
    /// The cache directory.
    root: PathBuf,
    /// The refresh strategy to use when reading from the cache.
    refresh: Refresh,
    /// A temporary cache directory, if the user requested `--no-cache`.
    ///
    /// Included to ensure that the temporary directory exists for the length of the operation, but
    /// is dropped at the end as appropriate.
    temp_dir: Option<Arc<tempfile::TempDir>>,
    /// Ensure that `uv cache` operations don't remove items from the cache that are used by another
    /// uv process.
    ///
    /// `None` until the cache has been initialized or locked, or when shared locking is not
    /// supported.
    lock_file: Option<Arc<LockedFile>>,
}
178
179impl Cache {
180    /// A persistent cache directory at `root`.
181    pub fn from_path(root: impl Into<PathBuf>) -> Self {
182        Self {
183            root: root.into(),
184            refresh: Refresh::None(Timestamp::now()),
185            temp_dir: None,
186            lock_file: None,
187        }
188    }
189
190    /// Create a temporary cache directory.
191    pub fn temp() -> Result<Self, io::Error> {
192        let temp_dir = tempfile::tempdir()?;
193        Ok(Self {
194            root: temp_dir.path().to_path_buf(),
195            refresh: Refresh::None(Timestamp::now()),
196            temp_dir: Some(Arc::new(temp_dir)),
197            lock_file: None,
198        })
199    }
200
201    /// Set the [`Refresh`] policy for the cache.
202    #[must_use]
203    pub fn with_refresh(self, refresh: Refresh) -> Self {
204        Self { refresh, ..self }
205    }
206
207    /// Acquire a lock that allows removing entries from the cache.
208    pub async fn with_exclusive_lock(self) -> Result<Self, LockedFileError> {
209        let Self {
210            root,
211            refresh,
212            temp_dir,
213            lock_file,
214        } = self;
215
216        // Release the existing lock, avoid deadlocks from a cloned cache.
217        if let Some(lock_file) = lock_file {
218            drop(
219                Arc::try_unwrap(lock_file).expect(
220                    "cloning the cache before acquiring an exclusive lock causes a deadlock",
221                ),
222            );
223        }
224        let lock_file = LockedFile::acquire(
225            root.join(".lock"),
226            LockedFileMode::Exclusive,
227            root.simplified_display(),
228        )
229        .await?;
230
231        Ok(Self {
232            root,
233            refresh,
234            temp_dir,
235            lock_file: Some(Arc::new(lock_file)),
236        })
237    }
238
239    /// Acquire a lock that allows removing entries from the cache, if available.
240    ///
241    /// If the lock is not immediately available, returns [`Err`] with self.
242    pub fn with_exclusive_lock_no_wait(self) -> Result<Self, Self> {
243        let Self {
244            root,
245            refresh,
246            temp_dir,
247            lock_file,
248        } = self;
249
250        match LockedFile::acquire_no_wait(
251            root.join(".lock"),
252            LockedFileMode::Exclusive,
253            root.simplified_display(),
254        ) {
255            Some(lock_file) => Ok(Self {
256                root,
257                refresh,
258                temp_dir,
259                lock_file: Some(Arc::new(lock_file)),
260            }),
261            None => Err(Self {
262                root,
263                refresh,
264                temp_dir,
265                lock_file,
266            }),
267        }
268    }
269
270    /// Return the root of the cache.
271    pub fn root(&self) -> &Path {
272        &self.root
273    }
274
275    /// Return the [`Refresh`] policy for the cache.
276    pub fn refresh(&self) -> &Refresh {
277        &self.refresh
278    }
279
280    /// The folder for a specific cache bucket
281    pub fn bucket(&self, cache_bucket: CacheBucket) -> PathBuf {
282        self.root.join(cache_bucket.to_str())
283    }
284
285    /// Compute an entry in the cache.
286    pub fn shard(&self, cache_bucket: CacheBucket, dir: impl AsRef<Path>) -> CacheShard {
287        CacheShard(self.bucket(cache_bucket).join(dir.as_ref()))
288    }
289
290    /// Compute an entry in the cache.
291    pub fn entry(
292        &self,
293        cache_bucket: CacheBucket,
294        dir: impl AsRef<Path>,
295        file: impl AsRef<Path>,
296    ) -> CacheEntry {
297        CacheEntry::new(self.bucket(cache_bucket).join(dir), file)
298    }
299
300    /// Return the path to an archive in the cache.
301    pub fn archive(&self, id: &ArchiveId) -> PathBuf {
302        self.bucket(CacheBucket::Archive).join(id)
303    }
304
305    /// Create a temporary directory to be used as a Python virtual environment.
306    pub fn venv_dir(&self) -> io::Result<tempfile::TempDir> {
307        fs_err::create_dir_all(self.bucket(CacheBucket::Builds))?;
308        tempfile::tempdir_in(self.bucket(CacheBucket::Builds))
309    }
310
311    /// Create a temporary directory to be used for executing PEP 517 source distribution builds.
312    pub fn build_dir(&self) -> io::Result<tempfile::TempDir> {
313        fs_err::create_dir_all(self.bucket(CacheBucket::Builds))?;
314        tempfile::tempdir_in(self.bucket(CacheBucket::Builds))
315    }
316
317    /// Returns `true` if a cache entry must be revalidated given the [`Refresh`] policy.
318    pub fn must_revalidate_package(&self, package: &PackageName) -> bool {
319        match &self.refresh {
320            Refresh::None(_) => false,
321            Refresh::All(_) => true,
322            Refresh::Packages(packages, _, _) => packages.contains(package),
323        }
324    }
325
326    /// Returns `true` if a cache entry must be revalidated given the [`Refresh`] policy.
327    pub fn must_revalidate_path(&self, path: &Path) -> bool {
328        match &self.refresh {
329            Refresh::None(_) => false,
330            Refresh::All(_) => true,
331            Refresh::Packages(_, paths, _) => paths
332                .iter()
333                .any(|target| same_file::is_same_file(path, target).unwrap_or(false)),
334        }
335    }
336
337    /// Returns the [`Freshness`] for a cache entry, validating it against the [`Refresh`] policy.
338    ///
339    /// A cache entry is considered fresh if it was created after the cache itself was
340    /// initialized, or if the [`Refresh`] policy does not require revalidation.
341    pub fn freshness(
342        &self,
343        entry: &CacheEntry,
344        package: Option<&PackageName>,
345        path: Option<&Path>,
346    ) -> io::Result<Freshness> {
347        // Grab the cutoff timestamp, if it's relevant.
348        let timestamp = match &self.refresh {
349            Refresh::None(_) => return Ok(Freshness::Fresh),
350            Refresh::All(timestamp) => timestamp,
351            Refresh::Packages(packages, paths, timestamp) => {
352                if package.is_none_or(|package| packages.contains(package))
353                    || path.is_some_and(|path| {
354                        paths
355                            .iter()
356                            .any(|target| same_file::is_same_file(path, target).unwrap_or(false))
357                    })
358                {
359                    timestamp
360                } else {
361                    return Ok(Freshness::Fresh);
362                }
363            }
364        };
365
366        match fs_err::metadata(entry.path()) {
367            Ok(metadata) => {
368                if Timestamp::from_metadata(&metadata) >= *timestamp {
369                    Ok(Freshness::Fresh)
370                } else {
371                    Ok(Freshness::Stale)
372                }
373            }
374            Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(Freshness::Missing),
375            Err(err) => Err(err),
376        }
377    }
378
379    /// Persist a temporary directory to the artifact store, returning its unique ID.
380    pub async fn persist(
381        &self,
382        temp_dir: impl AsRef<Path>,
383        path: impl AsRef<Path>,
384    ) -> io::Result<ArchiveId> {
385        // Create a unique ID for the artifact.
386        // TODO(charlie): Support content-addressed persistence via SHAs.
387        let id = ArchiveId::new();
388
389        // Move the temporary directory into the directory store.
390        let archive_entry = self.entry(CacheBucket::Archive, "", &id);
391        fs_err::create_dir_all(archive_entry.dir())?;
392        uv_fs::rename_with_retry(temp_dir.as_ref(), archive_entry.path()).await?;
393
394        // Create a symlink to the directory store.
395        fs_err::create_dir_all(path.as_ref().parent().expect("Cache entry to have parent"))?;
396        self.create_link(&id, path.as_ref())?;
397
398        Ok(id)
399    }
400
401    /// Returns `true` if the [`Cache`] is temporary.
402    pub fn is_temporary(&self) -> bool {
403        self.temp_dir.is_some()
404    }
405
406    /// Populate the cache scaffold.
407    fn create_base_files(root: &PathBuf) -> io::Result<()> {
408        // Create the cache directory, if it doesn't exist.
409        fs_err::create_dir_all(root)?;
410
411        // Add the CACHEDIR.TAG.
412        cachedir::ensure_tag(root)?;
413
414        // Add the .gitignore.
415        match fs_err::OpenOptions::new()
416            .write(true)
417            .create_new(true)
418            .open(root.join(".gitignore"))
419        {
420            Ok(mut file) => file.write_all(b"*")?,
421            Err(err) if err.kind() == io::ErrorKind::AlreadyExists => (),
422            Err(err) => return Err(err),
423        }
424
425        // Add an empty .gitignore to the build bucket, to ensure that the cache's own .gitignore
426        // doesn't interfere with source distribution builds. Build backends (like hatchling) will
427        // traverse upwards to look for .gitignore files.
428        fs_err::create_dir_all(root.join(CacheBucket::SourceDistributions.to_str()))?;
429        match fs_err::OpenOptions::new()
430            .write(true)
431            .create_new(true)
432            .open(
433                root.join(CacheBucket::SourceDistributions.to_str())
434                    .join(".gitignore"),
435            ) {
436            Ok(_) => {}
437            Err(err) if err.kind() == io::ErrorKind::AlreadyExists => (),
438            Err(err) => return Err(err),
439        }
440
441        // Add a phony .git, if it doesn't exist, to ensure that the cache isn't considered to be
442        // part of a Git repository. (Some packages will include Git metadata (like a hash) in the
443        // built version if they're in a Git repository, but the cache should be viewed as an
444        // isolated store.).
445        // We have to put this below the gitignore. Otherwise, if the build backend uses the rust
446        // ignore crate it will walk up to the top level .gitignore and ignore its python source
447        // files.
448        fs_err::OpenOptions::new().create(true).write(true).open(
449            root.join(CacheBucket::SourceDistributions.to_str())
450                .join(".git"),
451        )?;
452
453        Ok(())
454    }
455
456    /// Initialize the [`Cache`].
457    pub async fn init(self) -> Result<Self, Error> {
458        let root = &self.root;
459
460        Self::create_base_files(root).map_err(|err| Error::Init(root.clone(), err))?;
461
462        // Block cache removal operations from interfering.
463        let lock_file = match LockedFile::acquire(
464            root.join(".lock"),
465            LockedFileMode::Shared,
466            root.simplified_display(),
467        )
468        .await
469        {
470            Ok(lock_file) => Some(Arc::new(lock_file)),
471            Err(err)
472                if err
473                    .as_io_error()
474                    .is_some_and(|err| err.kind() == io::ErrorKind::Unsupported) =>
475            {
476                warn!(
477                    "Shared locking is not supported by the current platform or filesystem, \
478                        reduced parallel process safety with `uv cache clean` and `uv cache prune`."
479                );
480                None
481            }
482            Err(err) => return Err(err.into()),
483        };
484
485        Ok(Self {
486            root: std::path::absolute(root).map_err(Error::Absolute)?,
487            lock_file,
488            ..self
489        })
490    }
491
492    /// Initialize the [`Cache`], assuming that there are no other uv processes running.
493    pub fn init_no_wait(self) -> Result<Option<Self>, Error> {
494        let root = &self.root;
495
496        Self::create_base_files(root).map_err(|err| Error::Init(root.clone(), err))?;
497
498        // Block cache removal operations from interfering.
499        let Some(lock_file) = LockedFile::acquire_no_wait(
500            root.join(".lock"),
501            LockedFileMode::Shared,
502            root.simplified_display(),
503        ) else {
504            return Ok(None);
505        };
506        Ok(Some(Self {
507            root: std::path::absolute(root).map_err(Error::Absolute)?,
508            lock_file: Some(Arc::new(lock_file)),
509            ..self
510        }))
511    }
512
513    /// Clear the cache, removing all entries.
514    pub fn clear(self, reporter: Box<dyn CleanReporter>) -> Result<Removal, io::Error> {
515        // Remove everything but `.lock`, Windows does not allow removal of a locked file
516        let mut removal = Remover::new(reporter).rm_rf(&self.root, true)?;
517        let Self {
518            root, lock_file, ..
519        } = self;
520
521        // Remove the `.lock` file, unlocking it first
522        if let Some(lock) = lock_file {
523            drop(lock);
524            fs_err::remove_file(root.join(".lock"))?;
525        }
526        removal.num_files += 1;
527
528        // Remove the root directory
529        match fs_err::remove_dir(root) {
530            Ok(()) => {
531                removal.num_dirs += 1;
532            }
533            // On Windows, when `--force` is used, the `.lock` file can exist and be unremovable,
534            // so we make this non-fatal
535            Err(err) if err.kind() == io::ErrorKind::DirectoryNotEmpty => {
536                trace!("Failed to remove root cache directory: not empty");
537            }
538            Err(err) => return Err(err),
539        }
540
541        Ok(removal)
542    }
543
544    /// Remove a package from the cache.
545    ///
546    /// Returns the number of entries removed from the cache.
547    pub fn remove(&self, name: &PackageName) -> io::Result<Removal> {
548        // Collect the set of referenced archives.
549        let references = self.find_archive_references()?;
550
551        // Remove any entries for the package from the cache.
552        let mut summary = Removal::default();
553        for bucket in CacheBucket::iter() {
554            summary += bucket.remove(self, name)?;
555        }
556
557        // Remove any archives that are no longer referenced.
558        for (target, references) in references {
559            if references.iter().all(|path| !path.exists()) {
560                debug!("Removing dangling cache entry: {}", target.display());
561                summary += rm_rf(target)?;
562            }
563        }
564
565        Ok(summary)
566    }
567
568    /// Run the garbage collector on the cache, removing any dangling entries.
569    pub fn prune(&self, ci: bool) -> Result<Removal, io::Error> {
570        let mut summary = Removal::default();
571
572        // First, remove any top-level directories that are unused. These typically represent
573        // outdated cache buckets (e.g., `wheels-v0`, when latest is `wheels-v1`).
574        for entry in fs_err::read_dir(&self.root)? {
575            let entry = entry?;
576            let metadata = entry.metadata()?;
577
578            if entry.file_name() == "CACHEDIR.TAG"
579                || entry.file_name() == ".gitignore"
580                || entry.file_name() == ".git"
581                || entry.file_name() == ".lock"
582            {
583                continue;
584            }
585
586            if metadata.is_dir() {
587                // If the directory is not a cache bucket, remove it.
588                if CacheBucket::iter().all(|bucket| entry.file_name() != bucket.to_str()) {
589                    let path = entry.path();
590                    debug!("Removing dangling cache bucket: {}", path.display());
591                    summary += rm_rf(path)?;
592                }
593            } else {
594                // If the file is not a marker file, remove it.
595                let path = entry.path();
596                debug!("Removing dangling cache bucket: {}", path.display());
597                summary += rm_rf(path)?;
598            }
599        }
600
601        // Second, remove any cached environments. These are never referenced by symlinks, so we can
602        // remove them directly.
603        match fs_err::read_dir(self.bucket(CacheBucket::Environments)) {
604            Ok(entries) => {
605                for entry in entries {
606                    let entry = entry?;
607                    let path = fs_err::canonicalize(entry.path())?;
608                    debug!("Removing dangling cache environment: {}", path.display());
609                    summary += rm_rf(path)?;
610                }
611            }
612            Err(err) if err.kind() == io::ErrorKind::NotFound => (),
613            Err(err) => return Err(err),
614        }
615
616        // Third, if enabled, remove all unzipped wheels, leaving only the wheel archives.
617        if ci {
618            // Remove the entire pre-built wheel cache, since every entry is an unzipped wheel.
619            match fs_err::read_dir(self.bucket(CacheBucket::Wheels)) {
620                Ok(entries) => {
621                    for entry in entries {
622                        let entry = entry?;
623                        let path = fs_err::canonicalize(entry.path())?;
624                        if path.is_dir() {
625                            debug!("Removing unzipped wheel entry: {}", path.display());
626                            summary += rm_rf(path)?;
627                        }
628                    }
629                }
630                Err(err) if err.kind() == io::ErrorKind::NotFound => (),
631                Err(err) => return Err(err),
632            }
633
634            for entry in walkdir::WalkDir::new(self.bucket(CacheBucket::SourceDistributions)) {
635                let entry = entry?;
636
637                // If the directory contains a `metadata.msgpack`, then it's a built wheel revision.
638                if !entry.file_type().is_dir() {
639                    continue;
640                }
641
642                if !entry.path().join("metadata.msgpack").exists() {
643                    continue;
644                }
645
646                // Remove everything except the built wheel archive and the metadata.
647                for entry in fs_err::read_dir(entry.path())? {
648                    let entry = entry?;
649                    let path = entry.path();
650
651                    // Retain the resolved metadata (`metadata.msgpack`).
652                    if path
653                        .file_name()
654                        .is_some_and(|file_name| file_name == "metadata.msgpack")
655                    {
656                        continue;
657                    }
658
659                    // Retain any built wheel archives.
660                    if path
661                        .extension()
662                        .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
663                    {
664                        continue;
665                    }
666
667                    debug!("Removing unzipped built wheel entry: {}", path.display());
668                    summary += rm_rf(path)?;
669                }
670            }
671        }
672
673        // Fourth, remove any unused archives (by searching for archives that are not symlinked).
674        let references = self.find_archive_references()?;
675
676        match fs_err::read_dir(self.bucket(CacheBucket::Archive)) {
677            Ok(entries) => {
678                for entry in entries {
679                    let entry = entry?;
680                    let path = fs_err::canonicalize(entry.path())?;
681                    if !references.contains_key(&path) {
682                        debug!("Removing dangling cache archive: {}", path.display());
683                        summary += rm_rf(path)?;
684                    }
685                }
686            }
687            Err(err) if err.kind() == io::ErrorKind::NotFound => (),
688            Err(err) => return Err(err),
689        }
690
691        Ok(summary)
692    }
693
694    /// Find all references to entries in the archive bucket.
695    ///
696    /// Archive entries are often referenced by symlinks in other cache buckets. This method
697    /// searches for all such references.
698    ///
699    /// Returns a map from archive path to paths that reference it.
700    fn find_archive_references(&self) -> Result<FxHashMap<PathBuf, Vec<PathBuf>>, io::Error> {
701        let mut references = FxHashMap::<PathBuf, Vec<PathBuf>>::default();
702        for bucket in [CacheBucket::SourceDistributions, CacheBucket::Wheels] {
703            let bucket_path = self.bucket(bucket);
704            if bucket_path.is_dir() {
705                let walker = walkdir::WalkDir::new(&bucket_path).into_iter();
706                for entry in walker.filter_entry(|entry| {
707                    !(
708                        // As an optimization, ignore any `.lock`, `.whl`, `.msgpack`, `.rev`, or
709                        // `.http` files, along with the `src` directory, which represents the
710                        // unpacked source distribution.
711                        entry.file_name() == "src"
712                            || entry.file_name() == ".lock"
713                            || entry.file_name() == ".gitignore"
714                            || entry.path().extension().is_some_and(|ext| {
715                                ext.eq_ignore_ascii_case("lock")
716                                    || ext.eq_ignore_ascii_case("whl")
717                                    || ext.eq_ignore_ascii_case("http")
718                                    || ext.eq_ignore_ascii_case("rev")
719                                    || ext.eq_ignore_ascii_case("msgpack")
720                            })
721                    )
722                }) {
723                    let entry = entry?;
724
725                    // On Unix, archive references use symlinks.
726                    if cfg!(unix) {
727                        if !entry.file_type().is_symlink() {
728                            continue;
729                        }
730                    }
731
732                    // On Windows, archive references are files containing structured data.
733                    if cfg!(windows) {
734                        if !entry.file_type().is_file() {
735                            continue;
736                        }
737                    }
738
739                    if let Ok(target) = self.resolve_link(entry.path()) {
740                        references
741                            .entry(target)
742                            .or_default()
743                            .push(entry.path().to_path_buf());
744                    }
745                }
746            }
747        }
748        Ok(references)
749    }
750
751    /// Create a link to a directory in the archive bucket.
752    ///
753    /// On Windows, we write structured data ([`Link`]) to a file containing the archive ID and
754    /// version. On Unix, we create a symlink to the target directory.
755    #[cfg(windows)]
756    pub fn create_link(&self, id: &ArchiveId, dst: impl AsRef<Path>) -> io::Result<()> {
757        // Serialize the link.
758        let link = Link::new(id.clone());
759        let contents = link.to_string();
760
761        // First, attempt to create a file at the location, but fail if it already exists.
762        match fs_err::OpenOptions::new()
763            .write(true)
764            .create_new(true)
765            .open(dst.as_ref())
766        {
767            Ok(mut file) => {
768                // Write the target path to the file.
769                file.write_all(contents.as_bytes())?;
770                Ok(())
771            }
772            Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
773                // Write to a temporary file, then move it into place.
774                let temp_dir = tempfile::tempdir_in(dst.as_ref().parent().unwrap())?;
775                let temp_file = temp_dir.path().join("link");
776                fs_err::write(&temp_file, contents.as_bytes())?;
777
778                // Move the symlink into the target location.
779                fs_err::rename(&temp_file, dst.as_ref())?;
780
781                Ok(())
782            }
783            Err(err) => Err(err),
784        }
785    }
786
787    /// Resolve an archive link, returning the fully-resolved path.
788    ///
789    /// Returns an error if the link target does not exist.
790    #[cfg(windows)]
791    pub fn resolve_link(&self, path: impl AsRef<Path>) -> io::Result<PathBuf> {
792        // Deserialize the link.
793        let contents = fs_err::read_to_string(path.as_ref())?;
794        let link = Link::from_str(&contents)?;
795
796        // Ignore stale links.
797        if link.version != ARCHIVE_VERSION {
798            return Err(io::Error::new(
799                io::ErrorKind::NotFound,
800                "The link target does not exist.",
801            ));
802        }
803
804        // Reconstruct the path.
805        let path = self.archive(&link.id);
806        path.canonicalize()
807    }
808
809    /// Create a link to a directory in the archive bucket.
810    ///
811    /// On Windows, we write structured data ([`Link`]) to a file containing the archive ID and
812    /// version. On Unix, we create a symlink to the target directory.
813    #[cfg(unix)]
814    pub fn create_link(&self, id: &ArchiveId, dst: impl AsRef<Path>) -> io::Result<()> {
815        // Construct the link target.
816        let src = self.archive(id);
817        let dst = dst.as_ref();
818
819        // Attempt to create the symlink directly.
820        match fs_err::os::unix::fs::symlink(&src, dst) {
821            Ok(()) => Ok(()),
822            Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
823                // Create a symlink, using a temporary file to ensure atomicity.
824                let temp_dir = tempfile::tempdir_in(dst.parent().unwrap())?;
825                let temp_file = temp_dir.path().join("link");
826                fs_err::os::unix::fs::symlink(&src, &temp_file)?;
827
828                // Move the symlink into the target location.
829                fs_err::rename(&temp_file, dst)?;
830
831                Ok(())
832            }
833            Err(err) => Err(err),
834        }
835    }
836
837    /// Resolve an archive link, returning the fully-resolved path.
838    ///
839    /// Returns an error if the link target does not exist.
840    #[cfg(unix)]
841    pub fn resolve_link(&self, path: impl AsRef<Path>) -> io::Result<PathBuf> {
842        path.as_ref().canonicalize()
843    }
844}
845
/// A link to an entry in the archive bucket, identified by the entry's ID and the version of
/// the archive bucket it was created for.
///
/// Serialized as `archive-v{version}/{id}`; see the [`Display`] and [`FromStr`] implementations.
#[derive(Debug, Clone)]
// NOTE(review): presumably `unused` is allowed because `Link` is only needed by the Windows code
// paths and the tests -- confirm before removing.
#[allow(unused)]
struct Link {
    /// The unique ID of the entry in the archive bucket.
    id: ArchiveId,
    /// The version of the archive bucket.
    version: u8,
}
855
856#[allow(unused)]
857impl Link {
858    /// Create a new [`Archive`] with the given ID and hashes.
859    fn new(id: ArchiveId) -> Self {
860        Self {
861            id,
862            version: ARCHIVE_VERSION,
863        }
864    }
865}
866
867impl Display for Link {
868    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
869        write!(f, "archive-v{}/{}", self.version, self.id)
870    }
871}
872
873impl FromStr for Link {
874    type Err = io::Error;
875
876    fn from_str(s: &str) -> Result<Self, Self::Err> {
877        let mut parts = s.splitn(2, '/');
878        let version = parts
879            .next()
880            .filter(|s| !s.is_empty())
881            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing version"))?;
882        let id = parts
883            .next()
884            .filter(|s| !s.is_empty())
885            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing ID"))?;
886
887        // Parse the archive version from `archive-v{version}/{id}`.
888        let version = version
889            .strip_prefix("archive-v")
890            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing version prefix"))?;
891        let version = u8::from_str(version).map_err(|err| {
892            io::Error::new(
893                io::ErrorKind::InvalidData,
894                format!("failed to parse version: {err}"),
895            )
896        })?;
897
898        // Parse the ID from `archive-v{version}/{id}`.
899        let id = ArchiveId::from_str(id).map_err(|err| {
900            io::Error::new(
901                io::ErrorKind::InvalidData,
902                format!("failed to parse ID: {err}"),
903            )
904        })?;
905
906        Ok(Self { id, version })
907    }
908}
909
/// Callbacks for reporting progress while files and directories are removed.
///
/// Implementations must be [`Send`] and [`Sync`].
pub trait CleanReporter: Send + Sync {
    /// Called after one file or directory is removed.
    fn on_clean(&self);

    /// Called after all files and directories are removed.
    fn on_complete(&self);
}
917
/// The different kinds of data in the cache are stored in different buckets, which in our case
/// are subdirectories of the cache root.
///
/// The directory names and version suffixes shown in the examples below are illustrative and
/// may lag behind the current values; `CacheBucket::to_str` is the source of truth.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum CacheBucket {
    /// Wheels (excluding built wheels), alongside their metadata and cache policy.
    ///
    /// There are three kinds of cache entries: Wheel metadata and policy as `MsgPack` files, the
    /// wheels themselves, and the unzipped wheel archives. If a wheel file is over an in-memory
    /// size threshold, we first download the zip file into the cache, then unzip it into a
    /// directory with the same name (exclusive of the `.whl` extension).
    ///
    /// Cache structure:
    ///  * `wheel-metadata-v0/pypi/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
    ///  * `wheel-metadata-v0/<digest(index-url)>/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
    ///  * `wheel-metadata-v0/url/<digest(url)>/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
    ///
    /// See `uv_client::RegistryClient::wheel_metadata` for information on how wheel metadata
    /// is fetched.
    ///
    /// # Example
    ///
    /// Consider the following `requirements.in`:
    /// ```text
    /// # pypi wheel
    /// pandas
    /// # url wheel
    /// flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl
    /// ```
    ///
    /// When we run `pip compile`, it will only fetch and cache the metadata (and cache policy), it
    /// doesn't need the actual wheels yet:
    /// ```text
    /// wheel-v0
    /// ├── pypi
    /// │   ...
    /// │   ├── pandas
    /// │   │   └── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.msgpack
    /// │   ...
    /// └── url
    ///     └── 4b8be67c801a7ecb
    ///         └── flask
    ///             └── flask-3.0.0-py3-none-any.msgpack
    /// ```
    ///
    /// We get the following `requirements.txt` from `pip compile`:
    ///
    /// ```text
    /// [...]
    /// flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl
    /// [...]
    /// pandas==2.1.3
    /// [...]
    /// ```
    ///
    /// If we run `pip sync` on `requirements.txt` on a different machine, it also fetches the
    /// wheels:
    ///
    /// TODO(konstin): This is still wrong, we need to store the cache policy too!
    /// ```text
    /// wheel-v0
    /// ├── pypi
    /// │   ...
    /// │   ├── pandas
    /// │   │   ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    /// │   │   ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64
    /// │   ...
    /// └── url
    ///     └── 4b8be67c801a7ecb
    ///         └── flask
    ///             └── flask-3.0.0-py3-none-any.whl
    ///                 ├── flask
    ///                 │   └── ...
    ///                 └── flask-3.0.0.dist-info
    ///                     └── ...
    /// ```
    ///
    /// If we run first `pip compile` and then `pip sync` on the same machine, we get both:
    ///
    /// ```text
    /// wheels-v0
    /// ├── pypi
    /// │   ├── ...
    /// │   ├── pandas
    /// │   │   ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.msgpack
    /// │   │   ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    /// │   │   └── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64
    /// │   │       ├── pandas
    /// │   │       │   ├── ...
    /// │   │       ├── pandas-2.1.3.dist-info
    /// │   │       │   ├── ...
    /// │   │       └── pandas.libs
    /// │   ├── ...
    /// └── url
    ///     └── 4b8be67c801a7ecb
    ///         └── flask
    ///             ├── flask-3.0.0-py3-none-any.msgpack
    ///             ├── flask-3.0.0-py3-none-any.whl
    ///             └── flask-3.0.0-py3-none-any
    ///                 ├── flask
    ///                 │   └── ...
    ///                 └── flask-3.0.0.dist-info
    ///                     └── ...
    /// ```
    Wheels,
    /// Source distributions, wheels built from source distributions, their extracted metadata, and the
    /// cache policy of the source distribution.
    ///
    /// The structure is similar to that of the `Wheels` bucket, except we have an additional layer
    /// for the source distribution filename and the metadata is at the source distribution-level,
    /// not at the wheel level.
    ///
    /// TODO(konstin): The cache policy should be on the source distribution level, the metadata we
    /// can put next to the wheels as in the `Wheels` bucket.
    ///
    /// The unzipped source distribution is stored in a directory matching the source distribution
    /// archive name.
    ///
    /// Source distributions are built into zipped wheel files (as PEP 517 specifies) and unzipped
    /// lazily before installing. So when resolving, we only build the wheel and store the archive
    /// file in the cache; when installing, we unpack it under the same name (exclusive of the
    /// `.whl` extension). You may find a mix of wheel archive zip files and unzipped wheel
    /// directories in the cache.
    ///
    /// Cache structure:
    ///  * `built-wheels-v0/pypi/foo/34a17436ed1e9669/{manifest.msgpack, metadata.msgpack, foo-1.0.0.zip, foo-1.0.0-py3-none-any.whl, ...other wheels}`
    ///  * `built-wheels-v0/<digest(index-url)>/foo/foo-1.0.0.zip/{manifest.msgpack, metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
    ///  * `built-wheels-v0/url/<digest(url)>/foo/foo-1.0.0.zip/{manifest.msgpack, metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
    ///  * `built-wheels-v0/git/<digest(url)>/<git sha>/foo/foo-1.0.0.zip/{metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
    ///
    /// But the url filename does not need to be a valid source dist filename
    /// (<https://github.com/search?q=path%3A**%2Frequirements.txt+master.zip&type=code>),
    /// so it could also be the following and we have to take any string as filename:
    ///  * `built-wheels-v0/url/<sha256(url)>/master.zip/metadata.msgpack`
    ///
    /// # Example
    ///
    /// The following requirements:
    /// ```text
    /// # git source dist
    /// pydantic-extra-types @ git+https://github.com/pydantic/pydantic-extra-types.git
    /// # pypi source dist
    /// django_allauth==0.51.0
    /// # url source dist
    /// werkzeug @ https://files.pythonhosted.org/packages/0d/cc/ff1904eb5eb4b455e442834dabf9427331ac0fa02853bf83db817a7dd53d/werkzeug-3.0.1.tar.gz
    /// ```
    ///
    /// ...may be cached as:
    /// ```text
    /// built-wheels-v4/
    /// ├── git
    /// │   └── 2122faf3e081fb7a
    /// │       └── 7a2d650a4a7b4d04
    /// │           ├── metadata.msgpack
    /// │           └── pydantic_extra_types-2.9.0-py3-none-any.whl
    /// ├── pypi
    /// │   └── django-allauth
    /// │       └── 0.51.0
    /// │           ├── 0gH-_fwv8tdJ7JwwjJsUc
    /// │           │   ├── django-allauth-0.51.0.tar.gz
    /// │           │   │   └── [UNZIPPED CONTENTS]
    /// │           │   ├── django_allauth-0.51.0-py3-none-any.whl
    /// │           │   └── metadata.msgpack
    /// │           └── revision.http
    /// └── url
    ///     └── 6781bd6440ae72c2
    ///         ├── APYY01rbIfpAo_ij9sCY6
    ///         │   ├── metadata.msgpack
    ///         │   ├── werkzeug-3.0.1-py3-none-any.whl
    ///         │   └── werkzeug-3.0.1.tar.gz
    ///         │       └── [UNZIPPED CONTENTS]
    ///         └── revision.http
    /// ```
    ///
    /// Structurally, the `manifest.msgpack` is empty, and only contains the caching information
    /// needed to invalidate the cache. The `metadata.msgpack` contains the metadata of the source
    /// distribution.
    SourceDistributions,
    /// Flat index responses, a format very similar to the simple metadata API.
    ///
    /// Cache structure:
    ///  * `flat-index-v0/index/<digest(flat_index_url)>.msgpack`
    ///
    /// The response is stored as `Vec<File>`.
    FlatIndex,
    /// Git repositories.
    Git,
    /// Information about an interpreter at a path.
    ///
    /// To avoid caching pyenv shims, bash scripts which may redirect to a new python version
    /// without the shim itself changing, we only cache when the path equals `sys.executable`, i.e.
    /// the path we're running is the python executable itself and not a shim.
    ///
    /// Cache structure: `interpreter-v0/<digest(path)>.msgpack`
    ///
    /// # Example
    ///
    /// The contents of each of the `MsgPack` files has a timestamp field in unix time, the [PEP 508]
    /// markers and some information from the `sys`/`sysconfig` modules.
    ///
    /// ```json
    /// {
    ///   "timestamp": 1698047994491,
    ///   "data": {
    ///     "markers": {
    ///       "implementation_name": "cpython",
    ///       "implementation_version": "3.12.0",
    ///       "os_name": "posix",
    ///       "platform_machine": "x86_64",
    ///       "platform_python_implementation": "CPython",
    ///       "platform_release": "6.5.0-13-generic",
    ///       "platform_system": "Linux",
    ///       "platform_version": "#13-Ubuntu SMP PREEMPT_DYNAMIC Fri Nov  3 12:16:05 UTC 2023",
    ///       "python_full_version": "3.12.0",
    ///       "python_version": "3.12",
    ///       "sys_platform": "linux"
    ///     },
    ///     "base_exec_prefix": "/home/ferris/.pyenv/versions/3.12.0",
    ///     "base_prefix": "/home/ferris/.pyenv/versions/3.12.0",
    ///     "sys_executable": "/home/ferris/projects/uv/.venv/bin/python"
    ///   }
    /// }
    /// ```
    ///
    /// [PEP 508]: https://peps.python.org/pep-0508/#environment-markers
    Interpreter,
    /// Index responses through the simple metadata API.
    ///
    /// Cache structure:
    ///  * `simple-v0/pypi/<package_name>.rkyv`
    ///  * `simple-v0/<digest(index_url)>/<package_name>.rkyv`
    ///
    /// The response is parsed into `uv_client::SimpleMetadata` before storage.
    Simple,
    /// A cache of unzipped wheels, stored as directories. This is used internally within the cache.
    /// When other buckets need to store directories, they should persist them to
    /// [`CacheBucket::Archive`], and then symlink them into the appropriate bucket. This ensures
    /// that cache entries can be atomically replaced and removed, as storing directories in the
    /// other buckets directly would make atomic operations impossible.
    Archive,
    /// Ephemeral virtual environments used to execute PEP 517 builds and other operations.
    Builds,
    /// Reusable virtual environments used to invoke Python tools.
    Environments,
    /// Cached Python downloads.
    Python,
    /// Downloaded tool binaries (e.g., Ruff).
    Binaries,
}
1165
1166impl CacheBucket {
1167    fn to_str(self) -> &'static str {
1168        match self {
1169            // Note that when bumping this, you'll also need to bump it
1170            // in `crates/uv/tests/it/cache_prune.rs`.
1171            Self::SourceDistributions => "sdists-v9",
1172            Self::FlatIndex => "flat-index-v2",
1173            Self::Git => "git-v0",
1174            Self::Interpreter => "interpreter-v4",
1175            // Note that when bumping this, you'll also need to bump it
1176            // in `crates/uv/tests/it/cache_clean.rs`.
1177            Self::Simple => "simple-v18",
1178            // Note that when bumping this, you'll also need to bump it
1179            // in `crates/uv/tests/it/cache_prune.rs`.
1180            Self::Wheels => "wheels-v5",
1181            // Note that when bumping this, you'll also need to bump
1182            // `ARCHIVE_VERSION` in `crates/uv-cache/src/lib.rs`.
1183            Self::Archive => "archive-v0",
1184            Self::Builds => "builds-v0",
1185            Self::Environments => "environments-v2",
1186            Self::Python => "python-v0",
1187            Self::Binaries => "binaries-v0",
1188        }
1189    }
1190
1191    /// Remove a package from the cache bucket.
1192    ///
1193    /// Returns the number of entries removed from the cache.
1194    fn remove(self, cache: &Cache, name: &PackageName) -> Result<Removal, io::Error> {
1195        /// Returns `true` if the [`Path`] represents a built wheel for the given package.
1196        fn is_match(path: &Path, name: &PackageName) -> bool {
1197            let Ok(metadata) = fs_err::read(path.join("metadata.msgpack")) else {
1198                return false;
1199            };
1200            let Ok(metadata) = rmp_serde::from_slice::<ResolutionMetadata>(&metadata) else {
1201                return false;
1202            };
1203            metadata.name == *name
1204        }
1205
1206        let mut summary = Removal::default();
1207        match self {
1208            Self::Wheels => {
1209                // For `pypi` wheels, we expect a directory per package (indexed by name).
1210                let root = cache.bucket(self).join(WheelCacheKind::Pypi);
1211                summary += rm_rf(root.join(name.to_string()))?;
1212
1213                // For alternate indices, we expect a directory for every index (under an `index`
1214                // subdirectory), followed by a directory per package (indexed by name).
1215                let root = cache.bucket(self).join(WheelCacheKind::Index);
1216                for directory in directories(root)? {
1217                    summary += rm_rf(directory.join(name.to_string()))?;
1218                }
1219
1220                // For direct URLs, we expect a directory for every URL, followed by a
1221                // directory per package (indexed by name).
1222                let root = cache.bucket(self).join(WheelCacheKind::Url);
1223                for directory in directories(root)? {
1224                    summary += rm_rf(directory.join(name.to_string()))?;
1225                }
1226            }
1227            Self::SourceDistributions => {
1228                // For `pypi` wheels, we expect a directory per package (indexed by name).
1229                let root = cache.bucket(self).join(WheelCacheKind::Pypi);
1230                summary += rm_rf(root.join(name.to_string()))?;
1231
1232                // For alternate indices, we expect a directory for every index (under an `index`
1233                // subdirectory), followed by a directory per package (indexed by name).
1234                let root = cache.bucket(self).join(WheelCacheKind::Index);
1235                for directory in directories(root)? {
1236                    summary += rm_rf(directory.join(name.to_string()))?;
1237                }
1238
1239                // For direct URLs, we expect a directory for every URL, followed by a
1240                // directory per version. To determine whether the URL is relevant, we need to
1241                // search for a wheel matching the package name.
1242                let root = cache.bucket(self).join(WheelCacheKind::Url);
1243                for url in directories(root)? {
1244                    if directories(&url)?.any(|version| is_match(&version, name)) {
1245                        summary += rm_rf(url)?;
1246                    }
1247                }
1248
1249                // For local dependencies, we expect a directory for every path, followed by a
1250                // directory per version. To determine whether the path is relevant, we need to
1251                // search for a wheel matching the package name.
1252                let root = cache.bucket(self).join(WheelCacheKind::Path);
1253                for path in directories(root)? {
1254                    if directories(&path)?.any(|version| is_match(&version, name)) {
1255                        summary += rm_rf(path)?;
1256                    }
1257                }
1258
1259                // For Git dependencies, we expect a directory for every repository, followed by a
1260                // directory for every SHA. To determine whether the SHA is relevant, we need to
1261                // search for a wheel matching the package name.
1262                let root = cache.bucket(self).join(WheelCacheKind::Git);
1263                for repository in directories(root)? {
1264                    for sha in directories(repository)? {
1265                        if is_match(&sha, name) {
1266                            summary += rm_rf(sha)?;
1267                        }
1268                    }
1269                }
1270            }
1271            Self::Simple => {
1272                // For `pypi` wheels, we expect a rkyv file per package, indexed by name.
1273                let root = cache.bucket(self).join(WheelCacheKind::Pypi);
1274                summary += rm_rf(root.join(format!("{name}.rkyv")))?;
1275
1276                // For alternate indices, we expect a directory for every index (under an `index`
1277                // subdirectory), followed by a directory per package (indexed by name).
1278                let root = cache.bucket(self).join(WheelCacheKind::Index);
1279                for directory in directories(root)? {
1280                    summary += rm_rf(directory.join(format!("{name}.rkyv")))?;
1281                }
1282            }
1283            Self::FlatIndex => {
1284                // We can't know if the flat index includes a package, so we just remove the entire
1285                // cache entry.
1286                let root = cache.bucket(self);
1287                summary += rm_rf(root)?;
1288            }
1289            Self::Git
1290            | Self::Interpreter
1291            | Self::Archive
1292            | Self::Builds
1293            | Self::Environments
1294            | Self::Python
1295            | Self::Binaries => {
1296                // Nothing to do.
1297            }
1298        }
1299        Ok(summary)
1300    }
1301
1302    /// Return an iterator over all cache buckets.
1303    pub fn iter() -> impl Iterator<Item = Self> {
1304        [
1305            Self::Wheels,
1306            Self::SourceDistributions,
1307            Self::FlatIndex,
1308            Self::Git,
1309            Self::Interpreter,
1310            Self::Simple,
1311            Self::Archive,
1312            Self::Builds,
1313            Self::Environments,
1314            Self::Binaries,
1315        ]
1316        .iter()
1317        .copied()
1318    }
1319}
1320
1321impl Display for CacheBucket {
1322    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1323        f.write_str(self.to_str())
1324    }
1325}
1326
/// The freshness of a cache entry, as judged against the [`Refresh`] policy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Freshness {
    /// The cache entry is fresh according to the [`Refresh`] policy.
    Fresh,
    /// The cache entry is stale according to the [`Refresh`] policy.
    Stale,
    /// The cache entry does not exist.
    Missing,
}

impl Freshness {
    /// Returns `true` only for [`Freshness::Fresh`].
    pub const fn is_fresh(self) -> bool {
        match self {
            Self::Fresh => true,
            Self::Stale | Self::Missing => false,
        }
    }

    /// Returns `true` only for [`Freshness::Stale`]; a missing entry is not considered stale.
    pub const fn is_stale(self) -> bool {
        match self {
            Self::Stale => true,
            Self::Fresh | Self::Missing => false,
        }
    }
}
1346
/// A refresh policy for cache entries.
///
/// Every variant carries a [`Timestamp`]; [`Refresh::from_args`] sets it to the time at which
/// the policy was created.
#[derive(Debug, Clone)]
pub enum Refresh {
    /// Don't refresh any entries.
    None(Timestamp),
    /// Refresh entries linked to the given packages, if created before the given timestamp.
    ///
    /// NOTE(review): the second field is a list of paths; presumably entries linked to those
    /// paths are refreshed as well -- confirm against the callers.
    Packages(Vec<PackageName>, Vec<Box<Path>>, Timestamp),
    /// Refresh all entries created before the given timestamp.
    All(Timestamp),
}
1357
1358impl Refresh {
1359    /// Determine the refresh strategy to use based on the command-line arguments.
1360    pub fn from_args(refresh: Option<bool>, refresh_package: Vec<PackageName>) -> Self {
1361        let timestamp = Timestamp::now();
1362        match refresh {
1363            Some(true) => Self::All(timestamp),
1364            Some(false) => Self::None(timestamp),
1365            None => {
1366                if refresh_package.is_empty() {
1367                    Self::None(timestamp)
1368                } else {
1369                    Self::Packages(refresh_package, vec![], timestamp)
1370                }
1371            }
1372        }
1373    }
1374
1375    /// Return the [`Timestamp`] associated with the refresh policy.
1376    pub fn timestamp(&self) -> Timestamp {
1377        match self {
1378            Self::None(timestamp) => *timestamp,
1379            Self::Packages(.., timestamp) => *timestamp,
1380            Self::All(timestamp) => *timestamp,
1381        }
1382    }
1383
1384    /// Returns `true` if no packages should be reinstalled.
1385    pub fn is_none(&self) -> bool {
1386        matches!(self, Self::None(_))
1387    }
1388
1389    /// Combine two [`Refresh`] policies, taking the "max" of the two policies.
1390    #[must_use]
1391    pub fn combine(self, other: Self) -> Self {
1392        match (self, other) {
1393            // If the policy is `None`, return the existing refresh policy.
1394            // Take the `max` of the two timestamps.
1395            (Self::None(t1), Self::None(t2)) => Self::None(t1.max(t2)),
1396            (Self::None(t1), Self::All(t2)) => Self::All(t1.max(t2)),
1397            (Self::None(t1), Self::Packages(packages, paths, t2)) => {
1398                Self::Packages(packages, paths, t1.max(t2))
1399            }
1400
1401            // If the policy is `All`, refresh all packages.
1402            (Self::All(t1), Self::None(t2) | Self::All(t2) | Self::Packages(.., t2)) => {
1403                Self::All(t1.max(t2))
1404            }
1405
1406            // If the policy is `Packages`, take the "max" of the two policies.
1407            (Self::Packages(packages, paths, t1), Self::None(t2)) => {
1408                Self::Packages(packages, paths, t1.max(t2))
1409            }
1410            (Self::Packages(.., t1), Self::All(t2)) => Self::All(t1.max(t2)),
1411            (Self::Packages(packages1, paths1, t1), Self::Packages(packages2, paths2, t2)) => {
1412                Self::Packages(
1413                    packages1.into_iter().chain(packages2).collect(),
1414                    paths1.into_iter().chain(paths2).collect(),
1415                    t1.max(t2),
1416                )
1417            }
1418        }
1419    }
1420}
1421
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use crate::ArchiveId;

    use super::Link;

    /// Formatting a [`Link`] and parsing it back must preserve both the ID and the version.
    #[test]
    fn test_link_round_trip() {
        let original = Link::new(ArchiveId::new());
        let reparsed = Link::from_str(&original.to_string()).unwrap();
        assert_eq!(original.id, reparsed.id);
        assert_eq!(original.version, reparsed.version);
    }

    /// Only strings of the form `archive-v{version}/{id}` are accepted.
    #[test]
    fn test_link_deserialize() {
        // A well-formed link.
        assert!(Link::from_str("archive-v0/foo").is_ok());

        // Missing version number, missing `archive-` prefix, and missing ID, respectively.
        for invalid in ["archive/foo", "v1/foo", "archive-v0/"] {
            assert!(
                Link::from_str(invalid).is_err(),
                "expected {:?} to be rejected",
                invalid
            );
        }
    }
}