uv_cache/lib.rs
1use std::fmt::{Display, Formatter};
2use std::io;
3use std::io::Write;
4use std::ops::Deref;
5use std::path::{Path, PathBuf};
6use std::str::FromStr;
7use std::sync::Arc;
8
9use rustc_hash::FxHashMap;
10use tracing::{debug, trace, warn};
11
12use uv_cache_info::Timestamp;
13use uv_fs::{LockedFile, LockedFileError, LockedFileMode, Simplified, cachedir, directories};
14use uv_normalize::PackageName;
15use uv_pypi_types::ResolutionMetadata;
16
17pub use crate::by_timestamp::CachedByTimestamp;
18#[cfg(feature = "clap")]
19pub use crate::cli::CacheArgs;
20use crate::removal::Remover;
21pub use crate::removal::{Removal, rm_rf};
22pub use crate::wheel::WheelCache;
23use crate::wheel::WheelCacheKind;
24pub use archive::ArchiveId;
25
26mod archive;
27mod by_timestamp;
28#[cfg(feature = "clap")]
29mod cli;
30mod removal;
31mod wheel;
32
/// Version number of the archive bucket's on-disk layout.
///
/// Must be kept in-sync with the version in [`CacheBucket::to_str`].
pub const ARCHIVE_VERSION: u8 = 0;
37
38/// Error locking a cache entry or shard
39#[derive(Debug, thiserror::Error)]
40pub enum Error {
41 #[error(transparent)]
42 Io(#[from] io::Error),
43 #[error("Failed to initialize cache at `{}`", _0.user_display())]
44 Init(PathBuf, #[source] io::Error),
45 #[error("Could not make the path absolute")]
46 Absolute(#[source] io::Error),
47 #[error("Could not acquire lock")]
48 Acquire(#[from] LockedFileError),
49}
50
/// The path of a single item in the cache; the item itself may not exist on disk yet.
#[derive(Debug, Clone)]
pub struct CacheEntry(PathBuf);
54
55impl CacheEntry {
56 /// Create a new [`CacheEntry`] from a directory and a file name.
57 pub fn new(dir: impl Into<PathBuf>, file: impl AsRef<Path>) -> Self {
58 Self(dir.into().join(file))
59 }
60
61 /// Create a new [`CacheEntry`] from a path.
62 pub fn from_path(path: impl Into<PathBuf>) -> Self {
63 Self(path.into())
64 }
65
66 /// Return the cache entry's parent directory.
67 pub fn shard(&self) -> CacheShard {
68 CacheShard(self.dir().to_path_buf())
69 }
70
71 /// Convert the [`CacheEntry`] into a [`PathBuf`].
72 #[inline]
73 pub fn into_path_buf(self) -> PathBuf {
74 self.0
75 }
76
77 /// Return the path to the [`CacheEntry`].
78 #[inline]
79 pub fn path(&self) -> &Path {
80 &self.0
81 }
82
83 /// Return the cache entry's parent directory.
84 #[inline]
85 pub fn dir(&self) -> &Path {
86 self.0.parent().expect("Cache entry has no parent")
87 }
88
89 /// Create a new [`CacheEntry`] with the given file name.
90 #[must_use]
91 pub fn with_file(&self, file: impl AsRef<Path>) -> Self {
92 Self(self.dir().join(file))
93 }
94
95 /// Acquire the [`CacheEntry`] as an exclusive lock.
96 pub async fn lock(&self) -> Result<LockedFile, Error> {
97 fs_err::create_dir_all(self.dir())?;
98 Ok(LockedFile::acquire(
99 self.path(),
100 LockedFileMode::Exclusive,
101 self.path().display(),
102 )
103 .await?)
104 }
105}
106
107impl AsRef<Path> for CacheEntry {
108 fn as_ref(&self) -> &Path {
109 &self.0
110 }
111}
112
/// The path of a directory nested somewhere inside the cache.
#[derive(Debug, Clone)]
pub struct CacheShard(PathBuf);
116
117impl CacheShard {
118 /// Return a [`CacheEntry`] within this shard.
119 pub fn entry(&self, file: impl AsRef<Path>) -> CacheEntry {
120 CacheEntry::new(&self.0, file)
121 }
122
123 /// Return a [`CacheShard`] within this shard.
124 #[must_use]
125 pub fn shard(&self, dir: impl AsRef<Path>) -> Self {
126 Self(self.0.join(dir.as_ref()))
127 }
128
129 /// Acquire the cache entry as an exclusive lock.
130 pub async fn lock(&self) -> Result<LockedFile, Error> {
131 fs_err::create_dir_all(self.as_ref())?;
132 Ok(LockedFile::acquire(
133 self.join(".lock"),
134 LockedFileMode::Exclusive,
135 self.display(),
136 )
137 .await?)
138 }
139
140 /// Return the [`CacheShard`] as a [`PathBuf`].
141 pub fn into_path_buf(self) -> PathBuf {
142 self.0
143 }
144}
145
146impl AsRef<Path> for CacheShard {
147 fn as_ref(&self) -> &Path {
148 &self.0
149 }
150}
151
152impl Deref for CacheShard {
153 type Target = Path;
154
155 fn deref(&self) -> &Self::Target {
156 &self.0
157 }
158}
159
160/// The main cache abstraction.
161///
162/// While the cache is active, it holds a read (shared) lock that prevents cache cleaning
163#[derive(Debug, Clone)]
164pub struct Cache {
165 /// The cache directory.
166 root: PathBuf,
167 /// The refresh strategy to use when reading from the cache.
168 refresh: Refresh,
169 /// A temporary cache directory, if the user requested `--no-cache`.
170 ///
171 /// Included to ensure that the temporary directory exists for the length of the operation, but
172 /// is dropped at the end as appropriate.
173 temp_dir: Option<Arc<tempfile::TempDir>>,
174 /// Ensure that `uv cache` operations don't remove items from the cache that are used by another
175 /// uv process.
176 lock_file: Option<Arc<LockedFile>>,
177}
178
179impl Cache {
180 /// A persistent cache directory at `root`.
181 pub fn from_path(root: impl Into<PathBuf>) -> Self {
182 Self {
183 root: root.into(),
184 refresh: Refresh::None(Timestamp::now()),
185 temp_dir: None,
186 lock_file: None,
187 }
188 }
189
190 /// Create a temporary cache directory.
191 pub fn temp() -> Result<Self, io::Error> {
192 let temp_dir = tempfile::tempdir()?;
193 Ok(Self {
194 root: temp_dir.path().to_path_buf(),
195 refresh: Refresh::None(Timestamp::now()),
196 temp_dir: Some(Arc::new(temp_dir)),
197 lock_file: None,
198 })
199 }
200
201 /// Set the [`Refresh`] policy for the cache.
202 #[must_use]
203 pub fn with_refresh(self, refresh: Refresh) -> Self {
204 Self { refresh, ..self }
205 }
206
207 /// Acquire a lock that allows removing entries from the cache.
208 pub async fn with_exclusive_lock(self) -> Result<Self, LockedFileError> {
209 let Self {
210 root,
211 refresh,
212 temp_dir,
213 lock_file,
214 } = self;
215
216 // Release the existing lock, avoid deadlocks from a cloned cache.
217 if let Some(lock_file) = lock_file {
218 drop(
219 Arc::try_unwrap(lock_file).expect(
220 "cloning the cache before acquiring an exclusive lock causes a deadlock",
221 ),
222 );
223 }
224 let lock_file = LockedFile::acquire(
225 root.join(".lock"),
226 LockedFileMode::Exclusive,
227 root.simplified_display(),
228 )
229 .await?;
230
231 Ok(Self {
232 root,
233 refresh,
234 temp_dir,
235 lock_file: Some(Arc::new(lock_file)),
236 })
237 }
238
239 /// Acquire a lock that allows removing entries from the cache, if available.
240 ///
241 /// If the lock is not immediately available, returns [`Err`] with self.
242 pub fn with_exclusive_lock_no_wait(self) -> Result<Self, Self> {
243 let Self {
244 root,
245 refresh,
246 temp_dir,
247 lock_file,
248 } = self;
249
250 match LockedFile::acquire_no_wait(
251 root.join(".lock"),
252 LockedFileMode::Exclusive,
253 root.simplified_display(),
254 ) {
255 Some(lock_file) => Ok(Self {
256 root,
257 refresh,
258 temp_dir,
259 lock_file: Some(Arc::new(lock_file)),
260 }),
261 None => Err(Self {
262 root,
263 refresh,
264 temp_dir,
265 lock_file,
266 }),
267 }
268 }
269
270 /// Return the root of the cache.
271 pub fn root(&self) -> &Path {
272 &self.root
273 }
274
275 /// The folder for a specific cache bucket
276 pub fn bucket(&self, cache_bucket: CacheBucket) -> PathBuf {
277 self.root.join(cache_bucket.to_str())
278 }
279
280 /// Compute an entry in the cache.
281 pub fn shard(&self, cache_bucket: CacheBucket, dir: impl AsRef<Path>) -> CacheShard {
282 CacheShard(self.bucket(cache_bucket).join(dir.as_ref()))
283 }
284
285 /// Compute an entry in the cache.
286 pub fn entry(
287 &self,
288 cache_bucket: CacheBucket,
289 dir: impl AsRef<Path>,
290 file: impl AsRef<Path>,
291 ) -> CacheEntry {
292 CacheEntry::new(self.bucket(cache_bucket).join(dir), file)
293 }
294
295 /// Return the path to an archive in the cache.
296 pub fn archive(&self, id: &ArchiveId) -> PathBuf {
297 self.bucket(CacheBucket::Archive).join(id)
298 }
299
300 /// Create a temporary directory to be used as a Python virtual environment.
301 pub fn venv_dir(&self) -> io::Result<tempfile::TempDir> {
302 fs_err::create_dir_all(self.bucket(CacheBucket::Builds))?;
303 tempfile::tempdir_in(self.bucket(CacheBucket::Builds))
304 }
305
306 /// Create a temporary directory to be used for executing PEP 517 source distribution builds.
307 pub fn build_dir(&self) -> io::Result<tempfile::TempDir> {
308 fs_err::create_dir_all(self.bucket(CacheBucket::Builds))?;
309 tempfile::tempdir_in(self.bucket(CacheBucket::Builds))
310 }
311
312 /// Returns `true` if a cache entry must be revalidated given the [`Refresh`] policy.
313 pub fn must_revalidate_package(&self, package: &PackageName) -> bool {
314 match &self.refresh {
315 Refresh::None(_) => false,
316 Refresh::All(_) => true,
317 Refresh::Packages(packages, _, _) => packages.contains(package),
318 }
319 }
320
321 /// Returns `true` if a cache entry must be revalidated given the [`Refresh`] policy.
322 pub fn must_revalidate_path(&self, path: &Path) -> bool {
323 match &self.refresh {
324 Refresh::None(_) => false,
325 Refresh::All(_) => true,
326 Refresh::Packages(_, paths, _) => paths
327 .iter()
328 .any(|target| same_file::is_same_file(path, target).unwrap_or(false)),
329 }
330 }
331
332 /// Returns the [`Freshness`] for a cache entry, validating it against the [`Refresh`] policy.
333 ///
334 /// A cache entry is considered fresh if it was created after the cache itself was
335 /// initialized, or if the [`Refresh`] policy does not require revalidation.
336 pub fn freshness(
337 &self,
338 entry: &CacheEntry,
339 package: Option<&PackageName>,
340 path: Option<&Path>,
341 ) -> io::Result<Freshness> {
342 // Grab the cutoff timestamp, if it's relevant.
343 let timestamp = match &self.refresh {
344 Refresh::None(_) => return Ok(Freshness::Fresh),
345 Refresh::All(timestamp) => timestamp,
346 Refresh::Packages(packages, paths, timestamp) => {
347 if package.is_none_or(|package| packages.contains(package))
348 || path.is_some_and(|path| {
349 paths
350 .iter()
351 .any(|target| same_file::is_same_file(path, target).unwrap_or(false))
352 })
353 {
354 timestamp
355 } else {
356 return Ok(Freshness::Fresh);
357 }
358 }
359 };
360
361 match fs_err::metadata(entry.path()) {
362 Ok(metadata) => {
363 if Timestamp::from_metadata(&metadata) >= *timestamp {
364 Ok(Freshness::Fresh)
365 } else {
366 Ok(Freshness::Stale)
367 }
368 }
369 Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(Freshness::Missing),
370 Err(err) => Err(err),
371 }
372 }
373
374 /// Persist a temporary directory to the artifact store, returning its unique ID.
375 pub async fn persist(
376 &self,
377 temp_dir: impl AsRef<Path>,
378 path: impl AsRef<Path>,
379 ) -> io::Result<ArchiveId> {
380 // Create a unique ID for the artifact.
381 // TODO(charlie): Support content-addressed persistence via SHAs.
382 let id = ArchiveId::new();
383
384 // Move the temporary directory into the directory store.
385 let archive_entry = self.entry(CacheBucket::Archive, "", &id);
386 fs_err::create_dir_all(archive_entry.dir())?;
387 uv_fs::rename_with_retry(temp_dir.as_ref(), archive_entry.path()).await?;
388
389 // Create a symlink to the directory store.
390 fs_err::create_dir_all(path.as_ref().parent().expect("Cache entry to have parent"))?;
391 self.create_link(&id, path.as_ref())?;
392
393 Ok(id)
394 }
395
396 /// Returns `true` if the [`Cache`] is temporary.
397 pub fn is_temporary(&self) -> bool {
398 self.temp_dir.is_some()
399 }
400
401 /// Populate the cache scaffold.
402 fn create_base_files(root: &PathBuf) -> io::Result<()> {
403 // Create the cache directory, if it doesn't exist.
404 fs_err::create_dir_all(root)?;
405
406 // Add the CACHEDIR.TAG.
407 cachedir::ensure_tag(root)?;
408
409 // Add the .gitignore.
410 match fs_err::OpenOptions::new()
411 .write(true)
412 .create_new(true)
413 .open(root.join(".gitignore"))
414 {
415 Ok(mut file) => file.write_all(b"*")?,
416 Err(err) if err.kind() == io::ErrorKind::AlreadyExists => (),
417 Err(err) => return Err(err),
418 }
419
420 // Add an empty .gitignore to the build bucket, to ensure that the cache's own .gitignore
421 // doesn't interfere with source distribution builds. Build backends (like hatchling) will
422 // traverse upwards to look for .gitignore files.
423 fs_err::create_dir_all(root.join(CacheBucket::SourceDistributions.to_str()))?;
424 match fs_err::OpenOptions::new()
425 .write(true)
426 .create_new(true)
427 .open(
428 root.join(CacheBucket::SourceDistributions.to_str())
429 .join(".gitignore"),
430 ) {
431 Ok(_) => {}
432 Err(err) if err.kind() == io::ErrorKind::AlreadyExists => (),
433 Err(err) => return Err(err),
434 }
435
436 // Add a phony .git, if it doesn't exist, to ensure that the cache isn't considered to be
437 // part of a Git repository. (Some packages will include Git metadata (like a hash) in the
438 // built version if they're in a Git repository, but the cache should be viewed as an
439 // isolated store.).
440 // We have to put this below the gitignore. Otherwise, if the build backend uses the rust
441 // ignore crate it will walk up to the top level .gitignore and ignore its python source
442 // files.
443 let phony_git = root
444 .join(CacheBucket::SourceDistributions.to_str())
445 .join(".git");
446 match fs_err::OpenOptions::new()
447 .create(true)
448 .write(true)
449 .open(&phony_git)
450 {
451 Ok(_) => {}
452 // Handle read-only caches including sandboxed environments.
453 Err(err) if err.kind() == io::ErrorKind::ReadOnlyFilesystem => {
454 if !phony_git.exists() {
455 return Err(err);
456 }
457 }
458 Err(err) => return Err(err),
459 }
460
461 Ok(())
462 }
463
464 /// Initialize the [`Cache`].
465 pub async fn init(self) -> Result<Self, Error> {
466 let root = &self.root;
467
468 Self::create_base_files(root).map_err(|err| Error::Init(root.clone(), err))?;
469
470 // Block cache removal operations from interfering.
471 let lock_file = match LockedFile::acquire(
472 root.join(".lock"),
473 LockedFileMode::Shared,
474 root.simplified_display(),
475 )
476 .await
477 {
478 Ok(lock_file) => Some(Arc::new(lock_file)),
479 Err(err)
480 if err
481 .as_io_error()
482 .is_some_and(|err| err.kind() == io::ErrorKind::Unsupported) =>
483 {
484 warn!(
485 "Shared locking is not supported by the current platform or filesystem, \
486 reduced parallel process safety with `uv cache clean` and `uv cache prune`."
487 );
488 None
489 }
490 Err(err) => return Err(err.into()),
491 };
492
493 Ok(Self {
494 root: std::path::absolute(root).map_err(Error::Absolute)?,
495 lock_file,
496 ..self
497 })
498 }
499
500 /// Initialize the [`Cache`], assuming that there are no other uv processes running.
501 pub fn init_no_wait(self) -> Result<Option<Self>, Error> {
502 let root = &self.root;
503
504 Self::create_base_files(root).map_err(|err| Error::Init(root.clone(), err))?;
505
506 // Block cache removal operations from interfering.
507 let Some(lock_file) = LockedFile::acquire_no_wait(
508 root.join(".lock"),
509 LockedFileMode::Shared,
510 root.simplified_display(),
511 ) else {
512 return Ok(None);
513 };
514 Ok(Some(Self {
515 root: std::path::absolute(root).map_err(Error::Absolute)?,
516 lock_file: Some(Arc::new(lock_file)),
517 ..self
518 }))
519 }
520
521 /// Clear the cache, removing all entries.
522 pub fn clear(self, reporter: Box<dyn CleanReporter>) -> Result<Removal, io::Error> {
523 // Remove everything but `.lock`, Windows does not allow removal of a locked file
524 let mut removal = Remover::new(reporter).rm_rf(&self.root, true)?;
525 let Self {
526 root, lock_file, ..
527 } = self;
528
529 // Remove the `.lock` file, unlocking it first
530 if let Some(lock) = lock_file {
531 drop(lock);
532 fs_err::remove_file(root.join(".lock"))?;
533 }
534 removal.num_files += 1;
535
536 // Remove the root directory
537 match fs_err::remove_dir(root) {
538 Ok(()) => {
539 removal.num_dirs += 1;
540 }
541 // On Windows, when `--force` is used, the `.lock` file can exist and be unremovable,
542 // so we make this non-fatal
543 Err(err) if err.kind() == io::ErrorKind::DirectoryNotEmpty => {
544 trace!("Failed to remove root cache directory: not empty");
545 }
546 Err(err) => return Err(err),
547 }
548
549 Ok(removal)
550 }
551
552 /// Remove a package from the cache.
553 ///
554 /// Returns the number of entries removed from the cache.
555 pub fn remove(&self, name: &PackageName) -> io::Result<Removal> {
556 // Collect the set of referenced archives.
557 let references = self.find_archive_references()?;
558
559 // Remove any entries for the package from the cache.
560 let mut summary = Removal::default();
561 for bucket in CacheBucket::iter() {
562 summary += bucket.remove(self, name)?;
563 }
564
565 if references.is_empty() {
566 return Ok(summary);
567 }
568
569 // Only remove targets in the archive bucket. Cache entries may contain unexpected links
570 // to paths outside the cache.
571 let archive_root = fs_err::canonicalize(&self.root)?.join(CacheBucket::Archive.to_str());
572
573 // Remove any archives that are no longer referenced.
574 for (target, references) in references {
575 if target.starts_with(&archive_root) && references.iter().all(|path| !path.exists()) {
576 debug!("Removing dangling cache entry: {}", target.display());
577 summary += rm_rf(target)?;
578 }
579 }
580
581 Ok(summary)
582 }
583
584 /// Run the garbage collector on the cache, removing any dangling entries.
585 pub fn prune(&self, ci: bool) -> Result<Removal, io::Error> {
586 let mut summary = Removal::default();
587
588 // First, remove any top-level directories that are unused. These typically represent
589 // outdated cache buckets (e.g., `wheels-v0`, when latest is `wheels-v1`).
590 for entry in fs_err::read_dir(&self.root)? {
591 let entry = entry?;
592 let metadata = entry.metadata()?;
593
594 if entry.file_name() == "CACHEDIR.TAG"
595 || entry.file_name() == ".gitignore"
596 || entry.file_name() == ".git"
597 || entry.file_name() == ".lock"
598 {
599 continue;
600 }
601
602 if metadata.is_dir() {
603 // If the directory is not a cache bucket, remove it.
604 if CacheBucket::iter().all(|bucket| entry.file_name() != bucket.to_str()) {
605 let path = entry.path();
606 debug!("Removing dangling cache bucket: {}", path.display());
607 summary += rm_rf(path)?;
608 }
609 } else {
610 // If the file is not a marker file, remove it.
611 let path = entry.path();
612 debug!("Removing dangling cache bucket: {}", path.display());
613 summary += rm_rf(path)?;
614 }
615 }
616
617 // Second, remove any cached environments. These are never referenced by symlinks, so we can
618 // remove them directly.
619 match fs_err::read_dir(self.bucket(CacheBucket::Environments)) {
620 Ok(entries) => {
621 for entry in entries {
622 let entry = entry?;
623 let path = entry.path();
624 debug!("Removing dangling cache environment: {}", path.display());
625 summary += rm_rf(path)?;
626 }
627 }
628 Err(err) if err.kind() == io::ErrorKind::NotFound => (),
629 Err(err) => return Err(err),
630 }
631
632 // Third, if enabled, remove all unzipped wheels, leaving only the wheel archives.
633 if ci {
634 // Remove the entire pre-built wheel cache, since every entry is an unzipped wheel.
635 match fs_err::read_dir(self.bucket(CacheBucket::Wheels)) {
636 Ok(entries) => {
637 for entry in entries {
638 let entry = entry?;
639 let path = entry.path();
640 if path.is_dir() {
641 debug!("Removing unzipped wheel entry: {}", path.display());
642 summary += rm_rf(path)?;
643 }
644 }
645 }
646 Err(err) if err.kind() == io::ErrorKind::NotFound => (),
647 Err(err) => return Err(err),
648 }
649
650 let source_distributions = self.bucket(CacheBucket::SourceDistributions);
651 if source_distributions.try_exists()? {
652 for entry in walkdir::WalkDir::new(source_distributions) {
653 let entry = entry?;
654
655 // If the directory contains a `metadata.msgpack`, then it's a built wheel revision.
656 if !entry.file_type().is_dir() {
657 continue;
658 }
659
660 if !entry.path().join("metadata.msgpack").exists() {
661 continue;
662 }
663
664 // Remove everything except the built wheel archive and the metadata.
665 for entry in fs_err::read_dir(entry.path())? {
666 let entry = entry?;
667 let path = entry.path();
668
669 // Retain the resolved metadata (`metadata.msgpack`).
670 if path
671 .file_name()
672 .is_some_and(|file_name| file_name == "metadata.msgpack")
673 {
674 continue;
675 }
676
677 // Retain any built wheel archives.
678 if path
679 .extension()
680 .is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
681 {
682 continue;
683 }
684
685 debug!("Removing unzipped built wheel entry: {}", path.display());
686 summary += rm_rf(path)?;
687 }
688 }
689 }
690 }
691
692 // Fourth, remove any unused archives (by searching for archives that are not symlinked).
693 let references = self.find_archive_references()?;
694
695 match fs_err::read_dir(self.bucket(CacheBucket::Archive)) {
696 Ok(entries) => {
697 for entry in entries {
698 let entry = entry?;
699 let path = entry.path();
700 let target = fs_err::canonicalize(&path)?;
701 if !references.contains_key(&target) {
702 debug!("Removing dangling cache archive: {}", path.display());
703 summary += rm_rf(path)?;
704 }
705 }
706 }
707 Err(err) if err.kind() == io::ErrorKind::NotFound => (),
708 Err(err) => return Err(err),
709 }
710
711 Ok(summary)
712 }
713
714 /// Find all references to entries in the archive bucket.
715 ///
716 /// Archive entries are often referenced by symlinks in other cache buckets. This method
717 /// searches for all such references.
718 ///
719 /// Returns a map from archive path to paths that reference it.
720 fn find_archive_references(&self) -> Result<FxHashMap<PathBuf, Vec<PathBuf>>, io::Error> {
721 let mut references = FxHashMap::<PathBuf, Vec<PathBuf>>::default();
722 for bucket in [CacheBucket::SourceDistributions, CacheBucket::Wheels] {
723 let bucket_path = self.bucket(bucket);
724 if bucket_path.is_dir() {
725 let walker = walkdir::WalkDir::new(&bucket_path).into_iter();
726 for entry in walker.filter_entry(|entry| {
727 !(
728 // As an optimization, ignore any `.lock`, `.whl`, `.msgpack`, `.rev`, or
729 // `.http` files, along with the `src` directory, which represents the
730 // unpacked source distribution.
731 entry.file_name() == "src"
732 || entry.file_name() == ".lock"
733 || entry.file_name() == ".gitignore"
734 || entry.path().extension().is_some_and(|ext| {
735 ext.eq_ignore_ascii_case("lock")
736 || ext.eq_ignore_ascii_case("whl")
737 || ext.eq_ignore_ascii_case("http")
738 || ext.eq_ignore_ascii_case("rev")
739 || ext.eq_ignore_ascii_case("msgpack")
740 })
741 )
742 }) {
743 let entry = entry?;
744
745 // On Unix, archive references use symlinks.
746 if cfg!(unix) {
747 if !entry.file_type().is_symlink() {
748 continue;
749 }
750 }
751
752 // On Windows, archive references are files containing structured data.
753 if cfg!(windows) {
754 if !entry.file_type().is_file() {
755 continue;
756 }
757 }
758
759 if let Ok(target) = self.resolve_link(entry.path()) {
760 references
761 .entry(target)
762 .or_default()
763 .push(entry.path().to_path_buf());
764 }
765 }
766 }
767 }
768 Ok(references)
769 }
770
771 /// Create a link to a directory in the archive bucket.
772 ///
773 /// On Windows, we write structured data ([`Link`]) to a file containing the archive ID and
774 /// version. On Unix, we create a symlink to the target directory.
775 #[cfg(windows)]
776 #[expect(clippy::unused_self)]
777 fn create_link(&self, id: &ArchiveId, dst: impl AsRef<Path>) -> io::Result<()> {
778 // Serialize the link.
779 let link = Link::new(id.clone());
780 let contents = link.to_string();
781
782 // First, attempt to create a file at the location, but fail if it already exists.
783 match fs_err::OpenOptions::new()
784 .write(true)
785 .create_new(true)
786 .open(dst.as_ref())
787 {
788 Ok(mut file) => {
789 // Write the target path to the file.
790 file.write_all(contents.as_bytes())?;
791 Ok(())
792 }
793 Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
794 // Write to a temporary file, then move it into place.
795 let temp_dir = tempfile::tempdir_in(dst.as_ref().parent().unwrap())?;
796 let temp_file = temp_dir.path().join("link");
797 fs_err::write(&temp_file, contents.as_bytes())?;
798
799 // Move the symlink into the target location.
800 fs_err::rename(&temp_file, dst.as_ref())?;
801
802 Ok(())
803 }
804 Err(err) => Err(err),
805 }
806 }
807
808 /// Resolve an archive link, returning the fully-resolved path.
809 ///
810 /// Returns an error if the link target does not exist.
811 #[cfg(windows)]
812 pub fn resolve_link(&self, path: impl AsRef<Path>) -> io::Result<PathBuf> {
813 // Deserialize the link.
814 let contents = fs_err::read_to_string(path.as_ref())?;
815 let link = Link::from_str(&contents)?;
816
817 // Ignore stale links.
818 if link.version != ARCHIVE_VERSION {
819 return Err(io::Error::new(
820 io::ErrorKind::NotFound,
821 "The link target does not exist.",
822 ));
823 }
824
825 // Reconstruct the path.
826 let path = self.archive(&link.id);
827 path.canonicalize()
828 }
829
830 /// Create a link to a directory in the archive bucket.
831 ///
832 /// On Windows, we write structured data ([`Link`]) to a file containing the archive ID and
833 /// version. On Unix, we create a symlink to the target directory.
834 #[cfg(unix)]
835 fn create_link(&self, id: &ArchiveId, dst: impl AsRef<Path>) -> io::Result<()> {
836 let dst = dst.as_ref();
837 let dst_parent = dst.parent().expect("Cache entry to have parent");
838 // Construct the relative link target.
839 let src = uv_fs::relative_to(self.archive(id), dst_parent)?;
840
841 // Attempt to create the symlink directly.
842 match fs_err::os::unix::fs::symlink(&src, dst) {
843 Ok(()) => Ok(()),
844 Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
845 // Create a symlink, using a temporary file to ensure atomicity.
846 let temp_dir = tempfile::tempdir_in(dst_parent)?;
847 let temp_file = temp_dir.path().join("link");
848 fs_err::os::unix::fs::symlink(&src, &temp_file)?;
849
850 // Move the symlink into the target location.
851 fs_err::rename(&temp_file, dst)?;
852
853 Ok(())
854 }
855 Err(err) => Err(err),
856 }
857 }
858
859 /// Resolve an archive link, returning the fully-resolved path.
860 ///
861 /// Returns an error if the link target does not exist.
862 #[cfg(unix)]
863 pub fn resolve_link(&self, path: impl AsRef<Path>) -> io::Result<PathBuf> {
864 path.as_ref().canonicalize()
865 }
866}
867
868/// An archive (unzipped wheel) that exists in the local cache.
869#[derive(Debug, Clone)]
870#[allow(unused)]
871struct Link {
872 /// The unique ID of the entry in the archive bucket.
873 id: ArchiveId,
874 /// The version of the archive bucket.
875 version: u8,
876}
877
878#[allow(unused)]
879impl Link {
880 /// Create a new [`Archive`] with the given ID and hashes.
881 fn new(id: ArchiveId) -> Self {
882 Self {
883 id,
884 version: ARCHIVE_VERSION,
885 }
886 }
887}
888
889impl Display for Link {
890 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
891 write!(f, "archive-v{}/{}", self.version, self.id)
892 }
893}
894
895impl FromStr for Link {
896 type Err = io::Error;
897
898 fn from_str(s: &str) -> Result<Self, Self::Err> {
899 let mut parts = s.splitn(2, '/');
900 let version = parts
901 .next()
902 .filter(|s| !s.is_empty())
903 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing version"))?;
904 let id = parts
905 .next()
906 .filter(|s| !s.is_empty())
907 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing ID"))?;
908
909 // Parse the archive version from `archive-v{version}/{id}`.
910 let version = version
911 .strip_prefix("archive-v")
912 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing version prefix"))?;
913 let version = u8::from_str(version).map_err(|err| {
914 io::Error::new(
915 io::ErrorKind::InvalidData,
916 format!("failed to parse version: {err}"),
917 )
918 })?;
919
920 // Parse the ID from `archive-v{version}/{id}`.
921 let id = ArchiveId::from_str(id).map_err(|err| {
922 io::Error::new(
923 io::ErrorKind::InvalidData,
924 format!("failed to parse ID: {err}"),
925 )
926 })?;
927
928 Ok(Self { id, version })
929 }
930}
931
/// Receives progress callbacks while cache contents are being removed.
pub trait CleanReporter: Send + Sync {
    /// Invoked each time a single file or directory has been removed.
    fn on_clean(&self);

    /// Invoked once, after every file and directory has been removed.
    fn on_complete(&self);
}
939
/// The different kinds of data in the cache are stored in different buckets, which in our case
/// are subdirectories of the cache root.
942#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
943pub enum CacheBucket {
944 /// Wheels (excluding built wheels), alongside their metadata and cache policy.
945 ///
    /// There are three kinds of cache entries: wheel metadata and policy as `MsgPack` files, the
947 /// wheels themselves, and the unzipped wheel archives. If a wheel file is over an in-memory
948 /// size threshold, we first download the zip file into the cache, then unzip it into a
949 /// directory with the same name (exclusive of the `.whl` extension).
950 ///
951 /// Cache structure:
952 /// * `wheel-metadata-v0/pypi/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
953 /// * `wheel-metadata-v0/<digest(index-url)>/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
954 /// * `wheel-metadata-v0/url/<digest(url)>/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
955 ///
956 /// See `uv_client::RegistryClient::wheel_metadata` for information on how wheel metadata
957 /// is fetched.
958 ///
959 /// # Example
960 ///
961 /// Consider the following `requirements.in`:
962 /// ```text
963 /// # pypi wheel
964 /// pandas
965 /// # url wheel
966 /// flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl
967 /// ```
968 ///
969 /// When we run `pip compile`, it will only fetch and cache the metadata (and cache policy), it
970 /// doesn't need the actual wheels yet:
971 /// ```text
972 /// wheel-v0
973 /// ├── pypi
974 /// │ ...
975 /// │ ├── pandas
976 /// │ │ └── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.msgpack
977 /// │ ...
978 /// └── url
979 /// └── 4b8be67c801a7ecb
980 /// └── flask
981 /// └── flask-3.0.0-py3-none-any.msgpack
982 /// ```
983 ///
984 /// We get the following `requirement.txt` from `pip compile`:
985 ///
986 /// ```text
987 /// [...]
988 /// flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl
989 /// [...]
990 /// pandas==2.1.3
991 /// [...]
992 /// ```
993 ///
994 /// If we run `pip sync` on `requirements.txt` on a different machine, it also fetches the
995 /// wheels:
996 ///
997 /// TODO(konstin): This is still wrong, we need to store the cache policy too!
998 /// ```text
999 /// wheel-v0
1000 /// ├── pypi
1001 /// │ ...
1002 /// │ ├── pandas
1003 /// │ │ ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
1004 /// │ │ ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64
1005 /// │ ...
1006 /// └── url
1007 /// └── 4b8be67c801a7ecb
1008 /// └── flask
1009 /// └── flask-3.0.0-py3-none-any.whl
1010 /// ├── flask
1011 /// │ └── ...
1012 /// └── flask-3.0.0.dist-info
1013 /// └── ...
1014 /// ```
1015 ///
1016 /// If we run first `pip compile` and then `pip sync` on the same machine, we get both:
1017 ///
1018 /// ```text
1019 /// wheels-v0
1020 /// ├── pypi
1021 /// │ ├── ...
1022 /// │ ├── pandas
1023 /// │ │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.msgpack
1024 /// │ │ ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
1025 /// │ │ └── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64
1026 /// │ │ ├── pandas
1027 /// │ │ │ ├── ...
1028 /// │ │ ├── pandas-2.1.3.dist-info
1029 /// │ │ │ ├── ...
1030 /// │ │ └── pandas.libs
1031 /// │ ├── ...
1032 /// └── url
1033 /// └── 4b8be67c801a7ecb
1034 /// └── flask
1035 /// ├── flask-3.0.0-py3-none-any.msgpack
1036 /// ├── flask-3.0.0-py3-none-any.msgpack
1037 /// └── flask-3.0.0-py3-none-any
1038 /// ├── flask
1039 /// │ └── ...
1040 /// └── flask-3.0.0.dist-info
1041 /// └── ...
1042 Wheels,
1043 /// Source distributions, wheels built from source distributions, their extracted metadata, and the
1044 /// cache policy of the source distribution.
1045 ///
    /// The structure is similar to that of the `Wheels` bucket, except we have an additional layer
1047 /// for the source distribution filename and the metadata is at the source distribution-level,
1048 /// not at the wheel level.
1049 ///
1050 /// TODO(konstin): The cache policy should be on the source distribution level, the metadata we
1051 /// can put next to the wheels as in the `Wheels` bucket.
1052 ///
1053 /// The unzipped source distribution is stored in a directory matching the source distribution
1054 /// archive name.
1055 ///
1056 /// Source distributions are built into zipped wheel files (as PEP 517 specifies) and unzipped
1057 /// lazily before installing. So when resolving, we only build the wheel and store the archive
1058 /// file in the cache, when installing, we unpack it under the same name (exclusive of the
1059 /// `.whl` extension). You may find a mix of wheel archive zip files and unzipped wheel
1060 /// directories in the cache.
1061 ///
1062 /// Cache structure:
1063 /// * `built-wheels-v0/pypi/foo/34a17436ed1e9669/{manifest.msgpack, metadata.msgpack, foo-1.0.0.zip, foo-1.0.0-py3-none-any.whl, ...other wheels}`
1064 /// * `built-wheels-v0/<digest(index-url)>/foo/foo-1.0.0.zip/{manifest.msgpack, metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
1065 /// * `built-wheels-v0/url/<digest(url)>/foo/foo-1.0.0.zip/{manifest.msgpack, metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
1066 /// * `built-wheels-v0/git/<digest(url)>/<git sha>/foo/foo-1.0.0.zip/{metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
1067 ///
1068 /// But the url filename does not need to be a valid source dist filename
1069 /// (<https://github.com/search?q=path%3A**%2Frequirements.txt+master.zip&type=code>),
1070 /// so it could also be the following and we have to take any string as filename:
1071 /// * `built-wheels-v0/url/<sha256(url)>/master.zip/metadata.msgpack`
1072 ///
1073 /// # Example
1074 ///
1075 /// The following requirements:
1076 /// ```text
1077 /// # git source dist
1078 /// pydantic-extra-types @ git+https://github.com/pydantic/pydantic-extra-types.git
1079 /// # pypi source dist
1080 /// django_allauth==0.51.0
1081 /// # url source dist
1082 /// werkzeug @ https://files.pythonhosted.org/packages/0d/cc/ff1904eb5eb4b455e442834dabf9427331ac0fa02853bf83db817a7dd53d/werkzeug-3.0.1.tar.gz
1083 /// ```
1084 ///
1085 /// ...may be cached as:
1086 /// ```text
1087 /// built-wheels-v4/
1088 /// ├── git
1089 /// │ └── 2122faf3e081fb7a
1090 /// │ └── 7a2d650a4a7b4d04
1091 /// │ ├── metadata.msgpack
1092 /// │ └── pydantic_extra_types-2.9.0-py3-none-any.whl
1093 /// ├── pypi
1094 /// │ └── django-allauth
1095 /// │ └── 0.51.0
1096 /// │ ├── 0gH-_fwv8tdJ7JwwjJsUc
1097 /// │ │ ├── django-allauth-0.51.0.tar.gz
1098 /// │ │ │ └── [UNZIPPED CONTENTS]
1099 /// │ │ ├── django_allauth-0.51.0-py3-none-any.whl
1100 /// │ │ └── metadata.msgpack
1101 /// │ └── revision.http
1102 /// └── url
1103 /// └── 6781bd6440ae72c2
1104 /// ├── APYY01rbIfpAo_ij9sCY6
1105 /// │ ├── metadata.msgpack
1106 /// │ ├── werkzeug-3.0.1-py3-none-any.whl
1107 /// │ └── werkzeug-3.0.1.tar.gz
1108 /// │ └── [UNZIPPED CONTENTS]
1109 /// └── revision.http
1110 /// ```
1111 ///
1112 /// Structurally, the `manifest.msgpack` is empty, and only contains the caching information
1113 /// needed to invalidate the cache. The `metadata.msgpack` contains the metadata of the source
1114 /// distribution.
1115 SourceDistributions,
1116 /// Flat index responses, a format very similar to the simple metadata API.
1117 ///
1118 /// Cache structure:
1119 /// * `flat-index-v0/index/<digest(flat_index_url)>.msgpack`
1120 ///
1121 /// The response is stored as `Vec<File>`.
1122 FlatIndex,
1123 /// Git repositories.
1124 Git,
1125 /// Information about an interpreter at a path.
1126 ///
1127 /// To avoid caching pyenv shims, bash scripts which may redirect to a new python version
1128 /// without the shim itself changing, we only cache when the path equals `sys.executable`, i.e.
1129 /// the path we're running is the python executable itself and not a shim.
1130 ///
1131 /// Cache structure: `interpreter-v0/<digest(path)>.msgpack`
1132 ///
1133 /// # Example
1134 ///
1135 /// The contents of each of the `MsgPack` files has a timestamp field in unix time, the [PEP 508]
1136 /// markers and some information from the `sys`/`sysconfig` modules.
1137 ///
1138 /// ```json
1139 /// {
1140 /// "timestamp": 1698047994491,
1141 /// "data": {
1142 /// "markers": {
1143 /// "implementation_name": "cpython",
1144 /// "implementation_version": "3.12.0",
1145 /// "os_name": "posix",
1146 /// "platform_machine": "x86_64",
1147 /// "platform_python_implementation": "CPython",
1148 /// "platform_release": "6.5.0-13-generic",
1149 /// "platform_system": "Linux",
1150 /// "platform_version": "#13-Ubuntu SMP PREEMPT_DYNAMIC Fri Nov 3 12:16:05 UTC 2023",
1151 /// "python_full_version": "3.12.0",
1152 /// "python_version": "3.12",
1153 /// "sys_platform": "linux"
1154 /// },
1155 /// "base_exec_prefix": "/home/ferris/.pyenv/versions/3.12.0",
1156 /// "base_prefix": "/home/ferris/.pyenv/versions/3.12.0",
1157 /// "sys_executable": "/home/ferris/projects/uv/.venv/bin/python"
1158 /// }
1159 /// }
1160 /// ```
1161 ///
1162 /// [PEP 508]: https://peps.python.org/pep-0508/#environment-markers
1163 Interpreter,
1164 /// Index responses through the simple metadata API.
1165 ///
1166 /// Cache structure:
1167 /// * `simple-v0/pypi/<package_name>.rkyv`
1168 /// * `simple-v0/<digest(index_url)>/<package_name>.rkyv`
1169 ///
1170 /// The response is parsed into `uv_client::SimpleDetailMetadata` before storage.
1171 Simple,
1172 /// A cache of unzipped wheels, stored as directories. This is used internally within the cache.
1173 /// When other buckets need to store directories, they should persist them to
1174 /// [`CacheBucket::Archive`], and then symlink them into the appropriate bucket. This ensures
1175 /// that cache entries can be atomically replaced and removed, as storing directories in the
1176 /// other buckets directly would make atomic operations impossible.
1177 Archive,
1178 /// Ephemeral virtual environments used to execute PEP 517 builds and other operations.
1179 Builds,
1180 /// Reusable virtual environments used to invoke Python tools.
1181 Environments,
1182 /// Cached Python downloads
1183 Python,
1184 /// Downloaded tool binaries (e.g., Ruff).
1185 Binaries,
1186 /// Cached vulnerability data from [OSV](https://osv.dev/).
1187 ///
1188 /// Cache structure:
1189 /// * `osv-v0/vulnerability/<vuln_id>.msgpack` — cached full vulnerability records
1190 Osv,
1191}
1192
1193impl CacheBucket {
1194 fn to_str(self) -> &'static str {
1195 match self {
1196 // Note that when bumping this, you'll also need to bump it
1197 // in `crates/uv/tests/build/cache_prune.rs`.
1198 Self::SourceDistributions => "sdists-v9",
1199 Self::FlatIndex => "flat-index-v2",
1200 Self::Git => "git-v0",
1201 Self::Interpreter => "interpreter-v4",
1202 // Note that when bumping this, you'll also need to bump it
1203 // in `crates/uv/tests/build/cache_clean.rs`.
1204 Self::Simple => "simple-v21",
1205 // Note that when bumping this, you'll also need to bump it
1206 // in `crates/uv/tests/build/cache_prune.rs`.
1207 Self::Wheels => "wheels-v6",
1208 // Note that when bumping this, you'll also need to bump
1209 // `ARCHIVE_VERSION` in `crates/uv-cache/src/lib.rs`.
1210 Self::Archive => "archive-v0",
1211 Self::Builds => "builds-v0",
1212 Self::Environments => "environments-v2",
1213 Self::Python => "python-v0",
1214 Self::Binaries => "binaries-v0",
1215 Self::Osv => "osv-v0",
1216 }
1217 }
1218
1219 /// Remove a package from the cache bucket.
1220 ///
1221 /// Returns the number of entries removed from the cache.
1222 fn remove(self, cache: &Cache, name: &PackageName) -> Result<Removal, io::Error> {
1223 /// Returns `true` if the [`Path`] represents a built wheel for the given package.
1224 fn is_match(path: &Path, name: &PackageName) -> bool {
1225 let Ok(metadata) = fs_err::read(path.join("metadata.msgpack")) else {
1226 return false;
1227 };
1228 let Ok(metadata) = rmp_serde::from_slice::<ResolutionMetadata>(&metadata) else {
1229 return false;
1230 };
1231 metadata.name == *name
1232 }
1233
1234 let mut summary = Removal::default();
1235 match self {
1236 Self::Wheels => {
1237 // For `pypi` wheels, we expect a directory per package (indexed by name).
1238 let root = cache.bucket(self).join(WheelCacheKind::Pypi);
1239 summary += rm_rf(root.join(name.to_string()))?;
1240
1241 // For alternate indices, we expect a directory for every index (under an `index`
1242 // subdirectory), followed by a directory per package (indexed by name).
1243 let root = cache.bucket(self).join(WheelCacheKind::Index);
1244 for directory in directories(root)? {
1245 summary += rm_rf(directory.join(name.to_string()))?;
1246 }
1247
1248 // For direct URLs, we expect a directory for every URL, followed by a
1249 // directory per package (indexed by name).
1250 let root = cache.bucket(self).join(WheelCacheKind::Url);
1251 for directory in directories(root)? {
1252 summary += rm_rf(directory.join(name.to_string()))?;
1253 }
1254 }
1255 Self::SourceDistributions => {
1256 // For `pypi` wheels, we expect a directory per package (indexed by name).
1257 let root = cache.bucket(self).join(WheelCacheKind::Pypi);
1258 summary += rm_rf(root.join(name.to_string()))?;
1259
1260 // For alternate indices, we expect a directory for every index (under an `index`
1261 // subdirectory), followed by a directory per package (indexed by name).
1262 let root = cache.bucket(self).join(WheelCacheKind::Index);
1263 for directory in directories(root)? {
1264 summary += rm_rf(directory.join(name.to_string()))?;
1265 }
1266
1267 // For direct URLs, we expect a directory for every URL, followed by a
1268 // directory per version. To determine whether the URL is relevant, we need to
1269 // search for a wheel matching the package name.
1270 let root = cache.bucket(self).join(WheelCacheKind::Url);
1271 for url in directories(root)? {
1272 if directories(&url)?.any(|version| is_match(&version, name)) {
1273 summary += rm_rf(url)?;
1274 }
1275 }
1276
1277 // For local dependencies, we expect a directory for every path, followed by a
1278 // directory per version. To determine whether the path is relevant, we need to
1279 // search for a wheel matching the package name.
1280 let root = cache.bucket(self).join(WheelCacheKind::Path);
1281 for path in directories(root)? {
1282 if directories(&path)?.any(|version| is_match(&version, name)) {
1283 summary += rm_rf(path)?;
1284 }
1285 }
1286
1287 // For Git dependencies, we expect a directory for every repository, followed by a
1288 // directory for every SHA. To determine whether the SHA is relevant, we need to
1289 // search for a wheel matching the package name.
1290 let root = cache.bucket(self).join(WheelCacheKind::Git);
1291 for repository in directories(root)? {
1292 for sha in directories(repository)? {
1293 if is_match(&sha, name) {
1294 summary += rm_rf(sha)?;
1295 }
1296 }
1297 }
1298 }
1299 Self::Simple => {
1300 // For `pypi` wheels, we expect a rkyv file per package, indexed by name.
1301 let root = cache.bucket(self).join(WheelCacheKind::Pypi);
1302 summary += rm_rf(root.join(format!("{name}.rkyv")))?;
1303
1304 // For alternate indices, we expect a directory for every index (under an `index`
1305 // subdirectory), followed by a directory per package (indexed by name).
1306 let root = cache.bucket(self).join(WheelCacheKind::Index);
1307 for directory in directories(root)? {
1308 summary += rm_rf(directory.join(format!("{name}.rkyv")))?;
1309 }
1310 }
1311 Self::FlatIndex => {
1312 // We can't know if the flat index includes a package, so we just remove the entire
1313 // cache entry.
1314 let root = cache.bucket(self);
1315 summary += rm_rf(root)?;
1316 }
1317 Self::Git
1318 | Self::Interpreter
1319 | Self::Archive
1320 | Self::Builds
1321 | Self::Environments
1322 | Self::Python
1323 | Self::Binaries
1324 | Self::Osv => {
1325 // Nothing to do.
1326 }
1327 }
1328 Ok(summary)
1329 }
1330
1331 /// Return an iterator over all cache buckets.
1332 fn iter() -> impl Iterator<Item = Self> {
1333 [
1334 Self::Wheels,
1335 Self::SourceDistributions,
1336 Self::FlatIndex,
1337 Self::Git,
1338 Self::Interpreter,
1339 Self::Simple,
1340 Self::Archive,
1341 Self::Builds,
1342 Self::Environments,
1343 Self::Python,
1344 Self::Binaries,
1345 Self::Osv,
1346 ]
1347 .iter()
1348 .copied()
1349 }
1350}
1351
1352impl Display for CacheBucket {
1353 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1354 f.write_str(self.to_str())
1355 }
1356}
1357
/// The state of a cache entry when checked against a [`Refresh`] policy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Freshness {
    /// The cache entry is fresh according to the [`Refresh`] policy.
    Fresh,
    /// The cache entry is stale according to the [`Refresh`] policy.
    Stale,
    /// The cache entry does not exist.
    Missing,
}

impl Freshness {
    /// Returns `true` only for [`Freshness::Fresh`]; stale and missing entries are not fresh.
    pub const fn is_fresh(self) -> bool {
        match self {
            Self::Fresh => true,
            Self::Stale | Self::Missing => false,
        }
    }
}
1373
/// A refresh policy for cache entries.
///
/// Every variant carries a [`Timestamp`]. [`Refresh::from_args`] sets it to [`Timestamp::now`]
/// when the policy is built, and [`Refresh::combine`] keeps the maximum of the two timestamps.
#[derive(Debug, Clone)]
pub enum Refresh {
    /// Don't refresh any entries.
    None(Timestamp),
    /// Refresh entries linked to the given packages, if created before the given timestamp.
    ///
    /// NOTE(review): the second field is a list of paths that [`Refresh::from_args`] always
    /// leaves empty; presumably these are local sources to refresh — confirm against callers.
    Packages(Vec<PackageName>, Vec<Box<Path>>, Timestamp),
    /// Refresh all entries created before the given timestamp.
    All(Timestamp),
}
1384
1385impl Refresh {
1386 /// Determine the refresh strategy to use based on the command-line arguments.
1387 pub fn from_args(refresh: Option<bool>, refresh_package: Vec<PackageName>) -> Self {
1388 let timestamp = Timestamp::now();
1389 match refresh {
1390 Some(true) => Self::All(timestamp),
1391 Some(false) => Self::None(timestamp),
1392 None => {
1393 if refresh_package.is_empty() {
1394 Self::None(timestamp)
1395 } else {
1396 Self::Packages(refresh_package, vec![], timestamp)
1397 }
1398 }
1399 }
1400 }
1401
1402 /// Combine two [`Refresh`] policies, taking the "max" of the two policies.
1403 #[must_use]
1404 pub fn combine(self, other: Self) -> Self {
1405 match (self, other) {
1406 // If the policy is `None`, return the existing refresh policy.
1407 // Take the `max` of the two timestamps.
1408 (Self::None(t1), Self::None(t2)) => Self::None(t1.max(t2)),
1409 (Self::None(t1), Self::All(t2)) => Self::All(t1.max(t2)),
1410 (Self::None(t1), Self::Packages(packages, paths, t2)) => {
1411 Self::Packages(packages, paths, t1.max(t2))
1412 }
1413
1414 // If the policy is `All`, refresh all packages.
1415 (Self::All(t1), Self::None(t2) | Self::All(t2) | Self::Packages(.., t2)) => {
1416 Self::All(t1.max(t2))
1417 }
1418
1419 // If the policy is `Packages`, take the "max" of the two policies.
1420 (Self::Packages(packages, paths, t1), Self::None(t2)) => {
1421 Self::Packages(packages, paths, t1.max(t2))
1422 }
1423 (Self::Packages(.., t1), Self::All(t2)) => Self::All(t1.max(t2)),
1424 (Self::Packages(packages1, paths1, t1), Self::Packages(packages2, paths2, t2)) => {
1425 Self::Packages(
1426 packages1.into_iter().chain(packages2).collect(),
1427 paths1.into_iter().chain(paths2).collect(),
1428 t1.max(t2),
1429 )
1430 }
1431 }
1432 }
1433}
1434
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use crate::ArchiveId;

    use super::Link;
    #[cfg(unix)]
    use super::{Cache, CacheBucket};

    /// Formatting a [`Link`] and parsing it back preserves both the id and the version.
    #[test]
    fn test_link_round_trip() {
        let original = Link::new(ArchiveId::new());
        let rendered = original.to_string();
        let reparsed = Link::from_str(&rendered).unwrap();
        assert_eq!(original.id, reparsed.id);
        assert_eq!(original.version, reparsed.version);
    }

    /// `archive-v0/foo` parses, while the three malformed strings below are rejected.
    #[test]
    fn test_link_deserialize() {
        assert!(Link::from_str("archive-v0/foo").is_ok());
        for malformed in ["archive/foo", "v1/foo", "archive-v0/"] {
            assert!(Link::from_str(malformed).is_err());
        }
    }

    /// Shared fixture for the symlink tests below.
    ///
    /// Creates a cache containing `bucket` (plus any empty `siblings` buckets), plants a symlink
    /// named `escape` inside `bucket` that points at a directory outside the cache, and runs
    /// `Cache::prune(ci)`. Asserts that exactly one file (the symlink itself) is reported as
    /// removed and that the target directory and its payload are untouched.
    #[cfg(unix)]
    fn assert_prune_removes_only_the_symlink(
        bucket: CacheBucket,
        siblings: &[CacheBucket],
        ci: bool,
    ) {
        let cache_root = tempfile::tempdir().unwrap();
        let victim_root = tempfile::tempdir().unwrap();
        let bucket_dir = cache_root.path().join(bucket.to_str());
        let victim_dir = victim_root.path().join("victim-dir");
        let symlink = bucket_dir.join("escape");

        fs_err::create_dir_all(&bucket_dir).unwrap();
        for sibling in siblings {
            fs_err::create_dir_all(cache_root.path().join(sibling.to_str())).unwrap();
        }
        fs_err::create_dir_all(&victim_dir).unwrap();
        fs_err::write(victim_dir.join("payload.txt"), "payload").unwrap();
        fs_err::os::unix::fs::symlink(&victim_dir, &symlink).unwrap();

        let summary = Cache::from_path(cache_root.path()).prune(ci).unwrap();

        assert_eq!(summary.num_files, 1);
        assert_eq!(summary.num_dirs, 0);
        assert!(victim_dir.is_dir());
        assert!(victim_dir.join("payload.txt").is_file());
        assert!(fs_err::symlink_metadata(&symlink).is_err());
    }

    #[test]
    #[cfg(unix)]
    fn prune_does_not_follow_environment_symlinks() {
        assert_prune_removes_only_the_symlink(CacheBucket::Environments, &[], false);
    }

    #[test]
    #[cfg(unix)]
    fn prune_ci_does_not_follow_wheel_symlinks() {
        assert_prune_removes_only_the_symlink(
            CacheBucket::Wheels,
            &[CacheBucket::SourceDistributions],
            true,
        );
    }

    #[test]
    #[cfg(unix)]
    fn prune_does_not_follow_archive_symlinks() {
        assert_prune_removes_only_the_symlink(CacheBucket::Archive, &[], false);
    }
}