uv_cache_info/
cache_info.rs

1use std::borrow::Cow;
2use std::collections::BTreeMap;
3use std::path::{Path, PathBuf};
4
5use serde::Deserialize;
6use tracing::{debug, warn};
7
8use uv_fs::Simplified;
9
10use crate::git_info::{Commit, Tags};
11use crate::glob::cluster_globs;
12use crate::timestamp::Timestamp;
13
/// Errors that can be returned while computing a [`CacheInfo`].
#[derive(Debug, thiserror::Error)]
pub enum CacheInfoError {
    /// The glob walker could not be built from the glob patterns listed in `cache-keys`.
    #[error("Failed to parse glob patterns for `cache-keys`: {0}")]
    Glob(#[from] globwalk::GlobError),
    /// An I/O error; displayed using the underlying error's own message.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
21
/// The information used to determine whether a built distribution is up-to-date, based on the
/// timestamps of relevant files, the current commit of a repository, etc.
#[derive(Default, Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct CacheInfo {
    /// The timestamp of the most recent `ctime` of any relevant files, at the time of the build.
    /// The timestamp will typically be the maximum of the `ctime` values of the `pyproject.toml`,
    /// `setup.py`, and `setup.cfg` files, if they exist; however, users can provide additional
    /// files to timestamp via the `cache-keys` field.
    timestamp: Option<Timestamp>,
    /// The commit at which the distribution was built.
    commit: Option<Commit>,
    /// The Git tags present at the time of the build.
    tags: Option<Tags>,
    /// Environment variables to include in the cache key, mapped to the value that was read at
    /// the time of the build (`None` if the variable could not be read).
    #[serde(default)]
    env: BTreeMap<String, Option<String>>,
    /// The timestamp or inode of any directories that should be considered in the cache key,
    /// keyed by the directory as written in the cache key. A `None` value records that the
    /// directory did not exist.
    #[serde(default)]
    directories: BTreeMap<Cow<'static, str>, Option<DirectoryTimestamp>>,
}
43
44impl CacheInfo {
45    /// Return the [`CacheInfo`] for a given timestamp.
46    pub fn from_timestamp(timestamp: Timestamp) -> Self {
47        Self {
48            timestamp: Some(timestamp),
49            ..Self::default()
50        }
51    }
52
53    /// Compute the cache info for a given path, which may be a file or a directory.
54    pub fn from_path(path: &Path) -> Result<Self, CacheInfoError> {
55        let metadata = fs_err::metadata(path)?;
56        if metadata.is_file() {
57            Ok(Self::from_file(path)?)
58        } else {
59            Self::from_directory(path)
60        }
61    }
62
63    /// Compute the cache info for a given directory.
64    pub fn from_directory(directory: &Path) -> Result<Self, CacheInfoError> {
65        let mut commit = None;
66        let mut tags = None;
67        let mut last_changed: Option<(PathBuf, Timestamp)> = None;
68        let mut directories = BTreeMap::new();
69        let mut env = BTreeMap::new();
70
71        // Read the cache keys.
72        let cache_keys =
73            if let Ok(contents) = fs_err::read_to_string(directory.join("pyproject.toml")) {
74                if let Ok(pyproject_toml) = toml::from_str::<PyProjectToml>(&contents) {
75                    pyproject_toml
76                        .tool
77                        .and_then(|tool| tool.uv)
78                        .and_then(|tool_uv| tool_uv.cache_keys)
79                } else {
80                    None
81                }
82            } else {
83                None
84            };
85
86        // If no cache keys were defined, use the defaults.
87        let cache_keys = cache_keys.unwrap_or_else(|| {
88            vec![
89                CacheKey::Path(Cow::Borrowed("pyproject.toml")),
90                CacheKey::Path(Cow::Borrowed("setup.py")),
91                CacheKey::Path(Cow::Borrowed("setup.cfg")),
92                CacheKey::Directory {
93                    dir: Cow::Borrowed("src"),
94                },
95            ]
96        });
97
98        // Incorporate timestamps from any direct filepaths.
99        let mut globs = vec![];
100        for cache_key in cache_keys {
101            match cache_key {
102                CacheKey::Path(file) | CacheKey::File { file } => {
103                    if file
104                        .as_ref()
105                        .chars()
106                        .any(|c| matches!(c, '*' | '?' | '[' | '{'))
107                    {
108                        // Defer globs to a separate pass.
109                        globs.push(file);
110                        continue;
111                    }
112
113                    // Treat the path as a file.
114                    let path = directory.join(file.as_ref());
115                    let metadata = match path.metadata() {
116                        Ok(metadata) => metadata,
117                        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
118                            continue;
119                        }
120                        Err(err) => {
121                            warn!("Failed to read metadata for file: {err}");
122                            continue;
123                        }
124                    };
125                    if !metadata.is_file() {
126                        warn!(
127                            "Expected file for cache key, but found directory: `{}`",
128                            path.display()
129                        );
130                        continue;
131                    }
132                    let timestamp = Timestamp::from_metadata(&metadata);
133                    if last_changed.as_ref().is_none_or(|(_, prev_timestamp)| {
134                        *prev_timestamp < Timestamp::from_metadata(&metadata)
135                    }) {
136                        last_changed = Some((path, timestamp));
137                    }
138                }
139                CacheKey::Directory { dir } => {
140                    // Treat the path as a directory.
141                    let path = directory.join(dir.as_ref());
142                    let metadata = match path.metadata() {
143                        Ok(metadata) => metadata,
144                        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
145                            directories.insert(dir, None);
146                            continue;
147                        }
148                        Err(err) => {
149                            warn!("Failed to read metadata for directory: {err}");
150                            continue;
151                        }
152                    };
153                    if !metadata.is_dir() {
154                        warn!(
155                            "Expected directory for cache key, but found file: `{}`",
156                            path.display()
157                        );
158                        continue;
159                    }
160
161                    if let Ok(created) = metadata.created() {
162                        // Prefer the creation time.
163                        directories.insert(
164                            dir,
165                            Some(DirectoryTimestamp::Timestamp(Timestamp::from(created))),
166                        );
167                    } else {
168                        // Fall back to the inode.
169                        #[cfg(unix)]
170                        {
171                            use std::os::unix::fs::MetadataExt;
172                            directories
173                                .insert(dir, Some(DirectoryTimestamp::Inode(metadata.ino())));
174                        }
175                        #[cfg(not(unix))]
176                        {
177                            warn!(
178                                "Failed to read creation time for directory: `{}`",
179                                path.display()
180                            );
181                        }
182                    }
183                }
184                CacheKey::Git {
185                    git: GitPattern::Bool(true),
186                } => match Commit::from_repository(directory) {
187                    Ok(commit_info) => commit = Some(commit_info),
188                    Err(err) => {
189                        debug!("Failed to read the current commit: {err}");
190                    }
191                },
192                CacheKey::Git {
193                    git: GitPattern::Set(set),
194                } => {
195                    if set.commit.unwrap_or(false) {
196                        match Commit::from_repository(directory) {
197                            Ok(commit_info) => commit = Some(commit_info),
198                            Err(err) => {
199                                debug!("Failed to read the current commit: {err}");
200                            }
201                        }
202                    }
203                    if set.tags.unwrap_or(false) {
204                        match Tags::from_repository(directory) {
205                            Ok(tags_info) => tags = Some(tags_info),
206                            Err(err) => {
207                                debug!("Failed to read the current tags: {err}");
208                            }
209                        }
210                    }
211                }
212                CacheKey::Git {
213                    git: GitPattern::Bool(false),
214                } => {}
215                CacheKey::Environment { env: var } => {
216                    let value = std::env::var(&var).ok();
217                    env.insert(var, value);
218                }
219            }
220        }
221
222        // If we have any globs, first cluster them using LCP and then do a single pass on each group.
223        if !globs.is_empty() {
224            for (glob_base, glob_patterns) in cluster_globs(&globs) {
225                let walker = globwalk::GlobWalkerBuilder::from_patterns(
226                    directory.join(glob_base),
227                    &glob_patterns,
228                )
229                .file_type(globwalk::FileType::FILE | globwalk::FileType::SYMLINK)
230                .build()?;
231                for entry in walker {
232                    let entry = match entry {
233                        Ok(entry) => entry,
234                        Err(err) => {
235                            warn!("Failed to read glob entry: {err}");
236                            continue;
237                        }
238                    };
239                    let metadata = if entry.path_is_symlink() {
240                        // resolve symlinks for leaf entries without following symlinks while globbing
241                        match fs_err::metadata(entry.path()) {
242                            Ok(metadata) => metadata,
243                            Err(err) => {
244                                warn!("Failed to resolve symlink for glob entry: {err}");
245                                continue;
246                            }
247                        }
248                    } else {
249                        match entry.metadata() {
250                            Ok(metadata) => metadata,
251                            Err(err) => {
252                                warn!("Failed to read metadata for glob entry: {err}");
253                                continue;
254                            }
255                        }
256                    };
257                    if !metadata.is_file() {
258                        if !entry.path_is_symlink() {
259                            // don't warn if it was a symlink - it may legitimately resolve to a directory
260                            warn!(
261                                "Expected file for cache key, but found directory: `{}`",
262                                entry.path().display()
263                            );
264                        }
265                        continue;
266                    }
267                    let timestamp = Timestamp::from_metadata(&metadata);
268                    if last_changed.as_ref().is_none_or(|(_, prev_timestamp)| {
269                        *prev_timestamp < Timestamp::from_metadata(&metadata)
270                    }) {
271                        last_changed = Some((entry.into_path(), timestamp));
272                    }
273                }
274            }
275        }
276
277        let timestamp = if let Some((path, timestamp)) = last_changed {
278            debug!(
279                "Computed cache info: {timestamp:?}, {commit:?}, {tags:?}, {env:?}, {directories:?}. Most recently modified: {}",
280                path.user_display()
281            );
282            Some(timestamp)
283        } else {
284            None
285        };
286
287        Ok(Self {
288            timestamp,
289            commit,
290            tags,
291            env,
292            directories,
293        })
294    }
295
296    /// Compute the cache info for a given file, assumed to be a binary or source distribution
297    /// represented as (e.g.) a `.whl` or `.tar.gz` archive.
298    pub fn from_file(path: impl AsRef<Path>) -> std::io::Result<Self> {
299        let metadata = fs_err::metadata(path.as_ref())?;
300        let timestamp = Timestamp::from_metadata(&metadata);
301        Ok(Self {
302            timestamp: Some(timestamp),
303            ..Self::default()
304        })
305    }
306
307    /// Returns `true` if the cache info is empty.
308    pub fn is_empty(&self) -> bool {
309        self.timestamp.is_none()
310            && self.commit.is_none()
311            && self.tags.is_none()
312            && self.env.is_empty()
313            && self.directories.is_empty()
314    }
315}
316
/// A `pyproject.toml` with an (optional) `[tool.uv]` section.
///
/// Only the `tool` table is modelled; it is used solely to extract the `cache-keys` setting.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct PyProjectToml {
    /// The `[tool]` table, if present.
    tool: Option<Tool>,
}
323
/// The `[tool]` table of a `pyproject.toml`, restricted to the `uv` entry.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct Tool {
    /// The `[tool.uv]` table, if present.
    uv: Option<ToolUv>,
}
329
/// The `[tool.uv]` table of a `pyproject.toml`, restricted to the `cache-keys` setting.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct ToolUv {
    /// The user-provided cache keys (`cache-keys` in the TOML, due to the kebab-case renaming).
    cache_keys: Option<Vec<CacheKey>>,
}
335
// A single entry of the `cache-keys` list.
//
// The enum is `untagged`: an entry is either a bare string (`Path`) or a table identified by its
// key (`file`, `dir`, `git` or `env`). `Path` and `File` are handled identically when computing
// the cache info; strings containing `*`, `?`, `[` or `{` are treated as globs.
//
// NOTE(review): these additions are plain `//` comments rather than `///` docs so that the output
// of the optional `schemars` derive is presumably unaffected — confirm before converting them.
#[derive(Debug, Clone, serde::Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
pub enum CacheKey {
    /// Ex) `"Cargo.lock"` or `"**/*.toml"`
    Path(Cow<'static, str>),
    /// Ex) `{ file = "Cargo.lock" }` or `{ file = "**/*.toml" }`
    File { file: Cow<'static, str> },
    /// Ex) `{ dir = "src" }`
    Directory { dir: Cow<'static, str> },
    /// Ex) `{ git = true }` or `{ git = { commit = true, tags = false } }`
    Git { git: GitPattern },
    /// Ex) `{ env = "UV_CACHE_INFO" }`
    Environment { env: String },
}
351
// The value of a `git` cache key: either a boolean or a table selecting individual components.
//
// When computing the cache info, `true` records the current commit only, `false` records
// nothing, and a table records whichever of `commit` / `tags` is set to `true`.
#[derive(Debug, Clone, serde::Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
pub enum GitPattern {
    Bool(bool),
    Set(GitSet),
}
359
// The table form of a `git` cache key, e.g. `{ commit = true, tags = false }`.
// Omitted fields are treated as `false` when the cache info is computed.
#[derive(Debug, Clone, serde::Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
pub struct GitSet {
    // Whether to include the current commit in the cache info.
    commit: Option<bool>,
    // Whether to include the current tags in the cache info.
    tags: Option<bool>,
}
367
/// A file pattern, expressed either as a glob string or as a concrete path.
///
/// NOTE(review): this type is not referenced anywhere in the visible part of this file —
/// confirm whether it is still used by other crates before relying on it.
pub enum FilePattern {
    Glob(String),
    Path(PathBuf),
}
372
/// A timestamp used to measure changes to a directory.
///
/// The creation time of the directory is preferred; on Unix, the inode is recorded instead when
/// the creation time is unavailable. The enum is `untagged`, so variants are tried in
/// declaration order when deserializing.
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
enum DirectoryTimestamp {
    /// The creation time of the directory.
    Timestamp(Timestamp),
    /// The inode of the directory.
    Inode(u64),
}
380
// Unix-only tests: they rely on creating symlinks via `fs_err::os::unix::fs::symlink`.
#[cfg(all(test, unix))]
mod tests_unix {
    use anyhow::Result;

    use super::{CacheInfo, Timestamp};

    /// Checks how file cache keys (globs and plain paths) interact with symlinks: symlinks to
    /// files are resolved, while symlinked directories are not traversed by globs.
    #[test]
    fn test_cache_info_symlink_resolve() -> Result<()> {
        // Work in a `dir` subdirectory of the temporary directory, so that `../` paths used
        // below still land inside the temporary directory.
        let dir = tempfile::tempdir()?;
        let dir = dir.path().join("dir");
        fs_err::create_dir_all(&dir)?;

        // Overwrite `pyproject.toml` with a manifest declaring a single cache key.
        let write_manifest = |cache_key: &str| {
            fs_err::write(
                dir.join("pyproject.toml"),
                format!(
                    r#"
                [tool.uv]
                cache-keys = [
                    "{cache_key}"
                ]
                "#
                ),
            )
        };

        // Create an empty file (and its parent directories), returning the file's timestamp.
        let touch = |path: &str| -> Result<_> {
            let path = dir.join(path);
            fs_err::create_dir_all(path.parent().unwrap())?;
            fs_err::write(&path, "")?;
            Ok(Timestamp::from_metadata(&path.metadata()?))
        };

        // Recompute the cache info for `dir` and return its timestamp.
        let cache_timestamp = || -> Result<_> { Ok(CacheInfo::from_directory(&dir)?.timestamp) };

        // With no matching files there is no timestamp; afterwards, each newly created file
        // becomes the most recent one.
        write_manifest("x/**")?;
        assert_eq!(cache_timestamp()?, None);
        let y = touch("x/y")?;
        assert_eq!(cache_timestamp()?, Some(y));
        let z = touch("x/z")?;
        assert_eq!(cache_timestamp()?, Some(z));

        // leaf entry symlink should be resolved
        let a = touch("../a")?;
        fs_err::os::unix::fs::symlink(dir.join("../a"), dir.join("x/a"))?;
        assert_eq!(cache_timestamp()?, Some(a));

        // symlink directories should not be followed while globbing
        let c = touch("../b/c")?;
        fs_err::os::unix::fs::symlink(dir.join("../b"), dir.join("x/b"))?;
        assert_eq!(cache_timestamp()?, Some(a));

        // no globs, should work as expected
        write_manifest("x/y")?;
        assert_eq!(cache_timestamp()?, Some(y));
        write_manifest("x/a")?;
        assert_eq!(cache_timestamp()?, Some(a));
        write_manifest("x/b/c")?;
        assert_eq!(cache_timestamp()?, Some(c));

        // symlink pointing to a directory
        // (the only match, `x/b`, resolves to a directory and is skipped, so no timestamp remains)
        write_manifest("x/*b*")?;
        assert_eq!(cache_timestamp()?, None);

        Ok(())
    }
}
447}