Skip to main content

uv_cache_info/
cache_info.rs

1use std::borrow::Cow;
2use std::collections::BTreeMap;
3use std::path::{Path, PathBuf};
4
5use serde::Deserialize;
6use tracing::{debug, info_span, warn};
7
8use uv_fs::Simplified;
9
10use crate::git_info::{Commit, Tags};
11use crate::glob::cluster_globs;
12use crate::timestamp::Timestamp;
13
14#[derive(Debug, thiserror::Error)]
15pub enum CacheInfoError {
16    #[error("Failed to parse glob patterns for `cache-keys`: {0}")]
17    Glob(#[from] globwalk::GlobError),
18    #[error(transparent)]
19    Io(#[from] std::io::Error),
20}
21
22/// The information used to determine whether a built distribution is up-to-date, based on the
23/// timestamps of relevant files, the current commit of a repository, etc.
24#[derive(Default, Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
25#[serde(rename_all = "kebab-case")]
26pub struct CacheInfo {
27    /// The timestamp of the most recent `ctime` of any relevant files, at the time of the build.
28    /// The timestamp will typically be the maximum of the `ctime` values of the `pyproject.toml`,
29    /// `setup.py`, and `setup.cfg` files, if they exist; however, users can provide additional
30    /// files to timestamp via the `cache-keys` field.
31    timestamp: Option<Timestamp>,
32    /// The commit at which the distribution was built.
33    commit: Option<Commit>,
34    /// The Git tags present at the time of the build.
35    tags: Option<Tags>,
36    /// Environment variables to include in the cache key.
37    #[serde(default)]
38    env: BTreeMap<String, Option<String>>,
39    /// The timestamp or inode of any directories that should be considered in the cache key.
40    #[serde(default)]
41    directories: BTreeMap<Cow<'static, str>, Option<DirectoryTimestamp>>,
42}
43
44impl CacheInfo {
45    /// Return the [`CacheInfo`] for a given timestamp.
46    pub fn from_timestamp(timestamp: Timestamp) -> Self {
47        Self {
48            timestamp: Some(timestamp),
49            ..Self::default()
50        }
51    }
52
53    /// Compute the cache info for a given path, which may be a file or a directory.
54    pub fn from_path(path: &Path) -> Result<Self, CacheInfoError> {
55        let metadata = fs_err::metadata(path)?;
56        if metadata.is_file() {
57            Ok(Self::from_file(path)?)
58        } else {
59            Self::from_directory(path)
60        }
61    }
62
63    /// Compute the cache info for a given directory.
64    pub fn from_directory(directory: &Path) -> Result<Self, CacheInfoError> {
65        let mut commit = None;
66        let mut tags = None;
67        let mut last_changed: Option<(PathBuf, Timestamp)> = None;
68        let mut directories = BTreeMap::new();
69        let mut env = BTreeMap::new();
70
71        // Read the cache keys.
72        let pyproject_path = directory.join("pyproject.toml");
73        let cache_keys = if let Ok(contents) = fs_err::read_to_string(&pyproject_path) {
74            let result = info_span!("toml::from_str cache keys", path = %pyproject_path.display())
75                .in_scope(|| toml::from_str::<PyProjectToml>(&contents));
76            if let Ok(pyproject_toml) = result {
77                pyproject_toml
78                    .tool
79                    .and_then(|tool| tool.uv)
80                    .and_then(|tool_uv| tool_uv.cache_keys)
81            } else {
82                None
83            }
84        } else {
85            None
86        };
87
88        // If no cache keys were defined, use the defaults.
89        let cache_keys = cache_keys.unwrap_or_else(|| {
90            vec![
91                CacheKey::Path(Cow::Borrowed("pyproject.toml")),
92                CacheKey::Path(Cow::Borrowed("setup.py")),
93                CacheKey::Path(Cow::Borrowed("setup.cfg")),
94                CacheKey::Directory {
95                    dir: Cow::Borrowed("src"),
96                },
97            ]
98        });
99
100        // Incorporate timestamps from any direct filepaths.
101        let mut globs = vec![];
102        for cache_key in cache_keys {
103            match cache_key {
104                CacheKey::Path(file) | CacheKey::File { file } => {
105                    if file
106                        .as_ref()
107                        .chars()
108                        .any(|c| matches!(c, '*' | '?' | '[' | '{'))
109                    {
110                        // Defer globs to a separate pass.
111                        globs.push(file);
112                        continue;
113                    }
114
115                    // Treat the path as a file.
116                    let path = directory.join(file.as_ref());
117                    let metadata = match path.metadata() {
118                        Ok(metadata) => metadata,
119                        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
120                            continue;
121                        }
122                        Err(err) => {
123                            warn!("Failed to read metadata for file: {err}");
124                            continue;
125                        }
126                    };
127                    if !metadata.is_file() {
128                        warn!(
129                            "Expected file for cache key, but found directory: `{}`",
130                            path.display()
131                        );
132                        continue;
133                    }
134                    let timestamp = Timestamp::from_metadata(&metadata);
135                    if last_changed.as_ref().is_none_or(|(_, prev_timestamp)| {
136                        *prev_timestamp < Timestamp::from_metadata(&metadata)
137                    }) {
138                        last_changed = Some((path, timestamp));
139                    }
140                }
141                CacheKey::Directory { dir } => {
142                    // Treat the path as a directory.
143                    let path = directory.join(dir.as_ref());
144                    let metadata = match path.metadata() {
145                        Ok(metadata) => metadata,
146                        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
147                            directories.insert(dir, None);
148                            continue;
149                        }
150                        Err(err) => {
151                            warn!("Failed to read metadata for directory: {err}");
152                            continue;
153                        }
154                    };
155                    if !metadata.is_dir() {
156                        warn!(
157                            "Expected directory for cache key, but found file: `{}`",
158                            path.display()
159                        );
160                        continue;
161                    }
162
163                    if let Ok(created) = metadata.created() {
164                        // Prefer the creation time.
165                        directories.insert(
166                            dir,
167                            Some(DirectoryTimestamp::Timestamp(Timestamp::from(created))),
168                        );
169                    } else {
170                        // Fall back to the inode.
171                        #[cfg(unix)]
172                        {
173                            use std::os::unix::fs::MetadataExt;
174                            directories
175                                .insert(dir, Some(DirectoryTimestamp::Inode(metadata.ino())));
176                        }
177                        #[cfg(not(unix))]
178                        {
179                            warn!(
180                                "Failed to read creation time for directory: `{}`",
181                                path.display()
182                            );
183                        }
184                    }
185                }
186                CacheKey::Git {
187                    git: GitPattern::Bool(true),
188                } => match Commit::from_repository(directory) {
189                    Ok(commit_info) => commit = Some(commit_info),
190                    Err(err) => {
191                        debug!("Failed to read the current commit: {err}");
192                    }
193                },
194                CacheKey::Git {
195                    git: GitPattern::Set(set),
196                } => {
197                    if set.commit.unwrap_or(false) {
198                        match Commit::from_repository(directory) {
199                            Ok(commit_info) => commit = Some(commit_info),
200                            Err(err) => {
201                                debug!("Failed to read the current commit: {err}");
202                            }
203                        }
204                    }
205                    if set.tags.unwrap_or(false) {
206                        match Tags::from_repository(directory) {
207                            Ok(tags_info) => tags = Some(tags_info),
208                            Err(err) => {
209                                debug!("Failed to read the current tags: {err}");
210                            }
211                        }
212                    }
213                }
214                CacheKey::Git {
215                    git: GitPattern::Bool(false),
216                } => {}
217                CacheKey::Environment { env: var } => {
218                    let value = std::env::var(&var).ok();
219                    env.insert(var, value);
220                }
221            }
222        }
223
224        // If we have any globs, first cluster them using LCP and then do a single pass on each group.
225        if !globs.is_empty() {
226            for (glob_base, glob_patterns) in cluster_globs(&globs) {
227                let walker = globwalk::GlobWalkerBuilder::from_patterns(
228                    directory.join(glob_base),
229                    &glob_patterns,
230                )
231                .file_type(globwalk::FileType::FILE | globwalk::FileType::SYMLINK)
232                .build()?;
233                for entry in walker {
234                    let entry = match entry {
235                        Ok(entry) => entry,
236                        Err(err) => {
237                            warn!("Failed to read glob entry: {err}");
238                            continue;
239                        }
240                    };
241                    let metadata = if entry.path_is_symlink() {
242                        // resolve symlinks for leaf entries without following symlinks while globbing
243                        match fs_err::metadata(entry.path()) {
244                            Ok(metadata) => metadata,
245                            Err(err) => {
246                                warn!("Failed to resolve symlink for glob entry: {err}");
247                                continue;
248                            }
249                        }
250                    } else {
251                        match entry.metadata() {
252                            Ok(metadata) => metadata,
253                            Err(err) => {
254                                warn!("Failed to read metadata for glob entry: {err}");
255                                continue;
256                            }
257                        }
258                    };
259                    if !metadata.is_file() {
260                        if !entry.path_is_symlink() {
261                            // don't warn if it was a symlink - it may legitimately resolve to a directory
262                            warn!(
263                                "Expected file for cache key, but found directory: `{}`",
264                                entry.path().display()
265                            );
266                        }
267                        continue;
268                    }
269                    let timestamp = Timestamp::from_metadata(&metadata);
270                    if last_changed.as_ref().is_none_or(|(_, prev_timestamp)| {
271                        *prev_timestamp < Timestamp::from_metadata(&metadata)
272                    }) {
273                        last_changed = Some((entry.into_path(), timestamp));
274                    }
275                }
276            }
277        }
278
279        let timestamp = if let Some((path, timestamp)) = last_changed {
280            debug!(
281                "Computed cache info: {timestamp:?}, {commit:?}, {tags:?}, {env:?}, {directories:?}. Most recently modified: {}",
282                path.user_display()
283            );
284            Some(timestamp)
285        } else {
286            None
287        };
288
289        Ok(Self {
290            timestamp,
291            commit,
292            tags,
293            env,
294            directories,
295        })
296    }
297
298    /// Compute the cache info for a given file, assumed to be a binary or source distribution
299    /// represented as (e.g.) a `.whl` or `.tar.gz` archive.
300    pub fn from_file(path: impl AsRef<Path>) -> std::io::Result<Self> {
301        let metadata = fs_err::metadata(path.as_ref())?;
302        let timestamp = Timestamp::from_metadata(&metadata);
303        Ok(Self {
304            timestamp: Some(timestamp),
305            ..Self::default()
306        })
307    }
308
309    /// Returns `true` if the cache info is empty.
310    pub fn is_empty(&self) -> bool {
311        self.timestamp.is_none()
312            && self.commit.is_none()
313            && self.tags.is_none()
314            && self.env.is_empty()
315            && self.directories.is_empty()
316    }
317}
318
319/// A `pyproject.toml` with an (optional) `[tool.uv]` section.
320#[derive(Debug, Deserialize)]
321#[serde(rename_all = "kebab-case")]
322struct PyProjectToml {
323    tool: Option<Tool>,
324}
325
326#[derive(Debug, Deserialize)]
327#[serde(rename_all = "kebab-case")]
328struct Tool {
329    uv: Option<ToolUv>,
330}
331
332#[derive(Debug, Deserialize)]
333#[serde(rename_all = "kebab-case")]
334struct ToolUv {
335    cache_keys: Option<Vec<CacheKey>>,
336}
337
338#[derive(Debug, Clone, serde::Deserialize)]
339#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
340#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
341pub enum CacheKey {
342    /// Ex) `"Cargo.lock"` or `"**/*.toml"`
343    Path(Cow<'static, str>),
344    /// Ex) `{ file = "Cargo.lock" }` or `{ file = "**/*.toml" }`
345    File { file: Cow<'static, str> },
346    /// Ex) `{ dir = "src" }`
347    Directory { dir: Cow<'static, str> },
348    /// Ex) `{ git = true }` or `{ git = { commit = true, tags = false } }`
349    Git { git: GitPattern },
350    /// Ex) `{ env = "UV_CACHE_INFO" }`
351    Environment { env: String },
352}
353
354#[derive(Debug, Clone, serde::Deserialize)]
355#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
356#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
357pub enum GitPattern {
358    Bool(bool),
359    Set(GitSet),
360}
361
362#[derive(Debug, Clone, serde::Deserialize)]
363#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
364#[serde(rename_all = "kebab-case", deny_unknown_fields)]
365pub struct GitSet {
366    commit: Option<bool>,
367    tags: Option<bool>,
368}
369
/// A file reference that is either a glob pattern or a concrete path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilePattern {
    /// A glob pattern, kept as the raw pattern string.
    Glob(String),
    /// A concrete filesystem path.
    Path(PathBuf),
}
374
375/// A timestamp used to measure changes to a directory.
376#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
377#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
378enum DirectoryTimestamp {
379    Timestamp(Timestamp),
380    Inode(u64),
381}
382
#[cfg(all(test, unix))]
mod tests_unix {
    use anyhow::Result;

    use super::{CacheInfo, Timestamp};

    /// Glob cache keys resolve leaf symlinks that point at files, but neither descend into
    /// nor count symlinked directories; non-glob cache keys are resolved through symlinks.
    #[test]
    fn test_cache_info_symlink_resolve() -> Result<()> {
        // Use a nested project directory so that `../` paths stay inside the temporary one.
        let temp_dir = tempfile::tempdir()?;
        let project = temp_dir.path().join("dir");
        fs_err::create_dir_all(&project)?;

        // Overwrite `pyproject.toml` so that it declares exactly one cache key.
        let set_cache_key = |cache_key: &str| {
            let manifest = format!(
                r#"
                [tool.uv]
                cache-keys = [
                    "{cache_key}"
                ]
                "#
            );
            fs_err::write(project.join("pyproject.toml"), manifest)
        };

        // Create an empty file (and its parent directories) and report its timestamp.
        let touch = |relative: &str| -> Result<_> {
            let target = project.join(relative);
            fs_err::create_dir_all(target.parent().unwrap())?;
            fs_err::write(&target, "")?;
            let metadata = target.metadata()?;
            Ok(Timestamp::from_metadata(&metadata))
        };

        // The timestamp currently computed for the project directory.
        let cache_timestamp = || -> Result<_> {
            let info = CacheInfo::from_directory(&project)?;
            Ok(info.timestamp)
        };

        // A glob picks up the newest matching file, if there is one.
        set_cache_key("x/**")?;
        assert_eq!(cache_timestamp()?, None);
        let y = touch("x/y")?;
        assert_eq!(cache_timestamp()?, Some(y));
        let z = touch("x/z")?;
        assert_eq!(cache_timestamp()?, Some(z));

        // leaf entry symlink should be resolved
        let a = touch("../a")?;
        fs_err::os::unix::fs::symlink(project.join("../a"), project.join("x/a"))?;
        assert_eq!(cache_timestamp()?, Some(a));

        // symlink directories should not be followed while globbing
        let c = touch("../b/c")?;
        fs_err::os::unix::fs::symlink(project.join("../b"), project.join("x/b"))?;
        assert_eq!(cache_timestamp()?, Some(a));

        // no globs, should work as expected
        set_cache_key("x/y")?;
        assert_eq!(cache_timestamp()?, Some(y));
        set_cache_key("x/a")?;
        assert_eq!(cache_timestamp()?, Some(a));
        set_cache_key("x/b/c")?;
        assert_eq!(cache_timestamp()?, Some(c));

        // symlink pointing to a directory
        set_cache_key("x/*b*")?;
        assert_eq!(cache_timestamp()?, None);

        Ok(())
    }
}