1use std::borrow::Cow;
2use std::collections::BTreeMap;
3use std::path::{Path, PathBuf};
4
5use serde::Deserialize;
6use tracing::{debug, warn};
7
8use uv_fs::Simplified;
9
10use crate::git_info::{Commit, Tags};
11use crate::glob::cluster_globs;
12use crate::timestamp::Timestamp;
13
/// Errors that can be returned while computing a [`CacheInfo`].
#[derive(Debug, thiserror::Error)]
pub enum CacheInfoError {
    /// The glob patterns collected from the cache keys could not be turned into a walker.
    #[error("Failed to parse glob patterns for `cache-keys`: {0}")]
    Glob(#[from] globwalk::GlobError),
    /// An I/O error; displayed transparently as the underlying error.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
21
/// A snapshot of the cache-key inputs computed for a file or directory
/// (see `CacheInfo::from_path`).
///
/// Two snapshots can be compared or hashed to detect whether any of the tracked inputs changed.
#[derive(Default, Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct CacheInfo {
    /// For a single file, the timestamp derived from its metadata. For a directory, the greatest
    /// timestamp among the files selected by the cache keys, or `None` if no such file was found.
    timestamp: Option<Timestamp>,
    /// The current Git commit; only populated when a `git` cache key asks for it and it can be
    /// read from the repository.
    commit: Option<Commit>,
    /// The current Git tags; only populated when a `git` cache key asks for them and they can be
    /// read from the repository.
    tags: Option<Tags>,
    /// Environment variables named by `env` cache keys, mapped to the value observed when the
    /// snapshot was computed (`None` if the variable could not be read).
    // `default` lets the field be absent from serialized data.
    #[serde(default)]
    env: BTreeMap<String, Option<String>>,
    /// Directories named by `dir` cache keys, mapped to their creation timestamp or inode
    /// (`None` if the directory does not exist).
    // `default` lets the field be absent from serialized data.
    #[serde(default)]
    directories: BTreeMap<Cow<'static, str>, Option<DirectoryTimestamp>>,
}
43
44impl CacheInfo {
45 pub fn from_timestamp(timestamp: Timestamp) -> Self {
47 Self {
48 timestamp: Some(timestamp),
49 ..Self::default()
50 }
51 }
52
53 pub fn from_path(path: &Path) -> Result<Self, CacheInfoError> {
55 let metadata = fs_err::metadata(path)?;
56 if metadata.is_file() {
57 Ok(Self::from_file(path)?)
58 } else {
59 Self::from_directory(path)
60 }
61 }
62
63 pub fn from_directory(directory: &Path) -> Result<Self, CacheInfoError> {
65 let mut commit = None;
66 let mut tags = None;
67 let mut last_changed: Option<(PathBuf, Timestamp)> = None;
68 let mut directories = BTreeMap::new();
69 let mut env = BTreeMap::new();
70
71 let cache_keys =
73 if let Ok(contents) = fs_err::read_to_string(directory.join("pyproject.toml")) {
74 if let Ok(pyproject_toml) = toml::from_str::<PyProjectToml>(&contents) {
75 pyproject_toml
76 .tool
77 .and_then(|tool| tool.uv)
78 .and_then(|tool_uv| tool_uv.cache_keys)
79 } else {
80 None
81 }
82 } else {
83 None
84 };
85
86 let cache_keys = cache_keys.unwrap_or_else(|| {
88 vec![
89 CacheKey::Path(Cow::Borrowed("pyproject.toml")),
90 CacheKey::Path(Cow::Borrowed("setup.py")),
91 CacheKey::Path(Cow::Borrowed("setup.cfg")),
92 CacheKey::Directory {
93 dir: Cow::Borrowed("src"),
94 },
95 ]
96 });
97
98 let mut globs = vec![];
100 for cache_key in cache_keys {
101 match cache_key {
102 CacheKey::Path(file) | CacheKey::File { file } => {
103 if file
104 .as_ref()
105 .chars()
106 .any(|c| matches!(c, '*' | '?' | '[' | '{'))
107 {
108 globs.push(file);
110 continue;
111 }
112
113 let path = directory.join(file.as_ref());
115 let metadata = match path.metadata() {
116 Ok(metadata) => metadata,
117 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
118 continue;
119 }
120 Err(err) => {
121 warn!("Failed to read metadata for file: {err}");
122 continue;
123 }
124 };
125 if !metadata.is_file() {
126 warn!(
127 "Expected file for cache key, but found directory: `{}`",
128 path.display()
129 );
130 continue;
131 }
132 let timestamp = Timestamp::from_metadata(&metadata);
133 if last_changed.as_ref().is_none_or(|(_, prev_timestamp)| {
134 *prev_timestamp < Timestamp::from_metadata(&metadata)
135 }) {
136 last_changed = Some((path, timestamp));
137 }
138 }
139 CacheKey::Directory { dir } => {
140 let path = directory.join(dir.as_ref());
142 let metadata = match path.metadata() {
143 Ok(metadata) => metadata,
144 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
145 directories.insert(dir, None);
146 continue;
147 }
148 Err(err) => {
149 warn!("Failed to read metadata for directory: {err}");
150 continue;
151 }
152 };
153 if !metadata.is_dir() {
154 warn!(
155 "Expected directory for cache key, but found file: `{}`",
156 path.display()
157 );
158 continue;
159 }
160
161 if let Ok(created) = metadata.created() {
162 directories.insert(
164 dir,
165 Some(DirectoryTimestamp::Timestamp(Timestamp::from(created))),
166 );
167 } else {
168 #[cfg(unix)]
170 {
171 use std::os::unix::fs::MetadataExt;
172 directories
173 .insert(dir, Some(DirectoryTimestamp::Inode(metadata.ino())));
174 }
175 #[cfg(not(unix))]
176 {
177 warn!(
178 "Failed to read creation time for directory: `{}`",
179 path.display()
180 );
181 }
182 }
183 }
184 CacheKey::Git {
185 git: GitPattern::Bool(true),
186 } => match Commit::from_repository(directory) {
187 Ok(commit_info) => commit = Some(commit_info),
188 Err(err) => {
189 debug!("Failed to read the current commit: {err}");
190 }
191 },
192 CacheKey::Git {
193 git: GitPattern::Set(set),
194 } => {
195 if set.commit.unwrap_or(false) {
196 match Commit::from_repository(directory) {
197 Ok(commit_info) => commit = Some(commit_info),
198 Err(err) => {
199 debug!("Failed to read the current commit: {err}");
200 }
201 }
202 }
203 if set.tags.unwrap_or(false) {
204 match Tags::from_repository(directory) {
205 Ok(tags_info) => tags = Some(tags_info),
206 Err(err) => {
207 debug!("Failed to read the current tags: {err}");
208 }
209 }
210 }
211 }
212 CacheKey::Git {
213 git: GitPattern::Bool(false),
214 } => {}
215 CacheKey::Environment { env: var } => {
216 let value = std::env::var(&var).ok();
217 env.insert(var, value);
218 }
219 }
220 }
221
222 if !globs.is_empty() {
224 for (glob_base, glob_patterns) in cluster_globs(&globs) {
225 let walker = globwalk::GlobWalkerBuilder::from_patterns(
226 directory.join(glob_base),
227 &glob_patterns,
228 )
229 .file_type(globwalk::FileType::FILE | globwalk::FileType::SYMLINK)
230 .build()?;
231 for entry in walker {
232 let entry = match entry {
233 Ok(entry) => entry,
234 Err(err) => {
235 warn!("Failed to read glob entry: {err}");
236 continue;
237 }
238 };
239 let metadata = if entry.path_is_symlink() {
240 match fs_err::metadata(entry.path()) {
242 Ok(metadata) => metadata,
243 Err(err) => {
244 warn!("Failed to resolve symlink for glob entry: {err}");
245 continue;
246 }
247 }
248 } else {
249 match entry.metadata() {
250 Ok(metadata) => metadata,
251 Err(err) => {
252 warn!("Failed to read metadata for glob entry: {err}");
253 continue;
254 }
255 }
256 };
257 if !metadata.is_file() {
258 if !entry.path_is_symlink() {
259 warn!(
261 "Expected file for cache key, but found directory: `{}`",
262 entry.path().display()
263 );
264 }
265 continue;
266 }
267 let timestamp = Timestamp::from_metadata(&metadata);
268 if last_changed.as_ref().is_none_or(|(_, prev_timestamp)| {
269 *prev_timestamp < Timestamp::from_metadata(&metadata)
270 }) {
271 last_changed = Some((entry.into_path(), timestamp));
272 }
273 }
274 }
275 }
276
277 let timestamp = if let Some((path, timestamp)) = last_changed {
278 debug!(
279 "Computed cache info: {timestamp:?}, {commit:?}, {tags:?}, {env:?}, {directories:?}. Most recently modified: {}",
280 path.user_display()
281 );
282 Some(timestamp)
283 } else {
284 None
285 };
286
287 Ok(Self {
288 timestamp,
289 commit,
290 tags,
291 env,
292 directories,
293 })
294 }
295
296 pub fn from_file(path: impl AsRef<Path>) -> std::io::Result<Self> {
299 let metadata = fs_err::metadata(path.as_ref())?;
300 let timestamp = Timestamp::from_metadata(&metadata);
301 Ok(Self {
302 timestamp: Some(timestamp),
303 ..Self::default()
304 })
305 }
306
307 pub fn is_empty(&self) -> bool {
309 self.timestamp.is_none()
310 && self.commit.is_none()
311 && self.tags.is_none()
312 && self.env.is_empty()
313 && self.directories.is_empty()
314 }
315}
316
/// The subset of `pyproject.toml` that is needed to reach `tool.uv.cache-keys`.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct PyProjectToml {
    /// The `[tool]` table, if present.
    tool: Option<Tool>,
}
323
/// The `[tool]` table of `pyproject.toml`; only the `uv` entry is read.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct Tool {
    /// The `[tool.uv]` table, if present.
    uv: Option<ToolUv>,
}
329
/// The `[tool.uv]` table of `pyproject.toml`; only the cache keys are read.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct ToolUv {
    /// The `cache-keys` list (kebab-case in the manifest), if present.
    cache_keys: Option<Vec<CacheKey>>,
}
335
/// A single entry of the `tool.uv.cache-keys` list.
///
/// The enum is deserialized untagged, so the shape of the value (a bare string, or a table
/// with a `file`, `dir`, `git` or `env` key) selects the variant.
#[derive(Debug, Clone, serde::Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
pub enum CacheKey {
    /// A bare string such as `"pyproject.toml"` or `"x/**"`: a file path, or a glob if it
    /// contains any of `*`, `?`, `[` or `{`.
    Path(Cow<'static, str>),
    /// A table of the form `{ file = "..." }`; `CacheInfo::from_directory` handles it exactly
    /// like [`CacheKey::Path`].
    File { file: Cow<'static, str> },
    /// A table of the form `{ dir = "..." }`: a directory whose existence and creation time
    /// (or inode) are tracked.
    Directory { dir: Cow<'static, str> },
    /// A table of the form `{ git = ... }`: tracks Git state, see [`GitPattern`].
    Git { git: GitPattern },
    /// A table of the form `{ env = "..." }`: tracks the value of an environment variable.
    Environment { env: String },
}
351
/// The value of a `git` cache key: either a boolean or a table selecting what to track.
#[derive(Debug, Clone, serde::Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
pub enum GitPattern {
    /// `true` tracks the current commit (but not the tags); `false` tracks nothing.
    Bool(bool),
    /// A table selecting the commit and/or the tags individually.
    Set(GitSet),
}
359
/// The table form of a `git` cache key; a field that is left unset is treated as `false`.
#[derive(Debug, Clone, serde::Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
pub struct GitSet {
    /// Whether to track the current commit.
    commit: Option<bool>,
    /// Whether to track the current tags.
    tags: Option<bool>,
}
367
/// A file selector: either a glob pattern or a concrete path.
///
/// The common traits are derived so that the type can be logged, cloned, compared and used as
/// a map/set key, like the other public types in this module.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FilePattern {
    /// A glob pattern, kept as the raw pattern string.
    Glob(String),
    /// A concrete filesystem path.
    Path(PathBuf),
}
372
/// The marker recorded for a directory cache key.
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
enum DirectoryTimestamp {
    /// The creation time of the directory, when the filesystem metadata provides one.
    Timestamp(Timestamp),
    /// The inode of the directory; used on Unix when the creation time is unavailable.
    Inode(u64),
}
380
// Unix-only tests: they create symlinks through `fs_err::os::unix::fs::symlink`.
#[cfg(all(test, unix))]
mod tests_unix {
    use anyhow::Result;

    use super::{CacheInfo, Timestamp};

    /// Checks how `CacheInfo::from_directory` treats symlinks, both for glob cache keys and for
    /// direct file paths.
    #[test]
    fn test_cache_info_symlink_resolve() -> Result<()> {
        // Work in `<tmp>/dir`, so that files can also be created next to it (`../a`, `../b/c`)
        // while staying inside the temporary directory.
        let dir = tempfile::tempdir()?;
        let dir = dir.path().join("dir");
        fs_err::create_dir_all(&dir)?;

        // (Re)write `pyproject.toml` with a single cache key.
        let write_manifest = |cache_key: &str| {
            fs_err::write(
                dir.join("pyproject.toml"),
                format!(
                    r#"
                [tool.uv]
                cache-keys = [
                    "{cache_key}"
                ]
                "#
                ),
            )
        };

        // Create an empty file (and its parent directories) relative to `dir`, and return the
        // timestamp derived from its metadata.
        let touch = |path: &str| -> Result<_> {
            let path = dir.join(path);
            fs_err::create_dir_all(path.parent().unwrap())?;
            fs_err::write(&path, "")?;
            Ok(Timestamp::from_metadata(&path.metadata()?))
        };

        // The timestamp currently computed for `dir`.
        let cache_timestamp = || -> Result<_> { Ok(CacheInfo::from_directory(&dir)?.timestamp) };

        // A glob that matches nothing yields no timestamp; each newly created file then becomes
        // the reported one.
        // NOTE(review): this relies on every `touch` producing a timestamp greater than the
        // previous ones.
        write_manifest("x/**")?;
        assert_eq!(cache_timestamp()?, None);
        let y = touch("x/y")?;
        assert_eq!(cache_timestamp()?, Some(y));
        let z = touch("x/z")?;
        assert_eq!(cache_timestamp()?, Some(z));

        // A symlink to a file outside of `dir` is resolved: the target's timestamp is reported.
        let a = touch("../a")?;
        fs_err::os::unix::fs::symlink(dir.join("../a"), dir.join("x/a"))?;
        assert_eq!(cache_timestamp()?, Some(a));

        // A symlink to a directory is not traversed by the glob: `c` was created after `a`, yet
        // `a` is still the one reported.
        let c = touch("../b/c")?;
        fs_err::os::unix::fs::symlink(dir.join("../b"), dir.join("x/b"))?;
        assert_eq!(cache_timestamp()?, Some(a));

        // Direct (non-glob) paths are resolved too, including a path that goes through the
        // symlinked directory.
        write_manifest("x/y")?;
        assert_eq!(cache_timestamp()?, Some(y));
        write_manifest("x/a")?;
        assert_eq!(cache_timestamp()?, Some(a));
        write_manifest("x/b/c")?;
        assert_eq!(cache_timestamp()?, Some(c));

        // A glob whose only match is the symlink to a directory contributes no timestamp.
        write_manifest("x/*b*")?;
        assert_eq!(cache_timestamp()?, None);

        Ok(())
    }
}