uv_install_wheel/linker.rs
1use std::collections::{BTreeMap, BTreeSet};
2use std::io;
3use std::path::{Path, PathBuf};
4use std::sync::Mutex;
5use std::time::SystemTime;
6
7use fs_err as fs;
8use itertools::Itertools;
9use rustc_hash::FxHashMap;
10use tracing::{debug, instrument};
11use walkdir::WalkDir;
12
13use uv_distribution_filename::WheelFilename;
14use uv_fs::Simplified;
15use uv_fs::link::{CopyLocks, LinkOptions, OnExistingDirectory, link_dir};
16use uv_preview::{Preview, PreviewFeature};
17use uv_warnings::warn_user;
18
19use crate::Error;
20
21pub use uv_fs::link::LinkMode;
22
23/// Shared state for concurrent wheel installations.
24#[derive(Debug, Default)]
25pub struct InstallState {
26 /// Directory-level locks to prevent concurrent write corruption.
27 locks: CopyLocks,
28 /// Top level files and directories in site-packages, stored as relative path, and wheels they
29 /// are from, with the absolute paths in the unpacked wheel.
30 site_packages_paths: Mutex<FxHashMap<PathBuf, BTreeSet<(WheelFilename, PathBuf)>>>,
31 /// Preview settings for feature flags.
32 preview: Preview,
33}
34
35impl InstallState {
36 /// Create a new `InstallState` with the given preview settings.
37 pub fn new(preview: Preview) -> Self {
38 Self {
39 locks: CopyLocks::default(),
40 site_packages_paths: Mutex::new(FxHashMap::default()),
41 preview,
42 }
43 }
44
45 /// Get the underlying copy locks for use with [`uv_fs::link::link_dir`] functions.
46 pub fn copy_locks(&self) -> &CopyLocks {
47 &self.locks
48 }
49
50 /// Register which package installs which (top level) path.
51 ///
52 /// This is later used warn when different files at the same path exist in multiple packages.
53 ///
54 /// The first non-self argument is the target path relative to site-packages, the second is the
55 /// source path in the unpacked wheel.
56 fn register_installed_path(&self, relative: &Path, absolute: &Path, wheel: &WheelFilename) {
57 debug_assert!(!relative.is_absolute());
58 debug_assert!(absolute.is_absolute());
59
60 // Only register top level entries, these are the only ones we have reliably as cloning
61 // a directory on macOS traverses outside our code.
62 if relative.components().count() != 1 {
63 return;
64 }
65
66 self.site_packages_paths
67 .lock()
68 .unwrap()
69 .entry(relative.to_path_buf())
70 .or_default()
71 .insert((wheel.clone(), absolute.to_path_buf()));
72 }
73
74 /// Warn when the same file with different contents exists in multiple packages.
75 ///
76 /// The intent is to detect different variants of the same package installed over each other,
77 /// or different packages using the same top-level module name, which cause non-deterministic
78 /// failures only surfacing at runtime. See <https://github.com/astral-sh/uv/pull/13437> for a
79 /// list of cases.
80 ///
81 /// The check has some false negatives. It is rather too lenient than too strict, and includes
82 /// support for namespace packages that include the same `__init__.py` file, e.g., gpu-a and
83 /// gpu-b both including the same `gpu/__init__.py`.
84 ///
85 /// We assume that all wheels of a package have the same module(s), so a conflict between
86 /// installing two unpacked wheels is a conflict between two packages.
87 ///
88 /// # Performance
89 ///
90 /// When there are no namespace packages, this method is a Mutex lock and a hash map iteration.
91 ///
92 /// When there are namespace packages, we only traverse into directories shared by at least two
93 /// packages. For example, for namespace packages gpu-a, gpu-b, and gpu-c with
94 /// `gpu/a/__init__.py`, `gpu/b/__init__.py`, and `gpu/c/__init__.py` respectively, we only
95 /// need to read the `gpu` directory. If there is a deeper shared directory, we only recurse
96 /// down to this directory. As packages without conflicts generally do not share many
97 /// directories, we do not recurse far.
98 ///
99 /// For each directory, we analyze all packages sharing the directory at the same time, reading
100 /// the directory in each unpacked wheel only once. Effectively, we perform a parallel directory
101 /// walk with early exit.
102 ///
103 /// We avoid reading the actual file contents and assume they are the same when their file
104 /// length matches. This also excludes the same empty `__init__.py` files being reported as
105 /// conflicting.
106 pub fn warn_package_conflicts(self) -> Result<(), io::Error> {
107 // This warning is currently in preview.
108 if !self
109 .preview
110 .is_enabled(PreviewFeature::DetectModuleConflicts)
111 {
112 return Ok(());
113 }
114
115 for (relative, wheels) in &*self.site_packages_paths.lock().unwrap() {
116 // Fast path: Only one package is using this module name, no conflicts.
117 let mut wheel_iter = wheels.iter();
118 let Some(first_wheel) = wheel_iter.next() else {
119 debug_assert!(false, "at least one wheel");
120 continue;
121 };
122 if wheel_iter.next().is_none() {
123 continue;
124 }
125
126 // TODO(konsti): This assumes a path is either a file or a directory in all wheels.
127 let file_type = fs_err::metadata(&first_wheel.1)?.file_type();
128 if file_type.is_file() {
129 // Handle conflicts between files directly in site-packages without a module
130 // directory enclosing them.
131 let files: BTreeSet<(&WheelFilename, u64)> = wheels
132 .iter()
133 .map(|(wheel, absolute)| Ok((wheel, absolute.metadata()?.len())))
134 .collect::<Result<_, io::Error>>()?;
135 Self::warn_file_conflict(relative, &files);
136 } else if file_type.is_dir() {
137 // Don't early return if the method returns true, so we show warnings for each
138 // top-level module.
139 Self::warn_directory_conflict(relative, wheels)?;
140 } else {
141 // We don't expect any other file type, but it's ok if this check has false
142 // negatives.
143 }
144 }
145
146 Ok(())
147 }
148
149 /// Analyze a directory for conflicts.
150 ///
151 /// If there are any non-identical files (checked by size) included in more than one wheel,
152 /// report this file and return.
153 ///
154 /// If there are any directories included in more than one wheel, recurse to analyze whether
155 /// the directories contain conflicting files.
156 ///
157 /// Returns `true` if a warning was emitted.
158 fn warn_directory_conflict(
159 directory: &Path,
160 wheels: &BTreeSet<(WheelFilename, PathBuf)>,
161 ) -> Result<bool, io::Error> {
162 // The files in the directory, as paths relative to the site-packages, with their origin and
163 // size.
164 let mut files: BTreeMap<PathBuf, BTreeSet<(&WheelFilename, u64)>> = BTreeMap::default();
165 // The directories in the directory, as paths relative to the site-packages, with their
166 // origin and absolute path.
167 let mut subdirectories: BTreeMap<PathBuf, BTreeSet<(WheelFilename, PathBuf)>> =
168 BTreeMap::default();
169
170 // Read the shared directory in each unpacked wheel.
171 for (wheel, absolute) in wheels {
172 for dir_entry in fs_err::read_dir(absolute)? {
173 let dir_entry = dir_entry?;
174 let relative = directory.join(dir_entry.file_name());
175 let file_type = dir_entry.file_type()?;
176 if file_type.is_file() {
177 files
178 .entry(relative)
179 .or_default()
180 .insert((wheel, dir_entry.metadata()?.len()));
181 } else if file_type.is_dir() {
182 subdirectories
183 .entry(relative)
184 .or_default()
185 .insert((wheel.clone(), dir_entry.path()));
186 } else {
187 // We don't expect any other file type, but it's ok if this check has false
188 // negatives.
189 }
190 }
191 }
192
193 for (file, file_wheels) in files {
194 if Self::warn_file_conflict(&file, &file_wheels) {
195 return Ok(true);
196 }
197 }
198
199 for (subdirectory, subdirectory_wheels) in subdirectories {
200 if subdirectory_wheels.len() == 1 {
201 continue;
202 }
203 // If there are directories shared between multiple wheels, recurse to check them
204 // for shared files.
205 if Self::warn_directory_conflict(&subdirectory, &subdirectory_wheels)? {
206 return Ok(true);
207 }
208 }
209
210 Ok(false)
211 }
212
213 /// Check if all files are the same size, if so assume they are identical.
214 ///
215 /// It's unlikely that two modules overlap with different contents but their files all have
216 /// the same length, so we use this heuristic in this performance critical path to avoid
217 /// reading potentially large files.
218 fn warn_file_conflict(file: &Path, file_wheels: &BTreeSet<(&WheelFilename, u64)>) -> bool {
219 let Some((_, file_len)) = file_wheels.first() else {
220 debug_assert!(false, "Always at least one element");
221 return false;
222 };
223 if !file_wheels
224 .iter()
225 .any(|(_, file_len_other)| file_len_other != file_len)
226 {
227 return false;
228 }
229
230 let packages = file_wheels
231 .iter()
232 .map(|(wheel_filename, _file_len)| {
233 format!("* {} ({})", wheel_filename.name, wheel_filename)
234 })
235 .join("\n");
236 warn_user!(
237 "The file `{}` is provided by more than one package, \
238 which causes an install race condition and can result in a broken module. \
239 Packages containing the file:\n{}",
240 file.user_display(),
241 packages
242 );
243
244 // Assumption: There is generally two packages that have a conflict. The output is
245 // more helpful with a single message that calls out the packages
246 // rather than being comprehensive about the conflicting files.
247 true
248 }
249}
250
251/// Extract a wheel by linking all of its files into site packages.
252///
253/// Returns the number of files extracted.
254#[instrument(skip_all)]
255pub fn link_wheel_files(
256 link_mode: LinkMode,
257 site_packages: impl AsRef<Path>,
258 wheel: impl AsRef<Path>,
259 state: &InstallState,
260 filename: &WheelFilename,
261) -> Result<usize, Error> {
262 let wheel = wheel.as_ref();
263 let site_packages = site_packages.as_ref();
264 let count = register_installed_paths(wheel, state, filename)?;
265
266 // The `RECORD` file is modified during installation, so it needs a real
267 // copy rather than a link back to the cache.
268 let options = LinkOptions::new(link_mode)
269 .with_mutable_copy_filter(|p: &Path| p.ends_with("RECORD"))
270 .with_copy_locks(state.copy_locks())
271 .with_on_existing_directory(OnExistingDirectory::Merge);
272 let used_link_mode = link_dir(wheel, site_packages, &options)?;
273
274 if used_link_mode == LinkMode::Clone {
275 // The directory mtime is not updated when cloning and the mtime is
276 // used by CPython's import mechanisms to determine if it should look
277 // for new packages in a directory. Force an update so packages are
278 // importable without manual cache invalidation.
279 //
280 // <https://github.com/python/cpython/blob/8336cb2b6f428246803b02a4e97fce49d0bb1e09/Lib/importlib/_bootstrap_external.py#L1601>
281 update_site_packages_mtime(site_packages);
282 }
283
284 Ok(count)
285}
286
287/// Update the mtime of the site-packages directory to the current time.
288fn update_site_packages_mtime(site_packages: &Path) {
289 let now = SystemTime::now();
290 match fs::File::open(site_packages) {
291 Ok(dir) => {
292 if let Err(err) = dir.set_modified(now) {
293 debug!(
294 "Failed to update mtime for {}: {err}",
295 site_packages.display()
296 );
297 }
298 }
299 Err(err) => debug!(
300 "Failed to open {} to update mtime: {err}",
301 site_packages.display()
302 ),
303 }
304}
305
306/// Walk the wheel directory and register all paths for conflict detection.
307///
308/// Returns the number of files (not directories) in the wheel.
309fn register_installed_paths(
310 wheel: &Path,
311 state: &InstallState,
312 filename: &WheelFilename,
313) -> Result<usize, Error> {
314 let mut count = 0;
315 for entry in WalkDir::new(wheel) {
316 let entry = entry?;
317 let path = entry.path();
318 let relative = path.strip_prefix(wheel).expect("walkdir starts with root");
319 state.register_installed_path(relative, path, filename);
320 if entry.file_type().is_file() {
321 count += 1;
322 }
323 }
324 Ok(count)
325}