Skip to main content

uv_install_wheel/
linker.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::io;
3use std::path::{Path, PathBuf};
4use std::sync::Mutex;
5use std::time::SystemTime;
6
7use fs_err as fs;
8use itertools::Itertools;
9use rustc_hash::FxHashMap;
10use tracing::{debug, instrument};
11use walkdir::WalkDir;
12
13use uv_distribution_filename::WheelFilename;
14use uv_fs::Simplified;
15use uv_fs::link::{CopyLocks, LinkOptions, OnExistingDirectory, link_dir};
16use uv_preview::{Preview, PreviewFeature};
17use uv_warnings::warn_user;
18
19use crate::Error;
20
21pub use uv_fs::link::LinkMode;
22
23/// Shared state for concurrent wheel installations.
24#[derive(Debug, Default)]
25pub struct InstallState {
26    /// Directory-level locks to prevent concurrent write corruption.
27    locks: CopyLocks,
28    /// Top level files and directories in site-packages, stored as relative path, and wheels they
29    /// are from, with the absolute paths in the unpacked wheel.
30    site_packages_paths: Mutex<FxHashMap<PathBuf, BTreeSet<(WheelFilename, PathBuf)>>>,
31    /// Preview settings for feature flags.
32    preview: Preview,
33}
34
35impl InstallState {
36    /// Create a new `InstallState` with the given preview settings.
37    pub fn new(preview: Preview) -> Self {
38        Self {
39            locks: CopyLocks::default(),
40            site_packages_paths: Mutex::new(FxHashMap::default()),
41            preview,
42        }
43    }
44
45    /// Get the underlying copy locks for use with [`uv_fs::link::link_dir`] functions.
46    pub fn copy_locks(&self) -> &CopyLocks {
47        &self.locks
48    }
49
50    /// Register which package installs which (top level) path.
51    ///
52    /// This is later used warn when different files at the same path exist in multiple packages.
53    ///
54    /// The first non-self argument is the target path relative to site-packages, the second is the
55    /// source path in the unpacked wheel.
56    fn register_installed_path(&self, relative: &Path, absolute: &Path, wheel: &WheelFilename) {
57        debug_assert!(!relative.is_absolute());
58        debug_assert!(absolute.is_absolute());
59
60        // Only register top level entries, these are the only ones we have reliably as cloning
61        // a directory on macOS traverses outside our code.
62        if relative.components().count() != 1 {
63            return;
64        }
65
66        self.site_packages_paths
67            .lock()
68            .unwrap()
69            .entry(relative.to_path_buf())
70            .or_default()
71            .insert((wheel.clone(), absolute.to_path_buf()));
72    }
73
74    /// Warn when the same file with different contents exists in multiple packages.
75    ///
76    /// The intent is to detect different variants of the same package installed over each other,
77    /// or different packages using the same top-level module name, which cause non-deterministic
78    /// failures only surfacing at runtime. See <https://github.com/astral-sh/uv/pull/13437> for a
79    /// list of cases.
80    ///
81    /// The check has some false negatives. It is rather too lenient than too strict, and includes
82    /// support for namespace packages that include the same `__init__.py` file, e.g., gpu-a and
83    /// gpu-b both including the same `gpu/__init__.py`.
84    ///
85    /// We assume that all wheels of a package have the same module(s), so a conflict between
86    /// installing two unpacked wheels is a conflict between two packages.
87    ///
88    /// # Performance
89    ///
90    /// When there are no namespace packages, this method is a Mutex lock and a hash map iteration.
91    ///
92    /// When there are namespace packages, we only traverse into directories shared by at least two
93    /// packages. For example, for namespace packages gpu-a, gpu-b, and gpu-c with
94    /// `gpu/a/__init__.py`, `gpu/b/__init__.py`, and `gpu/c/__init__.py` respectively, we only
95    /// need to read the `gpu` directory. If there is a deeper shared directory, we only recurse
96    /// down to this directory. As packages without conflicts generally do not share many
97    /// directories, we do not recurse far.
98    ///
99    /// For each directory, we analyze all packages sharing the directory at the same time, reading
100    /// the directory in each unpacked wheel only once. Effectively, we perform a parallel directory
101    /// walk with early exit.
102    ///
103    /// We avoid reading the actual file contents and assume they are the same when their file
104    /// length matches. This also excludes the same empty `__init__.py` files being reported as
105    /// conflicting.
106    pub fn warn_package_conflicts(self) -> Result<(), io::Error> {
107        // This warning is currently in preview.
108        if !self
109            .preview
110            .is_enabled(PreviewFeature::DetectModuleConflicts)
111        {
112            return Ok(());
113        }
114
115        for (relative, wheels) in &*self.site_packages_paths.lock().unwrap() {
116            // Fast path: Only one package is using this module name, no conflicts.
117            let mut wheel_iter = wheels.iter();
118            let Some(first_wheel) = wheel_iter.next() else {
119                debug_assert!(false, "at least one wheel");
120                continue;
121            };
122            if wheel_iter.next().is_none() {
123                continue;
124            }
125
126            // TODO(konsti): This assumes a path is either a file or a directory in all wheels.
127            let file_type = fs_err::metadata(&first_wheel.1)?.file_type();
128            if file_type.is_file() {
129                // Handle conflicts between files directly in site-packages without a module
130                // directory enclosing them.
131                let files: BTreeSet<(&WheelFilename, u64)> = wheels
132                    .iter()
133                    .map(|(wheel, absolute)| Ok((wheel, absolute.metadata()?.len())))
134                    .collect::<Result<_, io::Error>>()?;
135                Self::warn_file_conflict(relative, &files);
136            } else if file_type.is_dir() {
137                // Don't early return if the method returns true, so we show warnings for each
138                // top-level module.
139                Self::warn_directory_conflict(relative, wheels)?;
140            } else {
141                // We don't expect any other file type, but it's ok if this check has false
142                // negatives.
143            }
144        }
145
146        Ok(())
147    }
148
149    /// Analyze a directory for conflicts.
150    ///
151    /// If there are any non-identical files (checked by size) included in more than one wheel,
152    /// report this file and return.
153    ///
154    /// If there are any directories included in more than one wheel, recurse to analyze whether
155    /// the directories contain conflicting files.
156    ///
157    /// Returns `true` if a warning was emitted.
158    fn warn_directory_conflict(
159        directory: &Path,
160        wheels: &BTreeSet<(WheelFilename, PathBuf)>,
161    ) -> Result<bool, io::Error> {
162        // The files in the directory, as paths relative to the site-packages, with their origin and
163        // size.
164        let mut files: BTreeMap<PathBuf, BTreeSet<(&WheelFilename, u64)>> = BTreeMap::default();
165        // The directories in the directory, as paths relative to the site-packages, with their
166        // origin and absolute path.
167        let mut subdirectories: BTreeMap<PathBuf, BTreeSet<(WheelFilename, PathBuf)>> =
168            BTreeMap::default();
169
170        // Read the shared directory in each unpacked wheel.
171        for (wheel, absolute) in wheels {
172            for dir_entry in fs_err::read_dir(absolute)? {
173                let dir_entry = dir_entry?;
174                let relative = directory.join(dir_entry.file_name());
175                let file_type = dir_entry.file_type()?;
176                if file_type.is_file() {
177                    files
178                        .entry(relative)
179                        .or_default()
180                        .insert((wheel, dir_entry.metadata()?.len()));
181                } else if file_type.is_dir() {
182                    subdirectories
183                        .entry(relative)
184                        .or_default()
185                        .insert((wheel.clone(), dir_entry.path()));
186                } else {
187                    // We don't expect any other file type, but it's ok if this check has false
188                    // negatives.
189                }
190            }
191        }
192
193        for (file, file_wheels) in files {
194            if Self::warn_file_conflict(&file, &file_wheels) {
195                return Ok(true);
196            }
197        }
198
199        for (subdirectory, subdirectory_wheels) in subdirectories {
200            if subdirectory_wheels.len() == 1 {
201                continue;
202            }
203            // If there are directories shared between multiple wheels, recurse to check them
204            // for shared files.
205            if Self::warn_directory_conflict(&subdirectory, &subdirectory_wheels)? {
206                return Ok(true);
207            }
208        }
209
210        Ok(false)
211    }
212
213    /// Check if all files are the same size, if so assume they are identical.
214    ///
215    /// It's unlikely that two modules overlap with different contents but their files all have
216    /// the same length, so we use this heuristic in this performance critical path to avoid
217    /// reading potentially large files.
218    fn warn_file_conflict(file: &Path, file_wheels: &BTreeSet<(&WheelFilename, u64)>) -> bool {
219        let Some((_, file_len)) = file_wheels.first() else {
220            debug_assert!(false, "Always at least one element");
221            return false;
222        };
223        if !file_wheels
224            .iter()
225            .any(|(_, file_len_other)| file_len_other != file_len)
226        {
227            return false;
228        }
229
230        let packages = file_wheels
231            .iter()
232            .map(|(wheel_filename, _file_len)| {
233                format!("* {} ({})", wheel_filename.name, wheel_filename)
234            })
235            .join("\n");
236        warn_user!(
237            "The file `{}` is provided by more than one package, \
238            which causes an install race condition and can result in a broken module. \
239            Packages containing the file:\n{}",
240            file.user_display(),
241            packages
242        );
243
244        // Assumption: There is generally two packages that have a conflict. The output is
245        // more helpful with a single message that calls out the packages
246        // rather than being comprehensive about the conflicting files.
247        true
248    }
249}
250
251/// Extract a wheel by linking all of its files into site packages.
252///
253/// Returns the number of files extracted.
254#[instrument(skip_all)]
255pub fn link_wheel_files(
256    link_mode: LinkMode,
257    site_packages: impl AsRef<Path>,
258    wheel: impl AsRef<Path>,
259    state: &InstallState,
260    filename: &WheelFilename,
261) -> Result<usize, Error> {
262    let wheel = wheel.as_ref();
263    let site_packages = site_packages.as_ref();
264    let count = register_installed_paths(wheel, state, filename)?;
265
266    // The `RECORD` file is modified during installation, so it needs a real
267    // copy rather than a link back to the cache.
268    let options = LinkOptions::new(link_mode)
269        .with_mutable_copy_filter(|p: &Path| p.ends_with("RECORD"))
270        .with_copy_locks(state.copy_locks())
271        .with_on_existing_directory(OnExistingDirectory::Merge);
272    let used_link_mode = link_dir(wheel, site_packages, &options)?;
273
274    if used_link_mode == LinkMode::Clone {
275        // The directory mtime is not updated when cloning and the mtime is
276        // used by CPython's import mechanisms to determine if it should look
277        // for new packages in a directory. Force an update so packages are
278        // importable without manual cache invalidation.
279        //
280        // <https://github.com/python/cpython/blob/8336cb2b6f428246803b02a4e97fce49d0bb1e09/Lib/importlib/_bootstrap_external.py#L1601>
281        update_site_packages_mtime(site_packages);
282    }
283
284    Ok(count)
285}
286
287/// Update the mtime of the site-packages directory to the current time.
288fn update_site_packages_mtime(site_packages: &Path) {
289    let now = SystemTime::now();
290    match fs::File::open(site_packages) {
291        Ok(dir) => {
292            if let Err(err) = dir.set_modified(now) {
293                debug!(
294                    "Failed to update mtime for {}: {err}",
295                    site_packages.display()
296                );
297            }
298        }
299        Err(err) => debug!(
300            "Failed to open {} to update mtime: {err}",
301            site_packages.display()
302        ),
303    }
304}
305
306/// Walk the wheel directory and register all paths for conflict detection.
307///
308/// Returns the number of files (not directories) in the wheel.
309fn register_installed_paths(
310    wheel: &Path,
311    state: &InstallState,
312    filename: &WheelFilename,
313) -> Result<usize, Error> {
314    let mut count = 0;
315    for entry in WalkDir::new(wheel) {
316        let entry = entry?;
317        let path = entry.path();
318        let relative = path.strip_prefix(wheel).expect("walkdir starts with root");
319        state.register_installed_path(relative, path, filename);
320        if entry.file_type().is_file() {
321            count += 1;
322        }
323    }
324    Ok(count)
325}