uv_install_wheel/
wheel.rs

1use std::collections::HashMap;
2use std::io;
3use std::io::{BufReader, Read, Write};
4use std::path::{Path, PathBuf};
5
6use data_encoding::BASE64URL_NOPAD;
7use fs_err as fs;
8use fs_err::{DirEntry, File};
9use mailparse::parse_headers;
10use rustc_hash::FxHashMap;
11use sha2::{Digest, Sha256};
12use tracing::{debug, instrument, trace, warn};
13use walkdir::WalkDir;
14
15use uv_fs::{Simplified, persist_with_retry_sync, relative_to};
16use uv_normalize::PackageName;
17use uv_pypi_types::DirectUrl;
18use uv_shell::escape_posix_for_single_quotes;
19use uv_trampoline_builder::windows_script_launcher;
20use uv_warnings::warn_user_once;
21
22use crate::record::RecordEntry;
23use crate::script::{Script, scripts_from_ini};
24use crate::{Error, Layout};
25
26/// Wrapper script template function
27///
28/// <https://github.com/pypa/pip/blob/7f8a6844037fb7255cfd0d34ff8e8cf44f2598d4/src/pip/_vendor/distlib/scripts.py#L41-L48>
29///
30/// Script template slightly modified: removed `import re`, allowing scripts that never import `re` to load faster.
31fn get_script_launcher(entry_point: &Script, shebang: &str) -> String {
32    let Script {
33        module, function, ..
34    } = entry_point;
35
36    let import_name = entry_point.import_name();
37
38    format!(
39        r#"{shebang}
40# -*- coding: utf-8 -*-
41import sys
42from {module} import {import_name}
43if __name__ == "__main__":
44    if sys.argv[0].endswith("-script.pyw"):
45        sys.argv[0] = sys.argv[0][:-11]
46    elif sys.argv[0].endswith(".exe"):
47        sys.argv[0] = sys.argv[0][:-4]
48    sys.exit({function}())
49"#
50    )
51}
52
53/// Part of entrypoints parsing
54pub(crate) fn read_scripts_from_section(
55    scripts_section: &HashMap<String, Option<String>>,
56    section_name: &str,
57    extras: Option<&[String]>,
58) -> Result<Vec<Script>, Error> {
59    let mut scripts = Vec::new();
60    for (script_name, python_location) in scripts_section {
61        match python_location {
62            Some(value) => {
63                if let Some(script) = Script::from_value(script_name, value, extras)? {
64                    scripts.push(script);
65                }
66            }
67            None => {
68                return Err(Error::InvalidWheel(format!(
69                    "[{section_name}] key {script_name} must have a value"
70                )));
71            }
72        }
73    }
74    Ok(scripts)
75}
76
77/// Shamelessly stolen (and updated for recent sha2)
78/// <https://github.com/richo/hashing-copy/blob/d8dd2fdb63c6faf198de0c9e5713d6249cbb5323/src/lib.rs#L10-L52>
79/// which in turn got it from std
80/// <https://doc.rust-lang.org/1.58.0/src/std/io/copy.rs.html#128-156>
81fn copy_and_hash(reader: &mut impl Read, writer: &mut impl Write) -> io::Result<(u64, String)> {
82    // TODO: Do we need to support anything besides sha256?
83    let mut hasher = Sha256::new();
84    // Same buf size as std. Note that this number is important for performance
85    let mut buf = vec![0; 8 * 1024];
86
87    let mut written = 0;
88    loop {
89        let len = match reader.read(&mut buf) {
90            Ok(0) => break,
91            Ok(len) => len,
92            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
93            Err(e) => return Err(e),
94        };
95        hasher.update(&buf[..len]);
96        writer.write_all(&buf[..len])?;
97        written += len as u64;
98    }
99    Ok((
100        written,
101        format!("sha256={}", BASE64URL_NOPAD.encode(&hasher.finalize())),
102    ))
103}
104
105/// Format the shebang for a given Python executable.
106///
107/// Like pip, if a shebang is non-simple (too long or contains spaces), we use `/bin/sh` as the
108/// executable.
109///
110/// See: <https://github.com/pypa/pip/blob/0ad4c94be74cc24874c6feb5bb3c2152c398a18e/src/pip/_vendor/distlib/scripts.py#L136-L165>
111fn format_shebang(executable: impl AsRef<Path>, os_name: &str, relocatable: bool) -> String {
112    // Convert the executable to a simplified path.
113    let executable = executable.as_ref().simplified_display().to_string();
114
115    // Validate the shebang.
116    if os_name == "posix" {
117        // The length of the full line: the shebang, plus the leading `#` and `!`, and a trailing
118        // newline.
119        let shebang_length = 2 + executable.len() + 1;
120
121        // If the shebang is too long, or contains spaces, wrap it in `/bin/sh`.
122        // Same applies for relocatable scripts (executable is relative to script dir, hence `dirname` trick)
123        // (note: the Windows trampoline binaries natively support relative paths to executable)
124        if shebang_length > 127 || executable.contains(' ') || relocatable {
125            let prefix = if relocatable {
126                r#""$(dirname -- "$(realpath -- "$0")")"/"#
127            } else {
128                ""
129            };
130            let executable = format!(
131                "{}'{}'",
132                prefix,
133                escape_posix_for_single_quotes(&executable)
134            );
135            return format!("#!/bin/sh\n'''exec' {executable} \"$0\" \"$@\"\n' '''");
136        }
137    }
138
139    format!("#!{executable}")
140}
141
/// Returns a [`PathBuf`] to `python[w].exe` for script execution.
///
/// For GUI scripts on Windows, `python` in the file name is replaced by `pythonw`; the result is
/// used only if that file exists. In every other case `python_executable` is returned as-is.
///
/// <https://github.com/pypa/pip/blob/76e82a43f8fb04695e834810df64f2d9a2ff6020/src/pip/_vendor/distlib/scripts.py#L121-L126>
fn get_script_executable(python_executable: &Path, is_gui: bool) -> PathBuf {
    // Only check for `pythonw.exe` on Windows.
    if !(cfg!(windows) && is_gui) {
        return python_executable.to_path_buf();
    }

    let gui_candidate = python_executable.file_name().map(|file_name| {
        let gui_name = file_name.to_string_lossy().replace("python", "pythonw");
        python_executable.with_file_name(gui_name)
    });

    match gui_candidate {
        Some(candidate) if candidate.is_file() => candidate,
        _ => python_executable.to_path_buf(),
    }
}
160
161/// Determine the absolute path to an entrypoint script.
162fn entrypoint_path(entrypoint: &Script, layout: &Layout) -> PathBuf {
163    if cfg!(windows) {
164        // On windows we actually build an .exe wrapper
165        let script_name = entrypoint
166            .name
167            // FIXME: What are the in-reality rules here for names?
168            .strip_suffix(".py")
169            .unwrap_or(&entrypoint.name)
170            .to_string()
171            + ".exe";
172
173        layout.scheme.scripts.join(script_name)
174    } else {
175        layout.scheme.scripts.join(&entrypoint.name)
176    }
177}
178
179/// Create the wrapper scripts in the bin folder of the venv for launching console scripts.
180pub(crate) fn write_script_entrypoints(
181    layout: &Layout,
182    relocatable: bool,
183    site_packages: &Path,
184    entrypoints: &[Script],
185    record: &mut Vec<RecordEntry>,
186    is_gui: bool,
187) -> Result<(), Error> {
188    for entrypoint in entrypoints {
189        let warn_names = ["activate", "activate_this.py"];
190        if warn_names.contains(&entrypoint.name.as_str())
191            || entrypoint.name.starts_with("activate.")
192        {
193            warn_user_once!(
194                "The script name `{}` is reserved for virtual environment activation scripts.",
195                entrypoint.name
196            );
197        }
198        let reserved_names = ["python", "pythonw", "python3"];
199        if reserved_names.contains(&entrypoint.name.as_str())
200            || entrypoint
201                .name
202                .strip_prefix("python3.")
203                .is_some_and(|suffix| suffix.parse::<u8>().is_ok())
204        {
205            return Err(Error::ReservedScriptName(entrypoint.name.clone()));
206        }
207
208        let entrypoint_absolute = entrypoint_path(entrypoint, layout);
209
210        let entrypoint_relative = pathdiff::diff_paths(&entrypoint_absolute, site_packages)
211            .ok_or_else(|| {
212                Error::Io(io::Error::other(format!(
213                    "Could not find relative path for: {}",
214                    entrypoint_absolute.simplified_display()
215                )))
216            })?;
217
218        // Generate the launcher script.
219        let launcher_executable = get_script_executable(&layout.sys_executable, is_gui);
220        let launcher_executable =
221            get_relocatable_executable(launcher_executable, layout, relocatable)?;
222        let launcher_python_script = get_script_launcher(
223            entrypoint,
224            &format_shebang(&launcher_executable, &layout.os_name, relocatable),
225        );
226
227        // If necessary, wrap the launcher script in a Windows launcher binary.
228        if cfg!(windows) {
229            write_file_recorded(
230                site_packages,
231                &entrypoint_relative,
232                &windows_script_launcher(&launcher_python_script, is_gui, &launcher_executable)?,
233                record,
234            )?;
235        } else {
236            write_file_recorded(
237                site_packages,
238                &entrypoint_relative,
239                &launcher_python_script,
240                record,
241            )?;
242
243            // Make the launcher executable.
244            #[cfg(unix)]
245            {
246                use std::fs::Permissions;
247                use std::os::unix::fs::PermissionsExt;
248
249                let path = site_packages.join(entrypoint_relative);
250                let permissions = fs::metadata(&path)?.permissions();
251                if permissions.mode() & 0o111 != 0o111 {
252                    fs::set_permissions(path, Permissions::from_mode(permissions.mode() | 0o111))?;
253                }
254            }
255        }
256    }
257    Ok(())
258}
259
260/// A parsed `WHEEL` file.
261#[derive(Debug, Clone, PartialEq, Eq)]
262pub struct WheelFile(FxHashMap<String, Vec<String>>);
263
264impl WheelFile {
265    /// Parse `WHEEL` file.
266    ///
267    /// > {distribution}-{version}.dist-info/WHEEL is metadata about the archive itself in the same
268    /// > email message format:
269    pub fn parse(wheel_text: &str) -> Result<Self, Error> {
270        // {distribution}-{version}.dist-info/WHEEL is metadata about the archive itself in the same email message format:
271        let data = parse_email_message_file(&mut wheel_text.as_bytes(), "WHEEL")?;
272
273        // mkl_fft-1.3.6-58-cp310-cp310-manylinux2014_x86_64.whl has multiple Wheel-Version entries, we have to ignore that
274        // like pip
275        let wheel_version = data
276            .get("Wheel-Version")
277            .and_then(|wheel_versions| wheel_versions.first());
278        let wheel_version = wheel_version
279            .and_then(|wheel_version| wheel_version.split_once('.'))
280            .ok_or_else(|| {
281                Error::InvalidWheel(format!(
282                    "Invalid Wheel-Version in WHEEL file: {wheel_version:?}"
283                ))
284            })?;
285        // pip has some test wheels that use that ancient version,
286        // and technically we only need to check that the version is not higher
287        if wheel_version == ("0", "1") {
288            warn!("Ancient wheel version 0.1 (expected is 1.0)");
289            return Ok(Self(data));
290        }
291        // Check that installer is compatible with Wheel-Version. Warn if minor version is greater, abort if major version is greater.
292        // Wheel-Version: 1.0
293        if wheel_version.0 != "1" {
294            return Err(Error::InvalidWheel(format!(
295                "Unsupported wheel major version (expected {}, got {})",
296                1, wheel_version.0
297            )));
298        }
299        if wheel_version.1 > "0" {
300            warn!(
301                "Warning: Unsupported wheel minor version (expected {}, got {})",
302                0, wheel_version.1
303            );
304        }
305        Ok(Self(data))
306    }
307
308    /// Whether the wheel should be installed into the `purelib` or `platlib` directory.
309    pub fn lib_kind(&self) -> LibKind {
310        // Determine whether Root-Is-Purelib == ‘true’.
311        // If it is, the wheel is pure, and should be installed into purelib.
312        let root_is_purelib = self
313            .0
314            .get("Root-Is-Purelib")
315            .and_then(|root_is_purelib| root_is_purelib.first())
316            .is_some_and(|root_is_purelib| root_is_purelib == "true");
317        if root_is_purelib {
318            LibKind::Pure
319        } else {
320            LibKind::Plat
321        }
322    }
323
324    /// Return the list of wheel tags.
325    pub fn tags(&self) -> Option<&[String]> {
326        self.0.get("Tag").map(Vec::as_slice)
327    }
328}
329
/// Whether the wheel should be installed into the `purelib` or `platlib` directory.
///
/// Produced by [`WheelFile::lib_kind`] from the `Root-Is-Purelib` field of the `WHEEL` file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LibKind {
    /// Install into the `purelib` directory (`Root-Is-Purelib` is `true`).
    Pure,
    /// Install into the `platlib` directory (`Root-Is-Purelib` is absent or not `true`).
    Plat,
}
338
339/// Moves the files and folders in src to dest, updating the RECORD in the process
340pub(crate) fn move_folder_recorded(
341    src_dir: &Path,
342    dest_dir: &Path,
343    site_packages: &Path,
344    record: &mut [RecordEntry],
345) -> Result<(), Error> {
346    let mut rename_or_copy = RenameOrCopy::default();
347    fs::create_dir_all(dest_dir)?;
348    for entry in WalkDir::new(src_dir) {
349        let entry = entry?;
350        let src = entry.path();
351        // This is the base path for moving to the actual target for the data
352        // e.g. for data it's without <..>.data/data/
353        let relative_to_data = src
354            .strip_prefix(src_dir)
355            .expect("walkdir prefix must not change");
356        // This is the path stored in RECORD
357        // e.g. for data it's with .data/data/
358        let relative_to_site_packages = src
359            .strip_prefix(site_packages)
360            .expect("prefix must not change");
361        let target = dest_dir.join(relative_to_data);
362        if entry.file_type().is_dir() {
363            fs::create_dir_all(&target)?;
364        } else {
365            rename_or_copy.rename_or_copy(src, &target)?;
366            let entry = record
367                .iter_mut()
368                .find(|entry| Path::new(&entry.path) == relative_to_site_packages)
369                .ok_or_else(|| {
370                    Error::RecordFile(format!(
371                        "Could not find entry for {} ({})",
372                        relative_to_site_packages.simplified_display(),
373                        src.simplified_display()
374                    ))
375                })?;
376            entry.path = relative_to(&target, site_packages)?
377                .portable_display()
378                .to_string();
379        }
380    }
381    Ok(())
382}
383
/// Installs a single script (not an entrypoint).
///
/// Binary files are moved with a copy fallback, while we rewrite scripts' shebangs if applicable.
///
/// A file whose first bytes are exactly `#!python` gets its first line replaced with the shebang
/// produced by [`format_shebang`]; the result is written to the scripts directory through a
/// temporary file and the `RECORD` entry receives the new path, size and hash. Any other file is
/// moved (or copied) unchanged and only the path of its `RECORD` entry is updated.
///
/// # Errors
///
/// Returns [`Error::InvalidWheel`] if `file` is a directory, a broken symlink or a symlink to a
/// directory, [`Error::RecordFile`] if `file` has no entry in `record`, and propagates I/O
/// errors.
///
/// # Panics
///
/// Panics if the path of `file` is not located below `site_packages`.
fn install_script(
    layout: &Layout,
    relocatable: bool,
    site_packages: &Path,
    record: &mut [RecordEntry],
    file: &DirEntry,
    // Only used on Unix; the non-Unix branch below renames with retries instead.
    #[allow(unused)] rename_or_copy: &mut RenameOrCopy,
) -> Result<(), Error> {
    let file_type = file.file_type()?;

    // Only files (or symlinks to files) are valid inside the `scripts` directory.
    if file_type.is_dir() {
        return Err(Error::InvalidWheel(format!(
            "Wheel contains an invalid entry (directory) in the `scripts` directory: {}",
            file.path().simplified_display()
        )));
    }

    if file_type.is_symlink() {
        // Failing to canonicalize is reported as a broken symlink.
        let Ok(target) = file.path().canonicalize() else {
            return Err(Error::InvalidWheel(format!(
                "Wheel contains an invalid entry (broken symlink) in the `scripts` directory: {}",
                file.path().simplified_display(),
            )));
        };
        if target.is_dir() {
            return Err(Error::InvalidWheel(format!(
                "Wheel contains an invalid entry (directory symlink) in the `scripts` directory: {} ({})",
                file.path().simplified_display(),
                target.simplified_display()
            )));
        }
    }

    // The final location of the script, both absolute and relative to `site_packages` (the
    // latter is what ends up in `RECORD`).
    let script_absolute = layout.scheme.scripts.join(file.file_name());
    let script_relative =
        pathdiff::diff_paths(&script_absolute, site_packages).ok_or_else(|| {
            Error::Io(io::Error::other(format!(
                "Could not find relative path for: {}",
                script_absolute.simplified_display()
            )))
        })?;

    let path = file.path();
    let mut script = File::open(&path)?;

    // https://sphinx-locales.github.io/peps/pep-0427/#recommended-installer-features
    // > In wheel, scripts are packaged in {distribution}-{version}.data/scripts/.
    // > If the first line of a file in scripts/ starts with exactly b'#!python',
    // > rewrite to point to the correct interpreter. Unix installers may need to
    // > add the +x bit to these files if the archive was created on Windows.
    //
    // > The b'#!pythonw' convention is allowed. b'#!pythonw' indicates a GUI script
    // > instead of a console script.
    let placeholder_python = b"#!python";
    // scripts might be binaries, so we read an exact number of bytes instead of the first line as string
    let mut start = vec![0; placeholder_python.len()];
    match script.read_exact(&mut start) {
        Ok(()) => {}
        // Ignore scripts shorter than the buffer.
        Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => {}
        Err(err) => return Err(Error::Io(err)),
    }
    // `Some((size, hash))` if the script was rewritten, `None` if it was moved unchanged.
    let size_and_encoded_hash = if start == placeholder_python {
        // Read the rest of the first line, one byte at a time, until we hit a newline.
        // The loop stops at the first `\r` or `\n`; for a `\r\n` terminator the `\n` is left in
        // the stream and is copied along with the rest of the script below.
        let mut is_gui = false;
        let mut first = true;
        let mut byte = [0u8; 1];
        loop {
            match script.read_exact(&mut byte) {
                Ok(()) => {
                    if byte[0] == b'\n' || byte[0] == b'\r' {
                        break;
                    }

                    // Check if this is a GUI script (starts with 'w').
                    if first {
                        is_gui = byte[0] == b'w';
                        first = false;
                    }
                }
                // The file consists of the shebang line only.
                Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => break,
                Err(err) => return Err(Error::Io(err)),
            }
        }

        // Build the replacement first line.
        let executable = get_script_executable(&layout.sys_executable, is_gui);
        let executable = get_relocatable_executable(executable, layout, relocatable)?;
        let mut start = format_shebang(&executable, &layout.os_name, relocatable)
            .as_bytes()
            .to_vec();

        // Use appropriate line ending for the platform.
        if layout.os_name == "nt" {
            start.extend_from_slice(b"\r\n");
        } else {
            start.push(b'\n');
        }

        // Write the new first line followed by the remainder of the script to a temporary file
        // in the scripts directory, hashing it on the way, then move it into place.
        let mut target = uv_fs::tempfile_in(&layout.scheme.scripts)?;
        let size_and_encoded_hash = copy_and_hash(&mut start.chain(script), &mut target)?;

        persist_with_retry_sync(target, &script_absolute)?;
        // The original is no longer needed once the rewritten copy is in place.
        fs::remove_file(&path)?;

        // Make the script executable. We just created the file, so we can set permissions directly.
        #[cfg(unix)]
        {
            use std::fs::Permissions;
            use std::os::unix::fs::PermissionsExt;

            let permissions = fs::metadata(&script_absolute)?.permissions();
            if permissions.mode() & 0o111 != 0o111 {
                fs::set_permissions(
                    script_absolute,
                    Permissions::from_mode(permissions.mode() | 0o111),
                )?;
            }
        }

        Some(size_and_encoded_hash)
    } else {
        // Reading and writing is slow (especially for large binaries), so we move them instead, if
        // we can. This also retains the file permissions. We _can't_ move (and must copy) if the
        // file permissions need to be changed, since we might not own the file.
        drop(script);

        #[cfg(unix)]
        {
            use std::fs::Permissions;
            use std::os::unix::fs::PermissionsExt;

            let permissions = fs::metadata(&path)?.permissions();
            if permissions.mode() & 0o111 == 0o111 {
                // If the permissions are already executable, we don't need to change them.
                // We fall back to copy when the file is on another drive.
                rename_or_copy.rename_or_copy(&path, &script_absolute)?;
            } else {
                // If we have to modify the permissions, copy the file, since we might not own it,
                // and we may not be allowed to change permissions on an unowned moved file.
                warn!(
                    "Copying script from {} to {} (permissions: {:o})",
                    path.simplified_display(),
                    script_absolute.simplified_display(),
                    permissions.mode()
                );

                uv_fs::copy_atomic_sync(&path, &script_absolute)?;

                fs::set_permissions(
                    script_absolute,
                    Permissions::from_mode(permissions.mode() | 0o111),
                )?;
            }
        }

        #[cfg(not(unix))]
        {
            // Here, two wrappers over rename are clashing: We want to retry for security software
            // blocking the file, but we also need the copy fallback if the problem was trying to
            // move a file cross-drive.
            match uv_fs::with_retry_sync(&path, &script_absolute, "renaming", || {
                fs_err::rename(&path, &script_absolute)
            }) {
                Ok(()) => (),
                Err(err) => {
                    // Note that the copy fallback leaves the source file in place.
                    debug!("Failed to rename, falling back to copy: {err}");
                    uv_fs::with_retry_sync(&path, &script_absolute, "copying", || {
                        fs_err::copy(&path, &script_absolute)?;
                        Ok(())
                    })?;
                }
            }
        }

        None
    };

    // Find the existing entry in the `RECORD`.
    let relative_to_site_packages = path
        .strip_prefix(site_packages)
        .expect("Prefix must no change");
    let entry = record
        .iter_mut()
        .find(|entry| Path::new(&entry.path) == relative_to_site_packages)
        .ok_or_else(|| {
            // This should not be possible at this point, but filesystems and such
            Error::RecordFile(format!(
                "Could not find entry for {} ({})",
                relative_to_site_packages.simplified_display(),
                path.simplified_display()
            ))
        })?;

    // Update the entry in the `RECORD`.
    entry.path = script_relative.portable_display().to_string();
    // Size and hash only change when the shebang was rewritten.
    if let Some((size, encoded_hash)) = size_and_encoded_hash {
        entry.size = Some(size);
        entry.hash = Some(encoded_hash);
    }
    Ok(())
}
588
589/// Move the files from the .data directory to the right location in the venv
590#[instrument(skip_all)]
591pub(crate) fn install_data(
592    layout: &Layout,
593    relocatable: bool,
594    site_packages: &Path,
595    data_dir: &Path,
596    dist_name: &PackageName,
597    console_scripts: &[Script],
598    gui_scripts: &[Script],
599    record: &mut [RecordEntry],
600) -> Result<(), Error> {
601    for entry in fs::read_dir(data_dir)? {
602        let entry = entry?;
603        let path = entry.path();
604
605        match path.file_name().and_then(|name| name.to_str()) {
606            Some("data") => {
607                trace!(
608                    ?dist_name,
609                    "Installing data/data to {}",
610                    layout.scheme.data.user_display()
611                );
612                // Move the content of the folder to the root of the venv
613                move_folder_recorded(&path, &layout.scheme.data, site_packages, record)?;
614            }
615            Some("scripts") => {
616                trace!(
617                    ?dist_name,
618                    "Installing data/scripts to {}",
619                    layout.scheme.scripts.user_display()
620                );
621                let mut rename_or_copy = RenameOrCopy::default();
622                let mut initialized = false;
623                for file in fs::read_dir(path)? {
624                    let file = file?;
625
626                    // Couldn't find any docs for this, took it directly from
627                    // https://github.com/pypa/pip/blob/b5457dfee47dd9e9f6ec45159d9d410ba44e5ea1/src/pip/_internal/operations/install/wheel.py#L565-L583
628                    let name = file.file_name().to_string_lossy().to_string();
629                    let match_name = name
630                        .strip_suffix(".exe")
631                        .or_else(|| name.strip_suffix("-script.py"))
632                        .or_else(|| name.strip_suffix(".pya"))
633                        .unwrap_or(&name);
634                    if console_scripts
635                        .iter()
636                        .chain(gui_scripts)
637                        .any(|script| script.name == match_name)
638                    {
639                        continue;
640                    }
641
642                    // Create the scripts directory, if it doesn't exist.
643                    if !initialized {
644                        fs::create_dir_all(&layout.scheme.scripts)?;
645                        initialized = true;
646                    }
647
648                    install_script(
649                        layout,
650                        relocatable,
651                        site_packages,
652                        record,
653                        &file,
654                        &mut rename_or_copy,
655                    )?;
656                }
657            }
658            Some("headers") => {
659                let target_path = layout.scheme.include.join(dist_name.as_str());
660                trace!(
661                    ?dist_name,
662                    "Installing data/headers to {}",
663                    target_path.user_display()
664                );
665                move_folder_recorded(&path, &target_path, site_packages, record)?;
666            }
667            Some("purelib") => {
668                trace!(
669                    ?dist_name,
670                    "Installing data/purelib to {}",
671                    layout.scheme.purelib.user_display()
672                );
673                move_folder_recorded(&path, &layout.scheme.purelib, site_packages, record)?;
674            }
675            Some("platlib") => {
676                trace!(
677                    ?dist_name,
678                    "Installing data/platlib to {}",
679                    layout.scheme.platlib.user_display()
680                );
681                move_folder_recorded(&path, &layout.scheme.platlib, site_packages, record)?;
682            }
683            _ => {
684                return Err(Error::InvalidWheel(format!(
685                    "Unknown wheel data type: {}",
686                    entry.file_name().display()
687                )));
688            }
689        }
690    }
691    Ok(())
692}
693
694/// Write the content to a file and add the hash to the RECORD list
695///
696/// We still the path in the absolute path to the site packages and the relative path in the
697/// site packages because we must only record the relative path in RECORD
698pub(crate) fn write_file_recorded(
699    site_packages: &Path,
700    relative_path: &Path,
701    content: impl AsRef<[u8]>,
702    record: &mut Vec<RecordEntry>,
703) -> Result<(), Error> {
704    debug_assert!(
705        !relative_path.is_absolute(),
706        "Path must be relative: {}",
707        relative_path.display()
708    );
709
710    uv_fs::write_atomic_sync(site_packages.join(relative_path), content.as_ref())?;
711
712    let hash = Sha256::new().chain_update(content.as_ref()).finalize();
713    let encoded_hash = format!("sha256={}", BASE64URL_NOPAD.encode(&hash));
714    record.push(RecordEntry {
715        path: relative_path.portable_display().to_string(),
716        hash: Some(encoded_hash),
717        size: Some(content.as_ref().len() as u64),
718    });
719    Ok(())
720}
721
722/// Adds `INSTALLER`, `REQUESTED` and `direct_url.json` to the .dist-info dir
723pub(crate) fn write_installer_metadata<Cache: serde::Serialize, Build: serde::Serialize>(
724    site_packages: &Path,
725    dist_info_prefix: &str,
726    requested: bool,
727    direct_url: Option<&DirectUrl>,
728    cache_info: Option<&Cache>,
729    build_info: Option<&Build>,
730    installer: Option<&str>,
731    record: &mut Vec<RecordEntry>,
732) -> Result<(), Error> {
733    let dist_info_dir = PathBuf::from(format!("{dist_info_prefix}.dist-info"));
734    if requested {
735        write_file_recorded(site_packages, &dist_info_dir.join("REQUESTED"), "", record)?;
736    }
737    if let Some(direct_url) = direct_url {
738        write_file_recorded(
739            site_packages,
740            &dist_info_dir.join("direct_url.json"),
741            serde_json::to_string(direct_url)?.as_bytes(),
742            record,
743        )?;
744    }
745    if let Some(cache_info) = cache_info {
746        write_file_recorded(
747            site_packages,
748            &dist_info_dir.join("uv_cache.json"),
749            serde_json::to_string(cache_info)?.as_bytes(),
750            record,
751        )?;
752    }
753    if let Some(build_info) = build_info {
754        write_file_recorded(
755            site_packages,
756            &dist_info_dir.join("uv_build.json"),
757            serde_json::to_string(build_info)?.as_bytes(),
758            record,
759        )?;
760    }
761    if let Some(installer) = installer {
762        write_file_recorded(
763            site_packages,
764            &dist_info_dir.join("INSTALLER"),
765            installer,
766            record,
767        )?;
768    }
769    Ok(())
770}
771
772/// Get the path to the Python executable for the [`Layout`], based on whether the wheel should
773/// be relocatable.
774///
775/// Returns `sys.executable` if the wheel is not relocatable; otherwise, returns a path relative
776/// to the scripts directory.
777pub(crate) fn get_relocatable_executable(
778    executable: PathBuf,
779    layout: &Layout,
780    relocatable: bool,
781) -> Result<PathBuf, Error> {
782    Ok(if relocatable {
783        pathdiff::diff_paths(&executable, &layout.scheme.scripts).ok_or_else(|| {
784            Error::Io(io::Error::other(format!(
785                "Could not find relative path for: {}",
786                executable.simplified_display()
787            )))
788        })?
789    } else {
790        executable
791    })
792}
793
794/// Reads the record file
795/// <https://www.python.org/dev/peps/pep-0376/#record>
796pub fn read_record_file(record: &mut impl Read) -> Result<Vec<RecordEntry>, Error> {
797    csv::ReaderBuilder::new()
798        .has_headers(false)
799        .escape(Some(b'"'))
800        .from_reader(record)
801        .deserialize()
802        .map(|entry| {
803            let entry: RecordEntry = entry?;
804            Ok(RecordEntry {
805                // selenium uses absolute paths for some reason
806                path: entry.path.trim_start_matches('/').to_string(),
807                ..entry
808            })
809        })
810        .collect()
811}
812
813/// Parse a file with email message format such as WHEEL and METADATA
814fn parse_email_message_file(
815    file: impl Read,
816    debug_filename: &str,
817) -> Result<FxHashMap<String, Vec<String>>, Error> {
818    let mut data: FxHashMap<String, Vec<String>> = FxHashMap::default();
819
820    let file = BufReader::new(file);
821    let content = file.bytes().collect::<Result<Vec<u8>, _>>()?;
822
823    let headers = parse_headers(content.as_slice())
824        .map_err(|err| {
825            Error::InvalidWheel(format!("Failed to parse {debug_filename} file: {err}"))
826        })?
827        .0;
828
829    for header in headers {
830        let name = header.get_key(); // Will not be trimmed because if it contains space, mailparse will skip the header
831        let mut value = header.get_value();
832
833        // Trim the value only if needed
834        let trimmed_value = value.trim();
835        if value != trimmed_value {
836            value = trimmed_value.to_string();
837        }
838
839        data.entry(name).or_default().push(value);
840    }
841
842    Ok(data)
843}
844
845/// Find the `dist-info` directory in an unzipped wheel.
846///
847/// See: <https://github.com/PyO3/python-pkginfo-rs>
848///
849/// See: <https://github.com/pypa/pip/blob/36823099a9cdd83261fdbc8c1d2a24fa2eea72ca/src/pip/_internal/utils/wheel.py#L38>
850pub(crate) fn find_dist_info(path: impl AsRef<Path>) -> Result<String, Error> {
851    // Iterate over `path` to find the `.dist-info` directory. It should be at the top-level.
852    let Some(dist_info) = fs::read_dir(path.as_ref())?.find_map(|entry| {
853        let entry = entry.ok()?;
854        let file_type = entry.file_type().ok()?;
855        if file_type.is_dir() {
856            let path = entry.path();
857            if path.extension().is_some_and(|ext| ext == "dist-info") {
858                Some(path)
859            } else {
860                None
861            }
862        } else {
863            None
864        }
865    }) else {
866        return Err(Error::InvalidWheel(
867            "Missing .dist-info directory".to_string(),
868        ));
869    };
870
871    let Some(dist_info_prefix) = dist_info.file_stem() else {
872        return Err(Error::InvalidWheel(
873            "Missing .dist-info directory".to_string(),
874        ));
875    };
876
877    Ok(dist_info_prefix.to_string_lossy().to_string())
878}
879
880/// Read the `dist-info` metadata from a directory.
881pub(crate) fn dist_info_metadata(
882    dist_info_prefix: &str,
883    wheel: impl AsRef<Path>,
884) -> Result<Vec<u8>, Error> {
885    let metadata_file = wheel
886        .as_ref()
887        .join(format!("{dist_info_prefix}.dist-info/METADATA"));
888    Ok(fs::read(metadata_file)?)
889}
890
891/// Parses the `entry_points.txt` entry in the wheel for console scripts
892///
893/// Returns (`script_name`, module, function)
894///
895/// Extras are supposed to be ignored, which happens if you pass None for extras.
896pub(crate) fn parse_scripts(
897    wheel: impl AsRef<Path>,
898    dist_info_prefix: &str,
899    extras: Option<&[String]>,
900    python_minor: u8,
901) -> Result<(Vec<Script>, Vec<Script>), Error> {
902    let entry_points_path = wheel
903        .as_ref()
904        .join(format!("{dist_info_prefix}.dist-info/entry_points.txt"));
905
906    // Read the entry points mapping. If the file doesn't exist, we just return an empty mapping.
907    let Ok(ini) = fs::read_to_string(entry_points_path) else {
908        return Ok((Vec::new(), Vec::new()));
909    };
910
911    scripts_from_ini(extras, python_minor, ini)
912}
913
914/// Rename a file with a fallback to copy that switches over on the first failure.
915#[derive(Default, Copy, Clone)]
916enum RenameOrCopy {
917    #[default]
918    Rename,
919    Copy,
920}
921
922impl RenameOrCopy {
923    /// Try to rename, and on failure, copy.
924    ///
925    /// Usually, source and target are on the same device, so we can rename, but if that fails, we
926    /// have to copy. If renaming failed once, we switch to copy permanently.
927    fn rename_or_copy(&mut self, from: impl AsRef<Path>, to: impl AsRef<Path>) -> io::Result<()> {
928        match self {
929            Self::Rename => match fs_err::rename(from.as_ref(), to.as_ref()) {
930                Ok(()) => {}
931                Err(err) => {
932                    *self = Self::Copy;
933                    debug!("Failed to rename, falling back to copy: {err}");
934                    fs_err::copy(from.as_ref(), to.as_ref())?;
935                }
936            },
937            Self::Copy => {
938                fs_err::copy(from.as_ref(), to.as_ref())?;
939            }
940        }
941        Ok(())
942    }
943}
944
945#[cfg(test)]
946mod test {
947    use std::io::Cursor;
948    use std::path::Path;
949
950    use anyhow::Result;
951    use assert_fs::prelude::*;
952    use indoc::{formatdoc, indoc};
953
954    use super::{
955        Error, RecordEntry, Script, WheelFile, format_shebang, get_script_executable,
956        parse_email_message_file, read_record_file, write_installer_metadata,
957    };
958
959    #[test]
960    fn test_parse_email_message_file() {
961        let text = indoc! {"
962            Wheel-Version: 1.0
963            Generator: bdist_wheel (0.37.1)
964            Root-Is-Purelib: false
965            Tag: cp38-cp38-manylinux_2_17_x86_64
966            Tag: cp38-cp38-manylinux2014_x86_64
967        "};
968
969        parse_email_message_file(&mut text.as_bytes(), "WHEEL").unwrap();
970    }
971
972    #[test]
973    fn test_parse_email_message_file_with_trimmed_value() {
974        let text = indoc! {"
975            Wheel-Version: 1.0
976            Generator: bdist_wheel (0.37.1)
977            Root-Is-Purelib: false
978            Tag:        cp38-cp38-manylinux_2_17_x86_64
979        "};
980
981        let wheel = parse_email_message_file(&mut text.as_bytes(), "WHEEL").unwrap();
982        let tags = &wheel["Tag"];
983        let tag = tags
984            .first()
985            .expect("Expected one tag inside the WHEEL file");
986        assert_eq!(tag, "cp38-cp38-manylinux_2_17_x86_64");
987    }
988
989    #[test]
990    fn test_parse_email_message_file_is_skipping_keys_with_space() {
991        let text = indoc! {"
992            Wheel-Version: 1.0
993            Generator: bdist_wheel (0.37.1)
994            Root-Is-Purelib: false
995              Tag  : cp38-cp38-manylinux_2_17_x86_64
996        "};
997
998        let wheel = parse_email_message_file(&mut text.as_bytes(), "WHEEL").unwrap();
999        assert!(!wheel.contains_key("Tag"));
1000        assert_eq!(3, wheel.keys().len());
1001    }
1002
1003    #[test]
1004    fn test_parse_email_message_file_with_value_starting_with_linesep_and_two_space() {
1005        // Check: https://files.pythonhosted.org/packages/0c/b7/ecfdce6368cc3664d301f7f52db4fe1004aa7da7a12c4a9bf1de534ff6ab/ziglang-0.13.0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl
1006        let text = indoc! {"
1007            Wheel-Version: 1.0
1008            Generator: ziglang make_wheels.py
1009            Root-Is-Purelib: false
1010            Tag:
1011              py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64
1012        "};
1013
1014        parse_email_message_file(&mut text.as_bytes(), "WHEEL").unwrap();
1015    }
1016
1017    #[test]
1018    fn test_parse_wheel_version() {
1019        fn wheel_with_version(version: &str) -> String {
1020            formatdoc! {"
1021                Wheel-Version: {}
1022                Generator: bdist_wheel (0.37.0)
1023                Root-Is-Purelib: true
1024                Tag: py2-none-any
1025                Tag: py3-none-any
1026                ",
1027                version
1028            }
1029        }
1030        WheelFile::parse(&wheel_with_version("1.0")).unwrap();
1031        WheelFile::parse(&wheel_with_version("2.0")).unwrap_err();
1032    }
1033
1034    #[test]
1035    fn record_with_absolute_paths() {
1036        let record: &str = indoc! {"
1037            /selenium/__init__.py,sha256=l8nEsTP4D2dZVula_p4ZuCe8AGnxOq7MxMeAWNvR0Qc,811
1038            /selenium/common/exceptions.py,sha256=oZx2PS-g1gYLqJA_oqzE4Rq4ngplqlwwRBZDofiqni0,9309
1039            selenium-4.1.0.dist-info/METADATA,sha256=jqvBEwtJJ2zh6CljTfTXmpF1aiFs-gvOVikxGbVyX40,6468
1040            selenium-4.1.0.dist-info/RECORD,,
1041        "};
1042
1043        let entries = read_record_file(&mut record.as_bytes()).unwrap();
1044        let expected = [
1045            "selenium/__init__.py",
1046            "selenium/common/exceptions.py",
1047            "selenium-4.1.0.dist-info/METADATA",
1048            "selenium-4.1.0.dist-info/RECORD",
1049        ]
1050        .map(ToString::to_string)
1051        .to_vec();
1052        let actual = entries
1053            .into_iter()
1054            .map(|entry| entry.path)
1055            .collect::<Vec<String>>();
1056        assert_eq!(expected, actual);
1057    }
1058
1059    #[test]
1060    fn test_script_from_value() {
1061        assert_eq!(
1062            Script::from_value("launcher", "foo.bar:main", None).unwrap(),
1063            Some(Script {
1064                name: "launcher".to_string(),
1065                module: "foo.bar".to_string(),
1066                function: "main".to_string(),
1067            })
1068        );
1069        assert_eq!(
1070            Script::from_value(
1071                "launcher",
1072                "foo.bar:main",
1073                Some(&["bar".to_string(), "baz".to_string()]),
1074            )
1075            .unwrap(),
1076            Some(Script {
1077                name: "launcher".to_string(),
1078                module: "foo.bar".to_string(),
1079                function: "main".to_string(),
1080            })
1081        );
1082        assert_eq!(
1083            Script::from_value("launcher", "foomod:main_bar [bar,baz]", Some(&[])).unwrap(),
1084            None
1085        );
1086        assert_eq!(
1087            Script::from_value(
1088                "launcher",
1089                "foomod:main_bar [bar,baz]",
1090                Some(&["bar".to_string(), "baz".to_string()]),
1091            )
1092            .unwrap(),
1093            Some(Script {
1094                name: "launcher".to_string(),
1095                module: "foomod".to_string(),
1096                function: "main_bar".to_string(),
1097            })
1098        );
1099    }
1100
1101    #[test]
1102    fn test_shebang() {
1103        // By default, use a simple shebang.
1104        let executable = Path::new("/usr/bin/python3");
1105        let os_name = "posix";
1106        assert_eq!(
1107            format_shebang(executable, os_name, false),
1108            "#!/usr/bin/python3"
1109        );
1110
1111        // If the path contains spaces, we should use the `exec` trick.
1112        let executable = Path::new("/usr/bin/path to python3");
1113        let os_name = "posix";
1114        assert_eq!(
1115            format_shebang(executable, os_name, false),
1116            "#!/bin/sh\n'''exec' '/usr/bin/path to python3' \"$0\" \"$@\"\n' '''"
1117        );
1118
1119        // And if we want a relocatable script, we should use the `exec` trick with `dirname`.
1120        let executable = Path::new("python3");
1121        let os_name = "posix";
1122        assert_eq!(
1123            format_shebang(executable, os_name, true),
1124            "#!/bin/sh\n'''exec' \"$(dirname -- \"$(realpath -- \"$0\")\")\"/'python3' \"$0\" \"$@\"\n' '''"
1125        );
1126
1127        // Except on Windows...
1128        let executable = Path::new("/usr/bin/path to python3");
1129        let os_name = "nt";
1130        assert_eq!(
1131            format_shebang(executable, os_name, false),
1132            "#!/usr/bin/path to python3"
1133        );
1134
1135        // Quotes, however, are ok.
1136        let executable = Path::new("/usr/bin/'python3'");
1137        let os_name = "posix";
1138        assert_eq!(
1139            format_shebang(executable, os_name, false),
1140            "#!/usr/bin/'python3'"
1141        );
1142
1143        // If the path is too long, we should not use the `exec` trick.
1144        let executable = Path::new(
1145            "/usr/bin/path/to/a/very/long/executable/executable/executable/executable/executable/executable/executable/executable/name/python3",
1146        );
1147        let os_name = "posix";
1148        assert_eq!(
1149            format_shebang(executable, os_name, false),
1150            "#!/bin/sh\n'''exec' '/usr/bin/path/to/a/very/long/executable/executable/executable/executable/executable/executable/executable/executable/name/python3' \"$0\" \"$@\"\n' '''"
1151        );
1152    }
1153
1154    #[test]
1155    fn test_empty_value() -> Result<(), Error> {
1156        let wheel = indoc! {r"
1157        Wheel-Version: 1.0
1158        Generator: custom
1159        Root-Is-Purelib: false
1160        Tag:
1161        Tag: -manylinux_2_17_x86_64
1162        Tag: -manylinux2014_x86_64
1163        "
1164        };
1165        let reader = Cursor::new(wheel.to_string().into_bytes());
1166        let wheel_file = parse_email_message_file(reader, "WHEEL")?;
1167        assert_eq!(
1168            wheel_file.get("Wheel-Version"),
1169            Some(&["1.0".to_string()].to_vec())
1170        );
1171        assert_eq!(
1172            wheel_file.get("Tag"),
1173            Some(
1174                &[
1175                    String::new(),
1176                    "-manylinux_2_17_x86_64".to_string(),
1177                    "-manylinux2014_x86_64".to_string()
1178                ]
1179                .to_vec()
1180            )
1181        );
1182        Ok(())
1183    }
1184
1185    #[test]
1186    fn test_script_executable() -> Result<()> {
1187        // Test with adjacent pythonw.exe
1188        let temp_dir = assert_fs::TempDir::new()?;
1189        let python_exe = temp_dir.child("python.exe");
1190        let pythonw_exe = temp_dir.child("pythonw.exe");
1191        python_exe.write_str("")?;
1192        pythonw_exe.write_str("")?;
1193
1194        let script_path = get_script_executable(&python_exe, true);
1195        #[cfg(windows)]
1196        assert_eq!(script_path, pythonw_exe.to_path_buf());
1197        #[cfg(not(windows))]
1198        assert_eq!(script_path, python_exe.to_path_buf());
1199
1200        let script_path = get_script_executable(&python_exe, false);
1201        assert_eq!(script_path, python_exe.to_path_buf());
1202
1203        // Test without adjacent pythonw.exe
1204        let temp_dir = assert_fs::TempDir::new()?;
1205        let python_exe = temp_dir.child("python.exe");
1206        python_exe.write_str("")?;
1207
1208        let script_path = get_script_executable(&python_exe, true);
1209        assert_eq!(script_path, python_exe.to_path_buf());
1210
1211        let script_path = get_script_executable(&python_exe, false);
1212        assert_eq!(script_path, python_exe.to_path_buf());
1213
1214        // Test with overridden python.exe and pythonw.exe
1215        let temp_dir = assert_fs::TempDir::new()?;
1216        let python_exe = temp_dir.child("python.exe");
1217        let pythonw_exe = temp_dir.child("pythonw.exe");
1218        let dot_python_exe = temp_dir.child(".python.exe");
1219        let dot_pythonw_exe = temp_dir.child(".pythonw.exe");
1220        python_exe.write_str("")?;
1221        pythonw_exe.write_str("")?;
1222        dot_python_exe.write_str("")?;
1223        dot_pythonw_exe.write_str("")?;
1224
1225        let script_path = get_script_executable(&dot_python_exe, true);
1226        #[cfg(windows)]
1227        assert_eq!(script_path, dot_pythonw_exe.to_path_buf());
1228        #[cfg(not(windows))]
1229        assert_eq!(script_path, dot_python_exe.to_path_buf());
1230
1231        let script_path = get_script_executable(&dot_python_exe, false);
1232        assert_eq!(script_path, dot_python_exe.to_path_buf());
1233
1234        Ok(())
1235    }
1236
1237    #[test]
1238    fn test_write_installer_metadata() {
1239        let temp_dir = assert_fs::TempDir::new().unwrap();
1240        let site_packages = temp_dir.path();
1241        let mut record: Vec<RecordEntry> = Vec::new();
1242        temp_dir
1243            .child("foo-0.1.0.dist-info")
1244            .create_dir_all()
1245            .unwrap();
1246        write_installer_metadata::<(), ()>(
1247            site_packages,
1248            "foo-0.1.0",
1249            true,
1250            None,
1251            None,
1252            None,
1253            Some("uv"),
1254            &mut record,
1255        )
1256        .unwrap();
1257        let expected = [
1258            "foo-0.1.0.dist-info/REQUESTED",
1259            "foo-0.1.0.dist-info/INSTALLER",
1260        ]
1261        .map(ToString::to_string)
1262        .to_vec();
1263        let actual = record
1264            .into_iter()
1265            .map(|entry| entry.path)
1266            .collect::<Vec<String>>();
1267        assert_eq!(expected, actual);
1268    }
1269}