Skip to main content

uv_extract/
sync.rs

1use std::path::{Path, PathBuf};
2use std::sync::{LazyLock, Mutex};
3
4use crate::vendor::{CloneableSeekableReader, HasLength};
5use crate::{Error, insecure_no_validate, validate_archive_member_name};
6use rayon::prelude::*;
7use rustc_hash::FxHashSet;
8use tracing::warn;
9use uv_configuration::RAYON_INITIALIZE;
10use zip::ZipArchive;
11
12/// Unzip a `.zip` archive into the target directory.
13pub fn unzip<R: Send + std::io::Read + std::io::Seek + HasLength>(
14    reader: R,
15    target: &Path,
16) -> Result<(), Error> {
17    // Unzip in parallel.
18    let reader = std::io::BufReader::new(reader);
19    let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
20    let directories = Mutex::new(FxHashSet::default());
21    let skip_validation = insecure_no_validate();
22    // Initialize the threadpool with the user settings.
23    LazyLock::force(&RAYON_INITIALIZE);
24    (0..archive.len())
25        .into_par_iter()
26        .map(|file_number| {
27            let mut archive = archive.clone();
28            let mut file = archive.by_index(file_number)?;
29
30            if let Err(e) = validate_archive_member_name(file.name()) {
31                if !skip_validation {
32                    return Err(e);
33                }
34            }
35
36            // Determine the path of the file within the wheel.
37            let Some(enclosed_name) = file.enclosed_name() else {
38                warn!("Skipping unsafe file name: {}", file.name());
39                return Ok(());
40            };
41
42            // Create necessary parent directories.
43            let path = target.join(enclosed_name);
44            if file.is_dir() {
45                let mut directories = directories.lock().unwrap();
46                if directories.insert(path.clone()) {
47                    fs_err::create_dir_all(path).map_err(Error::Io)?;
48                }
49                return Ok(());
50            }
51
52            if let Some(parent) = path.parent() {
53                let mut directories = directories.lock().unwrap();
54                if directories.insert(parent.to_path_buf()) {
55                    fs_err::create_dir_all(parent).map_err(Error::Io)?;
56                }
57            }
58
59            // Copy the file contents.
60            let outfile = fs_err::File::create(&path).map_err(Error::Io)?;
61            let size = file.size();
62            if size > 0 {
63                let mut writer = if let Ok(size) = usize::try_from(size) {
64                    std::io::BufWriter::with_capacity(std::cmp::min(size, 1024 * 1024), outfile)
65                } else {
66                    std::io::BufWriter::new(outfile)
67                };
68                std::io::copy(&mut file, &mut writer).map_err(Error::io_or_compression)?;
69            }
70
71            // See `uv_extract::stream::unzip`. For simplicity, this is identical with the code there except for being
72            // sync.
73            #[cfg(unix)]
74            {
75                use std::fs::Permissions;
76                use std::os::unix::fs::PermissionsExt;
77
78                if let Some(mode) = file.unix_mode() {
79                    // https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100
80                    let has_any_executable_bit = mode & 0o111;
81                    if has_any_executable_bit != 0 {
82                        let permissions = fs_err::metadata(&path).map_err(Error::Io)?.permissions();
83                        if permissions.mode() & 0o111 != 0o111 {
84                            fs_err::set_permissions(
85                                &path,
86                                Permissions::from_mode(permissions.mode() | 0o111),
87                            )
88                            .map_err(Error::Io)?;
89                        }
90                    }
91                }
92            }
93
94            Ok(())
95        })
96        .collect::<Result<_, Error>>()
97}
98
99/// Extract the top-level directory from an unpacked archive.
100///
101/// The specification says:
102/// > A .tar.gz source distribution (sdist) contains a single top-level directory called
103/// > `{name}-{version}` (e.g. foo-1.0), containing the source files of the package.
104///
105/// This function returns the path to that top-level directory.
106pub fn strip_component(source: impl AsRef<Path>) -> Result<PathBuf, Error> {
107    // TODO(konstin): Verify the name of the directory.
108    let top_level = fs_err::read_dir(source.as_ref())
109        .map_err(Error::Io)?
110        .collect::<std::io::Result<Vec<fs_err::DirEntry>>>()
111        .map_err(Error::Io)?;
112    match top_level.as_slice() {
113        [root] => Ok(root.path()),
114        [] => Err(Error::EmptyArchive),
115        _ => Err(Error::NonSingularArchive(
116            top_level
117                .into_iter()
118                .map(|entry| entry.file_name())
119                .collect(),
120        )),
121    }
122}