Skip to main content

uv_extract/
sync.rs

1use std::path::{Path, PathBuf};
2use std::sync::{LazyLock, Mutex};
3
4use crate::vendor::CloneableSeekableReader;
5use crate::{CompressionMethod, Error, insecure_no_validate, validate_archive_member_name};
6use rayon::prelude::*;
7use rustc_hash::FxHashSet;
8use tracing::warn;
9use uv_configuration::RAYON_INITIALIZE;
10use uv_warnings::warn_user_once;
11use zip::ZipArchive;
12
13/// Unzip a `.zip` archive into the target directory.
14pub fn unzip(reader: fs_err::File, target: &Path) -> Result<(), Error> {
15    let (reader, filename) = reader.into_parts();
16
17    // Unzip in parallel.
18    let reader = std::io::BufReader::new(reader);
19    let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
20    let directories = Mutex::new(FxHashSet::default());
21    let skip_validation = insecure_no_validate();
22    // Initialize the threadpool with the user settings.
23    LazyLock::force(&RAYON_INITIALIZE);
24    (0..archive.len())
25        .into_par_iter()
26        .map(|file_number| {
27            let mut archive = archive.clone();
28            let mut file = archive.by_index(file_number)?;
29
30            let compression = CompressionMethod::from(file.compression());
31            if !compression.is_well_known() {
32                warn_user_once!(
33                    "One or more file entries in '{filename}' use the '{compression}' compression method, which is not widely supported. A future version of uv will reject ZIP archives containing entries compressed with this method. Entries must be compressed with the '{stored}', '{deflate}', or '{zstd}' compression methods.",
34                    filename = filename.display(),
35                    stored = CompressionMethod::Stored,
36                    deflate = CompressionMethod::Deflated,
37                    zstd = CompressionMethod::Zstd,
38                );
39            }
40
41            if let Err(e) = validate_archive_member_name(file.name()) {
42                if !skip_validation {
43                    return Err(e);
44                }
45            }
46
47            // Determine the path of the file within the wheel.
48            let Some(enclosed_name) = file.enclosed_name() else {
49                warn!("Skipping unsafe file name: {}", file.name());
50                return Ok(());
51            };
52
53            // Create necessary parent directories.
54            let path = target.join(enclosed_name);
55            if file.is_dir() {
56                let mut directories = directories.lock().unwrap();
57                if directories.insert(path.clone()) {
58                    fs_err::create_dir_all(path).map_err(Error::Io)?;
59                }
60                return Ok(());
61            }
62
63            if let Some(parent) = path.parent() {
64                let mut directories = directories.lock().unwrap();
65                if directories.insert(parent.to_path_buf()) {
66                    fs_err::create_dir_all(parent).map_err(Error::Io)?;
67                }
68            }
69
70            // Copy the file contents.
71            let outfile = fs_err::File::create(&path).map_err(Error::Io)?;
72            let size = file.size();
73            if size > 0 {
74                let mut writer = if let Ok(size) = usize::try_from(size) {
75                    std::io::BufWriter::with_capacity(std::cmp::min(size, 1024 * 1024), outfile)
76                } else {
77                    std::io::BufWriter::new(outfile)
78                };
79                std::io::copy(&mut file, &mut writer).map_err(Error::io_or_compression)?;
80            }
81
82            // See `uv_extract::stream::unzip`. For simplicity, this is identical with the code there except for being
83            // sync.
84            #[cfg(unix)]
85            {
86                use std::fs::Permissions;
87                use std::os::unix::fs::PermissionsExt;
88
89                if let Some(mode) = file.unix_mode() {
90                    // https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100
91                    let has_any_executable_bit = mode & 0o111;
92                    if has_any_executable_bit != 0 {
93                        let permissions = fs_err::metadata(&path).map_err(Error::Io)?.permissions();
94                        if permissions.mode() & 0o111 != 0o111 {
95                            fs_err::set_permissions(
96                                &path,
97                                Permissions::from_mode(permissions.mode() | 0o111),
98                            )
99                            .map_err(Error::Io)?;
100                        }
101                    }
102                }
103            }
104
105            Ok(())
106        })
107        .collect::<Result<_, Error>>()
108}
109
110/// Extract the top-level directory from an unpacked archive.
111///
112/// The specification says:
113/// > A .tar.gz source distribution (sdist) contains a single top-level directory called
114/// > `{name}-{version}` (e.g. foo-1.0), containing the source files of the package.
115///
116/// This function returns the path to that top-level directory.
117pub fn strip_component(source: impl AsRef<Path>) -> Result<PathBuf, Error> {
118    // TODO(konstin): Verify the name of the directory.
119    let top_level = fs_err::read_dir(source.as_ref())
120        .map_err(Error::Io)?
121        .collect::<std::io::Result<Vec<fs_err::DirEntry>>>()
122        .map_err(Error::Io)?;
123    match top_level.as_slice() {
124        [root] => Ok(root.path()),
125        [] => Err(Error::EmptyArchive),
126        _ => Err(Error::NonSingularArchive(
127            top_level
128                .into_iter()
129                .map(|entry| entry.file_name())
130                .collect(),
131        )),
132    }
133}