python_packaging/
wheel.rs

1// Copyright 2022 Gregory Szorc.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9/*! Interact with Python wheel files. */
10
11use {
12    crate::{
13        filesystem_scanning::PythonResourceIterator, module_util::PythonModuleSuffixes,
14        package_metadata::PythonPackageMetadata, resource::PythonResource,
15    },
16    anyhow::{anyhow, Context, Result},
17    once_cell::sync::Lazy,
18    simple_file_manifest::{File, FileEntry, FileManifest},
19    std::{borrow::Cow, io::Read, path::Path},
20    zip::ZipArchive,
21};
22
23/// Regex for finding the wheel info directory.
24///
25/// This is copied from the wheel.wheelfile Python module.
26
27static RE_WHEEL_INFO: Lazy<regex::Regex> = Lazy::new(|| {
28    regex::Regex::new(r"^(?P<namever>(?P<name>.+?)-(?P<ver>.+?))(-(?P<build>\d[^-]*))?-(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?)\.whl$").unwrap()
29});
30
/// Bit mask applied to a zip entry's Unix mode to decide whether the file is
/// flagged as executable (octal `0o100`, i.e. decimal 64).
const S_IXUSR: u32 = 0o100;
32
33/// Represents a Python wheel archive.
34pub struct WheelArchive {
35    files: FileManifest,
36    name_version: String,
37}
38
39impl WheelArchive {
40    /// Construct an instance from a generic reader.
41    ///
42    /// `basename` is the filename of the wheel. It is used to try to
43    /// locate the info directory.
44    pub fn from_reader<R>(reader: R, basename: &str) -> Result<Self>
45    where
46        R: std::io::Read + std::io::Seek,
47    {
48        let captures = RE_WHEEL_INFO
49            .captures(basename)
50            .ok_or_else(|| anyhow!("failed to parse wheel basename: {}", basename))?;
51
52        let name_version = captures
53            .name("namever")
54            .ok_or_else(|| anyhow!("could not find name-version in wheel name"))?
55            .as_str()
56            .to_string();
57
58        let mut archive = ZipArchive::new(reader)?;
59
60        let mut files = FileManifest::default();
61
62        for i in 0..archive.len() {
63            let mut file = archive.by_index(i)?;
64
65            // We only index files.
66            if file.is_dir() {
67                continue;
68            }
69
70            let mut buffer = Vec::with_capacity(file.size() as usize);
71            file.read_to_end(&mut buffer)?;
72
73            files.add_file_entry(
74                Path::new(file.name()),
75                FileEntry::new_from_data(buffer, file.unix_mode().unwrap_or(0) & S_IXUSR != 0),
76            )?;
77        }
78
79        Ok(Self {
80            files,
81            name_version,
82        })
83    }
84
85    /// Construct an instance from a filesystem path.
86    pub fn from_path(path: &Path) -> Result<Self> {
87        let fh = std::fs::File::open(path)
88            .with_context(|| format!("opening {} for wheel reading", path.display()))?;
89
90        let reader = std::io::BufReader::new(fh);
91        let basename = path
92            .file_name()
93            .ok_or_else(|| anyhow!("could not derive file name"))?
94            .to_string_lossy();
95
96        Self::from_reader(reader, &basename)
97    }
98
99    fn dist_info_path(&self) -> String {
100        format!("{}.dist-info", self.name_version)
101    }
102
103    fn data_path(&self) -> String {
104        format!("{}.data", self.name_version)
105    }
106
107    /// Obtain metadata about the wheel archive itself.
108    pub fn archive_metadata(&self) -> Result<PythonPackageMetadata> {
109        let path = format!("{}/WHEEL", self.dist_info_path());
110
111        let file = self
112            .files
113            .get(&path)
114            .ok_or_else(|| anyhow!("{} does not exist", path))?;
115
116        PythonPackageMetadata::from_metadata(&file.resolve_content()?)
117    }
118
119    /// Obtain the `.dist-info/METADATA` content as a parsed object.
120    pub fn metadata(&self) -> Result<PythonPackageMetadata> {
121        let path = format!("{}/METADATA", self.dist_info_path());
122
123        let file = self
124            .files
125            .get(&path)
126            .ok_or_else(|| anyhow!("{} does not exist", path))?;
127
128        PythonPackageMetadata::from_metadata(&file.resolve_content()?)
129    }
130
131    /// Obtain the first header value from the archive metadata file.
132    pub fn archive_metadata_header(&self, header: &str) -> Result<Cow<str>> {
133        let metadata = self.archive_metadata()?;
134
135        Ok(Cow::Owned(
136            metadata
137                .find_first_header(header)
138                .ok_or_else(|| anyhow!("{} not found", header))?
139                .to_string(),
140        ))
141    }
142
143    /// Obtain values of all headers from the archive metadata file.
144    pub fn archive_metadata_headers(&self, header: &str) -> Result<Vec<Cow<str>>> {
145        let metadata = self.archive_metadata()?;
146
147        Ok(metadata
148            .find_all_headers(header)
149            .iter()
150            .map(|s| Cow::Owned(s.to_string()))
151            .collect::<Vec<_>>())
152    }
153
154    /// Obtain the version number of the wheel specification used to build this wheel.
155    pub fn wheel_version(&self) -> Result<Cow<str>> {
156        self.archive_metadata_header("Wheel-Version")
157    }
158
159    /// Obtain the generator of the wheel archive.
160    pub fn wheel_generator(&self) -> Result<Cow<str>> {
161        self.archive_metadata_header("Generator")
162    }
163
164    /// Whether `Root-Is-Purelib` is set.
165    pub fn root_is_purelib(&self) -> Result<bool> {
166        Ok(self.archive_metadata_header("Root-Is-Purelib")? == "true")
167    }
168
169    /// `Tag` values for the wheel archive.
170    pub fn tags(&self) -> Result<Vec<Cow<str>>> {
171        self.archive_metadata_headers("Tag")
172    }
173
174    /// `Build` identifier for the wheel archive.
175    pub fn build(&self) -> Result<Cow<str>> {
176        self.archive_metadata_header("Build")
177    }
178
179    /// `Install-Paths-To` values.
180    pub fn install_paths_to(&self) -> Result<Vec<Cow<str>>> {
181        self.archive_metadata_headers("Install-Paths-To")
182    }
183
184    /// Obtain files in the .dist-info/ directory.
185    ///
186    /// The returned `PathBuf` are prefixed with the appropriate `*.dist-info`
187    /// directory.
188    pub fn dist_info_files(&self) -> Vec<File> {
189        let prefix = format!("{}/", self.dist_info_path());
190        self.files
191            .iter_files()
192            .filter(|f| f.path().starts_with(&prefix))
193            .collect::<Vec<_>>()
194    }
195
196    /// Obtain paths in a `.data/*/` directory.
197    fn data_paths(&self, key: &str) -> Vec<File> {
198        let prefix = format!("{}.data/{}/", self.name_version, key);
199
200        self.files
201            .iter_files()
202            .filter_map(|f| {
203                if f.path().starts_with(&prefix) {
204                    Some(File::new(
205                        &f.path().display().to_string()[prefix.len()..],
206                        f.entry(),
207                    ))
208                } else {
209                    None
210                }
211            })
212            .collect::<Vec<_>>()
213    }
214
215    /// Obtain files that should be installed to `purelib`.
216    ///
217    /// `*.data/purelib/` prefix is stripped from returned `PathBuf`.
218    pub fn purelib_files(&self) -> Vec<File> {
219        self.data_paths("purelib")
220    }
221
222    /// Obtain files that should be installed to `platlib`.
223    ///
224    /// `*.data/platlib/` prefix is stripped from returned `PathBuf`.
225    pub fn platlib_files(&self) -> Vec<File> {
226        self.data_paths("platlib")
227    }
228
229    /// Obtain files that should be installed to `headers`.
230    ///
231    /// `*.data/headers/` prefix is stripped from returned `PathBuf`.
232    pub fn headers_files(&self) -> Vec<File> {
233        self.data_paths("headers")
234    }
235
236    /// Obtain files that should be installed to `scripts`.
237    ///
238    /// `*.data/scripts/` prefix is stripped from returned `PathBuf`.
239    ///
240    /// TODO support optional argument to rewrite `#!python` shebangs.
241    pub fn scripts_files(&self) -> Vec<File> {
242        self.data_paths("scripts")
243    }
244
245    /// Obtain files that should be installed to `data`.
246    ///
247    /// `*.data/data/` prefix is stripped from returned `PathBuf`.
248    pub fn data_files(&self) -> Vec<File> {
249        self.data_paths("data")
250    }
251
252    /// Obtain normal files not part of metadata or special files.
253    ///
254    /// These are likely installed as-is.
255    ///
256    /// The returned `PathBuf` has the same path as the file in the
257    /// wheel archive.
258    pub fn regular_files(&self) -> Vec<File> {
259        let dist_info_prefix = format!("{}/", self.dist_info_path());
260        let data_prefix = format!("{}/", self.data_path());
261
262        self.files
263            .iter_files()
264            .filter(|f| {
265                !(f.path().starts_with(&dist_info_prefix) || f.path().starts_with(&data_prefix))
266            })
267            .collect::<Vec<_>>()
268    }
269
270    /// Obtain `PythonResource` for files within the wheel.
271    pub fn python_resources<'a>(
272        &self,
273        cache_tag: &str,
274        suffixes: &PythonModuleSuffixes,
275        emit_files: bool,
276        classify_files: bool,
277    ) -> Result<Vec<PythonResource<'a>>> {
278        // The filesystem scanning code relies on the final install layout.
279        // So we need to simulate that.
280
281        // Regular files are as-is.
282        let mut inputs = self.regular_files();
283
284        // As are .dist-info paths.
285        inputs.extend(self.dist_info_files());
286
287        // Get modules from purelib and platlib, remapping them to the root.
288        inputs.extend(self.purelib_files());
289        inputs.extend(self.platlib_files());
290
291        // Get resources from data, remapping them to the root.
292        inputs.extend(self.data_files());
293
294        // Other data keys are `headers` and `scripts`, which we don't yet
295        // support as resource types.
296
297        PythonResourceIterator::from_data_locations(
298            &inputs,
299            cache_tag,
300            suffixes,
301            emit_files,
302            classify_files,
303        )?
304        .collect::<Result<Vec<_>>>()
305    }
306}