python_packaging/
wheel_builder.rs

1// Copyright 2022 Gregory Szorc.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! Functionality for creating wheels.
10
11use {
12    anyhow::{anyhow, Context, Result},
13    once_cell::sync::Lazy,
14    sha2::Digest,
15    simple_file_manifest::{FileEntry, FileManifest},
16    std::{
17        cmp::Ordering,
18        io::{Seek, Write},
19        path::{Path, PathBuf},
20    },
21};
22
23/// Wheel filename component escape regular expression.
24static RE_FILENAME_ESCAPE: Lazy<regex::Regex> =
25    Lazy::new(|| regex::Regex::new(r"[^\w\d.]+").unwrap());
26
27fn base64_engine() -> impl base64::engine::Engine {
28    base64::engine::fast_portable::FastPortable::from(
29        &base64::alphabet::URL_SAFE,
30        base64::engine::fast_portable::FastPortableConfig::new().with_encode_padding(false),
31    )
32}
33
34/// Define and build a Python wheel from raw components.
35///
36/// Python wheels are glorified zip files with some special files
37/// annotating the Python component therein.
38///
39/// # Wheel Level Parameters
40///
41/// Wheels are defined by a *distribution* (e.g. a Python package name),
42/// a *version*, a *compatibility tag*, and an optional *build tag*.
43///
44/// The *compatibility tag* defines the Python, ABI, and platform
45/// compatibility of the wheel. See
46/// [PEP 425](https://www.python.org/dev/peps/pep-0425/) for an overview of the
47/// components of the compatibility tag and their potential values.
48///
49/// Our default *compatibility tag* value is `py3-none-any`. This is
50/// appropriate for a wheel containing pure Python code that is compatible
51/// with Python 3. If your wheel has binary executables or extension modules,
52/// you will want to update the compatibility tag to reflect the appropriate
53/// binary compatibility.
54///
55/// # .dist-info/WHEEL File
56///
57/// Wheel archives must have a `WHEEL` file describing the wheel itself.
58///
59/// This file is an email header like MIME document with various well-defined
60/// fields.
61///
62/// By default, we will automatically derive a minimal `WHEEL` file based
63/// on parameters passed into [Self::new] and defaults.
64///
65/// If you want to provide your own `WHEEL` file, simply define its content
66/// by adding a custom file through [Self::add_file_dist_info].
67///
68/// # .dist-info/METADATA File
69///
70/// Wheel archives must have a `METADATA` file describing the thing being
71/// distributed.
72///
73/// This file is an email header like MIME document with various well-defined
74/// fields.
75///
76/// By default, we will automatically derive a minimal `METADATA` file
77/// based on builder state.
78///
79/// If you want to provide your own `METADATA` file, simply define its content
80/// by adding a custom file through [Self::add_file_dist_info].
81///
82/// # Adding Files
83///
84/// Files in wheels go in 1 of 3 locations:
85///
86/// 1. The `.dist-info/` directory (added via [Self::add_file_dist_info]).
87/// 2. Special `.data/<location>/` directories (added via [Self::add_file_data]).
88/// 3. Everywhere else (added via [Self::add_file]).
89///
90/// Files in `.dist-info/` describe the wheel itself and the entity being
91/// distributed.
92///
93/// Files in `.data/<location>/` are moved to the indicated `<location>` when the
94/// wheel is installed. `<location>` here is the name of a Python installation
95/// directory, such as `purelib` (pure Python modules and bytecode), `platlib`
96/// (platform-specific / binary Python extension modules and other binaries),
97/// `scripts` (executable scripts), and more.
98///
99/// Files in all other locations in the archive are not treated specially and are
100/// extracted directly to `purelib` or `platlib`, depending on the value of
101/// `Root-Is-Purelib`.
102///
103/// # Building Wheels
104///
105/// Once you have modified settings and registered files, it is time to create your
106/// wheel.
107///
108/// If you want to materialize a `.whl` file with the proper file name, call
109/// [Self::write_wheel_into_directory].
110///
111/// If you want to just materialize the zip content of the wheel, call
112/// [Self::write_wheel_data].
113///
114/// If you want to obtain a collection of all the files that constitute the wheel
115/// before zip file generation, call [Self::build_file_manifest].
116///
117/// To obtain the name of the `.whl` file given current settings, call
118/// [Self::wheel_file_name].
119///
120/// Wheel zip archive content is deterministic for the same builder instance.
121/// For separate builder instances, content can be made identical by calling
122/// [Self::set_modified_time] to set the modified time and using identical input
123/// settings/files. (The modified time of files in zip files defaults to the time
124/// when the builder instance was created, which is obviously not deterministic.)
125///
126/// # Validation
127///
128/// This type generally performs little to no validation of input data. It is up
129/// to the caller to supply settings and content that constitutes a well-formed
130/// wheel.
131///
132/// Supplementary tools like [auditwheel](https://pypi.org/project/auditwheel/) can
133/// be useful for validating the content of wheels.
134pub struct WheelBuilder {
135    /// The primary name of the wheel.
136    distribution: String,
137
138    /// The version component of the wheel.
139    version: String,
140
141    /// Tag denoting the build of this wheel.
142    build_tag: Option<String>,
143
144    /// Python part of compatibility tag.
145    python_tag: String,
146
147    /// ABI part of compatibility tag.
148    abi_tag: String,
149
150    /// Platform part of compatibility tag.
151    platform_tag: String,
152
153    /// Name of tool that generated this wheel.
154    generator: String,
155
156    /// Whether archive should be extracted directly into purelib.
157    root_is_purelib: bool,
158
159    /// Files constituting the wheel.
160    manifest: FileManifest,
161
162    /// The modified time to write for files in the wheel archive.
163    modified_time: time::OffsetDateTime,
164}
165
166impl WheelBuilder {
167    /// Create a new instance with a package name and version.
168    pub fn new(distribution: impl ToString, version: impl ToString) -> Self {
169        Self {
170            distribution: distribution.to_string(),
171            version: version.to_string(),
172            build_tag: None,
173            python_tag: "py3".to_string(),
174            abi_tag: "none".to_string(),
175            platform_tag: "any".to_string(),
176            generator: "rust-python-packaging".to_string(),
177            root_is_purelib: false,
178            manifest: FileManifest::default(),
179            modified_time: time::OffsetDateTime::now_utc(),
180        }
181    }
182
183    /// Obtain the build tag for this wheel.
184    pub fn build_tag(&self) -> Option<&str> {
185        self.build_tag.as_deref()
186    }
187
188    /// Set the build tag for this wheel.
189    pub fn set_build_tag(&mut self, v: impl ToString) {
190        self.build_tag = Some(v.to_string());
191    }
192
193    /// Obtain the compatibility tag.
194    pub fn tag(&self) -> String {
195        format!("{}-{}-{}", self.python_tag, self.abi_tag, self.platform_tag)
196    }
197
198    /// Set the compatibility tag from a value.
199    pub fn set_tag(&mut self, tag: impl ToString) -> Result<()> {
200        let tag = tag.to_string();
201
202        let mut parts = tag.splitn(3, '-');
203
204        let python = parts
205            .next()
206            .ok_or_else(|| anyhow!("could not parse Python tag"))?;
207        let abi = parts
208            .next()
209            .ok_or_else(|| anyhow!("could not parse ABI tag"))?;
210        let platform = parts
211            .next()
212            .ok_or_else(|| anyhow!("could not parse Platform tag"))?;
213
214        self.set_python_tag(python);
215        self.set_abi_tag(abi);
216        self.set_platform_tag(platform);
217
218        Ok(())
219    }
220
221    /// Obtain the Python component of the compatibility tag.
222    pub fn python_tag(&self) -> &str {
223        &self.python_tag
224    }
225
226    /// Set the Python component of the compatibility tag.
227    pub fn set_python_tag(&mut self, v: impl ToString) {
228        self.python_tag = v.to_string();
229    }
230
231    /// Obtain the ABI component of the compatibility tag.
232    pub fn abi_tag(&self) -> &str {
233        &self.abi_tag
234    }
235
236    /// Set the ABI component of the compatibility tag.
237    pub fn set_abi_tag(&mut self, v: impl ToString) {
238        self.abi_tag = v.to_string();
239    }
240
241    /// Obtain the platform component of the compatibility tag.
242    pub fn platform_tag(&self) -> &str {
243        &self.platform_tag
244    }
245
246    /// Set the platform component of the compatibility tag.
247    pub fn set_platform_tag(&mut self, v: impl ToString) {
248        self.platform_tag = v.to_string();
249    }
250
251    /// Obtain the `Generator` value for the `WHEEL` file.
252    pub fn generator(&self) -> &str {
253        &self.generator
254    }
255
256    /// Set the `Generator` value for the `WHEEL` file.
257    pub fn set_generator(&mut self, v: impl ToString) {
258        self.generator = v.to_string();
259    }
260
261    /// Obtain the `Root-Is-Purelib` value.
262    pub fn root_is_purelib(&self) -> bool {
263        self.root_is_purelib
264    }
265
266    /// Set the value for `Root-Is-Purelib`.
267    ///
268    /// If `true`, the wheel archive is extracted directly into `purelib`. If `false`,
269    /// it is extracted to `platlib`.
270    pub fn set_root_is_purelib(&mut self, v: bool) {
271        self.root_is_purelib = v;
272    }
273
274    /// Obtain the modified time for files in the wheel archive.
275    pub fn modified_time(&self) -> time::OffsetDateTime {
276        self.modified_time
277    }
278
279    /// Set the modified time for files in the wheel archive.
280    pub fn set_modified_time(&mut self, v: time::OffsetDateTime) {
281        self.modified_time = v;
282    }
283
284    fn normalized_distribution(&self) -> String {
285        self.distribution.to_lowercase().replace('-', "_")
286    }
287
288    fn dist_info_path(&self) -> PathBuf {
289        PathBuf::from(format!(
290            "{}-{}.dist-info",
291            self.normalized_distribution(),
292            self.version
293        ))
294    }
295
296    /// Add a file to the wheel at the given path.
297    ///
298    /// No validation of the path is performed.
299    pub fn add_file(&mut self, path: impl AsRef<Path>, file: impl Into<FileEntry>) -> Result<()> {
300        self.manifest.add_file_entry(path, file)?;
301
302        Ok(())
303    }
304
305    /// Add a file to the `.dist-info/` directory.
306    ///
307    /// Attempts to add the `RECORD` file will work. However, the content will be
308    /// ignored and regenerated as part of wheel building.
309    pub fn add_file_dist_info(
310        &mut self,
311        path: impl AsRef<Path>,
312        file: impl Into<FileEntry>,
313    ) -> Result<()> {
314        self.manifest
315            .add_file_entry(self.dist_info_path().join(path), file)?;
316
317        Ok(())
318    }
319
320    /// Add a file to a `.data/<destination>/` directory.
321    ///
322    /// `destination` is the name of a well-known Python installation directory. e.g.
323    /// `{purelib, platlib, headers, scripts, data}`. When the wheel is installed,
324    /// files in these `.data/<destination>/` directories are moved to the corresponding
325    /// path location within the targeted environment.
326    ///
327    /// No validation of the `destination` values is performed.
328    pub fn add_file_data(
329        &mut self,
330        destination: impl ToString,
331        path: impl AsRef<Path>,
332        file: impl Into<FileEntry>,
333    ) -> Result<()> {
334        self.manifest.add_file_entry(
335            PathBuf::from(format!(
336                "{}-{}.data",
337                self.normalized_distribution(),
338                self.version
339            ))
340            .join(destination.to_string())
341            .join(path),
342            file,
343        )?;
344
345        Ok(())
346    }
347
348    /// Construct the contents of the `.dist-info/WHEEL` file.
349    fn derive_wheel_file(&self) -> String {
350        format!(
351            "Wheel-Version: 1.0\nGenerator: {}\nRoot-Is-Purelib: {}\nTag: {}\n",
352            self.generator,
353            self.root_is_purelib,
354            self.tag()
355        )
356    }
357
358    fn derive_metadata_file(&self) -> String {
359        format!(
360            "Metadata-Version: 2.1\nName: {}\nVersion: {}\n",
361            self.distribution, self.version
362        )
363    }
364
365    /// Derive the content of a `.dist-info/RECORD` file in a wheel.
366    ///
367    /// This iterates the contents of a [FileManifest] and derives digests and
368    /// other metadata and assembles it into the appropriate format.
369    pub fn derive_record_file(&self, manifest: &FileManifest) -> Result<String> {
370        let mut lines = manifest
371            .iter_entries()
372            .map(|(path, entry)| {
373                let content = entry
374                    .resolve_content()
375                    .with_context(|| format!("resolving content for {}", path.display()))?;
376
377                let mut digest = sha2::Sha256::new();
378                digest.update(&content);
379
380                Ok(format!(
381                    "{},sha256={},{}",
382                    path.display(),
383                    base64::encode_engine(digest.finalize().as_slice(), &base64_engine()),
384                    content.len()
385                ))
386            })
387            .collect::<Result<Vec<_>>>()?;
388
389        lines.push(format!("{}/RECORD,,\n", self.dist_info_path().display()));
390
391        Ok(lines.join("\n"))
392    }
393
394    /// Obtain the file name for this wheel, as currently configured.
395    ///
396    /// The file name of a wheel is of the form
397    /// `{distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl`,
398    /// per PEP 427. Each component is escaped with a regular expression.
399    pub fn wheel_file_name(&self) -> String {
400        let mut parts = vec![self.normalized_distribution(), self.version.clone()];
401
402        if let Some(v) = &self.build_tag {
403            parts.push(v.clone());
404        }
405
406        parts.push(self.python_tag.clone());
407        parts.push(self.abi_tag.clone());
408        parts.push(self.platform_tag.clone());
409
410        let s = parts
411            .iter()
412            .map(|x| RE_FILENAME_ESCAPE.replace_all(x, "_"))
413            .collect::<Vec<_>>()
414            .join("-");
415
416        format!("{}.whl", s)
417    }
418
419    /// Obtain a [FileManifest] holding the contents of the built wheel.
420    ///
421    /// This function does most of the work to construct the built wheel. It will
422    /// derive special files like `.dist-info/WHEEL` and `.dist-info/RECORD` and
423    /// join them with files already registered in the builder.
424    pub fn build_file_manifest(&self) -> Result<FileManifest> {
425        let mut m = self.manifest.clone();
426
427        // Add the .dist-info/WHEEL file if it hasn't been provided already.
428        if !m.has_path(self.dist_info_path().join("WHEEL")) {
429            m.add_file_entry(
430                self.dist_info_path().join("WHEEL"),
431                self.derive_wheel_file().as_bytes(),
432            )?;
433        }
434
435        // Add the .dist-info/METADATA file if it hasn't been provided already.
436        if !m.has_path(self.dist_info_path().join("METADATA")) {
437            m.add_file_entry(
438                self.dist_info_path().join("METADATA"),
439                self.derive_metadata_file().as_bytes(),
440            )?;
441        }
442
443        // We derive the RECORD file. But it could have been added as a file. Ensure
444        // it doesn't exist.
445        m.remove(self.dist_info_path().join("RECORD"));
446
447        m.add_file_entry(
448            self.dist_info_path().join("RECORD"),
449            self.derive_record_file(&m)
450                .context("deriving RECORD file")?
451                .as_bytes(),
452        )?;
453
454        Ok(m)
455    }
456
457    /// Writes the contents of a wheel file to a writable destination.
458    ///
459    /// Wheels are zip files. So this function effectively materializes a zip file
460    /// to the specified writer.
461    pub fn write_wheel_data(&self, writer: &mut (impl Write + Seek)) -> Result<()> {
462        let m = self
463            .build_file_manifest()
464            .context("building wheel file manifest")?;
465
466        // We place the special .dist-info/ files last, as recommended by PEP 427.
467        let mut files = m.iter_files().collect::<Vec<_>>();
468        let dist_info_path = self.dist_info_path();
469        files.sort_by(|a, b| {
470            if a.path().starts_with(&dist_info_path) && !b.path().starts_with(&dist_info_path) {
471                Ordering::Greater
472            } else if b.path().starts_with(&dist_info_path)
473                && !a.path().starts_with(&dist_info_path)
474            {
475                Ordering::Less
476            } else {
477                a.path().cmp(b.path())
478            }
479        });
480
481        let mut zf = zip::ZipWriter::new(writer);
482
483        for file in files.into_iter() {
484            let options = zip::write::FileOptions::default()
485                .unix_permissions(if file.entry().is_executable() {
486                    0o0755
487                } else {
488                    0o0644
489                })
490                .last_modified_time(
491                    zip::DateTime::from_date_and_time(
492                        self.modified_time.year() as u16,
493                        self.modified_time.month() as u8,
494                        self.modified_time.day(),
495                        self.modified_time.hour(),
496                        self.modified_time.minute(),
497                        self.modified_time.second(),
498                    )
499                    .map_err(|_| anyhow!("could not convert time to zip::DateTime"))?,
500                );
501
502            zf.start_file(format!("{}", file.path().display()), options)?;
503            zf.write_all(
504                &file
505                    .entry()
506                    .resolve_content()
507                    .with_context(|| format!("resolving content of {}", file.path().display()))?,
508            )
509            .with_context(|| format!("writing zip member {}", file.path().display()))?;
510        }
511
512        zf.finish().context("finishing zip file")?;
513
514        Ok(())
515    }
516
517    /// Write the wheel file into a given directory, which must exist.
518    ///
519    /// Returns the path of the written wheel file on success.
520    ///
521    /// The wheel file isn't created until after wheel content generation. So
522    /// the only scenario in which the file would exist but not have appropriate
523    /// content is if some kind of I/O error occurred.
524    pub fn write_wheel_into_directory(&self, directory: impl AsRef<Path>) -> Result<PathBuf> {
525        let path = directory.as_ref().join(self.wheel_file_name());
526
527        let mut cursor = std::io::Cursor::new(Vec::<u8>::new());
528        self.write_wheel_data(&mut cursor)
529            .context("creating wheel zip data")?;
530
531        std::fs::write(&path, cursor.into_inner())
532            .with_context(|| format!("writing wheel data to {}", path.display()))?;
533
534        Ok(path)
535    }
536}
537
#[cfg(test)]
mod test {
    use super::*;

    /// Create the builder every test starts from.
    fn new_builder() -> WheelBuilder {
        WheelBuilder::new("my-package", "0.1")
    }

    /// Assert that the wheel built by `builder` holds exactly `expected` at `path`.
    fn assert_manifest_entry(
        builder: &WheelBuilder,
        path: &str,
        expected: FileEntry,
    ) -> Result<()> {
        let manifest = builder.build_file_manifest()?;
        assert_eq!(manifest.get(path), Some(&expected));

        Ok(())
    }

    /// A builder without registered files yields only the derived files.
    #[test]
    fn empty() -> Result<()> {
        let builder = new_builder();

        // Materializing the zip data must succeed as well.
        let mut dest = std::io::Cursor::new(Vec::<u8>::new());
        builder.write_wheel_data(&mut dest)?;

        let m = builder.build_file_manifest()?;
        assert_eq!(m.iter_entries().count(), 3);

        let wheel: FileEntry = b"Wheel-Version: 1.0\nGenerator: rust-python-packaging\nRoot-Is-Purelib: false\nTag: py3-none-any\n".as_ref().into();
        assert_eq!(m.get("my_package-0.1.dist-info/WHEEL"), Some(&wheel));

        let metadata: FileEntry = b"Metadata-Version: 2.1\nName: my-package\nVersion: 0.1\n"
            .as_ref()
            .into();
        assert_eq!(m.get("my_package-0.1.dist-info/METADATA"), Some(&metadata));

        let record: FileEntry = b"my_package-0.1.dist-info/METADATA,sha256=sXUNNYpfVReu7VHhVzSbKiT5ciO4Fwcwm7icBNiYn3Y,52\nmy_package-0.1.dist-info/WHEEL,sha256=76DhAzqMvlOgtCOiUNpWcD643b1CXd507uRH1hq6fQw,93\nmy_package-0.1.dist-info/RECORD,,\n".as_ref().into();
        assert_eq!(m.get("my_package-0.1.dist-info/RECORD"), Some(&record));

        Ok(())
    }

    /// Each tag setter is reflected in the wheel file name.
    #[test]
    fn wheel_file_name() -> Result<()> {
        let mut builder = new_builder();
        assert_eq!(builder.wheel_file_name(), "my_package-0.1-py3-none-any.whl");

        builder.set_python_tag("py39");
        assert_eq!(
            builder.wheel_file_name(),
            "my_package-0.1-py39-none-any.whl"
        );

        builder.set_abi_tag("abi");
        assert_eq!(builder.wheel_file_name(), "my_package-0.1-py39-abi-any.whl");

        builder.set_platform_tag("platform");
        assert_eq!(
            builder.wheel_file_name(),
            "my_package-0.1-py39-abi-platform.whl"
        );

        // Setting the whole tag at once overrides all three components.
        builder.set_tag("py3-none-any")?;
        assert_eq!(builder.wheel_file_name(), "my_package-0.1-py3-none-any.whl");

        builder.set_build_tag("build");
        assert_eq!(
            builder.wheel_file_name(),
            "my_package-0.1-build-py3-none-any.whl"
        );

        Ok(())
    }

    /// A caller-provided WHEEL file is kept instead of the derived one.
    #[test]
    fn custom_wheel_file() -> Result<()> {
        let mut builder = new_builder();
        builder.add_file_dist_info("WHEEL", vec![42])?;

        assert_manifest_entry(
            &builder,
            "my_package-0.1.dist-info/WHEEL",
            vec![42].into(),
        )
    }

    /// A caller-provided METADATA file is kept instead of the derived one.
    #[test]
    fn custom_metadata_file() -> Result<()> {
        let mut builder = new_builder();
        builder.add_file_dist_info("METADATA", vec![42])?;

        assert_manifest_entry(
            &builder,
            "my_package-0.1.dist-info/METADATA",
            vec![42].into(),
        )
    }

    /// Data files land under `.data/<destination>/`.
    #[test]
    fn add_file_data() -> Result<()> {
        let mut builder = new_builder();
        builder.add_file_data("purelib", "__init__.py", vec![42])?;

        assert_manifest_entry(
            &builder,
            "my_package-0.1.data/purelib/__init__.py",
            vec![42].into(),
        )
    }
}