python_packaging/wheel_builder.rs
1// Copyright 2022 Gregory Szorc.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! Functionality for creating wheels.
10
11use {
12 anyhow::{anyhow, Context, Result},
13 once_cell::sync::Lazy,
14 sha2::Digest,
15 simple_file_manifest::{FileEntry, FileManifest},
16 std::{
17 cmp::Ordering,
18 io::{Seek, Write},
19 path::{Path, PathBuf},
20 },
21};
22
23/// Wheel filename component escape regular expression.
24static RE_FILENAME_ESCAPE: Lazy<regex::Regex> =
25 Lazy::new(|| regex::Regex::new(r"[^\w\d.]+").unwrap());
26
27fn base64_engine() -> impl base64::engine::Engine {
28 base64::engine::fast_portable::FastPortable::from(
29 &base64::alphabet::URL_SAFE,
30 base64::engine::fast_portable::FastPortableConfig::new().with_encode_padding(false),
31 )
32}
33
/// Define and build a Python wheel from raw components.
///
/// Python wheels are glorified zip files with some special files
/// annotating the Python component therein.
///
/// # Wheel Level Parameters
///
/// Wheels are defined by a *distribution* (e.g. a Python package name),
/// a *version*, a *compatibility tag*, and an optional *build tag*.
///
/// The *compatibility tag* defines the Python, ABI, and platform
/// compatibility of the wheel. See
/// [PEP 425](https://www.python.org/dev/peps/pep-0425/) for an overview of the
/// components of the compatibility tag and their potential values.
///
/// Our default *compatibility tag* value is `py3-none-any`. This is
/// appropriate for a wheel containing pure Python code that is compatible
/// with Python 3. If your wheel has binary executables or extension modules,
/// you will want to update the compatibility tag to reflect the appropriate
/// binary compatibility.
///
/// # .dist-info/WHEEL File
///
/// Wheel archives must have a `WHEEL` file describing the wheel itself.
///
/// This file is an email-header-like document with various well-defined
/// fields.
///
/// By default, we will automatically derive a minimal `WHEEL` file based
/// on parameters passed into [Self::new] and defaults.
///
/// If you want to provide your own `WHEEL` file, simply define its content
/// by adding a custom file through [Self::add_file_dist_info].
///
/// # .dist-info/METADATA File
///
/// Wheel archives must have a `METADATA` file describing the thing being
/// distributed.
///
/// This file is an email-header-like document with various well-defined
/// fields.
///
/// By default, we will automatically derive a minimal `METADATA` file
/// based on builder state.
///
/// If you want to provide your own `METADATA` file, simply define its content
/// by adding a custom file through [Self::add_file_dist_info].
///
/// # Adding Files
///
/// Files in wheels go in 1 of 3 locations:
///
/// 1. The `.dist-info/` directory (added via [Self::add_file_dist_info]).
/// 2. Special `.data/<location>/` directories (added via [Self::add_file_data]).
/// 3. Everywhere else (added via [Self::add_file]).
///
/// Files in `.dist-info/` describe the wheel itself and the entity being
/// distributed.
///
/// Files in `.data/<location>/` are moved to the indicated `<location>` when the
/// wheel is installed. `<location>` here is the name of a Python installation
/// directory, such as `purelib` (pure Python modules and bytecode), `platlib`
/// (platform-specific / binary Python extension modules and other binaries),
/// `scripts` (executable scripts), and more.
///
/// Files in all other locations in the archive are not treated specially and are
/// extracted directly to `purelib` or `platlib`, depending on the value of
/// `Root-Is-Purelib`. A new builder starts with `Root-Is-Purelib` set to `false`;
/// use [Self::set_root_is_purelib] to change it.
///
/// # Building Wheels
///
/// Once you have modified settings and registered files, it is time to create your
/// wheel.
///
/// If you want to materialize a `.whl` file with the proper file name, call
/// [Self::write_wheel_into_directory].
///
/// If you want to just materialize the zip content of the wheel, call
/// [Self::write_wheel_data].
///
/// If you want to obtain a collection of all the files that constitute the wheel
/// before zip file generation, call [Self::build_file_manifest].
///
/// To obtain the name of the `.whl` file given current settings, call
/// [Self::wheel_file_name].
///
/// Wheel zip archive content is deterministic for the same builder instance.
/// For separate builder instances, content can be made identical by calling
/// [Self::set_modified_time] to set the modified time and using identical input
/// settings/files. (The modified time of files in zip files defaults to the time
/// when the builder instance was created, which is obviously not deterministic.)
///
/// # Validation
///
/// This type generally performs little to no validation of input data. It is up
/// to the caller to supply settings and content that constitutes a well-formed
/// wheel.
///
/// Supplementary tools like [auditwheel](https://pypi.org/project/auditwheel/) can
/// be useful for validating the content of wheels.
pub struct WheelBuilder {
    /// The primary name of the wheel, exactly as passed to the constructor.
    ///
    /// A normalized form of this value is used for archive paths and the
    /// wheel file name; the raw value is written to the derived `METADATA` file.
    distribution: String,

    /// The version component of the wheel.
    version: String,

    /// Tag denoting the build of this wheel. `None` means no build tag.
    build_tag: Option<String>,

    /// Python part of compatibility tag. Defaults to `py3`.
    python_tag: String,

    /// ABI part of compatibility tag. Defaults to `none`.
    abi_tag: String,

    /// Platform part of compatibility tag. Defaults to `any`.
    platform_tag: String,

    /// Name of tool that generated this wheel. Written as the `Generator`
    /// field of the derived `WHEEL` file.
    generator: String,

    /// Whether archive should be extracted directly into purelib.
    /// Written as the `Root-Is-Purelib` field of the derived `WHEEL` file.
    root_is_purelib: bool,

    /// Files constituting the wheel, as registered by the caller.
    ///
    /// Derived `.dist-info/` files are added to a copy of this manifest at
    /// build time; this field itself is not modified by building.
    manifest: FileManifest,

    /// The modified time to write for files in the wheel archive.
    modified_time: time::OffsetDateTime,
}
165
166impl WheelBuilder {
167 /// Create a new instance with a package name and version.
168 pub fn new(distribution: impl ToString, version: impl ToString) -> Self {
169 Self {
170 distribution: distribution.to_string(),
171 version: version.to_string(),
172 build_tag: None,
173 python_tag: "py3".to_string(),
174 abi_tag: "none".to_string(),
175 platform_tag: "any".to_string(),
176 generator: "rust-python-packaging".to_string(),
177 root_is_purelib: false,
178 manifest: FileManifest::default(),
179 modified_time: time::OffsetDateTime::now_utc(),
180 }
181 }
182
183 /// Obtain the build tag for this wheel.
184 pub fn build_tag(&self) -> Option<&str> {
185 self.build_tag.as_deref()
186 }
187
188 /// Set the build tag for this wheel.
189 pub fn set_build_tag(&mut self, v: impl ToString) {
190 self.build_tag = Some(v.to_string());
191 }
192
193 /// Obtain the compatibility tag.
194 pub fn tag(&self) -> String {
195 format!("{}-{}-{}", self.python_tag, self.abi_tag, self.platform_tag)
196 }
197
198 /// Set the compatibility tag from a value.
199 pub fn set_tag(&mut self, tag: impl ToString) -> Result<()> {
200 let tag = tag.to_string();
201
202 let mut parts = tag.splitn(3, '-');
203
204 let python = parts
205 .next()
206 .ok_or_else(|| anyhow!("could not parse Python tag"))?;
207 let abi = parts
208 .next()
209 .ok_or_else(|| anyhow!("could not parse ABI tag"))?;
210 let platform = parts
211 .next()
212 .ok_or_else(|| anyhow!("could not parse Platform tag"))?;
213
214 self.set_python_tag(python);
215 self.set_abi_tag(abi);
216 self.set_platform_tag(platform);
217
218 Ok(())
219 }
220
221 /// Obtain the Python component of the compatibility tag.
222 pub fn python_tag(&self) -> &str {
223 &self.python_tag
224 }
225
226 /// Set the Python component of the compatibility tag.
227 pub fn set_python_tag(&mut self, v: impl ToString) {
228 self.python_tag = v.to_string();
229 }
230
231 /// Obtain the ABI component of the compatibility tag.
232 pub fn abi_tag(&self) -> &str {
233 &self.abi_tag
234 }
235
236 /// Set the ABI component of the compatibility tag.
237 pub fn set_abi_tag(&mut self, v: impl ToString) {
238 self.abi_tag = v.to_string();
239 }
240
241 /// Obtain the platform component of the compatibility tag.
242 pub fn platform_tag(&self) -> &str {
243 &self.platform_tag
244 }
245
246 /// Set the platform component of the compatibility tag.
247 pub fn set_platform_tag(&mut self, v: impl ToString) {
248 self.platform_tag = v.to_string();
249 }
250
251 /// Obtain the `Generator` value for the `WHEEL` file.
252 pub fn generator(&self) -> &str {
253 &self.generator
254 }
255
256 /// Set the `Generator` value for the `WHEEL` file.
257 pub fn set_generator(&mut self, v: impl ToString) {
258 self.generator = v.to_string();
259 }
260
261 /// Obtain the `Root-Is-Purelib` value.
262 pub fn root_is_purelib(&self) -> bool {
263 self.root_is_purelib
264 }
265
266 /// Set the value for `Root-Is-Purelib`.
267 ///
268 /// If `true`, the wheel archive is extracted directly into `purelib`. If `false`,
269 /// it is extracted to `platlib`.
270 pub fn set_root_is_purelib(&mut self, v: bool) {
271 self.root_is_purelib = v;
272 }
273
274 /// Obtain the modified time for files in the wheel archive.
275 pub fn modified_time(&self) -> time::OffsetDateTime {
276 self.modified_time
277 }
278
279 /// Set the modified time for files in the wheel archive.
280 pub fn set_modified_time(&mut self, v: time::OffsetDateTime) {
281 self.modified_time = v;
282 }
283
284 fn normalized_distribution(&self) -> String {
285 self.distribution.to_lowercase().replace('-', "_")
286 }
287
288 fn dist_info_path(&self) -> PathBuf {
289 PathBuf::from(format!(
290 "{}-{}.dist-info",
291 self.normalized_distribution(),
292 self.version
293 ))
294 }
295
296 /// Add a file to the wheel at the given path.
297 ///
298 /// No validation of the path is performed.
299 pub fn add_file(&mut self, path: impl AsRef<Path>, file: impl Into<FileEntry>) -> Result<()> {
300 self.manifest.add_file_entry(path, file)?;
301
302 Ok(())
303 }
304
305 /// Add a file to the `.dist-info/` directory.
306 ///
307 /// Attempts to add the `RECORD` file will work. However, the content will be
308 /// ignored and regenerated as part of wheel building.
309 pub fn add_file_dist_info(
310 &mut self,
311 path: impl AsRef<Path>,
312 file: impl Into<FileEntry>,
313 ) -> Result<()> {
314 self.manifest
315 .add_file_entry(self.dist_info_path().join(path), file)?;
316
317 Ok(())
318 }
319
320 /// Add a file to a `.data/<destination>/` directory.
321 ///
322 /// `destination` is the name of a well-known Python installation directory. e.g.
323 /// `{purelib, platlib, headers, scripts, data}`. When the wheel is installed,
324 /// files in these `.data/<destination>/` directories are moved to the corresponding
325 /// path location within the targeted environment.
326 ///
327 /// No validation of the `destination` values is performed.
328 pub fn add_file_data(
329 &mut self,
330 destination: impl ToString,
331 path: impl AsRef<Path>,
332 file: impl Into<FileEntry>,
333 ) -> Result<()> {
334 self.manifest.add_file_entry(
335 PathBuf::from(format!(
336 "{}-{}.data",
337 self.normalized_distribution(),
338 self.version
339 ))
340 .join(destination.to_string())
341 .join(path),
342 file,
343 )?;
344
345 Ok(())
346 }
347
348 /// Construct the contents of the `.dist-info/WHEEL` file.
349 fn derive_wheel_file(&self) -> String {
350 format!(
351 "Wheel-Version: 1.0\nGenerator: {}\nRoot-Is-Purelib: {}\nTag: {}\n",
352 self.generator,
353 self.root_is_purelib,
354 self.tag()
355 )
356 }
357
358 fn derive_metadata_file(&self) -> String {
359 format!(
360 "Metadata-Version: 2.1\nName: {}\nVersion: {}\n",
361 self.distribution, self.version
362 )
363 }
364
365 /// Derive the content of a `.dist-info/RECORD` file in a wheel.
366 ///
367 /// This iterates the contents of a [FileManifest] and derives digests and
368 /// other metadata and assembles it into the appropriate format.
369 pub fn derive_record_file(&self, manifest: &FileManifest) -> Result<String> {
370 let mut lines = manifest
371 .iter_entries()
372 .map(|(path, entry)| {
373 let content = entry
374 .resolve_content()
375 .with_context(|| format!("resolving content for {}", path.display()))?;
376
377 let mut digest = sha2::Sha256::new();
378 digest.update(&content);
379
380 Ok(format!(
381 "{},sha256={},{}",
382 path.display(),
383 base64::encode_engine(digest.finalize().as_slice(), &base64_engine()),
384 content.len()
385 ))
386 })
387 .collect::<Result<Vec<_>>>()?;
388
389 lines.push(format!("{}/RECORD,,\n", self.dist_info_path().display()));
390
391 Ok(lines.join("\n"))
392 }
393
394 /// Obtain the file name for this wheel, as currently configured.
395 ///
396 /// The file name of a wheel is of the form
397 /// `{distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl`,
398 /// per PEP 427. Each component is escaped with a regular expression.
399 pub fn wheel_file_name(&self) -> String {
400 let mut parts = vec![self.normalized_distribution(), self.version.clone()];
401
402 if let Some(v) = &self.build_tag {
403 parts.push(v.clone());
404 }
405
406 parts.push(self.python_tag.clone());
407 parts.push(self.abi_tag.clone());
408 parts.push(self.platform_tag.clone());
409
410 let s = parts
411 .iter()
412 .map(|x| RE_FILENAME_ESCAPE.replace_all(x, "_"))
413 .collect::<Vec<_>>()
414 .join("-");
415
416 format!("{}.whl", s)
417 }
418
419 /// Obtain a [FileManifest] holding the contents of the built wheel.
420 ///
421 /// This function does most of the work to construct the built wheel. It will
422 /// derive special files like `.dist-info/WHEEL` and `.dist-info/RECORD` and
423 /// join them with files already registered in the builder.
424 pub fn build_file_manifest(&self) -> Result<FileManifest> {
425 let mut m = self.manifest.clone();
426
427 // Add the .dist-info/WHEEL file if it hasn't been provided already.
428 if !m.has_path(self.dist_info_path().join("WHEEL")) {
429 m.add_file_entry(
430 self.dist_info_path().join("WHEEL"),
431 self.derive_wheel_file().as_bytes(),
432 )?;
433 }
434
435 // Add the .dist-info/METADATA file if it hasn't been provided already.
436 if !m.has_path(self.dist_info_path().join("METADATA")) {
437 m.add_file_entry(
438 self.dist_info_path().join("METADATA"),
439 self.derive_metadata_file().as_bytes(),
440 )?;
441 }
442
443 // We derive the RECORD file. But it could have been added as a file. Ensure
444 // it doesn't exist.
445 m.remove(self.dist_info_path().join("RECORD"));
446
447 m.add_file_entry(
448 self.dist_info_path().join("RECORD"),
449 self.derive_record_file(&m)
450 .context("deriving RECORD file")?
451 .as_bytes(),
452 )?;
453
454 Ok(m)
455 }
456
457 /// Writes the contents of a wheel file to a writable destination.
458 ///
459 /// Wheels are zip files. So this function effectively materializes a zip file
460 /// to the specified writer.
461 pub fn write_wheel_data(&self, writer: &mut (impl Write + Seek)) -> Result<()> {
462 let m = self
463 .build_file_manifest()
464 .context("building wheel file manifest")?;
465
466 // We place the special .dist-info/ files last, as recommended by PEP 427.
467 let mut files = m.iter_files().collect::<Vec<_>>();
468 let dist_info_path = self.dist_info_path();
469 files.sort_by(|a, b| {
470 if a.path().starts_with(&dist_info_path) && !b.path().starts_with(&dist_info_path) {
471 Ordering::Greater
472 } else if b.path().starts_with(&dist_info_path)
473 && !a.path().starts_with(&dist_info_path)
474 {
475 Ordering::Less
476 } else {
477 a.path().cmp(b.path())
478 }
479 });
480
481 let mut zf = zip::ZipWriter::new(writer);
482
483 for file in files.into_iter() {
484 let options = zip::write::FileOptions::default()
485 .unix_permissions(if file.entry().is_executable() {
486 0o0755
487 } else {
488 0o0644
489 })
490 .last_modified_time(
491 zip::DateTime::from_date_and_time(
492 self.modified_time.year() as u16,
493 self.modified_time.month() as u8,
494 self.modified_time.day(),
495 self.modified_time.hour(),
496 self.modified_time.minute(),
497 self.modified_time.second(),
498 )
499 .map_err(|_| anyhow!("could not convert time to zip::DateTime"))?,
500 );
501
502 zf.start_file(format!("{}", file.path().display()), options)?;
503 zf.write_all(
504 &file
505 .entry()
506 .resolve_content()
507 .with_context(|| format!("resolving content of {}", file.path().display()))?,
508 )
509 .with_context(|| format!("writing zip member {}", file.path().display()))?;
510 }
511
512 zf.finish().context("finishing zip file")?;
513
514 Ok(())
515 }
516
517 /// Write the wheel file into a given directory, which must exist.
518 ///
519 /// Returns the path of the written wheel file on success.
520 ///
521 /// The wheel file isn't created until after wheel content generation. So
522 /// the only scenario in which the file would exist but not have appropriate
523 /// content is if some kind of I/O error occurred.
524 pub fn write_wheel_into_directory(&self, directory: impl AsRef<Path>) -> Result<PathBuf> {
525 let path = directory.as_ref().join(self.wheel_file_name());
526
527 let mut cursor = std::io::Cursor::new(Vec::<u8>::new());
528 self.write_wheel_data(&mut cursor)
529 .context("creating wheel zip data")?;
530
531 std::fs::write(&path, cursor.into_inner())
532 .with_context(|| format!("writing wheel data to {}", path.display()))?;
533
534 Ok(path)
535 }
536}
537
#[cfg(test)]
mod test {
    use super::*;

    /// A builder without registered files still yields the three derived
    /// `.dist-info/` files with the expected content.
    #[test]
    fn empty() -> Result<()> {
        let wheel = WheelBuilder::new("my-package", "0.1");

        // Producing the zip data must succeed even without registered files.
        let mut sink = std::io::Cursor::new(Vec::<u8>::new());
        wheel.write_wheel_data(&mut sink)?;

        let expected_wheel: &[u8] = b"Wheel-Version: 1.0\nGenerator: rust-python-packaging\nRoot-Is-Purelib: false\nTag: py3-none-any\n";
        let expected_metadata: &[u8] = b"Metadata-Version: 2.1\nName: my-package\nVersion: 0.1\n";
        let expected_record: &[u8] = b"my_package-0.1.dist-info/METADATA,sha256=sXUNNYpfVReu7VHhVzSbKiT5ciO4Fwcwm7icBNiYn3Y,52\nmy_package-0.1.dist-info/WHEEL,sha256=76DhAzqMvlOgtCOiUNpWcD643b1CXd507uRH1hq6fQw,93\nmy_package-0.1.dist-info/RECORD,,\n";

        let manifest = wheel.build_file_manifest()?;
        assert_eq!(manifest.iter_entries().count(), 3);
        assert_eq!(
            manifest.get("my_package-0.1.dist-info/WHEEL"),
            Some(&expected_wheel.into())
        );
        assert_eq!(
            manifest.get("my_package-0.1.dist-info/METADATA"),
            Some(&expected_metadata.into())
        );
        assert_eq!(
            manifest.get("my_package-0.1.dist-info/RECORD"),
            Some(&expected_record.into())
        );

        Ok(())
    }

    /// The wheel file name tracks every change to the tag components and the
    /// build tag.
    #[test]
    fn wheel_file_name() -> Result<()> {
        let mut wheel = WheelBuilder::new("my-package", "0.1");

        // Defaults.
        assert_eq!(wheel.wheel_file_name(), "my_package-0.1-py3-none-any.whl");

        // Individual tag components.
        wheel.set_python_tag("py39");
        assert_eq!(wheel.wheel_file_name(), "my_package-0.1-py39-none-any.whl");

        wheel.set_abi_tag("abi");
        assert_eq!(wheel.wheel_file_name(), "my_package-0.1-py39-abi-any.whl");

        wheel.set_platform_tag("platform");
        assert_eq!(
            wheel.wheel_file_name(),
            "my_package-0.1-py39-abi-platform.whl"
        );

        // Setting the whole tag at once overrides all three components.
        wheel.set_tag("py3-none-any")?;
        assert_eq!(wheel.wheel_file_name(), "my_package-0.1-py3-none-any.whl");

        // The build tag goes between the version and the Python tag.
        wheel.set_build_tag("build");
        assert_eq!(
            wheel.wheel_file_name(),
            "my_package-0.1-build-py3-none-any.whl"
        );

        Ok(())
    }

    /// A caller-provided `WHEEL` file is kept instead of the derived one.
    #[test]
    fn custom_wheel_file() -> Result<()> {
        let mut wheel = WheelBuilder::new("my-package", "0.1");
        wheel.add_file_dist_info("WHEEL", vec![42])?;

        let manifest = wheel.build_file_manifest()?;
        let found = manifest.get("my_package-0.1.dist-info/WHEEL");
        assert_eq!(found, Some(&vec![42].into()));

        Ok(())
    }

    /// A caller-provided `METADATA` file is kept instead of the derived one.
    #[test]
    fn custom_metadata_file() -> Result<()> {
        let mut wheel = WheelBuilder::new("my-package", "0.1");
        wheel.add_file_dist_info("METADATA", vec![42])?;

        let manifest = wheel.build_file_manifest()?;
        let found = manifest.get("my_package-0.1.dist-info/METADATA");
        assert_eq!(found, Some(&vec![42].into()));

        Ok(())
    }

    /// Data files land under `<distribution>-<version>.data/<destination>/`.
    #[test]
    fn add_file_data() -> Result<()> {
        let mut wheel = WheelBuilder::new("my-package", "0.1");
        wheel.add_file_data("purelib", "__init__.py", vec![42])?;

        let manifest = wheel.build_file_manifest()?;
        let found = manifest.get("my_package-0.1.data/purelib/__init__.py");
        assert_eq!(found, Some(&vec![42].into()));

        Ok(())
    }
}