python_packaging/
zip_app_builder.rs

// Copyright 2022 Gregory Szorc.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

/*! Functionality for building .zip file based Python applications. */
10
use {
    crate::{
        bytecode::{CompileMode, PythonBytecodeCompiler},
        module_util::resolve_path_for_module,
        resource::{BytecodeOptimizationLevel, PythonModuleBytecode, PythonModuleSource},
    },
    anyhow::{anyhow, Context, Result},
    simple_file_manifest::{set_executable, FileEntry, FileManifest},
    std::{
        convert::TryFrom,
        io::{BufWriter, Seek, Write},
        path::Path,
    },
    zip::CompressionMethod,
};
25
26/// Interface for building .zip file based Python applications.
27///
28/// This type implements functionality provided by the Python stdlib `zipapp`
29/// module. It is used to produce zip files containing Python resources
30/// (notably module source and bytecode) that Python interpreters can execute
31/// as standalone applications.
32///
33/// The zip archives can contain a shebang line (`#!<interpreter>`) denoting
34/// a program to use to execute the zipapp. This is typically `python` or some
35/// such variant.
36pub struct ZipAppBuilder {
37    /// Interpreter to use in shebang line.
38    interpreter: Option<String>,
39
40    /// Files to store in the zip archive.
41    manifest: FileManifest,
42
43    /// Compression method to use within archive.
44    compression_method: CompressionMethod,
45
46    /// The modified time to write for files in the zip archive.
47    modified_time: time::OffsetDateTime,
48
49    /// Bytecode compiler to use for generating bytecode from Python source code.
50    compiler: Option<Box<dyn PythonBytecodeCompiler>>,
51
52    /// Optimization level for Python bytecode.
53    optimize_level: BytecodeOptimizationLevel,
54}
55
56impl Default for ZipAppBuilder {
57    fn default() -> Self {
58        Self {
59            interpreter: None,
60            manifest: FileManifest::default(),
61            compression_method: CompressionMethod::Stored,
62            modified_time: time::OffsetDateTime::now_utc(),
63            compiler: None,
64            optimize_level: BytecodeOptimizationLevel::Zero,
65        }
66    }
67}
68
69impl ZipAppBuilder {
70    /// Obtain the interpreter to use in the shebang line.
71    pub fn interpreter(&self) -> Option<&str> {
72        self.interpreter.as_deref()
73    }
74
75    /// Set the interpreter to use in the shebang line.
76    pub fn set_interpreter(&mut self, v: impl ToString) {
77        self.interpreter = Some(v.to_string());
78    }
79
80    /// Obtain the modified time for files in the wheel archive.
81    pub fn modified_time(&self) -> time::OffsetDateTime {
82        self.modified_time
83    }
84
85    /// Set the modified time for files in the wheel archive.
86    pub fn set_modified_time(&mut self, v: time::OffsetDateTime) {
87        self.modified_time = v;
88    }
89
90    /// Set the Python bytecode compiler to use to turn source code into bytecode.
91    pub fn set_bytecode_compiler(&mut self, v: Box<dyn PythonBytecodeCompiler>) {
92        self.compiler = Some(v);
93    }
94
95    /// Obtain the bytecode optimization level used when generating Python bytecode.
96    pub fn optimize_level(&self) -> BytecodeOptimizationLevel {
97        self.optimize_level
98    }
99
100    /// Set the bytecode optimization level used when generating Python bytecode.
101    pub fn set_optimize_level(&mut self, v: BytecodeOptimizationLevel) {
102        self.optimize_level = v;
103    }
104
105    /// Add a file to the zip archive.
106    ///
107    /// This is the lowest level mechanism to add an entry to the zip archive. The
108    /// path/file will be added without modification.
109    pub fn add_file_entry(
110        &mut self,
111        path: impl AsRef<Path>,
112        entry: impl Into<FileEntry>,
113    ) -> Result<()> {
114        Ok(self.manifest.add_file_entry(path, entry)?)
115    }
116
117    /// Add Python module source code to the archive.
118    ///
119    /// This only adds source code, not bytecode.
120    pub fn add_python_module_source(
121        &mut self,
122        source: &PythonModuleSource,
123        prefix: &str,
124    ) -> Result<()> {
125        let path = source.resolve_path(prefix);
126
127        self.manifest
128            .add_file_entry(path, FileEntry::new_from_data(source.source.clone(), false))?;
129
130        Ok(())
131    }
132
133    /// Add Python module source and corresponding bytecode to the archive.
134    ///
135    /// This will automatically compile bytecode at the specified optimization level
136    /// given the source code provided.
137    pub fn add_python_module_source_and_bytecode(
138        &mut self,
139        source: &PythonModuleSource,
140        prefix: &str,
141    ) -> Result<()> {
142        let compiler = self
143            .compiler
144            .as_mut()
145            .ok_or_else(|| anyhow!("bytecode compiler not available"))?;
146
147        let py_path = source.resolve_path(prefix);
148
149        // The zip-based importer doesn't use the standard __pycache__ path layout when
150        // searching for .pyc files. Rather, the old Python 2 layout of searching for
151        // a .pyc file in the same directory that the .py would be in is used.
152        let pyc_path = py_path.with_extension("pyc");
153
154        let bytecode = source
155            .as_bytecode_module(self.optimize_level)
156            .compile(compiler.as_mut(), CompileMode::PycUncheckedHash)?;
157
158        self.manifest.add_file_entry(
159            py_path,
160            FileEntry::new_from_data(source.source.clone(), false),
161        )?;
162        self.manifest
163            .add_file_entry(pyc_path, FileEntry::new_from_data(bytecode, false))?;
164
165        Ok(())
166    }
167
168    /// Add Python module bytecode, without corresponding source code.
169    pub fn add_python_module_bytecode(
170        &mut self,
171        bytecode: &PythonModuleBytecode,
172        prefix: &str,
173    ) -> Result<()> {
174        // The path to bytecode in zip archives isn't the same as the typical filesystem
175        // layout so we have to compute as if this is a .py file then change the extension.
176        let path = resolve_path_for_module(prefix, &bytecode.name, bytecode.is_package, None)
177            .with_extension("pyc");
178
179        self.manifest.add_file_entry(
180            path,
181            FileEntry::new_from_data(bytecode.resolve_bytecode()?, false),
182        )?;
183
184        Ok(())
185    }
186
187    /// Define the function called when the zip-based application is executed.
188    ///
189    /// This defines a `__main__.py[c]` that invokes the `func` function in the `module` module.
190    pub fn add_main(&mut self, module: &str, func: &str, prefix: &str) -> Result<()> {
191        let source = format!(
192            "# -*- coding: utf-8 -*-\nimport {}\n{}.{}()\n",
193            module, module, func
194        );
195
196        let module = PythonModuleSource {
197            name: "__main__".to_string(),
198            source: source.as_bytes().into(),
199            is_package: false,
200            cache_tag: "".to_string(),
201            is_stdlib: false,
202            is_test: false,
203        };
204
205        if self.compiler.is_some() {
206            self.add_python_module_source_and_bytecode(&module, prefix)?;
207        } else {
208            self.add_python_module_source(&module, prefix)?;
209        }
210
211        Ok(())
212    }
213
214    /// Writes zip archive data to a writer.
215    ///
216    /// This will emit a zip archive + optional leading shebang so it is runnable
217    /// as a standalone executable file.
218    pub fn write_zip_app(&self, writer: &mut (impl Write + Seek)) -> Result<()> {
219        if let Some(interpreter) = &self.interpreter {
220            writer.write_all(format!("#!{}\n", interpreter).as_bytes())?;
221        }
222
223        self.write_zip_data(writer)?;
224
225        Ok(())
226    }
227
228    /// Write the zip archive to a filesystem path.
229    pub fn write_to_path(&self, path: impl AsRef<Path>) -> Result<()> {
230        let path = path.as_ref();
231
232        if let Some(parent) = path.parent() {
233            std::fs::create_dir_all(parent).context("creating parent directory")?;
234        }
235
236        let mut fh = std::fs::File::create(path).context("opening zip file")?;
237        self.write_zip_app(&mut fh).context("writing zip file")?;
238        set_executable(&mut fh).context("marking zip file as executable")?;
239
240        Ok(())
241    }
242
243    /// Writes zip archive data to a writer.
244    fn write_zip_data(&self, writer: &mut (impl Write + Seek)) -> Result<()> {
245        let mut zf = zip::ZipWriter::new(writer);
246
247        for file in self.manifest.iter_files() {
248            let options = zip::write::FileOptions::default()
249                .compression_method(self.compression_method)
250                .unix_permissions(if file.entry().is_executable() {
251                    0o0755
252                } else {
253                    0o0644
254                })
255                .last_modified_time(
256                    zip::DateTime::from_date_and_time(
257                        self.modified_time.year() as u16,
258                        self.modified_time.month() as u8,
259                        self.modified_time.day(),
260                        self.modified_time.hour(),
261                        self.modified_time.minute(),
262                        self.modified_time.second(),
263                    )
264                    .map_err(|_| anyhow!("could not convert time to zip::DateTime"))?,
265                );
266
267            zf.start_file(format!("{}", file.path().display()), options)?;
268            zf.write_all(
269                &file
270                    .entry()
271                    .resolve_content()
272                    .with_context(|| format!("resolving content of {}", file.path().display()))?,
273            )
274            .with_context(|| format!("writing zip member {}", file.path().display()))?;
275        }
276
277        zf.finish().context("finishing zip file")?;
278
279        Ok(())
280    }
281}
282
#[cfg(test)]
mod test {
    use {super::*, crate::testutil::FakeBytecodeCompiler, std::io::Read};

    /// Construct an in-memory, non-package module with the given name and source.
    fn module_source(name: &str, source: &[u8]) -> PythonModuleSource {
        PythonModuleSource {
            name: name.to_string(),
            source: source.to_vec().into(),
            is_package: false,
            cache_tag: "".to_string(),
            is_stdlib: false,
            is_test: false,
        }
    }

    /// A builder with no files produces a valid, empty archive.
    #[test]
    fn empty() -> Result<()> {
        let builder = ZipAppBuilder::default();
        let mut dest = std::io::Cursor::new(Vec::<u8>::new());
        builder.write_zip_app(&mut dest)?;

        let z = zip::ZipArchive::new(dest)?;
        assert_eq!(z.len(), 0);

        Ok(())
    }

    /// The shebang line precedes the archive and the archive remains readable.
    #[test]
    fn shebang() -> Result<()> {
        let mut builder = ZipAppBuilder::default();
        builder.set_interpreter("python");
        let mut dest = std::io::Cursor::new(Vec::<u8>::new());
        builder.write_zip_app(&mut dest)?;

        assert!(dest.get_ref().starts_with(b"#!python\n"));

        let z = zip::ZipArchive::new(dest)?;
        assert_eq!(z.len(), 0);

        Ok(())
    }

    /// Adding source only yields a single stored `.py` member.
    #[test]
    fn add_source() -> Result<()> {
        let mut builder = ZipAppBuilder::default();
        builder.add_python_module_source(&module_source("foo", b"foo"), "")?;

        let mut dest = std::io::Cursor::new(Vec::<u8>::new());
        builder.write_zip_app(&mut dest)?;

        let mut z = zip::ZipArchive::new(dest)?;
        assert_eq!(z.len(), 1);

        let mut zf = z.by_index(0)?;
        let mut b = Vec::<u8>::new();
        zf.read_to_end(&mut b)?;
        assert_eq!(zf.name(), "foo.py");
        assert_eq!(zf.compression(), CompressionMethod::Stored);
        assert!(zf.is_file());
        assert_eq!(b, b"foo");

        Ok(())
    }

    /// Requesting bytecode without a compiler is an error.
    #[test]
    fn add_source_and_bytecode_no_compiler() -> Result<()> {
        let mut builder = ZipAppBuilder::default();

        assert!(builder
            .add_python_module_source_and_bytecode(&module_source("", b""), "")
            .is_err());

        Ok(())
    }

    /// Source and bytecode are stored side by side under the prefix.
    #[test]
    fn add_source_and_bytecode() -> Result<()> {
        let mut builder = ZipAppBuilder::default();
        builder.set_bytecode_compiler(Box::new(FakeBytecodeCompiler { magic_number: 42 }));

        let m = module_source("foo", b"foo");

        builder.add_python_module_source_and_bytecode(&m, "lib")?;

        let mut dest = std::io::Cursor::new(Vec::<u8>::new());
        builder.write_zip_app(&mut dest)?;

        let mut z = zip::ZipArchive::new(dest)?;
        assert_eq!(z.len(), 2);

        {
            let mut zf = z.by_index(0)?;
            let mut b = Vec::<u8>::new();
            zf.read_to_end(&mut b)?;
            assert_eq!(zf.name(), "lib/foo.py");
            assert_eq!(zf.compression(), CompressionMethod::Stored);
            assert!(zf.is_file());
            assert_eq!(b, m.source.resolve_content()?);
        }

        {
            let mut zf = z.by_index(1)?;
            let mut b = Vec::<u8>::new();
            zf.read_to_end(&mut b)?;
            assert_eq!(zf.name(), "lib/foo.pyc");
            assert_eq!(zf.compression(), CompressionMethod::Stored);
            assert!(zf.is_file());
            assert_eq!(b, b"bc0foo");
        }

        Ok(())
    }

    /// Without a compiler, `add_main` writes only `__main__.py` with the expected source.
    #[test]
    fn add_main() -> Result<()> {
        let mut builder = ZipAppBuilder::default();
        builder.add_main("foo", "bar", "lib")?;

        let mut dest = std::io::Cursor::new(Vec::<u8>::new());
        builder.write_zip_app(&mut dest)?;

        let mut z = zip::ZipArchive::new(dest)?;
        assert_eq!(z.len(), 1);

        let mut zf = z.by_index(0)?;
        let mut b = Vec::<u8>::new();
        zf.read_to_end(&mut b)?;
        assert_eq!(zf.name(), "lib/__main__.py");
        assert_eq!(zf.compression(), CompressionMethod::Stored);
        assert!(zf.is_file());
        assert_eq!(
            b,
            b"# -*- coding: utf-8 -*-\nimport foo\nfoo.bar()\n".to_vec()
        );

        Ok(())
    }

    /// With a compiler, `add_main` writes both `__main__.py` and `__main__.pyc`.
    #[test]
    fn add_main_with_compiler() -> Result<()> {
        let mut builder = ZipAppBuilder::default();
        builder.set_bytecode_compiler(Box::new(FakeBytecodeCompiler { magic_number: 42 }));
        builder.add_main("foo", "bar", "lib")?;

        let mut dest = std::io::Cursor::new(Vec::<u8>::new());
        builder.write_zip_app(&mut dest)?;

        let mut z = zip::ZipArchive::new(dest)?;
        assert_eq!(z.len(), 2);

        {
            let zf = z.by_index(0)?;
            assert_eq!(zf.name(), "lib/__main__.py");
            assert!(zf.is_file());
        }

        {
            let zf = z.by_index(1)?;
            assert_eq!(zf.name(), "lib/__main__.pyc");
            assert!(zf.is_file());
        }

        Ok(())
    }
}