python_packaging/
bytecode.rs

1// Copyright 2022 Gregory Szorc.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9/*! Work with Python bytecode. */
10
11use {
12    super::resource::BytecodeOptimizationLevel,
13    anyhow::{anyhow, Context, Result},
14    byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt},
15    std::{
16        io::{BufRead, BufReader, Read, Write},
17        path::Path,
18        process,
19    },
20};
21
/// Contents of `bytecodecompiler.py`, embedded into this crate at compile time.
///
/// [`BytecodeCompiler::new`] writes these bytes to a script file and runs that
/// file with the requested Python executable.
pub const BYTECODE_COMPILER: &[u8] = include_bytes!("bytecodecompiler.py");
23
24/// An entity that can compile Python bytecode.
25pub trait PythonBytecodeCompiler {
26    /// Obtain the magic number to use in the bytecode header.
27    fn get_magic_number(&self) -> u32;
28
29    /// Compile Python source into bytecode with an optimization level.
30    fn compile(
31        &mut self,
32        source: &[u8],
33        filename: &str,
34        optimize: BytecodeOptimizationLevel,
35        output_mode: CompileMode,
36    ) -> Result<Vec<u8>>;
37}
38
/// Compiles Python bytecode by delegating to a separate Python process.
#[derive(Debug)]
pub struct BytecodeCompiler {
    /// Handle to the spawned Python process; requests go to its stdin and
    /// responses are read from its stdout.
    command: process::Child,

    /// Bytecode header magic number, as reported by the Python process when
    /// the compiler was created.
    magic_number: u32,
}
47
/// Output mode for BytecodeCompiler.
///
/// This is a plain, fieldless selector, so it is cheap to copy and can be
/// compared and debug-printed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompileMode {
    /// Emit just Python bytecode.
    Bytecode,
    /// Emit .pyc header with hash verification.
    PycCheckedHash,
    /// Emit .pyc header with no hash verification.
    PycUncheckedHash,
}
57
58impl BytecodeCompiler {
59    /// Create a bytecode compiler using a Python executable.
60    ///
61    /// A Python process will be started and it will start executing a Python
62    /// source file embedded in this crate. That process interacts with this
63    /// object via a pipe, which is used to send bytecode compilation
64    /// requests and receive the compiled bytecode. The process is terminated
65    /// when this object is dropped.
66    ///
67    /// A Python script is written to the directory passed. This should ideally be
68    /// a temporary directory. The file name is deterministic, so it isn't safe
69    /// for multiple callers to simultaneously pass the same directory. The temporary
70    /// file is deleted before this function returns. Ideally this function would use
71    /// a proper temporary file internally. The reason this isn't done is to avoid
72    /// an extra crate dependency.
73    pub fn new(python: &Path, script_dir: impl AsRef<Path>) -> Result<BytecodeCompiler> {
74        let script_path = script_dir.as_ref().join("bytecode-compiler.py");
75        std::fs::write(&script_path, BYTECODE_COMPILER)
76            .with_context(|| format!("writing Python script to {}", script_path.display()))?;
77
78        let mut command = process::Command::new(python)
79            .arg(&script_path)
80            .stdin(process::Stdio::piped())
81            .stdout(process::Stdio::piped())
82            .spawn()
83            .with_context(|| format!("spawning {}", python.display()))?;
84
85        let stdin = command
86            .stdin
87            .as_mut()
88            .ok_or_else(|| anyhow!("unable to get stdin"))
89            .with_context(|| format!("obtaining stdin from {} process", python.display()))?;
90
91        stdin.write_all(b"magic_number\n").with_context(|| {
92            format!(
93                "writing magic_number command request to {} process",
94                python.display()
95            )
96        })?;
97        stdin
98            .flush()
99            .with_context(|| format!("flushing stdin to {} process", python.display()))?;
100
101        let stdout = command
102            .stdout
103            .as_mut()
104            .ok_or_else(|| anyhow!("unable to get stdout"))?;
105        let magic_number = stdout.read_u32::<LittleEndian>().with_context(|| {
106            format!(
107                "reading magic number from invoked {} process",
108                python.display()
109            )
110        })?;
111
112        std::fs::remove_file(&script_path)
113            .with_context(|| format!("deleting {}", script_path.display()))?;
114
115        Ok(BytecodeCompiler {
116            command,
117            magic_number,
118        })
119    }
120}
121
122impl PythonBytecodeCompiler for BytecodeCompiler {
123    fn get_magic_number(&self) -> u32 {
124        self.magic_number
125    }
126
127    fn compile(
128        self: &mut BytecodeCompiler,
129        source: &[u8],
130        filename: &str,
131        optimize: BytecodeOptimizationLevel,
132        output_mode: CompileMode,
133    ) -> Result<Vec<u8>> {
134        let stdin = self.command.stdin.as_mut().expect("failed to get stdin");
135        let stdout = self.command.stdout.as_mut().expect("failed to get stdout");
136
137        let mut reader = BufReader::new(stdout);
138
139        stdin
140            .write_all(b"compile\n")
141            .context("writing compile command")?;
142        stdin
143            .write_all(filename.len().to_string().as_bytes())
144            .context("writing filename length")?;
145        stdin.write_all(b"\n")?;
146        stdin
147            .write_all(source.len().to_string().as_bytes())
148            .context("writing source code length")?;
149        stdin.write_all(b"\n")?;
150        stdin.write_all(i32::from(optimize).to_string().as_bytes())?;
151        stdin.write_all(b"\n")?;
152        stdin
153            .write_all(match output_mode {
154                CompileMode::Bytecode => b"bytecode",
155                CompileMode::PycCheckedHash => b"pyc-checked-hash",
156                CompileMode::PycUncheckedHash => b"pyc-unchecked-hash",
157            })
158            .context("writing format")?;
159        stdin.write_all(b"\n")?;
160        stdin
161            .write_all(filename.as_bytes())
162            .context("writing filename")?;
163        stdin.write_all(source).context("writing source code")?;
164        stdin.flush().context("flushing")?;
165
166        let mut code_s = String::new();
167        reader
168            .read_line(&mut code_s)
169            .context("reading result code")?;
170        let code_s = code_s.trim_end();
171        let code = code_s.parse::<u8>().unwrap();
172
173        match code {
174            0 => {
175                let mut len_s = String::new();
176                reader
177                    .read_line(&mut len_s)
178                    .context("reading output size line")?;
179
180                let len_s = len_s.trim_end();
181                let bytecode_len = len_s.parse::<u64>().unwrap();
182
183                let mut bytecode: Vec<u8> = Vec::new();
184                reader
185                    .take(bytecode_len)
186                    .read_to_end(&mut bytecode)
187                    .context("reading bytecode result")?;
188
189                Ok(bytecode)
190            }
191            1 => {
192                let mut len_s = String::new();
193                reader
194                    .read_line(&mut len_s)
195                    .context("reading error string length line")?;
196
197                let len_s = len_s.trim_end();
198                let error_len = len_s.parse::<u64>().unwrap();
199
200                let mut error_data = vec![];
201                reader
202                    .take(error_len)
203                    .read_to_end(&mut error_data)
204                    .context("reading error message")?;
205
206                Err(anyhow!(
207                    "compiling error: {}",
208                    String::from_utf8(error_data)?
209                ))
210            }
211            _ => Err(anyhow!(
212                "unexpected result code from compile command: {}",
213                code
214            )),
215        }
216    }
217}
218
219impl Drop for BytecodeCompiler {
220    fn drop(&mut self) {
221        let stdin = self.command.stdin.as_mut().expect("failed to get stdin");
222        let _ = stdin.write_all(b"exit\n").and_then(|()| stdin.flush());
223
224        self.command.wait().expect("compiler process did not exit");
225    }
226}
227
/// Selects the kind of `.pyc` header to produce and carries its payload.
#[derive(Debug, Clone, Copy)]
pub enum BytecodeHeaderMode {
    /// Header built from a `(modified time, source size)` pair.
    ModifiedTimeAndSourceSize((u32, u32)),
    /// Header embedding a source hash that is to be checked against the
    /// source file.
    CheckedHash(u64),
    /// Header embedding a source hash that is not to be checked.
    UncheckedHash(u64),
}
238
239/// Compute the header for a .pyc file.
240pub fn compute_bytecode_header(magic_number: u32, mode: BytecodeHeaderMode) -> Result<Vec<u8>> {
241    let mut header: Vec<u8> = Vec::new();
242
243    header.write_u32::<LittleEndian>(magic_number)?;
244
245    match mode {
246        BytecodeHeaderMode::ModifiedTimeAndSourceSize((mtime, source_size)) => {
247            header.write_u32::<LittleEndian>(0)?;
248            header.write_u32::<LittleEndian>(mtime)?;
249            header.write_u32::<LittleEndian>(source_size)?;
250        }
251        BytecodeHeaderMode::CheckedHash(hash) => {
252            header.write_u32::<LittleEndian>(3)?;
253            header.write_u64::<LittleEndian>(hash)?;
254        }
255        BytecodeHeaderMode::UncheckedHash(hash) => {
256            header.write_u32::<LittleEndian>(1)?;
257            header.write_u64::<LittleEndian>(hash)?;
258        }
259    }
260
261    assert_eq!(header.len(), 16);
262
263    Ok(header)
264}
265
#[cfg(test)]
mod tests {
    use super::*;

    /// Magic number shared by all fixtures; its little-endian bytes are `U\r\r\n`.
    const MAGIC_NUMBER: u32 = 168627541;

    /// Each header mode must serialize to the expected 16 byte header.
    #[test]
    fn test_header() -> Result<()> {
        let cases: [(BytecodeHeaderMode, &[u8]); 3] = [
            (
                BytecodeHeaderMode::ModifiedTimeAndSourceSize((5, 10)),
                b"U\r\r\n\x00\x00\x00\x00\x05\x00\x00\x00\x0a\x00\x00\x00",
            ),
            (
                BytecodeHeaderMode::CheckedHash(0),
                b"U\r\r\n\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
            ),
            (
                BytecodeHeaderMode::UncheckedHash(0),
                b"U\r\r\n\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
            ),
        ];

        for &(mode, expected) in cases.iter() {
            assert_eq!(compute_bytecode_header(MAGIC_NUMBER, mode)?, expected);
        }

        Ok(())
    }
}