rustpython_derive_impl/
compile_bytecode.rs

1//! Parsing and processing for this form:
2//! ```ignore
3//! py_compile!(
4//!     // either:
5//!     source = "python_source_code",
6//!     // or
7//!     file = "file/path/relative/to/$CARGO_MANIFEST_DIR",
8//!
9//!     // the mode to compile the code in
10//!     mode = "exec", // or "eval" or "single"
11//!     // the path put into the CodeObject, defaults to "frozen"
12//!     module_name = "frozen",
13//! )
14//! ```
15
16use crate::{extract_spans, Diagnostic};
17use once_cell::sync::Lazy;
18use proc_macro2::{Span, TokenStream};
19use quote::quote;
20use rustpython_compiler_core::{bytecode::CodeObject, frozen, Mode};
21use std::{
22    collections::HashMap,
23    env, fs,
24    path::{Path, PathBuf},
25};
26use syn::{
27    self,
28    parse::{Parse, ParseStream, Result as ParseResult},
29    parse2,
30    spanned::Spanned,
31    Lit, LitByteStr, LitStr, Macro, Meta, MetaNameValue, Token,
32};
33
34static CARGO_MANIFEST_DIR: Lazy<PathBuf> = Lazy::new(|| {
35    PathBuf::from(env::var_os("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR is not present"))
36});
37
/// Where the Python code handed to the macro comes from.
///
/// Paths are stored exactly as written in the macro invocation; they are joined onto
/// `CARGO_MANIFEST_DIR` when the source is compiled.
enum CompilationSourceKind {
    /// Source is a File (Path)
    File(PathBuf),
    /// Direct Raw source code
    SourceCode(String),
    /// Source is a directory
    Dir(PathBuf),
}
46
/// A single compiled module, as produced by `CompilationSource::compile`.
struct CompiledModule {
    /// The compiled code object for the module.
    code: CodeObject,
    /// `true` when the module was compiled from an `__init__.py` file while walking a
    /// directory; always `false` for a single file or inline source.
    package: bool,
}
51
/// A source of Python code together with the location of the macro argument that named it.
struct CompilationSource {
    /// What kind of source this is (file, inline code or directory).
    kind: CompilationSourceKind,
    /// Span pair of the `name = value` argument; every diagnostic raised while reading or
    /// compiling this source is attached to it.
    span: (Span, Span),
}
56
/// Abstraction over the Python compiler used by the macros, supplied by the caller of
/// `impl_py_compile` / `impl_py_freeze`.
pub trait Compiler {
    /// Compiles `source` in the given `mode` and returns the resulting code object.
    ///
    /// `module_name` is forwarded unchanged from the macro arguments (or derived from the
    /// directory layout for `dir` sources); how it ends up in the code object is up to the
    /// implementation.
    ///
    /// # Errors
    /// Returns the implementation's error when compilation fails; callers in this file only
    /// use its `Display` output to build a diagnostic.
    fn compile(
        &self,
        source: &str,
        mode: Mode,
        module_name: String,
    ) -> Result<CodeObject, Box<dyn std::error::Error>>;
}
65
66impl CompilationSource {
67    fn compile_string<D: std::fmt::Display, F: FnOnce() -> D>(
68        &self,
69        source: &str,
70        mode: Mode,
71        module_name: String,
72        compiler: &dyn Compiler,
73        origin: F,
74    ) -> Result<CodeObject, Diagnostic> {
75        compiler.compile(source, mode, module_name).map_err(|err| {
76            Diagnostic::spans_error(
77                self.span,
78                format!("Python compile error from {}: {}", origin(), err),
79            )
80        })
81    }
82
83    fn compile(
84        &self,
85        mode: Mode,
86        module_name: String,
87        compiler: &dyn Compiler,
88    ) -> Result<HashMap<String, CompiledModule>, Diagnostic> {
89        match &self.kind {
90            CompilationSourceKind::Dir(rel_path) => self.compile_dir(
91                &CARGO_MANIFEST_DIR.join(rel_path),
92                String::new(),
93                mode,
94                compiler,
95            ),
96            _ => Ok(hashmap! {
97                module_name.clone() => CompiledModule {
98                    code: self.compile_single(mode, module_name, compiler)?,
99                    package: false,
100                },
101            }),
102        }
103    }
104
105    fn compile_single(
106        &self,
107        mode: Mode,
108        module_name: String,
109        compiler: &dyn Compiler,
110    ) -> Result<CodeObject, Diagnostic> {
111        match &self.kind {
112            CompilationSourceKind::File(rel_path) => {
113                let path = CARGO_MANIFEST_DIR.join(rel_path);
114                let source = fs::read_to_string(&path).map_err(|err| {
115                    Diagnostic::spans_error(
116                        self.span,
117                        format!("Error reading file {path:?}: {err}"),
118                    )
119                })?;
120                self.compile_string(&source, mode, module_name, compiler, || rel_path.display())
121            }
122            CompilationSourceKind::SourceCode(code) => {
123                self.compile_string(&textwrap::dedent(code), mode, module_name, compiler, || {
124                    "string literal"
125                })
126            }
127            CompilationSourceKind::Dir(_) => {
128                unreachable!("Can't use compile_single with directory source")
129            }
130        }
131    }
132
133    fn compile_dir(
134        &self,
135        path: &Path,
136        parent: String,
137        mode: Mode,
138        compiler: &dyn Compiler,
139    ) -> Result<HashMap<String, CompiledModule>, Diagnostic> {
140        let mut code_map = HashMap::new();
141        let paths = fs::read_dir(path)
142            .or_else(|e| {
143                if cfg!(windows) {
144                    if let Ok(real_path) = fs::read_to_string(path.canonicalize().unwrap()) {
145                        return fs::read_dir(real_path.trim());
146                    }
147                }
148                Err(e)
149            })
150            .map_err(|err| {
151                Diagnostic::spans_error(self.span, format!("Error listing dir {path:?}: {err}"))
152            })?;
153        for path in paths {
154            let path = path.map_err(|err| {
155                Diagnostic::spans_error(self.span, format!("Failed to list file: {err}"))
156            })?;
157            let path = path.path();
158            let file_name = path.file_name().unwrap().to_str().ok_or_else(|| {
159                Diagnostic::spans_error(self.span, format!("Invalid UTF-8 in file name {path:?}"))
160            })?;
161            if path.is_dir() {
162                code_map.extend(self.compile_dir(
163                    &path,
164                    if parent.is_empty() {
165                        file_name.to_string()
166                    } else {
167                        format!("{parent}.{file_name}")
168                    },
169                    mode,
170                    compiler,
171                )?);
172            } else if file_name.ends_with(".py") {
173                let stem = path.file_stem().unwrap().to_str().unwrap();
174                let is_init = stem == "__init__";
175                let module_name = if is_init {
176                    parent.clone()
177                } else if parent.is_empty() {
178                    stem.to_owned()
179                } else {
180                    format!("{parent}.{stem}")
181                };
182
183                let compile_path = |src_path: &Path| {
184                    let source = fs::read_to_string(src_path).map_err(|err| {
185                        Diagnostic::spans_error(
186                            self.span,
187                            format!("Error reading file {path:?}: {err}"),
188                        )
189                    })?;
190                    self.compile_string(&source, mode, module_name.clone(), compiler, || {
191                        path.strip_prefix(&*CARGO_MANIFEST_DIR)
192                            .ok()
193                            .unwrap_or(&path)
194                            .display()
195                    })
196                };
197                let code = compile_path(&path).or_else(|e| {
198                    if cfg!(windows) {
199                        if let Ok(real_path) = fs::read_to_string(path.canonicalize().unwrap()) {
200                            let joined = path.parent().unwrap().join(real_path.trim());
201                            if joined.exists() {
202                                return compile_path(&joined);
203                            } else {
204                                return Err(e);
205                            }
206                        }
207                    }
208                    Err(e)
209                });
210
211                let code = match code {
212                    Ok(code) => code,
213                    Err(_)
214                        if stem.starts_with("badsyntax_")
215                            | parent.ends_with(".encoded_modules") =>
216                    {
217                        // TODO: handle with macro arg rather than hard-coded path
218                        continue;
219                    }
220                    Err(e) => return Err(e),
221                };
222
223                code_map.insert(
224                    module_name,
225                    CompiledModule {
226                        code,
227                        package: is_init,
228                    },
229                );
230            }
231        }
232        Ok(code_map)
233    }
234}
235
/// This is essentially just a comma-separated list of Meta nodes, aka the inside of a MetaList.
struct PyCompileInput {
    /// Span at the start of the macro input; used to report a missing source argument.
    span: Span,
    /// The parsed `name = value` entries, in the order they were written.
    metas: Vec<Meta>,
}
241
242impl PyCompileInput {
243    fn parse(&self, allow_dir: bool) -> Result<PyCompileArgs, Diagnostic> {
244        let mut module_name = None;
245        let mut mode = None;
246        let mut source: Option<CompilationSource> = None;
247        let mut crate_name = None;
248
249        fn assert_source_empty(source: &Option<CompilationSource>) -> Result<(), Diagnostic> {
250            if let Some(source) = source {
251                Err(Diagnostic::spans_error(
252                    source.span,
253                    "Cannot have more than one source",
254                ))
255            } else {
256                Ok(())
257            }
258        }
259
260        for meta in &self.metas {
261            if let Meta::NameValue(name_value) = meta {
262                let ident = match name_value.path.get_ident() {
263                    Some(ident) => ident,
264                    None => continue,
265                };
266                let check_str = || match &name_value.lit {
267                    Lit::Str(s) => Ok(s),
268                    _ => Err(err_span!(name_value.lit, "{ident} must be a string")),
269                };
270                if ident == "mode" {
271                    let s = check_str()?;
272                    match s.value().parse() {
273                        Ok(mode_val) => mode = Some(mode_val),
274                        Err(e) => bail_span!(s, "{}", e),
275                    }
276                } else if ident == "module_name" {
277                    module_name = Some(check_str()?.value())
278                } else if ident == "source" {
279                    assert_source_empty(&source)?;
280                    let code = check_str()?.value();
281                    source = Some(CompilationSource {
282                        kind: CompilationSourceKind::SourceCode(code),
283                        span: extract_spans(&name_value).unwrap(),
284                    });
285                } else if ident == "file" {
286                    assert_source_empty(&source)?;
287                    let path = check_str()?.value().into();
288                    source = Some(CompilationSource {
289                        kind: CompilationSourceKind::File(path),
290                        span: extract_spans(&name_value).unwrap(),
291                    });
292                } else if ident == "dir" {
293                    if !allow_dir {
294                        bail_span!(ident, "py_compile doesn't accept dir")
295                    }
296
297                    assert_source_empty(&source)?;
298                    let path = check_str()?.value().into();
299                    source = Some(CompilationSource {
300                        kind: CompilationSourceKind::Dir(path),
301                        span: extract_spans(&name_value).unwrap(),
302                    });
303                } else if ident == "crate_name" {
304                    let name = check_str()?.parse()?;
305                    crate_name = Some(name);
306                }
307            }
308        }
309
310        let source = source.ok_or_else(|| {
311            syn::Error::new(
312                self.span,
313                "Must have either file or source in py_compile!()/py_freeze!()",
314            )
315        })?;
316
317        Ok(PyCompileArgs {
318            source,
319            mode: mode.unwrap_or(Mode::Exec),
320            module_name: module_name.unwrap_or_else(|| "frozen".to_owned()),
321            crate_name: crate_name.unwrap_or_else(|| syn::parse_quote!(::rustpython_vm)),
322        })
323    }
324}
325
326fn parse_meta(input: ParseStream) -> ParseResult<Meta> {
327    let path = input.call(syn::Path::parse_mod_style)?;
328    let eq_token: Token![=] = input.parse()?;
329    let span = input.span();
330    if input.peek(LitStr) {
331        Ok(Meta::NameValue(MetaNameValue {
332            path,
333            eq_token,
334            lit: Lit::Str(input.parse()?),
335        }))
336    } else if let Ok(mac) = input.parse::<Macro>() {
337        Ok(Meta::NameValue(MetaNameValue {
338            path,
339            eq_token,
340            lit: Lit::Str(LitStr::new(&mac.tokens.to_string(), mac.span())),
341        }))
342    } else {
343        Err(syn::Error::new(span, "Expected string or stringify macro"))
344    }
345}
346
347impl Parse for PyCompileInput {
348    fn parse(input: ParseStream) -> ParseResult<Self> {
349        let span = input.cursor().span();
350        let metas = input
351            .parse_terminated::<Meta, Token![,]>(parse_meta)?
352            .into_iter()
353            .collect();
354        Ok(PyCompileInput { span, metas })
355    }
356}
357
/// Fully resolved macro arguments, with defaults applied by `PyCompileInput::parse`.
struct PyCompileArgs {
    /// The single source (`source`, `file` or `dir`) to compile.
    source: CompilationSource,
    /// Compilation mode; `Mode::Exec` when not given.
    mode: Mode,
    /// Module name passed to the compiler; `"frozen"` when not given.
    module_name: String,
    /// Path prefix used in the generated code; `::rustpython_vm` when not given.
    crate_name: syn::Path,
}
364
365pub fn impl_py_compile(
366    input: TokenStream,
367    compiler: &dyn Compiler,
368) -> Result<TokenStream, Diagnostic> {
369    let input: PyCompileInput = parse2(input)?;
370    let args = input.parse(false)?;
371
372    let crate_name = args.crate_name;
373    let code = args
374        .source
375        .compile_single(args.mode, args.module_name, compiler)?;
376
377    let frozen = frozen::FrozenCodeObject::encode(&code);
378    let bytes = LitByteStr::new(&frozen.bytes, Span::call_site());
379
380    let output = quote! {
381        #crate_name::frozen::FrozenCodeObject { bytes: &#bytes[..] }
382    };
383
384    Ok(output)
385}
386
387pub fn impl_py_freeze(
388    input: TokenStream,
389    compiler: &dyn Compiler,
390) -> Result<TokenStream, Diagnostic> {
391    let input: PyCompileInput = parse2(input)?;
392    let args = input.parse(true)?;
393
394    let crate_name = args.crate_name;
395    let code_map = args.source.compile(args.mode, args.module_name, compiler)?;
396
397    let data = frozen::FrozenLib::encode(code_map.iter().map(|(k, v)| {
398        let v = frozen::FrozenModule {
399            code: frozen::FrozenCodeObject::encode(&v.code),
400            package: v.package,
401        };
402        (&**k, v)
403    }));
404    let bytes = LitByteStr::new(&data.bytes, Span::call_site());
405
406    let output = quote! {
407        #crate_name::frozen::FrozenLib::from_ref(#bytes)
408    };
409
410    Ok(output)
411}