Skip to main content

rustpython_derive_impl/
compile_bytecode.rs

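//! Parsing and processing for this form:
//! ```ignore
//! py_compile!(
//!     // exactly one of:
//!     source = "python_source_code",
//!     // or
//!     file = "file/path/relative/to/this/file",
//!     // or, accepted by py_freeze!() only:
//!     dir = "directory/path/relative/to/this/file",
//!
//!     // the mode to compile the code in, defaults to exec mode
//!     mode = "exec", // or "eval" or "single"
//!     // the path put into the CodeObject, defaults to "frozen"
//!     module_name = "frozen",
//!     // path of the crate the generated code refers to,
//!     // defaults to `::rustpython_vm`
//!     crate_name = "::rustpython_vm",
//! )
//! ```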
15
16use crate::Diagnostic;
17use proc_macro2::{Span, TokenStream};
18use quote::quote;
19use rustpython_compiler_core::{Mode, bytecode::CodeObject, frozen};
20use std::{
21    collections::HashMap,
22    fs,
23    path::{Path, PathBuf},
24};
25use syn::{
26    self, LitByteStr, LitStr, Macro,
27    parse::{ParseStream, Parser, Result as ParseResult},
28    spanned::Spanned,
29};
30
/// Where the Python code handed to the macro comes from.
///
/// For the path-based variants, `base` is the directory containing the Rust
/// file in which the path literal was written, and `rel_path` is the literal
/// itself; the two are joined before any file-system access.
enum CompilationSourceKind {
    /// A single Python file on disk.
    File { base: PathBuf, rel_path: PathBuf },
    /// Python source written inline in the macro invocation (dedented before
    /// it is compiled).
    SourceCode(String),
    /// A directory that is walked recursively for `.py` files.
    Dir { base: PathBuf, rel_path: PathBuf },
}
39
40struct CompiledModule {
41    code: CodeObject,
42    package: bool,
43}
44
45struct CompilationSource {
46    kind: CompilationSourceKind,
47    span: (Span, Span),
48}
49
50pub trait Compiler {
51    fn compile(
52        &self,
53        source: &str,
54        mode: Mode,
55        module_name: String,
56    ) -> Result<CodeObject, Box<dyn core::error::Error>>;
57}
58
59impl CompilationSource {
60    fn compile_string<D: core::fmt::Display, F: FnOnce() -> D>(
61        &self,
62        source: &str,
63        mode: Mode,
64        module_name: String,
65        compiler: &dyn Compiler,
66        origin: F,
67    ) -> Result<CodeObject, Diagnostic> {
68        compiler.compile(source, mode, module_name).map_err(|err| {
69            Diagnostic::spans_error(
70                self.span,
71                format!("Python compile error from {}: {}", origin(), err),
72            )
73        })
74    }
75
76    fn compile(
77        &self,
78        mode: Mode,
79        module_name: String,
80        compiler: &dyn Compiler,
81    ) -> Result<HashMap<String, CompiledModule>, Diagnostic> {
82        match &self.kind {
83            CompilationSourceKind::Dir { base, rel_path } => {
84                self.compile_dir(base, &base.join(rel_path), String::new(), mode, compiler)
85            }
86            _ => Ok(hashmap! {
87                module_name.clone() => CompiledModule {
88                    code: self.compile_single(mode, module_name, compiler)?,
89                    package: false,
90                },
91            }),
92        }
93    }
94
95    fn compile_single(
96        &self,
97        mode: Mode,
98        module_name: String,
99        compiler: &dyn Compiler,
100    ) -> Result<CodeObject, Diagnostic> {
101        match &self.kind {
102            CompilationSourceKind::File { base, rel_path } => {
103                let path = base.join(rel_path);
104                let source = fs::read_to_string(&path).map_err(|err| {
105                    Diagnostic::spans_error(
106                        self.span,
107                        format!("Error reading file {path:?}: {err}"),
108                    )
109                })?;
110                self.compile_string(&source, mode, module_name, compiler, || rel_path.display())
111            }
112            CompilationSourceKind::SourceCode(code) => self.compile_string(
113                &textwrap::dedent(code),
114                mode,
115                module_name,
116                compiler,
117                || "string literal",
118            ),
119            CompilationSourceKind::Dir { .. } => {
120                unreachable!("Can't use compile_single with directory source")
121            }
122        }
123    }
124
125    fn compile_dir(
126        &self,
127        base: &Path,
128        path: &Path,
129        parent: String,
130        mode: Mode,
131        compiler: &dyn Compiler,
132    ) -> Result<HashMap<String, CompiledModule>, Diagnostic> {
133        let mut code_map = HashMap::new();
134        let paths = fs::read_dir(path)
135            .or_else(|e| {
136                if cfg!(windows)
137                    && let Ok(real_path) = fs::read_to_string(path.canonicalize().unwrap())
138                {
139                    return fs::read_dir(real_path.trim());
140                }
141                Err(e)
142            })
143            .map_err(|err| {
144                Diagnostic::spans_error(self.span, format!("Error listing dir {path:?}: {err}"))
145            })?;
146        for path in paths {
147            let path = path.map_err(|err| {
148                Diagnostic::spans_error(self.span, format!("Failed to list file: {err}"))
149            })?;
150            let path = path.path();
151            let file_name = path.file_name().unwrap().to_str().ok_or_else(|| {
152                Diagnostic::spans_error(self.span, format!("Invalid UTF-8 in file name {path:?}"))
153            })?;
154            if path.is_dir() {
155                code_map.extend(self.compile_dir(
156                    base,
157                    &path,
158                    if parent.is_empty() {
159                        file_name.to_string()
160                    } else {
161                        format!("{parent}.{file_name}")
162                    },
163                    mode,
164                    compiler,
165                )?);
166            } else if file_name.ends_with(".py") {
167                let stem = path.file_stem().unwrap().to_str().unwrap();
168                let is_init = stem == "__init__";
169                let module_name = if is_init {
170                    parent.clone()
171                } else if parent.is_empty() {
172                    stem.to_owned()
173                } else {
174                    format!("{parent}.{stem}")
175                };
176
177                let compile_path = |src_path: &Path| {
178                    let source = fs::read_to_string(src_path).map_err(|err| {
179                        Diagnostic::spans_error(
180                            self.span,
181                            format!("Error reading file {path:?}: {err}"),
182                        )
183                    })?;
184                    self.compile_string(&source, mode, module_name.clone(), compiler, || {
185                        path.strip_prefix(base).ok().unwrap_or(&path).display()
186                    })
187                };
188                let code = compile_path(&path).or_else(|e| {
189                    if cfg!(windows)
190                        && let Ok(real_path) = fs::read_to_string(path.canonicalize().unwrap())
191                    {
192                        let joined = path.parent().unwrap().join(real_path.trim());
193                        if joined.exists() {
194                            return compile_path(&joined);
195                        } else {
196                            return Err(e);
197                        }
198                    }
199                    Err(e)
200                });
201
202                let code = match code {
203                    Ok(code) => code,
204                    Err(_)
205                        if stem.starts_with("badsyntax_")
206                            | parent.ends_with(".encoded_modules") =>
207                    {
208                        // TODO: handle with macro arg rather than hard-coded path
209                        continue;
210                    }
211                    Err(e) => return Err(e),
212                };
213
214                code_map.insert(
215                    module_name,
216                    CompiledModule {
217                        code,
218                        package: is_init,
219                    },
220                );
221            }
222        }
223        Ok(code_map)
224    }
225}
226
227impl PyCompileArgs {
228    fn parse(input: TokenStream, allow_dir: bool) -> Result<Self, Diagnostic> {
229        let mut module_name = None;
230        let mut mode = None;
231        let mut source: Option<CompilationSource> = None;
232        let mut crate_name = None;
233
234        fn assert_source_empty(source: &Option<CompilationSource>) -> Result<(), syn::Error> {
235            if let Some(source) = source {
236                Err(syn::Error::new(
237                    source.span.0,
238                    "Cannot have more than one source",
239                ))
240            } else {
241                Ok(())
242            }
243        }
244
245        syn::meta::parser(|meta| {
246            let ident = meta
247                .path
248                .get_ident()
249                .ok_or_else(|| meta.error("unknown arg"))?;
250            let check_str = || meta.value()?.call(parse_str);
251            let str_path = || {
252                let s = check_str()?;
253                let mut base_path = s
254                    .span()
255                    .unwrap()
256                    .local_file()
257                    .ok_or_else(|| err_span!(s, "filepath literal has no span information"))?;
258                base_path.pop();
259                Ok::<_, syn::Error>((base_path, PathBuf::from(s.value())))
260            };
261            if ident == "mode" {
262                let s = check_str()?;
263                match s.value().parse() {
264                    Ok(mode_val) => mode = Some(mode_val),
265                    Err(e) => bail_span!(s, "{}", e),
266                }
267            } else if ident == "module_name" {
268                module_name = Some(check_str()?.value())
269            } else if ident == "source" {
270                assert_source_empty(&source)?;
271                let code = check_str()?.value();
272                source = Some(CompilationSource {
273                    kind: CompilationSourceKind::SourceCode(code),
274                    span: (ident.span(), meta.input.cursor().span()),
275                });
276            } else if ident == "file" {
277                assert_source_empty(&source)?;
278                let (base, rel_path) = str_path()?;
279                source = Some(CompilationSource {
280                    kind: CompilationSourceKind::File { base, rel_path },
281                    span: (ident.span(), meta.input.cursor().span()),
282                });
283            } else if ident == "dir" {
284                if !allow_dir {
285                    bail_span!(ident, "py_compile doesn't accept dir")
286                }
287
288                assert_source_empty(&source)?;
289                let (base, rel_path) = str_path()?;
290                source = Some(CompilationSource {
291                    kind: CompilationSourceKind::Dir { base, rel_path },
292                    span: (ident.span(), meta.input.cursor().span()),
293                });
294            } else if ident == "crate_name" {
295                let name = check_str()?.parse()?;
296                crate_name = Some(name);
297            } else {
298                return Err(meta.error("unknown attr"));
299            }
300            Ok(())
301        })
302        .parse2(input)?;
303
304        let source = source.ok_or_else(|| {
305            syn::Error::new(
306                Span::call_site(),
307                "Must have either file or source in py_compile!()/py_freeze!()",
308            )
309        })?;
310
311        Ok(Self {
312            source,
313            mode: mode.unwrap_or(Mode::Exec),
314            module_name: module_name.unwrap_or_else(|| "frozen".to_owned()),
315            crate_name: crate_name.unwrap_or_else(|| syn::parse_quote!(::rustpython_vm)),
316        })
317    }
318}
319
320fn parse_str(input: ParseStream<'_>) -> ParseResult<LitStr> {
321    let span = input.span();
322    if input.peek(LitStr) {
323        input.parse()
324    } else if let Ok(mac) = input.parse::<Macro>() {
325        Ok(LitStr::new(&mac.tokens.to_string(), mac.span()))
326    } else {
327        Err(syn::Error::new(span, "Expected string or stringify macro"))
328    }
329}
330
331struct PyCompileArgs {
332    source: CompilationSource,
333    mode: Mode,
334    module_name: String,
335    crate_name: syn::Path,
336}
337
338pub fn impl_py_compile(
339    input: TokenStream,
340    compiler: &dyn Compiler,
341) -> Result<TokenStream, Diagnostic> {
342    let args = PyCompileArgs::parse(input, false)?;
343
344    let crate_name = args.crate_name;
345    let code = args
346        .source
347        .compile_single(args.mode, args.module_name, compiler)?;
348
349    let frozen = frozen::FrozenCodeObject::encode(&code);
350    let bytes = LitByteStr::new(&frozen.bytes, Span::call_site());
351
352    let output = quote! {
353        #crate_name::frozen::FrozenCodeObject { bytes: &#bytes[..] }
354    };
355
356    Ok(output)
357}
358
359pub fn impl_py_freeze(
360    input: TokenStream,
361    compiler: &dyn Compiler,
362) -> Result<TokenStream, Diagnostic> {
363    let args = PyCompileArgs::parse(input, true)?;
364
365    let crate_name = args.crate_name;
366    let code_map = args.source.compile(args.mode, args.module_name, compiler)?;
367
368    let data = frozen::FrozenLib::encode(code_map.iter().map(|(k, v)| {
369        let v = frozen::FrozenModule {
370            code: frozen::FrozenCodeObject::encode(&v.code),
371            package: v.package,
372        };
373        (&**k, v)
374    }));
375    let bytes = LitByteStr::new(&data.bytes, Span::call_site());
376
377    let output = quote! {
378        #crate_name::frozen::FrozenLib::from_ref(#bytes)
379    };
380
381    Ok(output)
382}