Skip to main content

rustpython_vm/vm/
interpreter.rs

1#[cfg(all(unix, feature = "threading"))]
2use super::StopTheWorldState;
3use super::{Context, PyConfig, PyGlobalState, VirtualMachine, setting::Settings, thread};
4use crate::{
5    PyResult, builtins, common::rc::PyRc, frozen::FrozenModule, getpath, py_freeze, stdlib::atexit,
6    vm::PyBaseExceptionRef,
7};
8use alloc::collections::BTreeMap;
9use core::sync::atomic::Ordering;
10
/// Boxed one-shot initialization callback that receives mutable access to the
/// `VirtualMachine`.
type InitFunc = Box<dyn FnOnce(&mut VirtualMachine)>;
12
/// Exit code used when stdout/stderr flush fails during interpreter shutdown.
/// Matches CPython's behavior (see cpython/Python/pylifecycle.c).
///
/// `Interpreter::finalize` returns this value only when the exit code would
/// otherwise have been 0.
const EXITCODE_FLUSH_FAILURE: u32 = 120;
16
/// Configuration builder for constructing an Interpreter.
///
/// This is the preferred way to configure and create an interpreter with custom modules.
/// Modules must be registered before the interpreter is built,
/// similar to CPython's `PyImport_AppendInittab` which must be called before `Py_Initialize`.
///
/// # Example
/// ```
/// use rustpython_vm::Interpreter;
///
/// let builder = Interpreter::builder(Default::default());
/// // In practice, add stdlib: builder.add_native_modules(&stdlib_module_defs(&builder.ctx))
/// let interp = builder.build();
/// ```
pub struct InterpreterBuilder {
    // Settings handed to the VM initialization when `build` is called.
    settings: Settings,
    /// Context handle; `new` fills it with a clone of `Context::genesis()`.
    pub ctx: PyRc<Context>,
    // Native module definitions accumulated by `add_native_module(s)`.
    module_defs: Vec<&'static builtins::PyModuleDef>,
    // `(name, module)` pairs accumulated by `add_frozen_modules`.
    frozen_modules: Vec<(&'static str, FrozenModule)>,
    // Hooks accumulated by `init_hook`, kept in insertion order.
    init_hooks: Vec<InitFunc>,
}
38
39/// Private helper to initialize a VM with settings, context, and custom initialization.
40fn initialize_main_vm<F>(
41    settings: Settings,
42    ctx: PyRc<Context>,
43    module_defs: Vec<&'static builtins::PyModuleDef>,
44    frozen_modules: Vec<(&'static str, FrozenModule)>,
45    init_hooks: Vec<InitFunc>,
46    init: F,
47) -> (VirtualMachine, PyRc<PyGlobalState>)
48where
49    F: FnOnce(&mut VirtualMachine),
50{
51    use crate::codecs::CodecsRegistry;
52    use crate::common::hash::HashSecret;
53    use crate::common::lock::PyMutex;
54    use crate::warn::WarningsState;
55    use core::sync::atomic::{AtomicBool, AtomicU64};
56    use crossbeam_utils::atomic::AtomicCell;
57
58    let paths = getpath::init_path_config(&settings);
59    let config = PyConfig::new(settings, paths);
60
61    // Build module_defs map from builtin modules + additional modules
62    let mut all_module_defs: BTreeMap<&'static str, &'static builtins::PyModuleDef> =
63        crate::stdlib::builtin_module_defs(&ctx)
64            .into_iter()
65            .chain(module_defs)
66            .map(|def| (def.name.as_str(), def))
67            .collect();
68
69    // Register sysconfigdata under platform-specific name as well
70    if let Some(&sysconfigdata_def) = all_module_defs.get("_sysconfigdata") {
71        use std::sync::OnceLock;
72        static SYSCONFIGDATA_NAME: OnceLock<&'static str> = OnceLock::new();
73        let leaked_name = *SYSCONFIGDATA_NAME.get_or_init(|| {
74            let name = crate::stdlib::sys::sysconfigdata_name();
75            Box::leak(name.into_boxed_str())
76        });
77        all_module_defs.insert(leaked_name, sysconfigdata_def);
78    }
79
80    // Create hash secret
81    let seed = match config.settings.hash_seed {
82        Some(seed) => seed,
83        None => super::process_hash_secret_seed(),
84    };
85    let hash_secret = HashSecret::new(seed);
86
87    // Create codec registry and warnings state
88    let codec_registry = CodecsRegistry::new(&ctx);
89    let warnings = WarningsState::init_state(&ctx);
90
91    // Create int_max_str_digits
92    let int_max_str_digits = AtomicCell::new(match config.settings.int_max_str_digits {
93        -1 => 4300,
94        other => other,
95    } as usize);
96
97    // Initialize frozen modules (core + user-provided)
98    let mut frozen: std::collections::HashMap<&'static str, FrozenModule, ahash::RandomState> =
99        core_frozen_inits().collect();
100    frozen.extend(frozen_modules);
101
102    // Create PyGlobalState
103    let global_state = PyRc::new(PyGlobalState {
104        config,
105        module_defs: all_module_defs,
106        frozen,
107        stacksize: AtomicCell::new(0),
108        thread_count: AtomicCell::new(0),
109        hash_secret,
110        atexit_funcs: PyMutex::default(),
111        codec_registry,
112        finalizing: AtomicBool::new(false),
113        warnings,
114        override_frozen_modules: AtomicCell::new(0),
115        before_forkers: PyMutex::default(),
116        after_forkers_child: PyMutex::default(),
117        after_forkers_parent: PyMutex::default(),
118        int_max_str_digits,
119        switch_interval: AtomicCell::new(0.005),
120        global_trace_func: PyMutex::default(),
121        global_profile_func: PyMutex::default(),
122        #[cfg(feature = "threading")]
123        main_thread_ident: AtomicCell::new(0),
124        #[cfg(feature = "threading")]
125        thread_frames: parking_lot::Mutex::new(std::collections::HashMap::new()),
126        #[cfg(feature = "threading")]
127        thread_handles: parking_lot::Mutex::new(Vec::new()),
128        #[cfg(feature = "threading")]
129        shutdown_handles: parking_lot::Mutex::new(Vec::new()),
130        monitoring: PyMutex::default(),
131        monitoring_events: AtomicCell::new(0),
132        instrumentation_version: AtomicU64::new(0),
133        #[cfg(all(unix, feature = "threading"))]
134        stop_the_world: StopTheWorldState::new(),
135    });
136
137    // Create VM with the global state
138    // Note: Don't clone here - init_hooks need exclusive access to mutate state
139    let mut vm = VirtualMachine::new(ctx, global_state);
140
141    // Execute initialization hooks (can mutate vm.state)
142    for hook in init_hooks {
143        hook(&mut vm);
144    }
145
146    // Call custom init function (can mutate vm.state)
147    init(&mut vm);
148
149    vm.initialize();
150
151    // Clone global_state for Interpreter after all initialization is done
152    let global_state = vm.state.clone();
153    (vm, global_state)
154}
155
156impl InterpreterBuilder {
157    /// Create a new interpreter configuration with default settings.
158    pub fn new() -> Self {
159        Self {
160            settings: Settings::default(),
161            ctx: Context::genesis().clone(),
162            module_defs: Vec::new(),
163            frozen_modules: Vec::new(),
164            init_hooks: Vec::new(),
165        }
166    }
167
168    /// Set custom settings for the interpreter.
169    ///
170    /// If called multiple times, only the last settings will be used.
171    pub fn settings(mut self, settings: Settings) -> Self {
172        self.settings = settings;
173        self
174    }
175
176    /// Add a single native module definition.
177    ///
178    /// # Example
179    /// ```
180    /// use rustpython_vm::{Interpreter, builtins::PyModuleDef};
181    ///
182    /// let builder = Interpreter::builder(Default::default());
183    /// // Note: In practice, use module_def from your #[pymodule]
184    /// // let def = mymodule::module_def(&builder.ctx);
185    /// // let interp = builder.add_native_module(def).build();
186    /// let interp = builder.build();
187    /// ```
188    pub fn add_native_module(self, def: &'static builtins::PyModuleDef) -> Self {
189        self.add_native_modules(&[def])
190    }
191
192    /// Add multiple native module definitions.
193    ///
194    /// # Example
195    /// ```
196    /// use rustpython_vm::Interpreter;
197    ///
198    /// let builder = Interpreter::builder(Default::default());
199    /// // In practice, use module_defs from rustpython_stdlib:
200    /// // let defs = rustpython_stdlib::stdlib_module_defs(&builder.ctx);
201    /// // let interp = builder.add_native_modules(&defs).build();
202    /// let interp = builder.build();
203    /// ```
204    pub fn add_native_modules(mut self, defs: &[&'static builtins::PyModuleDef]) -> Self {
205        self.module_defs.extend_from_slice(defs);
206        self
207    }
208
209    /// Add a custom initialization hook.
210    ///
211    /// Hooks are executed in the order they are added during interpreter creation.
212    /// This function will be called after modules are registered but before
213    /// the VM is initialized, allowing for additional customization.
214    ///
215    /// # Example
216    /// ```
217    /// use rustpython_vm::Interpreter;
218    ///
219    /// let interp = Interpreter::builder(Default::default())
220    ///     .init_hook(|vm| {
221    ///         // Custom initialization
222    ///     })
223    ///     .build();
224    /// ```
225    pub fn init_hook<F>(mut self, init: F) -> Self
226    where
227        F: FnOnce(&mut VirtualMachine) + 'static,
228    {
229        self.init_hooks.push(Box::new(init));
230        self
231    }
232
233    /// Add frozen modules to the interpreter.
234    ///
235    /// Frozen modules are Python modules compiled into the binary.
236    /// This method accepts any iterator of (name, FrozenModule) pairs.
237    ///
238    /// # Example
239    /// ```
240    /// use rustpython_vm::Interpreter;
241    ///
242    /// let interp = Interpreter::builder(Default::default())
243    ///     // In practice: .add_frozen_modules(rustpython_pylib::FROZEN_STDLIB)
244    ///     .build();
245    /// ```
246    pub fn add_frozen_modules<I>(mut self, frozen: I) -> Self
247    where
248        I: IntoIterator<Item = (&'static str, FrozenModule)>,
249    {
250        self.frozen_modules.extend(frozen);
251        self
252    }
253
254    /// Build the interpreter.
255    ///
256    /// This consumes the configuration and returns a fully initialized Interpreter.
257    pub fn build(self) -> Interpreter {
258        let (vm, global_state) = initialize_main_vm(
259            self.settings,
260            self.ctx,
261            self.module_defs,
262            self.frozen_modules,
263            self.init_hooks,
264            |_| {}, // No additional init needed
265        );
266        Interpreter { global_state, vm }
267    }
268
269    /// Alias for `build()` for compatibility with the `interpreter()` pattern.
270    pub fn interpreter(self) -> Interpreter {
271        self.build()
272    }
273}
274
275impl Default for InterpreterBuilder {
276    fn default() -> Self {
277        Self::new()
278    }
279}
280
/// The general interface for the VM
///
/// # Examples
/// Runs a simple embedded hello world program.
/// ```
/// use rustpython_vm::Interpreter;
/// use rustpython_vm::compiler::Mode;
/// Interpreter::without_stdlib(Default::default()).enter(|vm| {
///     let scope = vm.new_scope_with_builtins();
///     let source = r#"print("Hello World!")"#;
///     let code_obj = vm.compile(
///             source,
///             Mode::Exec,
///             "<embedded>".to_owned(),
///     ).map_err(|err| vm.new_syntax_error(&err, Some(source))).unwrap();
///     vm.run_code_obj(code_obj, scope).unwrap();
/// });
/// ```
pub struct Interpreter {
    /// Handle to the global state; a clone of the main VM's `state` taken once
    /// initialization has finished.
    pub global_state: PyRc<PyGlobalState>,
    // The main virtual machine; callers reach it through `enter`.
    vm: VirtualMachine,
}
303
304impl Interpreter {
305    /// Create a new interpreter configuration builder.
306    ///
307    /// # Example
308    /// ```
309    /// use rustpython_vm::Interpreter;
310    ///
311    /// let builder = Interpreter::builder(Default::default());
312    /// // In practice, add stdlib: builder.add_native_modules(&stdlib_module_defs(&builder.ctx))
313    /// let interp = builder.build();
314    /// ```
315    pub fn builder(settings: Settings) -> InterpreterBuilder {
316        InterpreterBuilder::new().settings(settings)
317    }
318
319    /// This is a bare unit to build up an interpreter without the standard library.
320    /// To create an interpreter with the standard library with the `rustpython` crate, use `rustpython::InterpreterBuilder`.
321    /// To create an interpreter without the `rustpython` crate, but only with `rustpython-vm`,
322    /// try to build one from the source code of `InterpreterBuilder`. It will not be a one-liner but it also will not be too hard.
323    pub fn without_stdlib(settings: Settings) -> Self {
324        Self::with_init(settings, |_| {})
325    }
326
327    /// Create with initialize function taking mutable vm reference.
328    ///
329    /// Note: This is a legacy API. To add stdlib, use `Interpreter::builder()` instead.
330    pub fn with_init<F>(settings: Settings, init: F) -> Self
331    where
332        F: FnOnce(&mut VirtualMachine),
333    {
334        let (vm, global_state) = initialize_main_vm(
335            settings,
336            Context::genesis().clone(),
337            Vec::new(), // No module_defs
338            Vec::new(), // No frozen_modules
339            Vec::new(), // No init_hooks
340            init,
341        );
342        Self { global_state, vm }
343    }
344
345    /// Run a function with the main virtual machine and return a PyResult of the result.
346    ///
347    /// To enter vm context multiple times or to avoid buffer/exception management, this function is preferred.
348    /// `enter` is lightweight and it returns a python object in PyResult.
349    /// You can stop or continue the execution multiple times by calling `enter`.
350    ///
351    /// To finalize the vm once all desired `enter`s are called, calling `finalize` will be helpful.
352    ///
353    /// See also [`Interpreter::run`] for managed way to run the interpreter.
354    pub fn enter<F, R>(&self, f: F) -> R
355    where
356        F: FnOnce(&VirtualMachine) -> R,
357    {
358        thread::enter_vm(&self.vm, || f(&self.vm))
359    }
360
361    /// Run [`Interpreter::enter`] and call [`VirtualMachine::expect_pyresult`] for the result.
362    ///
363    /// This function is useful when you want to expect a result from the function,
364    /// but also print useful panic information when exception raised.
365    ///
366    /// See also [`Interpreter::enter`] and [`VirtualMachine::expect_pyresult`] for more information.
367    pub fn enter_and_expect<F, R>(&self, f: F, msg: &str) -> R
368    where
369        F: FnOnce(&VirtualMachine) -> PyResult<R>,
370    {
371        self.enter(|vm| {
372            let result = f(vm);
373            vm.expect_pyresult(result, msg)
374        })
375    }
376
377    /// Run a function with the main virtual machine and return exit code.
378    ///
379    /// To enter vm context only once and safely terminate the vm, this function is preferred.
380    /// Unlike [`Interpreter::enter`], `run` calls finalize and returns exit code.
381    /// You will not be able to obtain Python exception in this way.
382    ///
383    /// See [`Interpreter::finalize`] for the finalization steps.
384    /// See also [`Interpreter::enter`] for pure function call to obtain Python exception.
385    pub fn run<F>(self, f: F) -> u32
386    where
387        F: FnOnce(&VirtualMachine) -> PyResult<()>,
388    {
389        let res = self.enter(|vm| f(vm));
390        self.finalize(res.err())
391    }
392
393    /// Finalize vm and turns an exception to exit code.
394    ///
395    /// Finalization steps (matching Py_FinalizeEx):
396    /// 1. Flush stdout and stderr.
397    /// 1. Handle exit exception and turn it to exit code.
398    /// 1. Call threading._shutdown() to join non-daemon threads.
399    /// 1. Run atexit exit functions.
400    /// 1. Set finalizing flag (suppresses unraisable exceptions from __del__).
401    /// 1. Forced GC collection pass (collect cycles while builtins are available).
402    /// 1. Module finalization (finalize_modules).
403    /// 1. Final stdout/stderr flush.
404    ///
405    /// Note that calling `finalize` is not necessary by purpose though.
406    pub fn finalize(self, exc: Option<PyBaseExceptionRef>) -> u32 {
407        self.enter(|vm| {
408            let mut flush_status = vm.flush_std();
409
410            // See if any exception leaked out:
411            let exit_code = if let Some(exc) = exc {
412                vm.handle_exit_exception(exc)
413            } else {
414                0
415            };
416
417            // Wait for thread shutdown - call threading._shutdown() if available.
418            // This waits for all non-daemon threads to complete.
419            // threading module may not be imported, so ignore import errors.
420            if let Ok(threading) = vm.import("threading", 0)
421                && let Ok(shutdown) = threading.get_attr("_shutdown", vm)
422                && let Err(e) = shutdown.call((), vm)
423            {
424                vm.run_unraisable(
425                    e,
426                    Some("Exception ignored in threading shutdown".to_owned()),
427                    threading,
428                );
429            }
430
431            // Run atexit handlers before setting finalizing flag.
432            // This allows unraisable exceptions from atexit handlers to be reported.
433            atexit::_run_exitfuncs(vm);
434
435            // Now suppress unraisable exceptions from daemon threads and __del__
436            // methods during the rest of shutdown.
437            vm.state.finalizing.store(true, Ordering::Release);
438
439            // GC pass - collect cycles before module cleanup
440            crate::gc_state::gc_state().collect_force(2);
441
442            // Module finalization: remove modules from sys.modules, GC collect
443            // (while builtins is still available for __del__), then clear module dicts.
444            vm.finalize_modules();
445
446            if vm.flush_std() < 0 && flush_status == 0 {
447                flush_status = -1;
448            }
449
450            // Match CPython: if exit_code is 0 and stdout flush failed, exit 120
451            if exit_code == 0 && flush_status < 0 {
452                EXITCODE_FLUSH_FAILURE
453            } else {
454                exit_code
455            }
456        })
457    }
458}
459
460fn core_frozen_inits() -> impl Iterator<Item = (&'static str, FrozenModule)> {
461    let iter = core::iter::empty();
462    macro_rules! ext_modules {
463        ($iter:ident, $($t:tt)*) => {
464            let $iter = $iter.chain(py_freeze!($($t)*));
465        };
466    }
467
468    // Python modules that the vm calls into, but are not actually part of the stdlib. They could
469    // in theory be implemented in Rust, but are easiest to do in Python for one reason or another.
470    // Includes _importlib_bootstrap and _importlib_bootstrap_external
471    ext_modules!(
472        iter,
473        dir = "../../Lib/python_builtins",
474        crate_name = "rustpython_compiler_core"
475    );
476
477    // core stdlib Python modules that the vm calls into, but are still used in Python
478    // application code, e.g. copyreg
479    // FIXME: Initializing core_modules here results duplicated frozen module generation for core_modules.
480    // We need a way to initialize this modules for both `Interpreter::without_stdlib()` and `InterpreterBuilder::new().init_stdlib().interpreter()`
481    // #[cfg(not(feature = "freeze-stdlib"))]
482    ext_modules!(
483        iter,
484        dir = "../../Lib/core_modules",
485        crate_name = "rustpython_compiler_core"
486    );
487
488    // Collect frozen module entries
489    let mut entries: Vec<_> = iter.collect();
490
491    // Add test module aliases
492    if let Some(hello_code) = entries
493        .iter()
494        .find(|(n, _)| *n == "__hello__")
495        .map(|(_, m)| m.code)
496    {
497        entries.push((
498            "__hello_alias__",
499            FrozenModule {
500                code: hello_code,
501                package: false,
502            },
503        ));
504        entries.push((
505            "__phello_alias__",
506            FrozenModule {
507                code: hello_code,
508                package: true,
509            },
510        ));
511        entries.push((
512            "__phello_alias__.spam",
513            FrozenModule {
514                code: hello_code,
515                package: false,
516            },
517        ));
518        entries.push((
519            "__hello_only__",
520            FrozenModule {
521                code: hello_code,
522                package: false,
523            },
524        ));
525    }
526    if let Some(code) = entries
527        .iter()
528        .find(|(n, _)| *n == "__phello__")
529        .map(|(_, m)| m.code)
530    {
531        entries.push((
532            "__phello__.__init__",
533            FrozenModule {
534                code,
535                package: false,
536            },
537        ));
538    }
539    if let Some(code) = entries
540        .iter()
541        .find(|(n, _)| *n == "__phello__.ham")
542        .map(|(_, m)| m.code)
543    {
544        entries.push((
545            "__phello__.ham.__init__",
546            FrozenModule {
547                code,
548                package: false,
549            },
550        ));
551    }
552    entries.into_iter()
553}
554
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        PyObjectRef,
        builtins::{PyStr, int},
    };
    use malachite_bigint::ToBigInt;

    /// Adding the Python integers 33 and 12 through the VM yields 45.
    #[test]
    fn test_add_py_integers() {
        let interp = Interpreter::without_stdlib(Default::default());
        interp.enter(|vm| {
            let lhs: PyObjectRef = vm.ctx.new_int(33_i32).into();
            let rhs: PyObjectRef = vm.ctx.new_int(12_i32).into();
            let sum = vm._add(&lhs, &rhs).unwrap();
            let expected = 45_i32.to_bigint().unwrap();
            assert_eq!(*int::get_value(&sum), expected);
        });
    }

    /// Multiplying the Python string "Hello " by 4 through the VM repeats it four times.
    #[test]
    fn test_multiply_str() {
        let interp = Interpreter::without_stdlib(Default::default());
        interp.enter(|vm| {
            let text = vm.new_pyobj(crate::common::ascii!("Hello "));
            let times = vm.new_pyobj(4_i32);
            let product = vm._mul(&text, &times).unwrap();
            let repeated = product.downcast_ref::<PyStr>().unwrap();
            assert_eq!(repeated.as_wtf8(), "Hello Hello Hello Hello ");
        });
    }
}
585}