rustpython_vm/vm/interpreter.rs
1#[cfg(all(unix, feature = "threading"))]
2use super::StopTheWorldState;
3use super::{Context, PyConfig, PyGlobalState, VirtualMachine, setting::Settings, thread};
4use crate::{
5 PyResult, builtins, common::rc::PyRc, frozen::FrozenModule, getpath, py_freeze, stdlib::atexit,
6 vm::PyBaseExceptionRef,
7};
8use alloc::collections::BTreeMap;
9use core::sync::atomic::Ordering;
10
11type InitFunc = Box<dyn FnOnce(&mut VirtualMachine)>;
12
/// Exit code reported when flushing stdout/stderr fails during interpreter shutdown.
///
/// The value mirrors CPython's behavior (see `cpython/Python/pylifecycle.c`).
const EXITCODE_FLUSH_FAILURE: u32 = 120;
16
17/// Configuration builder for constructing an Interpreter.
18///
19/// This is the preferred way to configure and create an interpreter with custom modules.
20/// Modules must be registered before the interpreter is built,
21/// similar to CPython's `PyImport_AppendInittab` which must be called before `Py_Initialize`.
22///
23/// # Example
24/// ```
25/// use rustpython_vm::Interpreter;
26///
27/// let builder = Interpreter::builder(Default::default());
28/// // In practice, add stdlib: builder.add_native_modules(&stdlib_module_defs(&builder.ctx))
29/// let interp = builder.build();
30/// ```
31pub struct InterpreterBuilder {
32 settings: Settings,
33 pub ctx: PyRc<Context>,
34 module_defs: Vec<&'static builtins::PyModuleDef>,
35 frozen_modules: Vec<(&'static str, FrozenModule)>,
36 init_hooks: Vec<InitFunc>,
37}
38
39/// Private helper to initialize a VM with settings, context, and custom initialization.
40fn initialize_main_vm<F>(
41 settings: Settings,
42 ctx: PyRc<Context>,
43 module_defs: Vec<&'static builtins::PyModuleDef>,
44 frozen_modules: Vec<(&'static str, FrozenModule)>,
45 init_hooks: Vec<InitFunc>,
46 init: F,
47) -> (VirtualMachine, PyRc<PyGlobalState>)
48where
49 F: FnOnce(&mut VirtualMachine),
50{
51 use crate::codecs::CodecsRegistry;
52 use crate::common::hash::HashSecret;
53 use crate::common::lock::PyMutex;
54 use crate::warn::WarningsState;
55 use core::sync::atomic::{AtomicBool, AtomicU64};
56 use crossbeam_utils::atomic::AtomicCell;
57
58 let paths = getpath::init_path_config(&settings);
59 let config = PyConfig::new(settings, paths);
60
61 // Build module_defs map from builtin modules + additional modules
62 let mut all_module_defs: BTreeMap<&'static str, &'static builtins::PyModuleDef> =
63 crate::stdlib::builtin_module_defs(&ctx)
64 .into_iter()
65 .chain(module_defs)
66 .map(|def| (def.name.as_str(), def))
67 .collect();
68
69 // Register sysconfigdata under platform-specific name as well
70 if let Some(&sysconfigdata_def) = all_module_defs.get("_sysconfigdata") {
71 use std::sync::OnceLock;
72 static SYSCONFIGDATA_NAME: OnceLock<&'static str> = OnceLock::new();
73 let leaked_name = *SYSCONFIGDATA_NAME.get_or_init(|| {
74 let name = crate::stdlib::sys::sysconfigdata_name();
75 Box::leak(name.into_boxed_str())
76 });
77 all_module_defs.insert(leaked_name, sysconfigdata_def);
78 }
79
80 // Create hash secret
81 let seed = match config.settings.hash_seed {
82 Some(seed) => seed,
83 None => super::process_hash_secret_seed(),
84 };
85 let hash_secret = HashSecret::new(seed);
86
87 // Create codec registry and warnings state
88 let codec_registry = CodecsRegistry::new(&ctx);
89 let warnings = WarningsState::init_state(&ctx);
90
91 // Create int_max_str_digits
92 let int_max_str_digits = AtomicCell::new(match config.settings.int_max_str_digits {
93 -1 => 4300,
94 other => other,
95 } as usize);
96
97 // Initialize frozen modules (core + user-provided)
98 let mut frozen: std::collections::HashMap<&'static str, FrozenModule, ahash::RandomState> =
99 core_frozen_inits().collect();
100 frozen.extend(frozen_modules);
101
102 // Create PyGlobalState
103 let global_state = PyRc::new(PyGlobalState {
104 config,
105 module_defs: all_module_defs,
106 frozen,
107 stacksize: AtomicCell::new(0),
108 thread_count: AtomicCell::new(0),
109 hash_secret,
110 atexit_funcs: PyMutex::default(),
111 codec_registry,
112 finalizing: AtomicBool::new(false),
113 warnings,
114 override_frozen_modules: AtomicCell::new(0),
115 before_forkers: PyMutex::default(),
116 after_forkers_child: PyMutex::default(),
117 after_forkers_parent: PyMutex::default(),
118 int_max_str_digits,
119 switch_interval: AtomicCell::new(0.005),
120 global_trace_func: PyMutex::default(),
121 global_profile_func: PyMutex::default(),
122 #[cfg(feature = "threading")]
123 main_thread_ident: AtomicCell::new(0),
124 #[cfg(feature = "threading")]
125 thread_frames: parking_lot::Mutex::new(std::collections::HashMap::new()),
126 #[cfg(feature = "threading")]
127 thread_handles: parking_lot::Mutex::new(Vec::new()),
128 #[cfg(feature = "threading")]
129 shutdown_handles: parking_lot::Mutex::new(Vec::new()),
130 monitoring: PyMutex::default(),
131 monitoring_events: AtomicCell::new(0),
132 instrumentation_version: AtomicU64::new(0),
133 #[cfg(all(unix, feature = "threading"))]
134 stop_the_world: StopTheWorldState::new(),
135 });
136
137 // Create VM with the global state
138 // Note: Don't clone here - init_hooks need exclusive access to mutate state
139 let mut vm = VirtualMachine::new(ctx, global_state);
140
141 // Execute initialization hooks (can mutate vm.state)
142 for hook in init_hooks {
143 hook(&mut vm);
144 }
145
146 // Call custom init function (can mutate vm.state)
147 init(&mut vm);
148
149 vm.initialize();
150
151 // Clone global_state for Interpreter after all initialization is done
152 let global_state = vm.state.clone();
153 (vm, global_state)
154}
155
156impl InterpreterBuilder {
157 /// Create a new interpreter configuration with default settings.
158 pub fn new() -> Self {
159 Self {
160 settings: Settings::default(),
161 ctx: Context::genesis().clone(),
162 module_defs: Vec::new(),
163 frozen_modules: Vec::new(),
164 init_hooks: Vec::new(),
165 }
166 }
167
168 /// Set custom settings for the interpreter.
169 ///
170 /// If called multiple times, only the last settings will be used.
171 pub fn settings(mut self, settings: Settings) -> Self {
172 self.settings = settings;
173 self
174 }
175
176 /// Add a single native module definition.
177 ///
178 /// # Example
179 /// ```
180 /// use rustpython_vm::{Interpreter, builtins::PyModuleDef};
181 ///
182 /// let builder = Interpreter::builder(Default::default());
183 /// // Note: In practice, use module_def from your #[pymodule]
184 /// // let def = mymodule::module_def(&builder.ctx);
185 /// // let interp = builder.add_native_module(def).build();
186 /// let interp = builder.build();
187 /// ```
188 pub fn add_native_module(self, def: &'static builtins::PyModuleDef) -> Self {
189 self.add_native_modules(&[def])
190 }
191
192 /// Add multiple native module definitions.
193 ///
194 /// # Example
195 /// ```
196 /// use rustpython_vm::Interpreter;
197 ///
198 /// let builder = Interpreter::builder(Default::default());
199 /// // In practice, use module_defs from rustpython_stdlib:
200 /// // let defs = rustpython_stdlib::stdlib_module_defs(&builder.ctx);
201 /// // let interp = builder.add_native_modules(&defs).build();
202 /// let interp = builder.build();
203 /// ```
204 pub fn add_native_modules(mut self, defs: &[&'static builtins::PyModuleDef]) -> Self {
205 self.module_defs.extend_from_slice(defs);
206 self
207 }
208
209 /// Add a custom initialization hook.
210 ///
211 /// Hooks are executed in the order they are added during interpreter creation.
212 /// This function will be called after modules are registered but before
213 /// the VM is initialized, allowing for additional customization.
214 ///
215 /// # Example
216 /// ```
217 /// use rustpython_vm::Interpreter;
218 ///
219 /// let interp = Interpreter::builder(Default::default())
220 /// .init_hook(|vm| {
221 /// // Custom initialization
222 /// })
223 /// .build();
224 /// ```
225 pub fn init_hook<F>(mut self, init: F) -> Self
226 where
227 F: FnOnce(&mut VirtualMachine) + 'static,
228 {
229 self.init_hooks.push(Box::new(init));
230 self
231 }
232
233 /// Add frozen modules to the interpreter.
234 ///
235 /// Frozen modules are Python modules compiled into the binary.
236 /// This method accepts any iterator of (name, FrozenModule) pairs.
237 ///
238 /// # Example
239 /// ```
240 /// use rustpython_vm::Interpreter;
241 ///
242 /// let interp = Interpreter::builder(Default::default())
243 /// // In practice: .add_frozen_modules(rustpython_pylib::FROZEN_STDLIB)
244 /// .build();
245 /// ```
246 pub fn add_frozen_modules<I>(mut self, frozen: I) -> Self
247 where
248 I: IntoIterator<Item = (&'static str, FrozenModule)>,
249 {
250 self.frozen_modules.extend(frozen);
251 self
252 }
253
254 /// Build the interpreter.
255 ///
256 /// This consumes the configuration and returns a fully initialized Interpreter.
257 pub fn build(self) -> Interpreter {
258 let (vm, global_state) = initialize_main_vm(
259 self.settings,
260 self.ctx,
261 self.module_defs,
262 self.frozen_modules,
263 self.init_hooks,
264 |_| {}, // No additional init needed
265 );
266 Interpreter { global_state, vm }
267 }
268
269 /// Alias for `build()` for compatibility with the `interpreter()` pattern.
270 pub fn interpreter(self) -> Interpreter {
271 self.build()
272 }
273}
274
275impl Default for InterpreterBuilder {
276 fn default() -> Self {
277 Self::new()
278 }
279}
280
281/// The general interface for the VM
282///
283/// # Examples
284/// Runs a simple embedded hello world program.
285/// ```
286/// use rustpython_vm::Interpreter;
287/// use rustpython_vm::compiler::Mode;
288/// Interpreter::without_stdlib(Default::default()).enter(|vm| {
289/// let scope = vm.new_scope_with_builtins();
290/// let source = r#"print("Hello World!")"#;
291/// let code_obj = vm.compile(
292/// source,
293/// Mode::Exec,
294/// "<embedded>".to_owned(),
295/// ).map_err(|err| vm.new_syntax_error(&err, Some(source))).unwrap();
296/// vm.run_code_obj(code_obj, scope).unwrap();
297/// });
298/// ```
299pub struct Interpreter {
300 pub global_state: PyRc<PyGlobalState>,
301 vm: VirtualMachine,
302}
303
304impl Interpreter {
305 /// Create a new interpreter configuration builder.
306 ///
307 /// # Example
308 /// ```
309 /// use rustpython_vm::Interpreter;
310 ///
311 /// let builder = Interpreter::builder(Default::default());
312 /// // In practice, add stdlib: builder.add_native_modules(&stdlib_module_defs(&builder.ctx))
313 /// let interp = builder.build();
314 /// ```
315 pub fn builder(settings: Settings) -> InterpreterBuilder {
316 InterpreterBuilder::new().settings(settings)
317 }
318
319 /// This is a bare unit to build up an interpreter without the standard library.
320 /// To create an interpreter with the standard library with the `rustpython` crate, use `rustpython::InterpreterBuilder`.
321 /// To create an interpreter without the `rustpython` crate, but only with `rustpython-vm`,
322 /// try to build one from the source code of `InterpreterBuilder`. It will not be a one-liner but it also will not be too hard.
323 pub fn without_stdlib(settings: Settings) -> Self {
324 Self::with_init(settings, |_| {})
325 }
326
327 /// Create with initialize function taking mutable vm reference.
328 ///
329 /// Note: This is a legacy API. To add stdlib, use `Interpreter::builder()` instead.
330 pub fn with_init<F>(settings: Settings, init: F) -> Self
331 where
332 F: FnOnce(&mut VirtualMachine),
333 {
334 let (vm, global_state) = initialize_main_vm(
335 settings,
336 Context::genesis().clone(),
337 Vec::new(), // No module_defs
338 Vec::new(), // No frozen_modules
339 Vec::new(), // No init_hooks
340 init,
341 );
342 Self { global_state, vm }
343 }
344
345 /// Run a function with the main virtual machine and return a PyResult of the result.
346 ///
347 /// To enter vm context multiple times or to avoid buffer/exception management, this function is preferred.
348 /// `enter` is lightweight and it returns a python object in PyResult.
349 /// You can stop or continue the execution multiple times by calling `enter`.
350 ///
351 /// To finalize the vm once all desired `enter`s are called, calling `finalize` will be helpful.
352 ///
353 /// See also [`Interpreter::run`] for managed way to run the interpreter.
354 pub fn enter<F, R>(&self, f: F) -> R
355 where
356 F: FnOnce(&VirtualMachine) -> R,
357 {
358 thread::enter_vm(&self.vm, || f(&self.vm))
359 }
360
361 /// Run [`Interpreter::enter`] and call [`VirtualMachine::expect_pyresult`] for the result.
362 ///
363 /// This function is useful when you want to expect a result from the function,
364 /// but also print useful panic information when exception raised.
365 ///
366 /// See also [`Interpreter::enter`] and [`VirtualMachine::expect_pyresult`] for more information.
367 pub fn enter_and_expect<F, R>(&self, f: F, msg: &str) -> R
368 where
369 F: FnOnce(&VirtualMachine) -> PyResult<R>,
370 {
371 self.enter(|vm| {
372 let result = f(vm);
373 vm.expect_pyresult(result, msg)
374 })
375 }
376
377 /// Run a function with the main virtual machine and return exit code.
378 ///
379 /// To enter vm context only once and safely terminate the vm, this function is preferred.
380 /// Unlike [`Interpreter::enter`], `run` calls finalize and returns exit code.
381 /// You will not be able to obtain Python exception in this way.
382 ///
383 /// See [`Interpreter::finalize`] for the finalization steps.
384 /// See also [`Interpreter::enter`] for pure function call to obtain Python exception.
385 pub fn run<F>(self, f: F) -> u32
386 where
387 F: FnOnce(&VirtualMachine) -> PyResult<()>,
388 {
389 let res = self.enter(|vm| f(vm));
390 self.finalize(res.err())
391 }
392
393 /// Finalize vm and turns an exception to exit code.
394 ///
395 /// Finalization steps (matching Py_FinalizeEx):
396 /// 1. Flush stdout and stderr.
397 /// 1. Handle exit exception and turn it to exit code.
398 /// 1. Call threading._shutdown() to join non-daemon threads.
399 /// 1. Run atexit exit functions.
400 /// 1. Set finalizing flag (suppresses unraisable exceptions from __del__).
401 /// 1. Forced GC collection pass (collect cycles while builtins are available).
402 /// 1. Module finalization (finalize_modules).
403 /// 1. Final stdout/stderr flush.
404 ///
405 /// Note that calling `finalize` is not necessary by purpose though.
406 pub fn finalize(self, exc: Option<PyBaseExceptionRef>) -> u32 {
407 self.enter(|vm| {
408 let mut flush_status = vm.flush_std();
409
410 // See if any exception leaked out:
411 let exit_code = if let Some(exc) = exc {
412 vm.handle_exit_exception(exc)
413 } else {
414 0
415 };
416
417 // Wait for thread shutdown - call threading._shutdown() if available.
418 // This waits for all non-daemon threads to complete.
419 // threading module may not be imported, so ignore import errors.
420 if let Ok(threading) = vm.import("threading", 0)
421 && let Ok(shutdown) = threading.get_attr("_shutdown", vm)
422 && let Err(e) = shutdown.call((), vm)
423 {
424 vm.run_unraisable(
425 e,
426 Some("Exception ignored in threading shutdown".to_owned()),
427 threading,
428 );
429 }
430
431 // Run atexit handlers before setting finalizing flag.
432 // This allows unraisable exceptions from atexit handlers to be reported.
433 atexit::_run_exitfuncs(vm);
434
435 // Now suppress unraisable exceptions from daemon threads and __del__
436 // methods during the rest of shutdown.
437 vm.state.finalizing.store(true, Ordering::Release);
438
439 // GC pass - collect cycles before module cleanup
440 crate::gc_state::gc_state().collect_force(2);
441
442 // Module finalization: remove modules from sys.modules, GC collect
443 // (while builtins is still available for __del__), then clear module dicts.
444 vm.finalize_modules();
445
446 if vm.flush_std() < 0 && flush_status == 0 {
447 flush_status = -1;
448 }
449
450 // Match CPython: if exit_code is 0 and stdout flush failed, exit 120
451 if exit_code == 0 && flush_status < 0 {
452 EXITCODE_FLUSH_FAILURE
453 } else {
454 exit_code
455 }
456 })
457 }
458}
459
460fn core_frozen_inits() -> impl Iterator<Item = (&'static str, FrozenModule)> {
461 let iter = core::iter::empty();
462 macro_rules! ext_modules {
463 ($iter:ident, $($t:tt)*) => {
464 let $iter = $iter.chain(py_freeze!($($t)*));
465 };
466 }
467
468 // Python modules that the vm calls into, but are not actually part of the stdlib. They could
469 // in theory be implemented in Rust, but are easiest to do in Python for one reason or another.
470 // Includes _importlib_bootstrap and _importlib_bootstrap_external
471 ext_modules!(
472 iter,
473 dir = "../../Lib/python_builtins",
474 crate_name = "rustpython_compiler_core"
475 );
476
477 // core stdlib Python modules that the vm calls into, but are still used in Python
478 // application code, e.g. copyreg
479 // FIXME: Initializing core_modules here results duplicated frozen module generation for core_modules.
480 // We need a way to initialize this modules for both `Interpreter::without_stdlib()` and `InterpreterBuilder::new().init_stdlib().interpreter()`
481 // #[cfg(not(feature = "freeze-stdlib"))]
482 ext_modules!(
483 iter,
484 dir = "../../Lib/core_modules",
485 crate_name = "rustpython_compiler_core"
486 );
487
488 // Collect frozen module entries
489 let mut entries: Vec<_> = iter.collect();
490
491 // Add test module aliases
492 if let Some(hello_code) = entries
493 .iter()
494 .find(|(n, _)| *n == "__hello__")
495 .map(|(_, m)| m.code)
496 {
497 entries.push((
498 "__hello_alias__",
499 FrozenModule {
500 code: hello_code,
501 package: false,
502 },
503 ));
504 entries.push((
505 "__phello_alias__",
506 FrozenModule {
507 code: hello_code,
508 package: true,
509 },
510 ));
511 entries.push((
512 "__phello_alias__.spam",
513 FrozenModule {
514 code: hello_code,
515 package: false,
516 },
517 ));
518 entries.push((
519 "__hello_only__",
520 FrozenModule {
521 code: hello_code,
522 package: false,
523 },
524 ));
525 }
526 if let Some(code) = entries
527 .iter()
528 .find(|(n, _)| *n == "__phello__")
529 .map(|(_, m)| m.code)
530 {
531 entries.push((
532 "__phello__.__init__",
533 FrozenModule {
534 code,
535 package: false,
536 },
537 ));
538 }
539 if let Some(code) = entries
540 .iter()
541 .find(|(n, _)| *n == "__phello__.ham")
542 .map(|(_, m)| m.code)
543 {
544 entries.push((
545 "__phello__.ham.__init__",
546 FrozenModule {
547 code,
548 package: false,
549 },
550 ));
551 }
552 entries.into_iter()
553}
554
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        PyObjectRef,
        builtins::{PyStr, int},
    };
    use malachite_bigint::ToBigInt;

    /// Adding two Python ints through the VM gives their arithmetic sum.
    #[test]
    fn test_add_py_integers() {
        let interp = Interpreter::without_stdlib(Default::default());
        interp.enter(|vm| {
            let lhs: PyObjectRef = vm.ctx.new_int(33_i32).into();
            let rhs: PyObjectRef = vm.ctx.new_int(12_i32).into();
            let sum = vm._add(&lhs, &rhs).unwrap();
            let expected = 45_i32.to_bigint().unwrap();
            assert_eq!(*int::get_value(&sum), expected);
        });
    }

    /// Multiplying a Python str by an int repeats the string that many times.
    #[test]
    fn test_multiply_str() {
        let interp = Interpreter::without_stdlib(Default::default());
        interp.enter(|vm| {
            let text = vm.new_pyobj(crate::common::ascii!("Hello "));
            let times = vm.new_pyobj(4_i32);
            let product = vm._mul(&text, &times).unwrap();
            let repeated = product.downcast_ref::<PyStr>().unwrap();
            assert_eq!(repeated.as_wtf8(), "Hello Hello Hello Hello ");
        });
    }
}