Skip to main content

shape_vm/compiler/
mod.rs

1//! Bytecode compiler - translates AST to bytecode
2
3use shape_ast::error::{Result, ShapeError, SourceLocation};
4use std::collections::{HashMap, HashSet};
5use std::sync::Arc;
6
7use crate::blob_cache_v2::BlobCache;
8use crate::borrow_checker::BorrowMode;
9use crate::bytecode::{
10    BytecodeProgram, Constant, FunctionBlob, FunctionHash, Instruction, OpCode,
11    Program as ContentAddressedProgram,
12};
13use crate::type_tracking::{TypeTracker, VariableTypeInfo};
14use shape_ast::ast::{FunctionDef, Program, TypeAnnotation};
15use shape_runtime::type_schema::SchemaId;
16use shape_runtime::type_system::{
17    Type, TypeAnalysisMode, TypeError, TypeErrorWithLocation, analyze_program_with_mode,
18    checking::MethodTable,
19};
20
21// Sub-modules
22pub(crate) mod comptime;
23pub(crate) mod comptime_builtins;
24pub(crate) mod comptime_target;
25mod control_flow;
26mod expressions;
27mod functions;
28mod helpers;
29mod literals;
30mod loops;
31mod patterns;
32mod statements;
33pub mod string_interpolation;
34
/// Loop compilation context.
///
/// Holds the per-loop state consulted when lowering `break` / `continue`.
/// Instances live on the compiler's loop stack.
pub(crate) struct LoopContext {
    /// Break jump targets.
    // NOTE(review): presumably instruction indices of pending `break` jumps
    // that get patched once the loop end is known — confirm in the loop compiler.
    pub(crate) break_jumps: Vec<usize>,
    /// Continue jump target
    pub(crate) continue_target: usize,
    /// Optional local to store break values for expression loops
    /// (`None` when the loop has no such local).
    pub(crate) break_value_local: Option<u16>,
    /// Whether a for-in iterator is on the stack (break must pop it)
    pub(crate) iterator_on_stack: bool,
    /// Drop scope depth when the loop was entered (for break/continue early exit drops)
    pub(crate) drop_scope_depth: usize,
}
48
/// Information about an imported symbol (fields used for diagnostics/LSP).
///
/// Stored as the value of the compiler's `imported_names` map, which is keyed
/// by the local name of the import.
#[derive(Debug, Clone)]
// The fields exist for diagnostics/LSP consumers, hence the dead-code allowance.
#[allow(dead_code)]
pub(crate) struct ImportedSymbol {
    /// Original name in the source module
    pub original_name: String,
    /// Module path the symbol was imported from
    pub module_path: String,
}
58
/// Generic metadata recorded for a struct type.
///
/// The compiler keeps one entry per struct type name and uses it to
/// instantiate runtime type names (e.g. `MyType<number>`) at struct-literal
/// construction sites.
#[derive(Debug, Clone)]
pub(crate) struct StructGenericInfo {
    /// Type parameters of the struct.
    pub type_params: Vec<shape_ast::ast::TypeParam>,
    /// Type annotations keyed by string.
    // NOTE(review): presumably keyed by field name — confirm at the insertion site.
    pub runtime_field_types: HashMap<String, shape_ast::ast::TypeAnnotation>,
}
64
/// Whether a type's Drop impl is sync-only, async-only, or both.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DropKind {
    /// The type has only a sync Drop impl.
    SyncOnly,
    /// The type has only an async Drop impl.
    AsyncOnly,
    /// The type has both sync and async Drop impls.
    Both,
}
72
/// Canonical compile-time parameter passing contract.
///
/// Compiler lowering and LSP rendering both read this enum, so it is the one
/// authoritative description of how a parameter is passed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ParamPassMode {
    /// The argument is passed by value.
    ByValue,
    /// The argument is passed as a shared reference.
    ByRefShared,
    /// The argument is passed as an exclusive reference.
    ByRefExclusive,
}

impl ParamPassMode {
    /// Returns `true` for both reference modes and `false` for [`Self::ByValue`].
    pub const fn is_reference(self) -> bool {
        match self {
            Self::ByValue => false,
            Self::ByRefShared | Self::ByRefExclusive => true,
        }
    }

    /// Returns `true` only for [`Self::ByRefExclusive`].
    pub const fn is_exclusive(self) -> bool {
        match self {
            Self::ByRefExclusive => true,
            Self::ByValue | Self::ByRefShared => false,
        }
    }
}
92
/// Per-function blob builder for content-addressed compilation.
///
/// Uses a **snapshot** strategy: records the global instruction/constant/string
/// pool sizes at the start of function compilation, then at finalization
/// extracts the delta and remaps global indices to blob-local indices.
///
/// While the function body is compiled, the builder also accumulates the
/// names of called functions, constructed type schemas, and required
/// permissions; `finalize` copies these into the resulting `FunctionBlob`.
pub(crate) struct FunctionBlobBuilder {
    /// Function name.
    pub name: String,
    /// Global instruction index where this function's code starts.
    pub instr_start: usize,
    /// Global constant pool size when this function started compiling.
    // `finalize` rebuilds the local constant pool by scanning operands and
    // does not read this snapshot, hence the dead-code allowance.
    #[allow(dead_code)]
    pub const_start: usize,
    /// Global string pool size when this function started compiling.
    // Not read by `finalize` either (same reason as `const_start`).
    #[allow(dead_code)]
    pub string_start: usize,
    /// Names of functions called by this function (for dependency tracking).
    /// `record_call` keeps this list free of duplicates.
    pub called_functions: Vec<String>,
    /// Type schema names this function constructs.
    pub type_schemas: Vec<String>,
    /// Accumulated permissions required by this function's direct calls.
    pub required_permissions: shape_abi_v1::PermissionSet,
}
116
117impl FunctionBlobBuilder {
118    pub fn new(name: String, instr_start: usize, const_start: usize, string_start: usize) -> Self {
119        Self {
120            name,
121            instr_start,
122            const_start,
123            string_start,
124            called_functions: Vec::new(),
125            type_schemas: Vec::new(),
126            required_permissions: shape_abi_v1::PermissionSet::pure(),
127        }
128    }
129
130    /// Record that this function calls another function by name.
131    pub fn record_call(&mut self, callee_name: &str) {
132        if !self.called_functions.iter().any(|n| n == callee_name) {
133            self.called_functions.push(callee_name.to_owned());
134        }
135    }
136
137    /// Record that this function requires the given permissions
138    /// (e.g., from a stdlib module call identified by capability_tags).
139    pub fn record_permissions(&mut self, perms: &shape_abi_v1::PermissionSet) {
140        self.required_permissions = self.required_permissions.union(perms);
141    }
142
143    /// Finalize this builder into a FunctionBlob by extracting the delta from
144    /// the global program pools and remapping indices to blob-local ones.
145    pub fn finalize(
146        &self,
147        program: &crate::bytecode::BytecodeProgram,
148        func: &crate::bytecode::Function,
149        blob_name_to_hash: &HashMap<String, FunctionHash>,
150        instr_end: usize,
151    ) -> FunctionBlob {
152        use crate::bytecode::Operand;
153
154        // Extract global-indexed instructions for this function.
155        let global_instructions = &program.instructions[self.instr_start..instr_end];
156
157        // Build constant remap: global index -> local index.
158        let mut const_remap: HashMap<u16, u16> = HashMap::new();
159        let mut local_constants: Vec<Constant> = Vec::new();
160        // Build string remap similarly.
161        let mut string_remap: HashMap<u16, u16> = HashMap::new();
162        let mut local_strings: Vec<String> = Vec::new();
163        // Build function operand remap: global function index -> dependency-local index.
164        let mut func_remap: HashMap<u16, u16> = HashMap::new();
165        // Start from explicitly recorded call dependencies, then augment with
166        // function-value references found in constants/operands.
167        let mut called_functions = self.called_functions.clone();
168
169        let mut ensure_called = |callee_name: &str| -> u16 {
170            if let Some(dep_idx) = called_functions.iter().position(|n| n == callee_name) {
171                dep_idx as u16
172            } else {
173                called_functions.push(callee_name.to_owned());
174                (called_functions.len() - 1) as u16
175            }
176        };
177
178        // Scan instructions for all constant/string references and build
179        // blob-local pools with remapped indices.
180        for instr in global_instructions {
181            if let Some(ref operand) = instr.operand {
182                match operand {
183                    Operand::Const(idx) => {
184                        if !const_remap.contains_key(idx) {
185                            let local_idx = local_constants.len() as u16;
186                            const_remap.insert(*idx, local_idx);
187                            let mut constant = program.constants[*idx as usize].clone();
188                            if let Constant::Function(fid) = constant {
189                                let global_idx = fid as usize;
190                                if let Some(callee) = program.functions.get(global_idx) {
191                                    let dep_idx = ensure_called(&callee.name);
192                                    constant = Constant::Function(dep_idx);
193                                }
194                            }
195                            local_constants.push(constant);
196                        }
197                    }
198                    Operand::Property(idx) => {
199                        if !string_remap.contains_key(idx) {
200                            let local_idx = local_strings.len() as u16;
201                            string_remap.insert(*idx, local_idx);
202                            local_strings.push(program.strings[*idx as usize].clone());
203                        }
204                    }
205                    Operand::Name(sid) => {
206                        let gidx = sid.0 as u16;
207                        if !string_remap.contains_key(&gidx) {
208                            let local_idx = local_strings.len() as u16;
209                            string_remap.insert(gidx, local_idx);
210                            local_strings.push(program.strings[gidx as usize].clone());
211                        }
212                    }
213                    Operand::MethodCall { name, .. } => {
214                        let gidx = name.0 as u16;
215                        if !string_remap.contains_key(&gidx) {
216                            let local_idx = local_strings.len() as u16;
217                            string_remap.insert(gidx, local_idx);
218                            local_strings.push(program.strings[gidx as usize].clone());
219                        }
220                    }
221                    Operand::TypedMethodCall { string_id, .. } => {
222                        let gidx = *string_id;
223                        if !string_remap.contains_key(&gidx) {
224                            let local_idx = local_strings.len() as u16;
225                            string_remap.insert(gidx, local_idx);
226                            local_strings.push(program.strings[gidx as usize].clone());
227                        }
228                    }
229                    Operand::Function(fid) => {
230                        let global_idx = fid.0 as usize;
231                        if !func_remap.contains_key(&fid.0) {
232                            // Map global function index -> dependency-local index.
233                            // If this call target was not explicitly recorded (e.g. emitted via
234                            // function-valued constants), add it so content-addressed linking can
235                            // remap stable function IDs correctly.
236                            if let Some(callee) = program.functions.get(global_idx) {
237                                let dep_idx = ensure_called(&callee.name);
238                                func_remap.insert(fid.0, dep_idx);
239                            }
240                        }
241                    }
242                    _ => {}
243                }
244            }
245        }
246
247        // Remap instructions to use local indices.
248        let local_instructions: Vec<Instruction> = global_instructions
249            .iter()
250            .map(|instr| {
251                let mut remapped = instr.clone();
252                if let Some(operand) = &mut remapped.operand {
253                    match operand {
254                        Operand::Const(idx) => {
255                            if let Some(&local) = const_remap.get(idx) {
256                                *idx = local;
257                            }
258                        }
259                        Operand::Property(idx) => {
260                            if let Some(&local) = string_remap.get(idx) {
261                                *idx = local;
262                            }
263                        }
264                        Operand::Name(sid) => {
265                            if let Some(&local) = string_remap.get(&(sid.0 as u16)) {
266                                sid.0 = local as u32;
267                            }
268                        }
269                        Operand::MethodCall { name, arg_count: _ } => {
270                            if let Some(&local) = string_remap.get(&(name.0 as u16)) {
271                                name.0 = local as u32;
272                            }
273                        }
274                        Operand::TypedMethodCall { string_id, .. } => {
275                            if let Some(&local) = string_remap.get(string_id) {
276                                *string_id = local;
277                            }
278                        }
279                        Operand::Function(fid) => {
280                            if let Some(&local) = func_remap.get(&fid.0) {
281                                fid.0 = local;
282                            }
283                        }
284                        _ => {}
285                    }
286                }
287                remapped
288            })
289            .collect();
290
291        // Build dependency list from called function names.
292        // Use FunctionHash::ZERO as sentinel for forward references (not yet compiled).
293        let dependencies: Vec<FunctionHash> = called_functions
294            .iter()
295            .map(|callee| {
296                blob_name_to_hash
297                    .get(callee)
298                    .copied()
299                    .unwrap_or(FunctionHash::ZERO)
300            })
301            .collect();
302
303        // Build source map from global debug info.
304        let source_map: Vec<(usize, u32, u32)> = program
305            .debug_info
306            .line_numbers
307            .iter()
308            .filter(|(idx, _, _)| *idx >= self.instr_start && *idx < instr_end)
309            .map(|(idx, fid, line)| (idx - self.instr_start, *fid as u32, *line))
310            .collect();
311
312        // Scan instructions for CallForeign operands and collect content hashes
313        // from the program's foreign_functions table.
314        let mut foreign_deps: Vec<[u8; 32]> = Vec::new();
315        for instr in &local_instructions {
316            if instr.opcode == crate::bytecode::OpCode::CallForeign {
317                if let Some(Operand::ForeignFunction(idx)) = instr.operand {
318                    if let Some(entry) = program.foreign_functions.get(idx as usize) {
319                        if let Some(hash) = entry.content_hash {
320                            foreign_deps.push(hash);
321                        }
322                    }
323                }
324            }
325        }
326        foreign_deps.sort();
327        foreign_deps.dedup();
328
329        let mut blob = FunctionBlob {
330            content_hash: FunctionHash::ZERO,
331            name: self.name.clone(),
332            arity: func.arity,
333            param_names: func.param_names.clone(),
334            locals_count: func.locals_count,
335            is_closure: func.is_closure,
336            captures_count: func.captures_count,
337            is_async: func.is_async,
338            ref_params: func.ref_params.clone(),
339            ref_mutates: func.ref_mutates.clone(),
340            mutable_captures: func.mutable_captures.clone(),
341            frame_descriptor: func.frame_descriptor.clone(),
342            required_permissions: self.required_permissions.clone(),
343            instructions: local_instructions,
344            constants: local_constants,
345            strings: local_strings,
346            dependencies,
347            callee_names: called_functions,
348            type_schemas: self.type_schemas.clone(),
349            foreign_dependencies: foreign_deps,
350            source_map,
351        };
352        blob.finalize();
353        blob
354    }
355}
356
/// Type diagnostic mode for shared analyzer diagnostics.
///
/// Stored on `BytecodeCompiler` as `type_diagnostic_mode`.
// NOTE(review): the exact policy behind each variant is implemented outside
// this view; the variant notes below are inferred from the names — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TypeDiagnosticMode {
    /// Presumably: surface only diagnostics considered reliable — confirm.
    ReliableOnly,
    /// Presumably: surface type diagnostics strictly — confirm.
    Strict,
    /// Presumably: keep going and collect every diagnostic — confirm.
    RecoverAll,
}
363
/// Expression compilation diagnostic mode.
///
/// Stored on `BytecodeCompiler` as `compile_diagnostic_mode`.
// NOTE(review): variant semantics are inferred from the names — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompileDiagnosticMode {
    /// Presumably: stop at the first compile error — confirm.
    FailFast,
    /// Presumably: continue after errors and collect them all — confirm.
    RecoverAll,
}
369
/// Compiler state for translating an AST program into bytecode.
///
/// Besides the `BytecodeProgram` being built, this struct carries scope and
/// binding tables, type-tracking state, diagnostic settings, borrow/drop
/// bookkeeping, and the content-addressed blob tracking that runs alongside
/// the flat program.
pub struct BytecodeCompiler {
    /// The program being built
    pub(crate) program: BytecodeProgram,

    /// Current function being compiled (`None` when no function is active)
    pub(crate) current_function: Option<usize>,

    /// Local variable mappings (name -> index), one map per entry in the stack
    pub(crate) locals: Vec<HashMap<String, u16>>,

    /// ModuleBinding variable mappings (name -> index)
    pub(crate) module_bindings: HashMap<String, u16>,

    /// Next local variable index
    pub(crate) next_local: u16,

    /// Next module_binding variable index
    pub(crate) next_global: u16,

    /// Loop context stack for break/continue
    pub(crate) loop_stack: Vec<LoopContext>,

    /// Counter for synthetic closure function names
    pub(crate) closure_counter: u64,

    /// When compiling a DataTable closure method (e.g. dt.filter(row => ...)),
    /// this holds the (schema_id, type_name) to tag the closure's row parameter as RowView.
    pub(crate) closure_row_schema: Option<(u32, String)>,

    /// Unified type metadata for the last compiled expression.
    ///
    /// This is the single source for relational/value kind propagation
    /// (Table<T>, Indexed<T>, known object schema, etc.).
    pub(crate) last_expr_type_info: Option<VariableTypeInfo>,

    /// Type tracker for optimized field access
    pub(crate) type_tracker: TypeTracker,

    /// Schema ID of the last compiled expression (if it's a TypedObject).
    /// Used for compile-time typed merge optimization.
    pub(crate) last_expr_schema: Option<SchemaId>,

    /// Numeric type of the last compiled expression (for typed opcode emission).
    /// Set by literal compilation, variable loads, and other expression compilers.
    /// Read by binary op compilation to emit typed opcodes (e.g., MulInt).
    pub(crate) last_expr_numeric_type: Option<crate::type_tracking::NumericType>,

    /// Type inference engine for match exhaustiveness and type checking
    pub(crate) type_inference: shape_runtime::type_system::inference::TypeInferenceEngine,

    /// Track type aliases defined in the program
    /// Maps alias name -> target type (for type validation)
    pub(crate) type_aliases: HashMap<String, String>,

    /// Current source line being compiled (for debug info)
    pub(crate) current_line: u32,

    /// Current source file ID (for multi-file debug info)
    pub(crate) current_file_id: u16,

    /// Source text (for error messages)
    pub(crate) source_text: Option<String>,

    /// Source lines (split from source_text for quick access)
    pub(crate) source_lines: Vec<String>,

    /// Imported symbols: local_name -> ImportedSymbol
    pub(crate) imported_names: HashMap<String, ImportedSymbol>,
    /// Module namespace bindings introduced by `use module.path`.
    /// Used to avoid UFCS rewrites for module calls like `duckdb.connect(...)`.
    pub(crate) module_namespace_bindings: HashSet<String>,
    /// Active lexical module scope stack while compiling `mod Name { ... }`.
    pub(crate) module_scope_stack: Vec<String>,

    /// Known exports for import suggestions: function_name -> module_path
    /// Used to provide helpful error messages like "Did you mean to import from...?"
    pub(crate) known_exports: HashMap<String, String>,
    /// Function arity bounds keyed by function name: (required_params, total_params).
    /// Required params are non-default parameters. Defaults are only allowed
    /// in trailing positions.
    pub(crate) function_arity_bounds: HashMap<String, (usize, usize)>,
    /// Function const parameter indices keyed by function name.
    /// Const parameters must receive compile-time constant arguments at call sites.
    pub(crate) function_const_params: HashMap<String, Vec<usize>>,
    /// Original function definitions keyed by function name.
    /// Used for const-template specialization at call sites.
    pub(crate) function_defs: HashMap<String, FunctionDef>,
    /// Foreign function definitions keyed by function name.
    /// Used to resolve the effective (Result-wrapped) return type at call sites.
    pub(crate) foreign_function_defs: HashMap<String, shape_ast::ast::ForeignFunctionDef>,
    /// Cached const specializations keyed by `(base_name + const-arg fingerprint)`.
    pub(crate) const_specializations: HashMap<String, usize>,
    /// Monotonic counter for unique specialization symbol names.
    pub(crate) next_const_specialization_id: u64,
    /// Const-parameter bindings for specialized function symbols.
    /// These bindings are exposed to comptime handlers as typed module_bindings.
    pub(crate) specialization_const_bindings:
        HashMap<String, Vec<(String, shape_value::ValueWord)>>,

    /// Struct type definitions: type_name -> (field_names in order, definition span)
    pub(crate) struct_types: HashMap<String, (Vec<String>, shape_ast::ast::Span)>,
    /// Generic metadata for struct types used to instantiate runtime type names
    /// (e.g. `MyType<number>`) at struct-literal construction sites.
    pub(crate) struct_generic_info: HashMap<String, StructGenericInfo>,
    /// Names of `type C` declarations with native layout metadata.
    pub(crate) native_layout_types: HashSet<String>,
    /// Generated conversion pair cache keys: `c_type::object_type`.
    pub(crate) generated_native_conversion_pairs: HashSet<String>,

    /// Whether the current function being compiled is async
    pub(crate) current_function_is_async: bool,

    /// Directory of the source file being compiled (for resolving relative source paths)
    pub(crate) source_dir: Option<std::path::PathBuf>,

    /// Collected compilation errors (for multi-error reporting)
    pub(crate) errors: Vec<shape_ast::error::ShapeError>,

    /// Hoisted fields from optimistic hoisting pre-pass.
    /// Maps variable name -> list of property names assigned later
    /// (e.g., `a.y = 2` yields "a" -> ["y"]).
    /// Used to include future property assignments in inline object schemas at compile time.
    pub(crate) hoisted_fields: HashMap<String, Vec<String>>,

    /// When compiling a variable initializer, the name of the variable being assigned to.
    /// Used by compile_typed_object_literal to include hoisted fields in the schema.
    pub(crate) pending_variable_name: Option<String>,

    /// Known trait names (populated in the first pass so meta definitions can reference traits)
    pub(crate) known_traits: std::collections::HashSet<String>,

    /// Full trait definitions keyed by trait name.
    /// Used to install default method implementations for impl blocks that omit them.
    pub(crate) trait_defs: HashMap<String, shape_ast::ast::types::TraitDef>,

    /// Extension registry for comptime execution
    pub(crate) extension_registry: Option<Arc<Vec<shape_runtime::module_exports::ModuleExports>>>,

    /// Comptime field values per type: type_name -> (field_name -> ValueWord)
    /// These are type-level constants baked at compile time with zero runtime cost.
    pub(crate) comptime_fields: HashMap<String, HashMap<String, shape_value::ValueWord>>,
    /// Type diagnostic mode for shared analyzer diagnostics.
    pub(crate) type_diagnostic_mode: TypeDiagnosticMode,
    /// Expression compilation diagnostic mode.
    pub(crate) compile_diagnostic_mode: CompileDiagnosticMode,
    /// Whether this compiler instance is compiling code for comptime execution.
    /// Enables comptime-only builtins and comptime-specific statement semantics.
    pub(crate) comptime_mode: bool,
    /// Internal guard for compiler-synthesized `__comptime__` helper calls.
    /// User source must never access `__comptime__` directly.
    pub(crate) allow_internal_comptime_namespace: bool,
    /// Method table for data-driven method signature queries.
    /// Used to replace hardcoded heuristics (e.g., is_type_preserving_table_method)
    /// with MethodTable lookups (is_self_returning, takes_closure_with_receiver_param).
    pub(crate) method_table: MethodTable,
    /// Borrow checker for reference lifetime tracking.
    pub(crate) borrow_checker: crate::borrow_checker::BorrowChecker,
    /// Locals that are reference-typed in the current function.
    pub(crate) ref_locals: HashSet<u16>,
    /// Subset of ref_locals that hold exclusive (`&mut`) borrows.
    /// Used to enforce the three concurrency rules at task boundaries.
    pub(crate) exclusive_ref_locals: HashSet<u16>,
    /// Local variable indices declared as `const` (immutable binding).
    pub(crate) const_locals: HashSet<u16>,
    /// Module binding indices declared as `const` (immutable binding).
    pub(crate) const_module_bindings: HashSet<u16>,
    /// Local variable indices declared as immutable `let` (not `let mut` or `var`).
    pub(crate) immutable_locals: HashSet<u16>,
    /// Local variable indices that are function parameters (first N locals in a function).
    /// Used to avoid trusting inferred type hints for params with no explicit annotation.
    pub(crate) param_locals: HashSet<u16>,
    /// Module binding indices declared as immutable `let`.
    pub(crate) immutable_module_bindings: HashSet<u16>,
    /// True while compiling function call arguments (allows `&` references).
    pub(crate) in_call_args: bool,
    /// Borrow mode for the argument currently being compiled.
    pub(crate) current_call_arg_borrow_mode: Option<BorrowMode>,
    /// ModuleBinding-ref writebacks collected while compiling current call args.
    pub(crate) call_arg_module_binding_ref_writebacks: Vec<Vec<(u16, u16)>>,
    /// Inferred reference parameters for untyped params: function -> per-param flag.
    pub(crate) inferred_ref_params: HashMap<String, Vec<bool>>,
    /// Inferred mutating-reference params: function -> per-param flag.
    pub(crate) inferred_ref_mutates: HashMap<String, Vec<bool>>,
    /// Effective per-parameter pass mode (explicit + inferred), by function name.
    pub(crate) inferred_param_pass_modes: HashMap<String, Vec<ParamPassMode>>,
    /// Inferred parameter type hints for unannotated params.
    /// Keyed by function name; each entry is a per-param optional type string.
    pub(crate) inferred_param_type_hints: HashMap<String, Vec<Option<String>>>,
    /// Stack of scopes, each containing locals that need Drop calls at scope exit.
    /// Each entry is (local_index, is_async).
    pub(crate) drop_locals: Vec<Vec<(u16, bool)>>,
    /// Per-type drop kind: tracks whether each type has sync, async, or both drop impls.
    /// Populated during the first-pass registration of impl blocks.
    pub(crate) drop_type_info: HashMap<String, DropKind>,
    /// Module bindings that need Drop calls at program exit.
    /// Each entry is (binding_index, is_async).
    pub(crate) drop_module_bindings: Vec<(u16, bool)>,
    /// Mutable closure captures in the current function being compiled.
    /// Maps captured variable name -> upvalue index (for LoadClosure/StoreClosure).
    /// Only populated while compiling a closure body that has mutable captures.
    pub(crate) mutable_closure_captures: HashMap<String, u16>,

    /// Variables in the current scope that have been boxed into SharedCells
    /// by a mutable closure capture. When a subsequent closure captures one
    /// of these variables (even immutably), it must use the SharedCell path
    /// so it shares the same mutable cell.
    pub(crate) boxed_locals: HashSet<String>,

    /// Active permission set for capability checking.
    ///
    /// When set, imported stdlib functions are checked against capability_tags.
    /// If a function requires a permission not in this set, a compile error is
    /// emitted and the function never enters bytecode.
    ///
    /// `None` means no checking (backwards-compatible default).
    pub(crate) permission_set: Option<shape_abi_v1::PermissionSet>,

    // -- Content-addressed blob tracking --
    /// Active blob builder (set while compiling a function body).
    pub(crate) current_blob_builder: Option<FunctionBlobBuilder>,
    /// Completed function blobs (finalized with content hash).
    pub(crate) completed_blobs: Vec<FunctionBlob>,
    /// Map from function name to content hash (populated after finalization).
    pub(crate) blob_name_to_hash: HashMap<String, FunctionHash>,
    /// The content-addressed program produced alongside BytecodeProgram.
    pub(crate) content_addressed_program: Option<ContentAddressedProgram>,
    /// Content hash per compiled function index (function_id -> blob hash).
    /// This is the stable identity bridge for the flat runtime format.
    pub(crate) function_hashes_by_id: Vec<Option<FunctionHash>>,

    /// Optional blob-level cache for incremental compilation.
    /// When set, compiled blobs are stored after finalization and looked up
    /// by content hash to avoid redundant work across compilations.
    pub(crate) blob_cache: Option<BlobCache>,

    /// Temporary function name aliases for comptime replace body.
    /// Maps alias (e.g., `__original__`) to actual function name (e.g., `__original__myFunc`).
    /// Set before compiling a replacement body and cleared after.
    pub(crate) function_aliases: HashMap<String, String>,

    /// Parameters of the function currently being compiled.
    /// Used by match exhaustiveness checking to fall back to type annotations
    /// when the type inference engine cannot resolve a parameter's type.
    pub(crate) current_function_params: Vec<shape_ast::ast::FunctionParameter>,

    /// Legacy cache of function names collected from stdlib-loaded modules.
    ///
    /// Internal builtin access is now gated by per-definition declaring-module
    /// provenance, not by membership in this set.
    pub stdlib_function_names: HashSet<String>,

    /// Per-function flag: when true, `get_builtin_function` resolves `__*` names.
    /// Toggled during compilation for definitions originating from `std::*`.
    pub(crate) allow_internal_builtins: bool,

    /// Package-scoped native library resolutions for the current host.
    pub(crate) native_resolution_context:
        Option<shape_runtime::native_resolution::NativeResolutionSet>,
}
629
630impl Default for BytecodeCompiler {
631    fn default() -> Self {
632        Self::new()
633    }
634}
635
636mod compiler_impl_part1;
637mod compiler_impl_part2;
638mod compiler_impl_part3;
639mod compiler_impl_part4;
640
641/// Infer effective reference parameters and mutation behavior without compiling bytecode.
642///
643/// Returns `(inferred_ref_params, inferred_ref_mutates)` keyed by function name.
644/// - `inferred_ref_params[f][i] == true` means parameter `i` of `f` is inferred/treated as ref.
645/// - `inferred_ref_mutates[f][i] == true` means that reference parameter is mutating (`&mut`).
646pub fn infer_reference_model(
647    program: &Program,
648) -> (HashMap<String, Vec<bool>>, HashMap<String, Vec<bool>>) {
649    let (inferred_ref_params, inferred_ref_mutates, _) =
650        BytecodeCompiler::infer_reference_model(program);
651    (inferred_ref_params, inferred_ref_mutates)
652}
653
654/// Infer effective parameter pass modes (`ByValue` / `ByRefShared` / `ByRefExclusive`)
655/// keyed by function name.
656pub fn infer_param_pass_modes(program: &Program) -> HashMap<String, Vec<ParamPassMode>> {
657    let (inferred_ref_params, inferred_ref_mutates, _) =
658        BytecodeCompiler::infer_reference_model(program);
659    BytecodeCompiler::build_param_pass_mode_map(
660        program,
661        &inferred_ref_params,
662        &inferred_ref_mutates,
663    )
664}
665
666#[cfg(all(test, feature = "deep-tests"))]
667#[path = "compiler_tests.rs"]
668mod compiler_deep;
669
670#[cfg(all(test, feature = "deep-tests"))]
671#[path = "borrow_deep_tests.rs"]
672mod borrow_deep_tests;