Skip to main content

shape_vm/compiler/
mod.rs

1//! Bytecode compiler - translates AST to bytecode
2
3use shape_ast::error::{Result, ShapeError, SourceLocation};
4use std::collections::{HashMap, HashSet};
5use std::sync::Arc;
6
7use crate::blob_cache_v2::BlobCache;
/// Borrow mode for reference parameters - Shared (&) or Exclusive (&mut).
/// Kept for codegen even though the lexical borrow checker has been removed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BorrowMode {
    /// Shared borrow (`&`).
    Shared,
    /// Exclusive borrow (`&mut`).
    Exclusive,
}
15
/// Result mode requested for an expression while it is being compiled.
///
/// NOTE(review): the variant descriptions below are inferred from the variant
/// names only - confirm against the expression compiler.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ExprResultMode {
    /// Presumably: the expression should yield a plain value.
    Value,
    /// Presumably: a reference result is kept as a reference instead of being
    /// dereferenced.
    PreserveRef,
}
21
/// Describes whether a compiled expression left a raw reference on the stack.
///
/// The derived `Default` has both fields set to `None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct ExprReferenceResult {
    /// Borrow mode of the raw reference left by the expression, if any.
    /// Explicit `&expr` results set this without enabling auto-deref.
    pub raw_mode: Option<BorrowMode>,
    /// Only set for propagated reference results (identifier loads,
    /// ref-returning calls) that should implicitly dereference in value contexts.
    pub auto_deref_mode: Option<BorrowMode>,
}
27
/// A borrow place key used for encoding borrow targets in codegen.
///
/// Plain alias for `u32`; it adds no type safety of its own.
pub type BorrowPlace = u32;
30use crate::bytecode::{
31    BuiltinFunction, BytecodeProgram, Constant, FunctionBlob, FunctionHash, Instruction, OpCode,
32    Operand, Program as ContentAddressedProgram,
33};
34use crate::type_tracking::{TypeTracker, VariableTypeInfo};
35use shape_ast::ast::{FunctionDef, Program, TypeAnnotation};
36use shape_runtime::type_schema::SchemaId;
37use shape_runtime::type_system::{
38    Type, TypeAnalysisMode, TypeError, TypeErrorWithLocation, analyze_program_with_mode,
39    checking::MethodTable,
40};
41
42// Sub-modules
43pub(crate) mod comptime;
44pub(crate) mod comptime_builtins;
45pub(crate) mod comptime_target;
46mod control_flow;
47mod expressions;
48mod functions;
49mod functions_annotations;
50mod functions_foreign;
51mod helpers;
52mod helpers_binding;
53mod helpers_reference;
54mod literals;
55mod loops;
56mod patterns;
57mod statements;
58pub mod string_interpolation;
59
/// Loop compilation context.
///
/// The compiler keeps these on a stack (one per loop being compiled) so that
/// `break` / `continue` can be handled against the enclosing loop.
pub(crate) struct LoopContext {
    /// Break jump targets.
    /// NOTE(review): presumably positions of emitted break jumps that are
    /// patched once the loop end is known - confirm in the loop compiler.
    pub(crate) break_jumps: Vec<usize>,
    /// Continue jump target (`usize::MAX` = deferred, use `continue_jumps`).
    pub(crate) continue_target: usize,
    /// Optional local to store break values for expression loops.
    pub(crate) break_value_local: Option<u16>,
    /// Whether a for-in iterator is on the stack (break must pop it).
    pub(crate) iterator_on_stack: bool,
    /// Drop scope depth when the loop was entered (for break/continue early exit drops).
    pub(crate) drop_scope_depth: usize,
    /// Forward-patched continue jumps for range counter loops where the
    /// increment block is after the body (so continue must forward-jump).
    pub(crate) continue_jumps: Vec<usize>,
}
76
/// Information about an imported symbol (fields used for diagnostics/LSP).
#[derive(Debug, Clone)]
// The fields exist for diagnostics/LSP consumers, so the compiler itself may
// not read all of them.
#[allow(dead_code)]
pub(crate) struct ImportedSymbol {
    /// Original name in the source module.
    pub original_name: String,
    /// Module path the symbol was imported from.
    pub module_path: String,
    /// High-level kind of the imported symbol (function, type, etc.).
    /// `None` for legacy inlining path where kind is not tracked.
    pub kind: Option<shape_ast::module_utils::ModuleExportKind>,
}
89
/// Imported annotation binding routed through a hidden synthetic module.
#[derive(Debug, Clone)]
pub(crate) struct ImportedAnnotationSymbol {
    /// Original annotation name in the source module.
    pub original_name: String,
    /// Source module path the annotation was imported from.
    /// NOTE(review): the leading underscore suggests this field is currently
    /// unread - confirm before relying on it.
    pub _module_path: String,
    /// Hidden synthetic module name that owns the compiled annotation scope.
    pub hidden_module_name: String,
}
100
/// Module-scoped builtin function declaration with a runtime source module.
///
/// Pairs the exported callable name with the module that implements it.
#[derive(Debug, Clone)]
pub(crate) struct ModuleBuiltinFunction {
    /// The callable name as exported by the runtime/native module.
    pub export_name: String,
    /// Original source module path that provides the runtime implementation.
    pub source_module_path: String,
}
109
/// Compiler-internal taxonomy of the scope classes a name can resolve in.
///
/// Each variant has a fixed human-readable label, available via
/// [`ResolutionScope::label`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(dead_code)]
pub(crate) enum ResolutionScope {
    Local,
    ModuleBinding,
    NamedImport,
    NamespaceImport,
    TypeAssociated,
    Prelude,
    SyntaxReserved,
    InternalIntrinsic,
}

impl ResolutionScope {
    /// Returns the fixed, human-readable label for this scope class.
    ///
    /// The match is exhaustive on purpose: adding a variant forces a label to
    /// be chosen for it.
    pub(crate) const fn label(self) -> &'static str {
        match self {
            ResolutionScope::Local => "local scope",
            ResolutionScope::ModuleBinding => "module scope",
            ResolutionScope::NamedImport => "named import scope",
            ResolutionScope::NamespaceImport => "namespace import scope",
            ResolutionScope::TypeAssociated => "type-associated scope",
            ResolutionScope::Prelude => "implicit prelude scope",
            ResolutionScope::SyntaxReserved => "syntax-reserved scope",
            ResolutionScope::InternalIntrinsic => "internal intrinsic scope",
        }
    }
}
138
139/// Builtin lookup result annotated with the scope class it currently belongs to.
140#[derive(Debug, Clone, Copy, PartialEq, Eq)]
141pub(crate) enum BuiltinNameResolution {
142    Surface {
143        builtin: BuiltinFunction,
144        scope: ResolutionScope,
145    },
146    InternalOnly {
147        builtin: BuiltinFunction,
148        scope: ResolutionScope,
149    },
150}
151
152impl BuiltinNameResolution {
153    pub(crate) const fn scope(self) -> ResolutionScope {
154        match self {
155            Self::Surface { scope, .. } | Self::InternalOnly { scope, .. } => scope,
156        }
157    }
158}
159
/// Generic metadata recorded for a struct type.
///
/// The compiler stores one of these per struct type name and uses it to
/// instantiate runtime type names (e.g. `MyType<number>`) at struct-literal
/// construction sites.
#[derive(Debug, Clone)]
pub(crate) struct StructGenericInfo {
    /// Type parameters of the struct.
    pub type_params: Vec<shape_ast::ast::TypeParam>,
    /// Type annotation per string key.
    /// NOTE(review): presumably keyed by field name - confirm at the insertion site.
    pub runtime_field_types: HashMap<String, shape_ast::ast::TypeAnnotation>,
}
165
/// Whether a type's Drop impl is sync-only, async-only, or both.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DropKind {
    /// The Drop impl is sync-only.
    SyncOnly,
    /// The Drop impl is async-only.
    AsyncOnly,
    /// The Drop impl is both sync and async.
    Both,
}
173
/// Canonical compile-time contract for how a parameter is passed.
///
/// Single source of truth shared by compiler lowering and LSP rendering.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ParamPassMode {
    ByValue,
    ByRefShared,
    ByRefExclusive,
}

impl ParamPassMode {
    /// `true` for both by-reference modes, `false` for [`ParamPassMode::ByValue`].
    pub const fn is_reference(self) -> bool {
        match self {
            Self::ByValue => false,
            Self::ByRefShared | Self::ByRefExclusive => true,
        }
    }

    /// `true` only for [`ParamPassMode::ByRefExclusive`].
    pub const fn is_exclusive(self) -> bool {
        match self {
            Self::ByRefExclusive => true,
            Self::ByValue | Self::ByRefShared => false,
        }
    }
}
193
194#[derive(Debug, Clone, PartialEq, Eq)]
195pub(crate) struct FunctionReturnReferenceSummary {
196    pub param_index: usize,
197    pub mode: BorrowMode,
198    pub projection: Option<Vec<crate::mir::types::ProjectionStep>>,
199}
200
201impl From<crate::mir::analysis::ReturnReferenceSummary> for FunctionReturnReferenceSummary {
202    fn from(value: crate::mir::analysis::ReturnReferenceSummary) -> Self {
203        Self {
204            param_index: value.param_index,
205            mode: match value.kind {
206                crate::mir::types::BorrowKind::Shared => BorrowMode::Shared,
207                crate::mir::types::BorrowKind::Exclusive => BorrowMode::Exclusive,
208            },
209            projection: value.projection,
210        }
211    }
212}
213
/// Per-function blob builder for content-addressed compilation.
///
/// Uses a **snapshot** strategy: records the global instruction/constant/string
/// pool sizes at the start of function compilation, then at finalization
/// extracts the delta and remaps global indices to blob-local indices.
pub(crate) struct FunctionBlobBuilder {
    /// Function name.
    pub name: String,
    /// Global instruction index where this function's code starts.
    pub instr_start: usize,
    /// Global constant pool size when this function started compiling.
    // Recorded as part of the snapshot; `allow(dead_code)` because it is stored
    // but may never be read back.
    #[allow(dead_code)]
    pub const_start: usize,
    /// Global string pool size when this function started compiling.
    // Recorded as part of the snapshot; `allow(dead_code)` because it is stored
    // but may never be read back.
    #[allow(dead_code)]
    pub string_start: usize,
    /// Names of functions called by this function (for dependency tracking).
    pub called_functions: Vec<String>,
    /// Type schema names this function constructs.
    pub type_schemas: Vec<String>,
    /// Accumulated permissions required by this function's direct calls.
    pub required_permissions: shape_abi_v1::PermissionSet,
}
237
238impl FunctionBlobBuilder {
239    pub fn new(name: String, instr_start: usize, const_start: usize, string_start: usize) -> Self {
240        Self {
241            name,
242            instr_start,
243            const_start,
244            string_start,
245            called_functions: Vec::new(),
246            type_schemas: Vec::new(),
247            required_permissions: shape_abi_v1::PermissionSet::pure(),
248        }
249    }
250
251    /// Record that this function calls another function by name.
252    pub fn record_call(&mut self, callee_name: &str) {
253        if !self.called_functions.iter().any(|n| n == callee_name) {
254            self.called_functions.push(callee_name.to_owned());
255        }
256    }
257
258    /// Record that this function requires the given permissions
259    /// (e.g., from a stdlib module call identified by capability_tags).
260    pub fn record_permissions(&mut self, perms: &shape_abi_v1::PermissionSet) {
261        self.required_permissions = self.required_permissions.union(perms);
262    }
263
264    /// Finalize this builder into a FunctionBlob by extracting the delta from
265    /// the global program pools and remapping indices to blob-local ones.
266    pub fn finalize(
267        &self,
268        program: &crate::bytecode::BytecodeProgram,
269        func: &crate::bytecode::Function,
270        blob_name_to_hash: &HashMap<String, FunctionHash>,
271        instr_end: usize,
272    ) -> FunctionBlob {
273        use crate::bytecode::Operand;
274
275        // Extract global-indexed instructions for this function.
276        let global_instructions = &program.instructions[self.instr_start..instr_end];
277
278        // Build constant remap: global index -> local index.
279        let mut const_remap: HashMap<u16, u16> = HashMap::new();
280        let mut local_constants: Vec<Constant> = Vec::new();
281        // Build string remap similarly.
282        let mut string_remap: HashMap<u16, u16> = HashMap::new();
283        let mut local_strings: Vec<String> = Vec::new();
284        // Build function operand remap: global function index -> dependency-local index.
285        let mut func_remap: HashMap<u16, u16> = HashMap::new();
286        // Start from explicitly recorded call dependencies, then augment with
287        // function-value references found in constants/operands.
288        let mut called_functions = self.called_functions.clone();
289
290        let mut ensure_called = |callee_name: &str| -> u16 {
291            if let Some(dep_idx) = called_functions.iter().position(|n| n == callee_name) {
292                dep_idx as u16
293            } else {
294                called_functions.push(callee_name.to_owned());
295                (called_functions.len() - 1) as u16
296            }
297        };
298
299        // Scan instructions for all constant/string references and build
300        // blob-local pools with remapped indices.
301        for instr in global_instructions {
302            if let Some(ref operand) = instr.operand {
303                match operand {
304                    Operand::Const(idx) => {
305                        if !const_remap.contains_key(idx) {
306                            let local_idx = local_constants.len() as u16;
307                            const_remap.insert(*idx, local_idx);
308                            let mut constant = program.constants[*idx as usize].clone();
309                            if let Constant::Function(fid) = constant {
310                                let global_idx = fid as usize;
311                                if let Some(callee) = program.functions.get(global_idx) {
312                                    let dep_idx = ensure_called(&callee.name);
313                                    constant = Constant::Function(dep_idx);
314                                }
315                            }
316                            local_constants.push(constant);
317                        }
318                    }
319                    Operand::Property(idx) => {
320                        if !string_remap.contains_key(idx) {
321                            let local_idx = local_strings.len() as u16;
322                            string_remap.insert(*idx, local_idx);
323                            local_strings.push(program.strings[*idx as usize].clone());
324                        }
325                    }
326                    Operand::Name(sid) => {
327                        let gidx = sid.0 as u16;
328                        if !string_remap.contains_key(&gidx) {
329                            let local_idx = local_strings.len() as u16;
330                            string_remap.insert(gidx, local_idx);
331                            local_strings.push(program.strings[gidx as usize].clone());
332                        }
333                    }
334                    Operand::MethodCall { name, .. } => {
335                        let gidx = name.0 as u16;
336                        if !string_remap.contains_key(&gidx) {
337                            let local_idx = local_strings.len() as u16;
338                            string_remap.insert(gidx, local_idx);
339                            local_strings.push(program.strings[gidx as usize].clone());
340                        }
341                    }
342                    Operand::TypedMethodCall { string_id, .. } => {
343                        let gidx = *string_id;
344                        if !string_remap.contains_key(&gidx) {
345                            let local_idx = local_strings.len() as u16;
346                            string_remap.insert(gidx, local_idx);
347                            local_strings.push(program.strings[gidx as usize].clone());
348                        }
349                    }
350                    Operand::Function(fid) => {
351                        let global_idx = fid.0 as usize;
352                        if !func_remap.contains_key(&fid.0) {
353                            // Map global function index -> dependency-local index.
354                            // If this call target was not explicitly recorded (e.g. emitted via
355                            // function-valued constants), add it so content-addressed linking can
356                            // remap stable function IDs correctly.
357                            if let Some(callee) = program.functions.get(global_idx) {
358                                let dep_idx = ensure_called(&callee.name);
359                                func_remap.insert(fid.0, dep_idx);
360                            }
361                        }
362                    }
363                    _ => {}
364                }
365            }
366        }
367
368        // Remap instructions to use local indices.
369        let local_instructions: Vec<Instruction> = global_instructions
370            .iter()
371            .map(|instr| {
372                let mut remapped = instr.clone();
373                if let Some(operand) = &mut remapped.operand {
374                    match operand {
375                        Operand::Const(idx) => {
376                            if let Some(&local) = const_remap.get(idx) {
377                                *idx = local;
378                            }
379                        }
380                        Operand::Property(idx) => {
381                            if let Some(&local) = string_remap.get(idx) {
382                                *idx = local;
383                            }
384                        }
385                        Operand::Name(sid) => {
386                            if let Some(&local) = string_remap.get(&(sid.0 as u16)) {
387                                sid.0 = local as u32;
388                            }
389                        }
390                        Operand::MethodCall { name, arg_count: _ } => {
391                            if let Some(&local) = string_remap.get(&(name.0 as u16)) {
392                                name.0 = local as u32;
393                            }
394                        }
395                        Operand::TypedMethodCall { string_id, .. } => {
396                            if let Some(&local) = string_remap.get(string_id) {
397                                *string_id = local;
398                            }
399                        }
400                        Operand::Function(fid) => {
401                            if let Some(&local) = func_remap.get(&fid.0) {
402                                fid.0 = local;
403                            }
404                        }
405                        _ => {}
406                    }
407                }
408                remapped
409            })
410            .collect();
411
412        // Build dependency list from called function names.
413        // Use FunctionHash::ZERO as sentinel for forward references (not yet compiled).
414        let dependencies: Vec<FunctionHash> = called_functions
415            .iter()
416            .map(|callee| {
417                blob_name_to_hash
418                    .get(callee)
419                    .copied()
420                    .unwrap_or(FunctionHash::ZERO)
421            })
422            .collect();
423
424        // Build source map from global debug info.
425        let source_map: Vec<(usize, u32, u32)> = program
426            .debug_info
427            .line_numbers
428            .iter()
429            .filter(|(idx, _, _)| *idx >= self.instr_start && *idx < instr_end)
430            .map(|(idx, fid, line)| (idx - self.instr_start, *fid as u32, *line))
431            .collect();
432
433        // Scan instructions for CallForeign operands and collect content hashes
434        // from the program's foreign_functions table.
435        let mut foreign_deps: Vec<[u8; 32]> = Vec::new();
436        for instr in &local_instructions {
437            if instr.opcode == crate::bytecode::OpCode::CallForeign {
438                if let Some(Operand::ForeignFunction(idx)) = instr.operand {
439                    if let Some(entry) = program.foreign_functions.get(idx as usize) {
440                        if let Some(hash) = entry.content_hash {
441                            foreign_deps.push(hash);
442                        }
443                    }
444                }
445            }
446        }
447        foreign_deps.sort();
448        foreign_deps.dedup();
449
450        let mut blob = FunctionBlob {
451            content_hash: FunctionHash::ZERO,
452            name: self.name.clone(),
453            arity: func.arity,
454            param_names: func.param_names.clone(),
455            locals_count: func.locals_count,
456            is_closure: func.is_closure,
457            captures_count: func.captures_count,
458            is_async: func.is_async,
459            ref_params: func.ref_params.clone(),
460            ref_mutates: func.ref_mutates.clone(),
461            mutable_captures: func.mutable_captures.clone(),
462            frame_descriptor: func.frame_descriptor.clone(),
463            required_permissions: self.required_permissions.clone(),
464            instructions: local_instructions,
465            constants: local_constants,
466            strings: local_strings,
467            dependencies,
468            callee_names: called_functions,
469            type_schemas: self.type_schemas.clone(),
470            foreign_dependencies: foreign_deps,
471            source_map,
472        };
473        blob.finalize();
474        blob
475    }
476}
477
/// Type diagnostic mode for shared analyzer diagnostics.
///
/// NOTE(review): the exact filtering each variant applies is not visible in this
/// part of the file; the variant notes below are inferred from names only.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TypeDiagnosticMode {
    /// Presumably: report only diagnostics considered reliable - confirm.
    ReliableOnly,
    /// Presumably: report type diagnostics strictly - confirm.
    Strict,
    /// Presumably: keep going and collect all diagnostics - confirm.
    RecoverAll,
}
484
/// Expression compilation diagnostic mode.
///
/// NOTE(review): variant behavior is inferred from the names - confirm at the
/// use sites.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompileDiagnosticMode {
    /// Presumably: stop at the first compilation error.
    FailFast,
    /// Presumably: continue after errors and collect them all.
    RecoverAll,
}
490
491/// Compiler state
492pub struct BytecodeCompiler {
493    /// The program being built
494    pub(crate) program: BytecodeProgram,
495
496    /// Current function being compiled
497    pub(crate) current_function: Option<usize>,
498
499    /// Local variable mappings (name -> index)
500    pub(crate) locals: Vec<HashMap<String, u16>>,
501
502    /// ModuleBinding variable mappings (name -> index)
503    pub(crate) module_bindings: HashMap<String, u16>,
504
505    /// Next local variable index
506    pub(crate) next_local: u16,
507
508    /// Next module_binding variable index
509    pub(crate) next_global: u16,
510
511    /// Loop context stack for break/continue
512    pub(crate) loop_stack: Vec<LoopContext>,
513
514    /// Counter for synthetic closure function names
515    pub(crate) closure_counter: u64,
516
517    /// When compiling a DataTable closure method (e.g. dt.filter(row => ...)),
518    /// this holds the (schema_id, type_name) to tag the closure's row parameter as RowView.
519    pub(crate) closure_row_schema: Option<(u32, String)>,
520
521    /// Unified type metadata for the last compiled expression.
522    ///
523    /// This is the single source for relational/value kind propagation
524    /// (Table<T>, Indexed<T>, known object schema, etc.).
525    pub(crate) last_expr_type_info: Option<VariableTypeInfo>,
526
527    /// Type tracker for optimized field access
528    pub(crate) type_tracker: TypeTracker,
529
530    /// Schema ID of the last compiled expression (if it's a TypedObject).
531    /// Used for compile-time typed merge optimization.
532    pub(crate) last_expr_schema: Option<SchemaId>,
533
534    /// Numeric type of the last compiled expression (for typed opcode emission).
535    /// Set by literal compilation, variable loads, and other expression compilers.
536    /// Read by binary op compilation to emit typed opcodes (e.g., MulInt).
537    pub(crate) last_expr_numeric_type: Option<crate::type_tracking::NumericType>,
538
539    /// Result mode for the expression currently being compiled.
540    pub(crate) current_expr_result_mode: ExprResultMode,
541
542    /// Whether the last compiled expression left a raw reference on the stack.
543    ///
544    /// `auto_deref_mode` is only set for propagated ref results (identifier loads,
545    /// ref-returning calls) that should implicitly dereference in value contexts.
546    /// Explicit `&expr` results keep `raw_mode` without enabling auto-deref.
547    pub(crate) last_expr_reference_result: ExprReferenceResult,
548
549    /// Known pass modes for local callable bindings (closures / function aliases).
550    pub(crate) local_callable_pass_modes: HashMap<u16, Vec<ParamPassMode>>,
551
552    /// Known safe return-reference summaries for local callable bindings.
553    pub(crate) local_callable_return_reference_summaries:
554        HashMap<u16, FunctionReturnReferenceSummary>,
555
556    /// Known pass modes for module-binding callable values.
557    pub(crate) module_binding_callable_pass_modes: HashMap<u16, Vec<ParamPassMode>>,
558
559    /// Known safe return-reference summaries for module-binding callable values.
560    pub(crate) module_binding_callable_return_reference_summaries:
561        HashMap<u16, FunctionReturnReferenceSummary>,
562
563    /// Named functions that safely return one reference parameter unchanged.
564    pub(crate) function_return_reference_summaries: HashMap<String, FunctionReturnReferenceSummary>,
565
566    /// The return-reference summary of the function currently being compiled, if any.
567    pub(crate) current_function_return_reference_summary: Option<FunctionReturnReferenceSummary>,
568
569    /// Type inference engine for match exhaustiveness and type checking
570    pub(crate) type_inference: shape_runtime::type_system::inference::TypeInferenceEngine,
571
572    /// Track type aliases defined in the program
573    /// Maps alias name -> target type (for type validation)
574    pub(crate) type_aliases: HashMap<String, String>,
575
576    /// Current source line being compiled (for debug info)
577    pub(crate) current_line: u32,
578
579    /// Current source file ID (for multi-file debug info)
580    pub(crate) current_file_id: u16,
581
582    /// Source text (for error messages)
583    pub(crate) source_text: Option<String>,
584
585    /// Source lines (split from source_text for quick access)
586    pub(crate) source_lines: Vec<String>,
587
588    /// Imported symbols: local_name -> ImportedSymbol
589    pub(crate) imported_names: HashMap<String, ImportedSymbol>,
590    /// Imported annotations: local_name -> ImportedAnnotationSymbol
591    pub(crate) imported_annotations: HashMap<String, ImportedAnnotationSymbol>,
592    /// Qualified builtin function declarations available as module-scoped callables.
593    pub(crate) module_builtin_functions: HashMap<String, ModuleBuiltinFunction>,
594    /// Module namespace bindings introduced by `use module.path`.
595    /// Used to avoid UFCS rewrites for module calls like `duckdb.connect(...)`.
596    pub(crate) module_namespace_bindings: HashSet<String>,
597    /// Imported synthetic/local module path -> original source module path.
598    /// Used when code inside a wrapper module needs to dispatch to native exports
599    /// from the underlying source module.
600    pub(crate) module_scope_sources: HashMap<String, String>,
601    /// Active lexical module scope stack while compiling `mod Name { ... }`.
602    pub(crate) module_scope_stack: Vec<String>,
603
604    /// Known exports for import suggestions: function_name -> module_path
605    /// Used to provide helpful error messages like "Did you mean to import from...?"
606    pub(crate) known_exports: HashMap<String, String>,
607    /// Function arity bounds keyed by function name: (required_params, total_params).
608    /// Required params are non-default parameters. Defaults are only allowed
609    /// in trailing positions.
610    pub(crate) function_arity_bounds: HashMap<String, (usize, usize)>,
611    /// Function const parameter indices keyed by function name.
612    /// Const parameters must receive compile-time constant arguments at call sites.
613    pub(crate) function_const_params: HashMap<String, Vec<usize>>,
614    /// Original function definitions keyed by function name.
615    /// Used for const-template specialization at call sites.
616    pub(crate) function_defs: HashMap<String, FunctionDef>,
617    /// Foreign function definitions keyed by function name.
618    /// Used to resolve the effective (Result-wrapped) return type at call sites.
619    pub(crate) foreign_function_defs: HashMap<String, shape_ast::ast::ForeignFunctionDef>,
620    /// Cached const specializations keyed by `(base_name + const-arg fingerprint)`.
621    pub(crate) const_specializations: HashMap<String, usize>,
622    /// Monotonic counter for unique specialization symbol names.
623    pub(crate) next_const_specialization_id: u64,
624    /// Const-parameter bindings for specialized function symbols.
625    /// These bindings are exposed to comptime handlers as typed module_bindings.
626    pub(crate) specialization_const_bindings:
627        HashMap<String, Vec<(String, shape_value::ValueWord)>>,
628
629    /// Struct type definitions: type_name -> (field_names in order, definition span)
630    pub(crate) struct_types: HashMap<String, (Vec<String>, shape_ast::ast::Span)>,
631    /// Generic metadata for struct types used to instantiate runtime type names
632    /// (e.g. `MyType<number>`) at struct-literal construction sites.
633    pub(crate) struct_generic_info: HashMap<String, StructGenericInfo>,
634    /// Names of `type C` declarations with native layout metadata.
635    pub(crate) native_layout_types: HashSet<String>,
636    /// Generated conversion pair cache keys: `c_type::object_type`.
637    pub(crate) generated_native_conversion_pairs: HashSet<String>,
638
639    /// Whether the current function being compiled is async
640    pub(crate) current_function_is_async: bool,
641
642    /// Directory of the source file being compiled (for resolving relative source paths)
643    pub(crate) source_dir: Option<std::path::PathBuf>,
644
645    /// Collected compilation errors (for multi-error reporting)
646    pub(crate) errors: Vec<shape_ast::error::ShapeError>,
647
648    /// Hoisted fields from optimistic hoisting pre-pass.
649    /// Maps variable name → list of property names assigned later (e.g., a.y = 2 → "a" → ["y"]).
650    /// Used to include future property assignments in inline object schemas at compile time.
651    pub(crate) hoisted_fields: HashMap<String, Vec<String>>,
652
653    /// When compiling a variable initializer, the name of the variable being assigned to.
654    /// Used by compile_typed_object_literal to include hoisted fields in the schema.
655    pub(crate) pending_variable_name: Option<String>,
656    /// Lexical names that will later need their binding value to remain a raw reference.
657    /// This is only used to choose `Value` vs `PreserveRef` lowering for bindings; MIR
658    /// remains the sole authority for borrow legality.
659    pub(crate) future_reference_use_name_scopes: Vec<HashSet<String>>,
660
661    /// Known trait names (populated in the first pass so meta definitions can reference traits)
662    pub(crate) known_traits: std::collections::HashSet<String>,
663
664    /// Full trait definitions keyed by trait name.
665    /// Used to install default method implementations for impl blocks that omit them.
666    pub(crate) trait_defs: HashMap<String, shape_ast::ast::types::TraitDef>,
667
668    /// Extension registry for comptime execution
669    pub(crate) extension_registry: Option<Arc<Vec<shape_runtime::module_exports::ModuleExports>>>,
670
671    /// Comptime field values per type: type_name -> (field_name -> ValueWord)
672    /// These are type-level constants baked at compile time with zero runtime cost.
673    pub(crate) comptime_fields: HashMap<String, HashMap<String, shape_value::ValueWord>>,
674    /// Type diagnostic mode for shared analyzer diagnostics.
675    pub(crate) type_diagnostic_mode: TypeDiagnosticMode,
676    /// Expression compilation diagnostic mode.
677    pub(crate) compile_diagnostic_mode: CompileDiagnosticMode,
678    /// Whether this compiler instance is compiling code for comptime execution.
679    /// Enables comptime-only builtins and comptime-specific statement semantics.
680    pub(crate) comptime_mode: bool,
681    /// Functions removed by comptime annotation handlers (`remove target`).
682    /// These are still present in `program.functions` (registered in the first pass)
683    /// but must produce a clear compile-time error when called instead of jumping
684    /// to an invalid entry point.
685    pub(crate) removed_functions: HashSet<String>,
686    /// Internal guard for compiler-synthesized `__comptime__` helper calls.
687    /// User source must never access `__comptime__` directly.
688    pub(crate) allow_internal_comptime_namespace: bool,
689    /// Method table for data-driven method signature queries.
690    /// Used to replace hardcoded heuristics (e.g., is_type_preserving_table_method)
691    /// with MethodTable lookups (is_self_returning, takes_closure_with_receiver_param).
692    pub(crate) method_table: MethodTable,
693    /// Locals that are reference-typed in the current function.
694    pub(crate) ref_locals: HashSet<u16>,
695    /// Subset of ref_locals that hold exclusive (`&mut`) borrows.
696    /// Used to enforce the three concurrency rules at task boundaries.
697    pub(crate) exclusive_ref_locals: HashSet<u16>,
698    /// Subset of ref_locals that were INFERRED as by-reference (not explicitly declared `&`).
699    /// Inferred-ref params are owned values passed by reference for performance;
700    /// closures may capture them (the value is dereferenced at capture time).
701    pub(crate) inferred_ref_locals: HashSet<u16>,
702    /// Locals whose binding value is itself a first-class reference (`let r = &x`).
703    /// Reads auto-deref; writes still rebind the local.
704    pub(crate) reference_value_locals: HashSet<u16>,
705    /// Subset of reference_value_locals that hold exclusive (`&mut`) references.
706    pub(crate) exclusive_reference_value_locals: HashSet<u16>,
707    /// Local variable indices declared as `const` (immutable binding).
708    pub(crate) const_locals: HashSet<u16>,
709    /// Module binding indices declared as `const` (immutable binding).
710    pub(crate) const_module_bindings: HashSet<u16>,
711    /// Local variable indices declared as immutable `let` (not `let mut` or `var`).
712    pub(crate) immutable_locals: HashSet<u16>,
713    /// Local variable indices that are function parameters (first N locals in a function).
714    /// Used to avoid trusting inferred type hints for params with no explicit annotation.
715    pub(crate) param_locals: HashSet<u16>,
716    /// Module binding indices declared as immutable `let`.
717    pub(crate) immutable_module_bindings: HashSet<u16>,
718    /// Module bindings whose value is itself a first-class reference.
719    pub(crate) reference_value_module_bindings: HashSet<u16>,
720    /// Subset of reference_value_module_bindings that hold exclusive (`&mut`) references.
721    pub(crate) exclusive_reference_value_module_bindings: HashSet<u16>,
722    /// ModuleBinding-ref writebacks collected while compiling current call args.
723    pub(crate) call_arg_module_binding_ref_writebacks: Vec<Vec<(u16, u16)>>,
724    /// Inferred reference parameters for untyped params: function -> per-param flag.
725    pub(crate) inferred_ref_params: HashMap<String, Vec<bool>>,
726    /// Inferred mutating-reference params: function -> per-param flag.
727    pub(crate) inferred_ref_mutates: HashMap<String, Vec<bool>>,
728    /// Effective per-parameter pass mode (explicit + inferred), by function name.
729    pub(crate) inferred_param_pass_modes: HashMap<String, Vec<ParamPassMode>>,
730    /// Inferred parameter type hints for unannotated params.
731    /// Keyed by function name; each entry is a per-param optional type string.
732    pub(crate) inferred_param_type_hints: HashMap<String, Vec<Option<String>>>,
733    /// Stack of scopes, each containing locals that need Drop calls at scope exit.
734    /// Each entry is (local_index, is_async).
735    pub(crate) drop_locals: Vec<Vec<(u16, bool)>>,
736    /// Per-type drop kind: tracks whether each type has sync, async, or both drop impls.
737    /// Populated during the first-pass registration of impl blocks.
738    pub(crate) drop_type_info: HashMap<String, DropKind>,
739    /// Module bindings that need Drop calls at program exit.
740    /// Each entry is (binding_index, is_async).
741    pub(crate) drop_module_bindings: Vec<(u16, bool)>,
742    /// Mutable closure captures in the current function being compiled.
743    /// Maps captured variable name -> upvalue index (for LoadClosure/StoreClosure).
744    /// Only populated while compiling a closure body that has mutable captures.
745    pub(crate) mutable_closure_captures: HashMap<String, u16>,
746
747    /// Variables in the current scope that have been boxed into SharedCells
748    /// by a mutable closure capture. When a subsequent closure captures one
749    /// of these variables (even immutably), it must use the SharedCell path
750    /// so it shares the same mutable cell.
751    pub(crate) boxed_locals: HashSet<String>,
752
753    /// Active permission set for capability checking.
754    ///
755    /// When set, imported stdlib functions are checked against capability_tags.
756    /// If a function requires a permission not in this set, a compile error is
757    /// emitted and the function never enters bytecode.
758    ///
759    /// `None` means no checking (backwards-compatible default).
760    pub(crate) permission_set: Option<shape_abi_v1::PermissionSet>,
761
762    // -- Content-addressed blob tracking --
763    /// Active blob builder (set while compiling a function body).
764    pub(crate) current_blob_builder: Option<FunctionBlobBuilder>,
765    /// Completed function blobs (finalized with content hash).
766    pub(crate) completed_blobs: Vec<FunctionBlob>,
767    /// Map from function name to content hash (populated after finalization).
768    pub(crate) blob_name_to_hash: HashMap<String, FunctionHash>,
769    /// The content-addressed program produced alongside BytecodeProgram.
770    pub(crate) content_addressed_program: Option<ContentAddressedProgram>,
771    /// Content hash per compiled function index (function_id -> blob hash).
772    /// This is the stable identity bridge for the flat runtime format.
773    pub(crate) function_hashes_by_id: Vec<Option<FunctionHash>>,
774
775    /// Optional blob-level cache for incremental compilation.
776    /// When set, compiled blobs are stored after finalization and looked up
777    /// by content hash to avoid redundant work across compilations.
778    pub(crate) blob_cache: Option<BlobCache>,
779
780    /// Temporary function name aliases for comptime replace body.
781    /// Maps alias (e.g., `__original__`) to actual function name (e.g., `__original__myFunc`).
782    /// Set before compiling a replacement body and cleared after.
783    pub(crate) function_aliases: HashMap<String, String>,
784
785    /// Parameters of the function currently being compiled.
786    /// Used by match exhaustiveness checking to fall back to type annotations
787    /// when the type inference engine cannot resolve a parameter's type.
788    pub(crate) current_function_params: Vec<shape_ast::ast::FunctionParameter>,
789
790    /// Legacy cache of function names collected from stdlib-loaded modules.
791    ///
792    /// Internal builtin access is now gated by per-definition declaring-module
793    /// provenance, not by membership in this set.
794    pub stdlib_function_names: HashSet<String>,
795
796    /// Per-function flag: when true, `get_builtin_function` resolves `__*` names.
797    /// Toggled during compilation for definitions originating from `std::*`.
798    pub(crate) allow_internal_builtins: bool,
799
800    /// Package-scoped native library resolutions for the current host.
801    pub(crate) native_resolution_context:
802        Option<shape_runtime::native_resolution::NativeResolutionSet>,
803
804    /// Active synthetic MIR context while compiling non-function code.
805    pub(crate) non_function_mir_context_stack: Vec<String>,
806
807    /// MIR lowered for compiled functions and synthetic non-function contexts.
808    pub(crate) mir_functions: HashMap<String, crate::mir::types::MirFunction>,
809
810    /// Borrow analyses produced from lowered MIR for compiled functions and
811    /// synthetic non-function contexts.
812    pub(crate) mir_borrow_analyses: HashMap<String, crate::mir::BorrowAnalysis>,
813
814    /// Storage plans produced by the storage planning pass for each function.
815    /// Maps function name to the plan mapping each MIR slot to a `BindingStorageClass`.
816    pub(crate) mir_storage_plans: HashMap<String, crate::mir::StoragePlan>,
817
818    /// Per-function borrow summaries for interprocedural alias checking.
819    /// Describes which parameters conflict and must not alias at call sites.
820    pub(crate) function_borrow_summaries: HashMap<String, crate::mir::FunctionBorrowSummary>,
821
822    /// Per-function mapping from AST spans to MIR program points.
823    /// Used to bridge the bytecode compiler (which knows AST spans) to
824    /// MIR ownership decisions (which are keyed by `Point`).
825    pub(crate) mir_span_to_point:
826        HashMap<String, HashMap<shape_ast::ast::Span, crate::mir::types::Point>>,
827
828    /// Field-level definite-initialization and liveness analyses for compiled functions.
829    pub(crate) mir_field_analyses: HashMap<String, crate::mir::FieldAnalysis>,
830
831    /// Graph-compiled namespace map: local namespace name -> canonical module path.
832    /// Populated during graph-driven compilation to resolve qualified names.
833    pub(crate) graph_namespace_map: HashMap<String, String>,
834
835    /// Module dependency graph (set during graph-driven compilation).
836    pub(crate) module_graph: Option<std::sync::Arc<crate::module_graph::ModuleGraph>>,
837}
838
839impl Default for BytecodeCompiler {
840    fn default() -> Self {
841        Self::new()
842    }
843}
844
845mod compiler_impl_initialization;
846mod compiler_impl_reference_model;
847
848/// Infer effective reference parameters and mutation behavior without compiling bytecode.
849///
850/// Returns `(inferred_ref_params, inferred_ref_mutates)` keyed by function name.
851/// - `inferred_ref_params[f][i] == true` means parameter `i` of `f` is inferred/treated as ref.
852/// - `inferred_ref_mutates[f][i] == true` means that reference parameter is mutating (`&mut`).
853pub fn infer_reference_model(
854    program: &Program,
855) -> (HashMap<String, Vec<bool>>, HashMap<String, Vec<bool>>) {
856    let (inferred_ref_params, inferred_ref_mutates, _) =
857        BytecodeCompiler::infer_reference_model(program);
858    (inferred_ref_params, inferred_ref_mutates)
859}
860
861/// Infer effective parameter pass modes (`ByValue` / `ByRefShared` / `ByRefExclusive`)
862/// keyed by function name.
863pub fn infer_param_pass_modes(program: &Program) -> HashMap<String, Vec<ParamPassMode>> {
864    let (inferred_ref_params, inferred_ref_mutates, _) =
865        BytecodeCompiler::infer_reference_model(program);
866    BytecodeCompiler::build_param_pass_mode_map(
867        program,
868        &inferred_ref_params,
869        &inferred_ref_mutates,
870    )
871}
872
873#[cfg(all(test, feature = "deep-tests"))]
874#[path = "compiler_tests.rs"]
875mod compiler_deep;