shape_vm/compiler/mod.rs
1//! Bytecode compiler - translates AST to bytecode
2
3use shape_ast::error::{Result, ShapeError, SourceLocation};
4use std::collections::{HashMap, HashSet};
5use std::sync::Arc;
6
7use crate::blob_cache_v2::BlobCache;
8use crate::borrow_checker::BorrowMode;
9use crate::bytecode::{
10 BytecodeProgram, Constant, FunctionBlob, FunctionHash, Instruction, OpCode,
11 Program as ContentAddressedProgram,
12};
13use crate::type_tracking::{TypeTracker, VariableTypeInfo};
14use shape_ast::ast::{FunctionDef, Program, TypeAnnotation};
15use shape_runtime::type_schema::SchemaId;
16use shape_runtime::type_system::{
17 Type, TypeAnalysisMode, TypeError, TypeErrorWithLocation, analyze_program_with_mode,
18 checking::MethodTable,
19};
20
21// Sub-modules
22pub(crate) mod comptime;
23pub(crate) mod comptime_builtins;
24pub(crate) mod comptime_target;
25mod control_flow;
26mod expressions;
27mod functions;
28mod helpers;
29mod literals;
30mod loops;
31mod patterns;
32mod statements;
33pub mod string_interpolation;
34
/// Loop compilation context.
///
/// Holds the bookkeeping needed to compile `break` / `continue` inside a
/// single loop. Contexts are kept on `BytecodeCompiler::loop_stack`.
pub(crate) struct LoopContext {
    /// Break jump targets.
    // NOTE(review): presumably instruction indices of emitted `break` jumps
    // that are patched once the loop end is known — confirm against the loop
    // compiler.
    pub(crate) break_jumps: Vec<usize>,
    /// Continue jump target.
    pub(crate) continue_target: usize,
    /// Optional local to store break values for expression loops.
    /// `None` when the loop does not produce a value through `break`.
    pub(crate) break_value_local: Option<u16>,
    /// Whether a for-in iterator is on the stack (break must pop it).
    pub(crate) iterator_on_stack: bool,
    /// Drop scope depth when the loop was entered (for break/continue early exit drops).
    pub(crate) drop_scope_depth: usize,
}
48
/// Information about an imported symbol (fields used for diagnostics/LSP).
///
/// Stored in `BytecodeCompiler::imported_names`, keyed by the local name the
/// symbol is bound to.
#[derive(Debug, Clone)]
// The fields are kept for diagnostics/LSP consumers and may not be read by
// the compiler itself, hence the lint suppression.
#[allow(dead_code)]
pub(crate) struct ImportedSymbol {
    /// Original name in the source module.
    pub original_name: String,
    /// Module path the symbol was imported from.
    pub module_path: String,
}
58
/// Generic metadata recorded for a struct type.
///
/// Stored in `BytecodeCompiler::struct_generic_info` and used to instantiate
/// runtime type names (e.g. `MyType<number>`) at struct-literal construction
/// sites.
#[derive(Debug, Clone)]
pub(crate) struct StructGenericInfo {
    /// Type parameters of the struct.
    pub type_params: Vec<shape_ast::ast::TypeParam>,
    /// Type annotations used at runtime.
    // NOTE(review): presumably keyed by field name — confirm at the
    // registration site.
    pub runtime_field_types: HashMap<String, shape_ast::ast::TypeAnnotation>,
}
64
/// Whether a type's Drop impl is sync-only, async-only, or both.
///
/// Recorded per type name in `BytecodeCompiler::drop_type_info`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DropKind {
    /// The type only has a synchronous Drop impl.
    SyncOnly,
    /// The type only has an asynchronous Drop impl.
    AsyncOnly,
    /// The type has both synchronous and asynchronous Drop impls.
    Both,
}
72
/// Canonical compile-time parameter passing contract.
///
/// Compiler lowering and LSP rendering both consult this enum, so it is the
/// single source of truth for how an argument reaches its parameter.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ParamPassMode {
    /// The argument is passed by value.
    ByValue,
    /// The argument is passed by shared reference.
    ByRefShared,
    /// The argument is passed by exclusive reference.
    ByRefExclusive,
}

impl ParamPassMode {
    /// Returns `true` for either reference mode and `false` for `ByValue`.
    pub const fn is_reference(self) -> bool {
        match self {
            Self::ByValue => false,
            Self::ByRefShared | Self::ByRefExclusive => true,
        }
    }

    /// Returns `true` only for `ByRefExclusive`.
    pub const fn is_exclusive(self) -> bool {
        match self {
            Self::ByRefExclusive => true,
            Self::ByValue | Self::ByRefShared => false,
        }
    }
}
92
/// Per-function blob builder for content-addressed compilation.
///
/// Uses a **snapshot** strategy: records the global instruction/constant/string
/// pool sizes at the start of function compilation, then at finalization
/// extracts the delta and remaps global indices to blob-local indices.
///
/// While the function body is being compiled the builder also accumulates the
/// callee names, type schema names and permissions that end up in the
/// resulting `FunctionBlob`.
pub(crate) struct FunctionBlobBuilder {
    /// Function name.
    pub name: String,
    /// Global instruction index where this function's code starts.
    pub instr_start: usize,
    /// Global constant pool size when this function started compiling.
    // Recorded as part of the snapshot; `finalize` rebuilds the local constant
    // pool by scanning operands and does not read this field.
    #[allow(dead_code)]
    pub const_start: usize,
    /// Global string pool size when this function started compiling.
    // Recorded as part of the snapshot; `finalize` rebuilds the local string
    // pool by scanning operands and does not read this field.
    #[allow(dead_code)]
    pub string_start: usize,
    /// Names of functions called by this function (for dependency tracking).
    /// The position of a name in this list is its dependency-local index.
    pub called_functions: Vec<String>,
    /// Type schema names this function constructs.
    pub type_schemas: Vec<String>,
    /// Accumulated permissions required by this function's direct calls.
    pub required_permissions: shape_abi_v1::PermissionSet,
}
116
117impl FunctionBlobBuilder {
118 pub fn new(name: String, instr_start: usize, const_start: usize, string_start: usize) -> Self {
119 Self {
120 name,
121 instr_start,
122 const_start,
123 string_start,
124 called_functions: Vec::new(),
125 type_schemas: Vec::new(),
126 required_permissions: shape_abi_v1::PermissionSet::pure(),
127 }
128 }
129
130 /// Record that this function calls another function by name.
131 pub fn record_call(&mut self, callee_name: &str) {
132 if !self.called_functions.iter().any(|n| n == callee_name) {
133 self.called_functions.push(callee_name.to_owned());
134 }
135 }
136
137 /// Record that this function requires the given permissions
138 /// (e.g., from a stdlib module call identified by capability_tags).
139 pub fn record_permissions(&mut self, perms: &shape_abi_v1::PermissionSet) {
140 self.required_permissions = self.required_permissions.union(perms);
141 }
142
143 /// Finalize this builder into a FunctionBlob by extracting the delta from
144 /// the global program pools and remapping indices to blob-local ones.
145 pub fn finalize(
146 &self,
147 program: &crate::bytecode::BytecodeProgram,
148 func: &crate::bytecode::Function,
149 blob_name_to_hash: &HashMap<String, FunctionHash>,
150 instr_end: usize,
151 ) -> FunctionBlob {
152 use crate::bytecode::Operand;
153
154 // Extract global-indexed instructions for this function.
155 let global_instructions = &program.instructions[self.instr_start..instr_end];
156
157 // Build constant remap: global index -> local index.
158 let mut const_remap: HashMap<u16, u16> = HashMap::new();
159 let mut local_constants: Vec<Constant> = Vec::new();
160 // Build string remap similarly.
161 let mut string_remap: HashMap<u16, u16> = HashMap::new();
162 let mut local_strings: Vec<String> = Vec::new();
163 // Build function operand remap: global function index -> dependency-local index.
164 let mut func_remap: HashMap<u16, u16> = HashMap::new();
165 // Start from explicitly recorded call dependencies, then augment with
166 // function-value references found in constants/operands.
167 let mut called_functions = self.called_functions.clone();
168
169 let mut ensure_called = |callee_name: &str| -> u16 {
170 if let Some(dep_idx) = called_functions.iter().position(|n| n == callee_name) {
171 dep_idx as u16
172 } else {
173 called_functions.push(callee_name.to_owned());
174 (called_functions.len() - 1) as u16
175 }
176 };
177
178 // Scan instructions for all constant/string references and build
179 // blob-local pools with remapped indices.
180 for instr in global_instructions {
181 if let Some(ref operand) = instr.operand {
182 match operand {
183 Operand::Const(idx) => {
184 if !const_remap.contains_key(idx) {
185 let local_idx = local_constants.len() as u16;
186 const_remap.insert(*idx, local_idx);
187 let mut constant = program.constants[*idx as usize].clone();
188 if let Constant::Function(fid) = constant {
189 let global_idx = fid as usize;
190 if let Some(callee) = program.functions.get(global_idx) {
191 let dep_idx = ensure_called(&callee.name);
192 constant = Constant::Function(dep_idx);
193 }
194 }
195 local_constants.push(constant);
196 }
197 }
198 Operand::Property(idx) => {
199 if !string_remap.contains_key(idx) {
200 let local_idx = local_strings.len() as u16;
201 string_remap.insert(*idx, local_idx);
202 local_strings.push(program.strings[*idx as usize].clone());
203 }
204 }
205 Operand::Name(sid) => {
206 let gidx = sid.0 as u16;
207 if !string_remap.contains_key(&gidx) {
208 let local_idx = local_strings.len() as u16;
209 string_remap.insert(gidx, local_idx);
210 local_strings.push(program.strings[gidx as usize].clone());
211 }
212 }
213 Operand::MethodCall { name, .. } => {
214 let gidx = name.0 as u16;
215 if !string_remap.contains_key(&gidx) {
216 let local_idx = local_strings.len() as u16;
217 string_remap.insert(gidx, local_idx);
218 local_strings.push(program.strings[gidx as usize].clone());
219 }
220 }
221 Operand::TypedMethodCall { string_id, .. } => {
222 let gidx = *string_id;
223 if !string_remap.contains_key(&gidx) {
224 let local_idx = local_strings.len() as u16;
225 string_remap.insert(gidx, local_idx);
226 local_strings.push(program.strings[gidx as usize].clone());
227 }
228 }
229 Operand::Function(fid) => {
230 let global_idx = fid.0 as usize;
231 if !func_remap.contains_key(&fid.0) {
232 // Map global function index -> dependency-local index.
233 // If this call target was not explicitly recorded (e.g. emitted via
234 // function-valued constants), add it so content-addressed linking can
235 // remap stable function IDs correctly.
236 if let Some(callee) = program.functions.get(global_idx) {
237 let dep_idx = ensure_called(&callee.name);
238 func_remap.insert(fid.0, dep_idx);
239 }
240 }
241 }
242 _ => {}
243 }
244 }
245 }
246
247 // Remap instructions to use local indices.
248 let local_instructions: Vec<Instruction> = global_instructions
249 .iter()
250 .map(|instr| {
251 let mut remapped = instr.clone();
252 if let Some(operand) = &mut remapped.operand {
253 match operand {
254 Operand::Const(idx) => {
255 if let Some(&local) = const_remap.get(idx) {
256 *idx = local;
257 }
258 }
259 Operand::Property(idx) => {
260 if let Some(&local) = string_remap.get(idx) {
261 *idx = local;
262 }
263 }
264 Operand::Name(sid) => {
265 if let Some(&local) = string_remap.get(&(sid.0 as u16)) {
266 sid.0 = local as u32;
267 }
268 }
269 Operand::MethodCall { name, arg_count: _ } => {
270 if let Some(&local) = string_remap.get(&(name.0 as u16)) {
271 name.0 = local as u32;
272 }
273 }
274 Operand::TypedMethodCall { string_id, .. } => {
275 if let Some(&local) = string_remap.get(string_id) {
276 *string_id = local;
277 }
278 }
279 Operand::Function(fid) => {
280 if let Some(&local) = func_remap.get(&fid.0) {
281 fid.0 = local;
282 }
283 }
284 _ => {}
285 }
286 }
287 remapped
288 })
289 .collect();
290
291 // Build dependency list from called function names.
292 // Use FunctionHash::ZERO as sentinel for forward references (not yet compiled).
293 let dependencies: Vec<FunctionHash> = called_functions
294 .iter()
295 .map(|callee| {
296 blob_name_to_hash
297 .get(callee)
298 .copied()
299 .unwrap_or(FunctionHash::ZERO)
300 })
301 .collect();
302
303 // Build source map from global debug info.
304 let source_map: Vec<(usize, u32, u32)> = program
305 .debug_info
306 .line_numbers
307 .iter()
308 .filter(|(idx, _, _)| *idx >= self.instr_start && *idx < instr_end)
309 .map(|(idx, fid, line)| (idx - self.instr_start, *fid as u32, *line))
310 .collect();
311
312 // Scan instructions for CallForeign operands and collect content hashes
313 // from the program's foreign_functions table.
314 let mut foreign_deps: Vec<[u8; 32]> = Vec::new();
315 for instr in &local_instructions {
316 if instr.opcode == crate::bytecode::OpCode::CallForeign {
317 if let Some(Operand::ForeignFunction(idx)) = instr.operand {
318 if let Some(entry) = program.foreign_functions.get(idx as usize) {
319 if let Some(hash) = entry.content_hash {
320 foreign_deps.push(hash);
321 }
322 }
323 }
324 }
325 }
326 foreign_deps.sort();
327 foreign_deps.dedup();
328
329 let mut blob = FunctionBlob {
330 content_hash: FunctionHash::ZERO,
331 name: self.name.clone(),
332 arity: func.arity,
333 param_names: func.param_names.clone(),
334 locals_count: func.locals_count,
335 is_closure: func.is_closure,
336 captures_count: func.captures_count,
337 is_async: func.is_async,
338 ref_params: func.ref_params.clone(),
339 ref_mutates: func.ref_mutates.clone(),
340 mutable_captures: func.mutable_captures.clone(),
341 frame_descriptor: func.frame_descriptor.clone(),
342 required_permissions: self.required_permissions.clone(),
343 instructions: local_instructions,
344 constants: local_constants,
345 strings: local_strings,
346 dependencies,
347 callee_names: called_functions,
348 type_schemas: self.type_schemas.clone(),
349 foreign_dependencies: foreign_deps,
350 source_map,
351 };
352 blob.finalize();
353 blob
354 }
355}
356
/// Type diagnostic mode for shared analyzer diagnostics.
///
/// Stored in `BytecodeCompiler::type_diagnostic_mode`.
// NOTE(review): the variant descriptions below are inferred from the variant
// names only; confirm against the code that consumes this mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TypeDiagnosticMode {
    /// Presumably reports only type diagnostics considered reliable — TODO confirm.
    ReliableOnly,
    /// Presumably reports type diagnostics strictly — TODO confirm.
    Strict,
    /// Presumably keeps going and collects all type diagnostics — TODO confirm.
    RecoverAll,
}
363
/// Expression compilation diagnostic mode.
///
/// Stored in `BytecodeCompiler::compile_diagnostic_mode`.
// NOTE(review): the variant descriptions below are inferred from the variant
// names only; confirm against the code that consumes this mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompileDiagnosticMode {
    /// Presumably stops at the first compilation error — TODO confirm.
    FailFast,
    /// Presumably recovers and collects all compilation errors — TODO confirm.
    RecoverAll,
}
369
/// Compiler state.
///
/// Holds the bytecode program being built together with all bookkeeping used
/// while translating the AST: variable/binding tables, loop and drop scopes,
/// type tracking, function metadata, borrow/reference tracking, comptime
/// state, permission checking and content-addressed blob tracking.
pub struct BytecodeCompiler {
    /// The program being built
    pub(crate) program: BytecodeProgram,

    /// Current function being compiled
    pub(crate) current_function: Option<usize>,

    /// Local variable mappings (name -> index)
    // NOTE(review): presumably one map per nested lexical scope, innermost
    // last — confirm against the scope push/pop helpers.
    pub(crate) locals: Vec<HashMap<String, u16>>,

    /// ModuleBinding variable mappings (name -> index)
    pub(crate) module_bindings: HashMap<String, u16>,

    /// Next local variable index
    pub(crate) next_local: u16,

    /// Next module_binding variable index
    pub(crate) next_global: u16,

    /// Loop context stack for break/continue
    pub(crate) loop_stack: Vec<LoopContext>,

    /// Counter for synthetic closure function names
    pub(crate) closure_counter: u64,

    /// When compiling a DataTable closure method (e.g. dt.filter(row => ...)),
    /// this holds the (schema_id, type_name) to tag the closure's row parameter as RowView.
    pub(crate) closure_row_schema: Option<(u32, String)>,

    /// Unified type metadata for the last compiled expression.
    ///
    /// This is the single source for relational/value kind propagation
    /// (Table<T>, Indexed<T>, known object schema, etc.).
    pub(crate) last_expr_type_info: Option<VariableTypeInfo>,

    /// Type tracker for optimized field access
    pub(crate) type_tracker: TypeTracker,

    /// Schema ID of the last compiled expression (if it's a TypedObject).
    /// Used for compile-time typed merge optimization.
    pub(crate) last_expr_schema: Option<SchemaId>,

    /// Numeric type of the last compiled expression (for typed opcode emission).
    /// Set by literal compilation, variable loads, and other expression compilers.
    /// Read by binary op compilation to emit typed opcodes (e.g., MulInt).
    pub(crate) last_expr_numeric_type: Option<crate::type_tracking::NumericType>,

    /// Type inference engine for match exhaustiveness and type checking
    pub(crate) type_inference: shape_runtime::type_system::inference::TypeInferenceEngine,

    /// Track type aliases defined in the program
    /// Maps alias name -> target type (for type validation)
    pub(crate) type_aliases: HashMap<String, String>,

    /// Current source line being compiled (for debug info)
    pub(crate) current_line: u32,

    /// Current source file ID (for multi-file debug info)
    pub(crate) current_file_id: u16,

    /// Source text (for error messages)
    pub(crate) source_text: Option<String>,

    /// Source lines (split from source_text for quick access)
    pub(crate) source_lines: Vec<String>,

    /// Imported symbols: local_name -> ImportedSymbol
    pub(crate) imported_names: HashMap<String, ImportedSymbol>,
    /// Module namespace bindings introduced by `use module.path`.
    /// Used to avoid UFCS rewrites for module calls like `duckdb.connect(...)`.
    pub(crate) module_namespace_bindings: HashSet<String>,
    /// Active lexical module scope stack while compiling `mod Name { ... }`.
    pub(crate) module_scope_stack: Vec<String>,

    /// Known exports for import suggestions: function_name -> module_path
    /// Used to provide helpful error messages like "Did you mean to import from...?"
    pub(crate) known_exports: HashMap<String, String>,
    /// Function arity bounds keyed by function name: (required_params, total_params).
    /// Required params are non-default parameters. Defaults are only allowed
    /// in trailing positions.
    pub(crate) function_arity_bounds: HashMap<String, (usize, usize)>,
    /// Function const parameter indices keyed by function name.
    /// Const parameters must receive compile-time constant arguments at call sites.
    pub(crate) function_const_params: HashMap<String, Vec<usize>>,
    /// Original function definitions keyed by function name.
    /// Used for const-template specialization at call sites.
    pub(crate) function_defs: HashMap<String, FunctionDef>,
    /// Foreign function definitions keyed by function name.
    /// Used to resolve the effective (Result-wrapped) return type at call sites.
    pub(crate) foreign_function_defs: HashMap<String, shape_ast::ast::ForeignFunctionDef>,
    /// Cached const specializations keyed by `(base_name + const-arg fingerprint)`.
    // NOTE(review): the `usize` value is presumably the function index of the
    // specialized function — confirm at the specialization site.
    pub(crate) const_specializations: HashMap<String, usize>,
    /// Monotonic counter for unique specialization symbol names.
    pub(crate) next_const_specialization_id: u64,
    /// Const-parameter bindings for specialized function symbols.
    /// These bindings are exposed to comptime handlers as typed module_bindings.
    pub(crate) specialization_const_bindings:
        HashMap<String, Vec<(String, shape_value::ValueWord)>>,

    /// Struct type definitions: type_name -> (field_names in order, definition span)
    pub(crate) struct_types: HashMap<String, (Vec<String>, shape_ast::ast::Span)>,
    /// Generic metadata for struct types used to instantiate runtime type names
    /// (e.g. `MyType<number>`) at struct-literal construction sites.
    pub(crate) struct_generic_info: HashMap<String, StructGenericInfo>,
    /// Names of `type C` declarations with native layout metadata.
    pub(crate) native_layout_types: HashSet<String>,
    /// Generated conversion pair cache keys: `c_type::object_type`.
    pub(crate) generated_native_conversion_pairs: HashSet<String>,

    /// Whether the current function being compiled is async
    pub(crate) current_function_is_async: bool,

    /// Directory of the source file being compiled (for resolving relative source paths)
    pub(crate) source_dir: Option<std::path::PathBuf>,

    /// Collected compilation errors (for multi-error reporting)
    pub(crate) errors: Vec<shape_ast::error::ShapeError>,

    /// Hoisted fields from optimistic hoisting pre-pass.
    /// Maps variable name → list of property names assigned later (e.g., a.y = 2 → "a" → ["y"]).
    /// Used to include future property assignments in inline object schemas at compile time.
    pub(crate) hoisted_fields: HashMap<String, Vec<String>>,

    /// When compiling a variable initializer, the name of the variable being assigned to.
    /// Used by compile_typed_object_literal to include hoisted fields in the schema.
    pub(crate) pending_variable_name: Option<String>,

    /// Known trait names (populated in the first pass so meta definitions can reference traits)
    pub(crate) known_traits: std::collections::HashSet<String>,

    /// Full trait definitions keyed by trait name.
    /// Used to install default method implementations for impl blocks that omit them.
    pub(crate) trait_defs: HashMap<String, shape_ast::ast::types::TraitDef>,

    /// Extension registry for comptime execution
    pub(crate) extension_registry: Option<Arc<Vec<shape_runtime::module_exports::ModuleExports>>>,

    /// Comptime field values per type: type_name -> (field_name -> ValueWord)
    /// These are type-level constants baked at compile time with zero runtime cost.
    pub(crate) comptime_fields: HashMap<String, HashMap<String, shape_value::ValueWord>>,
    /// Type diagnostic mode for shared analyzer diagnostics.
    pub(crate) type_diagnostic_mode: TypeDiagnosticMode,
    /// Expression compilation diagnostic mode.
    pub(crate) compile_diagnostic_mode: CompileDiagnosticMode,
    /// Whether this compiler instance is compiling code for comptime execution.
    /// Enables comptime-only builtins and comptime-specific statement semantics.
    pub(crate) comptime_mode: bool,
    /// Internal guard for compiler-synthesized `__comptime__` helper calls.
    /// User source must never access `__comptime__` directly.
    pub(crate) allow_internal_comptime_namespace: bool,
    /// Method table for data-driven method signature queries.
    /// Used to replace hardcoded heuristics (e.g., is_type_preserving_table_method)
    /// with MethodTable lookups (is_self_returning, takes_closure_with_receiver_param).
    pub(crate) method_table: MethodTable,
    /// Borrow checker for reference lifetime tracking.
    pub(crate) borrow_checker: crate::borrow_checker::BorrowChecker,
    /// Locals that are reference-typed in the current function.
    pub(crate) ref_locals: HashSet<u16>,
    /// Subset of ref_locals that hold exclusive (`&mut`) borrows.
    /// Used to enforce the three concurrency rules at task boundaries.
    pub(crate) exclusive_ref_locals: HashSet<u16>,
    /// Local variable indices declared as `const` (immutable binding).
    pub(crate) const_locals: HashSet<u16>,
    /// Module binding indices declared as `const` (immutable binding).
    pub(crate) const_module_bindings: HashSet<u16>,
    /// Local variable indices declared as immutable `let` (not `let mut` or `var`).
    pub(crate) immutable_locals: HashSet<u16>,
    /// Local variable indices that are function parameters (first N locals in a function).
    /// Used to avoid trusting inferred type hints for params with no explicit annotation.
    pub(crate) param_locals: HashSet<u16>,
    /// Module binding indices declared as immutable `let`.
    pub(crate) immutable_module_bindings: HashSet<u16>,
    /// True while compiling function call arguments (allows `&` references).
    pub(crate) in_call_args: bool,
    /// Borrow mode for the argument currently being compiled.
    pub(crate) current_call_arg_borrow_mode: Option<BorrowMode>,
    /// ModuleBinding-ref writebacks collected while compiling current call args.
    // NOTE(review): presumably one inner list per nested call being compiled;
    // the meaning of each `(u16, u16)` pair is not visible here — confirm.
    pub(crate) call_arg_module_binding_ref_writebacks: Vec<Vec<(u16, u16)>>,
    /// Inferred reference parameters for untyped params: function -> per-param flag.
    pub(crate) inferred_ref_params: HashMap<String, Vec<bool>>,
    /// Inferred mutating-reference params: function -> per-param flag.
    pub(crate) inferred_ref_mutates: HashMap<String, Vec<bool>>,
    /// Effective per-parameter pass mode (explicit + inferred), by function name.
    pub(crate) inferred_param_pass_modes: HashMap<String, Vec<ParamPassMode>>,
    /// Inferred parameter type hints for unannotated params.
    /// Keyed by function name; each entry is a per-param optional type string.
    pub(crate) inferred_param_type_hints: HashMap<String, Vec<Option<String>>>,
    /// Stack of scopes, each containing locals that need Drop calls at scope exit.
    /// Each entry is (local_index, is_async).
    pub(crate) drop_locals: Vec<Vec<(u16, bool)>>,
    /// Per-type drop kind: tracks whether each type has sync, async, or both drop impls.
    /// Populated during the first-pass registration of impl blocks.
    pub(crate) drop_type_info: HashMap<String, DropKind>,
    /// Module bindings that need Drop calls at program exit.
    /// Each entry is (binding_index, is_async).
    pub(crate) drop_module_bindings: Vec<(u16, bool)>,
    /// Mutable closure captures in the current function being compiled.
    /// Maps captured variable name -> upvalue index (for LoadClosure/StoreClosure).
    /// Only populated while compiling a closure body that has mutable captures.
    pub(crate) mutable_closure_captures: HashMap<String, u16>,

    /// Variables in the current scope that have been boxed into SharedCells
    /// by a mutable closure capture. When a subsequent closure captures one
    /// of these variables (even immutably), it must use the SharedCell path
    /// so it shares the same mutable cell.
    pub(crate) boxed_locals: HashSet<String>,

    /// Active permission set for capability checking.
    ///
    /// When set, imported stdlib functions are checked against capability_tags.
    /// If a function requires a permission not in this set, a compile error is
    /// emitted and the function never enters bytecode.
    ///
    /// `None` means no checking (backwards-compatible default).
    pub(crate) permission_set: Option<shape_abi_v1::PermissionSet>,

    // -- Content-addressed blob tracking --
    /// Active blob builder (set while compiling a function body).
    pub(crate) current_blob_builder: Option<FunctionBlobBuilder>,
    /// Completed function blobs (finalized with content hash).
    pub(crate) completed_blobs: Vec<FunctionBlob>,
    /// Map from function name to content hash (populated after finalization).
    pub(crate) blob_name_to_hash: HashMap<String, FunctionHash>,
    /// The content-addressed program produced alongside BytecodeProgram.
    pub(crate) content_addressed_program: Option<ContentAddressedProgram>,
    /// Content hash per compiled function index (function_id -> blob hash).
    /// This is the stable identity bridge for the flat runtime format.
    pub(crate) function_hashes_by_id: Vec<Option<FunctionHash>>,

    /// Optional blob-level cache for incremental compilation.
    /// When set, compiled blobs are stored after finalization and looked up
    /// by content hash to avoid redundant work across compilations.
    pub(crate) blob_cache: Option<BlobCache>,

    /// Temporary function name aliases for comptime replace body.
    /// Maps alias (e.g., `__original__`) to actual function name (e.g., `__original__myFunc`).
    /// Set before compiling a replacement body and cleared after.
    pub(crate) function_aliases: HashMap<String, String>,

    /// Parameters of the function currently being compiled.
    /// Used by match exhaustiveness checking to fall back to type annotations
    /// when the type inference engine cannot resolve a parameter's type.
    pub(crate) current_function_params: Vec<shape_ast::ast::FunctionParameter>,

    /// Legacy cache of function names collected from stdlib-loaded modules.
    ///
    /// Internal builtin access is now gated by per-definition declaring-module
    /// provenance, not by membership in this set.
    pub stdlib_function_names: HashSet<String>,

    /// Per-function flag: when true, `get_builtin_function` resolves `__*` names.
    /// Toggled during compilation for definitions originating from `std::*`.
    pub(crate) allow_internal_builtins: bool,

    /// Package-scoped native library resolutions for the current host.
    pub(crate) native_resolution_context:
        Option<shape_runtime::native_resolution::NativeResolutionSet>,
}
629
630impl Default for BytecodeCompiler {
631 fn default() -> Self {
632 Self::new()
633 }
634}
635
636mod compiler_impl_part1;
637mod compiler_impl_part2;
638mod compiler_impl_part3;
639mod compiler_impl_part4;
640
641/// Infer effective reference parameters and mutation behavior without compiling bytecode.
642///
643/// Returns `(inferred_ref_params, inferred_ref_mutates)` keyed by function name.
644/// - `inferred_ref_params[f][i] == true` means parameter `i` of `f` is inferred/treated as ref.
645/// - `inferred_ref_mutates[f][i] == true` means that reference parameter is mutating (`&mut`).
646pub fn infer_reference_model(
647 program: &Program,
648) -> (HashMap<String, Vec<bool>>, HashMap<String, Vec<bool>>) {
649 let (inferred_ref_params, inferred_ref_mutates, _) =
650 BytecodeCompiler::infer_reference_model(program);
651 (inferred_ref_params, inferred_ref_mutates)
652}
653
654/// Infer effective parameter pass modes (`ByValue` / `ByRefShared` / `ByRefExclusive`)
655/// keyed by function name.
656pub fn infer_param_pass_modes(program: &Program) -> HashMap<String, Vec<ParamPassMode>> {
657 let (inferred_ref_params, inferred_ref_mutates, _) =
658 BytecodeCompiler::infer_reference_model(program);
659 BytecodeCompiler::build_param_pass_mode_map(
660 program,
661 &inferred_ref_params,
662 &inferred_ref_mutates,
663 )
664}
665
666#[cfg(all(test, feature = "deep-tests"))]
667#[path = "compiler_tests.rs"]
668mod compiler_deep;
669
670#[cfg(all(test, feature = "deep-tests"))]
671#[path = "borrow_deep_tests.rs"]
672mod borrow_deep_tests;