shape_vm/compiler/mod.rs
1//! Bytecode compiler - translates AST to bytecode
2
3use shape_ast::error::{Result, ShapeError, SourceLocation};
4use std::collections::{HashMap, HashSet};
5use std::sync::Arc;
6
7use crate::blob_cache_v2::BlobCache;
/// How a reference parameter borrows its target.
///
/// `Shared` corresponds to `&` and `Exclusive` to `&mut`. The type is retained
/// for codegen even though the lexical borrow checker has been removed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BorrowMode {
    /// Shared (`&`) borrow.
    Shared,
    /// Exclusive (`&mut`) borrow.
    Exclusive,
}
15
/// Result mode requested for an expression that is being compiled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ExprResultMode {
    /// NOTE(review): presumably the expression is lowered to a plain value —
    /// confirm against the expression lowering code.
    Value,
    /// NOTE(review): presumably a raw reference result is kept as a reference
    /// instead of being dereferenced — confirm against the expression lowering code.
    PreserveRef,
}
21
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
23pub(crate) struct ExprReferenceResult {
24 pub raw_mode: Option<BorrowMode>,
25 pub auto_deref_mode: Option<BorrowMode>,
26}
27
/// Key identifying a borrow place; codegen uses it to encode borrow targets.
pub type BorrowPlace = u32;
30use crate::bytecode::{
31 BuiltinFunction, BytecodeProgram, Constant, FunctionBlob, FunctionHash, Instruction, OpCode,
32 Operand, Program as ContentAddressedProgram,
33};
34use crate::type_tracking::{TypeTracker, VariableTypeInfo};
35use shape_ast::ast::{FunctionDef, Program, TypeAnnotation};
36use shape_runtime::type_schema::SchemaId;
37use shape_runtime::type_system::{
38 Type, TypeAnalysisMode, TypeError, TypeErrorWithLocation, analyze_program_with_mode,
39 checking::MethodTable,
40};
41
42// Sub-modules
43pub(crate) mod comptime;
44pub(crate) mod comptime_builtins;
45pub(crate) mod comptime_target;
46mod control_flow;
47mod expressions;
48mod functions;
49mod functions_annotations;
50mod functions_foreign;
51mod helpers;
52mod helpers_binding;
53mod helpers_reference;
54mod literals;
55mod loops;
56mod patterns;
57mod statements;
58pub mod string_interpolation;
59
/// Bookkeeping kept for a loop while it is being compiled.
pub(crate) struct LoopContext {
    /// Jump targets recorded for `break`.
    pub(crate) break_jumps: Vec<usize>,
    /// Jump target for `continue`.
    ///
    /// `usize::MAX` marks the target as deferred; `continue_jumps` is used in
    /// that case.
    pub(crate) continue_target: usize,
    /// Local that stores break values for expression loops, when there is one.
    pub(crate) break_value_local: Option<u16>,
    /// `true` while a for-in iterator sits on the stack, so `break` must pop it.
    pub(crate) iterator_on_stack: bool,
    /// Drop scope depth at loop entry, used for the early-exit drops emitted
    /// by `break` / `continue`.
    pub(crate) drop_scope_depth: usize,
    /// Continue jumps that are patched forward. Needed for range counter loops
    /// whose increment block comes after the body, so `continue` has to jump
    /// forward.
    pub(crate) continue_jumps: Vec<usize>,
}
76
77/// Information about an imported symbol (fields used for diagnostics/LSP)
78#[derive(Debug, Clone)]
79#[allow(dead_code)]
80pub(crate) struct ImportedSymbol {
81 /// Original name in the source module
82 pub original_name: String,
83 /// Module path the symbol was imported from
84 pub module_path: String,
85 /// High-level kind of the imported symbol (function, type, etc.)
86 /// `None` for legacy inlining path where kind is not tracked.
87 pub kind: Option<shape_ast::module_utils::ModuleExportKind>,
88}
89
/// An imported annotation binding that is routed through a hidden synthetic
/// module.
#[derive(Debug, Clone)]
pub(crate) struct ImportedAnnotationSymbol {
    /// Name the annotation has in its source module.
    pub original_name: String,
    /// Path of the source module the annotation was imported from.
    pub _module_path: String,
    /// Name of the hidden synthetic module owning the compiled annotation scope.
    pub hidden_module_name: String,
}
100
/// A builtin function declared at module scope, together with the runtime
/// module that supplies it.
#[derive(Debug, Clone)]
pub(crate) struct ModuleBuiltinFunction {
    /// Callable name, as the runtime/native module exports it.
    pub export_name: String,
    /// Path of the original source module providing the runtime implementation.
    pub source_module_path: String,
}
109
/// Scope classes the compiler distinguishes internally during name resolution.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(dead_code)]
pub(crate) enum ResolutionScope {
    Local,
    ModuleBinding,
    NamedImport,
    NamespaceImport,
    TypeAssociated,
    Prelude,
    SyntaxReserved,
    InternalIntrinsic,
}

impl ResolutionScope {
    /// Returns the fixed, human-readable label for this scope class.
    pub(crate) const fn label(self) -> &'static str {
        let text = match self {
            Self::Local => "local scope",
            Self::ModuleBinding => "module scope",
            Self::NamedImport => "named import scope",
            Self::NamespaceImport => "namespace import scope",
            Self::TypeAssociated => "type-associated scope",
            Self::Prelude => "implicit prelude scope",
            Self::SyntaxReserved => "syntax-reserved scope",
            Self::InternalIntrinsic => "internal intrinsic scope",
        };
        text
    }
}
138
139/// Builtin lookup result annotated with the scope class it currently belongs to.
140#[derive(Debug, Clone, Copy, PartialEq, Eq)]
141pub(crate) enum BuiltinNameResolution {
142 Surface {
143 builtin: BuiltinFunction,
144 scope: ResolutionScope,
145 },
146 InternalOnly {
147 builtin: BuiltinFunction,
148 scope: ResolutionScope,
149 },
150}
151
152impl BuiltinNameResolution {
153 pub(crate) const fn scope(self) -> ResolutionScope {
154 match self {
155 Self::Surface { scope, .. } | Self::InternalOnly { scope, .. } => scope,
156 }
157 }
158}
159
160#[derive(Debug, Clone)]
161pub(crate) struct StructGenericInfo {
162 pub type_params: Vec<shape_ast::ast::TypeParam>,
163 pub runtime_field_types: HashMap<String, shape_ast::ast::TypeAnnotation>,
164}
165
/// Classifies the Drop implementation of a type: sync-only, async-only, or
/// both.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DropKind {
    /// The type has only a sync Drop impl.
    SyncOnly,
    /// The type has only an async Drop impl.
    AsyncOnly,
    /// The type has both sync and async Drop impls.
    Both,
}
173
/// The canonical compile-time contract for how a parameter is passed.
///
/// Compiler lowering and LSP rendering both treat this as their single source
/// of truth.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ParamPassMode {
    /// Passed by value.
    ByValue,
    /// Passed by shared reference.
    ByRefShared,
    /// Passed by exclusive reference.
    ByRefExclusive,
}

impl ParamPassMode {
    /// Returns `true` for both by-reference modes and `false` for `ByValue`.
    pub const fn is_reference(self) -> bool {
        match self {
            Self::ByValue => false,
            Self::ByRefShared | Self::ByRefExclusive => true,
        }
    }

    /// Returns `true` only for `ByRefExclusive`.
    pub const fn is_exclusive(self) -> bool {
        match self {
            Self::ByRefExclusive => true,
            Self::ByValue | Self::ByRefShared => false,
        }
    }
}
193
194#[derive(Debug, Clone, PartialEq, Eq)]
195pub(crate) struct FunctionReturnReferenceSummary {
196 pub param_index: usize,
197 pub mode: BorrowMode,
198 pub projection: Option<Vec<crate::mir::types::ProjectionStep>>,
199}
200
201impl From<crate::mir::analysis::ReturnReferenceSummary> for FunctionReturnReferenceSummary {
202 fn from(value: crate::mir::analysis::ReturnReferenceSummary) -> Self {
203 Self {
204 param_index: value.param_index,
205 mode: match value.kind {
206 crate::mir::types::BorrowKind::Shared => BorrowMode::Shared,
207 crate::mir::types::BorrowKind::Exclusive => BorrowMode::Exclusive,
208 },
209 projection: value.projection,
210 }
211 }
212}
213
214/// Per-function blob builder for content-addressed compilation.
215///
216/// Uses a **snapshot** strategy: records the global instruction/constant/string
217/// pool sizes at the start of function compilation, then at finalization
218/// extracts the delta and remaps global indices to blob-local indices.
219pub(crate) struct FunctionBlobBuilder {
220 /// Function name.
221 pub name: String,
222 /// Global instruction index where this function's code starts.
223 pub instr_start: usize,
224 /// Global constant pool size when this function started compiling.
225 #[allow(dead_code)]
226 pub const_start: usize,
227 /// Global string pool size when this function started compiling.
228 #[allow(dead_code)]
229 pub string_start: usize,
230 /// Names of functions called by this function (for dependency tracking).
231 pub called_functions: Vec<String>,
232 /// Type schema names this function constructs.
233 pub type_schemas: Vec<String>,
234 /// Accumulated permissions required by this function's direct calls.
235 pub required_permissions: shape_abi_v1::PermissionSet,
236}
237
238impl FunctionBlobBuilder {
239 pub fn new(name: String, instr_start: usize, const_start: usize, string_start: usize) -> Self {
240 Self {
241 name,
242 instr_start,
243 const_start,
244 string_start,
245 called_functions: Vec::new(),
246 type_schemas: Vec::new(),
247 required_permissions: shape_abi_v1::PermissionSet::pure(),
248 }
249 }
250
251 /// Record that this function calls another function by name.
252 pub fn record_call(&mut self, callee_name: &str) {
253 if !self.called_functions.iter().any(|n| n == callee_name) {
254 self.called_functions.push(callee_name.to_owned());
255 }
256 }
257
258 /// Record that this function requires the given permissions
259 /// (e.g., from a stdlib module call identified by capability_tags).
260 pub fn record_permissions(&mut self, perms: &shape_abi_v1::PermissionSet) {
261 self.required_permissions = self.required_permissions.union(perms);
262 }
263
264 /// Finalize this builder into a FunctionBlob by extracting the delta from
265 /// the global program pools and remapping indices to blob-local ones.
266 pub fn finalize(
267 &self,
268 program: &crate::bytecode::BytecodeProgram,
269 func: &crate::bytecode::Function,
270 blob_name_to_hash: &HashMap<String, FunctionHash>,
271 instr_end: usize,
272 ) -> FunctionBlob {
273 use crate::bytecode::Operand;
274
275 // Extract global-indexed instructions for this function.
276 let global_instructions = &program.instructions[self.instr_start..instr_end];
277
278 // Build constant remap: global index -> local index.
279 let mut const_remap: HashMap<u16, u16> = HashMap::new();
280 let mut local_constants: Vec<Constant> = Vec::new();
281 // Build string remap similarly.
282 let mut string_remap: HashMap<u16, u16> = HashMap::new();
283 let mut local_strings: Vec<String> = Vec::new();
284 // Build function operand remap: global function index -> dependency-local index.
285 let mut func_remap: HashMap<u16, u16> = HashMap::new();
286 // Start from explicitly recorded call dependencies, then augment with
287 // function-value references found in constants/operands.
288 let mut called_functions = self.called_functions.clone();
289
290 let mut ensure_called = |callee_name: &str| -> u16 {
291 if let Some(dep_idx) = called_functions.iter().position(|n| n == callee_name) {
292 dep_idx as u16
293 } else {
294 called_functions.push(callee_name.to_owned());
295 (called_functions.len() - 1) as u16
296 }
297 };
298
299 // Scan instructions for all constant/string references and build
300 // blob-local pools with remapped indices.
301 for instr in global_instructions {
302 if let Some(ref operand) = instr.operand {
303 match operand {
304 Operand::Const(idx) => {
305 if !const_remap.contains_key(idx) {
306 let local_idx = local_constants.len() as u16;
307 const_remap.insert(*idx, local_idx);
308 let mut constant = program.constants[*idx as usize].clone();
309 if let Constant::Function(fid) = constant {
310 let global_idx = fid as usize;
311 if let Some(callee) = program.functions.get(global_idx) {
312 let dep_idx = ensure_called(&callee.name);
313 constant = Constant::Function(dep_idx);
314 }
315 }
316 local_constants.push(constant);
317 }
318 }
319 Operand::Property(idx) => {
320 if !string_remap.contains_key(idx) {
321 let local_idx = local_strings.len() as u16;
322 string_remap.insert(*idx, local_idx);
323 local_strings.push(program.strings[*idx as usize].clone());
324 }
325 }
326 Operand::Name(sid) => {
327 let gidx = sid.0 as u16;
328 if !string_remap.contains_key(&gidx) {
329 let local_idx = local_strings.len() as u16;
330 string_remap.insert(gidx, local_idx);
331 local_strings.push(program.strings[gidx as usize].clone());
332 }
333 }
334 Operand::MethodCall { name, .. } => {
335 let gidx = name.0 as u16;
336 if !string_remap.contains_key(&gidx) {
337 let local_idx = local_strings.len() as u16;
338 string_remap.insert(gidx, local_idx);
339 local_strings.push(program.strings[gidx as usize].clone());
340 }
341 }
342 Operand::TypedMethodCall { string_id, .. } => {
343 let gidx = *string_id;
344 if !string_remap.contains_key(&gidx) {
345 let local_idx = local_strings.len() as u16;
346 string_remap.insert(gidx, local_idx);
347 local_strings.push(program.strings[gidx as usize].clone());
348 }
349 }
350 Operand::Function(fid) => {
351 let global_idx = fid.0 as usize;
352 if !func_remap.contains_key(&fid.0) {
353 // Map global function index -> dependency-local index.
354 // If this call target was not explicitly recorded (e.g. emitted via
355 // function-valued constants), add it so content-addressed linking can
356 // remap stable function IDs correctly.
357 if let Some(callee) = program.functions.get(global_idx) {
358 let dep_idx = ensure_called(&callee.name);
359 func_remap.insert(fid.0, dep_idx);
360 }
361 }
362 }
363 _ => {}
364 }
365 }
366 }
367
368 // Remap instructions to use local indices.
369 let local_instructions: Vec<Instruction> = global_instructions
370 .iter()
371 .map(|instr| {
372 let mut remapped = instr.clone();
373 if let Some(operand) = &mut remapped.operand {
374 match operand {
375 Operand::Const(idx) => {
376 if let Some(&local) = const_remap.get(idx) {
377 *idx = local;
378 }
379 }
380 Operand::Property(idx) => {
381 if let Some(&local) = string_remap.get(idx) {
382 *idx = local;
383 }
384 }
385 Operand::Name(sid) => {
386 if let Some(&local) = string_remap.get(&(sid.0 as u16)) {
387 sid.0 = local as u32;
388 }
389 }
390 Operand::MethodCall { name, arg_count: _ } => {
391 if let Some(&local) = string_remap.get(&(name.0 as u16)) {
392 name.0 = local as u32;
393 }
394 }
395 Operand::TypedMethodCall { string_id, .. } => {
396 if let Some(&local) = string_remap.get(string_id) {
397 *string_id = local;
398 }
399 }
400 Operand::Function(fid) => {
401 if let Some(&local) = func_remap.get(&fid.0) {
402 fid.0 = local;
403 }
404 }
405 _ => {}
406 }
407 }
408 remapped
409 })
410 .collect();
411
412 // Build dependency list from called function names.
413 // Use FunctionHash::ZERO as sentinel for forward references (not yet compiled).
414 let dependencies: Vec<FunctionHash> = called_functions
415 .iter()
416 .map(|callee| {
417 blob_name_to_hash
418 .get(callee)
419 .copied()
420 .unwrap_or(FunctionHash::ZERO)
421 })
422 .collect();
423
424 // Build source map from global debug info.
425 let source_map: Vec<(usize, u32, u32)> = program
426 .debug_info
427 .line_numbers
428 .iter()
429 .filter(|(idx, _, _)| *idx >= self.instr_start && *idx < instr_end)
430 .map(|(idx, fid, line)| (idx - self.instr_start, *fid as u32, *line))
431 .collect();
432
433 // Scan instructions for CallForeign operands and collect content hashes
434 // from the program's foreign_functions table.
435 let mut foreign_deps: Vec<[u8; 32]> = Vec::new();
436 for instr in &local_instructions {
437 if instr.opcode == crate::bytecode::OpCode::CallForeign {
438 if let Some(Operand::ForeignFunction(idx)) = instr.operand {
439 if let Some(entry) = program.foreign_functions.get(idx as usize) {
440 if let Some(hash) = entry.content_hash {
441 foreign_deps.push(hash);
442 }
443 }
444 }
445 }
446 }
447 foreign_deps.sort();
448 foreign_deps.dedup();
449
450 let mut blob = FunctionBlob {
451 content_hash: FunctionHash::ZERO,
452 name: self.name.clone(),
453 arity: func.arity,
454 param_names: func.param_names.clone(),
455 locals_count: func.locals_count,
456 is_closure: func.is_closure,
457 captures_count: func.captures_count,
458 is_async: func.is_async,
459 ref_params: func.ref_params.clone(),
460 ref_mutates: func.ref_mutates.clone(),
461 mutable_captures: func.mutable_captures.clone(),
462 frame_descriptor: func.frame_descriptor.clone(),
463 required_permissions: self.required_permissions.clone(),
464 instructions: local_instructions,
465 constants: local_constants,
466 strings: local_strings,
467 dependencies,
468 callee_names: called_functions,
469 type_schemas: self.type_schemas.clone(),
470 foreign_dependencies: foreign_deps,
471 source_map,
472 };
473 blob.finalize();
474 blob
475 }
476}
477
/// Selects how type diagnostics are handled.
///
/// NOTE(review): the exact policy behind each variant is not visible in this
/// part of the file — confirm with the code that consumes this mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TypeDiagnosticMode {
    ReliableOnly,
    Strict,
    RecoverAll,
}
484
/// Selects how diagnostics raised during compilation are handled.
///
/// NOTE(review): presumably `FailFast` stops at the first error while
/// `RecoverAll` keeps going and collects errors — confirm with the code that
/// consumes this mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompileDiagnosticMode {
    FailFast,
    RecoverAll,
}
490
491/// Compiler state
492pub struct BytecodeCompiler {
493 /// The program being built
494 pub(crate) program: BytecodeProgram,
495
496 /// Current function being compiled
497 pub(crate) current_function: Option<usize>,
498
499 /// Local variable mappings (name -> index)
500 pub(crate) locals: Vec<HashMap<String, u16>>,
501
502 /// ModuleBinding variable mappings (name -> index)
503 pub(crate) module_bindings: HashMap<String, u16>,
504
505 /// Next local variable index
506 pub(crate) next_local: u16,
507
508 /// Next module_binding variable index
509 pub(crate) next_global: u16,
510
511 /// Loop context stack for break/continue
512 pub(crate) loop_stack: Vec<LoopContext>,
513
514 /// Counter for synthetic closure function names
515 pub(crate) closure_counter: u64,
516
517 /// When compiling a DataTable closure method (e.g. dt.filter(row => ...)),
518 /// this holds the (schema_id, type_name) to tag the closure's row parameter as RowView.
519 pub(crate) closure_row_schema: Option<(u32, String)>,
520
521 /// Unified type metadata for the last compiled expression.
522 ///
523 /// This is the single source for relational/value kind propagation
524 /// (Table<T>, Indexed<T>, known object schema, etc.).
525 pub(crate) last_expr_type_info: Option<VariableTypeInfo>,
526
527 /// Type tracker for optimized field access
528 pub(crate) type_tracker: TypeTracker,
529
530 /// Schema ID of the last compiled expression (if it's a TypedObject).
531 /// Used for compile-time typed merge optimization.
532 pub(crate) last_expr_schema: Option<SchemaId>,
533
534 /// Numeric type of the last compiled expression (for typed opcode emission).
535 /// Set by literal compilation, variable loads, and other expression compilers.
536 /// Read by binary op compilation to emit typed opcodes (e.g., MulInt).
537 pub(crate) last_expr_numeric_type: Option<crate::type_tracking::NumericType>,
538
539 /// Result mode for the expression currently being compiled.
540 pub(crate) current_expr_result_mode: ExprResultMode,
541
542 /// Whether the last compiled expression left a raw reference on the stack.
543 ///
544 /// `auto_deref_mode` is only set for propagated ref results (identifier loads,
545 /// ref-returning calls) that should implicitly dereference in value contexts.
546 /// Explicit `&expr` results keep `raw_mode` without enabling auto-deref.
547 pub(crate) last_expr_reference_result: ExprReferenceResult,
548
549 /// Known pass modes for local callable bindings (closures / function aliases).
550 pub(crate) local_callable_pass_modes: HashMap<u16, Vec<ParamPassMode>>,
551
552 /// Known safe return-reference summaries for local callable bindings.
553 pub(crate) local_callable_return_reference_summaries:
554 HashMap<u16, FunctionReturnReferenceSummary>,
555
556 /// Known pass modes for module-binding callable values.
557 pub(crate) module_binding_callable_pass_modes: HashMap<u16, Vec<ParamPassMode>>,
558
559 /// Known safe return-reference summaries for module-binding callable values.
560 pub(crate) module_binding_callable_return_reference_summaries:
561 HashMap<u16, FunctionReturnReferenceSummary>,
562
563 /// Named functions that safely return one reference parameter unchanged.
564 pub(crate) function_return_reference_summaries: HashMap<String, FunctionReturnReferenceSummary>,
565
566 /// The return-reference summary of the function currently being compiled, if any.
567 pub(crate) current_function_return_reference_summary: Option<FunctionReturnReferenceSummary>,
568
569 /// Type inference engine for match exhaustiveness and type checking
570 pub(crate) type_inference: shape_runtime::type_system::inference::TypeInferenceEngine,
571
572 /// Track type aliases defined in the program
573 /// Maps alias name -> target type (for type validation)
574 pub(crate) type_aliases: HashMap<String, String>,
575
576 /// Current source line being compiled (for debug info)
577 pub(crate) current_line: u32,
578
579 /// Current source file ID (for multi-file debug info)
580 pub(crate) current_file_id: u16,
581
582 /// Source text (for error messages)
583 pub(crate) source_text: Option<String>,
584
585 /// Source lines (split from source_text for quick access)
586 pub(crate) source_lines: Vec<String>,
587
588 /// Imported symbols: local_name -> ImportedSymbol
589 pub(crate) imported_names: HashMap<String, ImportedSymbol>,
590 /// Imported annotations: local_name -> ImportedAnnotationSymbol
591 pub(crate) imported_annotations: HashMap<String, ImportedAnnotationSymbol>,
592 /// Qualified builtin function declarations available as module-scoped callables.
593 pub(crate) module_builtin_functions: HashMap<String, ModuleBuiltinFunction>,
594 /// Module namespace bindings introduced by `use module.path`.
595 /// Used to avoid UFCS rewrites for module calls like `duckdb.connect(...)`.
596 pub(crate) module_namespace_bindings: HashSet<String>,
597 /// Imported synthetic/local module path -> original source module path.
598 /// Used when code inside a wrapper module needs to dispatch to native exports
599 /// from the underlying source module.
600 pub(crate) module_scope_sources: HashMap<String, String>,
601 /// Active lexical module scope stack while compiling `mod Name { ... }`.
602 pub(crate) module_scope_stack: Vec<String>,
603
604 /// Known exports for import suggestions: function_name -> module_path
605 /// Used to provide helpful error messages like "Did you mean to import from...?"
606 pub(crate) known_exports: HashMap<String, String>,
607 /// Function arity bounds keyed by function name: (required_params, total_params).
608 /// Required params are non-default parameters. Defaults are only allowed
609 /// in trailing positions.
610 pub(crate) function_arity_bounds: HashMap<String, (usize, usize)>,
611 /// Function const parameter indices keyed by function name.
612 /// Const parameters must receive compile-time constant arguments at call sites.
613 pub(crate) function_const_params: HashMap<String, Vec<usize>>,
614 /// Original function definitions keyed by function name.
615 /// Used for const-template specialization at call sites.
616 pub(crate) function_defs: HashMap<String, FunctionDef>,
617 /// Foreign function definitions keyed by function name.
618 /// Used to resolve the effective (Result-wrapped) return type at call sites.
619 pub(crate) foreign_function_defs: HashMap<String, shape_ast::ast::ForeignFunctionDef>,
620 /// Cached const specializations keyed by `(base_name + const-arg fingerprint)`.
621 pub(crate) const_specializations: HashMap<String, usize>,
622 /// Monotonic counter for unique specialization symbol names.
623 pub(crate) next_const_specialization_id: u64,
624 /// Const-parameter bindings for specialized function symbols.
625 /// These bindings are exposed to comptime handlers as typed module_bindings.
626 pub(crate) specialization_const_bindings:
627 HashMap<String, Vec<(String, shape_value::ValueWord)>>,
628
629 /// Struct type definitions: type_name -> (field_names in order, definition span)
630 pub(crate) struct_types: HashMap<String, (Vec<String>, shape_ast::ast::Span)>,
631 /// Generic metadata for struct types used to instantiate runtime type names
632 /// (e.g. `MyType<number>`) at struct-literal construction sites.
633 pub(crate) struct_generic_info: HashMap<String, StructGenericInfo>,
634 /// Names of `type C` declarations with native layout metadata.
635 pub(crate) native_layout_types: HashSet<String>,
636 /// Generated conversion pair cache keys: `c_type::object_type`.
637 pub(crate) generated_native_conversion_pairs: HashSet<String>,
638
639 /// Whether the current function being compiled is async
640 pub(crate) current_function_is_async: bool,
641
642 /// Directory of the source file being compiled (for resolving relative source paths)
643 pub(crate) source_dir: Option<std::path::PathBuf>,
644
645 /// Collected compilation errors (for multi-error reporting)
646 pub(crate) errors: Vec<shape_ast::error::ShapeError>,
647
648 /// Hoisted fields from optimistic hoisting pre-pass.
649 /// Maps variable name → list of property names assigned later (e.g., a.y = 2 → "a" → ["y"]).
650 /// Used to include future property assignments in inline object schemas at compile time.
651 pub(crate) hoisted_fields: HashMap<String, Vec<String>>,
652
653 /// When compiling a variable initializer, the name of the variable being assigned to.
654 /// Used by compile_typed_object_literal to include hoisted fields in the schema.
655 pub(crate) pending_variable_name: Option<String>,
656 /// Lexical names that will later need their binding value to remain a raw reference.
657 /// This is only used to choose `Value` vs `PreserveRef` lowering for bindings; MIR
658 /// remains the sole authority for borrow legality.
659 pub(crate) future_reference_use_name_scopes: Vec<HashSet<String>>,
660
661 /// Known trait names (populated in the first pass so meta definitions can reference traits)
662 pub(crate) known_traits: std::collections::HashSet<String>,
663
664 /// Full trait definitions keyed by trait name.
665 /// Used to install default method implementations for impl blocks that omit them.
666 pub(crate) trait_defs: HashMap<String, shape_ast::ast::types::TraitDef>,
667
668 /// Extension registry for comptime execution
669 pub(crate) extension_registry: Option<Arc<Vec<shape_runtime::module_exports::ModuleExports>>>,
670
671 /// Comptime field values per type: type_name -> (field_name -> ValueWord)
672 /// These are type-level constants baked at compile time with zero runtime cost.
673 pub(crate) comptime_fields: HashMap<String, HashMap<String, shape_value::ValueWord>>,
674 /// Type diagnostic mode for shared analyzer diagnostics.
675 pub(crate) type_diagnostic_mode: TypeDiagnosticMode,
676 /// Expression compilation diagnostic mode.
677 pub(crate) compile_diagnostic_mode: CompileDiagnosticMode,
678 /// Whether this compiler instance is compiling code for comptime execution.
679 /// Enables comptime-only builtins and comptime-specific statement semantics.
680 pub(crate) comptime_mode: bool,
681 /// Functions removed by comptime annotation handlers (`remove target`).
682 /// These are still present in `program.functions` (registered in the first pass)
683 /// but must produce a clear compile-time error when called instead of jumping
684 /// to an invalid entry point.
685 pub(crate) removed_functions: HashSet<String>,
686 /// Internal guard for compiler-synthesized `__comptime__` helper calls.
687 /// User source must never access `__comptime__` directly.
688 pub(crate) allow_internal_comptime_namespace: bool,
689 /// Method table for data-driven method signature queries.
690 /// Used to replace hardcoded heuristics (e.g., is_type_preserving_table_method)
691 /// with MethodTable lookups (is_self_returning, takes_closure_with_receiver_param).
692 pub(crate) method_table: MethodTable,
693 /// Locals that are reference-typed in the current function.
694 pub(crate) ref_locals: HashSet<u16>,
695 /// Subset of ref_locals that hold exclusive (`&mut`) borrows.
696 /// Used to enforce the three concurrency rules at task boundaries.
697 pub(crate) exclusive_ref_locals: HashSet<u16>,
698 /// Subset of ref_locals that were INFERRED as by-reference (not explicitly declared `&`).
699 /// Inferred-ref params are owned values passed by reference for performance;
700 /// closures may capture them (the value is dereferenced at capture time).
701 pub(crate) inferred_ref_locals: HashSet<u16>,
702 /// Locals whose binding value is itself a first-class reference (`let r = &x`).
703 /// Reads auto-deref; writes still rebind the local.
704 pub(crate) reference_value_locals: HashSet<u16>,
705 /// Subset of reference_value_locals that hold exclusive (`&mut`) references.
706 pub(crate) exclusive_reference_value_locals: HashSet<u16>,
707 /// Local variable indices declared as `const` (immutable binding).
708 pub(crate) const_locals: HashSet<u16>,
709 /// Module binding indices declared as `const` (immutable binding).
710 pub(crate) const_module_bindings: HashSet<u16>,
711 /// Local variable indices declared as immutable `let` (not `let mut` or `var`).
712 pub(crate) immutable_locals: HashSet<u16>,
713 /// Local variable indices that are function parameters (first N locals in a function).
714 /// Used to avoid trusting inferred type hints for params with no explicit annotation.
715 pub(crate) param_locals: HashSet<u16>,
716 /// Module binding indices declared as immutable `let`.
717 pub(crate) immutable_module_bindings: HashSet<u16>,
718 /// Module bindings whose value is itself a first-class reference.
719 pub(crate) reference_value_module_bindings: HashSet<u16>,
720 /// Subset of reference_value_module_bindings that hold exclusive (`&mut`) references.
721 pub(crate) exclusive_reference_value_module_bindings: HashSet<u16>,
722 /// ModuleBinding-ref writebacks collected while compiling current call args.
723 pub(crate) call_arg_module_binding_ref_writebacks: Vec<Vec<(u16, u16)>>,
724 /// Inferred reference parameters for untyped params: function -> per-param flag.
725 pub(crate) inferred_ref_params: HashMap<String, Vec<bool>>,
726 /// Inferred mutating-reference params: function -> per-param flag.
727 pub(crate) inferred_ref_mutates: HashMap<String, Vec<bool>>,
728 /// Effective per-parameter pass mode (explicit + inferred), by function name.
729 pub(crate) inferred_param_pass_modes: HashMap<String, Vec<ParamPassMode>>,
730 /// Inferred parameter type hints for unannotated params.
731 /// Keyed by function name; each entry is a per-param optional type string.
732 pub(crate) inferred_param_type_hints: HashMap<String, Vec<Option<String>>>,
733 /// Stack of scopes, each containing locals that need Drop calls at scope exit.
734 /// Each entry is (local_index, is_async).
735 pub(crate) drop_locals: Vec<Vec<(u16, bool)>>,
736 /// Per-type drop kind: tracks whether each type has sync, async, or both drop impls.
737 /// Populated during the first-pass registration of impl blocks.
738 pub(crate) drop_type_info: HashMap<String, DropKind>,
739 /// Module bindings that need Drop calls at program exit.
740 /// Each entry is (binding_index, is_async).
741 pub(crate) drop_module_bindings: Vec<(u16, bool)>,
742 /// Mutable closure captures in the current function being compiled.
743 /// Maps captured variable name -> upvalue index (for LoadClosure/StoreClosure).
744 /// Only populated while compiling a closure body that has mutable captures.
745 pub(crate) mutable_closure_captures: HashMap<String, u16>,
746
747 /// Variables in the current scope that have been boxed into SharedCells
748 /// by a mutable closure capture. When a subsequent closure captures one
749 /// of these variables (even immutably), it must use the SharedCell path
750 /// so it shares the same mutable cell.
751 pub(crate) boxed_locals: HashSet<String>,
752
753 /// Active permission set for capability checking.
754 ///
755 /// When set, imported stdlib functions are checked against capability_tags.
756 /// If a function requires a permission not in this set, a compile error is
757 /// emitted and the function never enters bytecode.
758 ///
759 /// `None` means no checking (backwards-compatible default).
760 pub(crate) permission_set: Option<shape_abi_v1::PermissionSet>,
761
762 // -- Content-addressed blob tracking --
763 /// Active blob builder (set while compiling a function body).
764 pub(crate) current_blob_builder: Option<FunctionBlobBuilder>,
765 /// Completed function blobs (finalized with content hash).
766 pub(crate) completed_blobs: Vec<FunctionBlob>,
767 /// Map from function name to content hash (populated after finalization).
768 pub(crate) blob_name_to_hash: HashMap<String, FunctionHash>,
769 /// The content-addressed program produced alongside BytecodeProgram.
770 pub(crate) content_addressed_program: Option<ContentAddressedProgram>,
771 /// Content hash per compiled function index (function_id -> blob hash).
772 /// This is the stable identity bridge for the flat runtime format.
773 pub(crate) function_hashes_by_id: Vec<Option<FunctionHash>>,
774
775 /// Optional blob-level cache for incremental compilation.
776 /// When set, compiled blobs are stored after finalization and looked up
777 /// by content hash to avoid redundant work across compilations.
778 pub(crate) blob_cache: Option<BlobCache>,
779
780 /// Temporary function name aliases for comptime replace body.
781 /// Maps alias (e.g., `__original__`) to actual function name (e.g., `__original__myFunc`).
782 /// Set before compiling a replacement body and cleared after.
783 pub(crate) function_aliases: HashMap<String, String>,
784
785 /// Parameters of the function currently being compiled.
786 /// Used by match exhaustiveness checking to fall back to type annotations
787 /// when the type inference engine cannot resolve a parameter's type.
788 pub(crate) current_function_params: Vec<shape_ast::ast::FunctionParameter>,
789
790 /// Legacy cache of function names collected from stdlib-loaded modules.
791 ///
792 /// Internal builtin access is now gated by per-definition declaring-module
793 /// provenance, not by membership in this set.
794 pub stdlib_function_names: HashSet<String>,
795
796 /// Per-function flag: when true, `get_builtin_function` resolves `__*` names.
797 /// Toggled during compilation for definitions originating from `std::*`.
798 pub(crate) allow_internal_builtins: bool,
799
800 /// Package-scoped native library resolutions for the current host.
801 pub(crate) native_resolution_context:
802 Option<shape_runtime::native_resolution::NativeResolutionSet>,
803
804 /// Active synthetic MIR context while compiling non-function code.
805 pub(crate) non_function_mir_context_stack: Vec<String>,
806
807 /// MIR lowered for compiled functions and synthetic non-function contexts.
808 pub(crate) mir_functions: HashMap<String, crate::mir::types::MirFunction>,
809
810 /// Borrow analyses produced from lowered MIR for compiled functions and
811 /// synthetic non-function contexts.
812 pub(crate) mir_borrow_analyses: HashMap<String, crate::mir::BorrowAnalysis>,
813
814 /// Storage plans produced by the storage planning pass for each function.
815 /// Maps function name to the plan mapping each MIR slot to a `BindingStorageClass`.
816 pub(crate) mir_storage_plans: HashMap<String, crate::mir::StoragePlan>,
817
818 /// Per-function borrow summaries for interprocedural alias checking.
819 /// Describes which parameters conflict and must not alias at call sites.
820 pub(crate) function_borrow_summaries: HashMap<String, crate::mir::FunctionBorrowSummary>,
821
822 /// Per-function mapping from AST spans to MIR program points.
823 /// Used to bridge the bytecode compiler (which knows AST spans) to
824 /// MIR ownership decisions (which are keyed by `Point`).
825 pub(crate) mir_span_to_point:
826 HashMap<String, HashMap<shape_ast::ast::Span, crate::mir::types::Point>>,
827
828 /// Field-level definite-initialization and liveness analyses for compiled functions.
829 pub(crate) mir_field_analyses: HashMap<String, crate::mir::FieldAnalysis>,
830
831 /// Graph-compiled namespace map: local namespace name -> canonical module path.
832 /// Populated during graph-driven compilation to resolve qualified names.
833 pub(crate) graph_namespace_map: HashMap<String, String>,
834
835 /// Module dependency graph (set during graph-driven compilation).
836 pub(crate) module_graph: Option<std::sync::Arc<crate::module_graph::ModuleGraph>>,
837}
838
839impl Default for BytecodeCompiler {
840 fn default() -> Self {
841 Self::new()
842 }
843}
844
845mod compiler_impl_initialization;
846mod compiler_impl_reference_model;
847
848/// Infer effective reference parameters and mutation behavior without compiling bytecode.
849///
850/// Returns `(inferred_ref_params, inferred_ref_mutates)` keyed by function name.
851/// - `inferred_ref_params[f][i] == true` means parameter `i` of `f` is inferred/treated as ref.
852/// - `inferred_ref_mutates[f][i] == true` means that reference parameter is mutating (`&mut`).
853pub fn infer_reference_model(
854 program: &Program,
855) -> (HashMap<String, Vec<bool>>, HashMap<String, Vec<bool>>) {
856 let (inferred_ref_params, inferred_ref_mutates, _) =
857 BytecodeCompiler::infer_reference_model(program);
858 (inferred_ref_params, inferred_ref_mutates)
859}
860
861/// Infer effective parameter pass modes (`ByValue` / `ByRefShared` / `ByRefExclusive`)
862/// keyed by function name.
863pub fn infer_param_pass_modes(program: &Program) -> HashMap<String, Vec<ParamPassMode>> {
864 let (inferred_ref_params, inferred_ref_mutates, _) =
865 BytecodeCompiler::infer_reference_model(program);
866 BytecodeCompiler::build_param_pass_mode_map(
867 program,
868 &inferred_ref_params,
869 &inferred_ref_mutates,
870 )
871}
872
873#[cfg(all(test, feature = "deep-tests"))]
874#[path = "compiler_tests.rs"]
875mod compiler_deep;