Skip to main content

shape_vm/compiler/
compiler_impl_part4.rs

1use super::*;
2
3impl BytecodeCompiler {
4    pub(super) fn infer_reference_model(
5        program: &Program,
6    ) -> (
7        HashMap<String, Vec<bool>>,
8        HashMap<String, Vec<bool>>,
9        HashMap<String, Vec<Option<String>>>,
10    ) {
11        let funcs = Self::collect_program_functions(program);
12        let mut inference = shape_runtime::type_system::inference::TypeInferenceEngine::new();
13        let (types, _) = inference.infer_program_best_effort(program);
14        let inferred_ref_params = Self::infer_reference_params_from_types(program, &types);
15        let inferred_param_type_hints = Self::infer_param_type_hints_from_types(program, &types);
16
17        let mut effective_ref_params: HashMap<String, Vec<bool>> = HashMap::new();
18        for (name, func) in &funcs {
19            let inferred = inferred_ref_params.get(name).cloned().unwrap_or_default();
20            let mut refs = vec![false; func.params.len()];
21            for (idx, param) in func.params.iter().enumerate() {
22                refs[idx] = param.is_reference || inferred.get(idx).copied().unwrap_or(false);
23            }
24            effective_ref_params.insert(name.clone(), refs);
25        }
26
27        let mut direct_mutates: HashMap<String, Vec<bool>> = HashMap::new();
28        let mut edges: Vec<(String, usize, String, usize)> = Vec::new();
29
30        for (name, func) in &funcs {
31            let caller_refs = effective_ref_params
32                .get(name)
33                .cloned()
34                .unwrap_or_else(|| vec![false; func.params.len()]);
35            let mut direct = vec![false; func.params.len()];
36            let mut param_index_by_name: HashMap<String, usize> = HashMap::new();
37            for (idx, param) in func.params.iter().enumerate() {
38                for param_name in param.get_identifiers() {
39                    param_index_by_name.insert(param_name, idx);
40                }
41            }
42            for stmt in &func.body {
43                Self::analyze_statement_for_ref_mutation(
44                    stmt,
45                    name,
46                    &param_index_by_name,
47                    &caller_refs,
48                    &effective_ref_params,
49                    &mut direct,
50                    &mut edges,
51                );
52            }
53            direct_mutates.insert(name.clone(), direct);
54        }
55
56        let mut result = direct_mutates;
57        let mut changed = true;
58        while changed {
59            changed = false;
60            for (caller, caller_idx, callee, callee_idx) in &edges {
61                let callee_mutates = result
62                    .get(callee)
63                    .and_then(|flags| flags.get(*callee_idx))
64                    .copied()
65                    .unwrap_or(false);
66                if !callee_mutates {
67                    continue;
68                }
69                if let Some(caller_flags) = result.get_mut(caller)
70                    && let Some(flag) = caller_flags.get_mut(*caller_idx)
71                    && !*flag
72                {
73                    *flag = true;
74                    changed = true;
75                }
76            }
77        }
78
79        (inferred_ref_params, result, inferred_param_type_hints)
80    }
81
82    pub(super) fn inferred_type_to_hint_name(ty: &Type) -> Option<String> {
83        match ty {
84            Type::Concrete(annotation) => Some(annotation.to_type_string()),
85            Type::Generic { base, args } => {
86                let base_name = Self::inferred_type_to_hint_name(base)?;
87                if args.is_empty() {
88                    return Some(base_name);
89                }
90                let mut arg_names = Vec::with_capacity(args.len());
91                for arg in args {
92                    arg_names.push(Self::inferred_type_to_hint_name(arg)?);
93                }
94                Some(format!("{}<{}>", base_name, arg_names.join(", ")))
95            }
96            Type::Variable(_) | Type::Constrained { .. } | Type::Function { .. } => None,
97        }
98    }
99
100    pub(super) fn infer_param_type_hints_from_types(
101        program: &Program,
102        inferred_types: &HashMap<String, Type>,
103    ) -> HashMap<String, Vec<Option<String>>> {
104        let funcs = Self::collect_program_functions(program);
105        let mut hints = HashMap::new();
106
107        for (name, func) in funcs {
108            let mut param_hints = vec![None; func.params.len()];
109            let Some(Type::Function { params, .. }) = inferred_types.get(&name) else {
110                hints.insert(name, param_hints);
111                continue;
112            };
113
114            for (idx, param) in func.params.iter().enumerate() {
115                if param.type_annotation.is_some() || param.simple_name().is_none() {
116                    continue;
117                }
118                if let Some(inferred_param_ty) = params.get(idx) {
119                    param_hints[idx] = Self::inferred_type_to_hint_name(inferred_param_ty);
120                }
121            }
122
123            hints.insert(name, param_hints);
124        }
125
126        hints
127    }
128
129    pub(crate) fn is_definition_annotation_target(
130        target_kind: shape_ast::ast::functions::AnnotationTargetKind,
131    ) -> bool {
132        matches!(
133            target_kind,
134            shape_ast::ast::functions::AnnotationTargetKind::Function
135                | shape_ast::ast::functions::AnnotationTargetKind::Type
136                | shape_ast::ast::functions::AnnotationTargetKind::Module
137        )
138    }
139
140    /// Validate that an annotation is applicable to the requested target kind.
141    pub(crate) fn validate_annotation_target_usage(
142        &self,
143        ann: &shape_ast::ast::Annotation,
144        target_kind: shape_ast::ast::functions::AnnotationTargetKind,
145        fallback_span: shape_ast::ast::Span,
146    ) -> Result<()> {
147        let Some(compiled) = self.program.compiled_annotations.get(&ann.name) else {
148            let span = if ann.span == shape_ast::ast::Span::DUMMY {
149                fallback_span
150            } else {
151                ann.span
152            };
153            return Err(ShapeError::SemanticError {
154                message: format!("Unknown annotation '@{}'", ann.name),
155                location: Some(self.span_to_source_location(span)),
156            });
157        };
158
159        let has_definition_lifecycle =
160            compiled.on_define_handler.is_some() || compiled.metadata_handler.is_some();
161        if has_definition_lifecycle && !Self::is_definition_annotation_target(target_kind) {
162            let target_label = format!("{:?}", target_kind).to_lowercase();
163            let span = if ann.span == shape_ast::ast::Span::DUMMY {
164                fallback_span
165            } else {
166                ann.span
167            };
168            return Err(ShapeError::SemanticError {
169                message: format!(
170                    "Annotation '{}' defines definition-time lifecycle hooks (`on_define`/`metadata`) and cannot be applied to a {}. Allowed targets for these hooks are: function, type, module",
171                    ann.name, target_label
172                ),
173                location: Some(self.span_to_source_location(span)),
174            });
175        }
176
177        if compiled.allowed_targets.is_empty() || compiled.allowed_targets.contains(&target_kind) {
178            return Ok(());
179        }
180
181        let allowed: Vec<String> = compiled
182            .allowed_targets
183            .iter()
184            .map(|k| format!("{:?}", k).to_lowercase())
185            .collect();
186        let target_label = format!("{:?}", target_kind).to_lowercase();
187
188        let span = if ann.span == shape_ast::ast::Span::DUMMY {
189            fallback_span
190        } else {
191            ann.span
192        };
193
194        Err(ShapeError::SemanticError {
195            message: format!(
196                "Annotation '{}' cannot be applied to a {}. Allowed targets: {}",
197                ann.name,
198                target_label,
199                allowed.join(", ")
200            ),
201            location: Some(self.span_to_source_location(span)),
202        })
203    }
204
205    /// Compile a program to bytecode
206    pub fn compile(mut self, program: &Program) -> Result<BytecodeProgram> {
207        // First: desugar the program (converts FromQuery to method chains, etc.)
208        let mut program = program.clone();
209        shape_ast::transform::desugar_program(&mut program);
210        let analysis_program =
211            shape_ast::transform::augment_program_with_generated_extends(&program);
212
213        // Run the shared analyzer and surface diagnostics that are currently
214        // proven reliable in the compiler execution path.
215        let mut known_bindings: Vec<String> = self.module_bindings.keys().cloned().collect();
216        let namespace_bindings = Self::collect_namespace_import_bindings(&analysis_program);
217        known_bindings.extend(namespace_bindings.iter().cloned());
218        self.module_namespace_bindings
219            .extend(namespace_bindings.into_iter());
220        // Auto-register extension module names as implicit namespace bindings
221        // so that `regex.is_match(...)` works without a `use regex` statement.
222        if let Some(ref registry) = self.extension_registry {
223            for ext in registry.iter() {
224                if !self.module_namespace_bindings.contains(&ext.name) {
225                    self.module_namespace_bindings.insert(ext.name.clone());
226                    known_bindings.push(ext.name.clone());
227                }
228            }
229        }
230        for namespace in self.module_namespace_bindings.clone() {
231            let binding_idx = self.get_or_create_module_binding(&namespace);
232            self.register_extension_module_schema(&namespace);
233            let module_schema_name = format!("__mod_{}", namespace);
234            if self
235                .type_tracker
236                .schema_registry()
237                .get(&module_schema_name)
238                .is_some()
239            {
240                self.set_module_binding_type_info(binding_idx, &module_schema_name);
241            }
242        }
243        known_bindings.sort();
244        known_bindings.dedup();
245        let analysis_mode = if matches!(self.type_diagnostic_mode, TypeDiagnosticMode::RecoverAll) {
246            TypeAnalysisMode::RecoverAll
247        } else {
248            TypeAnalysisMode::FailFast
249        };
250        if let Err(errors) = analyze_program_with_mode(
251            &analysis_program,
252            self.source_text.as_deref(),
253            None,
254            Some(&known_bindings),
255            analysis_mode,
256        ) {
257            match self.type_diagnostic_mode {
258                TypeDiagnosticMode::Strict => {
259                    return Err(Self::type_errors_to_shape(errors));
260                }
261                TypeDiagnosticMode::ReliableOnly => {
262                    let strict_errors: Vec<_> = errors
263                        .into_iter()
264                        .filter(|error| Self::should_emit_type_diagnostic(&error.error))
265                        .collect();
266                    if !strict_errors.is_empty() {
267                        return Err(Self::type_errors_to_shape(strict_errors));
268                    }
269                }
270                TypeDiagnosticMode::RecoverAll => {
271                    self.errors.extend(
272                        errors
273                            .into_iter()
274                            .map(Self::type_error_with_location_to_shape),
275                    );
276                }
277            }
278        }
279
280        let (inferred_ref_params, inferred_ref_mutates, inferred_param_type_hints) =
281            Self::infer_reference_model(&program);
282        self.inferred_param_pass_modes = Self::build_param_pass_mode_map(
283            &program,
284            &inferred_ref_params,
285            &inferred_ref_mutates,
286        );
287        self.inferred_ref_params = inferred_ref_params;
288        self.inferred_ref_mutates = inferred_ref_mutates;
289        self.inferred_param_type_hints = inferred_param_type_hints;
290
291        // Hoisting pre-pass: collect all property assignments (e.g., a.y = 2)
292        // so inline object schemas include future fields from the start.
293        // Uses the existing PropertyAssignmentCollector — no duplication.
294        {
295            use shape_runtime::type_system::inference::PropertyAssignmentCollector;
296            let assignments = PropertyAssignmentCollector::collect(&program);
297            let grouped = PropertyAssignmentCollector::group_by_variable(&assignments);
298            for (var_name, var_assignments) in grouped {
299                let field_names: Vec<String> =
300                    var_assignments.iter().map(|a| a.property.clone()).collect();
301                self.hoisted_fields.insert(var_name, field_names);
302            }
303        }
304
305        // First pass: collect all function definitions
306        for item in &program.items {
307            self.register_item_functions(item)?;
308        }
309
310        // Start __main__ blob builder for top-level code.
311        self.current_blob_builder = Some(FunctionBlobBuilder::new(
312            "__main__".to_string(),
313            self.program.current_offset(),
314            self.program.constants.len(),
315            self.program.strings.len(),
316        ));
317
318        // Push a top-level drop scope so that block expressions and
319        // statement-level VarDecls can track locals for auto-drop.
320        self.push_drop_scope();
321
322        // Second pass: compile all items (collect errors instead of early-returning)
323        let item_count = program.items.len();
324        for (idx, item) in program.items.iter().enumerate() {
325            let is_last = idx == item_count - 1;
326            if let Err(e) = self.compile_item_with_context(item, is_last) {
327                self.errors.push(e);
328            }
329        }
330
331        // Return collected errors before emitting Halt
332        if !self.errors.is_empty() {
333            if self.errors.len() == 1 {
334                return Err(self.errors.remove(0));
335            }
336            return Err(shape_ast::error::ShapeError::MultiError(self.errors));
337        }
338
339        // Emit drops for top-level locals (from the top-level drop scope)
340        self.pop_drop_scope()?;
341
342        // Emit drops for top-level module bindings that have Drop impls
343        {
344            let bindings: Vec<(u16, bool)> =
345                std::mem::take(&mut self.drop_module_bindings);
346            for (binding_idx, is_async) in bindings.into_iter().rev() {
347                self.emit_drop_call_for_module_binding(binding_idx, is_async);
348            }
349        }
350
351        // Add halt instruction at the end
352        self.emit(Instruction::simple(OpCode::Halt));
353
354        // Store module_binding variable names for REPL persistence
355        // Build a Vec<String> where index matches the module_binding variable index
356        let mut module_binding_names = vec![String::new(); self.module_bindings.len()];
357        for (name, &idx) in &self.module_bindings {
358            module_binding_names[idx as usize] = name.clone();
359        }
360        self.program.module_binding_names = module_binding_names;
361
362        // Store top-level locals count so executor can advance sp past them
363        self.program.top_level_locals_count = self.next_local;
364
365        // Persist storage hints for JIT width-aware lowering.
366        self.populate_program_storage_hints();
367
368        // Transfer type schema registry for TypedObject field resolution
369        self.program.type_schema_registry = self.type_tracker.schema_registry().clone();
370
371        // Transfer final function definitions after comptime mutation/specialization.
372        self.program.expanded_function_defs = self.function_defs.clone();
373
374        // Finalize the __main__ blob and build the content-addressed program.
375        self.build_content_addressed_program();
376
377        // Transfer content-addressed program to the bytecode output.
378        self.program.content_addressed = self.content_addressed_program.take();
379        if self.program.functions.is_empty() {
380            self.program.function_blob_hashes.clear();
381        } else {
382            if self.function_hashes_by_id.len() < self.program.functions.len() {
383                self.function_hashes_by_id
384                    .resize(self.program.functions.len(), None);
385            } else if self.function_hashes_by_id.len() > self.program.functions.len() {
386                self.function_hashes_by_id
387                    .truncate(self.program.functions.len());
388            }
389            self.program.function_blob_hashes = self.function_hashes_by_id.clone();
390        }
391
392        // Transfer source text for error messages
393        if let Some(source) = self.source_text {
394            // Set in legacy field for backward compatibility
395            self.program.debug_info.source_text = source.clone();
396            // Also set in source map if not already set
397            if self.program.debug_info.source_map.files.is_empty() {
398                self.program
399                    .debug_info
400                    .source_map
401                    .add_file("<main>".to_string());
402            }
403            if self.program.debug_info.source_map.source_texts.is_empty() {
404                self.program
405                    .debug_info
406                    .source_map
407                    .set_source_text(0, source);
408            }
409        }
410
411        Ok(self.program)
412    }
413
414    /// Compile a program to bytecode with source text for error messages
415    pub fn compile_with_source(
416        mut self,
417        program: &Program,
418        source: &str,
419    ) -> Result<BytecodeProgram> {
420        self.set_source(source);
421        self.compile(program)
422    }
423
424    /// Compile an imported module's AST to a standalone BytecodeProgram.
425    ///
426    /// This takes the Module's AST (Program), compiles all exported functions
427    /// to bytecode, and returns the compiled program along with a mapping of
428    /// exported function names to their function indices in the compiled output.
429    ///
430    /// The returned `BytecodeProgram` and function name mapping allow the import
431    /// handler to resolve imported function calls to the correct bytecode indices.
432    ///
433    /// Currently handles function exports only. Types and values can be added later.
434    pub fn compile_module_ast(
435        module_ast: &Program,
436    ) -> Result<(BytecodeProgram, HashMap<String, usize>)> {
437        let mut compiler = BytecodeCompiler::new();
438        // Stdlib modules need access to __* builtins (intrinsics, into, etc.)
439        compiler.allow_internal_builtins = true;
440        let bytecode = compiler.compile(module_ast)?;
441
442        // Build name → function index mapping for exported functions
443        let mut export_map = HashMap::new();
444        for (idx, func) in bytecode.functions.iter().enumerate() {
445            export_map.insert(func.name.clone(), idx);
446        }
447
448        Ok((bytecode, export_map))
449    }
450}