// shape_jit/translator/compiler.rs
1//! Main compilation logic for BytecodeToIR
2
3use cranelift::codegen::ir::FuncRef;
4use cranelift::prelude::*;
5use std::collections::HashMap;
6
7use crate::context::*;
8use crate::nan_boxing::*;
9use shape_vm::bytecode::{BytecodeProgram, DeoptInfo, InlineFrameInfo, OpCode, Operand};
10use shape_vm::feedback::FeedbackVector;
11use shape_vm::type_tracking::{SlotKind, StorageHint};
12
13use super::loop_analysis;
14use super::types::{BytecodeToIR, CompilationMode, FFIFuncRefs, InlineCandidate};
15use crate::optimizer;
16
17impl<'a, 'b> BytecodeToIR<'a, 'b> {
18    pub(crate) fn new(
19        builder: &'a mut FunctionBuilder<'b>,
20        program: &'a BytecodeProgram,
21        ctx_ptr: Value,
22        ffi: FFIFuncRefs,
23        user_funcs: HashMap<u16, FuncRef>,
24        user_func_arities: HashMap<u16, u16>,
25    ) -> Self {
26        // Pre-compute loop end targets by scanning for matching LoopStart/LoopEnd pairs
27        let mut loop_ends = HashMap::new();
28        let mut loop_starts = Vec::new();
29        for (i, instr) in program.instructions.iter().enumerate() {
30            match instr.opcode {
31                OpCode::LoopStart => loop_starts.push(i),
32                OpCode::LoopEnd => {
33                    if let Some(start_idx) = loop_starts.pop() {
34                        loop_ends.insert(start_idx, i);
35                    }
36                }
37                _ => {}
38            }
39        }
40
41        // Run loop analysis before code generation
42        let loop_info = loop_analysis::analyze_loops(program);
43        let optimization_plan = optimizer::build_function_plan(program, &loop_info);
44
45        // Analyze which functions can be inlined at call sites
46        let inline_candidates = Self::analyze_inline_candidates(program);
47        let mut local_types = HashMap::new();
48        for (idx, hint) in program
49            .top_level_local_storage_hints
50            .iter()
51            .copied()
52            .enumerate()
53        {
54            if hint != StorageHint::Unknown {
55                local_types.insert(idx as u16, hint);
56            }
57        }
58        let mut module_binding_types = HashMap::new();
59        for (idx, hint) in program
60            .module_binding_storage_hints
61            .iter()
62            .copied()
63            .enumerate()
64        {
65            if hint != StorageHint::Unknown {
66                module_binding_types.insert(idx as u16, hint);
67            }
68        }
69
70        Self {
71            builder,
72            program,
73            ctx_ptr,
74            stack_depth: 0,
75            stack_vars: HashMap::new(),
76            locals: HashMap::new(),
77            next_var: 0,
78            blocks: HashMap::new(),
79            current_block_idx: 0,
80            ffi,
81            loop_stack: Vec::new(),
82            loop_ends,
83            exit_block: None,
84            compile_time_sp: 0,
85            merge_blocks: std::collections::HashSet::new(),
86            block_stack_depth: HashMap::new(),
87            pending_data_offset: None,
88            exception_handlers: Vec::new(),
89            current_instr_idx: 0,
90            user_funcs,
91            user_func_arities,
92            stack_types: HashMap::new(),
93            local_types,
94            module_binding_types,
95            typed_stack: super::storage::TypedStack::new(),
96            // Kernel mode fields (unused in standard mode)
97            mode: CompilationMode::Standard,
98            kernel_cursor_index: None,
99            kernel_series_ptrs: None,
100            kernel_state_ptr: None,
101            kernel_config: None,
102            loop_info,
103            optimization_plan,
104            hoisted_locals: HashMap::new(),
105            local_f64_cache: HashMap::new(),
106            // Function inlining
107            inline_candidates,
108            inline_local_base: 0,
109            inline_depth: 0,
110            // Reference tracking
111            ref_stack_slots: HashMap::new(),
112            // Integer unboxing
113            unboxed_int_locals: std::collections::HashSet::new(),
114            unboxed_int_module_bindings: std::collections::HashSet::new(),
115            promoted_module_bindings: HashMap::new(),
116            register_carried_module_bindings: std::collections::HashSet::new(),
117            unboxed_loop_depth: 0,
118            unboxed_scope_stack: Vec::new(),
119            register_carried_loop_depth: 0,
120            pending_rebox: None,
121            pending_rebox_module_bindings: None,
122            pending_flush_module_bindings: None,
123            // Float unboxing
124            unboxed_f64_locals: std::collections::HashSet::new(),
125            f64_local_vars: HashMap::new(),
126            pending_rebox_f64: None,
127            precomputed_f64_for_invariant_int: HashMap::new(),
128            precomputed_f64_scope_stack: Vec::new(),
129            // Skip ranges (empty by default)
130            skip_ranges: Vec::new(),
131            // Array LICM
132            hoisted_array_info: HashMap::new(),
133            hoisted_ref_array_info: HashMap::new(),
134            // Call LICM
135            licm_hoisted_results: HashMap::new(),
136            licm_skip_indices: std::collections::HashSet::new(),
137            // Numeric parameter hints (compile-time)
138            numeric_param_hints: std::collections::HashSet::new(),
139            deopt_block: None,
140            deopt_signal_var: None,
141            // Deopt tracking
142            deopt_points: Vec::new(),
143            func_locals_count: 0,
144            deferred_spills: Vec::new(),
145            // Loop unrolling
146            pending_unroll: None,
147            trusted_array_push_local_sites: std::collections::HashSet::new(),
148            trusted_array_push_local_iv_by_site: HashMap::new(),
149            // Shape guard tracking
150            shape_guards_emitted: Vec::new(),
151            // Feedback-guided speculation (populated by Tier 2 requests)
152            feedback: None,
153            // Multi-frame inline deopt
154            compiling_function_id: 0, // Set by caller (compile_optimizing_function)
155            inline_frame_stack: Vec::new(),
156            // Escape analysis / scalar replacement
157            scalar_replaced_arrays: HashMap::new(),
158        }
159    }
160
    /// Create compiler in kernel mode for simulation hot path.
    ///
    /// Kernel mode bypasses JITContext and uses direct pointers:
    /// - cursor_index: Current row in the simulation (usize)
    /// - series_ptrs: Pointer to series data array (*const *const f64)
    /// - state_ptr: Pointer to TypedObject state buffer (*mut u8)
    ///
    /// This enables >10M ticks/sec by eliminating all indirection.
    ///
    /// Unlike standard mode, no user functions are registered and no inline
    /// candidates are computed (kernel programs have no user-level calls).
    pub(crate) fn new_kernel_mode(
        builder: &'a mut FunctionBuilder<'b>,
        program: &'a BytecodeProgram,
        cursor_index: Value,
        series_ptrs: Value,
        state_ptr: Value,
        ffi: FFIFuncRefs,
        config: SimulationKernelConfig,
    ) -> Self {
        // Pre-compute loop ends (same as standard mode)
        // NOTE(review): this setup duplicates the preamble of `new`; if either
        // scan changes, keep the two constructors in sync.
        let mut loop_ends = HashMap::new();
        let mut loop_starts = Vec::new();
        for (i, instr) in program.instructions.iter().enumerate() {
            match instr.opcode {
                OpCode::LoopStart => loop_starts.push(i),
                OpCode::LoopEnd => {
                    // Stack-based matching pairs nested loops correctly.
                    if let Some(start_idx) = loop_starts.pop() {
                        loop_ends.insert(start_idx, i);
                    }
                }
                _ => {}
            }
        }

        // Run loop analysis for kernel mode too
        let loop_info = loop_analysis::analyze_loops(program);
        let optimization_plan = optimizer::build_function_plan(program, &loop_info);
        // Seed compile-time local types from bytecode storage hints; only
        // slots with a known hint are recorded.
        let mut local_types = HashMap::new();
        for (idx, hint) in program
            .top_level_local_storage_hints
            .iter()
            .copied()
            .enumerate()
        {
            if hint != StorageHint::Unknown {
                local_types.insert(idx as u16, hint);
            }
        }
        let mut module_binding_types = HashMap::new();
        for (idx, hint) in program
            .module_binding_storage_hints
            .iter()
            .copied()
            .enumerate()
        {
            if hint != StorageHint::Unknown {
                module_binding_types.insert(idx as u16, hint);
            }
        }

        Self {
            builder,
            program,
            ctx_ptr: cursor_index, // Reuse field (not used as ctx in kernel mode)
            stack_depth: 0,
            stack_vars: HashMap::new(),
            locals: HashMap::new(),
            next_var: 0,
            blocks: HashMap::new(),
            current_block_idx: 0,
            ffi,
            loop_stack: Vec::new(),
            loop_ends,
            exit_block: None,
            compile_time_sp: 0,
            merge_blocks: std::collections::HashSet::new(),
            block_stack_depth: HashMap::new(),
            pending_data_offset: None,
            exception_handlers: Vec::new(),
            current_instr_idx: 0,
            user_funcs: HashMap::new(),
            user_func_arities: HashMap::new(),
            stack_types: HashMap::new(),
            local_types,
            module_binding_types,
            typed_stack: super::storage::TypedStack::new(),
            // Kernel mode fields
            mode: CompilationMode::Kernel,
            kernel_cursor_index: Some(cursor_index),
            kernel_series_ptrs: Some(series_ptrs),
            kernel_state_ptr: Some(state_ptr),
            kernel_config: Some(config),
            loop_info,
            optimization_plan,
            hoisted_locals: HashMap::new(),
            local_f64_cache: HashMap::new(),
            // No inlining in kernel mode (no user functions)
            inline_candidates: HashMap::new(),
            inline_local_base: 0,
            inline_depth: 0,
            // Reference tracking
            ref_stack_slots: HashMap::new(),
            // Integer unboxing
            unboxed_int_locals: std::collections::HashSet::new(),
            unboxed_int_module_bindings: std::collections::HashSet::new(),
            promoted_module_bindings: HashMap::new(),
            register_carried_module_bindings: std::collections::HashSet::new(),
            unboxed_loop_depth: 0,
            unboxed_scope_stack: Vec::new(),
            register_carried_loop_depth: 0,
            pending_rebox: None,
            pending_rebox_module_bindings: None,
            pending_flush_module_bindings: None,
            // Float unboxing
            unboxed_f64_locals: std::collections::HashSet::new(),
            f64_local_vars: HashMap::new(),
            pending_rebox_f64: None,
            precomputed_f64_for_invariant_int: HashMap::new(),
            precomputed_f64_scope_stack: Vec::new(),
            // Skip ranges (empty by default)
            skip_ranges: Vec::new(),
            // Array LICM
            hoisted_array_info: HashMap::new(),
            hoisted_ref_array_info: HashMap::new(),
            // Call LICM
            licm_hoisted_results: HashMap::new(),
            licm_skip_indices: std::collections::HashSet::new(),
            // Numeric parameter hints (compile-time)
            numeric_param_hints: std::collections::HashSet::new(),
            deopt_block: None,
            deopt_signal_var: None,
            // Deopt tracking
            deopt_points: Vec::new(),
            func_locals_count: 0,
            deferred_spills: Vec::new(),
            // Loop unrolling
            pending_unroll: None,
            trusted_array_push_local_sites: std::collections::HashSet::new(),
            trusted_array_push_local_iv_by_site: HashMap::new(),
            // Shape guard tracking
            shape_guards_emitted: Vec::new(),
            // Feedback-guided speculation (not used in kernel mode)
            feedback: None,
            // Multi-frame inline deopt (not used in kernel mode)
            compiling_function_id: 0,
            inline_frame_stack: Vec::new(),
            // Escape analysis / scalar replacement (not used in kernel mode)
            scalar_replaced_arrays: HashMap::new(),
        }
    }
309
310    /// Check if an instruction index falls within a skip range.
311    fn is_skipped(&self, idx: usize) -> bool {
312        self.skip_ranges
313            .iter()
314            .any(|&(start, end)| idx >= start && idx < end)
315    }
316
    /// Compile the whole bytecode program into the current Cranelift function.
    ///
    /// Phases:
    /// 1. `create_blocks_for_jumps` pre-creates one basic block per jump target.
    /// 2. Instructions are compiled in order; at each block boundary any
    ///    pending rebox/flush actions (set by loop compilation) are emitted
    ///    and cached per-block state (typed stack, f64 caches) is invalidated.
    /// 3. Deferred per-guard spill blocks and the shared deopt block are emitted.
    /// 4. The epilogue stores the result into ctx memory and returns the i32
    ///    deopt signal value (0 = success; the deopt path stores a sentinel).
    ///
    /// Returns `Ok(signal)` with the signal SSA value, or `Err` on a
    /// compilation failure from `compile_instruction`.
    pub(crate) fn compile(&mut self) -> Result<Value, String> {
        // Phase 1: Find all jump targets and create basic blocks
        self.create_blocks_for_jumps();

        // Create an exit block for the epilogue - all paths will jump here
        let exit_block = self.builder.create_block();
        self.builder.append_block_param(exit_block, types::I64);
        self.exit_block = Some(exit_block);

        // Initialize function signal to success (0). Some guarded helper paths
        // may set this to a negative value and jump to exit_block.
        let deopt_signal_var = Variable::new(self.next_var);
        self.next_var += 1;
        self.builder.declare_var(deopt_signal_var, types::I32);
        let zero_i32 = self.builder.ins().iconst(types::I32, 0);
        self.builder.def_var(deopt_signal_var, zero_i32);
        self.deopt_signal_var = Some(deopt_signal_var);

        // Find the first non-skipped instruction and jump from entry to its block.
        let first_idx = (0..self.program.instructions.len())
            .find(|&i| !self.is_skipped(i))
            .unwrap_or(0);
        if let Some(&block0) = self.blocks.get(&first_idx) {
            if !self.numeric_param_hints.is_empty() {
                // Sort for deterministic iteration order across compilations.
                let mut params: Vec<u16> = self.numeric_param_hints.iter().copied().collect();
                params.sort_unstable();
                for local_idx in params {
                    // Only set Float64 where no stronger hint already exists.
                    self.local_types
                        .entry(local_idx)
                        .or_insert(StorageHint::Float64);
                }
            }
            self.builder.ins().jump(block0, &[]);
            self.block_stack_depth.insert(first_idx, 0);
        }

        // Phase 2: Compile instructions with control flow
        let instrs = self.program.instructions.clone();
        let mut need_fallthrough = false;
        let mut block_terminated = false;

        for (i, instr) in instrs.iter().enumerate() {
            // Skip function body instructions (compiled separately)
            if self.is_skipped(i) {
                continue;
            }

            if let Some(&block) = self.blocks.get(&i) {
                // Emit the fall-through edge from the previous (unterminated) block.
                if need_fallthrough && !block_terminated {
                    self.block_stack_depth.entry(i).or_insert(self.stack_depth);
                    if self.merge_blocks.contains(&i) {
                        // Merge blocks carry the top-of-stack value as a block param.
                        let val = self.stack_pop().unwrap_or_else(|| {
                            self.builder.ins().iconst(types::I64, TAG_NULL as i64)
                        });
                        self.builder.ins().jump(block, &[val]);
                    } else {
                        self.builder.ins().jump(block, &[]);
                    }
                }
                self.builder.switch_to_block(block);
                self.current_block_idx = i;
                need_fallthrough = false;
                block_terminated = false;

                // Integer unboxing: rebox raw i64 locals at loop exit.
                // compile_loop_end sets pending_rebox; the rebox code runs at the
                // start of the loop's end_block (the first block switch after LoopEnd).
                if let Some(rebox_locals) = self.pending_rebox.take() {
                    for &local_idx in &rebox_locals {
                        let var = self.get_or_create_local(local_idx);
                        let raw_int = self.builder.use_var(var);
                        let f64_val = self.builder.ins().fcvt_from_sint(types::F64, raw_int);
                        let boxed = self.f64_to_i64(f64_val);
                        self.builder.def_var(var, boxed);
                    }
                }

                // Float unboxing rebox: convert raw f64 → NaN-boxed i64.
                if let Some(rebox_f64s) = self.pending_rebox_f64.take() {
                    for &local_idx in &rebox_f64s {
                        if let Some(&f64_var) = self.f64_local_vars.get(&local_idx) {
                            let f64_val = self.builder.use_var(f64_var);
                            let boxed = self.f64_to_i64(f64_val);
                            let i64_var = self.get_or_create_local(local_idx);
                            self.builder.def_var(i64_var, boxed);
                        }
                        self.f64_local_vars.remove(&local_idx);
                    }
                    // Only clear all f64 vars when no outer scopes remain
                    if self.unboxed_scope_stack.is_empty() {
                        self.f64_local_vars.clear();
                    }
                }

                // Rebox promoted module bindings: convert raw i64 → NaN-boxed
                // and write back to ctx.locals[] memory.
                if let Some(rebox_mbs) = self.pending_rebox_module_bindings.take() {
                    for &mb_idx in &rebox_mbs {
                        if let Some(&var) = self.promoted_module_bindings.get(&mb_idx) {
                            let raw_int = self.builder.use_var(var);
                            let f64_val = self.builder.ins().fcvt_from_sint(types::F64, raw_int);
                            let boxed = self.f64_to_i64(f64_val);
                            // Write back to memory
                            let byte_offset = LOCALS_OFFSET + (mb_idx as i32 * 8);
                            self.builder.ins().store(
                                MemFlags::new(),
                                boxed,
                                self.ctx_ptr,
                                byte_offset,
                            );
                        }
                        // Demote: the binding is no longer register-carried.
                        self.promoted_module_bindings.remove(&mb_idx);
                        self.register_carried_module_bindings.remove(&mb_idx);
                    }
                }

                // Flush boxed, register-carried module bindings to ctx.locals[] at loop exit.
                if let Some(flush_mbs) = self.pending_flush_module_bindings.take() {
                    for &mb_idx in &flush_mbs {
                        if let Some(&var) = self.promoted_module_bindings.get(&mb_idx) {
                            let val = self.builder.use_var(var);
                            let byte_offset = LOCALS_OFFSET + (mb_idx as i32 * 8);
                            self.builder.ins().store(
                                MemFlags::new(),
                                val,
                                self.ctx_ptr,
                                byte_offset,
                            );
                        }
                        self.promoted_module_bindings.remove(&mb_idx);
                        self.register_carried_module_bindings.remove(&mb_idx);
                    }
                }

                if let Some(&expected_depth) = self.block_stack_depth.get(&i) {
                    self.stack_depth = expected_depth;
                    // Clear typed_stack at block boundaries: f64 SSA Values from
                    // predecessor blocks may not dominate this block, so cached
                    // shadows are invalid. The optimization still applies within
                    // basic blocks (where tight inner loops live).
                    self.typed_stack.clear();
                    // Clear local_f64_cache: cached f64 Values from predecessor
                    // blocks may not dominate this block.
                    self.local_f64_cache.clear();
                }

                // Merge blocks receive the merged top-of-stack value as a param.
                if self.merge_blocks.contains(&i) {
                    let params = self.builder.block_params(block);
                    if !params.is_empty() {
                        self.stack_push(params[0]);
                    }
                }
            }

            // Dead code after a terminator, until the next block start.
            if block_terminated {
                continue;
            }

            // Call LICM: skip instructions that are part of a hoisted call sequence
            // (arg pushes and argc push), and replace the call instruction itself
            // with a push of the pre-computed result.
            if self.licm_skip_indices.contains(&i) {
                continue;
            }
            if let Some(&result_var) = self.licm_hoisted_results.get(&i) {
                let result_val = self.builder.use_var(result_var);
                self.stack_push(result_val);
                continue;
            }

            // Track current instruction index for property lookup in compile_get_prop
            self.current_instr_idx = i;
            self.compile_instruction(instr, i)?;

            // Decide whether this instruction ended the current block.
            match instr.opcode {
                OpCode::Jump
                | OpCode::Return
                | OpCode::ReturnValue
                | OpCode::Break
                | OpCode::Continue
                | OpCode::Throw => {
                    block_terminated = true;
                }
                OpCode::JumpIfFalse | OpCode::JumpIfFalseTrusted | OpCode::JumpIfTrue => {
                    block_terminated = true;
                }
                _ => {
                    // Fall through only if the next instruction starts a block.
                    need_fallthrough = self.blocks.contains_key(&(i + 1));
                }
            }
        }

        // If the last compiled block did not terminate, route it to the exit
        // block with the top-of-stack value (or null if the stack is empty).
        if !block_terminated {
            let default_val = self
                .stack_pop_boxed()
                .unwrap_or_else(|| self.builder.ins().iconst(types::I64, TAG_NULL as i64));
            self.builder.ins().jump(exit_block, &[default_val]);
        }

        for block in self.blocks.values() {
            self.builder.seal_block(*block);
        }

        // Emit deferred per-guard spill blocks.
        // Each block stores live locals + operand stack to ctx_buf,
        // then jumps to the shared deopt block with its deopt_id.
        let deferred = std::mem::take(&mut self.deferred_spills);
        for spill in &deferred {
            self.builder.switch_to_block(spill.block);

            // Store live locals to ctx_buf[LOCALS_OFFSET + bc_idx * 8]
            // Unboxed locals need type-aware storage:
            // - f64 locals: bitcast(I64, f64_val) to get raw bits
            // - int locals: store directly (raw i64 fits in u64)
            // - NaN-boxed: store as-is
            for &(bc_idx, var) in &spill.live_locals {
                let val = self.builder.use_var(var);
                let store_val = if spill.f64_locals.contains(&bc_idx) {
                    // Float-unboxed local: val is Cranelift f64, bitcast to i64 bits
                    // Check if this local has an f64 variable
                    if let Some(&f64_var) = self.f64_local_vars.get(&bc_idx) {
                        let f64_val = self.builder.use_var(f64_var);
                        self.builder
                            .ins()
                            .bitcast(types::I64, MemFlags::new(), f64_val)
                    } else {
                        // Fallback: the regular variable holds NaN-boxed, use as-is
                        val
                    }
                } else {
                    // Int-unboxed or NaN-boxed: store directly.
                    // Int locals hold raw i64, which unmarshal_jit_result handles
                    // with SlotKind::Int64 → ValueWord::from_i64(bits as i64).
                    // NaN-boxed locals store as-is (SlotKind::Unknown passthrough).
                    val
                };
                let offset = LOCALS_OFFSET + (bc_idx as i32) * 8;
                self.builder
                    .ins()
                    .store(MemFlags::trusted(), store_val, self.ctx_ptr, offset);
            }

            // Store on-stack operand values (via stack_vars)
            // Slot 128 onward is the operand-stack spill area in ctx_buf.
            for i in 0..spill.on_stack_count {
                let var = self.get_or_create_stack_var(i);
                let val = self.builder.use_var(var);
                let offset = LOCALS_OFFSET + (128 + i as i32) * 8;
                self.builder
                    .ins()
                    .store(MemFlags::trusted(), val, self.ctx_ptr, offset);
            }

            // Store extra pre-popped values (passed as block params)
            let block_params = self.builder.block_params(spill.block).to_vec();
            for (j, &param) in block_params.iter().enumerate() {
                let stack_pos = spill.on_stack_count + j;
                let offset = LOCALS_OFFSET + (128 + stack_pos as i32) * 8;
                self.builder
                    .ins()
                    .store(MemFlags::trusted(), param, self.ctx_ptr, offset);
            }

            // Store inline frame locals for multi-frame deopt
            let mut ctx_buf_pos = 128u16 + (spill.on_stack_count + spill.extra_param_count) as u16;
            // Use the ctx_buf_positions from the DeoptInfo inline_frames
            for iframe in &spill.inline_frames {
                for &(_, var) in &iframe.live_locals {
                    let val = self.builder.use_var(var);
                    let offset = LOCALS_OFFSET + (ctx_buf_pos as i32) * 8;
                    self.builder
                        .ins()
                        .store(MemFlags::trusted(), val, self.ctx_ptr, offset);
                    ctx_buf_pos += 1;
                }
            }

            // Jump to shared deopt block
            let deopt = self.get_or_create_deopt_block();
            let deopt_id_val = self.builder.ins().iconst(types::I32, spill.deopt_id as i64);
            self.builder.ins().jump(deopt, &[deopt_id_val]);
            self.builder.seal_block(spill.block);
        }

        // Shared deopt block: record the deopt_id and exit with the deopt signal.
        if let Some(deopt_block) = self.deopt_block {
            self.builder.switch_to_block(deopt_block);
            let deopt_signal_var = self
                .deopt_signal_var
                .expect("deopt_signal_var must be initialized in compile()");
            let deopt_id_i32 = self.builder.block_params(deopt_block)[0];
            let deopt_id_u64 = self.builder.ins().uextend(types::I64, deopt_id_i32);
            // VM deopt handler reads deopt_id from ctx word 0.
            self.builder
                .ins()
                .store(MemFlags::trusted(), deopt_id_u64, self.ctx_ptr, 0);
            // Sentinel signal: (u32::MAX - 1) reads as -2 when interpreted as i32.
            let deopt_code = self.builder.ins().iconst(types::I32, (u32::MAX - 1) as i64);
            self.builder.def_var(deopt_signal_var, deopt_code);
            let null_val = self.builder.ins().iconst(types::I64, TAG_NULL as i64);
            self.builder.ins().jump(exit_block, &[null_val]);
            self.builder.seal_block(deopt_block);
        }

        // Epilogue: store the result value and a stack depth of 1 into ctx.
        self.builder.switch_to_block(exit_block);
        self.builder.seal_block(exit_block);

        let ret_val_i64 = self.builder.block_params(exit_block)[0];

        self.builder
            .ins()
            .store(MemFlags::trusted(), ret_val_i64, self.ctx_ptr, STACK_OFFSET);

        let one = self.builder.ins().iconst(types::I64, 1);
        self.builder
            .ins()
            .store(MemFlags::trusted(), one, self.ctx_ptr, STACK_PTR_OFFSET);

        // Return signal (0 success, negative deopt).
        let signal_var = self
            .deopt_signal_var
            .expect("deopt_signal_var must be initialized in compile()");
        let signal = self.builder.use_var(signal_var);
        Ok(signal)
    }
639
640    fn create_blocks_for_jumps(&mut self) {
641        let mut block_starts: std::collections::HashSet<usize> = std::collections::HashSet::new();
642        let mut incoming_edges: HashMap<usize, usize> = HashMap::new();
643
644        for (i, instr) in self.program.instructions.iter().enumerate() {
645            if self.is_skipped(i) {
646                continue;
647            }
648            match instr.opcode {
649                OpCode::Jump => {
650                    if let Some(Operand::Offset(offset)) = &instr.operand {
651                        let target_idx = ((i as i32) + 1 + *offset) as usize;
652                        if !self.is_skipped(target_idx) {
653                            block_starts.insert(target_idx);
654                            *incoming_edges.entry(target_idx).or_insert(0) += 1;
655                        }
656                    }
657                }
658                OpCode::JumpIfFalse | OpCode::JumpIfFalseTrusted | OpCode::JumpIfTrue => {
659                    if let Some(Operand::Offset(offset)) = &instr.operand {
660                        let target_idx = ((i as i32) + 1 + *offset) as usize;
661                        if !self.is_skipped(target_idx) {
662                            block_starts.insert(target_idx);
663                            *incoming_edges.entry(target_idx).or_insert(0) += 1;
664                        }
665                        let next_idx = i + 1;
666                        if !self.is_skipped(next_idx) {
667                            block_starts.insert(next_idx);
668                            *incoming_edges.entry(next_idx).or_insert(0) += 1;
669                        }
670                    }
671                }
672                OpCode::LoopStart | OpCode::LoopEnd => {
673                    block_starts.insert(i);
674                    *incoming_edges.entry(i).or_insert(0) += 1;
675                    let next_idx = i + 1;
676                    if next_idx < self.program.instructions.len() && !self.is_skipped(next_idx) {
677                        block_starts.insert(next_idx);
678                        *incoming_edges.entry(next_idx).or_insert(0) += 1;
679                    }
680                }
681                OpCode::SetupTry => {
682                    if let Some(Operand::Offset(offset)) = &instr.operand {
683                        let catch_idx = ((i as i32) + 1 + *offset) as usize;
684                        if !self.is_skipped(catch_idx) {
685                            block_starts.insert(catch_idx);
686                            *incoming_edges.entry(catch_idx).or_insert(0) += 1;
687                        }
688                    }
689                }
690                _ => {}
691            }
692        }
693
694        // Find the first non-skipped instruction index to use as block 0.
695        // When stdlib is prepended, instruction 0 is in a skip range — we must
696        // start from the first instruction the JIT will actually compile.
697        let first_idx = (0..self.program.instructions.len())
698            .find(|&i| !self.is_skipped(i))
699            .unwrap_or(0);
700        block_starts.insert(first_idx);
701        *incoming_edges.entry(first_idx).or_insert(0) += 1;
702
703        for (i, instr) in self.program.instructions.iter().enumerate() {
704            if self.is_skipped(i) {
705                continue;
706            }
707            let is_terminator = matches!(
708                instr.opcode,
709                OpCode::Jump
710                    | OpCode::Return
711                    | OpCode::ReturnValue
712                    | OpCode::Break
713                    | OpCode::Continue
714                    | OpCode::Throw
715            );
716            let is_conditional = matches!(
717                instr.opcode,
718                OpCode::JumpIfFalse | OpCode::JumpIfFalseTrusted | OpCode::JumpIfTrue
719            );
720
721            if !is_terminator && !is_conditional {
722                let next_idx = i + 1;
723                if next_idx < self.program.instructions.len()
724                    && block_starts.contains(&next_idx)
725                    && !self.is_skipped(next_idx)
726                {
727                    *incoming_edges.entry(next_idx).or_insert(0) += 1;
728                }
729            }
730        }
731
732        if !self.blocks.contains_key(&first_idx) {
733            let block = self.builder.create_block();
734            self.blocks.insert(first_idx, block);
735        }
736
737        for (i, instr) in self.program.instructions.iter().enumerate() {
738            if self.is_skipped(i) {
739                continue;
740            }
741            match instr.opcode {
742                OpCode::Jump
743                | OpCode::JumpIfFalse
744                | OpCode::JumpIfFalseTrusted
745                | OpCode::JumpIfTrue => {
746                    if let Some(Operand::Offset(offset)) = &instr.operand {
747                        let target_idx = ((i as i32) + 1 + *offset) as usize;
748                        if !self.is_skipped(target_idx) && !self.blocks.contains_key(&target_idx) {
749                            let block = self.builder.create_block();
750                            let needs_merge_param = false;
751                            if needs_merge_param {
752                                self.builder.append_block_param(block, types::I64);
753                                self.merge_blocks.insert(target_idx);
754                            }
755                            self.blocks.insert(target_idx, block);
756                        }
757                    }
758                }
759                OpCode::LoopStart | OpCode::LoopEnd => {
760                    if !self.blocks.contains_key(&i) {
761                        let block = self.builder.create_block();
762                        self.blocks.insert(i, block);
763                    }
764                    let next_idx = i + 1;
765                    if next_idx < self.program.instructions.len()
766                        && !self.is_skipped(next_idx)
767                        && !self.blocks.contains_key(&next_idx)
768                    {
769                        let block = self.builder.create_block();
770                        let needs_merge_param = false;
771                        if needs_merge_param {
772                            self.builder.append_block_param(block, types::I64);
773                            self.merge_blocks.insert(next_idx);
774                        }
775                        self.blocks.insert(next_idx, block);
776                    }
777                }
778                OpCode::SetupTry => {
779                    if let Some(Operand::Offset(offset)) = &instr.operand {
780                        let catch_idx = ((i as i32) + 1 + *offset) as usize;
781                        if !self.is_skipped(catch_idx) && !self.blocks.contains_key(&catch_idx) {
782                            let block = self.builder.create_block();
783                            self.builder.append_block_param(block, types::I64);
784                            self.merge_blocks.insert(catch_idx);
785                            self.blocks.insert(catch_idx, block);
786                        }
787                    }
788                }
789                _ => {}
790            }
791            if matches!(
792                instr.opcode,
793                OpCode::JumpIfFalse | OpCode::JumpIfFalseTrusted | OpCode::JumpIfTrue
794            ) {
795                let next_idx = i + 1;
796                if !self.is_skipped(next_idx) && !self.blocks.contains_key(&next_idx) {
797                    let block = self.builder.create_block();
798                    let needs_merge_param = false;
799                    if needs_merge_param {
800                        self.builder.append_block_param(block, types::I64);
801                        self.merge_blocks.insert(next_idx);
802                    }
803                    self.blocks.insert(next_idx, block);
804                }
805            }
806        }
807    }
808
809    pub(crate) fn get_or_create_local(&mut self, idx: u16) -> Variable {
810        // Apply inline base offset to avoid caller/callee local collisions
811        let effective_idx = idx.wrapping_add(self.inline_local_base);
812        if let Some(var) = self.locals.get(&effective_idx) {
813            return *var;
814        }
815
816        let var = Variable::new(self.next_var);
817        self.next_var += 1;
818        self.builder.declare_var(var, types::I64);
819        self.locals.insert(effective_idx, var);
820        var
821    }
822
823    /// Analyze which functions are eligible for inlining at call sites.
824    ///
825    /// A function is an inline candidate if:
826    /// - It has < 80 bytecode instructions
827    /// - It is not a closure (no captured state)
828    /// - It does not use CallValue (closure calls need captured state)
829    /// - It is straight-line (no jumps, loops, or exception handlers)
830    /// Non-leaf functions (with Call/CallMethod/BuiltinCall) ARE allowed.
831    pub(crate) fn analyze_inline_candidates(
832        program: &BytecodeProgram,
833    ) -> HashMap<u16, InlineCandidate> {
834        let mut candidates = HashMap::new();
835        let num_funcs = program.functions.len();
836        if num_funcs == 0 {
837            return candidates;
838        }
839
840        for (fn_id, func) in program.functions.iter().enumerate() {
841            let fn_id = fn_id as u16;
842
843            // Skip closures — they have captured state
844            if func.is_closure || func.body_length == 0 {
845                continue;
846            }
847
848            let entry_point = func.entry_point;
849            let func_end = entry_point + func.body_length;
850            let instr_count = func.body_length;
851
852            // Skip if too large or out of bounds
853            if instr_count > 80 || instr_count == 0 {
854                continue;
855            }
856            if entry_point >= program.instructions.len() || func_end > program.instructions.len() {
857                continue;
858            }
859
860            let body = &program.instructions[entry_point..func_end];
861
862            // Allow non-leaf functions (functions that call other functions).
863            // Nested calls are handled by compile_call which respects inline_depth.
864            // Only exclude CallValue (closure calls need captured state management
865            // that may not be set up correctly in the inline namespace).
866            let has_closure_calls = body.iter().any(|i| matches!(i.opcode, OpCode::CallValue));
867            if has_closure_calls {
868                continue;
869            }
870
871            // Must be straight-line (no branches, loops, exception handling,
872            // or reference operations that create internal blocks)
873            let has_control_flow = body.iter().any(|i| {
874                matches!(
875                    i.opcode,
876                    OpCode::Jump
877                        | OpCode::JumpIfFalse
878                        | OpCode::JumpIfTrue
879                        | OpCode::LoopStart
880                        | OpCode::LoopEnd
881                        | OpCode::Break
882                        | OpCode::Continue
883                        | OpCode::SetupTry
884                        | OpCode::SetIndexRef  // Creates 4 internal blocks — cannot inline
885                        | OpCode::MakeRef      // Creates stack slots + ref tracking
886                        | OpCode::DerefLoad    // Reference dereference
887                        | OpCode::DerefStore // Reference write-through
888                )
889            });
890            if has_control_flow {
891                continue;
892            }
893
894            candidates.insert(
895                fn_id,
896                InlineCandidate {
897                    entry_point,
898                    instruction_count: instr_count,
899                    arity: func.arity,
900                    locals_count: func.locals_count,
901                },
902            );
903        }
904
905        candidates
906    }
907
    /// Record a deopt point with a per-guard spill block.
    ///
    /// Creates a dedicated Cranelift block that stores all live locals
    /// and operand stack values to ctx_buf, then jumps to the shared
    /// deopt block. The returned `(deopt_id, spill_block)` tuple lets
    /// the caller emit `brif(cond, cont, [], spill_block, [extra_vals])`.
    ///
    /// `extra_stack_values`: Cranelift Values that were popped from the
    /// JIT operand stack before the guard but must be on the interpreter
    /// stack at resume. Passed as block parameters to the spill block.
    ///
    /// Handles unboxed int/f64 locals (marks them with proper SlotKind)
    /// and multi-frame inline deopt (captures caller frame state).
    ///
    /// Note: the spill block's *body* is not emitted here — it is queued
    /// on `deferred_spills` and filled in during the compile() epilogue.
    /// This function only records metadata and creates the empty block.
    pub(crate) fn emit_deopt_point_with_spill(
        &mut self,
        bytecode_ip: usize,
        extra_stack_values: &[Value],
    ) -> (usize, Option<Block>) {
        let locals_count = self.func_locals_count;

        // Snapshot live locals for the innermost (current) frame.
        // When inlining, locals use inline_local_base offset keys.
        let inline_base = self.inline_local_base;
        let live_locals: Vec<(u16, Variable)> = self
            .locals
            .iter()
            .filter(|(idx, _)| {
                if inline_base > 0 {
                    // Inlined frame: only include locals in the current inline
                    // namespace (a 128-key-wide window starting at inline_base)
                    **idx >= inline_base && **idx < inline_base + 128
                } else {
                    **idx < 128 // cap at DEOPT_STACK_CTX_BASE
                }
            })
            .map(|(idx, var)| {
                // Map back to bytecode-local index (subtract inline base)
                let bc_idx = idx.wrapping_sub(inline_base);
                (bc_idx, *var)
            })
            .collect();

        // Determine SlotKind for each local based on unboxing state
        let local_kinds: Vec<SlotKind> = live_locals
            .iter()
            .map(|&(bc_idx, _)| {
                if self.unboxed_int_locals.contains(&bc_idx) {
                    SlotKind::Int64
                } else if self.unboxed_f64_locals.contains(&bc_idx) {
                    SlotKind::Float64
                } else {
                    SlotKind::NanBoxed // boxed local: NaN-boxed passthrough
                }
            })
            .collect();

        // Track which locals are unboxed for the spill emission
        // (handed to DeferredSpill below so the epilogue pass knows which
        // variables hold raw, non-NaN-boxed words)
        let f64_locals: std::collections::HashSet<u16> = live_locals
            .iter()
            .filter(|&&(bc_idx, _)| self.unboxed_f64_locals.contains(&bc_idx))
            .map(|&(bc_idx, _)| bc_idx)
            .collect();
        let int_locals: std::collections::HashSet<u16> = live_locals
            .iter()
            .filter(|&&(bc_idx, _)| self.unboxed_int_locals.contains(&bc_idx))
            .map(|&(bc_idx, _)| bc_idx)
            .collect();

        // Total interpreter stack depth at resume = values still modeled on
        // the JIT operand stack + values already popped for this guard.
        let on_stack_count = self.stack_depth;
        let extra_count = extra_stack_values.len();
        let total_stack_depth = on_stack_count + extra_count;

        // Build DeoptInfo with real data
        let deopt_id = self.deopt_points.len();
        let mut local_mapping = Vec::new();
        let mut all_kinds = Vec::new();

        // Locals: identity mapping (ctx_buf_position, bytecode_local_idx)
        for (i, &(bc_idx, _)) in live_locals.iter().enumerate() {
            local_mapping.push((bc_idx, bc_idx));
            all_kinds.push(local_kinds[i]);
        }
        // Operand stack: (DEOPT_STACK_CTX_BASE + i, locals_count + i)
        // Stack values are always NaN-boxed.
        for i in 0..total_stack_depth {
            local_mapping.push((128 + i as u16, locals_count + i as u16));
            all_kinds.push(SlotKind::NanBoxed); // operand stack: NaN-boxed passthrough
        }

        // Build inline_frames for multi-frame deopt
        let mut inline_frames = Vec::new();
        let mut deferred_inline_frames = Vec::new();
        if self.inline_depth > 0 {
            // Capture caller frame(s) from the inline_frame_stack.
            // inline_frame_stack is ordered outermost-first; DeoptInfo uses
            // the same outermost-first ordering ([0]=outermost physical function).
            //
            // NOTE(review): offset = innermost locals + stack depth + 128
            // (DEOPT_STACK_CTX_BASE) — confirm this matches the VM-side
            // ctx_buf layout used for multi-frame restore.
            let mut ctx_buf_offset = live_locals.len() as u16 + total_stack_depth as u16 + 128;
            for ictx in self.inline_frame_stack.iter() {
                let frame_mapping: Vec<(u16, u16)> = ictx
                    .locals_snapshot
                    .iter()
                    .enumerate()
                    .map(|(j, &(bc_idx, _))| {
                        let ctx_pos = ctx_buf_offset + j as u16;
                        (ctx_pos, bc_idx)
                    })
                    .collect();
                let frame_kinds = ictx.local_kinds.clone();

                inline_frames.push(InlineFrameInfo {
                    function_id: ictx.function_id,
                    resume_ip: ictx.call_site_ip,
                    local_mapping: frame_mapping,
                    local_kinds: frame_kinds.clone(),
                    stack_depth: ictx.stack_depth as u16,
                });

                // Parallel record consumed by the deferred spill pass.
                deferred_inline_frames.push(super::types::DeferredInlineFrame {
                    live_locals: ictx.locals_snapshot.clone(),
                    _local_kinds: frame_kinds,
                    _f64_locals: ictx.f64_locals.clone(),
                    _int_locals: ictx.int_locals.clone(),
                });

                // Next caller frame's locals go immediately after this one's.
                ctx_buf_offset += ictx.locals_snapshot.len() as u16;
            }
        }

        // For multi-frame deopt, record the innermost (inlined callee) function ID
        // so the VM can push a synthetic frame for it.
        let innermost_function_id = if self.inline_depth > 0 {
            // The last entry on inline_frame_stack is the immediate caller.
            // The callee_fn_id of that entry is the function where the guard fired.
            self.inline_frame_stack.last().map(|ctx| ctx.callee_fn_id)
        } else {
            None
        };

        self.deopt_points.push(DeoptInfo {
            resume_ip: bytecode_ip,
            local_mapping,
            local_kinds: all_kinds,
            stack_depth: total_stack_depth as u16,
            innermost_function_id,
            inline_frames,
        });

        // Create per-guard spill block with block params for extra values
        let spill_block = self.builder.create_block();
        for _ in 0..extra_count {
            self.builder.append_block_param(spill_block, types::I64);
        }

        // Defer the spill block body emission to compile() epilogue
        self.deferred_spills.push(super::types::DeferredSpill {
            block: spill_block,
            deopt_id: deopt_id as u32,
            live_locals: live_locals.clone(),
            _local_kinds: local_kinds,
            on_stack_count,
            extra_param_count: extra_count,
            f64_locals,
            _int_locals: int_locals,
            inline_frames: deferred_inline_frames,
        });

        (deopt_id, Some(spill_block))
    }
1075
1076    /// Return the deopt points accumulated during compilation.
1077    ///
1078    /// This transfers ownership of the collected deopt metadata out of the
1079    /// compiler so it can be attached to the compilation result.
1080    pub(crate) fn take_deopt_points(&mut self) -> Vec<DeoptInfo> {
1081        std::mem::take(&mut self.deopt_points)
1082    }
1083
1084    /// Verify deopt point metadata for consistency.
1085    ///
1086    /// Checks:
1087    /// - `local_mapping` and `local_kinds` have equal length
1088    /// - Unboxed locals are NOT tagged as `SlotKind::Unknown`
1089    /// - ctx_buf positions are within bounds
1090    ///
1091    /// Returns `Err` on validation failure, causing the JIT compile to abort
1092    /// and the function to fall back to the interpreter.
1093    pub(crate) fn verify_deopt_points(
1094        points: &[DeoptInfo],
1095        unboxed_ints: &std::collections::HashSet<u16>,
1096        unboxed_f64s: &std::collections::HashSet<u16>,
1097    ) -> Result<(), String> {
1098        // VM ctx_buf is 216 u64 words with locals starting at offset 8.
1099        // Max ctx_pos before overflow: 216 - 8 = 208.
1100        const CTX_BUF_LOCALS_MAX: u16 = 208;
1101
1102        for (i, dp) in points.iter().enumerate() {
1103            if dp.local_mapping.len() != dp.local_kinds.len() {
1104                return Err(format!(
1105                    "DeoptInfo[{}]: local_mapping len {} != local_kinds len {}",
1106                    i,
1107                    dp.local_mapping.len(),
1108                    dp.local_kinds.len()
1109                ));
1110            }
1111            // Skip empty deopt points (generic fallback)
1112            if dp.local_mapping.is_empty() {
1113                continue;
1114            }
1115            for (j, &(ctx_pos, bc_idx)) in dp.local_mapping.iter().enumerate() {
1116                let kind = dp.local_kinds[j];
1117
1118                // ctx_buf bounds check: ctx_pos must fit within VM's ctx_buf
1119                if ctx_pos >= CTX_BUF_LOCALS_MAX {
1120                    return Err(format!(
1121                        "DeoptInfo[{}] mapping[{}]: ctx_pos {} exceeds ctx_buf limit {}",
1122                        i, j, ctx_pos, CTX_BUF_LOCALS_MAX
1123                    ));
1124                }
1125
1126                // Precise deopt points must not use SlotKind::Unknown.
1127                // Boxed locals → NanBoxed, unboxed int → Int64, unboxed f64 → Float64.
1128                if kind == SlotKind::Unknown {
1129                    return Err(format!(
1130                        "DeoptInfo[{}] mapping[{}]: slot (ctx_pos={}, bc_idx={}) tagged as Unknown \
1131                         in precise deopt path — use NanBoxed, Int64, or Float64",
1132                        i, j, ctx_pos, bc_idx
1133                    ));
1134                }
1135
1136                // Unboxed int locals must be tagged Int64
1137                if unboxed_ints.contains(&bc_idx) && ctx_pos < 128 && kind != SlotKind::Int64 {
1138                    return Err(format!(
1139                        "DeoptInfo[{}] mapping[{}]: unboxed int local {} tagged as {:?}, expected Int64",
1140                        i, j, bc_idx, kind
1141                    ));
1142                }
1143                // Unboxed f64 locals must be tagged Float64
1144                if unboxed_f64s.contains(&bc_idx) && ctx_pos < 128 && kind != SlotKind::Float64 {
1145                    return Err(format!(
1146                        "DeoptInfo[{}] mapping[{}]: unboxed f64 local {} tagged as {:?}, expected Float64",
1147                        i, j, bc_idx, kind
1148                    ));
1149                }
1150            }
1151
1152            // Also verify inline frames
1153            for (fi, iframe) in dp.inline_frames.iter().enumerate() {
1154                if iframe.local_mapping.len() != iframe.local_kinds.len() {
1155                    return Err(format!(
1156                        "DeoptInfo[{}].inline_frames[{}]: local_mapping len {} != local_kinds len {}",
1157                        i,
1158                        fi,
1159                        iframe.local_mapping.len(),
1160                        iframe.local_kinds.len()
1161                    ));
1162                }
1163                for (j, &(ctx_pos, bc_idx)) in iframe.local_mapping.iter().enumerate() {
1164                    if ctx_pos >= CTX_BUF_LOCALS_MAX {
1165                        return Err(format!(
1166                            "DeoptInfo[{}].inline_frames[{}] mapping[{}]: ctx_pos {} exceeds ctx_buf limit {}",
1167                            i, fi, j, ctx_pos, CTX_BUF_LOCALS_MAX
1168                        ));
1169                    }
1170                    let kind = iframe
1171                        .local_kinds
1172                        .get(j)
1173                        .copied()
1174                        .unwrap_or(SlotKind::Unknown);
1175                    if kind == SlotKind::Unknown {
1176                        return Err(format!(
1177                            "DeoptInfo[{}].inline_frames[{}] mapping[{}]: slot (ctx_pos={}, bc_idx={}) \
1178                             tagged as Unknown in precise path",
1179                            i, fi, j, ctx_pos, bc_idx
1180                        ));
1181                    }
1182                }
1183            }
1184        }
1185        Ok(())
1186    }
1187
1188    /// Attach a feedback vector snapshot for feedback-guided speculation.
1189    ///
1190    /// When set, the compiler consults IC feedback at each eligible bytecode
1191    /// site (call, property access, arithmetic) to emit speculative guards
1192    /// with typed fast paths. Guard failures branch to the deopt block.
1193    pub(crate) fn set_feedback(&mut self, feedback: FeedbackVector) {
1194        self.feedback = Some(feedback);
1195    }
1196
1197    /// Return the shape guard IDs accumulated during compilation.
1198    ///
1199    /// These should be registered as shape dependencies with the DeoptTracker
1200    /// so that shape transitions can invalidate stale JIT code.
1201    pub(crate) fn take_shape_guards(&mut self) -> Vec<shape_value::shape_graph::ShapeId> {
1202        std::mem::take(&mut self.shape_guards_emitted)
1203    }
1204
1205    pub fn compile_kernel(&mut self) -> Result<Value, String> {
1206        assert!(
1207            self.mode == CompilationMode::Kernel,
1208            "compile_kernel() requires kernel mode"
1209        );
1210
1211        // Kernel mode: simple linear instruction stream
1212        // For V1, we don't support complex control flow in kernels
1213        let instrs = self.program.instructions.clone();
1214        for (idx, instr) in instrs.iter().enumerate() {
1215            self.current_instr_idx = idx;
1216            self.compile_instruction(instr, idx)?;
1217        }
1218
1219        // Return result: 0 (continue) or value from stack converted to i32
1220        let result = if self.stack_depth > 0 {
1221            let val = self.stack_pop().unwrap();
1222            // Convert NaN-boxed value to i32 result code
1223            // If it's a number, truncate to i32; otherwise return 0
1224            self.builder.ins().ireduce(types::I32, val)
1225        } else {
1226            self.builder.ins().iconst(types::I32, 0)
1227        };
1228
1229        Ok(result)
1230    }
1231}
1232
1233#[cfg(test)]
1234#[path = "compiler_tests.rs"]
1235mod tests;