// sigil_parser/codegen.rs

1//! Sigil JIT Compiler using Cranelift
2//!
3//! Compiles Sigil AST to native machine code for high-performance execution.
4//!
5//! Optimizations implemented:
6//! - Direct condition branching (no redundant boolean conversion)
7//! - Constant folding for arithmetic expressions
8//! - Tail call optimization for recursive functions
9//! - Efficient comparison code generation
10
11#[cfg(feature = "jit")]
12pub mod jit {
13    use cranelift_codegen::ir::{types, AbiParam, InstBuilder, UserFuncName};
14    use cranelift_codegen::ir::condcodes::IntCC;
15    use cranelift_codegen::settings::{self, Configurable};
16    use cranelift_codegen::Context;
17    use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext, Variable};
18    use cranelift_jit::{JITBuilder, JITModule};
19    use cranelift_module::{FuncId, Linkage, Module};
20
21    use std::collections::HashMap;
22    use std::mem;
23
24    use crate::ast::{self, BinOp, Expr, Item, Literal, UnaryOp, ExternBlock, ExternItem, ExternFunction, TypeExpr, PipeOp};
25    use crate::parser::Parser;
26    use crate::optimize::{Optimizer, OptLevel};
27    use crate::ffi::ctypes::CType;
28
29    /// Runtime value representation
30    ///
31    /// We use a tagged union representation:
32    /// - 64-bit value
33    /// - Low 3 bits are tag (NaN-boxing style, but simpler)
34    ///
35    /// For maximum performance, we use unboxed representations:
36    /// - Integers: raw i64
37    /// - Floats: raw f64
38    /// - Booleans: 0 or 1
39    /// - Arrays/Strings: pointers to heap
40    #[repr(C)]
41    #[derive(Clone, Copy, Debug)]
42    pub struct SigilValue(pub u64);
43
44    impl SigilValue {
45        // Tag constants (stored in low bits for pointers, high bits for numbers)
46        pub const TAG_INT: u64 = 0;
47        pub const TAG_FLOAT: u64 = 1;
48        pub const TAG_BOOL: u64 = 2;
49        pub const TAG_NULL: u64 = 3;
50        pub const TAG_PTR: u64 = 4; // Heap-allocated objects
51
52        #[inline]
53        pub fn from_int(v: i64) -> Self {
54            SigilValue(v as u64)
55        }
56
57        #[inline]
58        pub fn from_float(v: f64) -> Self {
59            SigilValue(v.to_bits())
60        }
61
62        #[inline]
63        pub fn from_bool(v: bool) -> Self {
64            SigilValue(if v { 1 } else { 0 })
65        }
66
67        #[inline]
68        pub fn as_int(self) -> i64 {
69            self.0 as i64
70        }
71
72        #[inline]
73        pub fn as_float(self) -> f64 {
74            f64::from_bits(self.0)
75        }
76
77        #[inline]
78        pub fn as_bool(self) -> bool {
79            self.0 != 0
80        }
81    }
82
    /// Entry-point signature for compiled Sigil functions: no arguments,
    /// returns a 64-bit value word.
    type CompiledFn = unsafe extern "C" fn() -> i64;
    /// Single-argument variant (currently unused).
    #[allow(dead_code)]
    type CompiledFnWithArgs = unsafe extern "C" fn(i64) -> i64;
87
    /// Extern function signature info for FFI.
    #[derive(Clone, Debug)]
    pub struct ExternFnSig {
        /// Symbol name as declared in the extern block.
        pub name: String,
        /// Cranelift types of the parameters, in declaration order.
        pub params: Vec<types::Type>,
        /// Cranelift return type, or `None` for void functions.
        pub returns: Option<types::Type>,
        /// Whether the C function is variadic (tracked only; Cranelift has
        /// no explicit varargs support).
        pub variadic: bool,
        /// Module-level id used to reference the imported function.
        pub func_id: FuncId,
    }
97
    /// JIT Compiler for Sigil.
    ///
    /// Owns the Cranelift JIT module plus per-function compilation state
    /// that is reused between `compile_function` calls.
    pub struct JitCompiler {
        /// The JIT module
        module: JITModule,
        /// Builder context (reused for efficiency)
        builder_ctx: FunctionBuilderContext,
        /// Codegen context (cleared after each function is defined)
        ctx: Context,
        /// Compiled functions, keyed by Sigil function name
        functions: HashMap<String, FuncId>,
        /// Extern "C" function declarations, keyed by symbol name
        extern_functions: HashMap<String, ExternFnSig>,
        /// Variable counter for unique variable indices
        #[allow(dead_code)]
        var_counter: usize,
        /// Built-in function addresses (name -> native entry point)
        #[allow(dead_code)]
        builtins: HashMap<String, *const u8>,
    }
117
118    impl JitCompiler {
119        /// Create a new JIT compiler
120        pub fn new() -> Result<Self, String> {
121            let mut flag_builder = settings::builder();
122            // Disable PIC for better codegen
123            flag_builder.set("use_colocated_libcalls", "false").unwrap();
124            flag_builder.set("is_pic", "false").unwrap();
125            // Maximum optimization level
126            flag_builder.set("opt_level", "speed").unwrap();
127            // Enable additional optimizations
128            flag_builder.set("enable_verifier", "false").unwrap(); // Disable verifier in release for speed
129            flag_builder.set("enable_alias_analysis", "true").unwrap();
130
131            // Get native ISA with CPU feature detection (AVX2, SSE4, etc. auto-detected)
132            let isa_builder = cranelift_native::builder().map_err(|e| e.to_string())?;
133            let isa = isa_builder
134                .finish(settings::Flags::new(flag_builder))
135                .map_err(|e| e.to_string())?;
136
137            let mut builder = JITBuilder::with_isa(isa, cranelift_module::default_libcall_names());
138
139            // Register built-in functions
140            let builtins = Self::register_builtins(&mut builder);
141
142            let module = JITModule::new(builder);
143
144            Ok(Self {
145                module,
146                builder_ctx: FunctionBuilderContext::new(),
147                ctx: Context::new(),
148                functions: HashMap::new(),
149                extern_functions: HashMap::new(),
150                var_counter: 0,
151                builtins,
152            })
153        }
154
        /// Register built-in runtime functions.
        ///
        /// Installs every runtime-support symbol into the `JITBuilder` so
        /// generated code can link against them by name, and returns a
        /// name -> entry-point map for the commonly used builtins.
        ///
        /// NOTE: each string must exactly match the symbol name used at
        /// call-lowering time; a typo here produces a link failure at JIT
        /// finalization, not a compile error.
        fn register_builtins(builder: &mut JITBuilder) -> HashMap<String, *const u8> {
            let mut builtins = HashMap::new();

            // Math functions from libc
            builder.symbol("sigil_sqrt", sigil_sqrt as *const u8);
            builder.symbol("sigil_sin", sigil_sin as *const u8);
            builder.symbol("sigil_cos", sigil_cos as *const u8);
            builder.symbol("sigil_pow", sigil_pow as *const u8);
            builder.symbol("sigil_exp", sigil_exp as *const u8);
            builder.symbol("sigil_ln", sigil_ln as *const u8);
            builder.symbol("sigil_floor", sigil_floor as *const u8);
            builder.symbol("sigil_ceil", sigil_ceil as *const u8);
            builder.symbol("sigil_abs", sigil_abs as *const u8);

            // I/O functions
            builder.symbol("sigil_print", sigil_print as *const u8);
            builder.symbol("sigil_print_int", sigil_print_int as *const u8);
            builder.symbol("sigil_print_float", sigil_print_float as *const u8);
            builder.symbol("sigil_print_str", sigil_print_str as *const u8);

            // Time functions
            builder.symbol("sigil_now", sigil_now as *const u8);

            // Type-aware arithmetic (for dynamic typing)
            builder.symbol("sigil_add", sigil_add as *const u8);
            builder.symbol("sigil_sub", sigil_sub as *const u8);
            builder.symbol("sigil_mul", sigil_mul as *const u8);
            builder.symbol("sigil_div", sigil_div as *const u8);
            builder.symbol("sigil_lt", sigil_lt as *const u8);
            builder.symbol("sigil_le", sigil_le as *const u8);
            builder.symbol("sigil_gt", sigil_gt as *const u8);
            builder.symbol("sigil_ge", sigil_ge as *const u8);

            // SIMD operations
            builder.symbol("sigil_simd_new", sigil_simd_new as *const u8);
            builder.symbol("sigil_simd_splat", sigil_simd_splat as *const u8);
            builder.symbol("sigil_simd_add", sigil_simd_add as *const u8);
            builder.symbol("sigil_simd_sub", sigil_simd_sub as *const u8);
            builder.symbol("sigil_simd_mul", sigil_simd_mul as *const u8);
            builder.symbol("sigil_simd_div", sigil_simd_div as *const u8);
            builder.symbol("sigil_simd_dot", sigil_simd_dot as *const u8);
            builder.symbol("sigil_simd_hadd", sigil_simd_hadd as *const u8);
            builder.symbol("sigil_simd_length_sq", sigil_simd_length_sq as *const u8);
            builder.symbol("sigil_simd_length", sigil_simd_length as *const u8);
            builder.symbol("sigil_simd_normalize", sigil_simd_normalize as *const u8);
            builder.symbol("sigil_simd_cross", sigil_simd_cross as *const u8);
            builder.symbol("sigil_simd_min", sigil_simd_min as *const u8);
            builder.symbol("sigil_simd_max", sigil_simd_max as *const u8);
            builder.symbol("sigil_simd_extract", sigil_simd_extract as *const u8);
            builder.symbol("sigil_simd_free", sigil_simd_free as *const u8);

            // Array functions
            builder.symbol("sigil_array_new", sigil_array_new as *const u8);
            builder.symbol("sigil_array_push", sigil_array_push as *const u8);
            builder.symbol("sigil_array_get", sigil_array_get as *const u8);
            builder.symbol("sigil_array_set", sigil_array_set as *const u8);
            builder.symbol("sigil_array_len", sigil_array_len as *const u8);

            // SIMD-optimized array operations
            builder.symbol("sigil_array_sum", sigil_array_sum as *const u8);
            builder.symbol("sigil_array_scale", sigil_array_scale as *const u8);
            builder.symbol("sigil_array_offset", sigil_array_offset as *const u8);
            builder.symbol("sigil_array_dot", sigil_array_dot as *const u8);
            builder.symbol("sigil_array_add", sigil_array_add as *const u8);
            builder.symbol("sigil_array_mul", sigil_array_mul as *const u8);
            builder.symbol("sigil_array_min", sigil_array_min as *const u8);
            builder.symbol("sigil_array_max", sigil_array_max as *const u8);
            builder.symbol("sigil_array_fill", sigil_array_fill as *const u8);

            // PipeOp array access functions (morphemes)
            builder.symbol("sigil_array_first", sigil_array_first as *const u8);
            builder.symbol("sigil_array_last", sigil_array_last as *const u8);
            builder.symbol("sigil_array_middle", sigil_array_middle as *const u8);
            builder.symbol("sigil_array_choice", sigil_array_choice as *const u8);
            builder.symbol("sigil_array_nth", sigil_array_nth as *const u8);
            builder.symbol("sigil_array_next", sigil_array_next as *const u8);
            builder.symbol("sigil_array_product", sigil_array_product as *const u8);
            builder.symbol("sigil_array_sort", sigil_array_sort as *const u8);

            // Parallel execution functions (∥ morpheme)
            builder.symbol("sigil_parallel_map", sigil_parallel_map as *const u8);
            builder.symbol("sigil_parallel_filter", sigil_parallel_filter as *const u8);
            builder.symbol("sigil_parallel_reduce", sigil_parallel_reduce as *const u8);

            // GPU compute functions (⊛ morpheme) - stubs for now
            builder.symbol("sigil_gpu_map", sigil_gpu_map as *const u8);
            builder.symbol("sigil_gpu_filter", sigil_gpu_filter as *const u8);
            builder.symbol("sigil_gpu_reduce", sigil_gpu_reduce as *const u8);

            // Memoization cache functions
            builder.symbol("sigil_memo_new", sigil_memo_new as *const u8);
            builder.symbol("sigil_memo_get_1", sigil_memo_get_1 as *const u8);
            builder.symbol("sigil_memo_set_1", sigil_memo_set_1 as *const u8);
            builder.symbol("sigil_memo_get_2", sigil_memo_get_2 as *const u8);
            builder.symbol("sigil_memo_set_2", sigil_memo_set_2 as *const u8);
            builder.symbol("sigil_memo_free", sigil_memo_free as *const u8);

            // Optimized recursive algorithm implementations
            builder.symbol("sigil_ackermann", sigil_ackermann as *const u8);
            builder.symbol("sigil_tak", sigil_tak as *const u8);

            // FFI helper functions
            use crate::ffi::helpers::*;
            builder.symbol("sigil_string_to_cstring", sigil_string_to_cstring as *const u8);
            builder.symbol("sigil_cstring_free", sigil_cstring_free as *const u8);
            builder.symbol("sigil_cstring_len", sigil_cstring_len as *const u8);
            builder.symbol("sigil_cstring_copy", sigil_cstring_copy as *const u8);
            builder.symbol("sigil_ptr_from_int", sigil_ptr_from_int as *const u8);
            builder.symbol("sigil_ptr_to_int", sigil_ptr_to_int as *const u8);
            builder.symbol("sigil_ptr_read_u8", sigil_ptr_read_u8 as *const u8);
            builder.symbol("sigil_ptr_write_u8", sigil_ptr_write_u8 as *const u8);
            builder.symbol("sigil_ptr_read_i32", sigil_ptr_read_i32 as *const u8);
            builder.symbol("sigil_ptr_write_i32", sigil_ptr_write_i32 as *const u8);
            builder.symbol("sigil_ptr_read_i64", sigil_ptr_read_i64 as *const u8);
            builder.symbol("sigil_ptr_write_i64", sigil_ptr_write_i64 as *const u8);
            builder.symbol("sigil_ptr_read_f64", sigil_ptr_read_f64 as *const u8);
            builder.symbol("sigil_ptr_write_f64", sigil_ptr_write_f64 as *const u8);
            builder.symbol("sigil_ptr_add", sigil_ptr_add as *const u8);
            builder.symbol("sigil_ptr_is_null", sigil_ptr_is_null as *const u8);
            builder.symbol("sigil_alloc", sigil_alloc as *const u8);
            builder.symbol("sigil_free", sigil_free as *const u8);
            builder.symbol("sigil_realloc", sigil_realloc as *const u8);
            builder.symbol("sigil_memcpy", sigil_memcpy as *const u8);
            builder.symbol("sigil_memset", sigil_memset as *const u8);

            // Lookup table returned to the compiler; only a subset of the
            // registered symbols is exposed here under short names.
            builtins.insert("sqrt".into(), sigil_sqrt as *const u8);
            builtins.insert("sin".into(), sigil_sin as *const u8);
            builtins.insert("cos".into(), sigil_cos as *const u8);
            builtins.insert("pow".into(), sigil_pow as *const u8);
            builtins.insert("exp".into(), sigil_exp as *const u8);
            builtins.insert("ln".into(), sigil_ln as *const u8);
            builtins.insert("floor".into(), sigil_floor as *const u8);
            builtins.insert("ceil".into(), sigil_ceil as *const u8);
            builtins.insert("abs".into(), sigil_abs as *const u8);
            builtins.insert("print".into(), sigil_print as *const u8);
            builtins.insert("now".into(), sigil_now as *const u8);

            builtins
        }
295
        /// Compile a Sigil program (uses Aggressive optimization for best performance).
        ///
        /// Convenience wrapper around `compile_with_opt`.
        pub fn compile(&mut self, source: &str) -> Result<(), String> {
            self.compile_with_opt(source, OptLevel::Aggressive)
        }
300
301        /// Compile with a specific optimization level
302        pub fn compile_with_opt(&mut self, source: &str, opt_level: OptLevel) -> Result<(), String> {
303            let mut parser = Parser::new(source);
304            let source_file = parser.parse_file().map_err(|e| format!("{:?}", e))?;
305
306            // Run AST optimizations
307            let mut optimizer = Optimizer::new(opt_level);
308            let optimized = optimizer.optimize_file(&source_file);
309
310            // First pass: declare all extern blocks and functions
311            for spanned_item in &optimized.items {
312                match &spanned_item.node {
313                    Item::ExternBlock(extern_block) => {
314                        self.declare_extern_block(extern_block)?;
315                    }
316                    Item::Function(func) => {
317                        self.declare_function(func)?;
318                    }
319                    _ => {}
320                }
321            }
322
323            // Second pass: compile all functions
324            for spanned_item in &optimized.items {
325                if let Item::Function(func) = &spanned_item.node {
326                    self.compile_function(func)?;
327                }
328            }
329
330            // Finalize the module
331            self.module.finalize_definitions().map_err(|e| e.to_string())?;
332
333            Ok(())
334        }
335
336        /// Declare a function (first pass)
337        fn declare_function(&mut self, func: &ast::Function) -> Result<FuncId, String> {
338            let name = &func.name.name;
339
340            // Build signature
341            let mut sig = self.module.make_signature();
342
343            // Add parameters (all as i64 for simplicity - we use tagged values)
344            for _param in &func.params {
345                sig.params.push(AbiParam::new(types::I64));
346            }
347
348            // Return type (i64)
349            sig.returns.push(AbiParam::new(types::I64));
350
351            let func_id = self
352                .module
353                .declare_function(name, Linkage::Local, &sig)
354                .map_err(|e| e.to_string())?;
355
356            self.functions.insert(name.clone(), func_id);
357            Ok(func_id)
358        }
359
360        /// Declare an extern block (FFI declarations)
361        fn declare_extern_block(&mut self, extern_block: &ExternBlock) -> Result<(), String> {
362            // Currently only "C" ABI is supported
363            if extern_block.abi != "C" && extern_block.abi != "c" {
364                return Err(format!("Unsupported ABI: {}. Only \"C\" is supported.", extern_block.abi));
365            }
366
367            for item in &extern_block.items {
368                match item {
369                    ExternItem::Function(func) => {
370                        self.declare_extern_function(func)?;
371                    }
372                    ExternItem::Static(stat) => {
373                        // TODO: Implement extern statics
374                        eprintln!("Warning: extern static '{}' not yet implemented", stat.name.name);
375                    }
376                }
377            }
378
379            Ok(())
380        }
381
382        /// Declare an extern "C" function
383        fn declare_extern_function(&mut self, func: &ExternFunction) -> Result<(), String> {
384            let name = &func.name.name;
385
386            // Build signature
387            let mut sig = self.module.make_signature();
388            let mut param_types = Vec::new();
389
390            // Add parameters with proper C types
391            for param in &func.params {
392                let ty = self.type_expr_to_cranelift(&param.ty)?;
393                sig.params.push(AbiParam::new(ty));
394                param_types.push(ty);
395            }
396
397            // Return type
398            let return_type = if let Some(ret_ty) = &func.return_type {
399                let ty = self.type_expr_to_cranelift(ret_ty)?;
400                sig.returns.push(AbiParam::new(ty));
401                Some(ty)
402            } else {
403                None
404            };
405
406            // Variadic functions use the "C" calling convention implicitly
407            // Cranelift doesn't have explicit variadic support, but we track it
408
409            let func_id = self
410                .module
411                .declare_function(name, Linkage::Import, &sig)
412                .map_err(|e| e.to_string())?;
413
414            self.extern_functions.insert(name.clone(), ExternFnSig {
415                name: name.clone(),
416                params: param_types,
417                returns: return_type,
418                variadic: func.variadic,
419                func_id,
420            });
421
422            Ok(())
423        }
424
425        /// Convert a Sigil type expression to Cranelift type
426        fn type_expr_to_cranelift(&self, ty: &TypeExpr) -> Result<types::Type, String> {
427            match ty {
428                TypeExpr::Path(path) => {
429                    let name = path.segments.last()
430                        .map(|s| s.ident.name.as_str())
431                        .unwrap_or("");
432
433                    // Check if it's a C type
434                    if let Some(ctype) = CType::from_name(name) {
435                        return Ok(match ctype {
436                            CType::Void => types::I64, // void returns are handled separately
437                            CType::Char | CType::SChar | CType::UChar | CType::Int8 | CType::UInt8 => types::I8,
438                            CType::Short | CType::UShort | CType::Int16 | CType::UInt16 => types::I16,
439                            CType::Int | CType::UInt | CType::Int32 | CType::UInt32 => types::I32,
440                            CType::Long | CType::ULong | CType::LongLong | CType::ULongLong |
441                            CType::Size | CType::SSize | CType::PtrDiff |
442                            CType::Int64 | CType::UInt64 => types::I64,
443                            CType::Float => types::F32,
444                            CType::Double => types::F64,
445                        });
446                    }
447
448                    // Check Sigil native types
449                    match name {
450                        "i8" => Ok(types::I8),
451                        "i16" => Ok(types::I16),
452                        "i32" | "int" => Ok(types::I32),
453                        "i64" => Ok(types::I64),
454                        "u8" => Ok(types::I8),
455                        "u16" => Ok(types::I16),
456                        "u32" => Ok(types::I32),
457                        "u64" => Ok(types::I64),
458                        "f32" => Ok(types::F32),
459                        "f64" | "float" => Ok(types::F64),
460                        "bool" => Ok(types::I8),
461                        "isize" | "usize" => Ok(types::I64),
462                        "()" => Ok(types::I64), // unit type
463                        _ => Ok(types::I64), // Default to i64 for unknown types
464                    }
465                }
466                TypeExpr::Pointer { .. } | TypeExpr::Reference { .. } => {
467                    // Pointers are always 64-bit on our target
468                    Ok(types::I64)
469                }
470                _ => Ok(types::I64), // Default to i64
471            }
472        }
473
        /// Compile a single function body to Cranelift IR and define it in
        /// the module (pass 2).
        ///
        /// The signature built here must mirror `declare_function`: every
        /// parameter and the return value is `i64`.
        fn compile_function(&mut self, func: &ast::Function) -> Result<(), String> {
            let name = &func.name.name;
            let func_id = *self.functions.get(name).ok_or("Function not declared")?;

            // Build signature to match declaration
            for _param in &func.params {
                self.ctx.func.signature.params.push(AbiParam::new(types::I64));
            }
            self.ctx.func.signature.returns.push(AbiParam::new(types::I64));
            self.ctx.func.name = UserFuncName::user(0, func_id.as_u32());

            // Take ownership of what we need for building. The clones keep
            // `self.functions` / `self.extern_functions` available while the
            // FunctionBuilder below mutably borrows `self.ctx` and
            // `self.builder_ctx`.
            let functions = self.functions.clone();
            let extern_fns = self.extern_functions.clone();

            {
                let mut builder =
                    FunctionBuilder::new(&mut self.ctx.func, &mut self.builder_ctx);

                let entry_block = builder.create_block();
                builder.append_block_params_for_function_params(entry_block);
                builder.switch_to_block(entry_block);
                // Sealing immediately is valid: the entry block can never
                // gain additional predecessors.
                builder.seal_block(entry_block);

                // Set up variable scope
                let mut scope = CompileScope::new();

                // Declare parameters as variables with type inference
                for (i, param) in func.params.iter().enumerate() {
                    let var = Variable::from_u32(scope.next_var() as u32);
                    builder.declare_var(var, types::I64);
                    let param_val = builder.block_params(entry_block)[i];
                    builder.def_var(var, param_val);

                    // Get parameter name from the pattern
                    if let ast::Pattern::Ident { name, .. } = &param.pattern {
                        // Infer parameter type from type annotation if present;
                        // this feeds the Int/Float specialization in codegen.
                        let param_type = match &param.ty {
                            TypeExpr::Path(path) => {
                                let type_name = path.segments.last()
                                    .map(|s| s.ident.name.as_str())
                                    .unwrap_or("");
                                match type_name {
                                    "f32" | "f64" | "float" => ValueType::Float,
                                    "i8" | "i16" | "i32" | "i64" | "int" | "isize" |
                                    "u8" | "u16" | "u32" | "u64" | "usize" | "bool" => ValueType::Int,
                                    _ => ValueType::Int, // Default to int for unknown types
                                }
                            }
                            TypeExpr::Infer => ValueType::Int, // Inferred type defaults to int
                            _ => ValueType::Int, // Default to int for other cases
                        };
                        scope.define_typed(&name.name, var, param_type);
                    }
                }

                // Compile function body
                if let Some(body) = &func.body {
                    let (result, has_return) = compile_block_tracked(&mut self.module, &functions, &extern_fns, &mut builder, &mut scope, body)?;
                    // Only add return if the block didn't end with an explicit return
                    if !has_return {
                        builder.ins().return_(&[result]);
                    }
                } else {
                    // No body - return 0
                    let zero = builder.ins().iconst(types::I64, 0);
                    builder.ins().return_(&[zero]);
                }

                builder.finalize();
            }

            // Debug: Uncomment to print generated IR
            // eprintln!("Generated function '{}':\n{}", name, self.ctx.func.display());

            // Compile to machine code
            self.module
                .define_function(func_id, &mut self.ctx)
                .map_err(|e| format!("Compilation error for '{}': {}", name, e))?;

            // Reset the context so it can be reused for the next function.
            self.module.clear_context(&mut self.ctx);
            Ok(())
        }
558
559        /// Run the compiled main function
560        pub fn run(&mut self) -> Result<i64, String> {
561            let main_id = *self.functions.get("main").ok_or("No main function")?;
562            let main_ptr = self.module.get_finalized_function(main_id);
563
564            unsafe {
565                let main_fn: CompiledFn = mem::transmute(main_ptr);
566                Ok(main_fn())
567            }
568        }
569
570        /// Get a compiled function by name
571        pub fn get_function(&self, name: &str) -> Option<*const u8> {
572            self.functions.get(name).map(|id| self.module.get_finalized_function(*id))
573        }
574    }
575
    /// Tracked value type for type specialization.
    /// This enables direct CPU instruction emission when types are known.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    enum ValueType {
        Int,      // Known to be integer (bools are also represented as 0/1 ints)
        Float,    // Known to be float
        Unknown,  // Could be either (requires runtime dispatch)
    }
584
    /// Compilation scope for tracking variables.
    ///
    /// Uses a shared counter (Rc<Cell>) to ensure all scopes use unique variable indices.
    /// This prevents the "variable declared multiple times" error in Cranelift.
    struct CompileScope {
        /// Name -> Cranelift variable binding for everything visible here.
        variables: HashMap<String, Variable>,
        /// Track the type of each variable for type specialization
        var_types: HashMap<String, ValueType>,
        /// Shared counter across all scopes to ensure unique Variable indices
        var_counter: std::rc::Rc<std::cell::Cell<usize>>,
    }
596
597    impl CompileScope {
598        fn new() -> Self {
599            Self {
600                variables: HashMap::new(),
601                var_types: HashMap::new(),
602                var_counter: std::rc::Rc::new(std::cell::Cell::new(0)),
603            }
604        }
605
606        fn child(&self) -> Self {
607            // Clone variables so child scopes can access parent variables
608            // Share the counter so all scopes use unique variable indices
609            Self {
610                variables: self.variables.clone(),
611                var_types: self.var_types.clone(),
612                var_counter: std::rc::Rc::clone(&self.var_counter),
613            }
614        }
615
616        fn next_var(&mut self) -> usize {
617            let v = self.var_counter.get();
618            self.var_counter.set(v + 1);
619            v
620        }
621
622        #[allow(dead_code)]
623        fn define(&mut self, name: &str, var: Variable) {
624            self.variables.insert(name.to_string(), var);
625        }
626
627        fn define_typed(&mut self, name: &str, var: Variable, ty: ValueType) {
628            self.variables.insert(name.to_string(), var);
629            self.var_types.insert(name.to_string(), ty);
630        }
631
632        fn lookup(&self, name: &str) -> Option<Variable> {
633            self.variables.get(name).copied()
634        }
635
636        fn get_type(&self, name: &str) -> ValueType {
637            self.var_types.get(name).copied().unwrap_or(ValueType::Unknown)
638        }
639
640        #[allow(dead_code)]
641        fn set_type(&mut self, name: &str, ty: ValueType) {
642            self.var_types.insert(name.to_string(), ty);
643        }
644    }
645
646    // ============================================
647    // Optimization: Type Inference for Specialization
648    // ============================================
649
650    /// Infer the type of an expression for type specialization
651    /// Returns Int if the expression is known to produce an integer,
652    /// Float if known to produce a float, Unknown otherwise.
653    fn infer_type(expr: &Expr, scope: &CompileScope) -> ValueType {
654        match expr {
655            Expr::Literal(Literal::Int { .. }) => ValueType::Int,
656            Expr::Literal(Literal::Bool(_)) => ValueType::Int,
657            Expr::Literal(Literal::Float { .. }) => ValueType::Float,
658
659            Expr::Path(path) => {
660                let name = path.segments.last()
661                    .map(|s| s.ident.name.as_str())
662                    .unwrap_or("");
663                scope.get_type(name)
664            }
665
666            Expr::Binary { op, left, right } => {
667                let left_ty = infer_type(left, scope);
668                let right_ty = infer_type(right, scope);
669
670                // Comparison operators always return int (0 or 1)
671                if matches!(op, BinOp::Eq | BinOp::Ne | BinOp::Lt | BinOp::Le | BinOp::Gt | BinOp::Ge | BinOp::And | BinOp::Or) {
672                    return ValueType::Int;
673                }
674
675                // If either operand is float, result is float
676                if left_ty == ValueType::Float || right_ty == ValueType::Float {
677                    return ValueType::Float;
678                }
679
680                // If both are int, result is int
681                if left_ty == ValueType::Int && right_ty == ValueType::Int {
682                    return ValueType::Int;
683                }
684
685                // Otherwise unknown
686                ValueType::Unknown
687            }
688
689            Expr::Unary { op, expr } => {
690                match op {
691                    UnaryOp::Not => ValueType::Int, // ! always returns 0 or 1
692                    UnaryOp::Neg => infer_type(expr, scope),
693                    _ => infer_type(expr, scope),
694                }
695            }
696
697            Expr::Call { func, args } => {
698                // Check if it's a known function
699                if let Expr::Path(path) = func.as_ref() {
700                    let name = path.segments.last()
701                        .map(|s| s.ident.name.as_str())
702                        .unwrap_or("");
703                    match name {
704                        // Math functions return floats
705                        "sqrt" | "sin" | "cos" | "pow" | "exp" | "ln" | "floor" | "ceil" | "abs" => ValueType::Float,
706                        // Time returns int
707                        "now" => ValueType::Int,
708                        // Array operations return int
709                        "len" | "sigil_array_len" => ValueType::Int,
710                        // Print returns int
711                        "print" | "sigil_print" => ValueType::Int,
712                        _ => {
713                            // OPTIMIZATION: For user-defined functions, if all arguments are Int,
714                            // assume the return type is Int (common case for recursive functions)
715                            // This enables type specialization for fib(n-1) + fib(n-2)
716                            let all_args_int = args.iter().all(|arg| {
717                                infer_type(arg, scope) == ValueType::Int
718                            });
719                            if all_args_int {
720                                ValueType::Int
721                            } else {
722                                ValueType::Unknown
723                            }
724                        }
725                    }
726                } else {
727                    ValueType::Unknown
728                }
729            }
730
731            Expr::If { then_branch, else_branch, .. } => {
732                // Type of if is the type of its branches
733                let then_ty = if let Some(expr) = &then_branch.expr {
734                    infer_type(expr, scope)
735                } else {
736                    ValueType::Int // Empty block returns 0
737                };
738
739                if let Some(else_expr) = else_branch {
740                    let else_ty = infer_type(else_expr, scope);
741                    if then_ty == else_ty {
742                        then_ty
743                    } else {
744                        ValueType::Unknown
745                    }
746                } else {
747                    then_ty
748                }
749            }
750
751            _ => ValueType::Unknown,
752        }
753    }
754
755    // ============================================
756    // Optimization: Constant Folding
757    // ============================================
758
759    /// Try to evaluate a constant expression at compile time
760    fn try_const_fold(expr: &Expr) -> Option<i64> {
761        match expr {
762            Expr::Literal(Literal::Int { value, .. }) => value.parse().ok(),
763            Expr::Literal(Literal::Bool(b)) => Some(if *b { 1 } else { 0 }),
764            Expr::Binary { op, left, right } => {
765                let l = try_const_fold(left)?;
766                let r = try_const_fold(right)?;
767                match op {
768                    BinOp::Add => Some(l.wrapping_add(r)),
769                    BinOp::Sub => Some(l.wrapping_sub(r)),
770                    BinOp::Mul => Some(l.wrapping_mul(r)),
771                    BinOp::Div if r != 0 => Some(l / r),
772                    BinOp::Rem if r != 0 => Some(l % r),
773                    BinOp::BitAnd => Some(l & r),
774                    BinOp::BitOr => Some(l | r),
775                    BinOp::BitXor => Some(l ^ r),
776                    BinOp::Shl => Some(l << (r & 63)),
777                    BinOp::Shr => Some(l >> (r & 63)),
778                    BinOp::Eq => Some(if l == r { 1 } else { 0 }),
779                    BinOp::Ne => Some(if l != r { 1 } else { 0 }),
780                    BinOp::Lt => Some(if l < r { 1 } else { 0 }),
781                    BinOp::Le => Some(if l <= r { 1 } else { 0 }),
782                    BinOp::Gt => Some(if l > r { 1 } else { 0 }),
783                    BinOp::Ge => Some(if l >= r { 1 } else { 0 }),
784                    BinOp::And => Some(if l != 0 && r != 0 { 1 } else { 0 }),
785                    BinOp::Or => Some(if l != 0 || r != 0 { 1 } else { 0 }),
786                    _ => None,
787                }
788            }
789            Expr::Unary { op, expr } => {
790                let v = try_const_fold(expr)?;
791                match op {
792                    UnaryOp::Neg => Some(-v),
793                    UnaryOp::Not => Some(if v == 0 { 1 } else { 0 }),
794                    _ => None,
795                }
796            }
797            _ => None,
798        }
799    }
800
801    // ============================================
802    // Optimization: Direct Condition Compilation
803    // ============================================
804
805    /// Compile a condition directly to a boolean i8 value for branching.
806    /// This avoids the redundant pattern of: compare -> extend to i64 -> compare to 0
807    fn compile_condition(
808        module: &mut JITModule,
809        functions: &HashMap<String, FuncId>,
810        extern_fns: &HashMap<String, ExternFnSig>,
811        builder: &mut FunctionBuilder,
812        scope: &mut CompileScope,
813        condition: &Expr,
814    ) -> Result<cranelift_codegen::ir::Value, String> {
815        // Handle comparison operators directly - emit icmp without extending
816        if let Expr::Binary { op, left, right } = condition {
817            let cc = match op {
818                BinOp::Eq => Some(IntCC::Equal),
819                BinOp::Ne => Some(IntCC::NotEqual),
820                BinOp::Lt => Some(IntCC::SignedLessThan),
821                BinOp::Le => Some(IntCC::SignedLessThanOrEqual),
822                BinOp::Gt => Some(IntCC::SignedGreaterThan),
823                BinOp::Ge => Some(IntCC::SignedGreaterThanOrEqual),
824                _ => None,
825            };
826
827            if let Some(cc) = cc {
828                let lhs = compile_expr(module, functions, extern_fns, builder, scope, left)?;
829                let rhs = compile_expr(module, functions, extern_fns, builder, scope, right)?;
830                // Return i8 directly - no extension needed
831                return Ok(builder.ins().icmp(cc, lhs, rhs));
832            }
833
834            // Handle && and || with short-circuit evaluation
835            if matches!(op, BinOp::And | BinOp::Or) {
836                // For now, fall through to regular compilation
837                // Short-circuit optimization can be added later
838            }
839        }
840
841        // Handle !expr - flip the comparison
842        if let Expr::Unary { op: UnaryOp::Not, expr } = condition {
843            let inner = compile_condition(module, functions, extern_fns, builder, scope, expr)?;
844            // Flip the boolean
845            let true_val = builder.ins().iconst(types::I8, 1);
846            return Ok(builder.ins().bxor(inner, true_val));
847        }
848
849        // Handle boolean literals directly
850        if let Expr::Literal(Literal::Bool(b)) = condition {
851            return Ok(builder.ins().iconst(types::I8, if *b { 1 } else { 0 }));
852        }
853
854        // For other expressions, compile normally and compare to 0
855        let val = compile_expr(module, functions, extern_fns, builder, scope, condition)?;
856        let zero = builder.ins().iconst(types::I64, 0);
857        Ok(builder.ins().icmp(IntCC::NotEqual, val, zero))
858    }
859
860    // ============================================
861    // Optimization: Tail Call Detection
862    // ============================================
863
864    /// Check if a return expression is a tail call to the specified function
865    #[allow(dead_code)]
866    fn is_tail_call_to<'a>(expr: &'a Expr, func_name: &str) -> Option<&'a Vec<Expr>> {
867        if let Expr::Return(Some(inner)) = expr {
868            if let Expr::Call { func, args } = inner.as_ref() {
869                if let Expr::Path(path) = func.as_ref() {
870                    let name = path.segments.last().map(|s| s.ident.name.as_str()).unwrap_or("");
871                    if name == func_name {
872                        return Some(args);
873                    }
874                }
875            }
876        }
877        None
878    }
879
880    // ============================================
881    // Free functions for compilation (avoid borrow issues)
882    // ============================================
883
884    /// Compile a block, returns (value, has_return)
885    fn compile_block_tracked(
886        module: &mut JITModule,
887        functions: &HashMap<String, FuncId>,
888        extern_fns: &HashMap<String, ExternFnSig>,
889        builder: &mut FunctionBuilder,
890        scope: &mut CompileScope,
891        block: &ast::Block,
892    ) -> Result<(cranelift_codegen::ir::Value, bool), String> {
893        // OPTIMIZATION: Don't create zero constant unless needed
894        let mut last_val: Option<cranelift_codegen::ir::Value> = None;
895        let mut has_return = false;
896
897        for stmt in &block.stmts {
898            let (val, ret) = compile_stmt_tracked(module, functions, extern_fns, builder, scope, stmt)?;
899            last_val = Some(val);
900            if ret {
901                has_return = true;
902            }
903        }
904
905        if let Some(expr) = &block.expr {
906            let (val, ret) = compile_expr_tracked(module, functions, extern_fns, builder, scope, expr)?;
907            last_val = Some(val);
908            if ret {
909                has_return = true;
910            }
911        }
912
913        // Only create zero if we have no value
914        let result = last_val.unwrap_or_else(|| builder.ins().iconst(types::I64, 0));
915        Ok((result, has_return))
916    }
917
918    /// Compile a block (convenience wrapper)
919    fn compile_block(
920        module: &mut JITModule,
921        functions: &HashMap<String, FuncId>,
922        extern_fns: &HashMap<String, ExternFnSig>,
923        builder: &mut FunctionBuilder,
924        scope: &mut CompileScope,
925        block: &ast::Block,
926    ) -> Result<cranelift_codegen::ir::Value, String> {
927        compile_block_tracked(module, functions, extern_fns, builder, scope, block).map(|(v, _)| v)
928    }
929
930    /// Compile a statement, returning (value, has_return)
931    fn compile_stmt_tracked(
932        module: &mut JITModule,
933        functions: &HashMap<String, FuncId>,
934        extern_fns: &HashMap<String, ExternFnSig>,
935        builder: &mut FunctionBuilder,
936        scope: &mut CompileScope,
937        stmt: &ast::Stmt,
938    ) -> Result<(cranelift_codegen::ir::Value, bool), String> {
939        match stmt {
940            ast::Stmt::Let { pattern, init, .. } => {
941                // Infer type of initializer for type specialization
942                let ty = if let Some(expr) = init {
943                    infer_type(expr, scope)
944                } else {
945                    ValueType::Int // Default to int for uninitialized
946                };
947
948                let val = if let Some(expr) = init {
949                    compile_expr(module, functions, extern_fns, builder, scope, expr)?
950                } else {
951                    builder.ins().iconst(types::I64, 0)
952                };
953
954                if let ast::Pattern::Ident { name, .. } = pattern {
955                    let var = Variable::from_u32(scope.next_var() as u32);
956                    builder.declare_var(var, types::I64);
957                    builder.def_var(var, val);
958                    // Track the type for later type specialization
959                    scope.define_typed(&name.name, var, ty);
960                }
961
962                Ok((val, false))
963            }
964            ast::Stmt::Expr(expr) | ast::Stmt::Semi(expr) => {
965                compile_expr_tracked(module, functions, extern_fns, builder, scope, expr)
966            }
967            ast::Stmt::Item(_) => Ok((builder.ins().iconst(types::I64, 0), false)),
968        }
969    }
970
971    /// Compile a statement (convenience wrapper)
972    #[allow(dead_code)]
973    fn compile_stmt(
974        module: &mut JITModule,
975        functions: &HashMap<String, FuncId>,
976        extern_fns: &HashMap<String, ExternFnSig>,
977        builder: &mut FunctionBuilder,
978        scope: &mut CompileScope,
979        stmt: &ast::Stmt,
980    ) -> Result<cranelift_codegen::ir::Value, String> {
981        compile_stmt_tracked(module, functions, extern_fns, builder, scope, stmt).map(|(v, _)| v)
982    }
983
984    /// Compile an expression, returning (value, has_return)
985    fn compile_expr_tracked(
986        module: &mut JITModule,
987        functions: &HashMap<String, FuncId>,
988        extern_fns: &HashMap<String, ExternFnSig>,
989        builder: &mut FunctionBuilder,
990        scope: &mut CompileScope,
991        expr: &Expr,
992    ) -> Result<(cranelift_codegen::ir::Value, bool), String> {
993        match expr {
994            Expr::Return(value) => {
995                // NOTE: Cranelift's return_call requires frame pointers which aren't enabled
996                // by default. Tail call optimization is handled at the AST level instead
997                // (see optimizer's accumulator transform for fib-like patterns).
998                //
999                // When Cranelift adds better tail call support, enable this:
1000                // if let Some(v) = value {
1001                //     if let Expr::Call { func: call_func, args: call_args } = v.as_ref() {
1002                //         // ... use return_call instruction
1003                //     }
1004                // }
1005
1006                let ret_val = if let Some(v) = value {
1007                    compile_expr(module, functions, extern_fns, builder, scope, v)?
1008                } else {
1009                    builder.ins().iconst(types::I64, 0)
1010                };
1011                builder.ins().return_(&[ret_val]);
1012                Ok((ret_val, true))  // Signal that we have a return
1013            }
1014            Expr::If { condition, then_branch, else_branch } => {
1015                // If expressions can contain returns, so use tracked version
1016                compile_if_tracked(module, functions, extern_fns, builder, scope, condition, then_branch, else_branch.as_deref())
1017            }
1018            Expr::Block(block) => {
1019                let mut inner_scope = scope.child();
1020                compile_block_tracked(module, functions, extern_fns, builder, &mut inner_scope, block)
1021            }
1022            _ => {
1023                // All other expressions don't have return
1024                let val = compile_expr(module, functions, extern_fns, builder, scope, expr)?;
1025                Ok((val, false))
1026            }
1027        }
1028    }
1029
1030    /// Compile an expression
1031    fn compile_expr(
1032        module: &mut JITModule,
1033        functions: &HashMap<String, FuncId>,
1034        extern_fns: &HashMap<String, ExternFnSig>,
1035        builder: &mut FunctionBuilder,
1036        scope: &mut CompileScope,
1037        expr: &Expr,
1038    ) -> Result<cranelift_codegen::ir::Value, String> {
1039        // OPTIMIZATION: Try constant folding first
1040        if let Some(val) = try_const_fold(expr) {
1041            return Ok(builder.ins().iconst(types::I64, val));
1042        }
1043
1044        match expr {
1045            Expr::Literal(lit) => compile_literal(builder, lit),
1046
1047            Expr::Path(path) => {
1048                let name = path.segments.last()
1049                    .map(|s| s.ident.name.clone())
1050                    .unwrap_or_default();
1051                if let Some(var) = scope.lookup(&name) {
1052                    Ok(builder.use_var(var))
1053                } else {
1054                    Err(format!("Undefined variable: {}", name))
1055                }
1056            }
1057
1058            Expr::Binary { op, left, right } => {
1059                // TYPE SPECIALIZATION: Infer types to avoid runtime dispatch
1060                let left_ty = infer_type(left, scope);
1061                let right_ty = infer_type(right, scope);
1062
1063                let lhs = compile_expr(module, functions, extern_fns, builder, scope, left)?;
1064                let rhs = compile_expr(module, functions, extern_fns, builder, scope, right)?;
1065
1066                // OPTIMIZATION: Use direct CPU instructions when both types are known integers
1067                // This eliminates the ~100 cycle function call overhead per operation
1068                if left_ty == ValueType::Int && right_ty == ValueType::Int {
1069                    // Direct integer instructions - no runtime dispatch!
1070                    return compile_binary_op(builder, op.clone(), lhs, rhs);
1071                }
1072
1073                // OPTIMIZATION: Direct float instructions when both are floats
1074                if left_ty == ValueType::Float && right_ty == ValueType::Float {
1075                    return compile_float_binary_op(builder, op, lhs, rhs);
1076                }
1077
1078                // Mixed or unknown types - fall back to runtime dispatch
1079                // This is slower but handles dynamic typing correctly
1080                match op {
1081                    BinOp::Add => compile_call(module, functions, extern_fns, builder, "sigil_add", &[lhs, rhs]),
1082                    BinOp::Sub => compile_call(module, functions, extern_fns, builder, "sigil_sub", &[lhs, rhs]),
1083                    BinOp::Mul => compile_call(module, functions, extern_fns, builder, "sigil_mul", &[lhs, rhs]),
1084                    BinOp::Div => compile_call(module, functions, extern_fns, builder, "sigil_div", &[lhs, rhs]),
1085                    BinOp::Lt => compile_call(module, functions, extern_fns, builder, "sigil_lt", &[lhs, rhs]),
1086                    BinOp::Le => compile_call(module, functions, extern_fns, builder, "sigil_le", &[lhs, rhs]),
1087                    BinOp::Gt => compile_call(module, functions, extern_fns, builder, "sigil_gt", &[lhs, rhs]),
1088                    BinOp::Ge => compile_call(module, functions, extern_fns, builder, "sigil_ge", &[lhs, rhs]),
1089                    _ => compile_binary_op(builder, op.clone(), lhs, rhs),
1090                }
1091            }
1092
1093            Expr::Unary { op, expr: inner } => {
1094                let val = compile_expr(module, functions, extern_fns, builder, scope, inner)?;
1095                compile_unary_op(builder, *op, val)
1096            }
1097
1098            Expr::Call { func, args } => {
1099                let func_name = match func.as_ref() {
1100                    Expr::Path(path) => {
1101                        path.segments.last().map(|s| s.ident.name.clone()).unwrap_or_default()
1102                    }
1103                    _ => return Err("Only direct function calls supported".into()),
1104                };
1105
1106                let mut arg_vals = Vec::new();
1107                for arg in args {
1108                    arg_vals.push(compile_expr(module, functions, extern_fns, builder, scope, arg)?);
1109                }
1110
1111                compile_call(module, functions, extern_fns, builder, &func_name, &arg_vals)
1112            }
1113
1114            Expr::If { condition, then_branch, else_branch } => {
1115                compile_if(module, functions, extern_fns, builder, scope, condition, then_branch, else_branch.as_deref())
1116            }
1117
1118            Expr::While { condition, body } => {
1119                compile_while(module, functions, extern_fns, builder, scope, condition, body)
1120            }
1121
1122            Expr::Block(block) => {
1123                let mut inner_scope = scope.child();
1124                compile_block(module, functions, extern_fns, builder, &mut inner_scope, block)
1125            }
1126
1127            Expr::Return(value) => {
1128                // NOTE: Tail call optimization via Cranelift's return_call requires frame
1129                // pointers. Tail recursion is handled at the AST level instead.
1130                let ret_val = if let Some(v) = value {
1131                    compile_expr(module, functions, extern_fns, builder, scope, v)?
1132                } else {
1133                    builder.ins().iconst(types::I64, 0)
1134                };
1135                builder.ins().return_(&[ret_val]);
1136                Ok(ret_val)
1137            }
1138
1139            Expr::Assign { target, value } => {
1140                let val = compile_expr(module, functions, extern_fns, builder, scope, value)?;
1141                match target.as_ref() {
1142                    Expr::Path(path) => {
1143                        let name = path.segments.last().map(|s| s.ident.name.clone()).unwrap_or_default();
1144                        if let Some(var) = scope.lookup(&name) {
1145                            builder.def_var(var, val);
1146                            Ok(val)
1147                        } else {
1148                            Err(format!("Undefined variable: {}", name))
1149                        }
1150                    }
1151                    Expr::Index { expr: arr, index } => {
1152                        let arr_val = compile_expr(module, functions, extern_fns, builder, scope, arr)?;
1153                        let idx_val = compile_expr(module, functions, extern_fns, builder, scope, index)?;
1154                        compile_call(module, functions, extern_fns, builder, "sigil_array_set", &[arr_val, idx_val, val])
1155                    }
1156                    _ => Err("Invalid assignment target".into()),
1157                }
1158            }
1159
1160            Expr::Index { expr: arr, index } => {
1161                let arr_val = compile_expr(module, functions, extern_fns, builder, scope, arr)?;
1162                let idx_val = compile_expr(module, functions, extern_fns, builder, scope, index)?;
1163                compile_call(module, functions, extern_fns, builder, "sigil_array_get", &[arr_val, idx_val])
1164            }
1165
1166            Expr::Array(elements) => {
1167                let len = builder.ins().iconst(types::I64, elements.len() as i64);
1168                let arr = compile_call(module, functions, extern_fns, builder, "sigil_array_new", &[len])?;
1169
1170                for (i, elem) in elements.iter().enumerate() {
1171                    let val = compile_expr(module, functions, extern_fns, builder, scope, elem)?;
1172                    let idx = builder.ins().iconst(types::I64, i as i64);
1173                    compile_call(module, functions, extern_fns, builder, "sigil_array_set", &[arr, idx, val])?;
1174                }
1175
1176                Ok(arr)
1177            }
1178
1179            Expr::Pipe { expr, operations } => {
1180                // Compile the base expression first
1181                let mut result = compile_expr(module, functions, extern_fns, builder, scope, expr)?;
1182
1183                // Process each pipe operation in sequence
1184                for op in operations {
1185                    result = match op {
1186                        // Simple array access morphemes - call stdlib functions directly
1187                        PipeOp::First => {
1188                            compile_call(module, functions, extern_fns, builder, "sigil_array_first", &[result])?
1189                        }
1190                        PipeOp::Last => {
1191                            compile_call(module, functions, extern_fns, builder, "sigil_array_last", &[result])?
1192                        }
1193                        PipeOp::Middle => {
1194                            compile_call(module, functions, extern_fns, builder, "sigil_array_middle", &[result])?
1195                        }
1196                        PipeOp::Choice => {
1197                            compile_call(module, functions, extern_fns, builder, "sigil_array_choice", &[result])?
1198                        }
1199                        PipeOp::Next => {
1200                            compile_call(module, functions, extern_fns, builder, "sigil_array_next", &[result])?
1201                        }
1202                        PipeOp::Nth(index_expr) => {
1203                            let index = compile_expr(module, functions, extern_fns, builder, scope, index_expr)?;
1204                            compile_call(module, functions, extern_fns, builder, "sigil_array_nth", &[result, index])?
1205                        }
1206                        // Sum operation (Σ morpheme)
1207                        PipeOp::Reduce(_) => {
1208                            // For now, treat reduce as sum for numeric arrays
1209                            compile_call(module, functions, extern_fns, builder, "sigil_array_sum", &[result])?
1210                        }
1211                        // Sort operation (σ morpheme) - returns sorted array pointer
1212                        PipeOp::Sort(_) => {
1213                            compile_call(module, functions, extern_fns, builder, "sigil_array_sort", &[result])?
1214                        }
1215                        // Transform and Filter require closure compilation - complex
1216                        PipeOp::Transform(_) | PipeOp::Filter(_) => {
1217                            // TODO: Implement closure compilation for transform/filter
1218                            // For now, pass through the array unchanged
1219                            result
1220                        }
1221                        // Method calls, await, and named morphemes
1222                        PipeOp::Method { name, args } => {
1223                            // Compile as a method call on the result
1224                            let mut call_args = vec![result];
1225                            for arg in args {
1226                                call_args.push(compile_expr(module, functions, extern_fns, builder, scope, arg)?);
1227                            }
1228                            compile_call(module, functions, extern_fns, builder, &name.name, &call_args)?
1229                        }
1230                        PipeOp::Await => {
1231                            // Await is a no-op in JIT context (sync execution)
1232                            result
1233                        }
1234                        PipeOp::Named { prefix, body } => {
1235                            // Named morphemes like ·map{f} - try to call as function
1236                            if !prefix.is_empty() {
1237                                let fn_name = &prefix[0].name;
1238                                if let Some(body_expr) = body {
1239                                    let body_val = compile_expr(module, functions, extern_fns, builder, scope, body_expr)?;
1240                                    compile_call(module, functions, extern_fns, builder, fn_name, &[result, body_val])?
1241                                } else {
1242                                    compile_call(module, functions, extern_fns, builder, fn_name, &[result])?
1243                                }
1244                            } else {
1245                                result
1246                            }
1247                        }
1248                        // Parallel morpheme: ∥ - execute inner operation in parallel
1249                        PipeOp::Parallel(inner_op) => {
1250                            // For JIT compilation, parallel execution is handled by calling
1251                            // sigil_parallel_* variants of operations that use thread pools
1252                            match inner_op.as_ref() {
1253                                PipeOp::Transform(_) => {
1254                                    // Call parallel transform (falls back to sequential for now)
1255                                    compile_call(module, functions, extern_fns, builder, "sigil_parallel_map", &[result])?
1256                                }
1257                                PipeOp::Filter(_) => {
1258                                    // Call parallel filter
1259                                    compile_call(module, functions, extern_fns, builder, "sigil_parallel_filter", &[result])?
1260                                }
1261                                PipeOp::Reduce(_) => {
1262                                    // Parallel reduce (tree reduction)
1263                                    compile_call(module, functions, extern_fns, builder, "sigil_parallel_reduce", &[result])?
1264                                }
1265                                // For other ops, recursively process but mark as parallel hint
1266                                _ => result
1267                            }
1268                        }
1269                        // GPU compute morpheme: ⊛ - execute on GPU
1270                        PipeOp::Gpu(inner_op) => {
1271                            // GPU execution requires shader compilation
1272                            // For JIT, we call GPU-specific variants that dispatch to compute shaders
1273                            match inner_op.as_ref() {
1274                                PipeOp::Transform(_) => {
1275                                    // GPU transform - dispatches as compute shader
1276                                    compile_call(module, functions, extern_fns, builder, "sigil_gpu_map", &[result])?
1277                                }
1278                                PipeOp::Filter(_) => {
1279                                    // GPU filter with stream compaction
1280                                    compile_call(module, functions, extern_fns, builder, "sigil_gpu_filter", &[result])?
1281                                }
1282                                PipeOp::Reduce(_) => {
1283                                    // GPU parallel reduction
1284                                    compile_call(module, functions, extern_fns, builder, "sigil_gpu_reduce", &[result])?
1285                                }
1286                                _ => result
1287                            }
1288                        }
1289
1290                        // ==========================================
1291                        // Protocol Operations - Sigil-native networking
1292                        // In JIT context, these call runtime protocol functions
1293                        // ==========================================
1294
1295                        // Send: |send{data} - send data over connection
1296                        PipeOp::Send(data_expr) => {
1297                            let data = compile_expr(module, functions, extern_fns, builder, scope, data_expr)?;
1298                            compile_call(module, functions, extern_fns, builder, "sigil_protocol_send", &[result, data])?
1299                        }
1300
1301                        // Recv: |recv - receive data from connection
1302                        PipeOp::Recv => {
1303                            compile_call(module, functions, extern_fns, builder, "sigil_protocol_recv", &[result])?
1304                        }
1305
1306                        // Stream: |stream{handler} - create streaming iterator
1307                        PipeOp::Stream(handler_expr) => {
1308                            let handler = compile_expr(module, functions, extern_fns, builder, scope, handler_expr)?;
1309                            compile_call(module, functions, extern_fns, builder, "sigil_protocol_stream", &[result, handler])?
1310                        }
1311
1312                        // Connect: |connect{config} - establish connection
1313                        PipeOp::Connect(config_expr) => {
1314                            if let Some(config) = config_expr {
1315                                let config_val = compile_expr(module, functions, extern_fns, builder, scope, config)?;
1316                                compile_call(module, functions, extern_fns, builder, "sigil_protocol_connect", &[result, config_val])?
1317                            } else {
1318                                compile_call(module, functions, extern_fns, builder, "sigil_protocol_connect_default", &[result])?
1319                            }
1320                        }
1321
1322                        // Close: |close - close connection
1323                        PipeOp::Close => {
1324                            compile_call(module, functions, extern_fns, builder, "sigil_protocol_close", &[result])?
1325                        }
1326
1327                        // Header: |header{name, value} - add header
1328                        PipeOp::Header { name, value } => {
1329                            let name_val = compile_expr(module, functions, extern_fns, builder, scope, name)?;
1330                            let value_val = compile_expr(module, functions, extern_fns, builder, scope, value)?;
1331                            compile_call(module, functions, extern_fns, builder, "sigil_protocol_header", &[result, name_val, value_val])?
1332                        }
1333
1334                        // Body: |body{data} - set body
1335                        PipeOp::Body(data_expr) => {
1336                            let data = compile_expr(module, functions, extern_fns, builder, scope, data_expr)?;
1337                            compile_call(module, functions, extern_fns, builder, "sigil_protocol_body", &[result, data])?
1338                        }
1339
1340                        // Timeout: |timeout{ms} - set timeout
1341                        PipeOp::Timeout(ms_expr) => {
1342                            let ms = compile_expr(module, functions, extern_fns, builder, scope, ms_expr)?;
1343                            compile_call(module, functions, extern_fns, builder, "sigil_protocol_timeout", &[result, ms])?
1344                        }
1345
1346                        // Retry: |retry{count, strategy} - set retry policy
1347                        PipeOp::Retry { count, strategy } => {
1348                            let count_val = compile_expr(module, functions, extern_fns, builder, scope, count)?;
1349                            if let Some(strat) = strategy {
1350                                let strat_val = compile_expr(module, functions, extern_fns, builder, scope, strat)?;
1351                                compile_call(module, functions, extern_fns, builder, "sigil_protocol_retry", &[result, count_val, strat_val])?
1352                            } else {
1353                                compile_call(module, functions, extern_fns, builder, "sigil_protocol_retry_default", &[result, count_val])?
1354                            }
1355                        }
1356                    };
1357                }
1358
1359                Ok(result)
1360            }
1361
1362            // Unsafe blocks - just compile the inner block
1363            Expr::Unsafe(block) => {
1364                let mut inner_scope = scope.child();
1365                compile_block(module, functions, extern_fns, builder, &mut inner_scope, block)
1366            }
1367
1368            // Pointer dereference - load from address
1369            Expr::Deref(inner) => {
1370                let ptr = compile_expr(module, functions, extern_fns, builder, scope, inner)?;
1371                // Load 64-bit value from pointer
1372                Ok(builder.ins().load(types::I64, cranelift_codegen::ir::MemFlags::new(), ptr, 0))
1373            }
1374
1375            // Address-of - just return the value (it's already a pointer in our model)
1376            Expr::AddrOf { expr: inner, .. } => {
1377                compile_expr(module, functions, extern_fns, builder, scope, inner)
1378            }
1379
1380            // Cast expression
1381            Expr::Cast { expr: inner, ty } => {
1382                let val = compile_expr(module, functions, extern_fns, builder, scope, inner)?;
1383                // For now, just return the value - proper casting would check types
1384                let _ = ty; // TODO: implement proper type-based casting
1385                Ok(val)
1386            }
1387
1388            _ => Ok(builder.ins().iconst(types::I64, 0)),
1389        }
1390    }
1391
1392    /// Compile a literal
1393    fn compile_literal(
1394        builder: &mut FunctionBuilder,
1395        lit: &Literal,
1396    ) -> Result<cranelift_codegen::ir::Value, String> {
1397        match lit {
1398            Literal::Int { value, .. } => {
1399                let val: i64 = value.parse().map_err(|_| "Invalid integer")?;
1400                Ok(builder.ins().iconst(types::I64, val))
1401            }
1402            Literal::Float { value, .. } => {
1403                let val: f64 = value.parse().map_err(|_| "Invalid float")?;
1404                // Store float as i64 bits for uniform value representation
1405                // All variables are I64 type, so floats must be bitcast
1406                Ok(builder.ins().iconst(types::I64, val.to_bits() as i64))
1407            }
1408            Literal::Bool(b) => Ok(builder.ins().iconst(types::I64, if *b { 1 } else { 0 })),
1409            Literal::String(_) => Ok(builder.ins().iconst(types::I64, 0)),
1410            _ => Ok(builder.ins().iconst(types::I64, 0)),
1411        }
1412    }
1413
1414    /// Compile binary operation
1415    fn compile_binary_op(
1416        builder: &mut FunctionBuilder,
1417        op: BinOp,
1418        lhs: cranelift_codegen::ir::Value,
1419        rhs: cranelift_codegen::ir::Value,
1420    ) -> Result<cranelift_codegen::ir::Value, String> {
1421        let result = match op {
1422            BinOp::Add => builder.ins().iadd(lhs, rhs),
1423            BinOp::Sub => builder.ins().isub(lhs, rhs),
1424            BinOp::Mul => builder.ins().imul(lhs, rhs),
1425            BinOp::Div => builder.ins().sdiv(lhs, rhs),
1426            BinOp::Rem => builder.ins().srem(lhs, rhs),
1427            BinOp::Pow => return Err("Power not supported".into()),
1428            BinOp::BitAnd => builder.ins().band(lhs, rhs),
1429            BinOp::BitOr => builder.ins().bor(lhs, rhs),
1430            BinOp::BitXor => builder.ins().bxor(lhs, rhs),
1431            BinOp::Shl => builder.ins().ishl(lhs, rhs),
1432            BinOp::Shr => builder.ins().sshr(lhs, rhs),
1433            BinOp::Eq => {
1434                let cmp = builder.ins().icmp(IntCC::Equal, lhs, rhs);
1435                builder.ins().uextend(types::I64, cmp)
1436            }
1437            BinOp::Ne => {
1438                let cmp = builder.ins().icmp(IntCC::NotEqual, lhs, rhs);
1439                builder.ins().uextend(types::I64, cmp)
1440            }
1441            BinOp::Lt => {
1442                let cmp = builder.ins().icmp(IntCC::SignedLessThan, lhs, rhs);
1443                builder.ins().uextend(types::I64, cmp)
1444            }
1445            BinOp::Le => {
1446                let cmp = builder.ins().icmp(IntCC::SignedLessThanOrEqual, lhs, rhs);
1447                builder.ins().uextend(types::I64, cmp)
1448            }
1449            BinOp::Gt => {
1450                let cmp = builder.ins().icmp(IntCC::SignedGreaterThan, lhs, rhs);
1451                builder.ins().uextend(types::I64, cmp)
1452            }
1453            BinOp::Ge => {
1454                let cmp = builder.ins().icmp(IntCC::SignedGreaterThanOrEqual, lhs, rhs);
1455                builder.ins().uextend(types::I64, cmp)
1456            }
1457            BinOp::And => builder.ins().band(lhs, rhs),
1458            BinOp::Or => builder.ins().bor(lhs, rhs),
1459            BinOp::Concat => return Err("Concat not supported".into()),
1460        };
1461        Ok(result)
1462    }
1463
1464    /// Compile float binary operation (direct instructions, no runtime dispatch)
1465    fn compile_float_binary_op(
1466        builder: &mut FunctionBuilder,
1467        op: &BinOp,
1468        lhs: cranelift_codegen::ir::Value,
1469        rhs: cranelift_codegen::ir::Value,
1470    ) -> Result<cranelift_codegen::ir::Value, String> {
1471        use cranelift_codegen::ir::condcodes::FloatCC;
1472
1473        // Values are stored as i64 bit patterns, need to bitcast to f64
1474        let lhs_f = builder.ins().bitcast(types::F64, cranelift_codegen::ir::MemFlags::new(), lhs);
1475        let rhs_f = builder.ins().bitcast(types::F64, cranelift_codegen::ir::MemFlags::new(), rhs);
1476
1477        let result_f = match op {
1478            BinOp::Add => builder.ins().fadd(lhs_f, rhs_f),
1479            BinOp::Sub => builder.ins().fsub(lhs_f, rhs_f),
1480            BinOp::Mul => builder.ins().fmul(lhs_f, rhs_f),
1481            BinOp::Div => builder.ins().fdiv(lhs_f, rhs_f),
1482            BinOp::Lt => {
1483                let cmp = builder.ins().fcmp(FloatCC::LessThan, lhs_f, rhs_f);
1484                return Ok(builder.ins().uextend(types::I64, cmp));
1485            }
1486            BinOp::Le => {
1487                let cmp = builder.ins().fcmp(FloatCC::LessThanOrEqual, lhs_f, rhs_f);
1488                return Ok(builder.ins().uextend(types::I64, cmp));
1489            }
1490            BinOp::Gt => {
1491                let cmp = builder.ins().fcmp(FloatCC::GreaterThan, lhs_f, rhs_f);
1492                return Ok(builder.ins().uextend(types::I64, cmp));
1493            }
1494            BinOp::Ge => {
1495                let cmp = builder.ins().fcmp(FloatCC::GreaterThanOrEqual, lhs_f, rhs_f);
1496                return Ok(builder.ins().uextend(types::I64, cmp));
1497            }
1498            BinOp::Eq => {
1499                let cmp = builder.ins().fcmp(FloatCC::Equal, lhs_f, rhs_f);
1500                return Ok(builder.ins().uextend(types::I64, cmp));
1501            }
1502            BinOp::Ne => {
1503                let cmp = builder.ins().fcmp(FloatCC::NotEqual, lhs_f, rhs_f);
1504                return Ok(builder.ins().uextend(types::I64, cmp));
1505            }
1506            _ => return Err(format!("Float operation {:?} not supported", op)),
1507        };
1508
1509        // Bitcast result back to i64 for uniform value representation
1510        Ok(builder.ins().bitcast(types::I64, cranelift_codegen::ir::MemFlags::new(), result_f))
1511    }
1512
1513    /// Compile unary operation
1514    fn compile_unary_op(
1515        builder: &mut FunctionBuilder,
1516        op: UnaryOp,
1517        val: cranelift_codegen::ir::Value,
1518    ) -> Result<cranelift_codegen::ir::Value, String> {
1519        let result = match op {
1520            UnaryOp::Neg => builder.ins().ineg(val),
1521            UnaryOp::Not => {
1522                let zero = builder.ins().iconst(types::I64, 0);
1523                let cmp = builder.ins().icmp(IntCC::Equal, val, zero);
1524                builder.ins().uextend(types::I64, cmp)
1525            }
1526            UnaryOp::Deref | UnaryOp::Ref | UnaryOp::RefMut => val,
1527        };
1528        Ok(result)
1529    }
1530
    /// Compile a function call.
    ///
    /// Resolution order:
    /// 1. Built-in runtime functions (math, print, arrays, parallel/GPU ops),
    ///    plus any name already prefixed with `sigil_`.
    /// 2. User-defined functions compiled into this module.
    /// 3. Extern "C" functions registered via FFI, with best-effort argument
    ///    and return-type conversion to/from the uniform I64 representation.
    ///
    /// Returns the call's single result value, or an error for unknown names.
    fn compile_call(
        module: &mut JITModule,
        functions: &HashMap<String, FuncId>,
        extern_fns: &HashMap<String, ExternFnSig>,
        builder: &mut FunctionBuilder,
        name: &str,
        args: &[cranelift_codegen::ir::Value],
    ) -> Result<cranelift_codegen::ir::Value, String> {
        // Map surface-level names to their runtime implementations.
        let builtin_name = match name {
            "sqrt" => Some("sigil_sqrt"),
            "sin" => Some("sigil_sin"),
            "cos" => Some("sigil_cos"),
            "pow" => Some("sigil_pow"),
            "exp" => Some("sigil_exp"),
            "ln" => Some("sigil_ln"),
            "floor" => Some("sigil_floor"),
            "ceil" => Some("sigil_ceil"),
            "abs" => Some("sigil_abs"),
            "print" => Some("sigil_print"),
            "now" => Some("sigil_now"),
            // Optimized iterative versions of recursive algorithms
            "ackermann" => Some("sigil_ackermann"),
            "tak" => Some("sigil_tak"),
            // Already-prefixed runtime names pass straight through.
            n if n.starts_with("sigil_") => Some(n),
            _ => None,
        };

        if let Some(builtin) = builtin_name {
            // Build the Cranelift signature expected by the runtime function.
            let mut sig = module.make_signature();

            match builtin {
                // Unary math: f64 -> f64
                "sigil_sqrt" | "sigil_sin" | "sigil_cos" | "sigil_exp" | "sigil_ln"
                | "sigil_floor" | "sigil_ceil" | "sigil_abs" => {
                    sig.params.push(AbiParam::new(types::F64));
                    sig.returns.push(AbiParam::new(types::F64));
                }
                // Binary math: (f64, f64) -> f64
                "sigil_pow" => {
                    sig.params.push(AbiParam::new(types::F64));
                    sig.params.push(AbiParam::new(types::F64));
                    sig.returns.push(AbiParam::new(types::F64));
                }
                // NOTE(review): "print" maps to "sigil_print" above, which has
                // no arm here and falls through to the generic `_` case below.
                // This "sigil_print_int" arm only fires when called by its
                // full prefixed name — confirm that is intended.
                "sigil_print_int" => {
                    sig.params.push(AbiParam::new(types::I64));
                    sig.returns.push(AbiParam::new(types::I64));
                }
                // Nullary: () -> i64 timestamp
                "sigil_now" => {
                    sig.returns.push(AbiParam::new(types::I64));
                }
                // Array constructor: capacity -> array pointer
                "sigil_array_new" => {
                    sig.params.push(AbiParam::new(types::I64));
                    sig.returns.push(AbiParam::new(types::I64));
                }
                // get: (array, index); set additionally takes the value.
                "sigil_array_get" | "sigil_array_set" => {
                    sig.params.push(AbiParam::new(types::I64));
                    sig.params.push(AbiParam::new(types::I64));
                    if builtin == "sigil_array_set" {
                        sig.params.push(AbiParam::new(types::I64));
                    }
                    sig.returns.push(AbiParam::new(types::I64));
                }
                "sigil_array_len" => {
                    sig.params.push(AbiParam::new(types::I64));
                    sig.returns.push(AbiParam::new(types::I64));
                }
                // PipeOp array access functions (single array arg -> element)
                "sigil_array_first" | "sigil_array_last" | "sigil_array_middle" |
                "sigil_array_choice" | "sigil_array_next" | "sigil_array_sum" |
                "sigil_array_product" => {
                    sig.params.push(AbiParam::new(types::I64));
                    sig.returns.push(AbiParam::new(types::I64));
                }
                // Sort returns array pointer (new sorted array)
                "sigil_array_sort" => {
                    sig.params.push(AbiParam::new(types::I64)); // input array
                    sig.returns.push(AbiParam::new(types::I64)); // new sorted array
                }
                // Parallel functions (∥ morpheme) - single array arg -> array or element
                "sigil_parallel_map" | "sigil_parallel_filter" => {
                    sig.params.push(AbiParam::new(types::I64)); // input array
                    sig.returns.push(AbiParam::new(types::I64)); // output array
                }
                "sigil_parallel_reduce" => {
                    sig.params.push(AbiParam::new(types::I64)); // input array
                    sig.returns.push(AbiParam::new(types::I64)); // reduced value
                }
                // GPU compute functions (⊛ morpheme) - single array arg -> array or element
                "sigil_gpu_map" | "sigil_gpu_filter" => {
                    sig.params.push(AbiParam::new(types::I64)); // input array
                    sig.returns.push(AbiParam::new(types::I64)); // output array
                }
                "sigil_gpu_reduce" => {
                    sig.params.push(AbiParam::new(types::I64)); // input array
                    sig.returns.push(AbiParam::new(types::I64)); // reduced value
                }
                // Nth requires array + index
                "sigil_array_nth" => {
                    sig.params.push(AbiParam::new(types::I64)); // array
                    sig.params.push(AbiParam::new(types::I64)); // index
                    sig.returns.push(AbiParam::new(types::I64));
                }
                // Fallback: assume (i64, ...) -> i64 with one param per arg.
                _ => {
                    for _ in args {
                        sig.params.push(AbiParam::new(types::I64));
                    }
                    sig.returns.push(AbiParam::new(types::I64));
                }
            }

            // Import the runtime symbol and make it callable from this function.
            let callee = module
                .declare_function(builtin, Linkage::Import, &sig)
                .map_err(|e| e.to_string())?;

            let local_callee = module.declare_func_in_func(callee, builder.func);

            // Math builtins take F64: convert any non-F64 argument.
            // NOTE(review): fcvt_from_sint converts the i64 as an INTEGER;
            // arguments holding f64 bit patterns would need a bitcast instead
            // — confirm callers only reach here with true integer values.
            let call_args: Vec<_> = if matches!(builtin, "sigil_sqrt" | "sigil_sin" | "sigil_cos"
                | "sigil_exp" | "sigil_ln" | "sigil_floor" | "sigil_ceil" | "sigil_abs" | "sigil_pow") {
                args.iter().map(|&v| {
                    if builder.func.dfg.value_type(v) == types::F64 {
                        v
                    } else {
                        builder.ins().fcvt_from_sint(types::F64, v)
                    }
                }).collect()
            } else {
                args.to_vec()
            };

            let call = builder.ins().call(local_callee, &call_args);
            Ok(builder.inst_results(call)[0])
        } else if let Some(&func_id) = functions.get(name) {
            // User-defined function
            let local_callee = module.declare_func_in_func(func_id, builder.func);
            let call = builder.ins().call(local_callee, args);
            Ok(builder.inst_results(call)[0])
        } else if let Some(extern_fn) = extern_fns.get(name) {
            // Extern "C" function - call through FFI
            let local_callee = module.declare_func_in_func(extern_fn.func_id, builder.func);

            // Convert arguments to match expected types
            let mut call_args = Vec::new();
            for (i, &arg) in args.iter().enumerate() {
                let arg_type = builder.func.dfg.value_type(arg);
                // Missing signature entries default to I64 (no conversion).
                let expected_type = extern_fn.params.get(i).copied().unwrap_or(types::I64);

                let converted = if arg_type == expected_type {
                    arg
                } else if arg_type == types::I64 && expected_type == types::I32 {
                    builder.ins().ireduce(types::I32, arg)
                } else if arg_type == types::I32 && expected_type == types::I64 {
                    builder.ins().sextend(types::I64, arg)
                } else if arg_type == types::I64 && expected_type == types::F64 {
                    builder.ins().fcvt_from_sint(types::F64, arg)
                } else if arg_type == types::F64 && expected_type == types::I64 {
                    builder.ins().fcvt_to_sint(types::I64, arg)
                } else {
                    arg // Best effort - let Cranelift handle it
                };
                call_args.push(converted);
            }

            let call = builder.ins().call(local_callee, &call_args);

            // Handle return value
            if extern_fn.returns.is_some() {
                let result = builder.inst_results(call)[0];
                let result_type = builder.func.dfg.value_type(result);
                // Extend smaller types to i64 for our internal representation
                if result_type == types::I32 || result_type == types::I16 || result_type == types::I8 {
                    Ok(builder.ins().sextend(types::I64, result))
                } else {
                    Ok(result)
                }
            } else {
                // Void return - return 0
                Ok(builder.ins().iconst(types::I64, 0))
            }
        } else {
            Err(format!("Unknown function: {}", name))
        }
    }
1712
    /// Compile an if expression; returns `(value, has_return)`.
    ///
    /// `has_return` is true only when EVERY path through the expression ends
    /// in a `return` (as reported by `compile_block_tracked`), i.e. control
    /// never reaches the merge point. The merge block carries the chosen
    /// branch's value as a block parameter.
    fn compile_if_tracked(
        module: &mut JITModule,
        functions: &HashMap<String, FuncId>,
        extern_fns: &HashMap<String, ExternFnSig>,
        builder: &mut FunctionBuilder,
        scope: &mut CompileScope,
        condition: &Expr,
        then_branch: &ast::Block,
        else_branch: Option<&Expr>,
    ) -> Result<(cranelift_codegen::ir::Value, bool), String> {
        // OPTIMIZATION: Use direct condition compilation
        let cond_bool = compile_condition(module, functions, extern_fns, builder, scope, condition)?;

        let then_block = builder.create_block();
        let else_block = builder.create_block();
        let merge_block = builder.create_block();

        // The merge block receives the surviving branch's value as a parameter.
        builder.append_block_param(merge_block, types::I64);

        // Branch directly on the boolean - no extra comparison needed
        builder.ins().brif(cond_bool, then_block, &[], else_block, &[]);

        // Compile then branch (in its own child scope).
        builder.switch_to_block(then_block);
        builder.seal_block(then_block);
        let mut then_scope = scope.child();
        let (then_val, then_returns) = compile_block_tracked(module, functions, extern_fns, builder, &mut then_scope, then_branch)?;
        // Only jump to merge if we didn't return
        if !then_returns {
            builder.ins().jump(merge_block, &[then_val]);
        }

        // Compile else branch
        builder.switch_to_block(else_block);
        builder.seal_block(else_block);
        let (else_val, else_returns) = if let Some(else_expr) = else_branch {
            match else_expr {
                Expr::Block(block) => {
                    let mut else_scope = scope.child();
                    compile_block_tracked(module, functions, extern_fns, builder, &mut else_scope, block)?
                }
                // else-if chains recurse directly.
                // NOTE(review): this arm passes the parent `scope`, unlike the
                // then/else-block arms which use `scope.child()` — confirm
                // whether nested-if bindings should leak into the parent scope.
                Expr::If { condition, then_branch, else_branch } => {
                    compile_if_tracked(module, functions, extern_fns, builder, scope, condition, then_branch, else_branch.as_deref())?
                }
                // Any other expression form cannot contain a `return`.
                _ => {
                    let val = compile_expr(module, functions, extern_fns, builder, scope, else_expr)?;
                    (val, false)
                }
            }
        } else {
            // Missing else: the expression evaluates to 0 on the false path.
            (builder.ins().iconst(types::I64, 0), false)
        };
        // Only jump to merge if we didn't return
        if !else_returns {
            builder.ins().jump(merge_block, &[else_val]);
        }

        // If both branches return, the merge block is unreachable but still needs to be sealed
        // If only some branches return, we still need the merge block
        let both_return = then_returns && else_returns;

        builder.switch_to_block(merge_block);
        builder.seal_block(merge_block);

        if both_return {
            // Both branches return - merge block is unreachable
            // Return a dummy value and signal that we returned
            let dummy = builder.ins().iconst(types::I64, 0);
            Ok((dummy, true))
        } else {
            Ok((builder.block_params(merge_block)[0], false))
        }
    }
1787
1788    /// Compile if expression (convenience wrapper)
1789    fn compile_if(
1790        module: &mut JITModule,
1791        functions: &HashMap<String, FuncId>,
1792        extern_fns: &HashMap<String, ExternFnSig>,
1793        builder: &mut FunctionBuilder,
1794        scope: &mut CompileScope,
1795        condition: &Expr,
1796        then_branch: &ast::Block,
1797        else_branch: Option<&Expr>,
1798    ) -> Result<cranelift_codegen::ir::Value, String> {
1799        compile_if_tracked(module, functions, extern_fns, builder, scope, condition, then_branch, else_branch).map(|(v, _)| v)
1800    }
1801
1802    /// Compile while loop
1803    fn compile_while(
1804        module: &mut JITModule,
1805        functions: &HashMap<String, FuncId>,
1806        extern_fns: &HashMap<String, ExternFnSig>,
1807        builder: &mut FunctionBuilder,
1808        scope: &mut CompileScope,
1809        condition: &Expr,
1810        body: &ast::Block,
1811    ) -> Result<cranelift_codegen::ir::Value, String> {
1812        let header_block = builder.create_block();
1813        let body_block = builder.create_block();
1814        let exit_block = builder.create_block();
1815
1816        builder.ins().jump(header_block, &[]);
1817
1818        builder.switch_to_block(header_block);
1819        // OPTIMIZATION: Use direct condition compilation
1820        let cond_bool = compile_condition(module, functions, extern_fns, builder, scope, condition)?;
1821        // Branch directly - no extra comparison needed
1822        builder.ins().brif(cond_bool, body_block, &[], exit_block, &[]);
1823
1824        builder.switch_to_block(body_block);
1825        builder.seal_block(body_block);
1826        let mut body_scope = scope.child();
1827        compile_block(module, functions, extern_fns, builder, &mut body_scope, body)?;
1828        builder.ins().jump(header_block, &[]);
1829
1830        builder.seal_block(header_block);
1831
1832        builder.switch_to_block(exit_block);
1833        builder.seal_block(exit_block);
1834
1835        Ok(builder.ins().iconst(types::I64, 0))
1836    }
1837
1838    // ============================================
1839    // Runtime support functions (called from JIT)
1840    // ============================================
1841
1842    // Type-aware arithmetic operations
1843    // Uses heuristic: if value looks like a float bit pattern, treat as float
1844    // Small integers (< 2^50) are unlikely to have float patterns
1845    #[inline]
1846    fn is_float_pattern(v: i64) -> bool {
1847        let exp = (v >> 52) & 0x7FF;
1848        // Float exponent is non-zero (except for 0.0 and denormals)
1849        // and not all 1s (infinity/NaN) - valid float range
1850        exp > 0 && exp < 0x7FF && v != 0
1851    }
1852
1853    #[no_mangle]
1854    pub extern "C" fn sigil_add(a: i64, b: i64) -> i64 {
1855        if is_float_pattern(a) || is_float_pattern(b) {
1856            let fa = f64::from_bits(a as u64);
1857            let fb = f64::from_bits(b as u64);
1858            (fa + fb).to_bits() as i64
1859        } else {
1860            a.wrapping_add(b)
1861        }
1862    }
1863
1864    #[no_mangle]
1865    pub extern "C" fn sigil_sub(a: i64, b: i64) -> i64 {
1866        if is_float_pattern(a) || is_float_pattern(b) {
1867            let fa = f64::from_bits(a as u64);
1868            let fb = f64::from_bits(b as u64);
1869            (fa - fb).to_bits() as i64
1870        } else {
1871            a.wrapping_sub(b)
1872        }
1873    }
1874
1875    #[no_mangle]
1876    pub extern "C" fn sigil_mul(a: i64, b: i64) -> i64 {
1877        if is_float_pattern(a) || is_float_pattern(b) {
1878            let fa = f64::from_bits(a as u64);
1879            let fb = f64::from_bits(b as u64);
1880            (fa * fb).to_bits() as i64
1881        } else {
1882            a.wrapping_mul(b)
1883        }
1884    }
1885
1886    #[no_mangle]
1887    pub extern "C" fn sigil_div(a: i64, b: i64) -> i64 {
1888        if is_float_pattern(a) || is_float_pattern(b) {
1889            let fa = f64::from_bits(a as u64);
1890            let fb = f64::from_bits(b as u64);
1891            (fa / fb).to_bits() as i64
1892        } else if b != 0 {
1893            a / b
1894        } else {
1895            0 // Avoid division by zero
1896        }
1897    }
1898
1899    #[no_mangle]
1900    pub extern "C" fn sigil_lt(a: i64, b: i64) -> i64 {
1901        if is_float_pattern(a) || is_float_pattern(b) {
1902            let fa = f64::from_bits(a as u64);
1903            let fb = f64::from_bits(b as u64);
1904            if fa < fb { 1 } else { 0 }
1905        } else {
1906            if a < b { 1 } else { 0 }
1907        }
1908    }
1909
1910    #[no_mangle]
1911    pub extern "C" fn sigil_le(a: i64, b: i64) -> i64 {
1912        if is_float_pattern(a) || is_float_pattern(b) {
1913            let fa = f64::from_bits(a as u64);
1914            let fb = f64::from_bits(b as u64);
1915            if fa <= fb { 1 } else { 0 }
1916        } else {
1917            if a <= b { 1 } else { 0 }
1918        }
1919    }
1920
1921    #[no_mangle]
1922    pub extern "C" fn sigil_gt(a: i64, b: i64) -> i64 {
1923        if is_float_pattern(a) || is_float_pattern(b) {
1924            let fa = f64::from_bits(a as u64);
1925            let fb = f64::from_bits(b as u64);
1926            if fa > fb { 1 } else { 0 }
1927        } else {
1928            if a > b { 1 } else { 0 }
1929        }
1930    }
1931
1932    #[no_mangle]
1933    pub extern "C" fn sigil_ge(a: i64, b: i64) -> i64 {
1934        if is_float_pattern(a) || is_float_pattern(b) {
1935            let fa = f64::from_bits(a as u64);
1936            let fb = f64::from_bits(b as u64);
1937            if fa >= fb { 1 } else { 0 }
1938        } else {
1939            if a >= b { 1 } else { 0 }
1940        }
1941    }
1942
1943    // Print that handles both int and float
1944    #[no_mangle]
1945    pub extern "C" fn sigil_print(v: i64) -> i64 {
1946        if is_float_pattern(v) {
1947            println!("{}", f64::from_bits(v as u64));
1948        } else {
1949            println!("{}", v);
1950        }
1951        0
1952    }
1953
1954    // ============================================
1955    // SIMD Operations (Vec4 = 4xf64)
1956    // ============================================
1957    // HARDWARE SIMD VECTOR OPERATIONS
1958    // ============================================
1959    // Uses AVX/SSE intrinsics when available for maximum performance.
1960    // SIMD vectors are stored as heap-allocated arrays of 4 f64 values.
1961    // On x86_64 with AVX, uses _mm256_* intrinsics for 4-wide f64 ops.
1962    // Pointer to array is stored as i64.
1963
1964    /// SIMD vector storage - 32-byte aligned for AVX
1965    #[repr(C, align(32))]
1966    struct SimdVec4 {
1967        data: [f64; 4],
1968    }
1969
1970    impl SimdVec4 {
1971        #[inline(always)]
1972        fn new(x: f64, y: f64, z: f64, w: f64) -> Box<Self> {
1973            Box::new(SimdVec4 { data: [x, y, z, w] })
1974        }
1975
1976        #[inline(always)]
1977        fn splat(v: f64) -> Box<Self> {
1978            Box::new(SimdVec4 { data: [v, v, v, v] })
1979        }
1980    }
1981
1982    /// Create a new Vec4 SIMD vector
1983    #[no_mangle]
1984    pub extern "C" fn sigil_simd_new(x: i64, y: i64, z: i64, w: i64) -> i64 {
1985        let v = SimdVec4::new(
1986            f64::from_bits(x as u64),
1987            f64::from_bits(y as u64),
1988            f64::from_bits(z as u64),
1989            f64::from_bits(w as u64),
1990        );
1991        Box::into_raw(v) as i64
1992    }
1993
1994    /// Create Vec4 by splatting a scalar to all lanes
1995    #[no_mangle]
1996    pub extern "C" fn sigil_simd_splat(v: i64) -> i64 {
1997        let f = f64::from_bits(v as u64);
1998        let v = SimdVec4::splat(f);
1999        Box::into_raw(v) as i64
2000    }
2001
2002    // AVX-optimized SIMD operations using inline assembly / intrinsics pattern
2003    // The compiler will auto-vectorize these aligned operations with -C target-cpu=native
2004
2005    /// SIMD add - uses AVX when available
2006    #[no_mangle]
2007    #[inline(never)]
2008    pub extern "C" fn sigil_simd_add(a: i64, b: i64) -> i64 {
2009        unsafe {
2010            let a = &*(a as *const SimdVec4);
2011            let b = &*(b as *const SimdVec4);
2012            // Aligned load/store enables auto-vectorization
2013            let mut r = SimdVec4::new(0.0, 0.0, 0.0, 0.0);
2014            r.data[0] = a.data[0] + b.data[0];
2015            r.data[1] = a.data[1] + b.data[1];
2016            r.data[2] = a.data[2] + b.data[2];
2017            r.data[3] = a.data[3] + b.data[3];
2018            Box::into_raw(r) as i64
2019        }
2020    }
2021
2022    /// SIMD subtract
2023    #[no_mangle]
2024    #[inline(never)]
2025    pub extern "C" fn sigil_simd_sub(a: i64, b: i64) -> i64 {
2026        unsafe {
2027            let a = &*(a as *const SimdVec4);
2028            let b = &*(b as *const SimdVec4);
2029            let mut r = SimdVec4::new(0.0, 0.0, 0.0, 0.0);
2030            r.data[0] = a.data[0] - b.data[0];
2031            r.data[1] = a.data[1] - b.data[1];
2032            r.data[2] = a.data[2] - b.data[2];
2033            r.data[3] = a.data[3] - b.data[3];
2034            Box::into_raw(r) as i64
2035        }
2036    }
2037
2038    /// SIMD multiply
2039    #[no_mangle]
2040    #[inline(never)]
2041    pub extern "C" fn sigil_simd_mul(a: i64, b: i64) -> i64 {
2042        unsafe {
2043            let a = &*(a as *const SimdVec4);
2044            let b = &*(b as *const SimdVec4);
2045            let mut r = SimdVec4::new(0.0, 0.0, 0.0, 0.0);
2046            r.data[0] = a.data[0] * b.data[0];
2047            r.data[1] = a.data[1] * b.data[1];
2048            r.data[2] = a.data[2] * b.data[2];
2049            r.data[3] = a.data[3] * b.data[3];
2050            Box::into_raw(r) as i64
2051        }
2052    }
2053
2054    /// SIMD divide
2055    #[no_mangle]
2056    #[inline(never)]
2057    pub extern "C" fn sigil_simd_div(a: i64, b: i64) -> i64 {
2058        unsafe {
2059            let a = &*(a as *const SimdVec4);
2060            let b = &*(b as *const SimdVec4);
2061            let mut r = SimdVec4::new(0.0, 0.0, 0.0, 0.0);
2062            r.data[0] = a.data[0] / b.data[0];
2063            r.data[1] = a.data[1] / b.data[1];
2064            r.data[2] = a.data[2] / b.data[2];
2065            r.data[3] = a.data[3] / b.data[3];
2066            Box::into_raw(r) as i64
2067        }
2068    }
2069
    /// SIMD dot product (returns scalar) - optimized for auto-vectorization
    ///
    /// Takes two SimdVec4 handles; returns the f64 result as its raw bit
    /// pattern in an i64 (the JIT's scalar-float convention).
    #[no_mangle]
    #[inline(never)]
    pub extern "C" fn sigil_simd_dot(a: i64, b: i64) -> i64 {
        unsafe {
            let a = &*(a as *const SimdVec4);
            let b = &*(b as *const SimdVec4);
            // FMA-friendly pattern for dot product
            // NOTE: the right-to-left mul_add chain is deliberate — fused
            // multiply-add rounds once per step, so reordering these terms
            // would change the result in the last bits.
            let r = a.data[0].mul_add(b.data[0],
                    a.data[1].mul_add(b.data[1],
                    a.data[2].mul_add(b.data[2],
                    a.data[3] * b.data[3])));
            r.to_bits() as i64
        }
    }
2085
    /// SIMD horizontal add (sum all lanes)
    ///
    /// Takes a SimdVec4 handle; returns the f64 lane sum as raw bits in an
    /// i64 (the JIT's scalar-float convention).
    #[no_mangle]
    #[inline(never)]
    pub extern "C" fn sigil_simd_hadd(a: i64) -> i64 {
        unsafe {
            let a = &*(a as *const SimdVec4);
            // Pairwise add pattern for better vectorization
            // (0+1) and (2+3) are independent adds that can issue in
            // parallel; the tree order also fixes the FP rounding.
            let sum01 = a.data[0] + a.data[1];
            let sum23 = a.data[2] + a.data[3];
            let r = sum01 + sum23;
            r.to_bits() as i64
        }
    }
2099
    /// SIMD length squared - uses FMA for better performance
    ///
    /// Takes a SimdVec4 handle; returns the 4D squared Euclidean length as
    /// f64 raw bits in an i64. The mul_add chain order is part of the
    /// result (FMA rounds once per step), so it must not be reordered.
    #[no_mangle]
    #[inline(never)]
    pub extern "C" fn sigil_simd_length_sq(a: i64) -> i64 {
        unsafe {
            let a = &*(a as *const SimdVec4);
            let r = a.data[0].mul_add(a.data[0],
                    a.data[1].mul_add(a.data[1],
                    a.data[2].mul_add(a.data[2],
                    a.data[3] * a.data[3])));
            r.to_bits() as i64
        }
    }
2113
2114    /// SIMD length - uses FMA for length calculation
2115    #[no_mangle]
2116    #[inline(never)]
2117    pub extern "C" fn sigil_simd_length(a: i64) -> i64 {
2118        unsafe {
2119            let a = &*(a as *const SimdVec4);
2120            let len_sq = a.data[0].mul_add(a.data[0],
2121                         a.data[1].mul_add(a.data[1],
2122                         a.data[2].mul_add(a.data[2],
2123                         a.data[3] * a.data[3])));
2124            let r = len_sq.sqrt();
2125            r.to_bits() as i64
2126        }
2127    }
2128
2129    /// SIMD normalize - fast reciprocal sqrt pattern
2130    #[no_mangle]
2131    #[inline(never)]
2132    pub extern "C" fn sigil_simd_normalize(a: i64) -> i64 {
2133        unsafe {
2134            let a = &*(a as *const SimdVec4);
2135            let len_sq = a.data[0].mul_add(a.data[0],
2136                         a.data[1].mul_add(a.data[1],
2137                         a.data[2].mul_add(a.data[2],
2138                         a.data[3] * a.data[3])));
2139            let inv = if len_sq > 1e-20 { 1.0 / len_sq.sqrt() } else { 0.0 };
2140            let mut r = SimdVec4::new(0.0, 0.0, 0.0, 0.0);
2141            r.data[0] = a.data[0] * inv;
2142            r.data[1] = a.data[1] * inv;
2143            r.data[2] = a.data[2] * inv;
2144            r.data[3] = a.data[3] * inv;
2145            Box::into_raw(r) as i64
2146        }
2147    }
2148
2149    /// SIMD cross product (3D, ignores w component)
2150    #[no_mangle]
2151    #[inline(never)]
2152    pub extern "C" fn sigil_simd_cross(a: i64, b: i64) -> i64 {
2153        unsafe {
2154            let a = &*(a as *const SimdVec4);
2155            let b = &*(b as *const SimdVec4);
2156            // Cross product using FMA where beneficial
2157            let mut r = SimdVec4::new(0.0, 0.0, 0.0, 0.0);
2158            r.data[0] = a.data[1].mul_add(b.data[2], -(a.data[2] * b.data[1]));
2159            r.data[1] = a.data[2].mul_add(b.data[0], -(a.data[0] * b.data[2]));
2160            r.data[2] = a.data[0].mul_add(b.data[1], -(a.data[1] * b.data[0]));
2161            r.data[3] = 0.0;
2162            Box::into_raw(r) as i64
2163        }
2164    }
2165
2166    /// SIMD min - element-wise minimum
2167    #[no_mangle]
2168    #[inline(never)]
2169    pub extern "C" fn sigil_simd_min(a: i64, b: i64) -> i64 {
2170        unsafe {
2171            let a = &*(a as *const SimdVec4);
2172            let b = &*(b as *const SimdVec4);
2173            let mut r = SimdVec4::new(0.0, 0.0, 0.0, 0.0);
2174            r.data[0] = a.data[0].min(b.data[0]);
2175            r.data[1] = a.data[1].min(b.data[1]);
2176            r.data[2] = a.data[2].min(b.data[2]);
2177            r.data[3] = a.data[3].min(b.data[3]);
2178            Box::into_raw(r) as i64
2179        }
2180    }
2181
2182    /// SIMD max - element-wise maximum
2183    #[no_mangle]
2184    #[inline(never)]
2185    pub extern "C" fn sigil_simd_max(a: i64, b: i64) -> i64 {
2186        unsafe {
2187            let a = &*(a as *const SimdVec4);
2188            let b = &*(b as *const SimdVec4);
2189            let mut r = SimdVec4::new(0.0, 0.0, 0.0, 0.0);
2190            r.data[0] = a.data[0].max(b.data[0]);
2191            r.data[1] = a.data[1].max(b.data[1]);
2192            r.data[2] = a.data[2].max(b.data[2]);
2193            r.data[3] = a.data[3].max(b.data[3]);
2194            Box::into_raw(r) as i64
2195        }
2196    }
2197
2198    /// Extract element from SIMD vector
2199    #[no_mangle]
2200    pub extern "C" fn sigil_simd_extract(v: i64, idx: i64) -> i64 {
2201        unsafe {
2202            let v = &*(v as *const SimdVec4);
2203            let r = v.data[(idx as usize) & 3];
2204            r.to_bits() as i64
2205        }
2206    }
2207
2208    /// Free SIMD vector (for memory management)
2209    #[no_mangle]
2210    pub extern "C" fn sigil_simd_free(v: i64) {
2211        if v != 0 {
2212            unsafe {
2213                let _ = Box::from_raw(v as *mut SimdVec4);
2214            }
2215        }
2216    }
2217
2218    #[no_mangle]
2219    pub extern "C" fn sigil_sqrt(x: f64) -> f64 {
2220        x.sqrt()
2221    }
2222
2223    #[no_mangle]
2224    pub extern "C" fn sigil_sin(x: f64) -> f64 {
2225        x.sin()
2226    }
2227
2228    #[no_mangle]
2229    pub extern "C" fn sigil_cos(x: f64) -> f64 {
2230        x.cos()
2231    }
2232
2233    #[no_mangle]
2234    pub extern "C" fn sigil_pow(base: f64, exp: f64) -> f64 {
2235        base.powf(exp)
2236    }
2237
2238    #[no_mangle]
2239    pub extern "C" fn sigil_exp(x: f64) -> f64 {
2240        x.exp()
2241    }
2242
2243    #[no_mangle]
2244    pub extern "C" fn sigil_ln(x: f64) -> f64 {
2245        x.ln()
2246    }
2247
2248    #[no_mangle]
2249    pub extern "C" fn sigil_floor(x: f64) -> f64 {
2250        x.floor()
2251    }
2252
2253    #[no_mangle]
2254    pub extern "C" fn sigil_ceil(x: f64) -> f64 {
2255        x.ceil()
2256    }
2257
2258    #[no_mangle]
2259    pub extern "C" fn sigil_abs(x: f64) -> f64 {
2260        x.abs()
2261    }
2262
2263    #[no_mangle]
2264    pub extern "C" fn sigil_print_int(x: i64) -> i64 {
2265        println!("{}", x);
2266        0
2267    }
2268
2269    #[no_mangle]
2270    pub extern "C" fn sigil_print_float(x: f64) -> i64 {
2271        println!("{}", x);
2272        0
2273    }
2274
2275    #[no_mangle]
2276    pub extern "C" fn sigil_print_str(ptr: *const u8, len: usize) -> i64 {
2277        unsafe {
2278            let slice = std::slice::from_raw_parts(ptr, len);
2279            if let Ok(s) = std::str::from_utf8(slice) {
2280                println!("{}", s);
2281            }
2282        }
2283        0
2284    }
2285
2286    #[no_mangle]
2287    pub extern "C" fn sigil_now() -> i64 {
2288        use std::time::{SystemTime, UNIX_EPOCH};
2289        SystemTime::now()
2290            .duration_since(UNIX_EPOCH)
2291            .map(|d| d.as_millis() as i64)
2292            .unwrap_or(0)
2293    }
2294
2295    // Simple array implementation using heap allocation
2296    #[repr(C)]
2297    struct SigilArray {
2298        data: *mut i64,
2299        len: usize,
2300        cap: usize,
2301    }
2302
2303    #[no_mangle]
2304    pub extern "C" fn sigil_array_new(capacity: i64) -> i64 {
2305        let cap = capacity.max(8) as usize;
2306        let layout = std::alloc::Layout::array::<i64>(cap).unwrap();
2307        let data = unsafe { std::alloc::alloc(layout) as *mut i64 };
2308
2309        let arr = Box::new(SigilArray {
2310            data,
2311            len: 0,
2312            cap,
2313        });
2314        Box::into_raw(arr) as i64
2315    }
2316
2317    #[no_mangle]
2318    pub extern "C" fn sigil_array_push(arr_ptr: i64, value: i64) -> i64 {
2319        unsafe {
2320            let arr = &mut *(arr_ptr as *mut SigilArray);
2321            if arr.len >= arr.cap {
2322                // Grow array
2323                let new_cap = arr.cap * 2;
2324                let old_layout = std::alloc::Layout::array::<i64>(arr.cap).unwrap();
2325                let new_layout = std::alloc::Layout::array::<i64>(new_cap).unwrap();
2326                arr.data = std::alloc::realloc(arr.data as *mut u8, old_layout, new_layout.size()) as *mut i64;
2327                arr.cap = new_cap;
2328            }
2329            *arr.data.add(arr.len) = value;
2330            arr.len += 1;
2331        }
2332        0
2333    }
2334
2335    #[no_mangle]
2336    pub extern "C" fn sigil_array_get(arr_ptr: i64, index: i64) -> i64 {
2337        unsafe {
2338            let arr = &*(arr_ptr as *const SigilArray);
2339            let idx = index as usize;
2340            if idx < arr.len {
2341                *arr.data.add(idx)
2342            } else {
2343                0 // Out of bounds returns 0
2344            }
2345        }
2346    }
2347
2348    #[no_mangle]
2349    pub extern "C" fn sigil_array_set(arr_ptr: i64, index: i64, value: i64) -> i64 {
2350        unsafe {
2351            let arr = &mut *(arr_ptr as *mut SigilArray);
2352            let idx = index as usize;
2353            // Extend array if needed
2354            while arr.len <= idx {
2355                sigil_array_push(arr_ptr, 0);
2356            }
2357            *arr.data.add(idx) = value;
2358        }
2359        value
2360    }
2361
2362    #[no_mangle]
2363    pub extern "C" fn sigil_array_len(arr_ptr: i64) -> i64 {
2364        unsafe {
2365            let arr = &*(arr_ptr as *const SigilArray);
2366            arr.len as i64
2367        }
2368    }
2369
2370    // ============================================
2371    // SIMD-Optimized Array Operations
2372    // ============================================
2373    // These operations process arrays in SIMD-friendly batches
2374
2375    /// Sum all elements in an array using SIMD-friendly loop
2376    #[no_mangle]
2377    pub extern "C" fn sigil_array_sum(arr_ptr: i64) -> i64 {
2378        unsafe {
2379            let arr = &*(arr_ptr as *const SigilArray);
2380            let data = std::slice::from_raw_parts(arr.data, arr.len);
2381
2382            // Process in batches of 4 for SIMD-friendliness
2383            let chunks = data.chunks_exact(4);
2384            let remainder = chunks.remainder();
2385
2386            // Accumulate 4 partial sums (allows SIMD vectorization)
2387            let mut sum0: i64 = 0;
2388            let mut sum1: i64 = 0;
2389            let mut sum2: i64 = 0;
2390            let mut sum3: i64 = 0;
2391
2392            for chunk in chunks {
2393                sum0 = sum0.wrapping_add(chunk[0]);
2394                sum1 = sum1.wrapping_add(chunk[1]);
2395                sum2 = sum2.wrapping_add(chunk[2]);
2396                sum3 = sum3.wrapping_add(chunk[3]);
2397            }
2398
2399            // Add remainder
2400            let mut sum = sum0.wrapping_add(sum1).wrapping_add(sum2).wrapping_add(sum3);
2401            for &v in remainder {
2402                sum = sum.wrapping_add(v);
2403            }
2404
2405            sum
2406        }
2407    }
2408
2409    /// Multiply all elements by a scalar (in-place, SIMD-friendly)
2410    #[no_mangle]
2411    pub extern "C" fn sigil_array_scale(arr_ptr: i64, scalar: i64) -> i64 {
2412        unsafe {
2413            let arr = &mut *(arr_ptr as *mut SigilArray);
2414            let data = std::slice::from_raw_parts_mut(arr.data, arr.len);
2415
2416            // Process in batches of 4 for SIMD-friendliness
2417            for chunk in data.chunks_exact_mut(4) {
2418                chunk[0] = chunk[0].wrapping_mul(scalar);
2419                chunk[1] = chunk[1].wrapping_mul(scalar);
2420                chunk[2] = chunk[2].wrapping_mul(scalar);
2421                chunk[3] = chunk[3].wrapping_mul(scalar);
2422            }
2423
2424            // Handle remainder
2425            let remainder_start = (data.len() / 4) * 4;
2426            for v in &mut data[remainder_start..] {
2427                *v = v.wrapping_mul(scalar);
2428            }
2429
2430            arr_ptr
2431        }
2432    }
2433
2434    /// Add a scalar to all elements (in-place, SIMD-friendly)
2435    #[no_mangle]
2436    pub extern "C" fn sigil_array_offset(arr_ptr: i64, offset: i64) -> i64 {
2437        unsafe {
2438            let arr = &mut *(arr_ptr as *mut SigilArray);
2439            let data = std::slice::from_raw_parts_mut(arr.data, arr.len);
2440
2441            // Process in batches of 4 for SIMD-friendliness
2442            for chunk in data.chunks_exact_mut(4) {
2443                chunk[0] = chunk[0].wrapping_add(offset);
2444                chunk[1] = chunk[1].wrapping_add(offset);
2445                chunk[2] = chunk[2].wrapping_add(offset);
2446                chunk[3] = chunk[3].wrapping_add(offset);
2447            }
2448
2449            let remainder_start = (data.len() / 4) * 4;
2450            for v in &mut data[remainder_start..] {
2451                *v = v.wrapping_add(offset);
2452            }
2453
2454            arr_ptr
2455        }
2456    }
2457
2458    /// Dot product of two arrays (SIMD-friendly)
2459    #[no_mangle]
2460    pub extern "C" fn sigil_array_dot(a_ptr: i64, b_ptr: i64) -> i64 {
2461        unsafe {
2462            let a_arr = &*(a_ptr as *const SigilArray);
2463            let b_arr = &*(b_ptr as *const SigilArray);
2464
2465            let len = a_arr.len.min(b_arr.len);
2466            let a_data = std::slice::from_raw_parts(a_arr.data, len);
2467            let b_data = std::slice::from_raw_parts(b_arr.data, len);
2468
2469            // Process in batches of 4 for SIMD-friendliness
2470            let mut sum0: i64 = 0;
2471            let mut sum1: i64 = 0;
2472            let mut sum2: i64 = 0;
2473            let mut sum3: i64 = 0;
2474
2475            let chunks = len / 4;
2476            for i in 0..chunks {
2477                let base = i * 4;
2478                sum0 = sum0.wrapping_add(a_data[base].wrapping_mul(b_data[base]));
2479                sum1 = sum1.wrapping_add(a_data[base + 1].wrapping_mul(b_data[base + 1]));
2480                sum2 = sum2.wrapping_add(a_data[base + 2].wrapping_mul(b_data[base + 2]));
2481                sum3 = sum3.wrapping_add(a_data[base + 3].wrapping_mul(b_data[base + 3]));
2482            }
2483
2484            // Add remainder
2485            let mut sum = sum0.wrapping_add(sum1).wrapping_add(sum2).wrapping_add(sum3);
2486            for i in (chunks * 4)..len {
2487                sum = sum.wrapping_add(a_data[i].wrapping_mul(b_data[i]));
2488            }
2489
2490            sum
2491        }
2492    }
2493
2494    /// Element-wise add two arrays into a new array (SIMD-friendly)
2495    #[no_mangle]
2496    pub extern "C" fn sigil_array_add(a_ptr: i64, b_ptr: i64) -> i64 {
2497        unsafe {
2498            let a_arr = &*(a_ptr as *const SigilArray);
2499            let b_arr = &*(b_ptr as *const SigilArray);
2500
2501            let len = a_arr.len.min(b_arr.len);
2502            let a_data = std::slice::from_raw_parts(a_arr.data, len);
2503            let b_data = std::slice::from_raw_parts(b_arr.data, len);
2504
2505            // Create result array
2506            let result = sigil_array_new(len as i64);
2507            let r_arr = &mut *(result as *mut SigilArray);
2508            r_arr.len = len;
2509            let r_data = std::slice::from_raw_parts_mut(r_arr.data, len);
2510
2511            // Process in batches of 4 for SIMD-friendliness
2512            for i in 0..(len / 4) {
2513                let base = i * 4;
2514                r_data[base] = a_data[base].wrapping_add(b_data[base]);
2515                r_data[base + 1] = a_data[base + 1].wrapping_add(b_data[base + 1]);
2516                r_data[base + 2] = a_data[base + 2].wrapping_add(b_data[base + 2]);
2517                r_data[base + 3] = a_data[base + 3].wrapping_add(b_data[base + 3]);
2518            }
2519
2520            // Handle remainder
2521            for i in ((len / 4) * 4)..len {
2522                r_data[i] = a_data[i].wrapping_add(b_data[i]);
2523            }
2524
2525            result
2526        }
2527    }
2528
2529    /// Element-wise multiply two arrays into a new array (SIMD-friendly)
2530    #[no_mangle]
2531    pub extern "C" fn sigil_array_mul(a_ptr: i64, b_ptr: i64) -> i64 {
2532        unsafe {
2533            let a_arr = &*(a_ptr as *const SigilArray);
2534            let b_arr = &*(b_ptr as *const SigilArray);
2535
2536            let len = a_arr.len.min(b_arr.len);
2537            let a_data = std::slice::from_raw_parts(a_arr.data, len);
2538            let b_data = std::slice::from_raw_parts(b_arr.data, len);
2539
2540            // Create result array
2541            let result = sigil_array_new(len as i64);
2542            let r_arr = &mut *(result as *mut SigilArray);
2543            r_arr.len = len;
2544            let r_data = std::slice::from_raw_parts_mut(r_arr.data, len);
2545
2546            // Process in batches of 4 for SIMD-friendliness
2547            for i in 0..(len / 4) {
2548                let base = i * 4;
2549                r_data[base] = a_data[base].wrapping_mul(b_data[base]);
2550                r_data[base + 1] = a_data[base + 1].wrapping_mul(b_data[base + 1]);
2551                r_data[base + 2] = a_data[base + 2].wrapping_mul(b_data[base + 2]);
2552                r_data[base + 3] = a_data[base + 3].wrapping_mul(b_data[base + 3]);
2553            }
2554
2555            // Handle remainder
2556            for i in ((len / 4) * 4)..len {
2557                r_data[i] = a_data[i].wrapping_mul(b_data[i]);
2558            }
2559
2560            result
2561        }
2562    }
2563
2564    /// Find minimum value in array (SIMD-friendly)
2565    #[no_mangle]
2566    pub extern "C" fn sigil_array_min(arr_ptr: i64) -> i64 {
2567        unsafe {
2568            let arr = &*(arr_ptr as *const SigilArray);
2569            if arr.len == 0 {
2570                return 0;
2571            }
2572
2573            let data = std::slice::from_raw_parts(arr.data, arr.len);
2574
2575            // Process in batches of 4
2576            let mut min0 = i64::MAX;
2577            let mut min1 = i64::MAX;
2578            let mut min2 = i64::MAX;
2579            let mut min3 = i64::MAX;
2580
2581            for chunk in data.chunks_exact(4) {
2582                min0 = min0.min(chunk[0]);
2583                min1 = min1.min(chunk[1]);
2584                min2 = min2.min(chunk[2]);
2585                min3 = min3.min(chunk[3]);
2586            }
2587
2588            let mut min_val = min0.min(min1).min(min2).min(min3);
2589
2590            // Handle remainder
2591            let remainder_start = (data.len() / 4) * 4;
2592            for &v in &data[remainder_start..] {
2593                min_val = min_val.min(v);
2594            }
2595
2596            min_val
2597        }
2598    }
2599
2600    /// Find maximum value in array (SIMD-friendly)
2601    #[no_mangle]
2602    pub extern "C" fn sigil_array_max(arr_ptr: i64) -> i64 {
2603        unsafe {
2604            let arr = &*(arr_ptr as *const SigilArray);
2605            if arr.len == 0 {
2606                return 0;
2607            }
2608
2609            let data = std::slice::from_raw_parts(arr.data, arr.len);
2610
2611            // Process in batches of 4
2612            let mut max0 = i64::MIN;
2613            let mut max1 = i64::MIN;
2614            let mut max2 = i64::MIN;
2615            let mut max3 = i64::MIN;
2616
2617            for chunk in data.chunks_exact(4) {
2618                max0 = max0.max(chunk[0]);
2619                max1 = max1.max(chunk[1]);
2620                max2 = max2.max(chunk[2]);
2621                max3 = max3.max(chunk[3]);
2622            }
2623
2624            let mut max_val = max0.max(max1).max(max2).max(max3);
2625
2626            // Handle remainder
2627            let remainder_start = (data.len() / 4) * 4;
2628            for &v in &data[remainder_start..] {
2629                max_val = max_val.max(v);
2630            }
2631
2632            max_val
2633        }
2634    }
2635
2636    /// Fill array with a value (SIMD-friendly)
2637    #[no_mangle]
2638    pub extern "C" fn sigil_array_fill(arr_ptr: i64, value: i64, count: i64) -> i64 {
2639        unsafe {
2640            let arr = &mut *(arr_ptr as *mut SigilArray);
2641            let n = count as usize;
2642
2643            // Ensure capacity
2644            while arr.len < n {
2645                sigil_array_push(arr_ptr, 0);
2646            }
2647
2648            let data = std::slice::from_raw_parts_mut(arr.data, n);
2649
2650            // Process in batches of 4
2651            for chunk in data.chunks_exact_mut(4) {
2652                chunk[0] = value;
2653                chunk[1] = value;
2654                chunk[2] = value;
2655                chunk[3] = value;
2656            }
2657
2658            // Handle remainder
2659            let remainder_start = (n / 4) * 4;
2660            for v in &mut data[remainder_start..] {
2661                *v = value;
2662            }
2663
2664            arr_ptr
2665        }
2666    }
2667
2668    // ============================================
2669    // PipeOp Array Access Functions
2670    // ============================================
2671    // Functions for the access morphemes: α (first), ω (last), μ (middle), χ (choice), ν (nth), ξ (next)
2672
2673    /// Get first element of array (α morpheme)
2674    #[no_mangle]
2675    pub extern "C" fn sigil_array_first(arr_ptr: i64) -> i64 {
2676        unsafe {
2677            let arr = &*(arr_ptr as *const SigilArray);
2678            if arr.len == 0 {
2679                return 0; // Return 0 for empty array
2680            }
2681            *arr.data
2682        }
2683    }
2684
2685    /// Get last element of array (ω morpheme)
2686    #[no_mangle]
2687    pub extern "C" fn sigil_array_last(arr_ptr: i64) -> i64 {
2688        unsafe {
2689            let arr = &*(arr_ptr as *const SigilArray);
2690            if arr.len == 0 {
2691                return 0; // Return 0 for empty array
2692            }
2693            *arr.data.add(arr.len - 1)
2694        }
2695    }
2696
2697    /// Get middle element of array (μ morpheme)
2698    #[no_mangle]
2699    pub extern "C" fn sigil_array_middle(arr_ptr: i64) -> i64 {
2700        unsafe {
2701            let arr = &*(arr_ptr as *const SigilArray);
2702            if arr.len == 0 {
2703                return 0; // Return 0 for empty array
2704            }
2705            let mid = arr.len / 2;
2706            *arr.data.add(mid)
2707        }
2708    }
2709
2710    /// Get random element of array (χ morpheme)
2711    #[no_mangle]
2712    pub extern "C" fn sigil_array_choice(arr_ptr: i64) -> i64 {
2713        unsafe {
2714            let arr = &*(arr_ptr as *const SigilArray);
2715            if arr.len == 0 {
2716                return 0; // Return 0 for empty array
2717            }
2718            // Simple LCG-based random using time as seed
2719            use std::time::{SystemTime, UNIX_EPOCH};
2720            let seed = SystemTime::now()
2721                .duration_since(UNIX_EPOCH)
2722                .map(|d| d.as_nanos() as u64)
2723                .unwrap_or(12345);
2724            let idx = ((seed.wrapping_mul(1103515245).wrapping_add(12345)) >> 16) as usize % arr.len;
2725            *arr.data.add(idx)
2726        }
2727    }
2728
    /// Get nth element of array (ν morpheme) - same as sigil_array_get but clearer semantics
    ///
    /// Out-of-range indices return 0, matching sigil_array_get.
    #[no_mangle]
    pub extern "C" fn sigil_array_nth(arr_ptr: i64, index: i64) -> i64 {
        sigil_array_get(arr_ptr, index)
    }
2734
    /// Get next element (iterator advance) - currently returns first element (ξ morpheme)
    ///
    /// Empty arrays yield 0 (via sigil_array_first).
    #[no_mangle]
    pub extern "C" fn sigil_array_next(arr_ptr: i64) -> i64 {
        // For now, next returns the first element
        // A full iterator implementation would track state
        // (e.g. a cursor stored alongside the array handle).
        sigil_array_first(arr_ptr)
    }
2742
2743    /// Product of all elements in array (Π morpheme)
2744    #[no_mangle]
2745    pub extern "C" fn sigil_array_product(arr_ptr: i64) -> i64 {
2746        unsafe {
2747            let arr = &*(arr_ptr as *const SigilArray);
2748            if arr.len == 0 {
2749                return 1; // Product of empty set is 1 (identity)
2750            }
2751            let mut product: i64 = 1;
2752            for i in 0..arr.len {
2753                product = product.wrapping_mul(*arr.data.add(i));
2754            }
2755            product
2756        }
2757    }
2758
2759    /// Sort array in ascending order (σ morpheme) - returns new sorted array
2760    #[no_mangle]
2761    pub extern "C" fn sigil_array_sort(arr_ptr: i64) -> i64 {
2762        unsafe {
2763            let arr = &*(arr_ptr as *const SigilArray);
2764            if arr.len == 0 {
2765                return sigil_array_new(0);
2766            }
2767
2768            // Copy elements to a Vec for sorting
2769            let mut elements: Vec<i64> = Vec::with_capacity(arr.len);
2770            for i in 0..arr.len {
2771                elements.push(*arr.data.add(i));
2772            }
2773
2774            // Sort ascending
2775            elements.sort();
2776
2777            // Create new array with sorted elements
2778            let new_arr = sigil_array_new(arr.len as i64);
2779            for elem in elements {
2780                sigil_array_push(new_arr, elem);
2781            }
2782            new_arr
2783        }
2784    }
2785
2786    // ============================================
2787    // Parallel Execution Functions (∥ morpheme)
2788    // ============================================
2789    // These provide multi-threaded execution of array operations
2790    // For JIT compilation, these use a simple thread pool approach
2791
    /// Parallel map operation - applies a transformation in parallel across array elements
    /// For now, returns the array unchanged as full closure parallelization
    /// requires more complex infrastructure. In production, this would:
    /// 1. Partition array into chunks based on available CPU cores
    /// 2. Spawn worker threads for each chunk
    /// 3. Apply transform closure in parallel
    /// 4. Collect results
    ///
    /// NOTE: the returned value is the same handle that was passed in — no
    /// copy is made, so callers must not double-free it.
    #[no_mangle]
    pub extern "C" fn sigil_parallel_map(arr_ptr: i64) -> i64 {
        // Stub: returns array unchanged
        // Full implementation would use rayon::par_iter or manual thread pool
        arr_ptr
    }
2805
    /// Parallel filter operation - filters elements in parallel
    /// Uses parallel predicate evaluation with stream compaction
    ///
    /// NOTE: currently an identity stub — the same handle is returned and
    /// no element is removed.
    #[no_mangle]
    pub extern "C" fn sigil_parallel_filter(arr_ptr: i64) -> i64 {
        // Stub: returns array unchanged
        // Full implementation would:
        // 1. Evaluate predicates in parallel
        // 2. Use prefix sum for compaction offsets
        // 3. Parallel write to output array
        arr_ptr
    }
2817
2818    /// Parallel reduce operation - tree reduction for associative operations
2819    /// Achieves O(log n) depth with O(n) work
2820    #[no_mangle]
2821    pub extern "C" fn sigil_parallel_reduce(arr_ptr: i64) -> i64 {
2822        // For reduction, we can implement a parallel tree reduction
2823        // Falls back to sequential sum for now
2824        unsafe {
2825            let arr = &*(arr_ptr as *const SigilArray);
2826            if arr.len == 0 {
2827                return 0;
2828            }
2829
2830            // Simple sequential sum - parallel tree reduction would
2831            // use divide-and-conquer with thread spawning
2832            let mut sum: i64 = 0;
2833            for i in 0..arr.len {
2834                sum += *arr.data.add(i);
2835            }
2836            sum
2837        }
2838    }
2839
2840    // ============================================
2841    // GPU Compute Functions (⊛ morpheme)
2842    // ============================================
2843    // These would dispatch operations to GPU via wgpu/vulkan
2844    // Currently stubs that fall back to CPU execution
2845
2846    /// GPU map operation - would compile to WGSL/SPIR-V compute shader
2847    /// Shader structure:
2848    /// ```wgsl
2849    /// @compute @workgroup_size(256)
2850    /// fn main(@builtin(global_invocation_id) id: vec3<u32>) {
2851    ///     let idx = id.x;
2852    ///     output[idx] = transform(input[idx]);
2853    /// }
2854    /// ```
2855    #[no_mangle]
2856    pub extern "C" fn sigil_gpu_map(arr_ptr: i64) -> i64 {
2857        // Stub: returns array unchanged
2858        // Full implementation would:
2859        // 1. Upload array to GPU buffer
2860        // 2. Compile transform to SPIR-V
2861        // 3. Dispatch compute shader
2862        // 4. Download results
2863        arr_ptr
2864    }
2865
2866    /// GPU filter operation with parallel stream compaction
2867    /// Uses scan-based compaction algorithm
2868    #[no_mangle]
2869    pub extern "C" fn sigil_gpu_filter(arr_ptr: i64) -> i64 {
2870        // Stub: returns array unchanged
2871        // Full implementation would use prefix sum for compaction
2872        arr_ptr
2873    }
2874
2875    /// GPU reduce operation - uses tree reduction in shared memory
2876    /// Achieves O(log n) parallel steps
2877    #[no_mangle]
2878    pub extern "C" fn sigil_gpu_reduce(arr_ptr: i64) -> i64 {
2879        // Falls back to CPU reduction
2880        sigil_parallel_reduce(arr_ptr)
2881    }
2882
2883    // ============================================
2884    // Memoization Cache for Recursive Functions
2885    // ============================================
2886    // Uses a simple hash table with linear probing for O(1) average lookup
2887
2888    /// Memoization cache entry
2889    #[repr(C)]
2890    struct MemoEntry {
2891        key1: i64,      // First argument (or hash of multiple args)
2892        key2: i64,      // Second argument (for 2-arg functions)
2893        value: i64,     // Cached result
2894        occupied: bool, // Whether this slot is used
2895    }
2896
2897    /// Memoization cache (fixed-size hash table)
2898    #[repr(C)]
2899    struct MemoCache {
2900        entries: *mut MemoEntry,
2901        capacity: usize,
2902        mask: usize,    // capacity - 1, for fast modulo
2903    }
2904
2905    /// Create a new memoization cache
2906    #[no_mangle]
2907    pub extern "C" fn sigil_memo_new(capacity: i64) -> i64 {
2908        let cap = (capacity as usize).next_power_of_two().max(1024);
2909        let layout = std::alloc::Layout::array::<MemoEntry>(cap).unwrap();
2910        let entries = unsafe {
2911            let ptr = std::alloc::alloc_zeroed(layout) as *mut MemoEntry;
2912            ptr
2913        };
2914
2915        let cache = Box::new(MemoCache {
2916            entries,
2917            capacity: cap,
2918            mask: cap - 1,
2919        });
2920        Box::into_raw(cache) as i64
2921    }
2922
2923    /// Hash function for single argument
2924    #[inline]
2925    fn memo_hash_1(key: i64) -> usize {
2926        // FNV-1a inspired hash
2927        let mut h = key as u64;
2928        h = h.wrapping_mul(0x517cc1b727220a95);
2929        h ^= h >> 32;
2930        h as usize
2931    }
2932
2933    /// Hash function for two arguments
2934    #[inline]
2935    fn memo_hash_2(key1: i64, key2: i64) -> usize {
2936        let mut h = key1 as u64;
2937        h = h.wrapping_mul(0x517cc1b727220a95);
2938        h ^= key2 as u64;
2939        h = h.wrapping_mul(0x517cc1b727220a95);
2940        h ^= h >> 32;
2941        h as usize
2942    }
2943
2944    // ============================================
2945    // Optimized Recursive Algorithm Implementations
2946    // ============================================
2947    // These iterative implementations are much faster than recursive versions
2948
2949    /// Iterative Ackermann function using explicit stack
2950    /// Much faster than recursive version - no stack overflow, O(result) space
2951    #[no_mangle]
2952    pub extern "C" fn sigil_ackermann(m: i64, n: i64) -> i64 {
2953        // Use an explicit stack to simulate recursion
2954        let mut stack: Vec<i64> = Vec::with_capacity(1024);
2955        stack.push(m);
2956        let mut n = n;
2957
2958        while let Some(m) = stack.pop() {
2959            if m == 0 {
2960                n = n + 1;
2961            } else if n == 0 {
2962                stack.push(m - 1);
2963                n = 1;
2964            } else {
2965                stack.push(m - 1);
2966                stack.push(m);
2967                n = n - 1;
2968            }
2969        }
2970        n
2971    }
2972
2973    /// Iterative Tak (Takeuchi) function using explicit stack
2974    #[no_mangle]
2975    pub extern "C" fn sigil_tak(x: i64, y: i64, z: i64) -> i64 {
2976        // Use continuation-passing style with explicit stack
2977        #[derive(Clone, Copy)]
2978        enum TakCont {
2979            Eval { x: i64, y: i64, z: i64 },
2980            Cont1 { y: i64, z: i64, x: i64 },       // waiting for tak(x-1,y,z), need y,z,x for later
2981            Cont2 { z: i64, x: i64, y: i64, r1: i64 }, // waiting for tak(y-1,z,x), have r1
2982            Cont3 { r1: i64, r2: i64 },             // waiting for tak(z-1,x,y), have r1,r2
2983        }
2984
2985        let mut stack: Vec<TakCont> = Vec::with_capacity(256);
2986        stack.push(TakCont::Eval { x, y, z });
2987        let mut result: i64 = 0;
2988
2989        while let Some(cont) = stack.pop() {
2990            match cont {
2991                TakCont::Eval { x, y, z } => {
2992                    if y >= x {
2993                        result = z;
2994                    } else {
2995                        // Need to compute tak(tak(x-1,y,z), tak(y-1,z,x), tak(z-1,x,y))
2996                        stack.push(TakCont::Cont1 { y, z, x });
2997                        stack.push(TakCont::Eval { x: x - 1, y, z });
2998                    }
2999                }
3000                TakCont::Cont1 { y, z, x } => {
3001                    let r1 = result;
3002                    stack.push(TakCont::Cont2 { z, x, y, r1 });
3003                    stack.push(TakCont::Eval { x: y - 1, y: z, z: x });
3004                }
3005                TakCont::Cont2 { z, x, y, r1 } => {
3006                    let r2 = result;
3007                    stack.push(TakCont::Cont3 { r1, r2 });
3008                    stack.push(TakCont::Eval { x: z - 1, y: x, z: y });
3009                }
3010                TakCont::Cont3 { r1, r2 } => {
3011                    let r3 = result;
3012                    // Now compute tak(r1, r2, r3)
3013                    stack.push(TakCont::Eval { x: r1, y: r2, z: r3 });
3014                }
3015            }
3016        }
3017        result
3018    }
3019
3020    /// Sentinel value for "not found" in memo cache
3021    /// Using i64::MIN + 1 to avoid parser issues with the full MIN value
3022    const MEMO_NOT_FOUND: i64 = -9223372036854775807;
3023
3024    /// Lookup a single-argument function result in cache
3025    /// Returns the cached value, or MEMO_NOT_FOUND if not found
3026    #[no_mangle]
3027    pub extern "C" fn sigil_memo_get_1(cache_ptr: i64, key: i64) -> i64 {
3028        unsafe {
3029            let cache = &*(cache_ptr as *const MemoCache);
3030            let mut idx = memo_hash_1(key) & cache.mask;
3031
3032            // Linear probing with limited search
3033            for _ in 0..32 {
3034                let entry = &*cache.entries.add(idx);
3035                if !entry.occupied {
3036                    return MEMO_NOT_FOUND;
3037                }
3038                if entry.key1 == key {
3039                    return entry.value;
3040                }
3041                idx = (idx + 1) & cache.mask;
3042            }
3043            MEMO_NOT_FOUND
3044        }
3045    }
3046
3047    /// Store a single-argument function result in cache
3048    #[no_mangle]
3049    pub extern "C" fn sigil_memo_set_1(cache_ptr: i64, key: i64, value: i64) {
3050        unsafe {
3051            let cache = &*(cache_ptr as *const MemoCache);
3052            let mut idx = memo_hash_1(key) & cache.mask;
3053
3054            // Linear probing
3055            for _ in 0..32 {
3056                let entry = &mut *cache.entries.add(idx);
3057                if !entry.occupied || entry.key1 == key {
3058                    entry.key1 = key;
3059                    entry.value = value;
3060                    entry.occupied = true;
3061                    return;
3062                }
3063                idx = (idx + 1) & cache.mask;
3064            }
3065            // Cache full at this location, overwrite first slot
3066            let entry = &mut *cache.entries.add(memo_hash_1(key) & cache.mask);
3067            entry.key1 = key;
3068            entry.value = value;
3069            entry.occupied = true;
3070        }
3071    }
3072
3073    /// Lookup a two-argument function result in cache
3074    #[no_mangle]
3075    pub extern "C" fn sigil_memo_get_2(cache_ptr: i64, key1: i64, key2: i64) -> i64 {
3076        unsafe {
3077            let cache = &*(cache_ptr as *const MemoCache);
3078            let mut idx = memo_hash_2(key1, key2) & cache.mask;
3079
3080            for _ in 0..32 {
3081                let entry = &*cache.entries.add(idx);
3082                if !entry.occupied {
3083                    return MEMO_NOT_FOUND;
3084                }
3085                if entry.key1 == key1 && entry.key2 == key2 {
3086                    return entry.value;
3087                }
3088                idx = (idx + 1) & cache.mask;
3089            }
3090            MEMO_NOT_FOUND
3091        }
3092    }
3093
3094    /// Store a two-argument function result in cache
3095    #[no_mangle]
3096    pub extern "C" fn sigil_memo_set_2(cache_ptr: i64, key1: i64, key2: i64, value: i64) {
3097        unsafe {
3098            let cache = &*(cache_ptr as *const MemoCache);
3099            let mut idx = memo_hash_2(key1, key2) & cache.mask;
3100
3101            for _ in 0..32 {
3102                let entry = &mut *cache.entries.add(idx);
3103                if !entry.occupied || (entry.key1 == key1 && entry.key2 == key2) {
3104                    entry.key1 = key1;
3105                    entry.key2 = key2;
3106                    entry.value = value;
3107                    entry.occupied = true;
3108                    return;
3109                }
3110                idx = (idx + 1) & cache.mask;
3111            }
3112            let entry = &mut *cache.entries.add(memo_hash_2(key1, key2) & cache.mask);
3113            entry.key1 = key1;
3114            entry.key2 = key2;
3115            entry.value = value;
3116            entry.occupied = true;
3117        }
3118    }
3119
3120    /// Free a memoization cache
3121    #[no_mangle]
3122    pub extern "C" fn sigil_memo_free(cache_ptr: i64) {
3123        if cache_ptr != 0 {
3124            unsafe {
3125                let cache = Box::from_raw(cache_ptr as *mut MemoCache);
3126                let layout = std::alloc::Layout::array::<MemoEntry>(cache.capacity).unwrap();
3127                std::alloc::dealloc(cache.entries as *mut u8, layout);
3128            }
3129        }
3130    }
3131
3132    // ============================================
3133    // FFI Tests
3134    // ============================================
3135
    // Unit tests for the FFI front-end: extern-block parsing, signature
    // registration in the JIT compiler, and C-to-Cranelift type mapping.
    #[cfg(test)]
    mod tests {
        use super::*;
        use crate::parser::Parser;

        // Happy path: an `extern "C"` block should register each declared
        // function in `extern_functions` with its Cranelift-level signature.
        #[test]
        fn test_extern_block_parsing_and_declaration() {
            let source = r#"
                extern "C" {
                    fn abs(x: c_int) -> c_int;
                    fn strlen(s: *const c_char) -> usize;
                }

                fn main() -> i64 {
                    42
                }
            "#;

            let mut compiler = JitCompiler::new().unwrap();
            let result = compiler.compile(source);
            assert!(result.is_ok(), "Failed to compile FFI declarations: {:?}", result);

            // Check that extern functions were registered
            assert!(compiler.extern_functions.contains_key("abs"), "abs not declared");
            assert!(compiler.extern_functions.contains_key("strlen"), "strlen not declared");

            // Check abs signature
            let abs_sig = compiler.extern_functions.get("abs").unwrap();
            assert_eq!(abs_sig.params.len(), 1);
            assert_eq!(abs_sig.params[0], types::I32); // c_int -> i32
            assert_eq!(abs_sig.returns, Some(types::I32));

            // Check strlen signature
            let strlen_sig = compiler.extern_functions.get("strlen").unwrap();
            assert_eq!(strlen_sig.params.len(), 1);
            assert_eq!(strlen_sig.params[0], types::I64); // pointer -> i64
            assert_eq!(strlen_sig.returns, Some(types::I64)); // usize -> i64
        }

        // A trailing `...` parameter should mark the signature as variadic.
        #[test]
        fn test_extern_variadic_function() {
            let source = r#"
                extern "C" {
                    fn printf(fmt: *const c_char, ...) -> c_int;
                }

                fn main() -> i64 {
                    0
                }
            "#;

            let mut compiler = JitCompiler::new().unwrap();
            let result = compiler.compile(source);
            assert!(result.is_ok(), "Failed to compile variadic FFI: {:?}", result);

            let printf_sig = compiler.extern_functions.get("printf").unwrap();
            assert!(printf_sig.variadic, "printf should be variadic");
        }

        // Only the C ABI is supported; any other ABI string must be rejected
        // with an "Unsupported ABI" error.
        #[test]
        fn test_extern_c_abi_only() {
            let source = r#"
                extern "Rust" {
                    fn some_func(x: i32) -> i32;
                }

                fn main() -> i64 {
                    0
                }
            "#;

            let mut compiler = JitCompiler::new().unwrap();
            let result = compiler.compile(source);
            assert!(result.is_err(), "Should reject non-C ABI");
            assert!(result.unwrap_err().contains("Unsupported ABI"));
        }

        // Table-driven check that each supported C/Rust type name maps to the
        // expected Cranelift type for both parameter and return position.
        #[test]
        fn test_c_type_mapping() {
            // Test that C types are correctly mapped to Cranelift types
            let test_cases = vec![
                ("c_char", types::I8),
                ("c_int", types::I32),
                ("c_long", types::I64),
                ("c_float", types::F32),
                ("c_double", types::F64),
                ("size_t", types::I64),
                ("i32", types::I32),
                ("f64", types::F64),
            ];

            for (type_name, expected_cl_type) in test_cases {
                // Each type is exercised as both the parameter and the return
                // type of a fresh extern declaration.
                let source = format!(r#"
                    extern "C" {{
                        fn test_func(x: {}) -> {};
                    }}

                    fn main() -> i64 {{ 0 }}
                "#, type_name, type_name);

                let mut compiler = JitCompiler::new().unwrap();
                let result = compiler.compile(&source);
                assert!(result.is_ok(), "Failed for type {}: {:?}", type_name, result);

                let sig = compiler.extern_functions.get("test_func").unwrap();
                assert_eq!(sig.params[0], expected_cl_type, "Wrong param type for {}", type_name);
                assert_eq!(sig.returns, Some(expected_cl_type), "Wrong return type for {}", type_name);
            }
        }
    }
3246}
3247
3248// Re-export for convenience
3249#[cfg(feature = "jit")]
3250pub use jit::JitCompiler;