Skip to main content

shape_jit/
context.rs

1//! JIT Context and Related Data Structures
2//!
3//! Contains the runtime context and data structures used by JIT-compiled code.
4
5use super::nan_boxing::*;
6
7// ============================================================================
8// JITContext Field Offsets for Direct Memory Access
9// ============================================================================
10//
11// These must match the #[repr(C)] struct layout of JITContext
12// Regenerate with: rustc --edition 2024 scripts/jit_offsets.rs && ./jit_offsets
13
// Columns are reached generically through the `column_ptrs` array (indexed via
// the column map); timestamps have their own pointer for time-based access.
pub const TIMESTAMPS_PTR_OFFSET: i32 = 24;

// DataFrame access offsets: consecutive 8-byte slots after the timestamps pointer.
pub const COLUMN_PTRS_OFFSET: i32 = TIMESTAMPS_PTR_OFFSET + 8;
pub const COLUMN_COUNT_OFFSET: i32 = COLUMN_PTRS_OFFSET + 8;
pub const ROW_COUNT_OFFSET: i32 = COLUMN_COUNT_OFFSET + 8;
pub const CURRENT_ROW_OFFSET: i32 = ROW_COUNT_OFFSET + 8;

// Locals and stack offsets, each derived from the array that precedes it.
pub const LOCALS_OFFSET: i32 = CURRENT_ROW_OFFSET + 8;
pub const STACK_OFFSET: i32 = LOCALS_OFFSET + 256 * 8; // 256 locals, 8 bytes each
pub const STACK_PTR_OFFSET: i32 = STACK_OFFSET + 512 * 8; // 512 stack slots, 8 bytes each

// GC safepoint flag pointer offset (for inline safepoint check)
pub const GC_SAFEPOINT_FLAG_PTR_OFFSET: i32 = 6328;
31
32// ============================================================================
33// Compile-time layout verification for JITContext
34// ============================================================================
35//
36// These assertions ensure the hardcoded byte offsets above remain in sync with
37// the actual #[repr(C)] struct layout. A mismatch will produce a compile error.
38const _: () = {
39    assert!(
40        std::mem::offset_of!(JITContext, timestamps_ptr) == TIMESTAMPS_PTR_OFFSET as usize,
41        "TIMESTAMPS_PTR_OFFSET does not match JITContext layout"
42    );
43    assert!(
44        std::mem::offset_of!(JITContext, column_ptrs) == COLUMN_PTRS_OFFSET as usize,
45        "COLUMN_PTRS_OFFSET does not match JITContext layout"
46    );
47    assert!(
48        std::mem::offset_of!(JITContext, column_count) == COLUMN_COUNT_OFFSET as usize,
49        "COLUMN_COUNT_OFFSET does not match JITContext layout"
50    );
51    assert!(
52        std::mem::offset_of!(JITContext, row_count) == ROW_COUNT_OFFSET as usize,
53        "ROW_COUNT_OFFSET does not match JITContext layout"
54    );
55    assert!(
56        std::mem::offset_of!(JITContext, current_row) == CURRENT_ROW_OFFSET as usize,
57        "CURRENT_ROW_OFFSET does not match JITContext layout"
58    );
59    assert!(
60        std::mem::offset_of!(JITContext, locals) == LOCALS_OFFSET as usize,
61        "LOCALS_OFFSET does not match JITContext layout"
62    );
63    assert!(
64        std::mem::offset_of!(JITContext, stack) == STACK_OFFSET as usize,
65        "STACK_OFFSET does not match JITContext layout"
66    );
67    assert!(
68        std::mem::offset_of!(JITContext, stack_ptr) == STACK_PTR_OFFSET as usize,
69        "STACK_PTR_OFFSET does not match JITContext layout"
70    );
71    assert!(
72        std::mem::offset_of!(JITContext, gc_safepoint_flag_ptr) == GC_SAFEPOINT_FLAG_PTR_OFFSET as usize,
73        "GC_SAFEPOINT_FLAG_PTR_OFFSET does not match JITContext layout"
74    );
75};
76
77// ============================================================================
78// Type Aliases
79// ============================================================================
80
81/// Function pointer type for JIT-compiled strategy functions
82pub type JittedStrategyFn = unsafe extern "C" fn(*mut JITContext) -> i32;
83
84/// Legacy function signature for simple numeric computations
85pub type JittedFn = unsafe extern "C" fn(*mut f64, *const f64, usize) -> f64;
86
87/// OSR entry function signature.
88///
89/// This has the same binary signature as `JittedStrategyFn` -- the difference
90/// is semantic: for OSR entry, the caller pre-fills `JITContext.locals` from
91/// the interpreter's live frame before invocation, and reads modified locals
92/// back on return.
93///
94/// # Arguments
95/// * `ctx_ptr` - Pointer to a `JITContext` with locals pre-filled from the
96///   interpreter frame (marshaled using the `OsrEntryPoint.local_kinds`).
97///
98/// # Returns
99/// * `0`          - Success: execution completed. Modified locals are in
100///                  `JITContext.locals`. The VM reads them back and continues
101///                  at `OsrEntryPoint.exit_ip`.
102/// * `i32::MIN+1` - Deopt requested: a type guard failed mid-loop. The VM
103///                  reads locals from `JITContext.locals` and resumes at
104///                  the `DeoptInfo.resume_ip` for the failing guard.
105/// * Other negative - Error.
106pub type OsrEntryFn = unsafe extern "C" fn(*mut JITContext) -> i32;
107
108// ============================================================================
109// Simulation Kernel ABI (Zero-Allocation Hot Path)
110// ============================================================================
111
/// Single-series simulation kernel entry point.
///
/// This "fused step" ABI is what enables >10M ticks/sec:
/// - no JITContext setup on the way in
/// - data is reached with plain pointer arithmetic
/// - nothing is allocated inside the hot loop
///
/// # Arguments (positional)
/// 1. `cursor_index: usize` - Current position in the series (0-based)
/// 2. `series_ptrs: *const *const f64` - Pointer to array of column pointers
/// 3. `state_ptr: *mut u8` - Pointer to TypedObject state
///
/// # Returns
/// * 0 = continue execution
/// * 1 = signal generated (written to state)
/// * negative = error
///
/// # Safety
/// The caller must ensure:
/// - `cursor_index` is within bounds
/// - `series_ptrs` points to valid column pointer array
/// - `state_ptr` points to valid TypedObject with correct schema
pub type SimulationKernelFn = unsafe extern "C" fn(usize, *const *const f64, *mut u8) -> i32;
139
/// Correlated (multi-series) kernel entry point.
///
/// Extends the simulation kernel ABI so one kernel can read several aligned
/// time series. Each series is addressed by an index resolved at compile time.
///
/// # Arguments (positional)
/// 1. `cursor_index: usize` - Current position in all series (0-based, must be aligned)
/// 2. `series_ptrs: *const *const f64` - Pointer to array of series data pointers;
///    each pointer is a single f64 array (one series's data)
/// 3. `table_count: usize` - Number of series (for bounds checking, known at compile time)
/// 4. `state_ptr: *mut u8` - Pointer to TypedObject state
///
/// # Memory Layout
/// ```text
/// series_ptrs[0] -> [spy_close[0], spy_close[1], ..., spy_close[n-1]]
/// series_ptrs[1] -> [vix_close[0], vix_close[1], ..., vix_close[n-1]]
/// ...
/// ```
///
/// # JIT Access Pattern
/// ```asm
/// ; context.spy (series index 0)
/// mov rax, [series_ptrs + 0*8]     ; series pointer
/// mov xmm0, [rax + cursor_index*8] ; value at cursor
/// ```
///
/// # Returns
/// * 0 = continue execution
/// * 1 = signal generated (written to state)
/// * negative = error
///
/// # Safety
/// The caller must ensure:
/// - `cursor_index` is within bounds for ALL series
/// - `series_ptrs` points to valid array of `table_count` data pointers
/// - All series have the same length (aligned timestamps)
/// - `state_ptr` points to valid TypedObject with correct schema
pub type CorrelatedKernelFn =
    unsafe extern "C" fn(usize, *const *const f64, usize, *mut u8) -> i32;
183
/// Compile-time description of a simulation kernel.
///
/// Carries the name-to-index and name-to-offset tables the code generator
/// needs in order to emit direct memory accesses for the kernel ABI.
///
/// Two modes are supported:
/// - **Single-series**: `column_map` maps field names (close, volume) to column indices
/// - **Multi-series**: `table_map` maps series names (spy, vix) to series indices
#[derive(Debug, Clone)]
pub struct SimulationKernelConfig {
    /// `(field_name, column_index)` entries, e.g. `[("close", 3), ("volume", 4)]`.
    /// Single-series mode: selects a column inside the one series.
    pub column_map: Vec<(String, usize)>,

    /// `(series_name, series_index)` entries,
    /// e.g. `[("spy", 0), ("vix", 1), ("temperature", 2)]`.
    /// Multi-series mode: selects one of several correlated series.
    ///
    /// CRITICAL for JIT: Resolved at compile time, NOT runtime.
    /// `context.spy` → `series_ptrs[0][cursor_idx]`
    pub table_map: Vec<(String, usize)>,

    /// `(field_name, byte_offset)` entries for the state object,
    /// e.g. `[("cash", 0), ("position", 8), ("entry_price", 16)]`.
    pub state_field_offsets: Vec<(String, usize)>,

    /// Schema ID for the state TypedObject
    pub state_schema_id: u32,

    /// Total number of columns in the data (single-series mode)
    pub column_count: usize,

    /// Total number of series (multi-series mode)
    pub table_count: usize,
}

impl SimulationKernelConfig {
    /// Shared constructor: all three mapping tables start out empty.
    fn with_counts(state_schema_id: u32, column_count: usize, table_count: usize) -> Self {
        SimulationKernelConfig {
            column_map: Vec::new(),
            table_map: Vec::new(),
            state_field_offsets: Vec::new(),
            state_schema_id,
            column_count,
            table_count,
        }
    }

    /// Returns the value stored under the first entry whose name equals `key`.
    fn lookup(entries: &[(String, usize)], key: &str) -> Option<usize> {
        entries
            .iter()
            .find_map(|(name, value)| (name.as_str() == key).then_some(*value))
    }

    /// Create a new kernel config for single-series mode (`table_count` is 0).
    pub fn new(state_schema_id: u32, column_count: usize) -> Self {
        Self::with_counts(state_schema_id, column_count, 0)
    }

    /// Create a new kernel config for multi-series (correlated) mode
    /// (`column_count` is 0).
    ///
    /// Intended for simulations spanning several aligned time series
    /// (e.g., SPY vs VIX, temperature vs pressure).
    pub fn new_multi_table(state_schema_id: u32, table_count: usize) -> Self {
        Self::with_counts(state_schema_id, 0, table_count)
    }

    /// Builder step: append a `field_name -> column_index` entry (single-series mode).
    pub fn map_column(mut self, field_name: &str, column_index: usize) -> Self {
        let entry = (field_name.to_owned(), column_index);
        self.column_map.push(entry);
        self
    }

    /// Builder step: append a `series_name -> series_index` entry (multi-series mode).
    ///
    /// CRITICAL: This mapping is resolved at compile time.
    /// `context.spy` in Shape → `series_ptrs[0][cursor_idx]` in generated code.
    pub fn map_series(mut self, series_name: &str, series_index: usize) -> Self {
        let entry = (series_name.to_owned(), series_index);
        self.table_map.push(entry);
        self
    }

    /// Builder step: append a `field_name -> byte offset` entry for the state object.
    pub fn map_state_field(mut self, field_name: &str, offset: usize) -> Self {
        let entry = (field_name.to_owned(), offset);
        self.state_field_offsets.push(entry);
        self
    }

    /// Column index registered for `field_name` (single-series mode).
    /// When a name was mapped more than once, the earliest mapping wins.
    pub fn get_column_index(&self, field_name: &str) -> Option<usize> {
        Self::lookup(&self.column_map, field_name)
    }

    /// Series index registered for `series_name` (multi-series mode).
    /// When a name was mapped more than once, the earliest mapping wins.
    ///
    /// This is used by the JIT compiler at compile time.
    pub fn get_series_index(&self, series_name: &str) -> Option<usize> {
        Self::lookup(&self.table_map, series_name)
    }

    /// Byte offset registered for the state field `field_name`.
    /// When a name was mapped more than once, the earliest mapping wins.
    pub fn get_state_offset(&self, field_name: &str) -> Option<usize> {
        Self::lookup(&self.state_field_offsets, field_name)
    }

    /// True when the config describes multi-series mode, i.e. a non-zero
    /// `table_count` or at least one mapped series.
    pub fn is_multi_table(&self) -> bool {
        !self.table_map.is_empty() || self.table_count > 0
    }
}
302
303// ============================================================================
304// JIT Data Structures
305// ============================================================================
306
/// JIT-compatible closure structure.
///
/// Holds `function_id` and a pointer to a heap-allocated array of captured
/// values. The array length is stored in a `u16`, so a closure can carry at
/// most `u16::MAX` (65,535) captures.
#[repr(C)]
pub struct JITClosure {
    pub function_id: u16,
    pub captures_count: u16,
    pub captures_ptr: *const u64, // Pointer to heap-allocated capture array (NaN-boxed)
}

impl JITClosure {
    /// Create a new JITClosure with dynamically allocated captures.
    ///
    /// The captures slice is copied into a heap-allocated `Box<[u64]>` that is
    /// leaked into a raw pointer. The memory is reclaimed by `drop_captures()`,
    /// which also runs automatically when the closure is dropped.
    ///
    /// # Panics
    /// Panics if `captures.len()` exceeds `u16::MAX`. The count field cannot
    /// represent a larger length, and a truncated count would later make
    /// `drop_captures()` free the allocation with the wrong size.
    pub fn new(function_id: u16, captures: &[u64]) -> Box<Self> {
        // Validate before allocating so the panic path leaks nothing.
        let captures_count = u16::try_from(captures.len()).unwrap_or_else(|_| {
            panic!(
                "JITClosure supports at most {} captures, got {}",
                u16::MAX,
                captures.len()
            )
        });
        let captures_box: Box<[u64]> = captures.into();
        // The fat slice pointer is narrowed to a thin element pointer; the
        // length lives on in `captures_count`.
        let captures_ptr = Box::into_raw(captures_box) as *const u64;
        Box::new(JITClosure {
            function_id,
            captures_count,
            captures_ptr,
        })
    }

    /// Read a capture value by index.
    ///
    /// # Safety
    /// The captures_ptr must be valid (not yet released by `drop_captures()`)
    /// and index must be < captures_count.
    #[inline]
    pub unsafe fn get_capture(&self, index: usize) -> u64 {
        debug_assert!(!self.captures_ptr.is_null());
        debug_assert!(index < self.captures_count as usize);
        // SAFETY: the caller guarantees a live capture array and an in-bounds index.
        unsafe { *self.captures_ptr.add(index) }
    }

    /// Free the heap-allocated captures array.
    ///
    /// Idempotent: safe to call multiple times (no-op after first call).
    ///
    /// # Safety
    /// The captures_ptr must point to a valid allocation created by `new()`,
    /// or be null (no-op).
    pub unsafe fn drop_captures(&mut self) {
        // A zero-length boxed slice owns no heap memory, so there is nothing to
        // release when `captures_count` is 0.
        if !self.captures_ptr.is_null() && self.captures_count > 0 {
            let count = self.captures_count as usize;
            // SAFETY: `new()` produced this pointer from a `Box<[u64]>` of exactly
            // `count` elements, and it has not been freed yet (it is nulled below).
            let reclaimed = unsafe {
                Box::from_raw(std::slice::from_raw_parts_mut(
                    self.captures_ptr as *mut u64,
                    count,
                ))
            };
            drop(reclaimed);
            // Null marks the array as released so later calls become no-ops.
            self.captures_ptr = std::ptr::null();
        }
    }
}

impl Drop for JITClosure {
    fn drop(&mut self) {
        // SAFETY: drop_captures is idempotent — if captures_ptr is already null
        // (e.g. from an explicit drop_captures() call), this is a no-op.
        unsafe { self.drop_captures() };
    }
}
370
371/// JIT-compatible duration structure
372#[repr(C)]
373pub struct JITDuration {
374    pub value: f64,
375    pub unit: u8, // 0=seconds, 1=minutes, 2=hours, 3=days, 4=weeks, 5=bars
376}
377
378impl JITDuration {
379    pub fn new(value: f64, unit: u8) -> Box<Self> {
380        Box::new(JITDuration { value, unit })
381    }
382
383    pub fn box_duration(duration: Box<JITDuration>) -> u64 {
384        use crate::nan_boxing::{HK_DURATION, jit_box};
385        jit_box(HK_DURATION, *duration)
386    }
387}
388
389/// JIT-compatible range structure
390/// Represents a range with start and end values (both NaN-boxed)
391#[repr(C)]
392pub struct JITRange {
393    pub start: u64, // NaN-boxed start value
394    pub end: u64,   // NaN-boxed end value
395}
396
397impl JITRange {
398    pub fn new(start: u64, end: u64) -> Box<Self> {
399        Box::new(JITRange { start, end })
400    }
401
402    pub fn box_range(range: Box<JITRange>) -> u64 {
403        use crate::nan_boxing::{HK_RANGE, jit_box};
404        jit_box(HK_RANGE, *range)
405    }
406}
407
408/// JIT-compatible SignalBuilder structure
409/// Represents a signal builder for method chaining (series.where().then().capture())
410#[repr(C)]
411pub struct JITSignalBuilder {
412    pub series: u64,                  // NaN-boxed TAG_TABLE
413    pub conditions: Vec<u64>,         // Array of (condition_type, condition_series) pairs
414    pub captures: Vec<(String, u64)>, // (name, value) pairs for captured values
415}
416
417impl JITSignalBuilder {
418    pub fn new(series: u64) -> Box<Self> {
419        Box::new(JITSignalBuilder {
420            series,
421            conditions: Vec::new(),
422            captures: Vec::new(),
423        })
424    }
425
426    pub fn add_where(&mut self, condition_series: u64) {
427        // 0 = WHERE condition
428        self.conditions.push(0);
429        self.conditions.push(condition_series);
430    }
431
432    pub fn add_then(&mut self, condition_series: u64, max_gap: u64) {
433        // 1 = THEN condition
434        self.conditions.push(1);
435        self.conditions.push(condition_series);
436        self.conditions.push(max_gap);
437    }
438
439    pub fn add_capture(&mut self, name: String, value: u64) {
440        self.captures.push((name, value));
441    }
442
443    pub fn box_builder(builder: Box<JITSignalBuilder>) -> u64 {
444        use crate::nan_boxing::{HK_JIT_SIGNAL_BUILDER, jit_box};
445        jit_box(HK_JIT_SIGNAL_BUILDER, *builder)
446    }
447}
448
449/// JIT-compatible data reference structure
450/// Represents a reference to a specific data row in time
451#[repr(C)]
452pub struct JITDataReference {
453    pub timestamp: i64,
454    pub symbol: *const String, // Pointer to symbol string
455    pub timeframe_value: u32,  // Timeframe value
456    pub timeframe_unit: u8,    // 0=Second, 1=Minute, 2=Hour, 3=Day, 4=Week, 5=Month, 6=Bar
457    pub has_timezone: bool,
458    pub timezone: *const String, // Pointer to timezone string (may be null)
459}
460
461impl JITDataReference {
462    pub fn box_data_ref(data_ref: Box<JITDataReference>) -> u64 {
463        use crate::nan_boxing::{HK_DATA_REFERENCE, jit_box};
464        jit_box(HK_DATA_REFERENCE, *data_ref)
465    }
466}
467
468// ============================================================================
469// JITContext - Main Execution Context
470// ============================================================================
471
/// JIT execution context passed to compiled functions
/// This struct must be C-compatible (#[repr(C)]) for FFI
///
/// Uses NaN-boxing for full type support
///
/// Layout note: generated code reads several fields at hardcoded byte offsets
/// (the `*_OFFSET` constants in this file, checked by compile-time asserts).
/// Do not reorder, insert or resize fields without updating those constants.
///
/// `Clone` is derived, so a clone copies every raw pointer as-is; the clone
/// refers to the same pointees as the original.
#[repr(C)]
#[derive(Debug, Clone)]
pub struct JITContext {
    // Position state
    pub in_position: bool,
    pub position_side: i8,       // 0=None, 1=Long, -1=Short
    pub entry_price: u64,        // NaN-boxed f64
    pub unrealized_pnl_pct: u64, // NaN-boxed f64

    // Timestamps pointer for time-based data access
    // (located at TIMESTAMPS_PTR_OFFSET)
    pub timestamps_ptr: *const i64,

    // ========== Generic DataFrame Access (industry-agnostic) ==========
    /// Array of column pointers (SIMD-aligned f64 arrays)
    /// Column order matches DataFrameSchema.column_names
    /// (located at COLUMN_PTRS_OFFSET)
    pub column_ptrs: *const *const f64,
    /// Number of columns in the DataFrame
    /// (located at COLUMN_COUNT_OFFSET)
    pub column_count: usize,
    /// Number of rows in the DataFrame
    /// (located at ROW_COUNT_OFFSET)
    pub row_count: usize,
    /// Current row index (for backtest iteration)
    /// (located at CURRENT_ROW_OFFSET)
    pub current_row: usize,

    // Local variables (NaN-boxed values), located at LOCALS_OFFSET
    pub locals: [u64; 256],

    // NaN-boxed stack for JIT execution, located at STACK_OFFSET;
    // `stack_ptr` follows it at STACK_PTR_OFFSET
    pub stack: [u64; 512],
    pub stack_ptr: usize,

    // Heap object storage (owned by VM, JIT just holds pointers)
    pub heap_ptr: *mut std::ffi::c_void,

    // Function table for Call opcode (pointer to array of function pointers)
    pub function_table: *const JittedStrategyFn,
    pub function_table_len: usize,

    // ExecutionContext pointer for fallback to interpreter
    pub exec_context_ptr: *mut std::ffi::c_void,

    // Function names for closure-to-Value conversion
    // Points to contiguous String array from BytecodeProgram.functions
    pub function_names_ptr: *const String,
    pub function_names_len: usize,

    // ========== Async Execution Support ==========
    /// Pointer to event queue (for FFI calls to poll/push events)
    /// Points to a SharedEventQueue behind the scenes
    pub event_queue_ptr: *mut std::ffi::c_void,

    /// Suspension state: 0 = running, 1 = yielded, 2 = suspended
    pub suspension_state: u32,

    /// Iterations since last yield (for cooperative scheduling)
    pub iterations_since_yield: u64,

    /// Yield threshold - yield after this many iterations
    /// 0 = never yield automatically
    pub yield_threshold: u64,

    /// Alert pipeline pointer (for FFI calls to emit alerts)
    /// Points to AlertRouter behind the scenes
    pub alert_pipeline_ptr: *mut std::ffi::c_void,

    // ========== Simulation Mode Support ==========
    /// Simulation mode: 0 = disabled, 1 = DenseKernel, 2 = HybridKernel
    pub simulation_mode: u32,

    /// Pointer to simulation state (TypedObject for DenseKernel)
    /// JIT code accesses state fields via direct memory offset
    pub simulation_state_ptr: *mut u8,

    /// Size of simulation state data (for deallocation)
    pub simulation_state_size: usize,

    // ========== GC Integration ==========
    /// Pointer to GC safepoint flag (AtomicBool raw pointer).
    /// Null when GC is not enabled. The JIT safepoint function reads this
    /// to determine if a GC cycle is requested.
    /// (located at GC_SAFEPOINT_FLAG_PTR_OFFSET)
    pub gc_safepoint_flag_ptr: *const u8,

    /// Pointer to GcHeap for allocation fast path.
    /// Null when GC is not enabled.
    pub gc_heap_ptr: *mut std::ffi::c_void,

    /// Opaque pointer to JIT foreign-call bridge state.
    /// Null when no foreign functions are linked for this execution.
    pub foreign_bridge_ptr: *const std::ffi::c_void,
}
565
566impl Default for JITContext {
567    fn default() -> Self {
568        Self {
569            in_position: false,
570            position_side: 0,
571            entry_price: box_number(0.0),
572            unrealized_pnl_pct: box_number(0.0),
573            // Timestamps pointer
574            timestamps_ptr: std::ptr::null(),
575            // Generic DataFrame access
576            column_ptrs: std::ptr::null(),
577            column_count: 0,
578            row_count: 0,
579            current_row: 0,
580            // Local variables and stack
581            locals: [TAG_NULL; 256],
582            stack: [TAG_NULL; 512],
583            stack_ptr: 0,
584            heap_ptr: std::ptr::null_mut(),
585            function_table: std::ptr::null(),
586            function_table_len: 0,
587            exec_context_ptr: std::ptr::null_mut(),
588            function_names_ptr: std::ptr::null(),
589            function_names_len: 0,
590            // Async execution support
591            event_queue_ptr: std::ptr::null_mut(),
592            suspension_state: 0,
593            iterations_since_yield: 0,
594            yield_threshold: 0, // 0 = no automatic yielding
595            alert_pipeline_ptr: std::ptr::null_mut(),
596            // Simulation mode support
597            simulation_mode: 0,
598            simulation_state_ptr: std::ptr::null_mut(),
599            simulation_state_size: 0,
600            // GC integration
601            gc_safepoint_flag_ptr: std::ptr::null(),
602            gc_heap_ptr: std::ptr::null_mut(),
603            foreign_bridge_ptr: std::ptr::null(),
604        }
605    }
606}
607
608impl JITContext {
609    /// Get column value at offset from current row
610    /// column_index is the column index in the DataFrame schema
611    pub fn get_column_value(&self, column_index: usize, offset: i32) -> f64 {
612        if self.column_ptrs.is_null() || column_index >= self.column_count {
613            return 0.0;
614        }
615        let row_idx = (self.current_row as i32 + offset) as usize;
616        if row_idx < self.row_count {
617            unsafe {
618                let col_ptr = *self.column_ptrs.add(column_index);
619                if !col_ptr.is_null() {
620                    *col_ptr.add(row_idx)
621                } else {
622                    0.0
623                }
624            }
625        } else {
626            0.0
627        }
628    }
629
630    /// Update current row index for DataFrame iteration
631    #[inline]
632    pub fn set_current_row(&mut self, index: usize) {
633        self.current_row = index;
634    }
635
636    /// Update current row for backtest iteration (alias for backward compatibility)
637    #[inline]
638    pub fn update_current_row(&mut self, index: usize) {
639        self.current_row = index;
640    }
641
642    // ========================================================================
643    // Simulation Mode Methods
644    // ========================================================================
645
646    /// Check if in simulation mode
647    #[inline]
648    pub fn is_simulation_mode(&self) -> bool {
649        self.simulation_mode > 0
650    }
651
652    /// Set up context for DenseKernel simulation.
653    ///
654    /// # Arguments
655    /// * `state_ptr` - Pointer to TypedObject state
656    /// * `state_size` - Size of state data
657    /// * `column_ptrs` - Pointers to data columns
658    /// * `column_count` - Number of columns
659    /// * `row_count` - Number of rows
660    /// * `timestamps` - Pointer to timestamp array
661    pub fn setup_simulation(
662        &mut self,
663        state_ptr: *mut u8,
664        state_size: usize,
665        column_ptrs: *const *const f64,
666        column_count: usize,
667        row_count: usize,
668        timestamps: *const i64,
669    ) {
670        self.simulation_mode = 1; // DenseKernel mode
671        self.simulation_state_ptr = state_ptr;
672        self.simulation_state_size = state_size;
673        self.column_ptrs = column_ptrs;
674        self.column_count = column_count;
675        self.row_count = row_count;
676        self.current_row = 0;
677        self.timestamps_ptr = timestamps;
678    }
679
680    /// Get simulation state field as f64.
681    ///
682    /// # Safety
683    /// Caller must ensure offset is valid for the state TypedObject.
684    #[inline]
685    pub unsafe fn get_state_field_f64(&self, offset: usize) -> f64 {
686        if self.simulation_state_ptr.is_null() {
687            return 0.0;
688        }
689        let field_ptr = unsafe { self.simulation_state_ptr.add(8 + offset) } as *const u64;
690        let bits = unsafe { *field_ptr };
691        unbox_number(bits)
692    }
693
694    /// Set simulation state field as f64.
695    ///
696    /// # Safety
697    /// Caller must ensure offset is valid for the state TypedObject.
698    #[inline]
699    pub unsafe fn set_state_field_f64(&mut self, offset: usize, value: f64) {
700        if self.simulation_state_ptr.is_null() {
701            return;
702        }
703        let field_ptr = unsafe { self.simulation_state_ptr.add(8 + offset) } as *mut u64;
704        unsafe { *field_ptr = box_number(value) };
705    }
706
707    /// Clear simulation mode.
708    pub fn clear_simulation(&mut self) {
709        self.simulation_mode = 0;
710        self.simulation_state_ptr = std::ptr::null_mut();
711        self.simulation_state_size = 0;
712    }
713}
714
715// ============================================================================
716// JITDataFrame - Generic DataFrame for JIT (industry-agnostic)
717// ============================================================================
718
719/// Generic DataFrame storage for JIT execution.
720/// Stores data as an array of columns, matching the generic column_ptrs
721/// design in JITContext.
722///
723/// Column order MUST match the DataFrameSchema used during compilation.
724pub struct JITDataFrame {
725    /// Column data arrays (each Vec is one column)
726    /// Columns are ordered by index as defined in DataFrameSchema
727    pub columns: Vec<Vec<f64>>,
728    /// Pointers to column data (for JITContext.column_ptrs)
729    pub column_ptrs: Vec<*const f64>,
730    /// Timestamps (always present, column 0 equivalent)
731    pub timestamps: Vec<i64>,
732    /// Number of rows
733    pub row_count: usize,
734}
735
736impl JITDataFrame {
737    /// Create an empty JITDataFrame
738    pub fn new() -> Self {
739        Self {
740            columns: Vec::new(),
741            column_ptrs: Vec::new(),
742            timestamps: Vec::new(),
743            row_count: 0,
744        }
745    }
746
747    /// Create from ExecutionContext using a schema mapping.
748    /// The schema determines which columns to extract and their order.
749    pub fn from_execution_context(
750        ctx: &shape_runtime::context::ExecutionContext,
751        schema: &shape_vm::bytecode::DataFrameSchema,
752    ) -> Self {
753        let mut data = Self::new();
754
755        // NOTE: Series caching not yet implemented in ExecutionContext
756        // For now, initialize empty columns for each schema column
757        // TODO: Implement series caching when available
758        let _ = (ctx, schema); // Suppress unused warnings
759        for _ in 0..schema.column_names.len() {
760            data.columns.push(Vec::new());
761            data.column_ptrs.push(std::ptr::null());
762        }
763
764        data
765    }
766
767    /// Populate a JITContext with generic DataFrame pointers.
768    /// This sets column_ptrs, column_count, row_count, and timestamps_ptr.
769    pub fn populate_context(&self, ctx: &mut JITContext) {
770        if !self.column_ptrs.is_empty() {
771            ctx.column_ptrs = self.column_ptrs.as_ptr();
772            ctx.column_count = self.column_ptrs.len();
773        }
774        ctx.row_count = self.row_count;
775
776        if !self.timestamps.is_empty() {
777            ctx.timestamps_ptr = self.timestamps.as_ptr();
778        }
779    }
780
781    /// Get the number of rows
782    pub fn len(&self) -> usize {
783        self.row_count
784    }
785
786    /// Check if empty
787    pub fn is_empty(&self) -> bool {
788        self.row_count == 0
789    }
790
791    /// Get number of columns
792    pub fn column_count(&self) -> usize {
793        self.columns.len()
794    }
795
796    /// Create from a DataTable by extracting f64 columns and an optional timestamp column.
797    ///
798    /// All f64 columns are copied into SIMD-aligned buffers. If a column named
799    /// "timestamp" (or typed as Timestamp) exists, it is extracted as i64.
800    pub fn from_datatable(dt: &shape_value::DataTable) -> Self {
801        use arrow_array::cast::AsArray;
802        use arrow_schema::{DataType, TimeUnit};
803
804        let batch = dt.inner();
805        let schema = batch.schema();
806        let num_rows = batch.num_rows();
807        let mut columns = Vec::new();
808        let mut timestamps = Vec::new();
809
810        for (i, field) in schema.fields().iter().enumerate() {
811            match field.data_type() {
812                DataType::Float64 => {
813                    let arr = batch
814                        .column(i)
815                        .as_primitive::<arrow_array::types::Float64Type>();
816                    let col: Vec<f64> = (0..num_rows).map(|r| arr.value(r)).collect();
817                    columns.push(col);
818                }
819                DataType::Timestamp(TimeUnit::Microsecond, _) => {
820                    let arr = batch
821                        .column(i)
822                        .as_primitive::<arrow_array::types::TimestampMicrosecondType>();
823                    timestamps = (0..num_rows).map(|r| arr.value(r)).collect();
824                }
825                DataType::Int64 => {
826                    // Convert i64 to f64 for JIT column access
827                    let arr = batch
828                        .column(i)
829                        .as_primitive::<arrow_array::types::Int64Type>();
830                    let col: Vec<f64> = (0..num_rows).map(|r| arr.value(r) as f64).collect();
831                    columns.push(col);
832                }
833                _ => {
834                    // Skip non-numeric columns (strings, bools, etc.)
835                }
836            }
837        }
838
839        let column_ptrs: Vec<*const f64> = columns.iter().map(|c| c.as_ptr()).collect();
840
841        Self {
842            columns,
843            column_ptrs,
844            timestamps,
845            row_count: num_rows,
846        }
847    }
848}
849
850impl Default for JITDataFrame {
851    fn default() -> Self {
852        Self::new()
853    }
854}
855
856// ============================================================================
857// JITConfig - Compilation Configuration
858// ============================================================================
859
/// Settings that control JIT compilation.
///
/// The `Default` value selects optimization level 3, no debug symbols, and a
/// JIT threshold of 100 executions.
#[derive(Debug, Clone)]
pub struct JITConfig {
    /// Optimization level; the documented range is 0 through 3.
    pub opt_level: u8,
    /// Whether debug symbols are enabled.
    pub debug_symbols: bool,
    /// Minimum execution count before JIT compilation.
    pub jit_threshold: usize,
}

impl Default for JITConfig {
    /// Returns the stock configuration: `opt_level = 3`,
    /// `debug_symbols = false`, `jit_threshold = 100`.
    fn default() -> Self {
        let opt_level = 3;
        let debug_symbols = false;
        let jit_threshold = 100;

        JITConfig {
            opt_level,
            debug_symbols,
            jit_threshold,
        }
    }
}
880
881#[cfg(test)]
882mod tests {
883    use super::*;
884
885    #[test]
886    fn test_closure_dynamic_captures_0() {
887        // Zero captures — captures_ptr should be a valid (empty) allocation
888        let closure = JITClosure::new(42, &[]);
889        assert_eq!(closure.function_id, 42);
890        assert_eq!(closure.captures_count, 0);
891        // Drop is safe even with 0 captures
892        let mut closure = closure;
893        unsafe { closure.drop_captures() };
894    }
895
896    #[test]
897    fn test_closure_dynamic_captures_5() {
898        // Typical case: 5 captures
899        let captures = [
900            box_number(1.0),
901            box_number(2.0),
902            box_number(3.0),
903            TAG_BOOL_TRUE,
904            TAG_NULL,
905        ];
906        let closure = JITClosure::new(7, &captures);
907        assert_eq!(closure.function_id, 7);
908        assert_eq!(closure.captures_count, 5);
909
910        unsafe {
911            assert_eq!(unbox_number(closure.get_capture(0)), 1.0);
912            assert_eq!(unbox_number(closure.get_capture(1)), 2.0);
913            assert_eq!(unbox_number(closure.get_capture(2)), 3.0);
914            assert_eq!(closure.get_capture(3), TAG_BOOL_TRUE);
915            assert_eq!(closure.get_capture(4), TAG_NULL);
916        }
917    }
918
919    #[test]
920    fn test_closure_dynamic_captures_20() {
921        // Exceeds old 16-capture limit
922        let captures: Vec<u64> = (0..20).map(|i| box_number(i as f64)).collect();
923        let closure = JITClosure::new(99, &captures);
924        assert_eq!(closure.captures_count, 20);
925
926        unsafe {
927            for i in 0..20 {
928                assert_eq!(unbox_number(closure.get_capture(i)), i as f64);
929            }
930        }
931    }
932
933    #[test]
934    fn test_closure_dynamic_captures_64() {
935        // Stress test: 64 captures
936        let captures: Vec<u64> = (0..64).map(|i| box_number(i as f64 * 10.0)).collect();
937        let closure = JITClosure::new(1, &captures);
938        assert_eq!(closure.captures_count, 64);
939
940        unsafe {
941            for i in 0..64 {
942                assert_eq!(unbox_number(closure.get_capture(i)), i as f64 * 10.0);
943            }
944        }
945    }
946
947    #[test]
948    fn test_closure_captures_drop() {
949        // Verify memory is properly freed (no leak under Miri/ASAN)
950        let captures: Vec<u64> = (0..32).map(|i| box_number(i as f64)).collect();
951        let mut closure = JITClosure::new(5, &captures);
952        assert_eq!(closure.captures_count, 32);
953
954        // Verify captures are valid before drop
955        unsafe {
956            assert_eq!(unbox_number(closure.get_capture(0)), 0.0);
957            assert_eq!(unbox_number(closure.get_capture(31)), 31.0);
958        }
959
960        // Drop captures
961        unsafe { closure.drop_captures() };
962        assert!(closure.captures_ptr.is_null());
963        assert_eq!(closure.captures_count, 32); // count unchanged, ptr nulled
964    }
965
966    #[test]
967    fn test_closure_jit_box_roundtrip() {
968        // Verify JITClosure survives jit_box/jit_unbox roundtrip
969        let captures = [box_number(42.0), TAG_BOOL_FALSE];
970        let closure = JITClosure::new(10, &captures);
971        let bits = jit_box(HK_CLOSURE, *closure);
972
973        assert!(is_heap_kind(bits, HK_CLOSURE));
974
975        let recovered = unsafe { jit_unbox::<JITClosure>(bits) };
976        assert_eq!(recovered.function_id, 10);
977        assert_eq!(recovered.captures_count, 2);
978        unsafe {
979            assert_eq!(unbox_number(recovered.get_capture(0)), 42.0);
980            assert_eq!(recovered.get_capture(1), TAG_BOOL_FALSE);
981        }
982    }
983
984    #[test]
985    fn test_closure_drop_impl_frees_captures_via_jit_drop() {
986        // Verify the Drop impl on JITClosure frees the captures array
987        // when the owning JitAlloc is freed via jit_drop.
988        // Under Miri/ASAN this would catch a leak if Drop didn't work.
989        let captures: Vec<u64> = (0..24).map(|i| box_number(i as f64)).collect();
990        let closure = JITClosure::new(3, &captures);
991        let bits = jit_box(HK_CLOSURE, *closure);
992
993        // Read captures to confirm they're valid
994        let recovered = unsafe { jit_unbox::<JITClosure>(bits) };
995        assert_eq!(recovered.captures_count, 24);
996        unsafe {
997            assert_eq!(unbox_number(recovered.get_capture(23)), 23.0);
998        }
999
1000        // jit_drop frees JitAlloc<JITClosure>, which calls Drop::drop on
1001        // JITClosure, which frees the captures array.
1002        unsafe { jit_drop::<JITClosure>(bits) };
1003    }
1004
1005    #[test]
1006    fn test_closure_implicit_drop_on_box() {
1007        // Verify that simply dropping a Box<JITClosure> frees the captures.
1008        // (This tests the Drop impl without jit_box involvement.)
1009        let captures: Vec<u64> = (0..10).map(|i| box_number(i as f64)).collect();
1010        let closure = JITClosure::new(1, &captures);
1011        // closure is Box<JITClosure>, dropping it should free captures via Drop
1012        drop(closure);
1013        // No leak under Miri/ASAN
1014    }
1015}