shape_jit/context.rs
1//! JIT Context and Related Data Structures
2//!
3//! Contains the runtime context and data structures used by JIT-compiled code.
4
5use super::nan_boxing::*;
6
7// ============================================================================
8// JITContext Field Offsets for Direct Memory Access
9// ============================================================================
10//
11// These must match the #[repr(C)] struct layout of JITContext
12// Regenerate with: rustc --edition 2024 scripts/jit_offsets.rs && ./jit_offsets
13
// Generic column access - columns are accessed via column_ptrs array indexed by column_map
// Timestamps pointer for time-based access
pub const TIMESTAMPS_PTR_OFFSET: i32 = 24;

// DataFrame access offsets
pub const COLUMN_PTRS_OFFSET: i32 = 32;
pub const COLUMN_COUNT_OFFSET: i32 = 40;
pub const ROW_COUNT_OFFSET: i32 = 48;
pub const CURRENT_ROW_OFFSET: i32 = 56;

// Locals and stack offsets
// NOTE: the inline arithmetic comments assume 8-byte slots for the
// locals[256] and stack[512] arrays of JITContext.
pub const LOCALS_OFFSET: i32 = 64;
pub const STACK_OFFSET: i32 = 2112; // 64 + (256 * 8)
pub const STACK_PTR_OFFSET: i32 = 6208; // 2112 + (512 * 8)

// GC safepoint flag pointer offset (for inline safepoint check)
pub const GC_SAFEPOINT_FLAG_PTR_OFFSET: i32 = 6328;
31
32// ============================================================================
33// Compile-time layout verification for JITContext
34// ============================================================================
35//
36// These assertions ensure the hardcoded byte offsets above remain in sync with
37// the actual #[repr(C)] struct layout. A mismatch will produce a compile error.
// Evaluated entirely at compile time: each assert! compares a hardcoded
// offset constant against the real field offset computed by offset_of!.
// Any drift in JITContext's #[repr(C)] layout fails the build here.
const _: () = {
    assert!(
        std::mem::offset_of!(JITContext, timestamps_ptr) == TIMESTAMPS_PTR_OFFSET as usize,
        "TIMESTAMPS_PTR_OFFSET does not match JITContext layout"
    );
    assert!(
        std::mem::offset_of!(JITContext, column_ptrs) == COLUMN_PTRS_OFFSET as usize,
        "COLUMN_PTRS_OFFSET does not match JITContext layout"
    );
    assert!(
        std::mem::offset_of!(JITContext, column_count) == COLUMN_COUNT_OFFSET as usize,
        "COLUMN_COUNT_OFFSET does not match JITContext layout"
    );
    assert!(
        std::mem::offset_of!(JITContext, row_count) == ROW_COUNT_OFFSET as usize,
        "ROW_COUNT_OFFSET does not match JITContext layout"
    );
    assert!(
        std::mem::offset_of!(JITContext, current_row) == CURRENT_ROW_OFFSET as usize,
        "CURRENT_ROW_OFFSET does not match JITContext layout"
    );
    assert!(
        std::mem::offset_of!(JITContext, locals) == LOCALS_OFFSET as usize,
        "LOCALS_OFFSET does not match JITContext layout"
    );
    assert!(
        std::mem::offset_of!(JITContext, stack) == STACK_OFFSET as usize,
        "STACK_OFFSET does not match JITContext layout"
    );
    assert!(
        std::mem::offset_of!(JITContext, stack_ptr) == STACK_PTR_OFFSET as usize,
        "STACK_PTR_OFFSET does not match JITContext layout"
    );
    assert!(
        std::mem::offset_of!(JITContext, gc_safepoint_flag_ptr) == GC_SAFEPOINT_FLAG_PTR_OFFSET as usize,
        "GC_SAFEPOINT_FLAG_PTR_OFFSET does not match JITContext layout"
    );
};
76
77// ============================================================================
78// Type Aliases
79// ============================================================================
80
/// Function pointer type for JIT-compiled strategy functions.
///
/// Returns 0 on success; negative values indicate errors (see `OsrEntryFn`
/// for the deopt sentinel shared with OSR entries).
pub type JittedStrategyFn = unsafe extern "C" fn(*mut JITContext) -> i32;

/// Legacy function signature for simple numeric computations
pub type JittedFn = unsafe extern "C" fn(*mut f64, *const f64, usize) -> f64;

/// OSR entry function signature.
///
/// This has the same binary signature as `JittedStrategyFn` -- the difference
/// is semantic: for OSR entry, the caller pre-fills `JITContext.locals` from
/// the interpreter's live frame before invocation, and reads modified locals
/// back on return.
///
/// # Arguments
/// * `ctx_ptr` - Pointer to a `JITContext` with locals pre-filled from the
///   interpreter frame (marshaled using the `OsrEntryPoint.local_kinds`).
///
/// # Returns
/// * `0` - Success: execution completed. Modified locals are in
///   `JITContext.locals`. The VM reads them back and continues
///   at `OsrEntryPoint.exit_ip`.
/// * `i32::MIN+1` - Deopt requested: a type guard failed mid-loop. The VM
///   reads locals from `JITContext.locals` and resumes at
///   the `DeoptInfo.resume_ip` for the failing guard.
/// * Other negative - Error.
pub type OsrEntryFn = unsafe extern "C" fn(*mut JITContext) -> i32;
107
108// ============================================================================
109// Simulation Kernel ABI (Zero-Allocation Hot Path)
110// ============================================================================
111
/// Function pointer type for simulation kernel functions (single series).
///
/// This is the "fused step" ABI that enables >10M ticks/sec by:
/// - Bypassing JITContext setup overhead
/// - Using direct pointer arithmetic for data access
/// - Avoiding all allocations in the hot loop
///
/// # Arguments
/// * `cursor_index` - Current position in the series (0-based)
/// * `series_ptrs` - Pointer to array of column pointers (*const *const f64)
/// * `state_ptr` - Pointer to TypedObject state (*mut u8)
///
/// # Returns
/// * 0 = continue execution
/// * 1 = signal generated (written to state)
/// * negative = error
///
/// # Safety
/// The caller must ensure:
/// - `cursor_index` is within bounds
/// - `series_ptrs` points to valid column pointer array
/// - `state_ptr` points to valid TypedObject with correct schema
pub type SimulationKernelFn = unsafe extern "C" fn(
    cursor_index: usize,
    series_ptrs: *const *const f64,
    state_ptr: *mut u8,
) -> i32;
139
/// Function pointer type for correlated (multi-series) kernel functions.
///
/// This extends the simulation kernel ABI to support multiple aligned time series.
/// Each series is accessed via compile-time resolved indices.
///
/// # Arguments
/// * `cursor_index` - Current position in all series (0-based, must be aligned)
/// * `series_ptrs` - Pointer to array of series data pointers (*const *const f64)
///   Each pointer is a single f64 array (one series's data)
/// * `table_count` - Number of series (for bounds checking, known at compile time)
/// * `state_ptr` - Pointer to TypedObject state (*mut u8)
///
/// # Memory Layout
/// ```text
/// series_ptrs[0] -> [spy_close[0], spy_close[1], ..., spy_close[n-1]]
/// series_ptrs[1] -> [vix_close[0], vix_close[1], ..., vix_close[n-1]]
/// ...
/// ```
///
/// # JIT Access Pattern
/// ```asm
/// ; context.spy (series index 0)
/// mov rax, [series_ptrs + 0*8]      ; series pointer
/// mov xmm0, [rax + cursor_index*8]  ; value at cursor
/// ```
///
/// # Returns
/// * 0 = continue execution
/// * 1 = signal generated (written to state)
/// * negative = error
///
/// # Safety
/// The caller must ensure:
/// - `cursor_index` is within bounds for ALL series
/// - `series_ptrs` points to valid array of `table_count` data pointers
/// - All series have the same length (aligned timestamps)
/// - `state_ptr` points to valid TypedObject with correct schema
pub type CorrelatedKernelFn = unsafe extern "C" fn(
    cursor_index: usize,
    series_ptrs: *const *const f64,
    table_count: usize,
    state_ptr: *mut u8,
) -> i32;
183
/// Configuration for compiling a simulation kernel.
///
/// Supplies the name-to-index/offset mappings the JIT compiler needs to emit
/// direct memory accesses for the kernel ABI.
///
/// Two modes are supported:
/// - **Single-series**: `column_map` maps field names (close, volume) to column indices
/// - **Multi-series**: `table_map` maps series names (spy, vix) to series indices
#[derive(Debug, Clone)]
pub struct SimulationKernelConfig {
    /// Column index mappings: (field_name, column_index)
    /// e.g., [("close", 3), ("volume", 4)]
    /// Used in single-series mode for accessing columns within one series
    pub column_map: Vec<(String, usize)>,

    /// Series index mappings: (series_name, series_index)
    /// e.g., [("spy", 0), ("vix", 1), ("temperature", 2)]
    /// Used in multi-series mode for accessing multiple correlated series
    ///
    /// CRITICAL for JIT: Resolved at compile time, NOT runtime.
    /// `context.spy` → `series_ptrs[0][cursor_idx]`
    pub table_map: Vec<(String, usize)>,

    /// State field offsets: (field_name, byte_offset)
    /// e.g., [("cash", 0), ("position", 8), ("entry_price", 16)]
    pub state_field_offsets: Vec<(String, usize)>,

    /// Schema ID for the state TypedObject
    pub state_schema_id: u32,

    /// Total number of columns in the data (single-series mode)
    pub column_count: usize,

    /// Total number of series (multi-series mode)
    pub table_count: usize,
}

impl SimulationKernelConfig {
    /// Shared skeleton: a config with empty mappings and zero counts.
    fn empty(state_schema_id: u32) -> Self {
        Self {
            column_map: Vec::new(),
            table_map: Vec::new(),
            state_field_offsets: Vec::new(),
            state_schema_id,
            column_count: 0,
            table_count: 0,
        }
    }

    /// Create a new kernel config for single-series mode.
    pub fn new(state_schema_id: u32, column_count: usize) -> Self {
        Self {
            column_count,
            ..Self::empty(state_schema_id)
        }
    }

    /// Create a new kernel config for multi-series (correlated) mode.
    ///
    /// Use this when simulating across multiple aligned time series
    /// (e.g., SPY vs VIX, temperature vs pressure).
    pub fn new_multi_table(state_schema_id: u32, table_count: usize) -> Self {
        Self {
            table_count,
            ..Self::empty(state_schema_id)
        }
    }

    /// Map a data field name to a column index (single-series mode).
    pub fn map_column(mut self, field_name: &str, column_index: usize) -> Self {
        self.column_map.push((field_name.to_owned(), column_index));
        self
    }

    /// Map a series name to a series index (multi-series mode).
    ///
    /// CRITICAL: This mapping is resolved at compile time.
    /// `context.spy` in Shape → `series_ptrs[0][cursor_idx]` in generated code.
    pub fn map_series(mut self, series_name: &str, series_index: usize) -> Self {
        self.table_map.push((series_name.to_owned(), series_index));
        self
    }

    /// Map a state field name to a byte offset.
    pub fn map_state_field(mut self, field_name: &str, offset: usize) -> Self {
        self.state_field_offsets.push((field_name.to_owned(), offset));
        self
    }

    /// Get column index for a field name (single-series mode).
    pub fn get_column_index(&self, field_name: &str) -> Option<usize> {
        self.column_map
            .iter()
            .find_map(|(name, idx)| (name == field_name).then_some(*idx))
    }

    /// Get series index for a series name (multi-series mode).
    ///
    /// This is used by the JIT compiler at compile time.
    pub fn get_series_index(&self, series_name: &str) -> Option<usize> {
        self.table_map
            .iter()
            .find_map(|(name, idx)| (name == series_name).then_some(*idx))
    }

    /// Get state field offset for a field name.
    pub fn get_state_offset(&self, field_name: &str) -> Option<usize> {
        self.state_field_offsets
            .iter()
            .find_map(|(name, off)| (name == field_name).then_some(*off))
    }

    /// Check if this config is for multi-series mode.
    pub fn is_multi_table(&self) -> bool {
        !self.table_map.is_empty() || self.table_count > 0
    }
}
302
303// ============================================================================
304// JIT Data Structures
305// ============================================================================
306
/// JIT-compatible closure structure.
/// Holds function_id and a pointer to a heap-allocated array of captured values.
/// Supports up to `u16::MAX` captures (the width of `captures_count`).
#[repr(C)]
pub struct JITClosure {
    pub function_id: u16,
    pub captures_count: u16,
    pub captures_ptr: *const u64, // Pointer to heap-allocated capture array (NaN-boxed)
}

impl JITClosure {
    /// Create a new JITClosure with dynamically allocated captures.
    ///
    /// The captures slice is copied into a heap-allocated `Box<[u64]>` that is
    /// leaked into a raw pointer. Call `drop_captures()` (or rely on the
    /// `Drop` impl) to reclaim the memory.
    ///
    /// # Panics
    /// Panics if `captures.len() > u16::MAX`. The previous `as u16` cast
    /// silently truncated the count, which would make `drop_captures()`
    /// rebuild the boxed slice with the wrong length — undefined behavior
    /// when the allocation is freed.
    pub fn new(function_id: u16, captures: &[u64]) -> Box<Self> {
        let captures_count = u16::try_from(captures.len())
            .expect("JITClosure supports at most u16::MAX captures");
        let captures_box: Box<[u64]> = captures.to_vec().into_boxed_slice();
        let captures_ptr = Box::into_raw(captures_box) as *const u64;
        Box::new(JITClosure {
            function_id,
            captures_count,
            captures_ptr,
        })
    }

    /// Safely read a capture value by index.
    ///
    /// # Safety
    /// The captures_ptr must be valid (not yet freed by `drop_captures`) and
    /// index must be < captures_count.
    #[inline]
    pub unsafe fn get_capture(&self, index: usize) -> u64 {
        debug_assert!(!self.captures_ptr.is_null());
        debug_assert!(index < self.captures_count as usize);
        unsafe { *self.captures_ptr.add(index) }
    }

    /// Free the heap-allocated captures array.
    ///
    /// Idempotent: safe to call multiple times (no-op after first call, since
    /// the pointer is nulled). Skipping the free when `captures_count == 0`
    /// is sound because a zero-length boxed slice owns no heap memory.
    ///
    /// # Safety
    /// The captures_ptr must point to a valid allocation created by `new()`,
    /// or be null (no-op).
    pub unsafe fn drop_captures(&mut self) {
        if !self.captures_ptr.is_null() && self.captures_count > 0 {
            let count = self.captures_count as usize;
            // SAFETY (caller contract): ptr/count describe exactly the
            // allocation produced by new(); rebuilding and dropping the Box
            // releases it once.
            let _ = unsafe {
                Box::from_raw(std::slice::from_raw_parts_mut(
                    self.captures_ptr as *mut u64,
                    count,
                ))
            };
            self.captures_ptr = std::ptr::null();
        }
    }
}

impl Drop for JITClosure {
    fn drop(&mut self) {
        // SAFETY: drop_captures is idempotent — if captures_ptr is already null
        // (e.g. from an explicit drop_captures() call), this is a no-op.
        unsafe { self.drop_captures() };
    }
}
370
/// JIT-compatible duration structure
#[repr(C)]
pub struct JITDuration {
    pub value: f64,
    pub unit: u8, // 0=seconds, 1=minutes, 2=hours, 3=days, 4=weeks, 5=bars
}

impl JITDuration {
    /// Allocate a new duration on the heap. `unit` uses the encoding
    /// documented on the field above; values outside 0..=5 are not validated.
    pub fn new(value: f64, unit: u8) -> Box<Self> {
        Box::new(JITDuration { value, unit })
    }

    /// Consume the boxed duration and NaN-box it with the HK_DURATION
    /// heap-kind tag so JIT code can carry it as a u64.
    pub fn box_duration(duration: Box<JITDuration>) -> u64 {
        use crate::nan_boxing::{HK_DURATION, jit_box};
        jit_box(HK_DURATION, *duration)
    }
}
388
/// JIT-compatible range structure
/// Represents a range with start and end values (both NaN-boxed)
#[repr(C)]
pub struct JITRange {
    pub start: u64, // NaN-boxed start value
    pub end: u64,   // NaN-boxed end value
}

impl JITRange {
    /// Allocate a new range on the heap. Both bounds are already NaN-boxed
    /// values; no validation or ordering check is performed here.
    pub fn new(start: u64, end: u64) -> Box<Self> {
        Box::new(JITRange { start, end })
    }

    /// Consume the boxed range and NaN-box it with the HK_RANGE heap-kind
    /// tag so JIT code can carry it as a u64.
    pub fn box_range(range: Box<JITRange>) -> u64 {
        use crate::nan_boxing::{HK_RANGE, jit_box};
        jit_box(HK_RANGE, *range)
    }
}
407
408/// JIT-compatible SignalBuilder structure
409/// Represents a signal builder for method chaining (series.where().then().capture())
410#[repr(C)]
411pub struct JITSignalBuilder {
412 pub series: u64, // NaN-boxed TAG_TABLE
413 pub conditions: Vec<u64>, // Array of (condition_type, condition_series) pairs
414 pub captures: Vec<(String, u64)>, // (name, value) pairs for captured values
415}
416
417impl JITSignalBuilder {
418 pub fn new(series: u64) -> Box<Self> {
419 Box::new(JITSignalBuilder {
420 series,
421 conditions: Vec::new(),
422 captures: Vec::new(),
423 })
424 }
425
426 pub fn add_where(&mut self, condition_series: u64) {
427 // 0 = WHERE condition
428 self.conditions.push(0);
429 self.conditions.push(condition_series);
430 }
431
432 pub fn add_then(&mut self, condition_series: u64, max_gap: u64) {
433 // 1 = THEN condition
434 self.conditions.push(1);
435 self.conditions.push(condition_series);
436 self.conditions.push(max_gap);
437 }
438
439 pub fn add_capture(&mut self, name: String, value: u64) {
440 self.captures.push((name, value));
441 }
442
443 pub fn box_builder(builder: Box<JITSignalBuilder>) -> u64 {
444 use crate::nan_boxing::{HK_JIT_SIGNAL_BUILDER, jit_box};
445 jit_box(HK_JIT_SIGNAL_BUILDER, *builder)
446 }
447}
448
/// JIT-compatible data reference structure
/// Represents a reference to a specific data row in time
#[repr(C)]
pub struct JITDataReference {
    pub timestamp: i64,
    pub symbol: *const String, // Pointer to symbol string
    pub timeframe_value: u32,  // Timeframe value
    pub timeframe_unit: u8,    // 0=Second, 1=Minute, 2=Hour, 3=Day, 4=Week, 5=Month, 6=Bar
    pub has_timezone: bool,
    pub timezone: *const String, // Pointer to timezone string (may be null)
}

impl JITDataReference {
    /// Consume the boxed reference and NaN-box it with the HK_DATA_REFERENCE
    /// heap-kind tag. The `symbol`/`timezone` pointers are copied as-is;
    /// their owners must outlive the boxed value.
    pub fn box_data_ref(data_ref: Box<JITDataReference>) -> u64 {
        use crate::nan_boxing::{HK_DATA_REFERENCE, jit_box};
        jit_box(HK_DATA_REFERENCE, *data_ref)
    }
}
467
468// ============================================================================
469// JITContext - Main Execution Context
470// ============================================================================
471
/// JIT execution context passed to compiled functions
/// This struct must be C-compatible (#[repr(C)]) for FFI
///
/// Uses NaN-boxing for full type support
///
/// IMPORTANT: field order defines the ABI. The hardcoded *_OFFSET constants
/// at the top of this file (verified by the offset_of! assertions) depend on
/// this exact layout — do not reorder or insert fields without updating them.
#[repr(C)]
#[derive(Debug, Clone)]
pub struct JITContext {
    // Position state
    pub in_position: bool,
    pub position_side: i8,       // 0=None, 1=Long, -1=Short
    pub entry_price: u64,        // NaN-boxed f64
    pub unrealized_pnl_pct: u64, // NaN-boxed f64

    // Timestamps pointer for time-based data access
    pub timestamps_ptr: *const i64,

    // ========== Generic DataFrame Access (industry-agnostic) ==========
    /// Array of column pointers (SIMD-aligned f64 arrays)
    /// Column order matches DataFrameSchema.column_names
    pub column_ptrs: *const *const f64,
    /// Number of columns in the DataFrame
    pub column_count: usize,
    /// Number of rows in the DataFrame
    pub row_count: usize,
    /// Current row index (for backtest iteration)
    pub current_row: usize,

    // Local variables (NaN-boxed values)
    pub locals: [u64; 256],

    // NaN-boxed stack for JIT execution
    pub stack: [u64; 512],
    pub stack_ptr: usize,

    // Heap object storage (owned by VM, JIT just holds pointers)
    pub heap_ptr: *mut std::ffi::c_void,

    // Function table for Call opcode (pointer to array of function pointers)
    pub function_table: *const JittedStrategyFn,
    pub function_table_len: usize,

    // ExecutionContext pointer for fallback to interpreter
    pub exec_context_ptr: *mut std::ffi::c_void,

    // Function names for closure-to-Value conversion
    // Points to contiguous String array from BytecodeProgram.functions
    pub function_names_ptr: *const String,
    pub function_names_len: usize,

    // ========== Async Execution Support ==========
    /// Pointer to event queue (for FFI calls to poll/push events)
    /// Points to a SharedEventQueue behind the scenes
    pub event_queue_ptr: *mut std::ffi::c_void,

    /// Suspension state: 0 = running, 1 = yielded, 2 = suspended
    pub suspension_state: u32,

    /// Iterations since last yield (for cooperative scheduling)
    pub iterations_since_yield: u64,

    /// Yield threshold - yield after this many iterations
    /// 0 = never yield automatically
    pub yield_threshold: u64,

    /// Alert pipeline pointer (for FFI calls to emit alerts)
    /// Points to AlertRouter behind the scenes
    pub alert_pipeline_ptr: *mut std::ffi::c_void,

    // ========== Simulation Mode Support ==========
    /// Simulation mode: 0 = disabled, 1 = DenseKernel, 2 = HybridKernel
    pub simulation_mode: u32,

    /// Pointer to simulation state (TypedObject for DenseKernel)
    /// JIT code accesses state fields via direct memory offset
    pub simulation_state_ptr: *mut u8,

    /// Size of simulation state data (for deallocation)
    pub simulation_state_size: usize,

    // ========== GC Integration ==========
    /// Pointer to GC safepoint flag (AtomicBool raw pointer).
    /// Null when GC is not enabled. The JIT safepoint function reads this
    /// to determine if a GC cycle is requested.
    pub gc_safepoint_flag_ptr: *const u8,

    /// Pointer to GcHeap for allocation fast path.
    /// Null when GC is not enabled.
    pub gc_heap_ptr: *mut std::ffi::c_void,

    /// Opaque pointer to JIT foreign-call bridge state.
    /// Null when no foreign functions are linked for this execution.
    pub foreign_bridge_ptr: *const std::ffi::c_void,
}
565
impl Default for JITContext {
    /// An inert context: all pointers null, locals/stack filled with
    /// NaN-boxed null, counters zeroed, and position state cleared.
    /// Callers populate the DataFrame/function-table pointers before
    /// handing this to compiled code.
    fn default() -> Self {
        Self {
            in_position: false,
            position_side: 0,
            entry_price: box_number(0.0),
            unrealized_pnl_pct: box_number(0.0),
            // Timestamps pointer
            timestamps_ptr: std::ptr::null(),
            // Generic DataFrame access
            column_ptrs: std::ptr::null(),
            column_count: 0,
            row_count: 0,
            current_row: 0,
            // Local variables and stack
            locals: [TAG_NULL; 256],
            stack: [TAG_NULL; 512],
            stack_ptr: 0,
            heap_ptr: std::ptr::null_mut(),
            function_table: std::ptr::null(),
            function_table_len: 0,
            exec_context_ptr: std::ptr::null_mut(),
            function_names_ptr: std::ptr::null(),
            function_names_len: 0,
            // Async execution support
            event_queue_ptr: std::ptr::null_mut(),
            suspension_state: 0,
            iterations_since_yield: 0,
            yield_threshold: 0, // 0 = no automatic yielding
            alert_pipeline_ptr: std::ptr::null_mut(),
            // Simulation mode support
            simulation_mode: 0,
            simulation_state_ptr: std::ptr::null_mut(),
            simulation_state_size: 0,
            // GC integration
            gc_safepoint_flag_ptr: std::ptr::null(),
            gc_heap_ptr: std::ptr::null_mut(),
            foreign_bridge_ptr: std::ptr::null(),
        }
    }
}
607
608impl JITContext {
609 /// Get column value at offset from current row
610 /// column_index is the column index in the DataFrame schema
611 pub fn get_column_value(&self, column_index: usize, offset: i32) -> f64 {
612 if self.column_ptrs.is_null() || column_index >= self.column_count {
613 return 0.0;
614 }
615 let row_idx = (self.current_row as i32 + offset) as usize;
616 if row_idx < self.row_count {
617 unsafe {
618 let col_ptr = *self.column_ptrs.add(column_index);
619 if !col_ptr.is_null() {
620 *col_ptr.add(row_idx)
621 } else {
622 0.0
623 }
624 }
625 } else {
626 0.0
627 }
628 }
629
630 /// Update current row index for DataFrame iteration
631 #[inline]
632 pub fn set_current_row(&mut self, index: usize) {
633 self.current_row = index;
634 }
635
636 /// Update current row for backtest iteration (alias for backward compatibility)
637 #[inline]
638 pub fn update_current_row(&mut self, index: usize) {
639 self.current_row = index;
640 }
641
642 // ========================================================================
643 // Simulation Mode Methods
644 // ========================================================================
645
646 /// Check if in simulation mode
647 #[inline]
648 pub fn is_simulation_mode(&self) -> bool {
649 self.simulation_mode > 0
650 }
651
652 /// Set up context for DenseKernel simulation.
653 ///
654 /// # Arguments
655 /// * `state_ptr` - Pointer to TypedObject state
656 /// * `state_size` - Size of state data
657 /// * `column_ptrs` - Pointers to data columns
658 /// * `column_count` - Number of columns
659 /// * `row_count` - Number of rows
660 /// * `timestamps` - Pointer to timestamp array
661 pub fn setup_simulation(
662 &mut self,
663 state_ptr: *mut u8,
664 state_size: usize,
665 column_ptrs: *const *const f64,
666 column_count: usize,
667 row_count: usize,
668 timestamps: *const i64,
669 ) {
670 self.simulation_mode = 1; // DenseKernel mode
671 self.simulation_state_ptr = state_ptr;
672 self.simulation_state_size = state_size;
673 self.column_ptrs = column_ptrs;
674 self.column_count = column_count;
675 self.row_count = row_count;
676 self.current_row = 0;
677 self.timestamps_ptr = timestamps;
678 }
679
680 /// Get simulation state field as f64.
681 ///
682 /// # Safety
683 /// Caller must ensure offset is valid for the state TypedObject.
684 #[inline]
685 pub unsafe fn get_state_field_f64(&self, offset: usize) -> f64 {
686 if self.simulation_state_ptr.is_null() {
687 return 0.0;
688 }
689 let field_ptr = unsafe { self.simulation_state_ptr.add(8 + offset) } as *const u64;
690 let bits = unsafe { *field_ptr };
691 unbox_number(bits)
692 }
693
694 /// Set simulation state field as f64.
695 ///
696 /// # Safety
697 /// Caller must ensure offset is valid for the state TypedObject.
698 #[inline]
699 pub unsafe fn set_state_field_f64(&mut self, offset: usize, value: f64) {
700 if self.simulation_state_ptr.is_null() {
701 return;
702 }
703 let field_ptr = unsafe { self.simulation_state_ptr.add(8 + offset) } as *mut u64;
704 unsafe { *field_ptr = box_number(value) };
705 }
706
707 /// Clear simulation mode.
708 pub fn clear_simulation(&mut self) {
709 self.simulation_mode = 0;
710 self.simulation_state_ptr = std::ptr::null_mut();
711 self.simulation_state_size = 0;
712 }
713}
714
715// ============================================================================
716// JITDataFrame - Generic DataFrame for JIT (industry-agnostic)
717// ============================================================================
718
/// Generic DataFrame storage for JIT execution.
/// Stores data as an array of columns, matching the generic column_ptrs
/// design in JITContext.
///
/// Column order MUST match the DataFrameSchema used during compilation.
///
/// INVARIANT: `column_ptrs[i]` points into `columns[i]`'s heap buffer, so
/// `columns` must not be mutated (pushed to, resized) after the pointers
/// are taken, and the frame must outlive any JITContext it populates.
pub struct JITDataFrame {
    /// Column data arrays (each Vec is one column)
    /// Columns are ordered by index as defined in DataFrameSchema
    pub columns: Vec<Vec<f64>>,
    /// Pointers to column data (for JITContext.column_ptrs)
    pub column_ptrs: Vec<*const f64>,
    /// Timestamps (always present, column 0 equivalent)
    pub timestamps: Vec<i64>,
    /// Number of rows
    pub row_count: usize,
}
735
impl JITDataFrame {
    /// Create an empty JITDataFrame (no columns, no rows).
    pub fn new() -> Self {
        Self {
            columns: Vec::new(),
            column_ptrs: Vec::new(),
            timestamps: Vec::new(),
            row_count: 0,
        }
    }

    /// Create from ExecutionContext using a schema mapping.
    /// The schema determines which columns to extract and their order.
    ///
    /// Currently a stub: produces one empty column (with a null pointer)
    /// per schema column and row_count 0, because series caching is not
    /// yet implemented in ExecutionContext.
    pub fn from_execution_context(
        ctx: &shape_runtime::context::ExecutionContext,
        schema: &shape_vm::bytecode::DataFrameSchema,
    ) -> Self {
        let mut data = Self::new();

        // NOTE: Series caching not yet implemented in ExecutionContext
        // For now, initialize empty columns for each schema column
        // TODO: Implement series caching when available
        let _ = (ctx, schema); // Suppress unused warnings
        for _ in 0..schema.column_names.len() {
            data.columns.push(Vec::new());
            data.column_ptrs.push(std::ptr::null());
        }

        data
    }

    /// Populate a JITContext with generic DataFrame pointers.
    /// This sets column_ptrs, column_count, row_count, and timestamps_ptr.
    ///
    /// `self` must outlive `ctx`'s use of these raw pointers. Existing
    /// pointers in `ctx` are left untouched when the corresponding data
    /// here is empty.
    pub fn populate_context(&self, ctx: &mut JITContext) {
        if !self.column_ptrs.is_empty() {
            ctx.column_ptrs = self.column_ptrs.as_ptr();
            ctx.column_count = self.column_ptrs.len();
        }
        ctx.row_count = self.row_count;

        if !self.timestamps.is_empty() {
            ctx.timestamps_ptr = self.timestamps.as_ptr();
        }
    }

    /// Get the number of rows
    pub fn len(&self) -> usize {
        self.row_count
    }

    /// Check if empty
    pub fn is_empty(&self) -> bool {
        self.row_count == 0
    }

    /// Get number of columns
    pub fn column_count(&self) -> usize {
        self.columns.len()
    }

    /// Create from a DataTable by extracting f64 columns and an optional timestamp column.
    ///
    /// All f64 columns are copied into SIMD-aligned buffers. If a column named
    /// "timestamp" (or typed as Timestamp) exists, it is extracted as i64.
    /// NOTE(review): only microsecond-precision timestamps are matched here;
    /// if multiple timestamp columns exist, the last one wins.
    pub fn from_datatable(dt: &shape_value::DataTable) -> Self {
        use arrow_array::cast::AsArray;
        use arrow_schema::{DataType, TimeUnit};

        let batch = dt.inner();
        let schema = batch.schema();
        let num_rows = batch.num_rows();
        let mut columns = Vec::new();
        let mut timestamps = Vec::new();

        for (i, field) in schema.fields().iter().enumerate() {
            match field.data_type() {
                DataType::Float64 => {
                    let arr = batch
                        .column(i)
                        .as_primitive::<arrow_array::types::Float64Type>();
                    let col: Vec<f64> = (0..num_rows).map(|r| arr.value(r)).collect();
                    columns.push(col);
                }
                DataType::Timestamp(TimeUnit::Microsecond, _) => {
                    let arr = batch
                        .column(i)
                        .as_primitive::<arrow_array::types::TimestampMicrosecondType>();
                    timestamps = (0..num_rows).map(|r| arr.value(r)).collect();
                }
                DataType::Int64 => {
                    // Convert i64 to f64 for JIT column access
                    let arr = batch
                        .column(i)
                        .as_primitive::<arrow_array::types::Int64Type>();
                    let col: Vec<f64> = (0..num_rows).map(|r| arr.value(r) as f64).collect();
                    columns.push(col);
                }
                _ => {
                    // Skip non-numeric columns (strings, bools, etc.)
                }
            }
        }

        // Take the pointers before moving `columns` into Self: the Vec heap
        // buffers do not move with the Vec-of-Vecs, so the pointers stay valid.
        let column_ptrs: Vec<*const f64> = columns.iter().map(|c| c.as_ptr()).collect();

        Self {
            columns,
            column_ptrs,
            timestamps,
            row_count: num_rows,
        }
    }
}
849
850impl Default for JITDataFrame {
851 fn default() -> Self {
852 Self::new()
853 }
854}
855
856// ============================================================================
857// JITConfig - Compilation Configuration
858// ============================================================================
859
/// JIT compilation configuration.
#[derive(Debug, Clone)]
pub struct JITConfig {
    /// Optimization level (0-3)
    pub opt_level: u8,
    /// Enable debug symbols
    pub debug_symbols: bool,
    /// Minimum execution count before JIT compilation
    pub jit_threshold: usize,
}

impl Default for JITConfig {
    /// Defaults: maximum optimization (3), no debug symbols, and a warm-up
    /// threshold of 100 executions before compiling.
    fn default() -> Self {
        JITConfig {
            opt_level: 3,
            debug_symbols: false,
            jit_threshold: 100,
        }
    }
}
880
#[cfg(test)]
mod tests {
    //! Unit tests for JITClosure capture allocation, access, and cleanup,
    //! including the NaN-boxing roundtrip and Drop behavior.
    use super::*;

    #[test]
    fn test_closure_dynamic_captures_0() {
        // Zero captures — captures_ptr should be a valid (empty) allocation
        let closure = JITClosure::new(42, &[]);
        assert_eq!(closure.function_id, 42);
        assert_eq!(closure.captures_count, 0);
        // Drop is safe even with 0 captures
        let mut closure = closure;
        unsafe { closure.drop_captures() };
    }

    #[test]
    fn test_closure_dynamic_captures_5() {
        // Typical case: 5 captures, mixing numbers with non-numeric tags
        let captures = [
            box_number(1.0),
            box_number(2.0),
            box_number(3.0),
            TAG_BOOL_TRUE,
            TAG_NULL,
        ];
        let closure = JITClosure::new(7, &captures);
        assert_eq!(closure.function_id, 7);
        assert_eq!(closure.captures_count, 5);

        unsafe {
            assert_eq!(unbox_number(closure.get_capture(0)), 1.0);
            assert_eq!(unbox_number(closure.get_capture(1)), 2.0);
            assert_eq!(unbox_number(closure.get_capture(2)), 3.0);
            assert_eq!(closure.get_capture(3), TAG_BOOL_TRUE);
            assert_eq!(closure.get_capture(4), TAG_NULL);
        }
    }

    #[test]
    fn test_closure_dynamic_captures_20() {
        // Exceeds old 16-capture limit
        let captures: Vec<u64> = (0..20).map(|i| box_number(i as f64)).collect();
        let closure = JITClosure::new(99, &captures);
        assert_eq!(closure.captures_count, 20);

        unsafe {
            for i in 0..20 {
                assert_eq!(unbox_number(closure.get_capture(i)), i as f64);
            }
        }
    }

    #[test]
    fn test_closure_dynamic_captures_64() {
        // Stress test: 64 captures
        let captures: Vec<u64> = (0..64).map(|i| box_number(i as f64 * 10.0)).collect();
        let closure = JITClosure::new(1, &captures);
        assert_eq!(closure.captures_count, 64);

        unsafe {
            for i in 0..64 {
                assert_eq!(unbox_number(closure.get_capture(i)), i as f64 * 10.0);
            }
        }
    }

    #[test]
    fn test_closure_captures_drop() {
        // Verify memory is properly freed (no leak under Miri/ASAN)
        let captures: Vec<u64> = (0..32).map(|i| box_number(i as f64)).collect();
        let mut closure = JITClosure::new(5, &captures);
        assert_eq!(closure.captures_count, 32);

        // Verify captures are valid before drop
        unsafe {
            assert_eq!(unbox_number(closure.get_capture(0)), 0.0);
            assert_eq!(unbox_number(closure.get_capture(31)), 31.0);
        }

        // Drop captures
        unsafe { closure.drop_captures() };
        assert!(closure.captures_ptr.is_null());
        assert_eq!(closure.captures_count, 32); // count unchanged, ptr nulled
    }

    #[test]
    fn test_closure_jit_box_roundtrip() {
        // Verify JITClosure survives jit_box/jit_unbox roundtrip
        let captures = [box_number(42.0), TAG_BOOL_FALSE];
        let closure = JITClosure::new(10, &captures);
        let bits = jit_box(HK_CLOSURE, *closure);

        assert!(is_heap_kind(bits, HK_CLOSURE));

        let recovered = unsafe { jit_unbox::<JITClosure>(bits) };
        assert_eq!(recovered.function_id, 10);
        assert_eq!(recovered.captures_count, 2);
        unsafe {
            assert_eq!(unbox_number(recovered.get_capture(0)), 42.0);
            assert_eq!(recovered.get_capture(1), TAG_BOOL_FALSE);
        }
    }

    #[test]
    fn test_closure_drop_impl_frees_captures_via_jit_drop() {
        // Verify the Drop impl on JITClosure frees the captures array
        // when the owning JitAlloc is freed via jit_drop.
        // Under Miri/ASAN this would catch a leak if Drop didn't work.
        let captures: Vec<u64> = (0..24).map(|i| box_number(i as f64)).collect();
        let closure = JITClosure::new(3, &captures);
        let bits = jit_box(HK_CLOSURE, *closure);

        // Read captures to confirm they're valid
        let recovered = unsafe { jit_unbox::<JITClosure>(bits) };
        assert_eq!(recovered.captures_count, 24);
        unsafe {
            assert_eq!(unbox_number(recovered.get_capture(23)), 23.0);
        }

        // jit_drop frees JitAlloc<JITClosure>, which calls Drop::drop on
        // JITClosure, which frees the captures array.
        unsafe { jit_drop::<JITClosure>(bits) };
    }

    #[test]
    fn test_closure_implicit_drop_on_box() {
        // Verify that simply dropping a Box<JITClosure> frees the captures.
        // (This tests the Drop impl without jit_box involvement.)
        let captures: Vec<u64> = (0..10).map(|i| box_number(i as f64)).collect();
        let closure = JITClosure::new(1, &captures);
        // closure is Box<JITClosure>, dropping it should free captures via Drop
        drop(closure);
        // No leak under Miri/ASAN
    }
}
1015}