// shape_jit/ffi/data.rs
// Heap allocation audit (PR-9 V8 Gap Closure):
//   Category A (NaN-boxed returns): 1 site
//     jit_box(HK_TIME, ...) — jit_get_row_timestamp
//   Category B (intermediate/consumed): 0 sites
//   Category C (heap islands): 0 sites
//!
//! Generic DataFrame FFI Functions for JIT
//!
//! Industry-agnostic functions for accessing DataFrame rows and fields.
//! Column indices are resolved at compile time from field names.
12use super::super::context::JITContext;
13use super::super::nan_boxing::*;
14
15// ============================================================================
16// Generic Field Access (by compile-time column index)
17// ============================================================================
18
19/// Get a field value from the current or offset row by column index.
20///
21/// This is the primary generic data access function.
22/// Column indices are resolved at compile time from field names.
23///
24/// # Arguments
25/// * `ctx` - JIT execution context
26/// * `row_offset` - Offset from current_row (0 = current, -1 = previous, etc.)
27/// * `column_index` - Compile-time resolved column index
28///
29/// # Returns
30/// NaN-boxed f64 value, or TAG_NULL if out of bounds
31pub extern "C" fn jit_get_field(ctx: *mut JITContext, row_offset: i32, column_index: u32) -> u64 {
32    unsafe {
33        if ctx.is_null() {
34            return TAG_NULL;
35        }
36        let ctx_ref = &*ctx;
37
38        // Calculate absolute row index
39        let row_signed = ctx_ref.current_row as i32 + row_offset;
40        if row_signed < 0 || row_signed as usize >= ctx_ref.row_count {
41            return TAG_NULL;
42        }
43        let row_idx = row_signed as usize;
44
45        // Check column bounds
46        if column_index as usize >= ctx_ref.column_count {
47            return TAG_NULL;
48        }
49
50        // Check if column_ptrs is valid
51        if ctx_ref.column_ptrs.is_null() {
52            return TAG_NULL;
53        }
54
55        // Get the column pointer
56        let col_ptr = *ctx_ref.column_ptrs.add(column_index as usize);
57        if col_ptr.is_null() {
58            return TAG_NULL;
59        }
60
61        // Get the value
62        let value = *col_ptr.add(row_idx);
63        box_number(value)
64    }
65}
66
67// ============================================================================
68// Row Reference Operations (lightweight, no data copy)
69// ============================================================================
70
71/// Create a lightweight row reference (just stores the row index).
72///
73/// This allows passing row references without copying data.
74/// The row index is stored in the NaN-boxed payload.
75///
76/// # Arguments
77/// * `ctx` - JIT execution context
78/// * `row_offset` - Offset from current_row (0 = current, -1 = previous, etc.)
79///
80/// # Returns
81/// TAG_INT with row index in payload, or TAG_NULL if out of bounds
82pub extern "C" fn jit_get_row_ref(ctx: *mut JITContext, row_offset: i32) -> u64 {
83    unsafe {
84        if ctx.is_null() {
85            return TAG_NULL;
86        }
87        let ctx_ref = &*ctx;
88
89        // Calculate absolute row index
90        let row_signed = ctx_ref.current_row as i32 + row_offset;
91        if row_signed < 0 || row_signed as usize >= ctx_ref.row_count {
92            return TAG_NULL;
93        }
94        let row_idx = row_signed as usize;
95
96        // Return a lightweight row reference (just the index)
97        box_data_row(row_idx)
98    }
99}
100
101/// Get a field value from a row reference.
102///
103/// # Arguments
104/// * `ctx` - JIT execution context
105/// * `row_ref` - TAG_INT value with row index in payload
106/// * `column_index` - Compile-time resolved column index
107///
108/// # Returns
109/// NaN-boxed f64 value, or TAG_NULL if invalid
110pub extern "C" fn jit_row_get_field(ctx: *mut JITContext, row_ref: u64, column_index: u32) -> u64 {
111    unsafe {
112        if ctx.is_null() {
113            return TAG_NULL;
114        }
115        let ctx_ref = &*ctx;
116
117        // Validate row reference tag
118        if !is_data_row(row_ref) {
119            return TAG_NULL;
120        }
121
122        // Extract row index from payload
123        let row_idx = unbox_data_row(row_ref);
124        if row_idx >= ctx_ref.row_count {
125            return TAG_NULL;
126        }
127
128        // Check column bounds
129        if column_index as usize >= ctx_ref.column_count {
130            return TAG_NULL;
131        }
132
133        // Check if column_ptrs is valid
134        if ctx_ref.column_ptrs.is_null() {
135            return TAG_NULL;
136        }
137
138        // Get the column pointer
139        let col_ptr = *ctx_ref.column_ptrs.add(column_index as usize);
140        if col_ptr.is_null() {
141            return TAG_NULL;
142        }
143
144        // Get the value
145        let value = *col_ptr.add(row_idx);
146        box_number(value)
147    }
148}
149
150/// Get the timestamp for a data row.
151///
152/// # Arguments
153/// * `ctx` - JIT execution context
154/// * `row_offset` - Offset from current_row
155///
156/// # Returns
157/// TAG_TIME with timestamp, or TAG_NULL if unavailable
158pub extern "C" fn jit_get_row_timestamp(ctx: *mut JITContext, row_offset: i32) -> u64 {
159    unsafe {
160        if ctx.is_null() {
161            return TAG_NULL;
162        }
163        let ctx_ref = &*ctx;
164
165        // Calculate absolute row index
166        let row_signed = ctx_ref.current_row as i32 + row_offset;
167        if row_signed < 0 || row_signed as usize >= ctx_ref.row_count {
168            return TAG_NULL;
169        }
170        let row_idx = row_signed as usize;
171
172        // Get timestamp from timestamps_ptr
173        if ctx_ref.timestamps_ptr.is_null() {
174            return TAG_NULL;
175        }
176
177        let timestamp = *ctx_ref.timestamps_ptr.add(row_idx);
178        // Return as heap-allocated time value
179        jit_box(HK_TIME, timestamp)
180    }
181}
182
183// ============================================================================
184// Row Count and Current Row Access
185// ============================================================================
186
187/// Get the total number of rows in the DataFrame.
188pub extern "C" fn jit_get_row_count(ctx: *mut JITContext) -> u64 {
189    unsafe {
190        if ctx.is_null() {
191            return box_number(0.0);
192        }
193        let ctx_ref = &*ctx;
194        box_number(ctx_ref.row_count as f64)
195    }
196}
197
198/// Get the current row index.
199pub extern "C" fn jit_get_current_row(ctx: *mut JITContext) -> u64 {
200    unsafe {
201        if ctx.is_null() {
202            return box_number(0.0);
203        }
204        let ctx_ref = &*ctx;
205        box_number(ctx_ref.current_row as f64)
206    }
207}
208
209// ============================================================================
210// Typed Column Access (LoadCol* opcodes)
211// ============================================================================
212
213/// Load an f64 value from a column by index and row reference.
214///
215/// # Arguments
216/// * `ctx` - JIT execution context (provides column_ptrs)
217/// * `col_id` - Column index
218/// * `row_ref` - TAG_INT with row index, or any value (uses current_row)
219///
220/// # Returns
221/// NaN-boxed f64 value, or TAG_NULL if out of bounds
222pub extern "C" fn jit_load_col_f64(ctx: *mut JITContext, col_id: u32, row_ref: u64) -> u64 {
223    unsafe {
224        if ctx.is_null() {
225            return TAG_NULL;
226        }
227        let ctx_ref = &*ctx;
228
229        let row_idx = if is_data_row(row_ref) {
230            unbox_data_row(row_ref)
231        } else {
232            ctx_ref.current_row
233        };
234
235        if row_idx >= ctx_ref.row_count || col_id as usize >= ctx_ref.column_count {
236            return TAG_NULL;
237        }
238        if ctx_ref.column_ptrs.is_null() {
239            return TAG_NULL;
240        }
241
242        let col_ptr = *ctx_ref.column_ptrs.add(col_id as usize);
243        if col_ptr.is_null() {
244            return TAG_NULL;
245        }
246
247        let value = *col_ptr.add(row_idx);
248        box_number(value)
249    }
250}
251
252/// Load an i64 value from a column (stored as f64, cast back to integer).
253///
254/// Returns NaN-boxed f64 (integer values are represented as f64 in the JIT).
255pub extern "C" fn jit_load_col_i64(ctx: *mut JITContext, col_id: u32, row_ref: u64) -> u64 {
256    unsafe {
257        if ctx.is_null() {
258            return TAG_NULL;
259        }
260        let ctx_ref = &*ctx;
261
262        let row_idx = if is_data_row(row_ref) {
263            unbox_data_row(row_ref)
264        } else {
265            ctx_ref.current_row
266        };
267
268        if row_idx >= ctx_ref.row_count || col_id as usize >= ctx_ref.column_count {
269            return TAG_NULL;
270        }
271        if ctx_ref.column_ptrs.is_null() {
272            return TAG_NULL;
273        }
274
275        let col_ptr = *ctx_ref.column_ptrs.add(col_id as usize);
276        if col_ptr.is_null() {
277            return TAG_NULL;
278        }
279
280        // Read as f64 (JIT stores all numerics as f64), truncate to integer
281        let value = *col_ptr.add(row_idx);
282        box_number(value.trunc())
283    }
284}
285
286/// Load a boolean value from a column (stored as f64: 0.0=false, else true).
287///
288/// Returns TAG_BOOL_TRUE or TAG_BOOL_FALSE.
289pub extern "C" fn jit_load_col_bool(ctx: *mut JITContext, col_id: u32, row_ref: u64) -> u64 {
290    unsafe {
291        if ctx.is_null() {
292            return TAG_NULL;
293        }
294        let ctx_ref = &*ctx;
295
296        let row_idx = if is_data_row(row_ref) {
297            unbox_data_row(row_ref)
298        } else {
299            ctx_ref.current_row
300        };
301
302        if row_idx >= ctx_ref.row_count || col_id as usize >= ctx_ref.column_count {
303            return TAG_NULL;
304        }
305        if ctx_ref.column_ptrs.is_null() {
306            return TAG_NULL;
307        }
308
309        let col_ptr = *ctx_ref.column_ptrs.add(col_id as usize);
310        if col_ptr.is_null() {
311            return TAG_NULL;
312        }
313
314        let value = *col_ptr.add(row_idx);
315        if value != 0.0 {
316            TAG_BOOL_TRUE
317        } else {
318            TAG_BOOL_FALSE
319        }
320    }
321}
322
323/// Load a string value from a column.
324///
325/// Not yet implemented — string columns require Arrow-backed buffer access.
326/// Returns TAG_NULL as a placeholder.
327pub extern "C" fn jit_load_col_str(_ctx: *mut JITContext, _col_id: u32, _row_ref: u64) -> u64 {
328    // TODO: Implement when JITContext supports Arrow-backed string columns
329    TAG_NULL
330}
331
332/// Stub for eval_data_datetime_ref - not yet implemented
333///
334/// Evaluates a data datetime reference expression.
335/// This is a placeholder that returns TAG_NULL.
336pub extern "C" fn jit_eval_data_datetime_ref(_ctx: *mut JITContext, _expr: u64) -> u64 {
337    // TODO: Implement datetime reference evaluation
338    TAG_NULL
339}
340
341/// Stub for eval_data_relative - not yet implemented
342///
343/// Evaluates a relative data access expression.
344/// This is a placeholder that returns TAG_NULL.
345pub extern "C" fn jit_eval_data_relative(_ctx: *mut JITContext, _expr: u64, _offset: i32) -> u64 {
346    // TODO: Implement relative data access
347    TAG_NULL
348}
349
350// ============================================================================
351// Type-Specialized Field Access (JIT Optimization)
352// ============================================================================
353
354/// Get a field from a typed object using precomputed offset.
355///
356/// This is the JIT optimization for typed field access. When the compiler
357/// knows an object's type at compile time, it precomputes the field offset
358/// and emits this instruction instead of a dynamic property lookup.
359///
360/// Performance: ~2ns (direct memory access)
361///
362/// # Arguments
363/// * `obj` - NaN-boxed TypedObject (TAG_TYPED_OBJECT)
364/// * `type_id` - Expected type schema ID (for type guard)
365/// * `_field_idx` - Field index (unused - offset is used instead)
366/// * `offset` - Precomputed byte offset for direct access
367///
368/// # Returns
369/// NaN-boxed field value
370///
371/// # Panics
372/// Panics if obj is not a TypedObject or has a schema mismatch.
373/// This indicates a type system bug - the type checker should guarantee
374/// that typed field access only occurs on correctly-typed objects.
375pub extern "C" fn jit_get_field_typed(obj: u64, type_id: u64, _field_idx: u64, offset: u64) -> u64 {
376    // TypedObject with direct offset access (~2ns)
377    if !is_typed_object(obj) {
378        panic!(
379            "jit_get_field_typed: expected TypedObject but got tag 0x{:x}. \
380             This indicates a type system bug - typed field access should only \
381             be emitted for statically-known TypedObjects.",
382            obj & TAG_MASK
383        );
384    }
385
386    let ptr = unbox_typed_object(obj) as *const super::typed_object::TypedObject;
387    if ptr.is_null() {
388        panic!("jit_get_field_typed: TypedObject pointer is null");
389    }
390
391    unsafe {
392        // Type guard
393        if type_id != 0 && (*ptr).schema_id != type_id as u32 {
394            panic!(
395                "jit_get_field_typed: schema mismatch - expected {} but got {}. \
396                 This indicates a type system bug.",
397                type_id,
398                (*ptr).schema_id
399            );
400        }
401        // Direct field access by offset - O(1)!
402        (*ptr).get_field(offset as usize)
403    }
404}
405
/// Set a field on a typed object using precomputed offset.
///
/// This is the JIT optimization for typed field set. Similar to get,
/// when the compiler knows the type, it precomputes the offset.
///
/// Performance: ~2ns (direct memory access)
///
/// # Arguments
/// * `obj` - NaN-boxed TypedObject to modify (TAG_TYPED_OBJECT)
/// * `value` - NaN-boxed value to set
/// * `type_id` - Expected type schema ID (for type guard)
/// * `_field_idx` - Field index (unused - offset is used instead)
/// * `offset` - Precomputed byte offset for direct access
///
/// # Returns
/// The modified object (same object reference)
///
/// # Panics
/// Panics if obj is not a TypedObject or has a schema mismatch.
/// This indicates a type system bug - the type checker should guarantee
/// that typed field access only occurs on correctly-typed objects.
pub extern "C" fn jit_set_field_typed(
    obj: u64,
    value: u64,
    type_id: u64,
    _field_idx: u64,
    offset: u64,
) -> u64 {
    // TypedObject with direct offset access (~2ns)
    if !is_typed_object(obj) {
        panic!(
            "jit_set_field_typed: expected TypedObject but got tag 0x{:x}. \
             This indicates a type system bug - typed field access should only \
             be emitted for statically-known TypedObjects.",
            obj & TAG_MASK
        );
    }

    let ptr = unbox_typed_object(obj) as *mut super::typed_object::TypedObject;
    if ptr.is_null() {
        panic!("jit_set_field_typed: TypedObject pointer is null");
    }

    // SAFETY: `ptr` was unboxed from a value carrying the TypedObject tag and
    // verified non-null above; the JIT keeps the object alive across the call.
    unsafe {
        // Type guard (type_id == 0 means "unchecked")
        if type_id != 0 && (*ptr).schema_id != type_id as u32 {
            panic!(
                "jit_set_field_typed: schema mismatch - expected {} but got {}. \
                 This indicates a type system bug.",
                type_id,
                (*ptr).schema_id
            );
        }
        // Direct field set by offset - O(1)!
        // NOTE: ordering is deliberate — read the old field bits BEFORE
        // overwriting so the GC write barrier sees the outgoing reference;
        // do not reorder these three statements.
        let old_bits = (*ptr).get_field(offset as usize);
        super::gc::jit_write_barrier(old_bits, value);
        (*ptr).set_field(offset as usize, value);
        obj
    }
}
465}