shape_jit/ffi/data.rs
1// Heap allocation audit (PR-9 V8 Gap Closure):
2// Category A (NaN-boxed returns): 1 site
3// jit_box(HK_TIME, ...) — jit_get_row_timestamp
4// Category B (intermediate/consumed): 0 sites
5// Category C (heap islands): 0 sites
6//!
7//! Generic DataFrame FFI Functions for JIT
8//!
9//! Industry-agnostic functions for accessing DataFrame rows and fields.
10//! Column indices are resolved at compile time from field names.
11
12use super::super::context::JITContext;
13use super::super::nan_boxing::*;
14
15// ============================================================================
16// Generic Field Access (by compile-time column index)
17// ============================================================================
18
19/// Get a field value from the current or offset row by column index.
20///
21/// This is the primary generic data access function.
22/// Column indices are resolved at compile time from field names.
23///
24/// # Arguments
25/// * `ctx` - JIT execution context
26/// * `row_offset` - Offset from current_row (0 = current, -1 = previous, etc.)
27/// * `column_index` - Compile-time resolved column index
28///
29/// # Returns
30/// NaN-boxed f64 value, or TAG_NULL if out of bounds
31pub extern "C" fn jit_get_field(ctx: *mut JITContext, row_offset: i32, column_index: u32) -> u64 {
32 unsafe {
33 if ctx.is_null() {
34 return TAG_NULL;
35 }
36 let ctx_ref = &*ctx;
37
38 // Calculate absolute row index
39 let row_signed = ctx_ref.current_row as i32 + row_offset;
40 if row_signed < 0 || row_signed as usize >= ctx_ref.row_count {
41 return TAG_NULL;
42 }
43 let row_idx = row_signed as usize;
44
45 // Check column bounds
46 if column_index as usize >= ctx_ref.column_count {
47 return TAG_NULL;
48 }
49
50 // Check if column_ptrs is valid
51 if ctx_ref.column_ptrs.is_null() {
52 return TAG_NULL;
53 }
54
55 // Get the column pointer
56 let col_ptr = *ctx_ref.column_ptrs.add(column_index as usize);
57 if col_ptr.is_null() {
58 return TAG_NULL;
59 }
60
61 // Get the value
62 let value = *col_ptr.add(row_idx);
63 box_number(value)
64 }
65}
66
67// ============================================================================
68// Row Reference Operations (lightweight, no data copy)
69// ============================================================================
70
71/// Create a lightweight row reference (just stores the row index).
72///
73/// This allows passing row references without copying data.
74/// The row index is stored in the NaN-boxed payload.
75///
76/// # Arguments
77/// * `ctx` - JIT execution context
78/// * `row_offset` - Offset from current_row (0 = current, -1 = previous, etc.)
79///
80/// # Returns
81/// TAG_INT with row index in payload, or TAG_NULL if out of bounds
82pub extern "C" fn jit_get_row_ref(ctx: *mut JITContext, row_offset: i32) -> u64 {
83 unsafe {
84 if ctx.is_null() {
85 return TAG_NULL;
86 }
87 let ctx_ref = &*ctx;
88
89 // Calculate absolute row index
90 let row_signed = ctx_ref.current_row as i32 + row_offset;
91 if row_signed < 0 || row_signed as usize >= ctx_ref.row_count {
92 return TAG_NULL;
93 }
94 let row_idx = row_signed as usize;
95
96 // Return a lightweight row reference (just the index)
97 box_data_row(row_idx)
98 }
99}
100
101/// Get a field value from a row reference.
102///
103/// # Arguments
104/// * `ctx` - JIT execution context
105/// * `row_ref` - TAG_INT value with row index in payload
106/// * `column_index` - Compile-time resolved column index
107///
108/// # Returns
109/// NaN-boxed f64 value, or TAG_NULL if invalid
110pub extern "C" fn jit_row_get_field(ctx: *mut JITContext, row_ref: u64, column_index: u32) -> u64 {
111 unsafe {
112 if ctx.is_null() {
113 return TAG_NULL;
114 }
115 let ctx_ref = &*ctx;
116
117 // Validate row reference tag
118 if !is_data_row(row_ref) {
119 return TAG_NULL;
120 }
121
122 // Extract row index from payload
123 let row_idx = unbox_data_row(row_ref);
124 if row_idx >= ctx_ref.row_count {
125 return TAG_NULL;
126 }
127
128 // Check column bounds
129 if column_index as usize >= ctx_ref.column_count {
130 return TAG_NULL;
131 }
132
133 // Check if column_ptrs is valid
134 if ctx_ref.column_ptrs.is_null() {
135 return TAG_NULL;
136 }
137
138 // Get the column pointer
139 let col_ptr = *ctx_ref.column_ptrs.add(column_index as usize);
140 if col_ptr.is_null() {
141 return TAG_NULL;
142 }
143
144 // Get the value
145 let value = *col_ptr.add(row_idx);
146 box_number(value)
147 }
148}
149
150/// Get the timestamp for a data row.
151///
152/// # Arguments
153/// * `ctx` - JIT execution context
154/// * `row_offset` - Offset from current_row
155///
156/// # Returns
157/// TAG_TIME with timestamp, or TAG_NULL if unavailable
158pub extern "C" fn jit_get_row_timestamp(ctx: *mut JITContext, row_offset: i32) -> u64 {
159 unsafe {
160 if ctx.is_null() {
161 return TAG_NULL;
162 }
163 let ctx_ref = &*ctx;
164
165 // Calculate absolute row index
166 let row_signed = ctx_ref.current_row as i32 + row_offset;
167 if row_signed < 0 || row_signed as usize >= ctx_ref.row_count {
168 return TAG_NULL;
169 }
170 let row_idx = row_signed as usize;
171
172 // Get timestamp from timestamps_ptr
173 if ctx_ref.timestamps_ptr.is_null() {
174 return TAG_NULL;
175 }
176
177 let timestamp = *ctx_ref.timestamps_ptr.add(row_idx);
178 // Return as heap-allocated time value
179 jit_box(HK_TIME, timestamp)
180 }
181}
182
183// ============================================================================
184// Row Count and Current Row Access
185// ============================================================================
186
187/// Get the total number of rows in the DataFrame.
188pub extern "C" fn jit_get_row_count(ctx: *mut JITContext) -> u64 {
189 unsafe {
190 if ctx.is_null() {
191 return box_number(0.0);
192 }
193 let ctx_ref = &*ctx;
194 box_number(ctx_ref.row_count as f64)
195 }
196}
197
198/// Get the current row index.
199pub extern "C" fn jit_get_current_row(ctx: *mut JITContext) -> u64 {
200 unsafe {
201 if ctx.is_null() {
202 return box_number(0.0);
203 }
204 let ctx_ref = &*ctx;
205 box_number(ctx_ref.current_row as f64)
206 }
207}
208
209// ============================================================================
210// Typed Column Access (LoadCol* opcodes)
211// ============================================================================
212
213/// Load an f64 value from a column by index and row reference.
214///
215/// # Arguments
216/// * `ctx` - JIT execution context (provides column_ptrs)
217/// * `col_id` - Column index
218/// * `row_ref` - TAG_INT with row index, or any value (uses current_row)
219///
220/// # Returns
221/// NaN-boxed f64 value, or TAG_NULL if out of bounds
222pub extern "C" fn jit_load_col_f64(ctx: *mut JITContext, col_id: u32, row_ref: u64) -> u64 {
223 unsafe {
224 if ctx.is_null() {
225 return TAG_NULL;
226 }
227 let ctx_ref = &*ctx;
228
229 let row_idx = if is_data_row(row_ref) {
230 unbox_data_row(row_ref)
231 } else {
232 ctx_ref.current_row
233 };
234
235 if row_idx >= ctx_ref.row_count || col_id as usize >= ctx_ref.column_count {
236 return TAG_NULL;
237 }
238 if ctx_ref.column_ptrs.is_null() {
239 return TAG_NULL;
240 }
241
242 let col_ptr = *ctx_ref.column_ptrs.add(col_id as usize);
243 if col_ptr.is_null() {
244 return TAG_NULL;
245 }
246
247 let value = *col_ptr.add(row_idx);
248 box_number(value)
249 }
250}
251
252/// Load an i64 value from a column (stored as f64, cast back to integer).
253///
254/// Returns NaN-boxed f64 (integer values are represented as f64 in the JIT).
255pub extern "C" fn jit_load_col_i64(ctx: *mut JITContext, col_id: u32, row_ref: u64) -> u64 {
256 unsafe {
257 if ctx.is_null() {
258 return TAG_NULL;
259 }
260 let ctx_ref = &*ctx;
261
262 let row_idx = if is_data_row(row_ref) {
263 unbox_data_row(row_ref)
264 } else {
265 ctx_ref.current_row
266 };
267
268 if row_idx >= ctx_ref.row_count || col_id as usize >= ctx_ref.column_count {
269 return TAG_NULL;
270 }
271 if ctx_ref.column_ptrs.is_null() {
272 return TAG_NULL;
273 }
274
275 let col_ptr = *ctx_ref.column_ptrs.add(col_id as usize);
276 if col_ptr.is_null() {
277 return TAG_NULL;
278 }
279
280 // Read as f64 (JIT stores all numerics as f64), truncate to integer
281 let value = *col_ptr.add(row_idx);
282 box_number(value.trunc())
283 }
284}
285
286/// Load a boolean value from a column (stored as f64: 0.0=false, else true).
287///
288/// Returns TAG_BOOL_TRUE or TAG_BOOL_FALSE.
289pub extern "C" fn jit_load_col_bool(ctx: *mut JITContext, col_id: u32, row_ref: u64) -> u64 {
290 unsafe {
291 if ctx.is_null() {
292 return TAG_NULL;
293 }
294 let ctx_ref = &*ctx;
295
296 let row_idx = if is_data_row(row_ref) {
297 unbox_data_row(row_ref)
298 } else {
299 ctx_ref.current_row
300 };
301
302 if row_idx >= ctx_ref.row_count || col_id as usize >= ctx_ref.column_count {
303 return TAG_NULL;
304 }
305 if ctx_ref.column_ptrs.is_null() {
306 return TAG_NULL;
307 }
308
309 let col_ptr = *ctx_ref.column_ptrs.add(col_id as usize);
310 if col_ptr.is_null() {
311 return TAG_NULL;
312 }
313
314 let value = *col_ptr.add(row_idx);
315 if value != 0.0 {
316 TAG_BOOL_TRUE
317 } else {
318 TAG_BOOL_FALSE
319 }
320 }
321}
322
323/// Load a string value from a column.
324///
325/// Not yet implemented — string columns require Arrow-backed buffer access.
326/// Returns TAG_NULL as a placeholder.
327pub extern "C" fn jit_load_col_str(_ctx: *mut JITContext, _col_id: u32, _row_ref: u64) -> u64 {
328 // TODO: Implement when JITContext supports Arrow-backed string columns
329 TAG_NULL
330}
331
332/// Stub for eval_data_datetime_ref - not yet implemented
333///
334/// Evaluates a data datetime reference expression.
335/// This is a placeholder that returns TAG_NULL.
336pub extern "C" fn jit_eval_data_datetime_ref(_ctx: *mut JITContext, _expr: u64) -> u64 {
337 // TODO: Implement datetime reference evaluation
338 TAG_NULL
339}
340
341/// Stub for eval_data_relative - not yet implemented
342///
343/// Evaluates a relative data access expression.
344/// This is a placeholder that returns TAG_NULL.
345pub extern "C" fn jit_eval_data_relative(_ctx: *mut JITContext, _expr: u64, _offset: i32) -> u64 {
346 // TODO: Implement relative data access
347 TAG_NULL
348}
349
350// ============================================================================
351// Type-Specialized Field Access (JIT Optimization)
352// ============================================================================
353
354/// Get a field from a typed object using precomputed offset.
355///
356/// This is the JIT optimization for typed field access. When the compiler
357/// knows an object's type at compile time, it precomputes the field offset
358/// and emits this instruction instead of a dynamic property lookup.
359///
360/// Performance: ~2ns (direct memory access)
361///
362/// # Arguments
363/// * `obj` - NaN-boxed TypedObject (TAG_TYPED_OBJECT)
364/// * `type_id` - Expected type schema ID (for type guard)
365/// * `_field_idx` - Field index (unused - offset is used instead)
366/// * `offset` - Precomputed byte offset for direct access
367///
368/// # Returns
369/// NaN-boxed field value
370///
371/// # Panics
372/// Panics if obj is not a TypedObject or has a schema mismatch.
373/// This indicates a type system bug - the type checker should guarantee
374/// that typed field access only occurs on correctly-typed objects.
375pub extern "C" fn jit_get_field_typed(obj: u64, type_id: u64, _field_idx: u64, offset: u64) -> u64 {
376 // TypedObject with direct offset access (~2ns)
377 if !is_typed_object(obj) {
378 panic!(
379 "jit_get_field_typed: expected TypedObject but got tag 0x{:x}. \
380 This indicates a type system bug - typed field access should only \
381 be emitted for statically-known TypedObjects.",
382 obj & TAG_MASK
383 );
384 }
385
386 let ptr = unbox_typed_object(obj) as *const super::typed_object::TypedObject;
387 if ptr.is_null() {
388 panic!("jit_get_field_typed: TypedObject pointer is null");
389 }
390
391 unsafe {
392 // Type guard
393 if type_id != 0 && (*ptr).schema_id != type_id as u32 {
394 panic!(
395 "jit_get_field_typed: schema mismatch - expected {} but got {}. \
396 This indicates a type system bug.",
397 type_id,
398 (*ptr).schema_id
399 );
400 }
401 // Direct field access by offset - O(1)!
402 (*ptr).get_field(offset as usize)
403 }
404}
405
406/// Set a field on a typed object using precomputed offset.
407///
408/// This is the JIT optimization for typed field set. Similar to get,
409/// when the compiler knows the type, it precomputes the offset.
410///
411/// Performance: ~2ns (direct memory access)
412///
413/// # Arguments
414/// * `obj` - NaN-boxed TypedObject to modify (TAG_TYPED_OBJECT)
415/// * `value` - NaN-boxed value to set
416/// * `type_id` - Expected type schema ID (for type guard)
417/// * `_field_idx` - Field index (unused - offset is used instead)
418/// * `offset` - Precomputed byte offset for direct access
419///
420/// # Returns
421/// The modified object (same object reference)
422///
423/// # Panics
424/// Panics if obj is not a TypedObject or has a schema mismatch.
425/// This indicates a type system bug - the type checker should guarantee
426/// that typed field access only occurs on correctly-typed objects.
427pub extern "C" fn jit_set_field_typed(
428 obj: u64,
429 value: u64,
430 type_id: u64,
431 _field_idx: u64,
432 offset: u64,
433) -> u64 {
434 // TypedObject with direct offset access (~2ns)
435 if !is_typed_object(obj) {
436 panic!(
437 "jit_set_field_typed: expected TypedObject but got tag 0x{:x}. \
438 This indicates a type system bug - typed field access should only \
439 be emitted for statically-known TypedObjects.",
440 obj & TAG_MASK
441 );
442 }
443
444 let ptr = unbox_typed_object(obj) as *mut super::typed_object::TypedObject;
445 if ptr.is_null() {
446 panic!("jit_set_field_typed: TypedObject pointer is null");
447 }
448
449 unsafe {
450 // Type guard
451 if type_id != 0 && (*ptr).schema_id != type_id as u32 {
452 panic!(
453 "jit_set_field_typed: schema mismatch - expected {} but got {}. \
454 This indicates a type system bug.",
455 type_id,
456 (*ptr).schema_id
457 );
458 }
459 // Direct field set by offset - O(1)!
460 let old_bits = (*ptr).get_field(offset as usize);
461 super::gc::jit_write_barrier(old_bits, value);
462 (*ptr).set_field(offset as usize, value);
463 obj
464 }
465}