Skip to main content

shape_vm/
type_tracking.rs

1//! Type Tracking for Bytecode Compiler
2//!
3//! This module tracks known type information during compilation to enable
4//! type-specialized code generation. When a variable's type is known at
5//! compile time, the compiler can emit optimized opcodes for field access.
6//!
7//! # How Types Become Known
8//!
9//! Types are known in these situations:
10//! - Explicit type annotation: `let x: Candle = ...`
11//! - Constructor call: `let x = Candle { ... }`
12//! - Object literal: `let x = { a: 1, b: 2 }` (inline struct type)
13//! - Function with declared return type: `let x = get_candle()`
14//!
15//! # Usage
16//!
17//! The compiler uses this to emit typed field opcodes for dot access:
18//! - `GetFieldTyped` (specialized): Direct slot access by precomputed offset
19//! - `SetFieldTyped` (specialized): Direct slot update by precomputed offset
//!
//! Generic `GetProp`/`SetProp` are reserved for non-dot operations (index/slice).
21//!
22//! # Storage Type Hints
23//!
24//! For JIT optimization, we track storage types:
25//! - `StorageHint::NullableFloat64`: Option<f64> uses NaN sentinel
26//! - `StorageHint::Float64`: Plain f64, no nullability
27//! - `StorageHint::Unknown`: Type not determined at compile time
28
29use std::collections::HashMap;
30use std::sync::atomic::{AtomicU64, Ordering};
31
32use serde::{Deserialize, Serialize};
33use shape_ast::ast::TypeAnnotation;
34use shape_runtime::type_schema::{FieldType, SchemaId, TypeSchema, TypeSchemaRegistry};
35use shape_runtime::type_system::{BuiltinTypes, StorageType};
36
37/// Numeric type known at compile time for typed opcode emission.
38///
39/// When the compiler can determine the numeric subtype of an expression,
40/// it emits typed opcodes (e.g., `MulInt` instead of `Mul`) that skip
41/// runtime type dispatch entirely.
42#[derive(Debug, Clone, Copy, PartialEq, Eq)]
43pub enum NumericType {
44    /// Integer (i64) — the default integer type
45    Int,
46    /// Width-specific integer (i8, u8, i16, u16, i32, u32, u64)
47    IntWidth(shape_ast::IntWidth),
48    /// Floating point (f64)
49    Number,
50    /// Exact decimal (rust_decimal::Decimal)
51    Decimal,
52}
53
/// Global counter backing the generation of unique names for inline object types.
static INLINE_OBJECT_COUNTER: AtomicU64 = AtomicU64::new(0);
56
57/// Describes the storage kind for a single local/parameter slot in a frame.
58///
59/// Used by the JIT and VM to generate more efficient code by knowing
60/// the actual storage representation at compile time.
61///
62/// This was previously named `StorageHint`; the alias is kept for
63/// backwards compatibility.
64#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
65pub enum SlotKind {
66    /// Plain f64 value (direct float operations)
67    Float64,
68    /// Nullable f64 using NaN sentinel (Option<f64>)
69    /// IEEE 754: NaN + x = NaN, so null propagates automatically
70    NullableFloat64,
71    /// Plain i8 value
72    Int8,
73    /// Nullable i8 value
74    NullableInt8,
75    /// Plain u8 value
76    UInt8,
77    /// Nullable u8 value
78    NullableUInt8,
79    /// Plain i16 value
80    Int16,
81    /// Nullable i16 value
82    NullableInt16,
83    /// Plain u16 value
84    UInt16,
85    /// Nullable u16 value
86    NullableUInt16,
87    /// Plain i32 value
88    Int32,
89    /// Nullable i32 value
90    NullableInt32,
91    /// Plain u32 value
92    UInt32,
93    /// Nullable u32 value
94    NullableUInt32,
95    /// Plain i64 value
96    Int64,
97    /// Nullable i64 value
98    NullableInt64,
99    /// Plain u64 value
100    UInt64,
101    /// Nullable u64 value
102    NullableUInt64,
103    /// Plain isize value
104    IntSize,
105    /// Nullable isize value
106    NullableIntSize,
107    /// Plain usize value
108    UIntSize,
109    /// Nullable usize value
110    NullableUIntSize,
111    /// Boolean value
112    Bool,
113    /// String reference
114    String,
115    /// NaN-boxed value: the raw u64 bits are a valid NaN-boxed interpreter value.
116    /// Used for boxed locals and operand stack entries in precise deopt metadata.
117    /// The VM unmarshals these via direct transmute (zero-cost passthrough).
118    NanBoxed,
119    /// Type not determined at compile time (falls back to NaN-boxed dispatch).
120    /// Should NOT appear in precise deopt metadata — use NanBoxed instead.
121    /// Reserved for truly uninitialized/unresolved slots.
122    Unknown,
123}
124
125/// Backwards-compatible alias. Prefer `SlotKind` in new code.
126pub type StorageHint = SlotKind;
127
128impl Default for SlotKind {
129    fn default() -> Self {
130        SlotKind::Unknown
131    }
132}
133
134impl From<StorageType> for SlotKind {
135    /// Convert from runtime StorageType to JIT StorageHint
136    fn from(st: StorageType) -> Self {
137        Self::from_storage_type(&st)
138    }
139}
140
141impl SlotKind {
142    /// Convert from runtime StorageType
143    ///
144    /// Maps semantic storage types to JIT optimization hints:
145    /// - Primitive types map directly
146    /// - NullableFloat64 enables NaN sentinel optimization
147    /// - Complex types fall back to boxed representation
148    pub fn from_storage_type(st: &StorageType) -> Self {
149        match st {
150            // Direct mappings for primitives
151            StorageType::Float64 => StorageHint::Float64,
152            StorageType::Int64 => StorageHint::Int64,
153            StorageType::Bool => StorageHint::Bool,
154            StorageType::String => StorageHint::String,
155
156            // Nullable types with optimized storage
157            StorageType::NullableFloat64 => StorageHint::NullableFloat64,
158            StorageType::NullableInt64 => StorageHint::NullableInt64,
159            StorageType::NullableBool => StorageHint::Bool, // 3-state in Boxed
160
161            // Complex types use boxed representation
162            StorageType::Array(_)
163            | StorageType::Table { .. }
164            | StorageType::Object
165            | StorageType::Result { .. }
166            | StorageType::TaggedUnion { .. }
167            | StorageType::Function
168            | StorageType::Struct(_)
169            | StorageType::Dynamic => StorageHint::Unknown,
170        }
171    }
172
173    #[inline]
174    pub fn is_integer(self) -> bool {
175        matches!(
176            self,
177            Self::Int8
178                | Self::UInt8
179                | Self::Int16
180                | Self::UInt16
181                | Self::Int32
182                | Self::UInt32
183                | Self::Int64
184                | Self::UInt64
185                | Self::IntSize
186                | Self::UIntSize
187        )
188    }
189
190    #[inline]
191    pub fn is_nullable_integer(self) -> bool {
192        matches!(
193            self,
194            Self::NullableInt8
195                | Self::NullableUInt8
196                | Self::NullableInt16
197                | Self::NullableUInt16
198                | Self::NullableInt32
199                | Self::NullableUInt32
200                | Self::NullableInt64
201                | Self::NullableUInt64
202                | Self::NullableIntSize
203                | Self::NullableUIntSize
204        )
205    }
206
207    #[inline]
208    pub fn is_integer_family(self) -> bool {
209        self.is_integer() || self.is_nullable_integer()
210    }
211
212    #[inline]
213    pub fn is_default_int_family(self) -> bool {
214        matches!(self, Self::Int64 | Self::NullableInt64)
215    }
216
217    #[inline]
218    pub fn is_float_family(self) -> bool {
219        matches!(self, Self::Float64 | Self::NullableFloat64)
220    }
221
222    #[inline]
223    pub fn is_numeric_family(self) -> bool {
224        self.is_integer_family() || self.is_float_family()
225    }
226
227    #[inline]
228    pub fn is_pointer_sized_integer(self) -> bool {
229        matches!(
230            self,
231            Self::IntSize | Self::UIntSize | Self::NullableIntSize | Self::NullableUIntSize
232        )
233    }
234
235    #[inline]
236    pub fn is_signed_integer(self) -> Option<bool> {
237        if matches!(
238            self,
239            Self::Int8
240                | Self::NullableInt8
241                | Self::Int16
242                | Self::NullableInt16
243                | Self::Int32
244                | Self::NullableInt32
245                | Self::Int64
246                | Self::NullableInt64
247                | Self::IntSize
248                | Self::NullableIntSize
249        ) {
250            Some(true)
251        } else if matches!(
252            self,
253            Self::UInt8
254                | Self::NullableUInt8
255                | Self::UInt16
256                | Self::NullableUInt16
257                | Self::UInt32
258                | Self::NullableUInt32
259                | Self::UInt64
260                | Self::NullableUInt64
261                | Self::UIntSize
262                | Self::NullableUIntSize
263        ) {
264            Some(false)
265        } else {
266            None
267        }
268    }
269
270    #[inline]
271    pub fn integer_bit_width(self) -> Option<u16> {
272        match self {
273            Self::Int8 | Self::UInt8 | Self::NullableInt8 | Self::NullableUInt8 => Some(8),
274            Self::Int16 | Self::UInt16 | Self::NullableInt16 | Self::NullableUInt16 => Some(16),
275            Self::Int32 | Self::UInt32 | Self::NullableInt32 | Self::NullableUInt32 => Some(32),
276            Self::Int64 | Self::UInt64 | Self::NullableInt64 | Self::NullableUInt64 => Some(64),
277            Self::IntSize | Self::UIntSize | Self::NullableIntSize | Self::NullableUIntSize => {
278                Some(usize::BITS as u16)
279            }
280            _ => None,
281        }
282    }
283
284    #[inline]
285    pub fn non_nullable(self) -> Self {
286        match self {
287            Self::NullableFloat64 => Self::Float64,
288            Self::NullableInt8 => Self::Int8,
289            Self::NullableUInt8 => Self::UInt8,
290            Self::NullableInt16 => Self::Int16,
291            Self::NullableUInt16 => Self::UInt16,
292            Self::NullableInt32 => Self::Int32,
293            Self::NullableUInt32 => Self::UInt32,
294            Self::NullableInt64 => Self::Int64,
295            Self::NullableUInt64 => Self::UInt64,
296            Self::NullableIntSize => Self::IntSize,
297            Self::NullableUIntSize => Self::UIntSize,
298            other => other,
299        }
300    }
301
302    #[inline]
303    pub fn with_nullability(self, nullable: bool) -> Self {
304        if !nullable {
305            return self.non_nullable();
306        }
307        match self.non_nullable() {
308            Self::Float64 => Self::NullableFloat64,
309            Self::Int8 => Self::NullableInt8,
310            Self::UInt8 => Self::NullableUInt8,
311            Self::Int16 => Self::NullableInt16,
312            Self::UInt16 => Self::NullableUInt16,
313            Self::Int32 => Self::NullableInt32,
314            Self::UInt32 => Self::NullableUInt32,
315            Self::Int64 => Self::NullableInt64,
316            Self::UInt64 => Self::NullableUInt64,
317            Self::IntSize => Self::NullableIntSize,
318            Self::UIntSize => Self::NullableUIntSize,
319            other => other,
320        }
321    }
322
323    pub fn combine_integer_hints(lhs: Self, rhs: Self) -> Option<Self> {
324        let lhs_bits = lhs.integer_bit_width()?;
325        let rhs_bits = rhs.integer_bit_width()?;
326        let bits = lhs_bits.max(rhs_bits);
327        let signed = lhs.is_signed_integer()? || rhs.is_signed_integer()?;
328        let nullable = lhs.is_nullable_integer() || rhs.is_nullable_integer();
329        let keep_pointer_size = bits == usize::BITS as u16
330            && (lhs.is_pointer_sized_integer() || rhs.is_pointer_sized_integer());
331        let base = if keep_pointer_size {
332            if signed {
333                Self::IntSize
334            } else {
335                Self::UIntSize
336            }
337        } else {
338            match (bits, signed) {
339                (8, true) => Self::Int8,
340                (8, false) => Self::UInt8,
341                (16, true) => Self::Int16,
342                (16, false) => Self::UInt16,
343                (32, true) => Self::Int32,
344                (32, false) => Self::UInt32,
345                (64, true) => Self::Int64,
346                (64, false) => Self::UInt64,
347                _ => return None,
348            }
349        };
350        Some(base.with_nullability(nullable))
351    }
352}
353
354/// Typed frame layout metadata.
355///
356/// A `FrameDescriptor` describes the storage layout for every local slot
357/// (parameters + locals) in a single function or top-level frame.  The JIT
358/// and VM use this to allocate registers / stack space with correct widths
359/// and to skip NaN-boxing for slots whose type is statically known.
360///
361/// This is the canonical replacement for the loose `Vec<StorageHint>` arrays
362/// that were previously threaded through `BytecodeProgram` and `Function`.
363#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
364pub struct FrameDescriptor {
365    /// One entry per local slot (index 0 = first param or local).
366    /// A `Boxed` entry means the slot stores a generic NaN-boxed value.
367    pub slots: Vec<SlotKind>,
368
369    /// Return type kind for the function.
370    ///
371    /// When present and not `Unknown`, the JIT boundary ABI uses this to
372    /// unmarshal the return value from JIT-compiled code back into the
373    /// correct `ValueWord` representation.
374    #[serde(default)]
375    pub return_kind: SlotKind,
376}
377
378impl FrameDescriptor {
379    /// Create an empty descriptor (all slots will be Boxed by default).
380    pub fn new() -> Self {
381        Self {
382            slots: Vec::new(),
383            return_kind: SlotKind::Unknown,
384        }
385    }
386
387    /// Create a descriptor with `n` slots, all initialised to `SlotKind::Unknown`.
388    pub fn with_unknown_slots(n: usize) -> Self {
389        Self {
390            slots: vec![SlotKind::Unknown; n],
391            return_kind: SlotKind::Unknown,
392        }
393    }
394
395    /// Build a descriptor from an existing `Vec<SlotKind>` (or `Vec<StorageHint>`).
396    pub fn from_slots(slots: Vec<SlotKind>) -> Self {
397        Self {
398            slots,
399            return_kind: SlotKind::Unknown,
400        }
401    }
402
403    /// Number of slots described.
404    #[inline]
405    pub fn len(&self) -> usize {
406        self.slots.len()
407    }
408
409    /// Whether the descriptor is empty.
410    #[inline]
411    pub fn is_empty(&self) -> bool {
412        self.slots.is_empty()
413    }
414
415    /// Get the kind of a specific slot.  Returns `Boxed` for out-of-range indices.
416    #[inline]
417    pub fn slot(&self, index: usize) -> SlotKind {
418        self.slots.get(index).copied().unwrap_or(SlotKind::Unknown)
419    }
420
421    /// Returns `true` if every slot is `Unknown` (i.e. no specialization).
422    pub fn is_all_unknown(&self) -> bool {
423        self.slots.iter().all(|s| *s == SlotKind::Unknown)
424    }
425}
426
/// Classifies a variable as a regular value, a typed table, a row view,
/// a column or an indexed table.
///
/// This enum supersedes the former `is_datatable` / `is_row_view` /
/// `is_column` boolean flags, which were mutually exclusive without the
/// compiler being able to enforce it.
#[derive(Debug, Clone, PartialEq)]
pub enum VariableKind {
    /// An ordinary value (struct, primitive, object, etc.).
    Value,
    /// `Table<T>`: a DataTable whose row schema is known.
    /// Closure methods (filter/map/etc.) propagate the schema to row params.
    Table { element_type: String },
    /// `Row<T>`: a typed row from an Arrow DataTable.
    /// Field access emits LoadColF64/I64/Bool/Str instead of GetProp.
    RowView { element_type: String },
    /// `Column<T>`: a typed column from an Arrow DataTable.
    Column {
        element_type: String,
        column_type: String,
    },
    /// `Indexed<T>`: a table with a designated index column.
    /// Only Indexed tables can use resample/between operations.
    Indexed {
        element_type: String,
        index_column: String,
    },
}
453
454/// Type information for a variable
455#[derive(Debug, Clone)]
456pub struct VariableTypeInfo {
457    /// Schema ID if type is known and registered
458    pub schema_id: Option<SchemaId>,
459    /// Type name (e.g., "Candle", "Point")
460    pub type_name: Option<String>,
461    /// Whether the type is definitely known (vs inferred/uncertain)
462    pub is_definite: bool,
463    /// Storage hint for JIT optimization
464    pub storage_hint: StorageHint,
465    /// Preserved concrete numeric runtime type (e.g. "i16", "u8", "f32", "i64")
466    /// derived from source annotations.
467    pub concrete_numeric_type: Option<String>,
468    /// What kind of variable this is (value, table, row view, column)
469    pub kind: VariableKind,
470}
471
472impl VariableTypeInfo {
473    /// Create type info for a known type
474    pub fn known(schema_id: SchemaId, type_name: String) -> Self {
475        let concrete_numeric_type = Self::infer_numeric_runtime_name(&type_name);
476        Self {
477            schema_id: Some(schema_id),
478            type_name: Some(type_name),
479            is_definite: true,
480            storage_hint: StorageHint::Unknown,
481            concrete_numeric_type,
482            kind: VariableKind::Value,
483        }
484    }
485
486    /// Create type info for an unknown/dynamic type
487    pub fn unknown() -> Self {
488        Self {
489            schema_id: None,
490            type_name: None,
491            is_definite: false,
492            storage_hint: StorageHint::Unknown,
493            concrete_numeric_type: None,
494            kind: VariableKind::Value,
495        }
496    }
497
498    /// Create type info for a type name that may or may not be registered
499    pub fn named(type_name: String) -> Self {
500        // Infer storage hint from common type names
501        let storage_hint = Self::infer_storage_hint(&type_name);
502        let concrete_numeric_type = Self::infer_numeric_runtime_name(&type_name);
503        Self {
504            schema_id: None,
505            type_name: Some(type_name),
506            is_definite: false,
507            storage_hint,
508            concrete_numeric_type,
509            kind: VariableKind::Value,
510        }
511    }
512
513    /// Create type info with explicit storage hint
514    pub fn with_storage(type_name: String, storage_hint: StorageHint) -> Self {
515        let concrete_numeric_type = Self::infer_numeric_runtime_name(&type_name);
516        Self {
517            schema_id: None,
518            type_name: Some(type_name),
519            is_definite: true,
520            storage_hint,
521            concrete_numeric_type,
522            kind: VariableKind::Value,
523        }
524    }
525
526    /// Create type info for Option<f64> (NaN sentinel optimization)
527    pub fn nullable_number() -> Self {
528        Self {
529            schema_id: None,
530            type_name: Some("Option<Number>".to_string()),
531            is_definite: true,
532            storage_hint: StorageHint::NullableFloat64,
533            concrete_numeric_type: Some("f64".to_string()),
534            kind: VariableKind::Value,
535        }
536    }
537
538    /// Create type info for plain f64
539    pub fn number() -> Self {
540        Self {
541            schema_id: None,
542            type_name: Some("Number".to_string()),
543            is_definite: true,
544            storage_hint: StorageHint::Float64,
545            concrete_numeric_type: Some("f64".to_string()),
546            kind: VariableKind::Value,
547        }
548    }
549
550    /// Create type info for a RowView variable (typed row from Arrow DataTable).
551    pub fn row_view(schema_id: SchemaId, type_name: String) -> Self {
552        Self {
553            schema_id: Some(schema_id),
554            type_name: Some(type_name.clone()),
555            is_definite: true,
556            storage_hint: StorageHint::Unknown,
557            concrete_numeric_type: None,
558            kind: VariableKind::RowView {
559                element_type: type_name,
560            },
561        }
562    }
563
564    /// Create type info for a DataTable variable with known schema (Table<T>).
565    pub fn datatable(schema_id: SchemaId, type_name: String) -> Self {
566        Self {
567            schema_id: Some(schema_id),
568            type_name: Some(type_name.clone()),
569            is_definite: true,
570            storage_hint: StorageHint::Unknown,
571            concrete_numeric_type: None,
572            kind: VariableKind::Table {
573                element_type: type_name,
574            },
575        }
576    }
577
578    /// Create type info for a Column<T> variable (ColumnRef from Arrow DataTable).
579    pub fn column(schema_id: SchemaId, type_name: String, element_type: String) -> Self {
580        Self {
581            schema_id: Some(schema_id),
582            type_name: Some(type_name.clone()),
583            is_definite: true,
584            storage_hint: StorageHint::Unknown,
585            concrete_numeric_type: None,
586            kind: VariableKind::Column {
587                element_type,
588                column_type: type_name,
589            },
590        }
591    }
592
593    /// Create type info for an Indexed table variable — Indexed<T> with known index column.
594    pub fn indexed(schema_id: SchemaId, type_name: String, index_column: String) -> Self {
595        Self {
596            schema_id: Some(schema_id),
597            type_name: Some(type_name.clone()),
598            is_definite: true,
599            storage_hint: StorageHint::Unknown,
600            concrete_numeric_type: None,
601            kind: VariableKind::Indexed {
602                element_type: type_name,
603                index_column,
604            },
605        }
606    }
607
608    /// Check if this type is known (has schema ID)
609    pub fn is_known(&self) -> bool {
610        self.schema_id.is_some()
611    }
612
613    /// Check if this type uses NaN sentinel for nullability
614    pub fn uses_nan_sentinel(&self) -> bool {
615        self.storage_hint == StorageHint::NullableFloat64
616    }
617
618    /// Check if this variable is a DataTable (Table<T>)
619    pub fn is_datatable(&self) -> bool {
620        matches!(self.kind, VariableKind::Table { .. })
621    }
622
623    /// Check if this variable is a RowView (Row<T>)
624    pub fn is_row_view(&self) -> bool {
625        matches!(self.kind, VariableKind::RowView { .. })
626    }
627
628    /// Check if this variable is a Column (Column<T>)
629    pub fn is_column(&self) -> bool {
630        matches!(self.kind, VariableKind::Column { .. })
631    }
632
633    /// Check if this variable is an Indexed table (Indexed<T>)
634    pub fn is_indexed(&self) -> bool {
635        matches!(self.kind, VariableKind::Indexed { .. })
636    }
637
638    /// Infer storage hint from type name
639    fn infer_storage_hint(type_name: &str) -> StorageHint {
640        let trimmed = type_name.trim();
641
642        if let Some(inner) = Self::option_inner_type(trimmed) {
643            let inner = inner.trim();
644            if let Some(runtime) = BuiltinTypes::canonical_numeric_runtime_name(inner)
645                && let Some(hint) = Self::storage_hint_for_runtime_numeric(runtime, true)
646            {
647                return hint;
648            }
649            if BuiltinTypes::is_bool_type_name(inner) {
650                return StorageHint::Bool;
651            }
652            if BuiltinTypes::is_string_type_name(inner) {
653                return StorageHint::String;
654            }
655            return StorageHint::Unknown;
656        }
657
658        if let Some(runtime) = BuiltinTypes::canonical_numeric_runtime_name(trimmed)
659            && let Some(hint) = Self::storage_hint_for_runtime_numeric(runtime, false)
660        {
661            return hint;
662        }
663        if BuiltinTypes::is_bool_type_name(trimmed) {
664            return StorageHint::Bool;
665        }
666        if BuiltinTypes::is_string_type_name(trimmed) {
667            return StorageHint::String;
668        }
669        StorageHint::Unknown
670    }
671
672    fn option_inner_type(type_name: &str) -> Option<&str> {
673        type_name
674            .strip_prefix("Option<")
675            .and_then(|inner| inner.strip_suffix('>'))
676    }
677
678    fn storage_hint_for_runtime_numeric(runtime_name: &str, nullable: bool) -> Option<StorageHint> {
679        let base = match runtime_name {
680            "f32" | "f64" => StorageHint::Float64,
681            "i8" => StorageHint::Int8,
682            "u8" => StorageHint::UInt8,
683            "i16" => StorageHint::Int16,
684            "u16" => StorageHint::UInt16,
685            "i32" => StorageHint::Int32,
686            "u32" => StorageHint::UInt32,
687            "i64" => StorageHint::Int64,
688            "u64" => StorageHint::UInt64,
689            "isize" => StorageHint::IntSize,
690            "usize" => StorageHint::UIntSize,
691            _ => return None,
692        };
693        Some(base.with_nullability(nullable))
694    }
695
696    fn infer_numeric_runtime_name(type_name: &str) -> Option<String> {
697        let inner = if type_name.starts_with("Option<") && type_name.ends_with('>') {
698            &type_name["Option<".len()..type_name.len() - 1]
699        } else {
700            type_name
701        };
702        BuiltinTypes::canonical_numeric_runtime_name(inner).map(ToString::to_string)
703    }
704}
705
706/// Tracks type information for variables during compilation
707#[derive(Debug)]
708pub struct TypeTracker {
709    /// Type schema registry for looking up type definitions
710    schema_registry: TypeSchemaRegistry,
711
712    /// Type info for local variables (by slot index)
713    local_types: HashMap<u16, VariableTypeInfo>,
714
715    /// Type info for module_binding variables (by slot index)
716    binding_types: HashMap<u16, VariableTypeInfo>,
717
718    /// Scoped local type mappings (for scope push/pop)
719    local_type_scopes: Vec<HashMap<u16, VariableTypeInfo>>,
720
721    /// Function return types (function name -> type name)
722    function_return_types: HashMap<String, String>,
723    /// Compile-time object schema contracts: schema id -> field type annotation.
724    ///
725    /// Used for callable typed-object fields where runtime schema stores only slot layout.
726    object_field_contracts: HashMap<SchemaId, HashMap<String, TypeAnnotation>>,
727}
728
729impl TypeTracker {
730    /// Create a new type tracker with the given schema registry
731    pub fn new(schema_registry: TypeSchemaRegistry) -> Self {
732        Self {
733            schema_registry,
734            local_types: HashMap::new(),
735            binding_types: HashMap::new(),
736            local_type_scopes: vec![HashMap::new()],
737            function_return_types: HashMap::new(),
738            object_field_contracts: HashMap::new(),
739        }
740    }
741
742    /// Create a new type tracker with an empty registry
743    pub fn empty() -> Self {
744        Self::new(TypeSchemaRegistry::new())
745    }
746
747    /// Create a new type tracker with stdlib types pre-registered
748    pub fn with_stdlib() -> Self {
749        Self::new(TypeSchemaRegistry::with_stdlib_types())
750    }
751
752    /// Get the schema registry
753    pub fn schema_registry(&self) -> &TypeSchemaRegistry {
754        &self.schema_registry
755    }
756
757    /// Get mutable schema registry
758    pub fn schema_registry_mut(&mut self) -> &mut TypeSchemaRegistry {
759        &mut self.schema_registry
760    }
761
762    /// Push a new scope for local types
763    pub fn push_scope(&mut self) {
764        self.local_type_scopes.push(HashMap::new());
765    }
766
767    /// Pop a scope, removing local type info for that scope
768    pub fn pop_scope(&mut self) {
769        if let Some(scope) = self.local_type_scopes.pop() {
770            // Remove type info for variables in this scope
771            for slot in scope.keys() {
772                self.local_types.remove(slot);
773            }
774        }
775    }
776
777    /// Set type info for a local variable
778    pub fn set_local_type(&mut self, slot: u16, type_info: VariableTypeInfo) {
779        // Try to resolve schema ID if we have a type name but no schema ID
780        let resolved_info = if type_info.type_name.is_some() && type_info.schema_id.is_none() {
781            self.resolve_type_info(type_info)
782        } else {
783            type_info
784        };
785
786        // Track in current scope
787        if let Some(scope) = self.local_type_scopes.last_mut() {
788            scope.insert(slot, resolved_info.clone());
789        }
790        self.local_types.insert(slot, resolved_info);
791    }
792
793    /// Set type info for a module_binding variable
794    pub fn set_binding_type(&mut self, slot: u16, type_info: VariableTypeInfo) {
795        let resolved_info = if type_info.type_name.is_some() && type_info.schema_id.is_none() {
796            self.resolve_type_info(type_info)
797        } else {
798            type_info
799        };
800        self.binding_types.insert(slot, resolved_info);
801    }
802
803    /// Get type info for a local variable
804    pub fn get_local_type(&self, slot: u16) -> Option<&VariableTypeInfo> {
805        self.local_types.get(&slot)
806    }
807
808    /// Get type info for a module_binding variable
809    pub fn get_binding_type(&self, slot: u16) -> Option<&VariableTypeInfo> {
810        self.binding_types.get(&slot)
811    }
812
813    /// Register a function's return type
814    pub fn register_function_return_type(&mut self, func_name: &str, return_type: &str) {
815        self.function_return_types
816            .insert(func_name.to_string(), return_type.to_string());
817    }
818
819    /// Get a function's return type
820    pub fn get_function_return_type(&self, func_name: &str) -> Option<&String> {
821        self.function_return_types.get(func_name)
822    }
823
824    /// Register compile-time field type contracts for an object schema id.
825    pub fn register_object_field_contracts(
826        &mut self,
827        schema_id: SchemaId,
828        fields: HashMap<String, TypeAnnotation>,
829    ) {
830        self.object_field_contracts.insert(schema_id, fields);
831    }
832
833    /// Lookup a compile-time field type contract for a schema field.
834    pub fn get_object_field_contract(
835        &self,
836        schema_id: SchemaId,
837        field_name: &str,
838    ) -> Option<&TypeAnnotation> {
839        self.object_field_contracts
840            .get(&schema_id)
841            .and_then(|fields| fields.get(field_name))
842    }
843
844    /// Resolve type name to schema ID
845    fn resolve_type_info(&self, mut type_info: VariableTypeInfo) -> VariableTypeInfo {
846        if let Some(ref type_name) = type_info.type_name {
847            if let Some(schema) = self.schema_registry.get(type_name) {
848                type_info.schema_id = Some(schema.id);
849                type_info.is_definite = true;
850            }
851        }
852        type_info
853    }
854
855    /// Get field offset for typed field access
856    ///
857    /// Returns (schema_id, field_offset, field_index) if type and field are known
858    pub fn get_typed_field_info(
859        &self,
860        type_name: &str,
861        field_name: &str,
862    ) -> Option<(SchemaId, usize, u16)> {
863        let schema = self.schema_registry.get(type_name)?;
864        let field = schema.get_field(field_name)?;
865        Some((schema.id, field.offset, field.index))
866    }
867
868    /// Get column index for a RowView field access.
869    ///
870    /// Returns the field index (used as col_id for ColumnAccess operand)
871    /// if the variable is a RowView and the field exists in its schema.
872    pub fn get_row_view_column_id(
873        &self,
874        slot: u16,
875        is_local: bool,
876        field_name: &str,
877    ) -> Option<u32> {
878        let type_info = if is_local {
879            self.get_local_type(slot)?
880        } else {
881            self.get_binding_type(slot)?
882        };
883        if !type_info.is_row_view() {
884            return None;
885        }
886        let type_name = type_info.type_name.as_ref()?;
887        let schema = self.schema_registry.get(type_name)?;
888        let field = schema.get_field(field_name)?;
889        Some(field.index as u32)
890    }
891
892    /// Check if we can use typed field access for a variable and field
893    pub fn can_use_typed_access(&self, slot: u16, is_local: bool, field_name: &str) -> bool {
894        let type_info = if is_local {
895            self.get_local_type(slot)
896        } else {
897            self.get_binding_type(slot)
898        };
899
900        if let Some(info) = type_info {
901            if let Some(ref type_name) = info.type_name {
902                return self
903                    .schema_registry
904                    .field_offset(type_name, field_name)
905                    .is_some();
906            }
907        }
908        false
909    }
910
911    /// Get storage hint for a local variable
912    pub fn get_local_storage_hint(&self, slot: u16) -> StorageHint {
913        self.get_local_type(slot)
914            .map(|info| info.storage_hint)
915            .unwrap_or(StorageHint::Unknown)
916    }
917
918    /// Get storage hint for a module_binding variable
919    pub fn get_module_binding_storage_hint(&self, slot: u16) -> StorageHint {
920        self.get_binding_type(slot)
921            .map(|info| info.storage_hint)
922            .unwrap_or(StorageHint::Unknown)
923    }
924
925    /// Check if a local variable uses NaN sentinel for nullability
926    pub fn local_uses_nan_sentinel(&self, slot: u16) -> bool {
927        self.get_local_storage_hint(slot) == StorageHint::NullableFloat64
928    }
929
930    /// Check if a module_binding variable uses NaN sentinel for nullability
931    pub fn module_binding_uses_nan_sentinel(&self, slot: u16) -> bool {
932        self.get_module_binding_storage_hint(slot) == StorageHint::NullableFloat64
933    }
934
935    /// Clear all local type info (for function entry)
936    pub fn clear_locals(&mut self) {
937        self.local_types.clear();
938        self.local_type_scopes.clear();
939        self.local_type_scopes.push(HashMap::new());
940    }
941
942    /// Register an inline object schema from field names
943    ///
944    /// Creates a TypeSchema for an object literal with the given fields.
945    /// All fields are assumed to be `Any` type (NaN-boxed) since we don't
946    /// have full type inference at compile time.
947    ///
948    /// Returns the SchemaId for use with NewTypedObject opcode.
949    ///
950    /// # Example
951    /// ```ignore
952    /// // For: let x = { a: 1, b: "hello" }
953    /// let schema_id = tracker.register_inline_object_schema(&["a", "b"]);
954    /// // Now emit NewTypedObject with schema_id
955    /// ```
956    pub fn register_inline_object_schema(&mut self, field_names: &[&str]) -> SchemaId {
957        if let Some(existing) = self.schema_registry.type_names().find_map(|name| {
958            self.schema_registry.get(name).and_then(|schema| {
959                if schema.fields.len() != field_names.len() {
960                    return None;
961                }
962                let same_order = schema
963                    .fields
964                    .iter()
965                    .map(|f| f.name.as_str())
966                    .eq(field_names.iter().copied());
967                if same_order { Some(schema.id) } else { None }
968            })
969        }) {
970            return existing;
971        }
972
973        // Generate a unique name for this inline object type
974        let id = INLINE_OBJECT_COUNTER.fetch_add(1, Ordering::SeqCst);
975        let type_name = format!("__inline_obj_{}", id);
976
977        // Create field definitions - all fields are Any (NaN-boxed)
978        let fields: Vec<(String, FieldType)> = field_names
979            .iter()
980            .map(|name| (name.to_string(), FieldType::Any))
981            .collect();
982
983        // Create and register the schema
984        let schema = TypeSchema::new(&type_name, fields);
985        let schema_id = schema.id;
986        self.schema_registry.register(schema);
987
988        schema_id
989    }
990
991    /// Register an inline object schema with typed fields
992    ///
993    /// Like `register_inline_object_schema` but allows specifying field types
994    /// for better JIT optimization. Deduplicates by matching both field names
995    /// and types.
996    pub fn register_inline_object_schema_typed(
997        &mut self,
998        fields: &[(&str, FieldType)],
999    ) -> SchemaId {
1000        if let Some(existing) = self.schema_registry.type_names().find_map(|name| {
1001            self.schema_registry.get(name).and_then(|schema| {
1002                if schema.fields.len() != fields.len() {
1003                    return None;
1004                }
1005                let same = schema
1006                    .fields
1007                    .iter()
1008                    .zip(fields.iter())
1009                    .all(|(f, (n, t))| f.name == *n && f.field_type == *t);
1010                if same { Some(schema.id) } else { None }
1011            })
1012        }) {
1013            return existing;
1014        }
1015
1016        let id = INLINE_OBJECT_COUNTER.fetch_add(1, Ordering::SeqCst);
1017        let type_name = format!("__inline_obj_{}", id);
1018        let field_defs: Vec<(String, FieldType)> = fields
1019            .iter()
1020            .map(|(name, ft)| (name.to_string(), ft.clone()))
1021            .collect();
1022        let schema = TypeSchema::new(&type_name, field_defs);
1023        let schema_id = schema.id;
1024        self.schema_registry.register(schema);
1025        schema_id
1026    }
1027
1028    /// Register a named struct schema (e.g. `Point { x, y }`)
1029    ///
1030    /// Unlike `register_inline_object_schema` which auto-generates names,
1031    /// this uses the actual struct type name so `.type()` can resolve it.
1032    pub fn register_named_object_schema(
1033        &mut self,
1034        type_name: &str,
1035        fields: &[(&str, FieldType)],
1036    ) -> SchemaId {
1037        let field_defs: Vec<(String, FieldType)> = fields
1038            .iter()
1039            .map(|(name, ft)| (name.to_string(), ft.clone()))
1040            .collect();
1041
1042        let schema = TypeSchema::new(type_name, field_defs);
1043        let schema_id = schema.id;
1044        self.schema_registry.register(schema);
1045
1046        schema_id
1047    }
1048
1049    /// Register an inline object schema with typed fields
1050    ///
1051    /// Like `register_inline_object_schema` but allows specifying field types
1052    /// for better JIT optimization.
1053    pub fn register_typed_object_schema(
1054        &mut self,
1055        field_defs: Vec<(String, FieldType)>,
1056    ) -> SchemaId {
1057        let id = INLINE_OBJECT_COUNTER.fetch_add(1, Ordering::SeqCst);
1058        let type_name = format!("__inline_obj_{}", id);
1059
1060        let schema = TypeSchema::new(&type_name, field_defs);
1061        let schema_id = schema.id;
1062        self.schema_registry.register(schema);
1063
1064        schema_id
1065    }
1066}
1067
1068impl Default for TypeTracker {
1069    fn default() -> Self {
1070        Self::empty()
1071    }
1072}
1073
#[cfg(test)]
mod tests {
    use super::*;
    use shape_runtime::type_schema::TypeSchemaBuilder;

    /// A local typed with a registered schema allows typed access for the
    /// schema's fields and rejects a name the schema does not declare.
    #[test]
    fn test_basic_type_tracking() {
        let mut registry = TypeSchemaRegistry::new();
        TypeSchemaBuilder::new("Point")
            .f64_field("x")
            .f64_field("y")
            .register(&mut registry);

        let mut tracker = TypeTracker::new(registry);
        tracker.set_local_type(0, VariableTypeInfo::named(String::from("Point")));

        for declared in ["x", "y"] {
            assert!(tracker.can_use_typed_access(0, true, declared));
        }
        // "z" is not a field of Point.
        assert!(!tracker.can_use_typed_access(0, true, "z"));
    }

    /// Popping a scope drops the locals declared inside it and keeps the
    /// ones declared outside.
    #[test]
    fn test_scope_tracking() {
        let mut tracker = TypeTracker::empty();
        tracker.set_local_type(0, VariableTypeInfo::named(String::from("Outer")));

        tracker.push_scope();
        tracker.set_local_type(1, VariableTypeInfo::named(String::from("Inner")));

        // Both slots are visible while the inner scope is open.
        for slot in [0u16, 1u16] {
            assert!(tracker.get_local_type(slot).is_some());
        }

        tracker.pop_scope();

        assert!(tracker.get_local_type(0).is_some());
        assert!(tracker.get_local_type(1).is_none());
    }

    /// Registered function return types can be read back; unregistered
    /// names yield `None`.
    #[test]
    fn test_function_return_types() {
        let mut tracker = TypeTracker::empty();
        tracker.register_function_return_type("get_point", "Point");

        let registered = tracker
            .get_function_return_type("get_point")
            .map(String::as_str);
        assert_eq!(registered, Some("Point"));
        assert!(tracker.get_function_return_type("unknown").is_none());
    }

    /// Typed field info reports the schema id, byte offset and index of a
    /// field.
    #[test]
    fn test_typed_field_info() {
        let mut registry = TypeSchemaRegistry::new();
        TypeSchemaBuilder::new("Vector3")
            .f64_field("x")
            .f64_field("y")
            .f64_field("z")
            .register(&mut registry);

        let tracker = TypeTracker::new(registry);

        let (schema_id, offset, index) = tracker.get_typed_field_info("Vector3", "y").unwrap();
        assert!(schema_id > 0);
        // "y" is the second field: 8 bytes in, index 1.
        assert_eq!(offset, 8);
        assert_eq!(index, 1);
    }

    /// A slot with no tracked type never qualifies for typed access.
    #[test]
    fn test_unknown_type() {
        let tracker = TypeTracker::empty();
        let allowed = tracker.can_use_typed_access(0, true, "field");
        assert!(!allowed);
    }

    /// Module bindings are tracked separately from locals and support the
    /// same typed-access check.
    #[test]
    fn test_binding_type_tracking() {
        let mut registry = TypeSchemaRegistry::new();
        TypeSchemaBuilder::new("Config")
            .f64_field("threshold")
            .string_field("name")
            .register(&mut registry);

        let mut tracker = TypeTracker::new(registry);
        tracker.set_binding_type(5, VariableTypeInfo::named(String::from("Config")));

        for declared in ["threshold", "name"] {
            assert!(tracker.can_use_typed_access(5, false, declared));
        }
        assert!(!tracker.can_use_typed_access(5, false, "unknown"));
    }

    /// Storage hints inferred from type names: primitives, nullable
    /// wrappers, and an unrecognised custom name.
    #[test]
    fn test_storage_hint_inference() {
        let expectations = [
            // Primitive types
            ("Number", StorageHint::Float64),
            ("Integer", StorageHint::Int64),
            ("Bool", StorageHint::Bool),
            ("String", StorageHint::String),
            // Nullable types
            ("Option<Number>", StorageHint::NullableFloat64),
            ("Option<Integer>", StorageHint::NullableInt64),
            ("Option<byte>", StorageHint::NullableUInt8),
            ("Option<char>", StorageHint::NullableInt8),
            ("Option<u32>", StorageHint::NullableUInt32),
            // Unknown types
            ("SomeCustomType", StorageHint::Unknown),
        ];

        for (type_name, expected) in expectations {
            assert_eq!(VariableTypeInfo::infer_storage_hint(type_name), expected);
        }
    }

    /// Storage hints inferred for width-specific integer type names.
    #[test]
    fn test_width_integer_storage_hint_inference() {
        let expectations = [
            ("i8", StorageHint::Int8),
            ("byte", StorageHint::UInt8),
            ("char", StorageHint::Int8),
            ("u16", StorageHint::UInt16),
            ("i32", StorageHint::Int32),
            ("u64", StorageHint::UInt64),
            ("isize", StorageHint::IntSize),
            ("usize", StorageHint::UIntSize),
        ];

        for (type_name, expected) in expectations {
            assert_eq!(VariableTypeInfo::infer_storage_hint(type_name), expected);
        }
    }

    /// Runtime numeric names inferred from source-level type names.
    #[test]
    fn test_concrete_numeric_type_inference() {
        let expectations = [
            ("int", Some("i64")),
            ("i16", Some("i16")),
            ("byte", Some("u8")),
            ("Option<f32>", Some("f32")),
            ("SomeCustomType", None),
        ];

        for (type_name, expected) in expectations {
            let inferred = VariableTypeInfo::infer_numeric_runtime_name(type_name);
            assert_eq!(inferred.as_deref(), expected);
        }
    }

    /// Conversion from the runtime `StorageType` to the compiler's
    /// `StorageHint`.
    #[test]
    fn test_storage_hint_from_storage_type() {
        let expectations = [
            (StorageType::Float64, StorageHint::Float64),
            (StorageType::NullableFloat64, StorageHint::NullableFloat64),
            (StorageType::Dynamic, StorageHint::Unknown),
        ];

        for (storage_type, expected) in expectations {
            assert_eq!(StorageHint::from_storage_type(&storage_type), expected);
        }
    }

    /// The nullable-number constructor yields a NaN-sentinel storage hint.
    #[test]
    fn test_nullable_number_type() {
        let nullable = VariableTypeInfo::nullable_number();
        assert_eq!(nullable.storage_hint, StorageHint::NullableFloat64);
        assert!(nullable.uses_nan_sentinel());
    }

    /// RowView locals resolve field names to column ids; unknown fields and
    /// non-row-view locals resolve to `None`.
    #[test]
    fn test_row_view_column_id_resolution() {
        let mut registry = TypeSchemaRegistry::new();
        TypeSchemaBuilder::new("Candle")
            .f64_field("open")
            .f64_field("high")
            .f64_field("low")
            .f64_field("close")
            .i64_field("volume")
            .register(&mut registry);

        let mut tracker = TypeTracker::new(registry);
        let schema_id = tracker.schema_registry().get("Candle").unwrap().id;

        // Slot 0 is a RowView<Candle>.
        tracker.set_local_type(
            0,
            VariableTypeInfo::row_view(schema_id, String::from("Candle")),
        );

        let known_columns = [("open", 0), ("high", 1), ("close", 3), ("volume", 4)];
        for (field, column) in known_columns {
            assert_eq!(tracker.get_row_view_column_id(0, true, field), Some(column));
        }

        assert_eq!(tracker.get_row_view_column_id(0, true, "nonexistent"), None);

        // Slot 1 is a plain Candle, not a row view.
        tracker.set_local_type(1, VariableTypeInfo::named(String::from("Candle")));
        assert_eq!(tracker.get_row_view_column_id(1, true, "open"), None);
    }

    /// NaN-sentinel detection for nullable, plain, and untracked locals.
    #[test]
    fn test_tracker_storage_hints() {
        let mut tracker = TypeTracker::empty();

        tracker.set_local_type(0, VariableTypeInfo::nullable_number());
        tracker.set_local_type(1, VariableTypeInfo::number());

        assert!(tracker.local_uses_nan_sentinel(0));
        assert!(!tracker.local_uses_nan_sentinel(1));
        // Slot 99 was never assigned a type.
        assert!(!tracker.local_uses_nan_sentinel(99));
    }

    /// Datatable and row-view type infos are distinguishable and keep their
    /// schema id and type name.
    #[test]
    fn test_datatable_type_info() {
        let mut registry = TypeSchemaRegistry::new();
        TypeSchemaBuilder::new("Trade")
            .f64_field("price")
            .i64_field("volume")
            .string_field("symbol")
            .register(&mut registry);

        let mut tracker = TypeTracker::new(registry);
        let schema_id = tracker.schema_registry().get("Trade").unwrap().id;

        tracker.set_local_type(
            0,
            VariableTypeInfo::datatable(schema_id, String::from("Trade")),
        );
        let table_info = tracker.get_local_type(0).unwrap();
        assert!(table_info.is_datatable());
        assert!(!table_info.is_row_view());
        assert_eq!(table_info.schema_id, Some(schema_id));
        assert_eq!(table_info.type_name.as_deref(), Some("Trade"));

        // A row view over the same schema is not a datatable.
        tracker.set_local_type(
            1,
            VariableTypeInfo::row_view(schema_id, String::from("Trade")),
        );
        let row_info = tracker.get_local_type(1).unwrap();
        assert!(!row_info.is_datatable());
        assert!(row_info.is_row_view());
    }
}