plotnik_lib/emit/
type_table.rs

1//! Type table builder for bytecode emission.
2//!
3//! Converts query-level types (TypeContext) into bytecode-level types (BytecodeTypeId).
4
5use std::collections::{HashMap, HashSet};
6
7use plotnik_core::{Interner, Symbol};
8
9use crate::analyze::type_check::{
10    FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TypeContext, TypeId, TypeShape,
11};
12use crate::bytecode::{
13    StringId, TypeData, TypeDef, TypeId as BytecodeTypeId, TypeMember, TypeName,
14};
15use crate::type_system::TypeKind;
16
17use super::{EmitError, StringTableBuilder};
18
/// Builds the type metadata, remapping query TypeIds to bytecode BytecodeTypeIds.
///
/// Populated by `build`, then serialized via `emit`. Builtin types (Void/Node/String)
/// are emitted first, followed by custom types in definition order.
#[derive(Debug)]
pub struct TypeTableBuilder {
    /// Map from query TypeId to bytecode BytecodeTypeId.
    mapping: HashMap<TypeId, BytecodeTypeId>,
    /// Type definitions (4 bytes each). Index == BytecodeTypeId.0.
    type_defs: Vec<TypeDef>,
    /// Type members for structs/enums (4 bytes each), laid out contiguously per composite.
    type_members: Vec<TypeMember>,
    /// Type names for named types (4 bytes each).
    type_names: Vec<TypeName>,
    /// Cache for dynamically created Optional wrappers: base_type -> Optional(base_type)
    optional_wrappers: HashMap<BytecodeTypeId, BytecodeTypeId>,
    /// Cache for deduplicated members: (StringId, BytecodeTypeId) -> member_index.
    /// Same (name, type) pair → same member index globally.
    /// This enables call-site scoping where uncaptured refs share the caller's scope.
    member_cache: HashMap<(StringId, BytecodeTypeId), u16>,
}
37
38impl TypeTableBuilder {
39    pub fn new() -> Self {
40        Self {
41            mapping: HashMap::new(),
42            type_defs: Vec::new(),
43            type_members: Vec::new(),
44            type_names: Vec::new(),
45            optional_wrappers: HashMap::new(),
46            member_cache: HashMap::new(),
47        }
48    }
49
50    /// Build type table from TypeContext.
51    ///
52    /// Types are collected in definition order, depth-first, to mirror query structure.
53    /// Used builtins are emitted first, then custom types - no reserved slots.
54    pub fn build(
55        &mut self,
56        type_ctx: &TypeContext,
57        interner: &Interner,
58        strings: &mut StringTableBuilder,
59    ) -> Result<(), EmitError> {
60        // Collect custom types in definition order, depth-first
61        let mut ordered_types: Vec<TypeId> = Vec::new();
62        let mut seen: HashSet<TypeId> = HashSet::new();
63
64        // First walk from definition types (maintains order for entrypoints)
65        for (_def_id, type_id) in type_ctx.iter_def_types() {
66            collect_types_dfs(type_id, type_ctx, &mut ordered_types, &mut seen);
67        }
68
69        // Then collect any remaining interned types not reachable from definitions
70        // (e.g., enum types inside named nodes that don't propagate TypeFlow::Scalar)
71        for (type_id, _) in type_ctx.iter_types() {
72            collect_types_dfs(type_id, type_ctx, &mut ordered_types, &mut seen);
73        }
74
75        // Determine which builtins are actually used by scanning all types
76        let mut used_builtins = [false; 3]; // [Void, Node, String]
77        let mut seen = HashSet::new();
78        for &type_id in &ordered_types {
79            collect_builtin_refs(type_id, type_ctx, &mut used_builtins, &mut seen);
80        }
81        // Also check entrypoint result types directly
82        for (_def_id, type_id) in type_ctx.iter_def_types() {
83            if type_id == TYPE_VOID {
84                used_builtins[0] = true;
85            } else if type_id == TYPE_NODE {
86                used_builtins[1] = true;
87            } else if type_id == TYPE_STRING {
88                used_builtins[2] = true;
89            }
90        }
91
92        // Phase 1: Emit used builtins first (in order: Void, Node, String)
93        let builtin_types = [
94            (TYPE_VOID, TypeKind::Void),
95            (TYPE_NODE, TypeKind::Node),
96            (TYPE_STRING, TypeKind::String),
97        ];
98        for (i, &(builtin_id, kind)) in builtin_types.iter().enumerate() {
99            if used_builtins[i] {
100                let bc_id = BytecodeTypeId(self.type_defs.len() as u16);
101                self.mapping.insert(builtin_id, bc_id);
102                self.type_defs.push(TypeDef::builtin(kind));
103            }
104        }
105
106        // Phase 2: Pre-assign BytecodeTypeIds for custom types and reserve slots
107        for &type_id in &ordered_types {
108            let bc_id = BytecodeTypeId(self.type_defs.len() as u16);
109            self.mapping.insert(type_id, bc_id);
110            self.type_defs.push(TypeDef::placeholder());
111        }
112
113        // Phase 3: Fill in custom type definitions
114        // We need to calculate slot index as offset from where custom types start
115        let builtin_count = used_builtins.iter().filter(|&&b| b).count();
116        for (i, &type_id) in ordered_types.iter().enumerate() {
117            let slot_index = builtin_count + i;
118            let type_shape = type_ctx
119                .get_type(type_id)
120                .expect("collected type must exist");
121            self.emit_type_at_slot(slot_index, type_id, type_shape, type_ctx, interner, strings)?;
122        }
123
124        // Collect TypeName entries for named definitions
125        for (def_id, type_id) in type_ctx.iter_def_types() {
126            let name_sym = type_ctx.def_name_sym(def_id);
127            let name = strings.get_or_intern(name_sym, interner)?;
128            let bc_type_id = self
129                .mapping
130                .get(&type_id)
131                .copied()
132                .unwrap_or(BytecodeTypeId(0));
133            self.type_names.push(TypeName::new(name, bc_type_id));
134        }
135
136        // Collect TypeName entries for explicit type annotations on struct captures
137        // e.g., `{(fn) @fn} @outer :: FunctionInfo` names the struct "FunctionInfo"
138        for (type_id, name_sym) in type_ctx.iter_type_names() {
139            if let Some(&bc_type_id) = self.mapping.get(&type_id) {
140                let name = strings.get_or_intern(name_sym, interner)?;
141                self.type_names.push(TypeName::new(name, bc_type_id));
142            }
143        }
144
145        Ok(())
146    }
147
148    /// Fill in a TypeDef at a pre-allocated slot.
149    fn emit_type_at_slot(
150        &mut self,
151        slot_index: usize,
152        _type_id: TypeId,
153        type_shape: &TypeShape,
154        type_ctx: &TypeContext,
155        interner: &Interner,
156        strings: &mut StringTableBuilder,
157    ) -> Result<(), EmitError> {
158        match type_shape {
159            TypeShape::Void | TypeShape::Node | TypeShape::String => {
160                // Builtins - should not reach here
161                unreachable!("builtins should be handled separately")
162            }
163
164            TypeShape::Custom(sym) => {
165                // Custom type annotation: @x :: Identifier → type Identifier = Node
166                let bc_type_id = BytecodeTypeId(slot_index as u16);
167
168                // Add TypeName entry for the custom type
169                let name = strings.get_or_intern(*sym, interner)?;
170                self.type_names.push(TypeName::new(name, bc_type_id));
171
172                // Custom types alias Node - look up Node's actual bytecode ID
173                let node_bc_id = self
174                    .mapping
175                    .get(&TYPE_NODE)
176                    .copied()
177                    .unwrap_or(BytecodeTypeId(0));
178                self.type_defs[slot_index] = TypeDef::alias(node_bc_id);
179                Ok(())
180            }
181
182            TypeShape::Optional(inner) => {
183                let inner_bc = self.resolve_type(*inner, type_ctx)?;
184                self.type_defs[slot_index] = TypeDef::optional(inner_bc);
185                Ok(())
186            }
187
188            TypeShape::Array { element, non_empty } => {
189                let element_bc = self.resolve_type(*element, type_ctx)?;
190                self.type_defs[slot_index] = if *non_empty {
191                    TypeDef::array_plus(element_bc)
192                } else {
193                    TypeDef::array_star(element_bc)
194                };
195                Ok(())
196            }
197
198            TypeShape::Struct(fields) => {
199                // Resolve field types (this may create Optional wrappers at later indices)
200                let mut resolved_fields = Vec::with_capacity(fields.len());
201                for (field_sym, field_info) in fields {
202                    let field_name = strings.get_or_intern(*field_sym, interner)?;
203                    let field_type = self.resolve_field_type(field_info, type_ctx)?;
204                    resolved_fields.push((field_name, field_type));
205                }
206
207                // Emit members contiguously for this struct
208                let member_start = self.type_members.len() as u16;
209                for (field_name, field_type) in resolved_fields {
210                    self.type_members
211                        .push(TypeMember::new(field_name, field_type));
212                }
213
214                let member_count = fields.len() as u8;
215                self.type_defs[slot_index] = TypeDef::struct_type(member_start, member_count);
216                Ok(())
217            }
218
219            TypeShape::Enum(variants) => {
220                // Resolve variant types (this may create types at later indices)
221                let mut resolved_variants = Vec::with_capacity(variants.len());
222                for (variant_sym, variant_type_id) in variants {
223                    let variant_name = strings.get_or_intern(*variant_sym, interner)?;
224                    let variant_type = self.resolve_type(*variant_type_id, type_ctx)?;
225                    resolved_variants.push((variant_name, variant_type));
226                }
227
228                // Now emit the members and update the placeholder
229                let member_start = self.type_members.len() as u16;
230                for (variant_name, variant_type) in resolved_variants {
231                    self.type_members
232                        .push(TypeMember::new(variant_name, variant_type));
233                }
234
235                let member_count = variants.len() as u8;
236                self.type_defs[slot_index] = TypeDef::enum_type(member_start, member_count);
237                Ok(())
238            }
239
240            TypeShape::Ref(_def_id) => {
241                // Ref types are not emitted - they resolve to their target
242                unreachable!("Ref types should not be collected for emission")
243            }
244        }
245    }
246
247    /// Resolve a query TypeId to bytecode BytecodeTypeId.
248    ///
249    /// Handles Ref types by following the reference chain to the actual type.
250    pub fn resolve_type(
251        &self,
252        type_id: TypeId,
253        type_ctx: &TypeContext,
254    ) -> Result<BytecodeTypeId, EmitError> {
255        // Check if already mapped
256        if let Some(&bc_id) = self.mapping.get(&type_id) {
257            return Ok(bc_id);
258        }
259
260        // Handle Ref types by following the reference
261        if let Some(type_shape) = type_ctx.get_type(type_id)
262            && let TypeShape::Ref(def_id) = type_shape
263            && let Some(def_type_id) = type_ctx.get_def_type(*def_id)
264        {
265            return self.resolve_type(def_type_id, type_ctx);
266        }
267
268        // If not found, default to first type (should not happen for well-formed types)
269        Ok(BytecodeTypeId(0))
270    }
271
272    /// Resolve a field's type, handling optionality.
273    fn resolve_field_type(
274        &mut self,
275        field_info: &FieldInfo,
276        type_ctx: &TypeContext,
277    ) -> Result<BytecodeTypeId, EmitError> {
278        let base_type = self.resolve_type(field_info.type_id, type_ctx)?;
279
280        // If the field is optional, wrap it in Optional
281        if field_info.optional {
282            self.get_or_create_optional(base_type)
283        } else {
284            Ok(base_type)
285        }
286    }
287
288    /// Get or create an Optional wrapper for a base type.
289    fn get_or_create_optional(
290        &mut self,
291        base_type: BytecodeTypeId,
292    ) -> Result<BytecodeTypeId, EmitError> {
293        // Check cache first
294        if let Some(&optional_id) = self.optional_wrappers.get(&base_type) {
295            return Ok(optional_id);
296        }
297
298        // Create new Optional wrapper at the next available index
299        let optional_id = BytecodeTypeId(self.type_defs.len() as u16);
300        self.type_defs.push(TypeDef::optional(base_type));
301        self.optional_wrappers.insert(base_type, optional_id);
302        Ok(optional_id)
303    }
304
305    /// Validate that counts fit in u16.
306    pub fn validate(&self) -> Result<(), EmitError> {
307        if self.type_defs.len() > 65535 {
308            return Err(EmitError::TooManyTypes(self.type_defs.len()));
309        }
310        if self.type_members.len() > 65535 {
311            return Err(EmitError::TooManyTypeMembers(self.type_members.len()));
312        }
313        Ok(())
314    }
315
316    /// Get the bytecode BytecodeTypeId for a query TypeId.
317    pub fn get(&self, type_id: TypeId) -> Option<BytecodeTypeId> {
318        self.mapping.get(&type_id).copied()
319    }
320
321    /// Get the absolute member base index for a struct/enum type.
322    ///
323    /// For Struct and Enum types, returns the starting index in the TypeMembers table.
324    /// Fields/variants are at indices [base..base+count).
325    pub fn get_member_base(&self, type_id: TypeId) -> Option<u16> {
326        let bc_type_id = self.mapping.get(&type_id)?;
327        let type_def = self.type_defs.get(bc_type_id.0 as usize)?;
328        match type_def.classify() {
329            TypeData::Composite { member_start, .. } => Some(member_start),
330            _ => None,
331        }
332    }
333
334    /// Look up member index by field identity (name Symbol, value TypeId).
335    ///
336    /// Members are deduplicated globally: same (name, type) pair → same index.
337    /// This enables call-site scoping where uncaptured refs share the caller's scope.
338    pub fn lookup_member(
339        &self,
340        field_name: Symbol,
341        field_type: TypeId,
342        strings: &StringTableBuilder,
343    ) -> Option<u16> {
344        // Convert query-level identifiers to bytecode-level identifiers
345        let string_id = strings.get(field_name)?;
346        let type_id = self.mapping.get(&field_type)?;
347        self.member_cache.get(&(string_id, *type_id)).copied()
348    }
349
350    /// Emit type definitions, members, and names as bytes.
351    ///
352    /// Returns (type_defs_bytes, type_members_bytes, type_names_bytes).
353    pub fn emit(&self) -> (Vec<u8>, Vec<u8>, Vec<u8>) {
354        let mut defs_bytes = Vec::with_capacity(self.type_defs.len() * 4);
355        for def in &self.type_defs {
356            defs_bytes.extend_from_slice(&def.to_bytes());
357        }
358
359        let mut members_bytes = Vec::with_capacity(self.type_members.len() * 4);
360        for member in &self.type_members {
361            members_bytes.extend_from_slice(&member.to_bytes());
362        }
363
364        let mut names_bytes = Vec::with_capacity(self.type_names.len() * 4);
365        for type_name in &self.type_names {
366            names_bytes.extend_from_slice(&type_name.to_bytes());
367        }
368
369        (defs_bytes, members_bytes, names_bytes)
370    }
371
372    /// Number of type definitions.
373    pub fn type_defs_count(&self) -> usize {
374        self.type_defs.len()
375    }
376
377    /// Number of type members.
378    pub fn type_members_count(&self) -> usize {
379        self.type_members.len()
380    }
381
382    /// Number of type names.
383    pub fn type_names_count(&self) -> usize {
384        self.type_names.len()
385    }
386}
387
388impl Default for TypeTableBuilder {
389    fn default() -> Self {
390        Self::new()
391    }
392}
393
394/// Collect types depth-first starting from a root type.
395fn collect_types_dfs(
396    type_id: TypeId,
397    type_ctx: &TypeContext,
398    out: &mut Vec<TypeId>,
399    seen: &mut HashSet<TypeId>,
400) {
401    // Skip builtins and already-seen types
402    if type_id.is_builtin() || seen.contains(&type_id) {
403        return;
404    }
405
406    let Some(type_shape) = type_ctx.get_type(type_id) else {
407        return;
408    };
409
410    // Resolve Ref types to their target
411    if let TypeShape::Ref(def_id) = type_shape {
412        if let Some(target_id) = type_ctx.get_def_type(*def_id) {
413            collect_types_dfs(target_id, type_ctx, out, seen);
414        }
415        return;
416    }
417
418    seen.insert(type_id);
419
420    // Collect children first (depth-first), then add self
421    match type_shape {
422        TypeShape::Struct(fields) => {
423            for field_info in fields.values() {
424                collect_types_dfs(field_info.type_id, type_ctx, out, seen);
425            }
426            out.push(type_id);
427        }
428        TypeShape::Enum(variants) => {
429            for &variant_type_id in variants.values() {
430                collect_types_dfs(variant_type_id, type_ctx, out, seen);
431            }
432            out.push(type_id);
433        }
434        TypeShape::Array { element, .. } => {
435            // Collect element type first, then add the Array itself
436            collect_types_dfs(*element, type_ctx, out, seen);
437            out.push(type_id);
438        }
439        TypeShape::Optional(inner) => {
440            // Collect inner type first, then add the Optional itself
441            collect_types_dfs(*inner, type_ctx, out, seen);
442            out.push(type_id);
443        }
444        TypeShape::Custom(_) => {
445            // Custom types alias Node, no children to collect
446            out.push(type_id);
447        }
448        _ => {}
449    }
450}
451
452/// Collect which builtin types are referenced by a type.
453fn collect_builtin_refs(
454    type_id: TypeId,
455    type_ctx: &TypeContext,
456    used: &mut [bool; 3],
457    seen: &mut HashSet<TypeId>,
458) {
459    if !seen.insert(type_id) {
460        return;
461    }
462
463    let Some(type_shape) = type_ctx.get_type(type_id) else {
464        return;
465    };
466
467    match type_shape {
468        TypeShape::Void => used[0] = true,
469        TypeShape::Node => used[1] = true,
470        TypeShape::String => used[2] = true,
471        TypeShape::Custom(_) => used[1] = true, // Custom types alias Node
472        TypeShape::Struct(fields) => {
473            for field_info in fields.values() {
474                collect_builtin_refs(field_info.type_id, type_ctx, used, seen);
475            }
476        }
477        TypeShape::Enum(variants) => {
478            for &variant_type_id in variants.values() {
479                collect_builtin_refs(variant_type_id, type_ctx, used, seen);
480            }
481        }
482        TypeShape::Array { element, .. } => {
483            collect_builtin_refs(*element, type_ctx, used, seen);
484        }
485        TypeShape::Optional(inner) => {
486            collect_builtin_refs(*inner, type_ctx, used, seen);
487        }
488        TypeShape::Ref(def_id) => {
489            if let Some(target_id) = type_ctx.get_def_type(*def_id) {
490                collect_builtin_refs(target_id, type_ctx, used, seen);
491            }
492        }
493    }
494}