plotnik_compiler/emit/
type_table.rs

1//! Type table builder for bytecode emission.
2//!
3//! Converts query-level types (TypeContext) into bytecode-level types (BytecodeTypeId).
4
5use std::collections::{HashMap, HashSet};
6
7use plotnik_core::{Interner, Symbol};
8
9use crate::analyze::type_check::{
10    FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TypeContext, TypeId, TypeShape,
11};
12use plotnik_bytecode::{
13    StringId, TypeData, TypeDef, TypeId as BytecodeTypeId, TypeKind, TypeMember, TypeName,
14};
15
16use super::{EmitError, StringTableBuilder};
17
/// Builds the type metadata, remapping query TypeIds to bytecode BytecodeTypeIds.
///
/// Populated by `build`, then serialized to raw bytes with `emit`. All three
/// tables (`type_defs`, `type_members`, `type_names`) use fixed-width 4-byte
/// entries, so a slot's byte offset is simply `index * 4`.
#[derive(Debug)]
pub struct TypeTableBuilder {
    /// Map from query TypeId to bytecode BytecodeTypeId.
    mapping: HashMap<TypeId, BytecodeTypeId>,
    /// Type definitions (4 bytes each). Index doubles as the BytecodeTypeId.
    type_defs: Vec<TypeDef>,
    /// Type members for structs/enums (4 bytes each), stored as contiguous
    /// [start..start+count) runs referenced by composite TypeDefs.
    type_members: Vec<TypeMember>,
    /// Type names for named types (4 bytes each).
    type_names: Vec<TypeName>,
    /// Cache for dynamically created Optional wrappers: base_type -> Optional(base_type)
    optional_wrappers: HashMap<BytecodeTypeId, BytecodeTypeId>,
    /// Cache for deduplicated members: (StringId, BytecodeTypeId) -> member_index.
    /// Same (name, type) pair → same member index globally.
    /// This enables call-site scoping where uncaptured refs share the caller's scope.
    member_cache: HashMap<(StringId, BytecodeTypeId), u16>,
}
36
37impl TypeTableBuilder {
38    pub fn new() -> Self {
39        Self {
40            mapping: HashMap::new(),
41            type_defs: Vec::new(),
42            type_members: Vec::new(),
43            type_names: Vec::new(),
44            optional_wrappers: HashMap::new(),
45            member_cache: HashMap::new(),
46        }
47    }
48
49    /// Build type table from TypeContext.
50    ///
51    /// Types are collected in definition order, depth-first, to mirror query structure.
52    /// Used builtins are emitted first, then custom types - no reserved slots.
53    pub fn build(
54        &mut self,
55        type_ctx: &TypeContext,
56        interner: &Interner,
57        strings: &mut StringTableBuilder,
58    ) -> Result<(), EmitError> {
59        // Collect custom types in definition order, depth-first
60        let mut ordered_types: Vec<TypeId> = Vec::new();
61        let mut seen: HashSet<TypeId> = HashSet::new();
62
63        // First walk from definition types (maintains order for entrypoints)
64        for (_def_id, type_id) in type_ctx.iter_def_types() {
65            collect_types_dfs(type_id, type_ctx, &mut ordered_types, &mut seen);
66        }
67
68        // Then collect any remaining interned types not reachable from definitions
69        // (e.g., enum types inside named nodes that don't propagate TypeFlow::Scalar)
70        for (type_id, _) in type_ctx.iter_types() {
71            collect_types_dfs(type_id, type_ctx, &mut ordered_types, &mut seen);
72        }
73
74        // Determine which builtins are actually used by scanning all types
75        let mut used_builtins = [false; 3]; // [Void, Node, String]
76        let mut seen = HashSet::new();
77        for &type_id in &ordered_types {
78            collect_builtin_refs(type_id, type_ctx, &mut used_builtins, &mut seen);
79        }
80        // Also check entrypoint result types directly
81        for (_def_id, type_id) in type_ctx.iter_def_types() {
82            if type_id == TYPE_VOID {
83                used_builtins[0] = true;
84            } else if type_id == TYPE_NODE {
85                used_builtins[1] = true;
86            } else if type_id == TYPE_STRING {
87                used_builtins[2] = true;
88            }
89        }
90
91        // Phase 1: Emit used builtins first (in order: Void, Node, String)
92        let builtin_types = [
93            (TYPE_VOID, TypeKind::Void),
94            (TYPE_NODE, TypeKind::Node),
95            (TYPE_STRING, TypeKind::String),
96        ];
97        for (i, &(builtin_id, kind)) in builtin_types.iter().enumerate() {
98            if used_builtins[i] {
99                let bc_id = BytecodeTypeId(self.type_defs.len() as u16);
100                self.mapping.insert(builtin_id, bc_id);
101                self.type_defs.push(TypeDef::builtin(kind));
102            }
103        }
104
105        // Phase 2: Pre-assign BytecodeTypeIds for custom types and reserve slots
106        for &type_id in &ordered_types {
107            let bc_id = BytecodeTypeId(self.type_defs.len() as u16);
108            self.mapping.insert(type_id, bc_id);
109            self.type_defs.push(TypeDef::placeholder());
110        }
111
112        // Phase 3: Fill in custom type definitions
113        // We need to calculate slot index as offset from where custom types start
114        let builtin_count = used_builtins.iter().filter(|&&b| b).count();
115        for (i, &type_id) in ordered_types.iter().enumerate() {
116            let slot_index = builtin_count + i;
117            let type_shape = type_ctx
118                .get_type(type_id)
119                .expect("collected type must exist");
120            self.emit_type_at_slot(slot_index, type_id, type_shape, type_ctx, interner, strings)?;
121        }
122
123        // Collect TypeName entries for named definitions
124        for (def_id, type_id) in type_ctx.iter_def_types() {
125            let name_sym = type_ctx.def_name_sym(def_id);
126            let name = strings.get_or_intern(name_sym, interner)?;
127            let bc_type_id = self
128                .mapping
129                .get(&type_id)
130                .copied()
131                .unwrap_or(BytecodeTypeId(0));
132            self.type_names.push(TypeName::new(name, bc_type_id));
133        }
134
135        // Collect TypeName entries for explicit type annotations on struct captures
136        // e.g., `{(fn) @fn} @outer :: FunctionInfo` names the struct "FunctionInfo"
137        for (type_id, name_sym) in type_ctx.iter_type_names() {
138            if let Some(&bc_type_id) = self.mapping.get(&type_id) {
139                let name = strings.get_or_intern(name_sym, interner)?;
140                self.type_names.push(TypeName::new(name, bc_type_id));
141            }
142        }
143
144        Ok(())
145    }
146
147    /// Fill in a TypeDef at a pre-allocated slot.
148    fn emit_type_at_slot(
149        &mut self,
150        slot_index: usize,
151        _type_id: TypeId,
152        type_shape: &TypeShape,
153        type_ctx: &TypeContext,
154        interner: &Interner,
155        strings: &mut StringTableBuilder,
156    ) -> Result<(), EmitError> {
157        match type_shape {
158            TypeShape::Void | TypeShape::Node | TypeShape::String => {
159                // Builtins - should not reach here
160                unreachable!("builtins should be handled separately")
161            }
162
163            TypeShape::Custom(sym) => {
164                // Custom type annotation: @x :: Identifier → type Identifier = Node
165                let bc_type_id = BytecodeTypeId(slot_index as u16);
166
167                // Add TypeName entry for the custom type
168                let name = strings.get_or_intern(*sym, interner)?;
169                self.type_names.push(TypeName::new(name, bc_type_id));
170
171                // Custom types alias Node - look up Node's actual bytecode ID
172                let node_bc_id = self
173                    .mapping
174                    .get(&TYPE_NODE)
175                    .copied()
176                    .unwrap_or(BytecodeTypeId(0));
177                self.type_defs[slot_index] = TypeDef::alias(node_bc_id);
178                Ok(())
179            }
180
181            TypeShape::Optional(inner) => {
182                let inner_bc = self.resolve_type(*inner, type_ctx)?;
183                self.type_defs[slot_index] = TypeDef::optional(inner_bc);
184                Ok(())
185            }
186
187            TypeShape::Array { element, non_empty } => {
188                let element_bc = self.resolve_type(*element, type_ctx)?;
189                self.type_defs[slot_index] = if *non_empty {
190                    TypeDef::array_plus(element_bc)
191                } else {
192                    TypeDef::array_star(element_bc)
193                };
194                Ok(())
195            }
196
197            TypeShape::Struct(fields) => {
198                // Resolve field types (this may create Optional wrappers at later indices)
199                let mut resolved_fields = Vec::with_capacity(fields.len());
200                for (field_sym, field_info) in fields {
201                    let field_name = strings.get_or_intern(*field_sym, interner)?;
202                    let field_type = self.resolve_field_type(field_info, type_ctx)?;
203                    resolved_fields.push((field_name, field_type));
204                }
205
206                // Emit members contiguously for this struct
207                let member_start = self.type_members.len() as u16;
208                for (field_name, field_type) in resolved_fields {
209                    self.type_members
210                        .push(TypeMember::new(field_name, field_type));
211                }
212
213                let member_count = fields.len() as u8;
214                self.type_defs[slot_index] = TypeDef::struct_type(member_start, member_count);
215                Ok(())
216            }
217
218            TypeShape::Enum(variants) => {
219                // Resolve variant types (this may create types at later indices)
220                let mut resolved_variants = Vec::with_capacity(variants.len());
221                for (variant_sym, variant_type_id) in variants {
222                    let variant_name = strings.get_or_intern(*variant_sym, interner)?;
223                    let variant_type = self.resolve_type(*variant_type_id, type_ctx)?;
224                    resolved_variants.push((variant_name, variant_type));
225                }
226
227                // Now emit the members and update the placeholder
228                let member_start = self.type_members.len() as u16;
229                for (variant_name, variant_type) in resolved_variants {
230                    self.type_members
231                        .push(TypeMember::new(variant_name, variant_type));
232                }
233
234                let member_count = variants.len() as u8;
235                self.type_defs[slot_index] = TypeDef::enum_type(member_start, member_count);
236                Ok(())
237            }
238
239            TypeShape::Ref(_def_id) => {
240                // Ref types are not emitted - they resolve to their target
241                unreachable!("Ref types should not be collected for emission")
242            }
243        }
244    }
245
246    /// Resolve a query TypeId to bytecode BytecodeTypeId.
247    ///
248    /// Handles Ref types by following the reference chain to the actual type.
249    pub fn resolve_type(
250        &self,
251        type_id: TypeId,
252        type_ctx: &TypeContext,
253    ) -> Result<BytecodeTypeId, EmitError> {
254        // Check if already mapped
255        if let Some(&bc_id) = self.mapping.get(&type_id) {
256            return Ok(bc_id);
257        }
258
259        // Handle Ref types by following the reference
260        if let Some(type_shape) = type_ctx.get_type(type_id)
261            && let TypeShape::Ref(def_id) = type_shape
262            && let Some(def_type_id) = type_ctx.get_def_type(*def_id)
263        {
264            return self.resolve_type(def_type_id, type_ctx);
265        }
266
267        // If not found, default to first type (should not happen for well-formed types)
268        Ok(BytecodeTypeId(0))
269    }
270
271    /// Resolve a field's type, handling optionality.
272    fn resolve_field_type(
273        &mut self,
274        field_info: &FieldInfo,
275        type_ctx: &TypeContext,
276    ) -> Result<BytecodeTypeId, EmitError> {
277        let base_type = self.resolve_type(field_info.type_id, type_ctx)?;
278
279        // If the field is optional, wrap it in Optional
280        if field_info.optional {
281            self.get_or_create_optional(base_type)
282        } else {
283            Ok(base_type)
284        }
285    }
286
287    /// Get or create an Optional wrapper for a base type.
288    fn get_or_create_optional(
289        &mut self,
290        base_type: BytecodeTypeId,
291    ) -> Result<BytecodeTypeId, EmitError> {
292        // Check cache first
293        if let Some(&optional_id) = self.optional_wrappers.get(&base_type) {
294            return Ok(optional_id);
295        }
296
297        // Create new Optional wrapper at the next available index
298        let optional_id = BytecodeTypeId(self.type_defs.len() as u16);
299        self.type_defs.push(TypeDef::optional(base_type));
300        self.optional_wrappers.insert(base_type, optional_id);
301        Ok(optional_id)
302    }
303
304    /// Validate that counts fit in u16.
305    pub fn validate(&self) -> Result<(), EmitError> {
306        if self.type_defs.len() > 65535 {
307            return Err(EmitError::TooManyTypes(self.type_defs.len()));
308        }
309        if self.type_members.len() > 65535 {
310            return Err(EmitError::TooManyTypeMembers(self.type_members.len()));
311        }
312        Ok(())
313    }
314
315    /// Get the bytecode BytecodeTypeId for a query TypeId.
316    pub fn get(&self, type_id: TypeId) -> Option<BytecodeTypeId> {
317        self.mapping.get(&type_id).copied()
318    }
319
320    /// Get the absolute member base index for a struct/enum type.
321    ///
322    /// For Struct and Enum types, returns the starting index in the TypeMembers table.
323    /// Fields/variants are at indices [base..base+count).
324    pub fn get_member_base(&self, type_id: TypeId) -> Option<u16> {
325        let bc_type_id = self.mapping.get(&type_id)?;
326        let type_def = self.type_defs.get(bc_type_id.0 as usize)?;
327        match type_def.classify() {
328            TypeData::Composite { member_start, .. } => Some(member_start),
329            _ => None,
330        }
331    }
332
333    /// Look up member index by field identity (name Symbol, value TypeId).
334    ///
335    /// Members are deduplicated globally: same (name, type) pair → same index.
336    /// This enables call-site scoping where uncaptured refs share the caller's scope.
337    pub fn lookup_member(
338        &self,
339        field_name: Symbol,
340        field_type: TypeId,
341        strings: &StringTableBuilder,
342    ) -> Option<u16> {
343        // Convert query-level identifiers to bytecode-level identifiers
344        let string_id = strings.get(field_name)?;
345        let type_id = self.mapping.get(&field_type)?;
346        self.member_cache.get(&(string_id, *type_id)).copied()
347    }
348
349    /// Emit type definitions, members, and names as bytes.
350    ///
351    /// Returns (type_defs_bytes, type_members_bytes, type_names_bytes).
352    pub fn emit(&self) -> (Vec<u8>, Vec<u8>, Vec<u8>) {
353        let mut defs_bytes = Vec::with_capacity(self.type_defs.len() * 4);
354        for def in &self.type_defs {
355            defs_bytes.extend_from_slice(&def.to_bytes());
356        }
357
358        let mut members_bytes = Vec::with_capacity(self.type_members.len() * 4);
359        for member in &self.type_members {
360            members_bytes.extend_from_slice(&member.to_bytes());
361        }
362
363        let mut names_bytes = Vec::with_capacity(self.type_names.len() * 4);
364        for type_name in &self.type_names {
365            names_bytes.extend_from_slice(&type_name.to_bytes());
366        }
367
368        (defs_bytes, members_bytes, names_bytes)
369    }
370
371    /// Number of type definitions.
372    pub fn type_defs_count(&self) -> usize {
373        self.type_defs.len()
374    }
375
376    /// Number of type members.
377    pub fn type_members_count(&self) -> usize {
378        self.type_members.len()
379    }
380
381    /// Number of type names.
382    pub fn type_names_count(&self) -> usize {
383        self.type_names.len()
384    }
385}
386
387impl Default for TypeTableBuilder {
388    fn default() -> Self {
389        Self::new()
390    }
391}
392
393/// Collect types depth-first starting from a root type.
394fn collect_types_dfs(
395    type_id: TypeId,
396    type_ctx: &TypeContext,
397    out: &mut Vec<TypeId>,
398    seen: &mut HashSet<TypeId>,
399) {
400    // Skip builtins and already-seen types
401    if type_id.is_builtin() || seen.contains(&type_id) {
402        return;
403    }
404
405    let Some(type_shape) = type_ctx.get_type(type_id) else {
406        return;
407    };
408
409    // Resolve Ref types to their target
410    if let TypeShape::Ref(def_id) = type_shape {
411        if let Some(target_id) = type_ctx.get_def_type(*def_id) {
412            collect_types_dfs(target_id, type_ctx, out, seen);
413        }
414        return;
415    }
416
417    seen.insert(type_id);
418
419    // Collect children first (depth-first), then add self
420    match type_shape {
421        TypeShape::Struct(fields) => {
422            for field_info in fields.values() {
423                collect_types_dfs(field_info.type_id, type_ctx, out, seen);
424            }
425            out.push(type_id);
426        }
427        TypeShape::Enum(variants) => {
428            for &variant_type_id in variants.values() {
429                collect_types_dfs(variant_type_id, type_ctx, out, seen);
430            }
431            out.push(type_id);
432        }
433        TypeShape::Array { element, .. } => {
434            // Collect element type first, then add the Array itself
435            collect_types_dfs(*element, type_ctx, out, seen);
436            out.push(type_id);
437        }
438        TypeShape::Optional(inner) => {
439            // Collect inner type first, then add the Optional itself
440            collect_types_dfs(*inner, type_ctx, out, seen);
441            out.push(type_id);
442        }
443        TypeShape::Custom(_) => {
444            // Custom types alias Node, no children to collect
445            out.push(type_id);
446        }
447        _ => {}
448    }
449}
450
451/// Collect which builtin types are referenced by a type.
452fn collect_builtin_refs(
453    type_id: TypeId,
454    type_ctx: &TypeContext,
455    used: &mut [bool; 3],
456    seen: &mut HashSet<TypeId>,
457) {
458    if !seen.insert(type_id) {
459        return;
460    }
461
462    let Some(type_shape) = type_ctx.get_type(type_id) else {
463        return;
464    };
465
466    match type_shape {
467        TypeShape::Void => used[0] = true,
468        TypeShape::Node => used[1] = true,
469        TypeShape::String => used[2] = true,
470        TypeShape::Custom(_) => used[1] = true, // Custom types alias Node
471        TypeShape::Struct(fields) => {
472            for field_info in fields.values() {
473                collect_builtin_refs(field_info.type_id, type_ctx, used, seen);
474            }
475        }
476        TypeShape::Enum(variants) => {
477            for &variant_type_id in variants.values() {
478                collect_builtin_refs(variant_type_id, type_ctx, used, seen);
479            }
480        }
481        TypeShape::Array { element, .. } => {
482            collect_builtin_refs(*element, type_ctx, used, seen);
483        }
484        TypeShape::Optional(inner) => {
485            collect_builtin_refs(*inner, type_ctx, used, seen);
486        }
487        TypeShape::Ref(def_id) => {
488            if let Some(target_id) = type_ctx.get_def_type(*def_id) {
489                collect_builtin_refs(target_id, type_ctx, used, seen);
490            }
491        }
492    }
493}