plotnik_lib/emit/
emitter.rs

1//! Core bytecode emission logic.
2//!
3//! Contains the main entry points for emitting bytecode from compiled queries.
4
5use indexmap::IndexMap;
6use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol};
7
8use crate::analyze::symbol_table::SymbolTable;
9use crate::analyze::type_check::{TypeContext, TypeId};
10use crate::bytecode::Label;
11use crate::bytecode::{
12    Entrypoint, FieldSymbol, Header, NodeSymbol, SECTION_ALIGN, TriviaEntry, TypeMetaHeader,
13};
14use crate::compile::Compiler;
15use crate::query::LinkedQuery;
16
17use super::EmitError;
18use super::layout::CacheAligned;
19use super::string_table::StringTableBuilder;
20use super::type_table::TypeTableBuilder;
21
22/// Emit bytecode from type context only (no node validation).
23pub fn emit(
24    type_ctx: &TypeContext,
25    interner: &Interner,
26    symbol_table: &SymbolTable,
27) -> Result<Vec<u8>, EmitError> {
28    emit_inner(type_ctx, interner, symbol_table, None, None)
29}
30
31/// Emit bytecode from a LinkedQuery (includes node type/field validation info).
32pub fn emit_linked(query: &LinkedQuery) -> Result<Vec<u8>, EmitError> {
33    emit_inner(
34        query.type_context(),
35        query.interner(),
36        &query.symbol_table,
37        Some(query.node_type_ids()),
38        Some(query.node_field_ids()),
39    )
40}
41
42/// Shared bytecode emission logic.
43fn emit_inner(
44    type_ctx: &TypeContext,
45    interner: &Interner,
46    symbol_table: &SymbolTable,
47    node_type_ids: Option<&IndexMap<Symbol, NodeTypeId>>,
48    node_field_ids: Option<&IndexMap<Symbol, NodeFieldId>>,
49) -> Result<Vec<u8>, EmitError> {
50    let is_linked = node_type_ids.is_some();
51    let mut strings = StringTableBuilder::new();
52    let mut types = TypeTableBuilder::new();
53    types.build(type_ctx, interner, &mut strings)?;
54
55    // Compile transitions (strings are interned here for unlinked mode)
56    let compile_result = Compiler::compile(
57        interner,
58        type_ctx,
59        symbol_table,
60        &mut strings,
61        node_type_ids,
62        node_field_ids,
63    )
64    .map_err(EmitError::Compile)?;
65
66    // Layout with cache alignment
67    // Preamble entry FIRST ensures it gets the lowest address (step 0)
68    let mut entry_labels: Vec<Label> = vec![compile_result.preamble_entry];
69    entry_labels.extend(compile_result.def_entries.values().copied());
70    let layout = CacheAligned::layout(&compile_result.instructions, &entry_labels);
71
72    // Validate transition count
73    if layout.total_steps as usize > 65535 {
74        return Err(EmitError::TooManyTransitions(layout.total_steps as usize));
75    }
76
77    // Collect node symbols (empty if not linked)
78    let mut node_symbols: Vec<NodeSymbol> = Vec::new();
79    if let Some(ids) = node_type_ids {
80        for (&sym, &node_id) in ids {
81            let name = strings.get_or_intern(sym, interner)?;
82            node_symbols.push(NodeSymbol::new(node_id.get(), name));
83        }
84    }
85
86    // Collect field symbols (empty if not linked)
87    let mut field_symbols: Vec<FieldSymbol> = Vec::new();
88    if let Some(ids) = node_field_ids {
89        for (&sym, &field_id) in ids {
90            let name = strings.get_or_intern(sym, interner)?;
91            field_symbols.push(FieldSymbol::new(field_id.get(), name));
92        }
93    }
94
95    // Collect entrypoints with actual targets from layout
96    let mut entrypoints: Vec<Entrypoint> = Vec::new();
97    for (def_id, type_id) in type_ctx.iter_def_types() {
98        let name_sym = type_ctx.def_name_sym(def_id);
99        let name = strings.get_or_intern(name_sym, interner)?;
100        let result_type = types.resolve_type(type_id, type_ctx)?;
101
102        // Get actual target from compiled result
103        let target = compile_result
104            .def_entries
105            .get(&def_id)
106            .and_then(|label| layout.label_to_step().get(label))
107            .copied()
108            .expect("entrypoint must have compiled target");
109
110        entrypoints.push(Entrypoint::new(name, target, result_type));
111    }
112
113    // Validate counts
114    strings.validate()?;
115    types.validate()?;
116    if entrypoints.len() > 65535 {
117        return Err(EmitError::TooManyEntrypoints(entrypoints.len()));
118    }
119
120    // Trivia (empty for now)
121    let trivia_entries: Vec<TriviaEntry> = Vec::new();
122
123    // Resolve and serialize transitions
124    let transitions_bytes =
125        emit_transitions(&compile_result.instructions, &layout, &types, &strings);
126
127    // Emit all byte sections
128    let (str_blob, str_table) = strings.emit();
129    let (type_defs_bytes, type_members_bytes, type_names_bytes) = types.emit();
130
131    let node_types_bytes = emit_node_symbols(&node_symbols);
132    let node_fields_bytes = emit_field_symbols(&field_symbols);
133    let trivia_bytes = emit_trivia(&trivia_entries);
134    let entrypoints_bytes = emit_entrypoints(&entrypoints);
135
136    // Build output with sections
137    let mut output = vec![0u8; 64]; // Reserve header space
138
139    let str_blob_offset = emit_section(&mut output, &str_blob);
140    let str_table_offset = emit_section(&mut output, &str_table);
141    let node_types_offset = emit_section(&mut output, &node_types_bytes);
142    let node_fields_offset = emit_section(&mut output, &node_fields_bytes);
143    let trivia_offset = emit_section(&mut output, &trivia_bytes);
144
145    // Type metadata section (header + 3 aligned sub-sections)
146    let type_meta_offset = emit_section(
147        &mut output,
148        &TypeMetaHeader::new(
149            types.type_defs_count() as u16,
150            types.type_members_count() as u16,
151            types.type_names_count() as u16,
152        )
153        .to_bytes(),
154    );
155    emit_section(&mut output, &type_defs_bytes);
156    emit_section(&mut output, &type_members_bytes);
157    emit_section(&mut output, &type_names_bytes);
158
159    let entrypoints_offset = emit_section(&mut output, &entrypoints_bytes);
160    let transitions_offset = emit_section(&mut output, &transitions_bytes);
161
162    pad_to_section(&mut output);
163    let total_size = output.len() as u32;
164
165    // Build and write header
166    let mut header = Header {
167        str_blob_offset,
168        str_table_offset,
169        node_types_offset,
170        node_fields_offset,
171        trivia_offset,
172        type_meta_offset,
173        entrypoints_offset,
174        transitions_offset,
175        str_table_count: strings.len() as u16,
176        node_types_count: node_symbols.len() as u16,
177        node_fields_count: field_symbols.len() as u16,
178        trivia_count: trivia_entries.len() as u16,
179        entrypoints_count: entrypoints.len() as u16,
180        transitions_count: layout.total_steps,
181        total_size,
182        ..Default::default()
183    };
184    header.set_linked(is_linked);
185    header.checksum = crc32fast::hash(&output[64..]);
186    output[..64].copy_from_slice(&header.to_bytes());
187
188    Ok(output)
189}
190
191/// Pad a buffer to the section alignment boundary.
192fn pad_to_section(buf: &mut Vec<u8>) {
193    let rem = buf.len() % SECTION_ALIGN;
194    if rem != 0 {
195        let padding = SECTION_ALIGN - rem;
196        buf.resize(buf.len() + padding, 0);
197    }
198}
199
200/// Emit transitions section from instructions and layout.
201fn emit_transitions(
202    instructions: &[crate::bytecode::InstructionIR],
203    layout: &crate::bytecode::LayoutResult,
204    types: &TypeTableBuilder,
205    strings: &StringTableBuilder,
206) -> Vec<u8> {
207    // Allocate buffer for all steps (8 bytes each)
208    let mut bytes = vec![0u8; layout.total_steps as usize * 8];
209
210    // Create resolver closures for member indices.
211    // lookup_member: for struct fields (deduplicated by field identity)
212    // get_member_base: for enum variants (parent_type + relative_index)
213    let lookup_member = |field_name: Symbol, field_type: TypeId| {
214        types.lookup_member(field_name, field_type, strings)
215    };
216    let get_member_base = |type_id: TypeId| types.get_member_base(type_id);
217
218    for instr in instructions {
219        let label = instr.label();
220        let Some(&step_id) = layout.label_to_step.get(&label) else {
221            continue;
222        };
223
224        let offset = step_id as usize * 8; // STEP_SIZE
225        let resolved = instr.resolve(&layout.label_to_step, lookup_member, get_member_base);
226
227        // Copy instruction bytes to the correct position
228        let end = offset + resolved.len();
229        if end <= bytes.len() {
230            bytes[offset..end].copy_from_slice(&resolved);
231        }
232    }
233
234    bytes
235}
236
237fn emit_section(output: &mut Vec<u8>, data: &[u8]) -> u32 {
238    pad_to_section(output);
239    let offset = output.len() as u32;
240    output.extend_from_slice(data);
241    offset
242}
243
244fn emit_node_symbols(symbols: &[NodeSymbol]) -> Vec<u8> {
245    let mut bytes = Vec::with_capacity(symbols.len() * 4);
246    for sym in symbols {
247        bytes.extend_from_slice(&sym.id.to_le_bytes());
248        bytes.extend_from_slice(&sym.name.get().to_le_bytes());
249    }
250    bytes
251}
252
253fn emit_field_symbols(symbols: &[FieldSymbol]) -> Vec<u8> {
254    let mut bytes = Vec::with_capacity(symbols.len() * 4);
255    for sym in symbols {
256        bytes.extend_from_slice(&sym.id.to_le_bytes());
257        bytes.extend_from_slice(&sym.name.get().to_le_bytes());
258    }
259    bytes
260}
261
262fn emit_trivia(entries: &[TriviaEntry]) -> Vec<u8> {
263    let mut bytes = Vec::with_capacity(entries.len() * 2);
264    for entry in entries {
265        bytes.extend_from_slice(&entry.node_type.to_le_bytes());
266    }
267    bytes
268}
269
270fn emit_entrypoints(entrypoints: &[Entrypoint]) -> Vec<u8> {
271    let mut bytes = Vec::with_capacity(entrypoints.len() * 8);
272    for ep in entrypoints {
273        bytes.extend_from_slice(&ep.name.get().to_le_bytes());
274        bytes.extend_from_slice(&ep.target.to_le_bytes());
275        bytes.extend_from_slice(&ep.result_type.0.to_le_bytes());
276        bytes.extend_from_slice(&ep._pad.to_le_bytes());
277    }
278    bytes
279}