// plotnik_compiler/emit/emitter.rs

//! Core bytecode emission logic.
2
3use plotnik_core::Symbol;
4
5use crate::analyze::type_check::TypeId;
6use crate::bytecode::{InstructionIR, Label, PredicateValueIR};
7use plotnik_bytecode::{Entrypoint, FieldSymbol, Header, NodeSymbol, SECTION_ALIGN, TriviaEntry};
8use crate::compile::Compiler;
9use crate::query::LinkedQuery;
10
11use super::EmitError;
12use super::layout::CacheAligned;
13use super::regex_table::RegexTableBuilder;
14use super::string_table::StringTableBuilder;
15use super::type_table::TypeTableBuilder;
16
17/// Emit bytecode from a LinkedQuery.
18pub fn emit(query: &LinkedQuery) -> Result<Vec<u8>, EmitError> {
19    let type_ctx = query.type_context();
20    let interner = query.interner();
21    let symbol_table = &query.symbol_table;
22    let node_type_ids = query.node_type_ids();
23    let node_field_ids = query.node_field_ids();
24
25    let mut strings = StringTableBuilder::new();
26    let mut types = TypeTableBuilder::new();
27    types.build(type_ctx, interner, &mut strings)?;
28
29    let compile_result = Compiler::compile(
30        interner,
31        type_ctx,
32        symbol_table,
33        &mut strings,
34        Some(node_type_ids),
35        Some(node_field_ids),
36    )
37    .map_err(EmitError::Compile)?;
38
39    // Layout with cache alignment
40    // Preamble entry FIRST ensures it gets the lowest address (step 0)
41    let mut entry_labels: Vec<Label> = vec![compile_result.preamble_entry];
42    entry_labels.extend(compile_result.def_entries.values().copied());
43    let layout = CacheAligned::layout(&compile_result.instructions, &entry_labels);
44
45    // Validate transition count
46    if layout.total_steps as usize > 65535 {
47        return Err(EmitError::TooManyTransitions(layout.total_steps as usize));
48    }
49
50    // Collect node symbols
51    let mut node_symbols: Vec<NodeSymbol> = Vec::new();
52    for (&sym, &node_id) in node_type_ids {
53        let name = strings.get_or_intern(sym, interner)?;
54        node_symbols.push(NodeSymbol::new(node_id.get(), name));
55    }
56
57    // Collect field symbols
58    let mut field_symbols: Vec<FieldSymbol> = Vec::new();
59    for (&sym, &field_id) in node_field_ids {
60        let name = strings.get_or_intern(sym, interner)?;
61        field_symbols.push(FieldSymbol::new(field_id.get(), name));
62    }
63
64    // Collect entrypoints with actual targets from layout
65    let mut entrypoints: Vec<Entrypoint> = Vec::new();
66    for (def_id, type_id) in type_ctx.iter_def_types() {
67        let name_sym = type_ctx.def_name_sym(def_id);
68        let name = strings.get_or_intern(name_sym, interner)?;
69        let result_type = types.resolve_type(type_id, type_ctx)?;
70
71        // Get actual target from compiled result
72        let target = compile_result
73            .def_entries
74            .get(&def_id)
75            .and_then(|label| layout.label_to_step().get(label))
76            .copied()
77            .expect("entrypoint must have compiled target");
78
79        entrypoints.push(Entrypoint::new(name, target, result_type));
80    }
81
82    // Validate counts
83    strings.validate()?;
84    types.validate()?;
85    if entrypoints.len() > 65535 {
86        return Err(EmitError::TooManyEntrypoints(entrypoints.len()));
87    }
88
89    // Trivia (empty for now)
90    let trivia_entries: Vec<TriviaEntry> = Vec::new();
91
92    // Build regex table from predicates in compiled instructions
93    let mut regexes = RegexTableBuilder::new();
94    intern_regex_predicates(&compile_result.instructions, &strings, &mut regexes)?;
95    regexes.validate()?;
96
97    // Resolve and serialize transitions
98    let transitions_bytes =
99        emit_transitions(&compile_result.instructions, &layout, &types, &strings, &regexes);
100
101    // Emit all byte sections
102    let (str_blob, str_table) = strings.emit();
103    let (regex_blob, regex_table) = regexes.emit();
104    let (type_defs_bytes, type_members_bytes, type_names_bytes) = types.emit();
105
106    let node_types_bytes = emit_node_symbols(&node_symbols);
107    let node_fields_bytes = emit_field_symbols(&field_symbols);
108    let trivia_bytes = emit_trivia(&trivia_entries);
109    let entrypoints_bytes = emit_entrypoints(&entrypoints);
110
111    // Build output with sections in v2 order:
112    // Header → StringBlob → RegexBlob → StringTable → RegexTable →
113    // NodeTypes → NodeFields → Trivia → TypeDefs → TypeMembers →
114    // TypeNames → Entrypoints → Transitions
115    let mut output = vec![0u8; 64]; // Reserve header space
116
117    emit_section(&mut output, &str_blob);
118    emit_section(&mut output, &regex_blob);
119    emit_section(&mut output, &str_table);
120    emit_section(&mut output, &regex_table);
121    emit_section(&mut output, &node_types_bytes);
122    emit_section(&mut output, &node_fields_bytes);
123    emit_section(&mut output, &trivia_bytes);
124    emit_section(&mut output, &type_defs_bytes);
125    emit_section(&mut output, &type_members_bytes);
126    emit_section(&mut output, &type_names_bytes);
127    emit_section(&mut output, &entrypoints_bytes);
128    emit_section(&mut output, &transitions_bytes);
129
130    pad_to_section(&mut output);
131    let total_size = output.len() as u32;
132
133    // Build header (offsets computed from counts and blob sizes)
134    let mut header = Header {
135        str_table_count: strings.len() as u16,
136        node_types_count: node_symbols.len() as u16,
137        node_fields_count: field_symbols.len() as u16,
138        trivia_count: trivia_entries.len() as u16,
139        regex_table_count: regexes.len() as u16,
140        type_defs_count: types.type_defs_count() as u16,
141        type_members_count: types.type_members_count() as u16,
142        type_names_count: types.type_names_count() as u16,
143        entrypoints_count: entrypoints.len() as u16,
144        transitions_count: layout.total_steps,
145        str_blob_size: str_blob.len() as u32,
146        regex_blob_size: regex_blob.len() as u32,
147        total_size,
148        ..Default::default()
149    };
150    header.checksum = crc32fast::hash(&output[64..]);
151    output[..64].copy_from_slice(&header.to_bytes());
152
153    Ok(output)
154}
155
156/// Pad a buffer to the section alignment boundary.
157fn pad_to_section(buf: &mut Vec<u8>) {
158    let rem = buf.len() % SECTION_ALIGN;
159    if rem != 0 {
160        let padding = SECTION_ALIGN - rem;
161        buf.resize(buf.len() + padding, 0);
162    }
163}
164
165/// Emit transitions section from instructions and layout.
166fn emit_transitions(
167    instructions: &[crate::bytecode::InstructionIR],
168    layout: &crate::bytecode::LayoutResult,
169    types: &TypeTableBuilder,
170    strings: &StringTableBuilder,
171    regexes: &RegexTableBuilder,
172) -> Vec<u8> {
173    // Allocate buffer for all steps (8 bytes each)
174    let mut bytes = vec![0u8; layout.total_steps as usize * 8];
175
176    // Create resolver closures for member indices.
177    // lookup_member: for struct fields (deduplicated by field identity)
178    // get_member_base: for enum variants (parent_type + relative_index)
179    let lookup_member = |field_name: Symbol, field_type: TypeId| {
180        types.lookup_member(field_name, field_type, strings)
181    };
182    let get_member_base = |type_id: TypeId| types.get_member_base(type_id);
183
184    // Predicate regex resolution closure.
185    let lookup_regex = |string_id: plotnik_bytecode::StringId| regexes.get(string_id);
186
187    for instr in instructions {
188        let label = instr.label();
189        let Some(&step_id) = layout.label_to_step.get(&label) else {
190            continue;
191        };
192
193        let offset = step_id as usize * 8; // STEP_SIZE
194        let resolved = instr.resolve(&layout.label_to_step, lookup_member, get_member_base, lookup_regex);
195
196        // Copy instruction bytes to the correct position
197        let end = offset + resolved.len();
198        if end <= bytes.len() {
199            bytes[offset..end].copy_from_slice(&resolved);
200        }
201    }
202
203    bytes
204}
205
206/// Pre-scan instructions for regex predicates and intern them.
207fn intern_regex_predicates(
208    instructions: &[InstructionIR],
209    strings: &StringTableBuilder,
210    regexes: &mut RegexTableBuilder,
211) -> Result<(), EmitError> {
212    for instr in instructions {
213        if let InstructionIR::Match(m) = instr
214            && let Some(pred) = &m.predicate
215            && let PredicateValueIR::Regex(string_id) = &pred.value
216        {
217            let pattern = strings.get_str(*string_id);
218            regexes.intern(pattern, *string_id)?;
219        }
220    }
221    Ok(())
222}
223
224fn emit_section(output: &mut Vec<u8>, data: &[u8]) {
225    pad_to_section(output);
226    output.extend_from_slice(data);
227}
228
229fn emit_node_symbols(symbols: &[NodeSymbol]) -> Vec<u8> {
230    let mut bytes = Vec::with_capacity(symbols.len() * 4);
231    for sym in symbols {
232        bytes.extend_from_slice(&sym.id.to_le_bytes());
233        bytes.extend_from_slice(&sym.name.get().to_le_bytes());
234    }
235    bytes
236}
237
238fn emit_field_symbols(symbols: &[FieldSymbol]) -> Vec<u8> {
239    let mut bytes = Vec::with_capacity(symbols.len() * 4);
240    for sym in symbols {
241        bytes.extend_from_slice(&sym.id.to_le_bytes());
242        bytes.extend_from_slice(&sym.name.get().to_le_bytes());
243    }
244    bytes
245}
246
247fn emit_trivia(entries: &[TriviaEntry]) -> Vec<u8> {
248    let mut bytes = Vec::with_capacity(entries.len() * 2);
249    for entry in entries {
250        bytes.extend_from_slice(&entry.node_type.to_le_bytes());
251    }
252    bytes
253}
254
255fn emit_entrypoints(entrypoints: &[Entrypoint]) -> Vec<u8> {
256    let mut bytes = Vec::with_capacity(entrypoints.len() * 8);
257    for ep in entrypoints {
258        bytes.extend_from_slice(&ep.name.get().to_le_bytes());
259        bytes.extend_from_slice(&ep.target.to_le_bytes());
260        bytes.extend_from_slice(&ep.result_type.0.to_le_bytes());
261        bytes.extend_from_slice(&0u16.to_le_bytes()); // _pad is always 0
262    }
263    bytes
264}