plotnik_compiler/emit/
emitter.rs

1//! Core bytecode emission logic.
2
3use std::cell::RefCell;
4
5use plotnik_core::Symbol;
6
7use crate::analyze::type_check::TypeId;
8use crate::bytecode::{InstructionIR, Label, PredicateValueIR};
9use crate::compile::{CompileCtx, Compiler};
10use crate::query::LinkedQuery;
11use plotnik_bytecode::{Entrypoint, FieldSymbol, Header, NodeSymbol, SECTION_ALIGN, TriviaEntry};
12
13use super::EmitError;
14use super::layout::CacheAligned;
15use super::regex_table::RegexTableBuilder;
16use super::string_table::StringTableBuilder;
17use super::type_table::TypeTableBuilder;
18
19/// Emit bytecode from a LinkedQuery.
20pub fn emit(query: &LinkedQuery) -> Result<Vec<u8>, EmitError> {
21    let type_ctx = query.type_context();
22    let interner = query.interner();
23    let symbol_table = &query.symbol_table;
24    let node_type_ids = query.node_type_ids();
25    let node_field_ids = query.node_field_ids();
26
27    let strings = RefCell::new(StringTableBuilder::new());
28    let mut types = TypeTableBuilder::new();
29    types.build(type_ctx, interner, &mut strings.borrow_mut())?;
30
31    let ctx = CompileCtx {
32        interner,
33        type_ctx,
34        symbol_table,
35        strings: &strings,
36        node_types: Some(node_type_ids),
37        node_fields: Some(node_field_ids),
38    };
39    let compile_result = Compiler::compile(&ctx).map_err(EmitError::Compile)?;
40
41    // Layout with cache alignment
42    // Preamble entry FIRST ensures it gets the lowest address (step 0)
43    let mut entry_labels: Vec<Label> = vec![compile_result.preamble_entry];
44    entry_labels.extend(compile_result.def_entries.values().copied());
45    let layout = CacheAligned::layout(&compile_result.instructions, &entry_labels);
46
47    // Validate transition count
48    if layout.total_steps as usize > 65535 {
49        return Err(EmitError::TooManyTransitions(layout.total_steps as usize));
50    }
51
52    // Collect node symbols
53    let mut node_symbols: Vec<NodeSymbol> = Vec::new();
54    for (&sym, &node_id) in node_type_ids {
55        let name = strings.borrow_mut().get_or_intern(sym, interner)?;
56        node_symbols.push(NodeSymbol::new(node_id.get(), name));
57    }
58
59    // Collect field symbols
60    let mut field_symbols: Vec<FieldSymbol> = Vec::new();
61    for (&sym, &field_id) in node_field_ids {
62        let name = strings.borrow_mut().get_or_intern(sym, interner)?;
63        field_symbols.push(FieldSymbol::new(field_id.get(), name));
64    }
65
66    // Collect entrypoints with actual targets from layout
67    let mut entrypoints: Vec<Entrypoint> = Vec::new();
68    for (def_id, type_id) in type_ctx.iter_def_types() {
69        let name_sym = type_ctx.def_name_sym(def_id);
70        let name = strings.borrow_mut().get_or_intern(name_sym, interner)?;
71        let result_type = types.resolve_type(type_id, type_ctx)?;
72
73        // Get actual target from compiled result
74        let target = compile_result
75            .def_entries
76            .get(&def_id)
77            .and_then(|label| layout.label_to_step().get(label))
78            .copied()
79            .expect("entrypoint must have compiled target");
80
81        entrypoints.push(Entrypoint::new(name, target, result_type));
82    }
83
84    // Move strings out of RefCell for final emission
85    let strings = strings.into_inner();
86
87    // Validate counts
88    strings.validate()?;
89    types.validate()?;
90    if entrypoints.len() > 65535 {
91        return Err(EmitError::TooManyEntrypoints(entrypoints.len()));
92    }
93
94    // Trivia (empty for now)
95    let trivia_entries: Vec<TriviaEntry> = Vec::new();
96
97    // Build regex table from predicates in compiled instructions
98    let mut regexes = RegexTableBuilder::new();
99    intern_regex_predicates(&compile_result.instructions, &strings, &mut regexes)?;
100    regexes.validate()?;
101
102    // Resolve and serialize transitions
103    let transitions_bytes = emit_transitions(
104        &compile_result.instructions,
105        &layout,
106        &types,
107        &strings,
108        &regexes,
109    );
110
111    // Emit all byte sections
112    let (str_blob, str_table) = strings.emit();
113    let (regex_blob, regex_table) = regexes.emit();
114    let (type_defs_bytes, type_members_bytes, type_names_bytes) = types.emit();
115
116    let node_types_bytes = emit_node_symbols(&node_symbols);
117    let node_fields_bytes = emit_field_symbols(&field_symbols);
118    let trivia_bytes = emit_trivia(&trivia_entries);
119    let entrypoints_bytes = emit_entrypoints(&entrypoints);
120
121    // Build output with sections in v2 order:
122    // Header → StringBlob → RegexBlob → StringTable → RegexTable →
123    // NodeTypes → NodeFields → Trivia → TypeDefs → TypeMembers →
124    // TypeNames → Entrypoints → Transitions
125    let mut output = vec![0u8; 64]; // Reserve header space
126
127    emit_section(&mut output, &str_blob);
128    emit_section(&mut output, &regex_blob);
129    emit_section(&mut output, &str_table);
130    emit_section(&mut output, &regex_table);
131    emit_section(&mut output, &node_types_bytes);
132    emit_section(&mut output, &node_fields_bytes);
133    emit_section(&mut output, &trivia_bytes);
134    emit_section(&mut output, &type_defs_bytes);
135    emit_section(&mut output, &type_members_bytes);
136    emit_section(&mut output, &type_names_bytes);
137    emit_section(&mut output, &entrypoints_bytes);
138    emit_section(&mut output, &transitions_bytes);
139
140    pad_to_section(&mut output);
141    let total_size = output.len() as u32;
142
143    // Build header (offsets computed from counts and blob sizes)
144    let mut header = Header {
145        str_table_count: strings.len() as u16,
146        node_types_count: node_symbols.len() as u16,
147        node_fields_count: field_symbols.len() as u16,
148        trivia_count: trivia_entries.len() as u16,
149        regex_table_count: regexes.len() as u16,
150        type_defs_count: types.type_defs_count() as u16,
151        type_members_count: types.type_members_count() as u16,
152        type_names_count: types.type_names_count() as u16,
153        entrypoints_count: entrypoints.len() as u16,
154        transitions_count: layout.total_steps,
155        str_blob_size: str_blob.len() as u32,
156        regex_blob_size: regex_blob.len() as u32,
157        total_size,
158        ..Default::default()
159    };
160    header.checksum = crc32fast::hash(&output[64..]);
161    output[..64].copy_from_slice(&header.to_bytes());
162
163    Ok(output)
164}
165
166/// Pad a buffer to the section alignment boundary.
167fn pad_to_section(buf: &mut Vec<u8>) {
168    let rem = buf.len() % SECTION_ALIGN;
169    if rem != 0 {
170        let padding = SECTION_ALIGN - rem;
171        buf.resize(buf.len() + padding, 0);
172    }
173}
174
175/// Emit transitions section from instructions and layout.
176fn emit_transitions(
177    instructions: &[crate::bytecode::InstructionIR],
178    layout: &crate::bytecode::LayoutResult,
179    types: &TypeTableBuilder,
180    strings: &StringTableBuilder,
181    regexes: &RegexTableBuilder,
182) -> Vec<u8> {
183    // Allocate buffer for all steps (8 bytes each)
184    let mut bytes = vec![0u8; layout.total_steps as usize * 8];
185
186    // Create resolver closures for member indices.
187    // lookup_member: for struct fields (deduplicated by field identity)
188    // get_member_base: for enum variants (parent_type + relative_index)
189    let lookup_member = |field_name: Symbol, field_type: TypeId| {
190        types.lookup_member(field_name, field_type, strings)
191    };
192    let get_member_base = |type_id: TypeId| types.get_member_base(type_id);
193
194    // Predicate regex resolution closure.
195    let lookup_regex = |string_id: plotnik_bytecode::StringId| regexes.get(string_id);
196
197    for instr in instructions {
198        let label = instr.label();
199        let Some(&step_id) = layout.label_to_step.get(&label) else {
200            continue;
201        };
202
203        let offset = step_id as usize * 8; // STEP_SIZE
204        let resolved = instr.resolve(
205            &layout.label_to_step,
206            lookup_member,
207            get_member_base,
208            lookup_regex,
209        );
210
211        // Copy instruction bytes to the correct position
212        let end = offset + resolved.len();
213        if end <= bytes.len() {
214            bytes[offset..end].copy_from_slice(&resolved);
215        }
216    }
217
218    bytes
219}
220
221/// Pre-scan instructions for regex predicates and intern them.
222fn intern_regex_predicates(
223    instructions: &[InstructionIR],
224    strings: &StringTableBuilder,
225    regexes: &mut RegexTableBuilder,
226) -> Result<(), EmitError> {
227    for instr in instructions {
228        if let InstructionIR::Match(m) = instr
229            && let Some(pred) = &m.predicate
230            && let PredicateValueIR::Regex(string_id) = &pred.value
231        {
232            let pattern = strings.get_str(*string_id);
233            regexes.intern(pattern, *string_id)?;
234        }
235    }
236    Ok(())
237}
238
239fn emit_section(output: &mut Vec<u8>, data: &[u8]) {
240    pad_to_section(output);
241    output.extend_from_slice(data);
242}
243
244fn emit_node_symbols(symbols: &[NodeSymbol]) -> Vec<u8> {
245    let mut bytes = Vec::with_capacity(symbols.len() * 4);
246    for sym in symbols {
247        bytes.extend_from_slice(&sym.id.to_le_bytes());
248        bytes.extend_from_slice(&sym.name.get().to_le_bytes());
249    }
250    bytes
251}
252
253fn emit_field_symbols(symbols: &[FieldSymbol]) -> Vec<u8> {
254    let mut bytes = Vec::with_capacity(symbols.len() * 4);
255    for sym in symbols {
256        bytes.extend_from_slice(&sym.id.to_le_bytes());
257        bytes.extend_from_slice(&sym.name.get().to_le_bytes());
258    }
259    bytes
260}
261
262fn emit_trivia(entries: &[TriviaEntry]) -> Vec<u8> {
263    let mut bytes = Vec::with_capacity(entries.len() * 2);
264    for entry in entries {
265        bytes.extend_from_slice(&entry.node_type.to_le_bytes());
266    }
267    bytes
268}
269
270fn emit_entrypoints(entrypoints: &[Entrypoint]) -> Vec<u8> {
271    let mut bytes = Vec::with_capacity(entrypoints.len() * 8);
272    for ep in entrypoints {
273        bytes.extend_from_slice(&ep.name.get().to_le_bytes());
274        bytes.extend_from_slice(&ep.target.to_le_bytes());
275        bytes.extend_from_slice(&ep.result_type.0.to_le_bytes());
276        bytes.extend_from_slice(&0u16.to_le_bytes()); // _pad is always 0
277    }
278    bytes
279}