// plotnik_compiler/emit/emitter.rs
use plotnik_core::Symbol;

use crate::analyze::type_check::TypeId;
use crate::bytecode::{InstructionIR, Label, PredicateValueIR};
use plotnik_bytecode::{Entrypoint, FieldSymbol, Header, NodeSymbol, SECTION_ALIGN, TriviaEntry};
use crate::compile::Compiler;
use crate::query::LinkedQuery;

use super::EmitError;
use super::layout::CacheAligned;
use super::regex_table::RegexTableBuilder;
use super::string_table::StringTableBuilder;
use super::type_table::TypeTableBuilder;

17pub fn emit(query: &LinkedQuery) -> Result<Vec<u8>, EmitError> {
19 let type_ctx = query.type_context();
20 let interner = query.interner();
21 let symbol_table = &query.symbol_table;
22 let node_type_ids = query.node_type_ids();
23 let node_field_ids = query.node_field_ids();
24
25 let mut strings = StringTableBuilder::new();
26 let mut types = TypeTableBuilder::new();
27 types.build(type_ctx, interner, &mut strings)?;
28
29 let compile_result = Compiler::compile(
30 interner,
31 type_ctx,
32 symbol_table,
33 &mut strings,
34 Some(node_type_ids),
35 Some(node_field_ids),
36 )
37 .map_err(EmitError::Compile)?;
38
39 let mut entry_labels: Vec<Label> = vec![compile_result.preamble_entry];
42 entry_labels.extend(compile_result.def_entries.values().copied());
43 let layout = CacheAligned::layout(&compile_result.instructions, &entry_labels);
44
45 if layout.total_steps as usize > 65535 {
47 return Err(EmitError::TooManyTransitions(layout.total_steps as usize));
48 }
49
50 let mut node_symbols: Vec<NodeSymbol> = Vec::new();
52 for (&sym, &node_id) in node_type_ids {
53 let name = strings.get_or_intern(sym, interner)?;
54 node_symbols.push(NodeSymbol::new(node_id.get(), name));
55 }
56
57 let mut field_symbols: Vec<FieldSymbol> = Vec::new();
59 for (&sym, &field_id) in node_field_ids {
60 let name = strings.get_or_intern(sym, interner)?;
61 field_symbols.push(FieldSymbol::new(field_id.get(), name));
62 }
63
64 let mut entrypoints: Vec<Entrypoint> = Vec::new();
66 for (def_id, type_id) in type_ctx.iter_def_types() {
67 let name_sym = type_ctx.def_name_sym(def_id);
68 let name = strings.get_or_intern(name_sym, interner)?;
69 let result_type = types.resolve_type(type_id, type_ctx)?;
70
71 let target = compile_result
73 .def_entries
74 .get(&def_id)
75 .and_then(|label| layout.label_to_step().get(label))
76 .copied()
77 .expect("entrypoint must have compiled target");
78
79 entrypoints.push(Entrypoint::new(name, target, result_type));
80 }
81
82 strings.validate()?;
84 types.validate()?;
85 if entrypoints.len() > 65535 {
86 return Err(EmitError::TooManyEntrypoints(entrypoints.len()));
87 }
88
89 let trivia_entries: Vec<TriviaEntry> = Vec::new();
91
92 let mut regexes = RegexTableBuilder::new();
94 intern_regex_predicates(&compile_result.instructions, &strings, &mut regexes)?;
95 regexes.validate()?;
96
97 let transitions_bytes =
99 emit_transitions(&compile_result.instructions, &layout, &types, &strings, ®exes);
100
101 let (str_blob, str_table) = strings.emit();
103 let (regex_blob, regex_table) = regexes.emit();
104 let (type_defs_bytes, type_members_bytes, type_names_bytes) = types.emit();
105
106 let node_types_bytes = emit_node_symbols(&node_symbols);
107 let node_fields_bytes = emit_field_symbols(&field_symbols);
108 let trivia_bytes = emit_trivia(&trivia_entries);
109 let entrypoints_bytes = emit_entrypoints(&entrypoints);
110
111 let mut output = vec![0u8; 64]; emit_section(&mut output, &str_blob);
118 emit_section(&mut output, ®ex_blob);
119 emit_section(&mut output, &str_table);
120 emit_section(&mut output, ®ex_table);
121 emit_section(&mut output, &node_types_bytes);
122 emit_section(&mut output, &node_fields_bytes);
123 emit_section(&mut output, &trivia_bytes);
124 emit_section(&mut output, &type_defs_bytes);
125 emit_section(&mut output, &type_members_bytes);
126 emit_section(&mut output, &type_names_bytes);
127 emit_section(&mut output, &entrypoints_bytes);
128 emit_section(&mut output, &transitions_bytes);
129
130 pad_to_section(&mut output);
131 let total_size = output.len() as u32;
132
133 let mut header = Header {
135 str_table_count: strings.len() as u16,
136 node_types_count: node_symbols.len() as u16,
137 node_fields_count: field_symbols.len() as u16,
138 trivia_count: trivia_entries.len() as u16,
139 regex_table_count: regexes.len() as u16,
140 type_defs_count: types.type_defs_count() as u16,
141 type_members_count: types.type_members_count() as u16,
142 type_names_count: types.type_names_count() as u16,
143 entrypoints_count: entrypoints.len() as u16,
144 transitions_count: layout.total_steps,
145 str_blob_size: str_blob.len() as u32,
146 regex_blob_size: regex_blob.len() as u32,
147 total_size,
148 ..Default::default()
149 };
150 header.checksum = crc32fast::hash(&output[64..]);
151 output[..64].copy_from_slice(&header.to_bytes());
152
153 Ok(output)
154}
155
156fn pad_to_section(buf: &mut Vec<u8>) {
158 let rem = buf.len() % SECTION_ALIGN;
159 if rem != 0 {
160 let padding = SECTION_ALIGN - rem;
161 buf.resize(buf.len() + padding, 0);
162 }
163}
164
165fn emit_transitions(
167 instructions: &[crate::bytecode::InstructionIR],
168 layout: &crate::bytecode::LayoutResult,
169 types: &TypeTableBuilder,
170 strings: &StringTableBuilder,
171 regexes: &RegexTableBuilder,
172) -> Vec<u8> {
173 let mut bytes = vec![0u8; layout.total_steps as usize * 8];
175
176 let lookup_member = |field_name: Symbol, field_type: TypeId| {
180 types.lookup_member(field_name, field_type, strings)
181 };
182 let get_member_base = |type_id: TypeId| types.get_member_base(type_id);
183
184 let lookup_regex = |string_id: plotnik_bytecode::StringId| regexes.get(string_id);
186
187 for instr in instructions {
188 let label = instr.label();
189 let Some(&step_id) = layout.label_to_step.get(&label) else {
190 continue;
191 };
192
193 let offset = step_id as usize * 8; let resolved = instr.resolve(&layout.label_to_step, lookup_member, get_member_base, lookup_regex);
195
196 let end = offset + resolved.len();
198 if end <= bytes.len() {
199 bytes[offset..end].copy_from_slice(&resolved);
200 }
201 }
202
203 bytes
204}
205
206fn intern_regex_predicates(
208 instructions: &[InstructionIR],
209 strings: &StringTableBuilder,
210 regexes: &mut RegexTableBuilder,
211) -> Result<(), EmitError> {
212 for instr in instructions {
213 if let InstructionIR::Match(m) = instr
214 && let Some(pred) = &m.predicate
215 && let PredicateValueIR::Regex(string_id) = &pred.value
216 {
217 let pattern = strings.get_str(*string_id);
218 regexes.intern(pattern, *string_id)?;
219 }
220 }
221 Ok(())
222}
223
224fn emit_section(output: &mut Vec<u8>, data: &[u8]) {
225 pad_to_section(output);
226 output.extend_from_slice(data);
227}
228
229fn emit_node_symbols(symbols: &[NodeSymbol]) -> Vec<u8> {
230 let mut bytes = Vec::with_capacity(symbols.len() * 4);
231 for sym in symbols {
232 bytes.extend_from_slice(&sym.id.to_le_bytes());
233 bytes.extend_from_slice(&sym.name.get().to_le_bytes());
234 }
235 bytes
236}
237
238fn emit_field_symbols(symbols: &[FieldSymbol]) -> Vec<u8> {
239 let mut bytes = Vec::with_capacity(symbols.len() * 4);
240 for sym in symbols {
241 bytes.extend_from_slice(&sym.id.to_le_bytes());
242 bytes.extend_from_slice(&sym.name.get().to_le_bytes());
243 }
244 bytes
245}
246
247fn emit_trivia(entries: &[TriviaEntry]) -> Vec<u8> {
248 let mut bytes = Vec::with_capacity(entries.len() * 2);
249 for entry in entries {
250 bytes.extend_from_slice(&entry.node_type.to_le_bytes());
251 }
252 bytes
253}
254
255fn emit_entrypoints(entrypoints: &[Entrypoint]) -> Vec<u8> {
256 let mut bytes = Vec::with_capacity(entrypoints.len() * 8);
257 for ep in entrypoints {
258 bytes.extend_from_slice(&ep.name.get().to_le_bytes());
259 bytes.extend_from_slice(&ep.target.to_le_bytes());
260 bytes.extend_from_slice(&ep.result_type.0.to_le_bytes());
261 bytes.extend_from_slice(&0u16.to_le_bytes()); }
263 bytes
264}