plotnik_compiler/emit/
emitter.rs1use std::cell::RefCell;
4
5use plotnik_core::Symbol;
6
7use crate::analyze::type_check::TypeId;
8use crate::bytecode::{InstructionIR, Label, PredicateValueIR};
9use crate::compile::{CompileCtx, Compiler};
10use crate::query::LinkedQuery;
11use plotnik_bytecode::{Entrypoint, FieldSymbol, Header, NodeSymbol, SECTION_ALIGN, TriviaEntry};
12
13use super::EmitError;
14use super::layout::CacheAligned;
15use super::regex_table::RegexTableBuilder;
16use super::string_table::StringTableBuilder;
17use super::type_table::TypeTableBuilder;
18
19pub fn emit(query: &LinkedQuery) -> Result<Vec<u8>, EmitError> {
21 let type_ctx = query.type_context();
22 let interner = query.interner();
23 let symbol_table = &query.symbol_table;
24 let node_type_ids = query.node_type_ids();
25 let node_field_ids = query.node_field_ids();
26
27 let strings = RefCell::new(StringTableBuilder::new());
28 let mut types = TypeTableBuilder::new();
29 types.build(type_ctx, interner, &mut strings.borrow_mut())?;
30
31 let ctx = CompileCtx {
32 interner,
33 type_ctx,
34 symbol_table,
35 strings: &strings,
36 node_types: Some(node_type_ids),
37 node_fields: Some(node_field_ids),
38 };
39 let compile_result = Compiler::compile(&ctx).map_err(EmitError::Compile)?;
40
41 let mut entry_labels: Vec<Label> = vec![compile_result.preamble_entry];
44 entry_labels.extend(compile_result.def_entries.values().copied());
45 let layout = CacheAligned::layout(&compile_result.instructions, &entry_labels);
46
47 if layout.total_steps as usize > 65535 {
49 return Err(EmitError::TooManyTransitions(layout.total_steps as usize));
50 }
51
52 let mut node_symbols: Vec<NodeSymbol> = Vec::new();
54 for (&sym, &node_id) in node_type_ids {
55 let name = strings.borrow_mut().get_or_intern(sym, interner)?;
56 node_symbols.push(NodeSymbol::new(node_id.get(), name));
57 }
58
59 let mut field_symbols: Vec<FieldSymbol> = Vec::new();
61 for (&sym, &field_id) in node_field_ids {
62 let name = strings.borrow_mut().get_or_intern(sym, interner)?;
63 field_symbols.push(FieldSymbol::new(field_id.get(), name));
64 }
65
66 let mut entrypoints: Vec<Entrypoint> = Vec::new();
68 for (def_id, type_id) in type_ctx.iter_def_types() {
69 let name_sym = type_ctx.def_name_sym(def_id);
70 let name = strings.borrow_mut().get_or_intern(name_sym, interner)?;
71 let result_type = types.resolve_type(type_id, type_ctx)?;
72
73 let target = compile_result
75 .def_entries
76 .get(&def_id)
77 .and_then(|label| layout.label_to_step().get(label))
78 .copied()
79 .expect("entrypoint must have compiled target");
80
81 entrypoints.push(Entrypoint::new(name, target, result_type));
82 }
83
84 let strings = strings.into_inner();
86
87 strings.validate()?;
89 types.validate()?;
90 if entrypoints.len() > 65535 {
91 return Err(EmitError::TooManyEntrypoints(entrypoints.len()));
92 }
93
94 let trivia_entries: Vec<TriviaEntry> = Vec::new();
96
97 let mut regexes = RegexTableBuilder::new();
99 intern_regex_predicates(&compile_result.instructions, &strings, &mut regexes)?;
100 regexes.validate()?;
101
102 let transitions_bytes = emit_transitions(
104 &compile_result.instructions,
105 &layout,
106 &types,
107 &strings,
108 ®exes,
109 );
110
111 let (str_blob, str_table) = strings.emit();
113 let (regex_blob, regex_table) = regexes.emit();
114 let (type_defs_bytes, type_members_bytes, type_names_bytes) = types.emit();
115
116 let node_types_bytes = emit_node_symbols(&node_symbols);
117 let node_fields_bytes = emit_field_symbols(&field_symbols);
118 let trivia_bytes = emit_trivia(&trivia_entries);
119 let entrypoints_bytes = emit_entrypoints(&entrypoints);
120
121 let mut output = vec![0u8; 64]; emit_section(&mut output, &str_blob);
128 emit_section(&mut output, ®ex_blob);
129 emit_section(&mut output, &str_table);
130 emit_section(&mut output, ®ex_table);
131 emit_section(&mut output, &node_types_bytes);
132 emit_section(&mut output, &node_fields_bytes);
133 emit_section(&mut output, &trivia_bytes);
134 emit_section(&mut output, &type_defs_bytes);
135 emit_section(&mut output, &type_members_bytes);
136 emit_section(&mut output, &type_names_bytes);
137 emit_section(&mut output, &entrypoints_bytes);
138 emit_section(&mut output, &transitions_bytes);
139
140 pad_to_section(&mut output);
141 let total_size = output.len() as u32;
142
143 let mut header = Header {
145 str_table_count: strings.len() as u16,
146 node_types_count: node_symbols.len() as u16,
147 node_fields_count: field_symbols.len() as u16,
148 trivia_count: trivia_entries.len() as u16,
149 regex_table_count: regexes.len() as u16,
150 type_defs_count: types.type_defs_count() as u16,
151 type_members_count: types.type_members_count() as u16,
152 type_names_count: types.type_names_count() as u16,
153 entrypoints_count: entrypoints.len() as u16,
154 transitions_count: layout.total_steps,
155 str_blob_size: str_blob.len() as u32,
156 regex_blob_size: regex_blob.len() as u32,
157 total_size,
158 ..Default::default()
159 };
160 header.checksum = crc32fast::hash(&output[64..]);
161 output[..64].copy_from_slice(&header.to_bytes());
162
163 Ok(output)
164}
165
166fn pad_to_section(buf: &mut Vec<u8>) {
168 let rem = buf.len() % SECTION_ALIGN;
169 if rem != 0 {
170 let padding = SECTION_ALIGN - rem;
171 buf.resize(buf.len() + padding, 0);
172 }
173}
174
175fn emit_transitions(
177 instructions: &[crate::bytecode::InstructionIR],
178 layout: &crate::bytecode::LayoutResult,
179 types: &TypeTableBuilder,
180 strings: &StringTableBuilder,
181 regexes: &RegexTableBuilder,
182) -> Vec<u8> {
183 let mut bytes = vec![0u8; layout.total_steps as usize * 8];
185
186 let lookup_member = |field_name: Symbol, field_type: TypeId| {
190 types.lookup_member(field_name, field_type, strings)
191 };
192 let get_member_base = |type_id: TypeId| types.get_member_base(type_id);
193
194 let lookup_regex = |string_id: plotnik_bytecode::StringId| regexes.get(string_id);
196
197 for instr in instructions {
198 let label = instr.label();
199 let Some(&step_id) = layout.label_to_step.get(&label) else {
200 continue;
201 };
202
203 let offset = step_id as usize * 8; let resolved = instr.resolve(
205 &layout.label_to_step,
206 lookup_member,
207 get_member_base,
208 lookup_regex,
209 );
210
211 let end = offset + resolved.len();
213 if end <= bytes.len() {
214 bytes[offset..end].copy_from_slice(&resolved);
215 }
216 }
217
218 bytes
219}
220
221fn intern_regex_predicates(
223 instructions: &[InstructionIR],
224 strings: &StringTableBuilder,
225 regexes: &mut RegexTableBuilder,
226) -> Result<(), EmitError> {
227 for instr in instructions {
228 if let InstructionIR::Match(m) = instr
229 && let Some(pred) = &m.predicate
230 && let PredicateValueIR::Regex(string_id) = &pred.value
231 {
232 let pattern = strings.get_str(*string_id);
233 regexes.intern(pattern, *string_id)?;
234 }
235 }
236 Ok(())
237}
238
239fn emit_section(output: &mut Vec<u8>, data: &[u8]) {
240 pad_to_section(output);
241 output.extend_from_slice(data);
242}
243
244fn emit_node_symbols(symbols: &[NodeSymbol]) -> Vec<u8> {
245 let mut bytes = Vec::with_capacity(symbols.len() * 4);
246 for sym in symbols {
247 bytes.extend_from_slice(&sym.id.to_le_bytes());
248 bytes.extend_from_slice(&sym.name.get().to_le_bytes());
249 }
250 bytes
251}
252
253fn emit_field_symbols(symbols: &[FieldSymbol]) -> Vec<u8> {
254 let mut bytes = Vec::with_capacity(symbols.len() * 4);
255 for sym in symbols {
256 bytes.extend_from_slice(&sym.id.to_le_bytes());
257 bytes.extend_from_slice(&sym.name.get().to_le_bytes());
258 }
259 bytes
260}
261
262fn emit_trivia(entries: &[TriviaEntry]) -> Vec<u8> {
263 let mut bytes = Vec::with_capacity(entries.len() * 2);
264 for entry in entries {
265 bytes.extend_from_slice(&entry.node_type.to_le_bytes());
266 }
267 bytes
268}
269
270fn emit_entrypoints(entrypoints: &[Entrypoint]) -> Vec<u8> {
271 let mut bytes = Vec::with_capacity(entrypoints.len() * 8);
272 for ep in entrypoints {
273 bytes.extend_from_slice(&ep.name.get().to_le_bytes());
274 bytes.extend_from_slice(&ep.target.to_le_bytes());
275 bytes.extend_from_slice(&ep.result_type.0.to_le_bytes());
276 bytes.extend_from_slice(&0u16.to_le_bytes()); }
278 bytes
279}