1mod calls;
2mod expr;
3mod patterns;
4
5use std::collections::{HashMap, HashSet};
6
7use crate::ast::{FnBody, FnDef, Stmt, TopLevel, TypeDef};
8use crate::nan_value::{Arena, NanValue};
9use crate::source::find_module_file;
10use crate::types::{option, result};
11
12use super::builtin::{VmBuiltin, VmBuiltinParentThinClass};
13use super::opcode::*;
14use super::symbol::{VmSymbolTable, VmVariantCtor};
15use super::types::{CodeStore, FnChunk};
16
17pub fn compile_program(
20 items: &[TopLevel],
21 arena: &mut Arena,
22) -> Result<(CodeStore, Vec<NanValue>), CompileError> {
23 compile_program_with_modules(items, arena, None)
24}
25
26pub fn compile_program_with_modules(
28 items: &[TopLevel],
29 arena: &mut Arena,
30 module_root: Option<&str>,
31) -> Result<(CodeStore, Vec<NanValue>), CompileError> {
32 let mut compiler = ProgramCompiler::new();
33 compiler.sync_record_field_symbols(arena);
34
35 if let Some(module_root) = module_root {
36 compiler.load_modules(items, module_root, arena)?;
37 }
38
39 for item in items {
40 match item {
41 TopLevel::FnDef(fndef) => {
42 compiler.ensure_global(&fndef.name);
43 let effect_ids: Vec<u32> = fndef
44 .effects
45 .iter()
46 .map(|effect| compiler.symbols.intern_name(effect))
47 .collect();
48 let fn_id = compiler.code.add_function(FnChunk {
49 name: fndef.name.clone(),
50 arity: fndef.params.len() as u8,
51 local_count: 0,
52 code: Vec::new(),
53 constants: Vec::new(),
54 effects: effect_ids,
55 thin: false,
56 parent_thin: false,
57 leaf: false,
58 });
59 let symbol_id =
60 compiler
61 .symbols
62 .intern_function(&fndef.name, fn_id, &fndef.effects);
63 let global_idx = compiler.global_names[&fndef.name];
64 compiler.globals[global_idx as usize] = VmSymbolTable::symbol_ref(symbol_id);
65 }
66 TopLevel::TypeDef(td) => {
67 compiler.register_type_def(td, arena);
68 }
69 _ => {}
70 }
71 }
72
73 compiler.register_current_module_namespace(items);
74
75 for item in items {
76 if let TopLevel::FnDef(fndef) = item {
77 let fn_id = compiler.code.find(&fndef.name).unwrap();
78 let chunk = compiler.compile_fn(fndef, arena)?;
79 compiler.code.functions[fn_id as usize] = chunk;
80 }
81 }
82
83 compiler.compile_top_level(items, arena)?;
84 compiler.code.symbols = compiler.symbols.clone();
85 classify_thin_functions(&mut compiler.code, arena)?;
86
87 Ok((compiler.code, compiler.globals))
88}
89
90#[derive(Debug)]
91pub struct CompileError {
92 pub msg: String,
93}
94
95impl std::fmt::Display for CompileError {
96 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97 write!(f, "Compile error: {}", self.msg)
98 }
99}
100
101struct ProgramCompiler {
102 code: CodeStore,
103 symbols: VmSymbolTable,
104 globals: Vec<NanValue>,
105 global_names: HashMap<String, u16>,
106}
107
108impl ProgramCompiler {
109 fn new() -> Self {
110 let mut compiler = ProgramCompiler {
111 code: CodeStore::new(),
112 symbols: VmSymbolTable::default(),
113 globals: Vec::new(),
114 global_names: HashMap::new(),
115 };
116 compiler.bootstrap_core_symbols();
117 compiler
118 }
119
120 fn sync_record_field_symbols(&mut self, arena: &Arena) {
121 for type_id in 0..arena.type_count() {
122 let type_name = arena.get_type_name(type_id);
123 self.symbols.intern_namespace_path(type_name);
124 let field_names = arena.get_field_names(type_id);
125 if field_names.is_empty() {
126 continue;
127 }
128 let field_symbol_ids: Vec<u32> = field_names
129 .iter()
130 .map(|field_name| self.symbols.intern_name(field_name))
131 .collect();
132 self.code.register_record_fields(type_id, &field_symbol_ids);
133 }
134 }
135
136 fn load_modules(
141 &mut self,
142 items: &[TopLevel],
143 module_root: &str,
144 arena: &mut Arena,
145 ) -> Result<(), CompileError> {
146 let module = items.iter().find_map(|i| {
147 if let TopLevel::Module(m) = i {
148 Some(m)
149 } else {
150 None
151 }
152 });
153 let module = match module {
154 Some(m) => m,
155 None => return Ok(()),
156 };
157
158 let mut loaded = HashSet::new();
159 for dep_name in &module.depends {
160 self.load_module_recursive(dep_name, module_root, arena, &mut loaded)?;
161 }
162 Ok(())
163 }
164
165 fn load_module_recursive(
166 &mut self,
167 dep_name: &str,
168 module_root: &str,
169 arena: &mut Arena,
170 loaded: &mut HashSet<String>,
171 ) -> Result<(), CompileError> {
172 if loaded.contains(dep_name) {
173 return Ok(());
174 }
175 loaded.insert(dep_name.to_string());
176
177 let file_path = find_module_file(dep_name, module_root).ok_or_else(|| CompileError {
178 msg: format!("module '{}' not found (root: {})", dep_name, module_root),
179 })?;
180
181 let source = std::fs::read_to_string(&file_path).map_err(|e| CompileError {
182 msg: format!("cannot read module '{}': {}", dep_name, e),
183 })?;
184
185 let mut mod_items = crate::source::parse_source(&source).map_err(|e| CompileError {
186 msg: format!("parse error in module '{}': {}", dep_name, e),
187 })?;
188
189 crate::tco::transform_program(&mut mod_items);
190 crate::resolver::resolve_program(&mut mod_items);
191
192 if let Some(sub_module) = mod_items.iter().find_map(|i| {
193 if let TopLevel::Module(m) = i {
194 Some(m)
195 } else {
196 None
197 }
198 }) {
199 for sub_dep in &sub_module.depends {
200 self.load_module_recursive(sub_dep, module_root, arena, loaded)?;
201 }
202 }
203
204 for item in &mod_items {
205 if let TopLevel::TypeDef(td) = item {
206 self.register_type_def(td, arena);
207 }
208 }
209
210 let exposes: Option<Vec<String>> = mod_items.iter().find_map(|i| {
211 if let TopLevel::Module(m) = i {
212 if m.exposes.is_empty() {
213 None
214 } else {
215 Some(m.exposes.clone())
216 }
217 } else {
218 None
219 }
220 });
221
222 let mut module_fn_ids: Vec<(String, u32)> = Vec::new();
223 for item in &mod_items {
224 if let TopLevel::FnDef(fndef) = item {
225 let qualified_name = format!("{}.{}", dep_name, fndef.name);
226 let effect_ids: Vec<u32> = fndef
227 .effects
228 .iter()
229 .map(|effect| self.symbols.intern_name(effect))
230 .collect();
231 let fn_id = self.code.add_function(FnChunk {
232 name: qualified_name.clone(),
233 arity: fndef.params.len() as u8,
234 local_count: 0,
235 code: Vec::new(),
236 constants: Vec::new(),
237 effects: effect_ids,
238 thin: false,
239 parent_thin: false,
240 leaf: false,
241 });
242 self.symbols
243 .intern_function(&qualified_name, fn_id, &fndef.effects);
244 module_fn_ids.push((fndef.name.clone(), fn_id));
245 }
246 }
247
248 let module_scope: HashMap<String, u32> = module_fn_ids.iter().cloned().collect();
249
250 let mut fn_idx = 0;
251 for item in &mod_items {
252 if let TopLevel::FnDef(fndef) = item {
253 let (_, fn_id) = module_fn_ids[fn_idx];
254 let chunk = self.compile_fn_with_scope(fndef, arena, &module_scope)?;
255 self.code.functions[fn_id as usize] = chunk;
256 fn_idx += 1;
257 }
258 }
259
260 for (fn_name, _fn_id) in &module_fn_ids {
261 let exposed = match &exposes {
262 Some(list) => list.iter().any(|e| e == fn_name),
263 None => !fn_name.starts_with('_'),
264 };
265 if exposed {
266 let qualified = format!("{}.{}", dep_name, fn_name);
267 let global_idx = self.ensure_global(&qualified);
268 let symbol_id = self.symbols.find(&qualified).ok_or_else(|| CompileError {
269 msg: format!("missing VM symbol for exposed function {}", qualified),
270 })?;
271 self.globals[global_idx as usize] = VmSymbolTable::symbol_ref(symbol_id);
272 }
273 }
274
275 let module_symbol_id = self.symbols.intern_namespace_path(dep_name);
276 for item in &mod_items {
277 if let TopLevel::TypeDef(td) = item {
278 let type_name = match td {
279 TypeDef::Sum { name, .. } | TypeDef::Product { name, .. } => name,
280 };
281 let exposed = match &exposes {
282 Some(list) => list.iter().any(|e| e == type_name),
283 None => !type_name.starts_with('_'),
284 };
285 if exposed && let Some(type_symbol_id) = self.symbols.find(type_name) {
286 let member_symbol_id = self.symbols.intern_name(type_name);
287 self.symbols.add_namespace_member_by_id(
288 module_symbol_id,
289 member_symbol_id,
290 VmSymbolTable::symbol_ref(type_symbol_id),
291 );
292 }
293 }
294 }
295
296 for (fn_name, _fn_id) in &module_fn_ids {
297 let exposed = match &exposes {
298 Some(list) => list.iter().any(|e| e == fn_name),
299 None => !fn_name.starts_with('_'),
300 };
301 if exposed {
302 let qualified = format!("{}.{}", dep_name, fn_name);
303 if let Some(fn_symbol_id) = self.symbols.find(&qualified) {
304 let member_symbol_id = self.symbols.intern_name(fn_name);
305 self.symbols.add_namespace_member_by_id(
306 module_symbol_id,
307 member_symbol_id,
308 VmSymbolTable::symbol_ref(fn_symbol_id),
309 );
310 }
311 }
312 }
313
314 Ok(())
315 }
316
317 fn ensure_global(&mut self, name: &str) -> u16 {
318 if let Some(&idx) = self.global_names.get(name) {
319 return idx;
320 }
321 let idx = self.globals.len() as u16;
322 self.global_names.insert(name.to_string(), idx);
323 self.globals.push(NanValue::UNIT);
324 idx
325 }
326
327 fn register_type_def(&mut self, td: &TypeDef, arena: &mut Arena) {
328 match td {
329 TypeDef::Product { name, fields, .. } => {
330 self.symbols.intern_namespace_path(name);
331 let field_names: Vec<String> = fields.iter().map(|(n, _)| n.clone()).collect();
332 let field_symbol_ids: Vec<u32> = field_names
333 .iter()
334 .map(|field_name| self.symbols.intern_name(field_name))
335 .collect();
336 let type_id = arena.register_record_type(name, field_names);
337 self.code.register_record_fields(type_id, &field_symbol_ids);
338 }
339 TypeDef::Sum { name, variants, .. } => {
340 let type_symbol_id = self.symbols.intern_namespace_path(name);
341 let variant_names: Vec<String> = variants.iter().map(|v| v.name.clone()).collect();
342 let type_id = arena.register_sum_type(name, variant_names);
343 for (variant_id, variant) in variants.iter().enumerate() {
344 let ctor_id = arena
345 .find_ctor_id(type_id, variant_id as u16)
346 .expect("ctor id");
347 let qualified_name = format!("{}.{}", name, variant.name);
348 let ctor_symbol_id = self.symbols.intern_variant_ctor(
349 &qualified_name,
350 VmVariantCtor {
351 type_id,
352 variant_id: variant_id as u16,
353 ctor_id,
354 field_count: variant.fields.len() as u8,
355 },
356 );
357 let member_symbol_id = self.symbols.intern_name(&variant.name);
358 self.symbols.add_namespace_member_by_id(
359 type_symbol_id,
360 member_symbol_id,
361 VmSymbolTable::symbol_ref(ctor_symbol_id),
362 );
363 }
364 }
365 }
366 }
367
368 fn bootstrap_core_symbols(&mut self) {
369 for builtin in VmBuiltin::ALL.iter().copied() {
370 let builtin_symbol_id = self.symbols.intern_builtin(builtin);
371 if let Some((namespace, member)) = builtin.name().split_once('.') {
372 let namespace_symbol_id = self.symbols.intern_namespace_path(namespace);
373 let member_symbol_id = self.symbols.intern_name(member);
374 self.symbols.add_namespace_member_by_id(
375 namespace_symbol_id,
376 member_symbol_id,
377 VmSymbolTable::symbol_ref(builtin_symbol_id),
378 );
379 }
380 }
381
382 let result_symbol_id = self.symbols.intern_namespace_path("Result");
383 let ok_symbol_id = self.symbols.intern_wrapper("Result.Ok", 0);
384 let err_symbol_id = self.symbols.intern_wrapper("Result.Err", 1);
385 let ok_member_symbol_id = self.symbols.intern_name("Ok");
386 self.symbols.add_namespace_member_by_id(
387 result_symbol_id,
388 ok_member_symbol_id,
389 VmSymbolTable::symbol_ref(ok_symbol_id),
390 );
391 let err_member_symbol_id = self.symbols.intern_name("Err");
392 self.symbols.add_namespace_member_by_id(
393 result_symbol_id,
394 err_member_symbol_id,
395 VmSymbolTable::symbol_ref(err_symbol_id),
396 );
397 for (member, builtin_name) in result::extra_members() {
398 if let Some(symbol_id) = self.symbols.find(&builtin_name) {
399 let member_symbol_id = self.symbols.intern_name(member);
400 self.symbols.add_namespace_member_by_id(
401 result_symbol_id,
402 member_symbol_id,
403 VmSymbolTable::symbol_ref(symbol_id),
404 );
405 }
406 }
407
408 let option_symbol_id = self.symbols.intern_namespace_path("Option");
409 let some_symbol_id = self.symbols.intern_wrapper("Option.Some", 2);
410 self.symbols.intern_constant("Option.None", NanValue::NONE);
411 let some_member_symbol_id = self.symbols.intern_name("Some");
412 self.symbols.add_namespace_member_by_id(
413 option_symbol_id,
414 some_member_symbol_id,
415 VmSymbolTable::symbol_ref(some_symbol_id),
416 );
417 let none_member_symbol_id = self.symbols.intern_name("None");
418 self.symbols.add_namespace_member_by_id(
419 option_symbol_id,
420 none_member_symbol_id,
421 NanValue::NONE,
422 );
423 for (member, builtin_name) in option::extra_members() {
424 if let Some(symbol_id) = self.symbols.find(&builtin_name) {
425 let member_symbol_id = self.symbols.intern_name(member);
426 self.symbols.add_namespace_member_by_id(
427 option_symbol_id,
428 member_symbol_id,
429 VmSymbolTable::symbol_ref(symbol_id),
430 );
431 }
432 }
433 }
434
435 fn compile_fn(&mut self, fndef: &FnDef, arena: &mut Arena) -> Result<FnChunk, CompileError> {
436 let empty_scope = HashMap::new();
437 self.compile_fn_with_scope(fndef, arena, &empty_scope)
438 }
439
440 fn compile_fn_with_scope(
441 &mut self,
442 fndef: &FnDef,
443 arena: &mut Arena,
444 module_scope: &HashMap<String, u32>,
445 ) -> Result<FnChunk, CompileError> {
446 let resolution = fndef.resolution.as_ref();
447 let local_count = resolution.map_or(fndef.params.len() as u16, |r| r.local_count);
448 let local_slots: HashMap<String, u16> = resolution
449 .map(|r| r.local_slots.as_ref().clone())
450 .unwrap_or_else(|| {
451 fndef
452 .params
453 .iter()
454 .enumerate()
455 .map(|(i, (name, _))| (name.clone(), i as u16))
456 .collect()
457 });
458
459 let mut fc = FnCompiler::new(
460 &fndef.name,
461 fndef.params.len() as u8,
462 local_count,
463 fndef
464 .effects
465 .iter()
466 .map(|effect| self.symbols.intern_name(effect))
467 .collect(),
468 local_slots,
469 &self.global_names,
470 module_scope,
471 &self.code,
472 &mut self.symbols,
473 arena,
474 );
475
476 match fndef.body.as_ref() {
477 FnBody::Block(stmts) => fc.compile_body(stmts)?,
478 }
479
480 Ok(fc.finish())
481 }
482
483 fn compile_top_level(
484 &mut self,
485 items: &[TopLevel],
486 arena: &mut Arena,
487 ) -> Result<(), CompileError> {
488 let has_stmts = items.iter().any(|i| matches!(i, TopLevel::Stmt(_)));
489 if !has_stmts {
490 return Ok(());
491 }
492
493 for item in items {
494 if let TopLevel::Stmt(Stmt::Binding(name, _, _)) = item {
495 self.ensure_global(name);
496 }
497 }
498
499 let empty_mod_scope = HashMap::new();
500 let mut fc = FnCompiler::new(
501 "__top_level__",
502 0,
503 0,
504 Vec::new(),
505 HashMap::new(),
506 &self.global_names,
507 &empty_mod_scope,
508 &self.code,
509 &mut self.symbols,
510 arena,
511 );
512
513 for item in items {
514 if let TopLevel::Stmt(stmt) = item {
515 match stmt {
516 Stmt::Binding(name, _type_ann, expr) => {
517 fc.compile_expr(expr)?;
518 let idx = self.global_names[name.as_str()];
519 fc.emit_op(STORE_GLOBAL);
520 fc.emit_u16(idx);
521 }
522 Stmt::Expr(expr) => {
523 fc.compile_expr(expr)?;
524 fc.emit_op(POP);
525 }
526 }
527 }
528 }
529
530 fc.emit_op(LOAD_UNIT);
531 fc.emit_op(RETURN);
532
533 let chunk = fc.finish();
534 self.code.add_function(chunk);
535 Ok(())
536 }
537
538 fn register_current_module_namespace(&mut self, items: &[TopLevel]) {
539 let Some(module) = items.iter().find_map(|item| {
540 if let TopLevel::Module(module) = item {
541 Some(module)
542 } else {
543 None
544 }
545 }) else {
546 return;
547 };
548
549 let module_symbol_id = self.symbols.intern_namespace_path(&module.name);
550
551 for item in items {
552 match item {
553 TopLevel::FnDef(fndef) => {
554 let exposed = if module.exposes.is_empty() {
555 !fndef.name.starts_with('_')
556 } else {
557 module.exposes.iter().any(|name| name == &fndef.name)
558 };
559 if exposed && let Some(symbol_id) = self.symbols.find(&fndef.name) {
560 let member_symbol_id = self.symbols.intern_name(&fndef.name);
561 self.symbols.add_namespace_member_by_id(
562 module_symbol_id,
563 member_symbol_id,
564 VmSymbolTable::symbol_ref(symbol_id),
565 );
566 }
567 }
568 TopLevel::TypeDef(TypeDef::Product { name, .. } | TypeDef::Sum { name, .. }) => {
569 let exposed = if module.exposes.is_empty() {
570 !name.starts_with('_')
571 } else {
572 module.exposes.iter().any(|member| member == name)
573 };
574 if exposed && let Some(symbol_id) = self.symbols.find(name) {
575 let member_symbol_id = self.symbols.intern_name(name);
576 self.symbols.add_namespace_member_by_id(
577 module_symbol_id,
578 member_symbol_id,
579 VmSymbolTable::symbol_ref(symbol_id),
580 );
581 }
582 }
583 _ => {}
584 }
585 }
586 }
587}
588
589fn classify_thin_functions(code: &mut CodeStore, arena: &Arena) -> Result<(), CompileError> {
590 let thin_flags: Vec<bool> = code
591 .functions
592 .iter()
593 .map(classify_thin_chunk)
594 .collect::<Result<_, _>>()?;
595 let parent_thin_flags = classify_parent_thin_chunks(code, arena)?;
596
597 let leaf_flags: Vec<bool> = code
598 .functions
599 .iter()
600 .map(classify_leaf_chunk)
601 .collect::<Result<_, _>>()?;
602
603 for (((chunk, thin), parent_thin), leaf) in code
604 .functions
605 .iter_mut()
606 .zip(thin_flags.iter().copied())
607 .zip(parent_thin_flags)
608 .zip(leaf_flags)
609 {
610 chunk.thin = thin;
611 chunk.parent_thin = parent_thin;
612 chunk.leaf = leaf;
613 }
614
615 let thin_ignoring_tco: Vec<bool> = code
619 .functions
620 .iter()
621 .map(classify_thin_ignoring_self_tco)
622 .collect::<Result<_, _>>()?;
623 for (fn_id, qualifies) in thin_ignoring_tco.iter().enumerate() {
624 if !qualifies {
625 continue;
626 }
627 let chunk = &code.functions[fn_id];
630 let positions: Vec<usize> = find_opcode_positions(chunk, TAIL_CALL_SELF);
631 for pos in positions {
632 code.functions[fn_id].code[pos] = TAIL_CALL_SELF_THIN;
633 }
634 }
635
636 for fn_id in 0..code.functions.len() {
640 let positions: Vec<usize> = find_opcode_positions(&code.functions[fn_id], CALL_KNOWN);
641 for pos in positions {
642 let chunk_code = &code.functions[fn_id].code;
643 if pos + 3 >= chunk_code.len() {
644 continue;
645 }
646 let target_fn_id =
647 u16::from_be_bytes([chunk_code[pos + 1], chunk_code[pos + 2]]) as usize;
648 if target_fn_id >= code.functions.len() {
649 continue;
650 }
651 let target = &code.functions[target_fn_id];
652 if target.leaf && target.local_count == target.arity as u16 {
653 code.functions[fn_id].code[pos] = CALL_LEAF;
654 }
655 }
656 }
657
658 Ok(())
659}
660
661const MAX_PARENT_THIN_CODE_LEN: usize = 80;
662const MAX_PARENT_THIN_LOCALS: u16 = 8;
663
664fn classify_parent_thin_chunks(
665 code: &CodeStore,
666 _arena: &Arena,
667) -> Result<Vec<bool>, CompileError> {
668 let mut candidates: Vec<bool> = code
669 .functions
670 .iter()
671 .map(|chunk| base_parent_thin_chunk(code, chunk))
672 .collect::<Result<_, _>>()?;
673
674 loop {
675 let mut changed = false;
676 for fn_id in 0..code.functions.len() {
677 if !candidates[fn_id] {
678 continue;
679 }
680 if !parent_thin_calls_are_safe(fn_id, code, &candidates)? {
681 candidates[fn_id] = false;
682 changed = true;
683 }
684 }
685 if !changed {
686 break;
687 }
688 }
689
690 Ok(candidates)
691}
692
693fn parent_thin_builtin_is_allowed(
694 code_store: &CodeStore,
695 chunk: &FnChunk,
696 ip: usize,
697) -> Result<bool, CompileError> {
698 if ip + 5 > chunk.code.len() {
699 return Err(CompileError {
700 msg: format!("truncated bytecode in {}", chunk.name),
701 });
702 }
703 let symbol_id = u32::from_be_bytes([
704 chunk.code[ip],
705 chunk.code[ip + 1],
706 chunk.code[ip + 2],
707 chunk.code[ip + 3],
708 ]);
709 let builtin = code_store
710 .symbols
711 .resolve_builtin(symbol_id)
712 .ok_or_else(|| CompileError {
713 msg: format!("unknown builtin symbol {} in {}", symbol_id, chunk.name),
714 })?;
715 Ok(!matches!(
716 builtin.parent_thin_class(),
717 VmBuiltinParentThinClass::AllocHeavy
718 ))
719}
720
721fn base_parent_thin_chunk(code_store: &CodeStore, chunk: &FnChunk) -> Result<bool, CompileError> {
722 if !chunk.effects.is_empty()
723 || chunk.code.len() > MAX_PARENT_THIN_CODE_LEN
724 || chunk.local_count > MAX_PARENT_THIN_LOCALS
725 {
726 return Ok(false);
727 }
728
729 let code = &chunk.code;
730 let mut ip = 0usize;
731 while ip < code.len() {
732 let op = code[ip];
733 ip += 1;
734 match op {
735 STORE_GLOBAL | TAIL_CALL_SELF | TAIL_CALL_KNOWN | CALL_VALUE | MATCH_CONS
736 | LIST_HEAD_TAIL => {
737 return Ok(false);
738 }
739
740 CONCAT | LIST_CONS | LIST_GET | LIST_APPEND | LIST_PREPEND | RECORD_UPDATE => {
741 return Ok(false);
742 }
743
744 POP | DUP | LOAD_UNIT | LOAD_TRUE | LOAD_FALSE | ADD | SUB | MUL | DIV | MOD | NEG
745 | NOT | EQ | LT | GT | RETURN | PROPAGATE_ERR | LIST_LEN | LIST_GET_MATCH
746 | LIST_NIL | UNWRAP_OR | UNWRAP_RESULT_OR | NOP => {}
747
748 LIST_NEW | WRAP | TUPLE_NEW | RECORD_NEW => {
749 return Ok(false);
750 }
751
752 LOAD_LOCAL | STORE_LOCAL | RECORD_GET | EXTRACT_FIELD | EXTRACT_TUPLE_ITEM => {
753 ip = advance_opcode_ip(chunk, ip, 1)?;
754 }
755
756 LOAD_CONST | LOAD_GLOBAL | JUMP | JUMP_IF_FALSE | MATCH_FAIL | LOAD_LOCAL_2
757 | LIST_GET_OR => {
758 ip = advance_opcode_ip(chunk, ip, 2)?;
759 }
760
761 RECORD_GET_NAMED => {
762 ip = advance_opcode_ip(chunk, ip, 4)?;
763 }
764
765 CALL_BUILTIN => {
766 if !parent_thin_builtin_is_allowed(code_store, chunk, ip)? {
767 return Ok(false);
768 }
769 ip = advance_opcode_ip(chunk, ip, 5)?;
770 }
771
772 CALL_KNOWN | CALL_LEAF | MATCH_TAG | MATCH_UNWRAP | MATCH_TUPLE | LOAD_LOCAL_CONST => {
773 ip = advance_opcode_ip(chunk, ip, 3)?;
774 }
775
776 MATCH_VARIANT => {
777 ip = advance_opcode_ip(chunk, ip, 4)?;
778 }
779
780 MATCH_NIL => {
781 ip = advance_opcode_ip(chunk, ip, 2)?;
782 }
783
784 VARIANT_NEW => {
785 if ip + 5 > code.len() {
786 return Err(CompileError {
787 msg: format!("truncated bytecode in {}", chunk.name),
788 });
789 }
790 let field_count = code[ip + 4];
791 if field_count != 0 {
792 return Ok(false);
793 }
794 ip = advance_opcode_ip(chunk, ip, 5)?;
795 }
796
797 MATCH_DISPATCH | MATCH_DISPATCH_CONST => {
798 if ip >= code.len() {
799 return Err(CompileError {
800 msg: format!("truncated MATCH_DISPATCH in {}", chunk.name),
801 });
802 }
803 let count = code[ip] as usize;
804 let entry_size = if op == MATCH_DISPATCH { 11 } else { 17 };
805 ip = advance_opcode_ip(chunk, ip, 3 + count * entry_size)?;
806 }
807
808 TAIL_CALL_SELF_THIN => {
809 return Ok(false);
810 }
811
812 _ => {
813 return Err(CompileError {
814 msg: format!(
815 "unknown opcode 0x{op:02X} in {} at ip={} (code={:?})",
816 chunk.name,
817 ip - 1,
818 chunk.code
819 ),
820 });
821 }
822 }
823 }
824
825 Ok(true)
826}
827
828fn parent_thin_calls_are_safe(
829 current_fn_id: usize,
830 code: &CodeStore,
831 candidates: &[bool],
832) -> Result<bool, CompileError> {
833 let chunk = &code.functions[current_fn_id];
834 let bytes = &chunk.code;
835 let mut ip = 0usize;
836 while ip < bytes.len() {
837 let op = bytes[ip];
838 ip += 1;
839 match op {
840 CALL_KNOWN => {
841 if ip + 3 > bytes.len() {
842 return Err(CompileError {
843 msg: format!("truncated bytecode in {}", chunk.name),
844 });
845 }
846 let target_fn_id = u16::from_be_bytes([bytes[ip], bytes[ip + 1]]) as usize;
847 if target_fn_id == current_fn_id || target_fn_id >= candidates.len() {
848 return Ok(false);
849 }
850 if !code.functions[target_fn_id].effects.is_empty() {
851 return Ok(false);
852 }
853 ip = advance_opcode_ip(chunk, ip, 3)?;
854 }
855
856 LOAD_LOCAL | STORE_LOCAL | RECORD_GET | EXTRACT_FIELD | EXTRACT_TUPLE_ITEM => {
857 ip = advance_opcode_ip(chunk, ip, 1)?;
858 }
859
860 LOAD_CONST | LOAD_GLOBAL | JUMP | JUMP_IF_FALSE | MATCH_FAIL | LOAD_LOCAL_2
861 | LIST_GET_OR => {
862 ip = advance_opcode_ip(chunk, ip, 2)?;
863 }
864
865 RECORD_GET_NAMED => {
866 ip = advance_opcode_ip(chunk, ip, 4)?;
867 }
868
869 CALL_BUILTIN => {
870 ip = advance_opcode_ip(chunk, ip, 5)?;
871 }
872
873 MATCH_TAG | MATCH_UNWRAP | MATCH_TUPLE | LOAD_LOCAL_CONST => {
874 ip = advance_opcode_ip(chunk, ip, 3)?;
875 }
876
877 MATCH_VARIANT => {
878 ip = advance_opcode_ip(chunk, ip, 4)?;
879 }
880
881 MATCH_NIL | MATCH_CONS => {
882 ip = advance_opcode_ip(chunk, ip, 2)?;
883 }
884
885 LIST_NEW | WRAP | TUPLE_NEW => {
886 ip = advance_opcode_ip(chunk, ip, 1)?;
887 }
888
889 RECORD_NEW => {
890 ip = advance_opcode_ip(chunk, ip, 3)?;
891 }
892
893 VARIANT_NEW => {
894 ip = advance_opcode_ip(chunk, ip, 5)?;
895 }
896
897 MATCH_DISPATCH | MATCH_DISPATCH_CONST => {
898 if ip >= bytes.len() {
899 break;
900 }
901 let count = bytes[ip] as usize;
902 let entry_size = if op == MATCH_DISPATCH { 11 } else { 17 };
903 ip = advance_opcode_ip(chunk, ip, 3 + count * entry_size)?;
904 }
905
906 TAIL_CALL_SELF_THIN => {
907 ip = advance_opcode_ip(chunk, ip, 1)?;
908 }
909
910 _ => {}
911 }
912 }
913 Ok(true)
914}
915
916fn classify_leaf_chunk(chunk: &FnChunk) -> Result<bool, CompileError> {
919 let code = &chunk.code;
920 let mut ip = 0usize;
921 while ip < code.len() {
922 let op = code[ip];
923 ip += 1;
924 match op {
925 CALL_KNOWN | CALL_VALUE | TAIL_CALL_SELF | TAIL_CALL_KNOWN | TAIL_CALL_SELF_THIN => {
926 return Ok(false);
927 }
928
929 POP | DUP | LOAD_UNIT | LOAD_TRUE | LOAD_FALSE | ADD | SUB | MUL | DIV | MOD | NEG
931 | NOT | EQ | LT | GT | RETURN | PROPAGATE_ERR | LIST_HEAD_TAIL | LIST_GET_MATCH
932 | LIST_NIL | LIST_CONS | LIST_LEN | LIST_GET | LIST_APPEND | LIST_PREPEND
933 | UNWRAP_OR | UNWRAP_RESULT_OR | CONCAT | NOP => {}
934
935 LOAD_LOCAL | STORE_LOCAL | RECORD_GET | EXTRACT_FIELD | EXTRACT_TUPLE_ITEM
937 | LIST_NEW | WRAP | TUPLE_NEW => {
938 ip += 1;
939 }
940
941 LOAD_CONST | LOAD_GLOBAL | STORE_GLOBAL | JUMP | JUMP_IF_FALSE | MATCH_FAIL
943 | MATCH_NIL | MATCH_CONS | LOAD_LOCAL_2 | LIST_GET_OR => {
944 ip += 2;
945 }
946
947 MATCH_TAG | MATCH_UNWRAP | MATCH_TUPLE | RECORD_NEW | LOAD_LOCAL_CONST => {
949 ip += 3;
950 }
951
952 MATCH_VARIANT | RECORD_GET_NAMED => {
954 ip += 4;
955 }
956
957 CALL_BUILTIN | VARIANT_NEW => {
959 ip += 5;
960 }
961
962 MATCH_DISPATCH | MATCH_DISPATCH_CONST => {
964 if ip < code.len() {
965 let count = code[ip] as usize;
966 let entry_size = if op == MATCH_DISPATCH { 11 } else { 17 };
967 ip += 3 + count * entry_size;
968 }
969 }
970 RECORD_UPDATE => {
971 if ip + 3 <= code.len() {
972 let count = code[ip + 2] as usize;
973 ip += 3 + count;
974 }
975 }
976
977 _ => {}
978 }
979 }
980 Ok(true)
981}
982
983fn classify_thin_chunk(chunk: &FnChunk) -> Result<bool, CompileError> {
984 let code = &chunk.code;
985 let mut ip = 0usize;
986
987 while ip < code.len() {
988 let op = code[ip];
989 ip += 1;
990 match op {
991 STORE_GLOBAL | TAIL_CALL_SELF | TAIL_CALL_KNOWN | CONCAT | LIST_NIL | LIST_CONS
992 | LIST_NEW | RECORD_NEW | WRAP | TUPLE_NEW | RECORD_UPDATE | LIST_LEN | LIST_GET
993 | LIST_APPEND | LIST_PREPEND => {
994 return Ok(false);
995 }
996
997 POP | DUP | LOAD_UNIT | LOAD_TRUE | LOAD_FALSE | ADD | SUB | MUL | DIV | MOD | NEG
998 | NOT | EQ | LT | GT | RETURN | PROPAGATE_ERR | LIST_HEAD_TAIL | LIST_GET_MATCH
999 | UNWRAP_OR | UNWRAP_RESULT_OR | NOP => {}
1000
1001 LOAD_LOCAL | STORE_LOCAL | CALL_VALUE | RECORD_GET | EXTRACT_FIELD
1002 | EXTRACT_TUPLE_ITEM => {
1003 ip = advance_opcode_ip(chunk, ip, 1)?;
1004 }
1005
1006 LOAD_CONST | LOAD_GLOBAL | JUMP | JUMP_IF_FALSE | MATCH_FAIL | LOAD_LOCAL_2
1007 | LIST_GET_OR => {
1008 ip = advance_opcode_ip(chunk, ip, 2)?;
1009 }
1010
1011 RECORD_GET_NAMED => {
1012 ip = advance_opcode_ip(chunk, ip, 4)?;
1013 }
1014
1015 CALL_BUILTIN => {
1016 ip = advance_opcode_ip(chunk, ip, 5)?;
1017 }
1018
1019 CALL_KNOWN | CALL_LEAF | MATCH_TAG | MATCH_UNWRAP | MATCH_TUPLE | LOAD_LOCAL_CONST => {
1020 ip = advance_opcode_ip(chunk, ip, 3)?;
1021 }
1022
1023 MATCH_VARIANT => {
1024 ip = advance_opcode_ip(chunk, ip, 4)?;
1025 }
1026
1027 MATCH_NIL | MATCH_CONS => {
1028 ip = advance_opcode_ip(chunk, ip, 2)?;
1029 }
1030
1031 VARIANT_NEW => {
1032 if ip + 5 > code.len() {
1033 return Err(CompileError {
1034 msg: format!("truncated bytecode in {}", chunk.name),
1035 });
1036 }
1037 let field_count = code[ip + 4];
1038 if field_count != 0 {
1039 return Ok(false);
1040 }
1041 ip = advance_opcode_ip(chunk, ip, 5)?;
1042 }
1043
1044 MATCH_DISPATCH | MATCH_DISPATCH_CONST => {
1045 if ip >= code.len() {
1046 return Err(CompileError {
1047 msg: format!("truncated MATCH_DISPATCH in {}", chunk.name),
1048 });
1049 }
1050 let count = code[ip] as usize;
1051 let entry_size = if op == MATCH_DISPATCH { 11 } else { 17 };
1052 ip = advance_opcode_ip(chunk, ip, 3 + count * entry_size)?;
1053 }
1054
1055 TAIL_CALL_SELF_THIN => {
1056 return Ok(false);
1057 }
1058
1059 _ => {
1060 return Err(CompileError {
1061 msg: format!(
1062 "unknown opcode 0x{op:02X} in {} at ip={} (code={:?})",
1063 chunk.name,
1064 ip - 1,
1065 chunk.code
1066 ),
1067 });
1068 }
1069 }
1070 }
1071
1072 Ok(true)
1073}
1074
1075fn classify_thin_ignoring_self_tco(chunk: &FnChunk) -> Result<bool, CompileError> {
1078 let code = &chunk.code;
1079 let mut ip = 0usize;
1080 while ip < code.len() {
1081 let op = code[ip];
1082 ip += 1;
1083 match op {
1084 STORE_GLOBAL | TAIL_CALL_KNOWN | CONCAT | LIST_NIL | LIST_CONS | LIST_NEW
1086 | RECORD_NEW | WRAP | TUPLE_NEW | RECORD_UPDATE | LIST_LEN | LIST_GET | LIST_APPEND
1087 | LIST_PREPEND => {
1088 return Ok(false);
1089 }
1090
1091 POP | DUP | LOAD_UNIT | LOAD_TRUE | LOAD_FALSE | ADD | SUB | MUL | DIV | MOD | NEG
1092 | NOT | EQ | LT | GT | RETURN | PROPAGATE_ERR | LIST_HEAD_TAIL | LIST_GET_MATCH
1093 | UNWRAP_OR | UNWRAP_RESULT_OR | NOP => {}
1094
1095 LOAD_LOCAL | STORE_LOCAL | CALL_VALUE | RECORD_GET | EXTRACT_FIELD
1097 | EXTRACT_TUPLE_ITEM | TAIL_CALL_SELF => {
1098 ip = advance_opcode_ip(chunk, ip, 1)?;
1099 }
1100
1101 LOAD_CONST | LOAD_GLOBAL | JUMP | JUMP_IF_FALSE | MATCH_FAIL | LOAD_LOCAL_2
1102 | LIST_GET_OR => {
1103 ip = advance_opcode_ip(chunk, ip, 2)?;
1104 }
1105 RECORD_GET_NAMED => {
1106 ip = advance_opcode_ip(chunk, ip, 4)?;
1107 }
1108 CALL_BUILTIN => {
1109 ip = advance_opcode_ip(chunk, ip, 5)?;
1110 }
1111 CALL_KNOWN | CALL_LEAF | MATCH_TAG | MATCH_UNWRAP | MATCH_TUPLE | LOAD_LOCAL_CONST => {
1112 ip = advance_opcode_ip(chunk, ip, 3)?;
1113 }
1114 MATCH_VARIANT => {
1115 ip = advance_opcode_ip(chunk, ip, 4)?;
1116 }
1117 MATCH_NIL | MATCH_CONS => {
1118 ip = advance_opcode_ip(chunk, ip, 2)?;
1119 }
1120
1121 VARIANT_NEW => {
1122 if ip + 5 > code.len() {
1123 return Err(CompileError {
1124 msg: format!("truncated bytecode in {}", chunk.name),
1125 });
1126 }
1127 let field_count = code[ip + 4];
1128 if field_count != 0 {
1129 return Ok(false);
1130 }
1131 ip = advance_opcode_ip(chunk, ip, 5)?;
1132 }
1133
1134 MATCH_DISPATCH | MATCH_DISPATCH_CONST => {
1135 if ip >= code.len() {
1136 return Err(CompileError {
1137 msg: format!("truncated MATCH_DISPATCH in {}", chunk.name),
1138 });
1139 }
1140 let count = code[ip] as usize;
1141 let entry_size = if op == MATCH_DISPATCH { 11 } else { 17 };
1142 ip = advance_opcode_ip(chunk, ip, 3 + count * entry_size)?;
1143 }
1144
1145 _ => {
1146 return Err(CompileError {
1147 msg: format!(
1148 "unknown opcode 0x{op:02X} in {} at ip={} (code={:?})",
1149 chunk.name,
1150 ip - 1,
1151 chunk.code
1152 ),
1153 });
1154 }
1155 }
1156 }
1157 Ok(true)
1158}
1159
1160fn find_opcode_positions(chunk: &FnChunk, target_op: u8) -> Vec<usize> {
1162 let code = &chunk.code;
1163 let mut positions = Vec::new();
1164 let mut ip = 0usize;
1165 while ip < code.len() {
1166 let op = code[ip];
1167 if op == target_op {
1168 positions.push(ip);
1169 }
1170 ip += 1;
1171 let width = match op {
1173 LOAD_LOCAL | STORE_LOCAL | CALL_VALUE | RECORD_GET | EXTRACT_FIELD
1174 | EXTRACT_TUPLE_ITEM | LIST_NEW | WRAP | TUPLE_NEW | TAIL_CALL_SELF
1175 | TAIL_CALL_SELF_THIN => 1,
1176 LOAD_CONST | LOAD_GLOBAL | STORE_GLOBAL | JUMP | JUMP_IF_FALSE | MATCH_FAIL
1177 | MATCH_NIL | MATCH_CONS | LOAD_LOCAL_2 | LIST_GET_OR => 2,
1178 CALL_KNOWN | CALL_LEAF | MATCH_TAG | MATCH_UNWRAP | MATCH_TUPLE | RECORD_NEW
1179 | LOAD_LOCAL_CONST => 3,
1180 MATCH_VARIANT | RECORD_GET_NAMED => 4,
1181 CALL_BUILTIN | VARIANT_NEW => 5,
1182 MATCH_DISPATCH | MATCH_DISPATCH_CONST => {
1183 if ip < code.len() {
1184 let count = code[ip] as usize;
1185 let entry_size = if op == MATCH_DISPATCH { 11 } else { 17 };
1186 3 + count * entry_size
1187 } else {
1188 0
1189 }
1190 }
1191 RECORD_UPDATE => {
1192 if ip + 3 <= code.len() {
1193 let count = code[ip + 2] as usize;
1194 3 + count
1195 } else {
1196 0
1197 }
1198 }
1199 _ => 0, };
1201 ip += width;
1202 }
1203 positions
1204}
1205
1206fn advance_opcode_ip(chunk: &FnChunk, ip: usize, width: usize) -> Result<usize, CompileError> {
1207 let new_ip = ip + width;
1208 if new_ip > chunk.code.len() {
1209 return Err(CompileError {
1210 msg: format!("truncated bytecode in {}", chunk.name),
1211 });
1212 }
1213 Ok(new_ip)
1214}
1215
1216enum CallTarget {
1218 KnownFn(u32),
1220 Wrapper(u8),
1222 None_,
1224 Variant(u32, u16),
1226 Builtin(VmBuiltin),
1228 UnknownQualified(String),
1230}
1231
1232struct FnCompiler<'a> {
1233 name: String,
1234 arity: u8,
1235 local_count: u16,
1236 effects: Vec<u32>,
1237 local_slots: HashMap<String, u16>,
1238 global_names: &'a HashMap<String, u16>,
1239 module_scope: &'a HashMap<String, u32>,
1242 code_store: &'a CodeStore,
1243 symbols: &'a mut VmSymbolTable,
1244 arena: &'a mut Arena,
1245 code: Vec<u8>,
1246 constants: Vec<NanValue>,
1247 last_op_pos: usize,
1249}
1250
1251impl<'a> FnCompiler<'a> {
1252 #[allow(clippy::too_many_arguments)]
1253 fn new(
1254 name: &str,
1255 arity: u8,
1256 local_count: u16,
1257 effects: Vec<u32>,
1258 local_slots: HashMap<String, u16>,
1259 global_names: &'a HashMap<String, u16>,
1260 module_scope: &'a HashMap<String, u32>,
1261 code_store: &'a CodeStore,
1262 symbols: &'a mut VmSymbolTable,
1263 arena: &'a mut Arena,
1264 ) -> Self {
1265 FnCompiler {
1266 name: name.to_string(),
1267 arity,
1268 local_count,
1269 effects,
1270 local_slots,
1271 global_names,
1272 module_scope,
1273 code_store,
1274 symbols,
1275 arena,
1276 code: Vec::new(),
1277 constants: Vec::new(),
1278 last_op_pos: usize::MAX,
1279 }
1280 }
1281
1282 fn finish(self) -> FnChunk {
1283 FnChunk {
1284 name: self.name,
1285 arity: self.arity,
1286 local_count: self.local_count,
1287 code: self.code,
1288 constants: self.constants,
1289 effects: self.effects,
1290 thin: false,
1291 parent_thin: false,
1292 leaf: false,
1293 }
1294 }
1295
1296 fn emit_op(&mut self, op: u8) {
1297 let prev_pos = self.last_op_pos;
1298 let prev_op = if prev_pos < self.code.len() {
1299 self.code[prev_pos]
1300 } else {
1301 0xFF
1302 };
1303
1304 if op == LOAD_LOCAL && prev_op == LOAD_LOCAL && prev_pos + 2 == self.code.len() {
1306 self.code[prev_pos] = LOAD_LOCAL_2;
1307 return;
1309 }
1310 if op == LOAD_CONST && prev_op == LOAD_LOCAL && prev_pos + 2 == self.code.len() {
1312 self.code[prev_pos] = LOAD_LOCAL_CONST;
1313 return;
1315 }
1316 if op == UNWRAP_OR && self.code.len() >= 4 {
1320 let len = self.code.len();
1321 if self.code[len - 4] == LIST_GET && self.code[len - 3] == LOAD_CONST {
1323 let hi = self.code[len - 2];
1324 let lo = self.code[len - 1];
1325 self.code[len - 4] = LIST_GET_OR;
1326 self.code[len - 3] = hi;
1327 self.code[len - 2] = lo;
1328 self.code.pop();
1329 self.last_op_pos = len - 4;
1330 return;
1331 }
1332 }
1333
1334 self.last_op_pos = self.code.len();
1335 self.code.push(op);
1336 }
1337
1338 fn emit_u8(&mut self, val: u8) {
1339 self.code.push(val);
1340 }
1341
1342 fn emit_u16(&mut self, val: u16) {
1343 self.code.push((val >> 8) as u8);
1344 self.code.push((val & 0xFF) as u8);
1345 }
1346
1347 fn emit_i16(&mut self, val: i16) {
1348 self.emit_u16(val as u16);
1349 }
1350
1351 fn emit_u32(&mut self, val: u32) {
1352 self.code.push((val >> 24) as u8);
1353 self.code.push(((val >> 16) & 0xFF) as u8);
1354 self.code.push(((val >> 8) & 0xFF) as u8);
1355 self.code.push((val & 0xFF) as u8);
1356 }
1357
1358 fn emit_u64(&mut self, val: u64) {
1359 self.code.extend_from_slice(&val.to_be_bytes());
1360 }
1361
1362 fn add_constant(&mut self, val: NanValue) -> u16 {
1363 for (i, c) in self.constants.iter().enumerate() {
1364 if c.bits() == val.bits() {
1365 return i as u16;
1366 }
1367 }
1368 let idx = self.constants.len() as u16;
1369 self.constants.push(val);
1370 idx
1371 }
1372
1373 fn offset(&self) -> usize {
1374 self.code.len()
1375 }
1376
1377 fn emit_jump(&mut self, op: u8) -> usize {
1378 self.emit_op(op);
1379 let patch_pos = self.code.len();
1380 self.emit_i16(0);
1381 patch_pos
1382 }
1383
1384 fn patch_jump(&mut self, patch_pos: usize) {
1385 let target = self.code.len();
1386 let offset = (target as isize - patch_pos as isize - 2) as i16;
1387 let bytes = (offset as u16).to_be_bytes();
1388 self.code[patch_pos] = bytes[0];
1389 self.code[patch_pos + 1] = bytes[1];
1390 }
1391
1392 fn patch_jump_to(&mut self, patch_pos: usize, target: usize) {
1393 let offset = (target as isize - patch_pos as isize - 2) as i16;
1394 let bytes = (offset as u16).to_be_bytes();
1395 self.code[patch_pos] = bytes[0];
1396 self.code[patch_pos + 1] = bytes[1];
1397 }
1398
1399 fn bind_top_to_local(&mut self, name: &str) {
1400 if let Some(&slot) = self.local_slots.get(name) {
1401 self.emit_op(STORE_LOCAL);
1402 self.emit_u8(slot as u8);
1403 } else {
1404 self.emit_op(POP);
1405 }
1406 }
1407
1408 fn dup_and_bind_top_to_local(&mut self, name: &str) {
1409 self.emit_op(DUP);
1410 self.bind_top_to_local(name);
1411 }
1412}