1mod calls;
2mod expr;
3mod patterns;
4
5use std::collections::{HashMap, HashSet};
6
7use crate::ast::{FnBody, FnDef, Stmt, TopLevel, TypeDef};
8use crate::nan_value::{Arena, NanValue};
9use crate::source::find_module_file;
10
11use super::opcode::*;
12use super::types::{CodeStore, FnChunk};
13
14fn encode_vm_fn_ref(fn_id: u32) -> NanValue {
15 NanValue::new_int_inline(fn_id as i64)
17}
18
19pub fn compile_program(
22 items: &[TopLevel],
23 arena: &mut Arena,
24) -> Result<(CodeStore, Vec<NanValue>), CompileError> {
25 compile_program_with_modules(items, arena, None)
26}
27
28pub fn compile_program_with_modules(
30 items: &[TopLevel],
31 arena: &mut Arena,
32 module_root: Option<&str>,
33) -> Result<(CodeStore, Vec<NanValue>), CompileError> {
34 let mut compiler = ProgramCompiler::new();
35
36 if let Some(module_root) = module_root {
37 compiler.load_modules(items, module_root, arena)?;
38 }
39
40 for item in items {
41 match item {
42 TopLevel::FnDef(fndef) => {
43 compiler.ensure_global(&fndef.name);
44 let fn_id = compiler.code.add_function(FnChunk {
45 name: fndef.name.clone(),
46 arity: fndef.params.len() as u8,
47 local_count: 0,
48 code: Vec::new(),
49 constants: Vec::new(),
50 effects: fndef.effects.clone(),
51 thin: false,
52 parent_thin: false,
53 });
54 let global_idx = compiler.global_names[&fndef.name];
55 compiler.globals[global_idx as usize] = encode_vm_fn_ref(fn_id);
56 }
57 TopLevel::TypeDef(td) => {
58 compiler.register_type_def(td, arena);
59 }
60 _ => {}
61 }
62 }
63
64 for item in items {
65 if let TopLevel::FnDef(fndef) = item {
66 let fn_id = compiler.code.find(&fndef.name).unwrap();
67 let chunk = compiler.compile_fn(fndef, arena)?;
68 compiler.code.functions[fn_id as usize] = chunk;
69 }
70 }
71
72 compiler.compile_top_level(items, arena)?;
73 classify_thin_functions(&mut compiler.code, arena)?;
74
75 Ok((compiler.code, compiler.globals))
76}
77
78#[derive(Debug)]
79pub struct CompileError {
80 pub msg: String,
81}
82
83impl std::fmt::Display for CompileError {
84 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
85 write!(f, "Compile error: {}", self.msg)
86 }
87}
88
89struct ProgramCompiler {
90 code: CodeStore,
91 globals: Vec<NanValue>,
92 global_names: HashMap<String, u16>,
93}
94
95impl ProgramCompiler {
96 fn new() -> Self {
97 ProgramCompiler {
98 code: CodeStore::new(),
99 globals: Vec::new(),
100 global_names: HashMap::new(),
101 }
102 }
103
104 fn load_modules(
109 &mut self,
110 items: &[TopLevel],
111 module_root: &str,
112 arena: &mut Arena,
113 ) -> Result<(), CompileError> {
114 let module = items.iter().find_map(|i| {
115 if let TopLevel::Module(m) = i {
116 Some(m)
117 } else {
118 None
119 }
120 });
121 let module = match module {
122 Some(m) => m,
123 None => return Ok(()),
124 };
125
126 let mut loaded = HashSet::new();
127 for dep_name in &module.depends {
128 self.load_module_recursive(dep_name, module_root, arena, &mut loaded)?;
129 }
130 Ok(())
131 }
132
133 fn load_module_recursive(
134 &mut self,
135 dep_name: &str,
136 module_root: &str,
137 arena: &mut Arena,
138 loaded: &mut HashSet<String>,
139 ) -> Result<(), CompileError> {
140 if loaded.contains(dep_name) {
141 return Ok(());
142 }
143 loaded.insert(dep_name.to_string());
144
145 let file_path = find_module_file(dep_name, module_root).ok_or_else(|| CompileError {
146 msg: format!("module '{}' not found (root: {})", dep_name, module_root),
147 })?;
148
149 let source = std::fs::read_to_string(&file_path).map_err(|e| CompileError {
150 msg: format!("cannot read module '{}': {}", dep_name, e),
151 })?;
152
153 let mut mod_items = crate::source::parse_source(&source).map_err(|e| CompileError {
154 msg: format!("parse error in module '{}': {}", dep_name, e),
155 })?;
156
157 crate::tco::transform_program(&mut mod_items);
158 crate::resolver::resolve_program(&mut mod_items);
159
160 if let Some(sub_module) = mod_items.iter().find_map(|i| {
161 if let TopLevel::Module(m) = i {
162 Some(m)
163 } else {
164 None
165 }
166 }) {
167 for sub_dep in &sub_module.depends {
168 self.load_module_recursive(sub_dep, module_root, arena, loaded)?;
169 }
170 }
171
172 for item in &mod_items {
173 if let TopLevel::TypeDef(td) = item {
174 self.register_type_def(td, arena);
175 }
176 }
177
178 let exposes: Option<Vec<String>> = mod_items.iter().find_map(|i| {
179 if let TopLevel::Module(m) = i {
180 if m.exposes.is_empty() {
181 None
182 } else {
183 Some(m.exposes.clone())
184 }
185 } else {
186 None
187 }
188 });
189
190 let mut module_fn_ids: Vec<(String, u32)> = Vec::new();
191 for item in &mod_items {
192 if let TopLevel::FnDef(fndef) = item {
193 let fn_id = self.code.add_function(FnChunk {
194 name: format!("{}.{}", dep_name, fndef.name),
195 arity: fndef.params.len() as u8,
196 local_count: 0,
197 code: Vec::new(),
198 constants: Vec::new(),
199 effects: fndef.effects.clone(),
200 thin: false,
201 parent_thin: false,
202 });
203 module_fn_ids.push((fndef.name.clone(), fn_id));
204 }
205 }
206
207 let module_scope: HashMap<String, u32> = module_fn_ids.iter().cloned().collect();
208
209 let mut fn_idx = 0;
210 for item in &mod_items {
211 if let TopLevel::FnDef(fndef) = item {
212 let (_, fn_id) = module_fn_ids[fn_idx];
213 let chunk = self.compile_fn_with_scope(fndef, arena, &module_scope)?;
214 self.code.functions[fn_id as usize] = chunk;
215 fn_idx += 1;
216 }
217 }
218
219 for (fn_name, fn_id) in &module_fn_ids {
220 let exposed = match &exposes {
221 Some(list) => list.iter().any(|e| e == fn_name),
222 None => !fn_name.starts_with('_'),
223 };
224 if exposed {
225 let qualified = format!("{}.{}", dep_name, fn_name);
226 let global_idx = self.ensure_global(&qualified);
227 self.globals[global_idx as usize] = encode_vm_fn_ref(*fn_id);
228 }
229 }
230
231 for item in &mod_items {
232 if let TopLevel::TypeDef(td) = item {
233 let type_name = match td {
234 TypeDef::Sum { name, .. } | TypeDef::Product { name, .. } => name,
235 };
236 let exposed = match &exposes {
237 Some(list) => list.iter().any(|e| e == type_name),
238 None => !type_name.starts_with('_'),
239 };
240 if exposed {
241 }
245 }
246 }
247
248 Ok(())
249 }
250
251 fn ensure_global(&mut self, name: &str) -> u16 {
252 if let Some(&idx) = self.global_names.get(name) {
253 return idx;
254 }
255 let idx = self.globals.len() as u16;
256 self.global_names.insert(name.to_string(), idx);
257 self.globals.push(NanValue::UNIT);
258 idx
259 }
260
261 fn register_type_def(&mut self, td: &TypeDef, arena: &mut Arena) {
262 match td {
263 TypeDef::Product { name, fields, .. } => {
264 let field_names: Vec<String> = fields.iter().map(|(n, _)| n.clone()).collect();
265 arena.register_record_type(name, field_names);
266 }
267 TypeDef::Sum { name, variants, .. } => {
268 let variant_names: Vec<String> = variants.iter().map(|v| v.name.clone()).collect();
269 arena.register_sum_type(name, variant_names);
270 }
271 }
272 }
273
274 fn compile_fn(&mut self, fndef: &FnDef, arena: &mut Arena) -> Result<FnChunk, CompileError> {
275 let empty_scope = HashMap::new();
276 self.compile_fn_with_scope(fndef, arena, &empty_scope)
277 }
278
279 fn compile_fn_with_scope(
280 &mut self,
281 fndef: &FnDef,
282 arena: &mut Arena,
283 module_scope: &HashMap<String, u32>,
284 ) -> Result<FnChunk, CompileError> {
285 let resolution = fndef.resolution.as_ref();
286 let local_count = resolution.map_or(fndef.params.len() as u16, |r| r.local_count);
287 let local_slots: HashMap<String, u16> = resolution
288 .map(|r| r.local_slots.as_ref().clone())
289 .unwrap_or_else(|| {
290 fndef
291 .params
292 .iter()
293 .enumerate()
294 .map(|(i, (name, _))| (name.clone(), i as u16))
295 .collect()
296 });
297
298 let mut fc = FnCompiler::new(
299 &fndef.name,
300 fndef.params.len() as u8,
301 local_count,
302 fndef.effects.clone(),
303 local_slots,
304 &self.global_names,
305 module_scope,
306 &self.code,
307 arena,
308 );
309
310 match fndef.body.as_ref() {
311 FnBody::Block(stmts) => fc.compile_body(stmts)?,
312 }
313
314 Ok(fc.finish())
315 }
316
317 fn compile_top_level(
318 &mut self,
319 items: &[TopLevel],
320 arena: &mut Arena,
321 ) -> Result<(), CompileError> {
322 let has_stmts = items.iter().any(|i| matches!(i, TopLevel::Stmt(_)));
323 if !has_stmts {
324 return Ok(());
325 }
326
327 for item in items {
328 if let TopLevel::Stmt(Stmt::Binding(name, _, _)) = item {
329 self.ensure_global(name);
330 }
331 }
332
333 let empty_mod_scope = HashMap::new();
334 let mut fc = FnCompiler::new(
335 "__top_level__",
336 0,
337 0,
338 Vec::new(),
339 HashMap::new(),
340 &self.global_names,
341 &empty_mod_scope,
342 &self.code,
343 arena,
344 );
345
346 for item in items {
347 if let TopLevel::Stmt(stmt) = item {
348 match stmt {
349 Stmt::Binding(name, _type_ann, expr) => {
350 fc.compile_expr(expr)?;
351 let idx = self.global_names[name.as_str()];
352 fc.emit_op(STORE_GLOBAL);
353 fc.emit_u16(idx);
354 }
355 Stmt::Expr(expr) => {
356 fc.compile_expr(expr)?;
357 fc.emit_op(POP);
358 }
359 }
360 }
361 }
362
363 fc.emit_op(LOAD_UNIT);
364 fc.emit_op(RETURN);
365
366 let chunk = fc.finish();
367 self.code.add_function(chunk);
368 Ok(())
369 }
370}
371
372fn classify_thin_functions(code: &mut CodeStore, arena: &Arena) -> Result<(), CompileError> {
373 let thin_flags: Vec<bool> = code
374 .functions
375 .iter()
376 .map(classify_thin_chunk)
377 .collect::<Result<_, _>>()?;
378 let parent_thin_flags = classify_parent_thin_chunks(code, arena)?;
379
380 for ((chunk, thin), parent_thin) in code
381 .functions
382 .iter_mut()
383 .zip(thin_flags)
384 .zip(parent_thin_flags)
385 {
386 chunk.thin = thin;
387 chunk.parent_thin = parent_thin;
388 }
389
390 Ok(())
391}
392
393const MAX_PARENT_THIN_CODE_LEN: usize = 48;
394const MAX_PARENT_THIN_LOCALS: u16 = 8;
395
396fn classify_parent_thin_chunks(
397 code: &CodeStore,
398 _arena: &Arena,
399) -> Result<Vec<bool>, CompileError> {
400 let mut candidates: Vec<bool> = code
401 .functions
402 .iter()
403 .map(base_parent_thin_chunk)
404 .collect::<Result<_, _>>()?;
405
406 loop {
407 let mut changed = false;
408 for fn_id in 0..code.functions.len() {
409 if !candidates[fn_id] {
410 continue;
411 }
412 if !parent_thin_calls_are_safe(fn_id, code, &candidates)? {
413 candidates[fn_id] = false;
414 changed = true;
415 }
416 }
417 if !changed {
418 break;
419 }
420 }
421
422 Ok(candidates)
423}
424
425fn base_parent_thin_chunk(chunk: &FnChunk) -> Result<bool, CompileError> {
426 if !chunk.effects.is_empty()
427 || chunk.code.len() > MAX_PARENT_THIN_CODE_LEN
428 || chunk.local_count > MAX_PARENT_THIN_LOCALS
429 {
430 return Ok(false);
431 }
432
433 let code = &chunk.code;
434 let mut ip = 0usize;
435 while ip < code.len() {
436 let op = code[ip];
437 ip += 1;
438 match op {
439 STORE_GLOBAL | STORE_LOCAL | TAIL_CALL_SELF | TAIL_CALL_KNOWN | CALL_VALUE | JUMP
440 | JUMP_IF_FALSE | MATCH_TAG | MATCH_VARIANT | MATCH_UNWRAP | MATCH_NIL | MATCH_CONS
441 | LIST_HEAD_TAIL | MATCH_TUPLE | EXTRACT_FIELD | EXTRACT_TUPLE_ITEM | MATCH_FAIL => {
442 return Ok(false);
443 }
444
445 CALL_BUILTIN | CONCAT | LIST_NIL | LIST_CONS | LIST_GET | LIST_APPEND
446 | LIST_PREPEND | RECORD_UPDATE => {
447 return Ok(false);
448 }
449
450 POP | DUP | LOAD_UNIT | LOAD_TRUE | LOAD_FALSE | ADD | SUB | MUL | DIV | MOD | NEG
451 | NOT | EQ | LT | GT | RETURN | PROPAGATE_ERR | LIST_LEN | LIST_GET_MATCH => {}
452
453 LIST_NEW | WRAP | TUPLE_NEW | RECORD_NEW | VARIANT_NEW => {
454 return Ok(false);
455 }
456
457 LOAD_LOCAL | RECORD_GET => {
458 ip = advance_opcode_ip(chunk, ip, 1)?;
459 }
460
461 LOAD_CONST | LOAD_GLOBAL | RECORD_GET_NAMED => {
462 ip = advance_opcode_ip(chunk, ip, 2)?;
463 }
464
465 CALL_KNOWN => {
466 ip = advance_opcode_ip(chunk, ip, 3)?;
467 }
468
469 _ => {
470 return Err(CompileError {
471 msg: format!(
472 "unknown opcode 0x{op:02X} in {} at ip={} (code={:?})",
473 chunk.name,
474 ip - 1,
475 chunk.code
476 ),
477 });
478 }
479 }
480 }
481
482 Ok(true)
483}
484
485fn parent_thin_calls_are_safe(
486 current_fn_id: usize,
487 code: &CodeStore,
488 candidates: &[bool],
489) -> Result<bool, CompileError> {
490 let chunk = &code.functions[current_fn_id];
491 let bytes = &chunk.code;
492 let mut ip = 0usize;
493 while ip < bytes.len() {
494 let op = bytes[ip];
495 ip += 1;
496 match op {
497 CALL_KNOWN => {
498 if ip + 3 > bytes.len() {
499 return Err(CompileError {
500 msg: format!("truncated bytecode in {}", chunk.name),
501 });
502 }
503 let target_fn_id = u16::from_be_bytes([bytes[ip], bytes[ip + 1]]) as usize;
504 if target_fn_id == current_fn_id || target_fn_id >= candidates.len() {
505 return Ok(false);
506 }
507 if !code.functions[target_fn_id].effects.is_empty() {
508 return Ok(false);
509 }
510 ip = advance_opcode_ip(chunk, ip, 3)?;
511 }
512
513 LOAD_LOCAL | STORE_LOCAL | RECORD_GET | EXTRACT_FIELD | EXTRACT_TUPLE_ITEM => {
514 ip = advance_opcode_ip(chunk, ip, 1)?;
515 }
516
517 LOAD_CONST | LOAD_GLOBAL | JUMP | JUMP_IF_FALSE | RECORD_GET_NAMED | MATCH_FAIL => {
518 ip = advance_opcode_ip(chunk, ip, 2)?;
519 }
520
521 CALL_BUILTIN | MATCH_TAG | MATCH_UNWRAP | MATCH_TUPLE => {
522 ip = advance_opcode_ip(chunk, ip, 3)?;
523 }
524
525 MATCH_VARIANT => {
526 ip = advance_opcode_ip(chunk, ip, 4)?;
527 }
528
529 MATCH_NIL | MATCH_CONS => {
530 ip = advance_opcode_ip(chunk, ip, 2)?;
531 }
532
533 LIST_NEW | WRAP | TUPLE_NEW => {
534 ip = advance_opcode_ip(chunk, ip, 1)?;
535 }
536
537 RECORD_NEW => {
538 ip = advance_opcode_ip(chunk, ip, 3)?;
539 }
540
541 VARIANT_NEW => {
542 ip = advance_opcode_ip(chunk, ip, 5)?;
543 }
544
545 _ => {}
546 }
547 }
548 Ok(true)
549}
550
551fn classify_thin_chunk(chunk: &FnChunk) -> Result<bool, CompileError> {
552 let code = &chunk.code;
553 let mut ip = 0usize;
554
555 while ip < code.len() {
556 let op = code[ip];
557 ip += 1;
558 match op {
559 STORE_GLOBAL | TAIL_CALL_SELF | TAIL_CALL_KNOWN | CONCAT | LIST_NIL | LIST_CONS
560 | LIST_NEW | RECORD_NEW | VARIANT_NEW | WRAP | TUPLE_NEW | RECORD_UPDATE | LIST_LEN
561 | LIST_GET | LIST_APPEND | LIST_PREPEND => {
562 return Ok(false);
563 }
564
565 POP | DUP | LOAD_UNIT | LOAD_TRUE | LOAD_FALSE | ADD | SUB | MUL | DIV | MOD | NEG
566 | NOT | EQ | LT | GT | RETURN | PROPAGATE_ERR | LIST_HEAD_TAIL | LIST_GET_MATCH => {}
567
568 LOAD_LOCAL | STORE_LOCAL | CALL_VALUE | RECORD_GET | EXTRACT_FIELD
569 | EXTRACT_TUPLE_ITEM => {
570 ip = advance_opcode_ip(chunk, ip, 1)?;
571 }
572
573 LOAD_CONST | LOAD_GLOBAL | JUMP | JUMP_IF_FALSE | RECORD_GET_NAMED | MATCH_FAIL => {
574 ip = advance_opcode_ip(chunk, ip, 2)?;
575 }
576
577 CALL_KNOWN | CALL_BUILTIN | MATCH_TAG | MATCH_UNWRAP | MATCH_TUPLE => {
578 ip = advance_opcode_ip(chunk, ip, 3)?;
579 }
580
581 MATCH_VARIANT => {
582 ip = advance_opcode_ip(chunk, ip, 4)?;
583 }
584
585 MATCH_NIL | MATCH_CONS => {
586 ip = advance_opcode_ip(chunk, ip, 2)?;
587 }
588
589 _ => {
590 return Err(CompileError {
591 msg: format!(
592 "unknown opcode 0x{op:02X} in {} at ip={} (code={:?})",
593 chunk.name,
594 ip - 1,
595 chunk.code
596 ),
597 });
598 }
599 }
600 }
601
602 Ok(true)
603}
604
605fn advance_opcode_ip(chunk: &FnChunk, ip: usize, width: usize) -> Result<usize, CompileError> {
606 let new_ip = ip + width;
607 if new_ip > chunk.code.len() {
608 return Err(CompileError {
609 msg: format!("truncated bytecode in {}", chunk.name),
610 });
611 }
612 Ok(new_ip)
613}
614
615enum CallTarget {
617 KnownFn(u32),
619 Wrapper(u8),
621 None_,
623 Variant(u32, u16),
625 Builtin(String),
627}
628
629struct FnCompiler<'a> {
630 name: String,
631 arity: u8,
632 local_count: u16,
633 effects: Vec<String>,
634 local_slots: HashMap<String, u16>,
635 global_names: &'a HashMap<String, u16>,
636 module_scope: &'a HashMap<String, u32>,
639 code_store: &'a CodeStore,
640 arena: &'a mut Arena,
641 code: Vec<u8>,
642 constants: Vec<NanValue>,
643}
644
645impl<'a> FnCompiler<'a> {
646 #[allow(clippy::too_many_arguments)]
647 fn new(
648 name: &str,
649 arity: u8,
650 local_count: u16,
651 effects: Vec<String>,
652 local_slots: HashMap<String, u16>,
653 global_names: &'a HashMap<String, u16>,
654 module_scope: &'a HashMap<String, u32>,
655 code_store: &'a CodeStore,
656 arena: &'a mut Arena,
657 ) -> Self {
658 FnCompiler {
659 name: name.to_string(),
660 arity,
661 local_count,
662 effects,
663 local_slots,
664 global_names,
665 module_scope,
666 code_store,
667 arena,
668 code: Vec::new(),
669 constants: Vec::new(),
670 }
671 }
672
673 fn finish(self) -> FnChunk {
674 FnChunk {
675 name: self.name,
676 arity: self.arity,
677 local_count: self.local_count,
678 code: self.code,
679 constants: self.constants,
680 effects: self.effects,
681 thin: false,
682 parent_thin: false,
683 }
684 }
685
686 fn emit_op(&mut self, op: u8) {
687 self.code.push(op);
688 }
689
690 fn emit_u8(&mut self, val: u8) {
691 self.code.push(val);
692 }
693
694 fn emit_u16(&mut self, val: u16) {
695 self.code.push((val >> 8) as u8);
696 self.code.push((val & 0xFF) as u8);
697 }
698
699 fn emit_i16(&mut self, val: i16) {
700 self.emit_u16(val as u16);
701 }
702
703 fn add_constant(&mut self, val: NanValue) -> u16 {
704 for (i, c) in self.constants.iter().enumerate() {
705 if c.bits() == val.bits() {
706 return i as u16;
707 }
708 }
709 let idx = self.constants.len() as u16;
710 self.constants.push(val);
711 idx
712 }
713
714 fn offset(&self) -> usize {
715 self.code.len()
716 }
717
718 fn emit_jump(&mut self, op: u8) -> usize {
719 self.emit_op(op);
720 let patch_pos = self.code.len();
721 self.emit_i16(0);
722 patch_pos
723 }
724
725 fn patch_jump(&mut self, patch_pos: usize) {
726 let target = self.code.len();
727 let offset = (target as isize - patch_pos as isize - 2) as i16;
728 let bytes = (offset as u16).to_be_bytes();
729 self.code[patch_pos] = bytes[0];
730 self.code[patch_pos + 1] = bytes[1];
731 }
732
733 fn patch_jump_to(&mut self, patch_pos: usize, target: usize) {
734 let offset = (target as isize - patch_pos as isize - 2) as i16;
735 let bytes = (offset as u16).to_be_bytes();
736 self.code[patch_pos] = bytes[0];
737 self.code[patch_pos + 1] = bytes[1];
738 }
739
740 fn bind_top_to_local(&mut self, name: &str) {
741 if let Some(&slot) = self.local_slots.get(name) {
742 self.emit_op(STORE_LOCAL);
743 self.emit_u8(slot as u8);
744 } else {
745 self.emit_op(POP);
746 }
747 }
748
749 fn dup_and_bind_top_to_local(&mut self, name: &str) {
750 self.emit_op(DUP);
751 self.bind_top_to_local(name);
752 }
753}