glyph_runtime/
ir_to_bytecode.rs

1//! IR to bytecode compiler
2//!
3//! This module converts the intermediate representation (IR) into VM bytecode.
4
5use crate::instruction::Instruction;
6use crate::ir::*;
7use glyph_types::Value;
8use std::collections::HashMap;
9
10/// IR to bytecode compiler
11#[derive(Default)]
12pub struct IrToBytecode {
13    /// Map of function names to bytecode indices
14    function_indices: HashMap<String, usize>,
15    /// Map of block labels to instruction offsets within current function
16    block_offsets: HashMap<String, usize>,
17    /// Current function being compiled
18    current_function: Vec<Instruction>,
19    /// All compiled functions
20    functions: Vec<Vec<Instruction>>,
21}
22
23impl IrToBytecode {
24    pub fn new() -> Self {
25        Self::default()
26    }
27
28    pub fn compile(mut self, module: &IRModule) -> Vec<Vec<Instruction>> {
29        // First pass: assign indices to functions
30        let mut index = 0;
31
32        // Main function gets index 0 if it exists
33        if let Some(entry) = &module.entry_point {
34            self.function_indices.insert(entry.clone(), index);
35            index += 1;
36        }
37
38        // Other functions
39        for name in module.functions.keys() {
40            if Some(name) != module.entry_point.as_ref() {
41                self.function_indices.insert(name.clone(), index);
42                index += 1;
43            }
44        }
45
46        // Second pass: compile each function
47        if let Some(entry) = &module.entry_point {
48            if let Some(func) = module.functions.get(entry) {
49                self.compile_function(func);
50            }
51        }
52
53        for (name, func) in &module.functions {
54            if Some(name) != module.entry_point.as_ref() {
55                self.compile_function(func);
56            }
57        }
58
59        self.functions
60    }
61
62    fn compile_function(&mut self, func: &IRFunction) {
63        self.current_function.clear();
64        self.block_offsets.clear();
65
66        // Add parameter binding instructions at the start
67        // Parameters are pushed in order, so we bind them in reverse order
68        for param in func.params.iter().rev() {
69            self.current_function
70                .push(Instruction::BindLocal(param.clone()));
71        }
72
73        // First pass: calculate block offsets
74        let mut offset = func.params.len(); // Account for parameter bindings
75        for block in &func.blocks {
76            self.block_offsets.insert(block.label.clone(), offset);
77            offset += block.instructions.len();
78            offset += self.terminator_size(&block.terminator);
79        }
80
81        // Second pass: generate bytecode
82        for block in &func.blocks {
83            for inst in &block.instructions {
84                self.compile_instruction(inst);
85            }
86            self.compile_terminator(&block.terminator);
87        }
88
89        // Add function to compiled list
90        self.functions.push(self.current_function.clone());
91    }
92
93    fn compile_instruction(&mut self, inst: &IRInstruction) {
94        match inst {
95            IRInstruction::LoadConst(value) => {
96                self.current_function.push(Instruction::Push(value.clone()));
97            }
98            IRInstruction::LoadVar(name) => {
99                self.current_function
100                    .push(Instruction::LoadLocal(name.clone()));
101            }
102            IRInstruction::StoreVar(name) => {
103                self.current_function
104                    .push(Instruction::BindLocal(name.clone()));
105            }
106            IRInstruction::BinaryOp(op) => {
107                let vm_inst = match op {
108                    BinaryOp::Add => Instruction::Add,
109                    BinaryOp::Sub => Instruction::Sub,
110                    BinaryOp::Mul => Instruction::Mul,
111                    BinaryOp::Div => Instruction::Div,
112                    BinaryOp::Mod => Instruction::Mod,
113                    BinaryOp::Pow => Instruction::Pow,
114                    BinaryOp::Eq => Instruction::Eq,
115                    BinaryOp::Ne => Instruction::Ne,
116                    BinaryOp::Lt => Instruction::Lt,
117                    BinaryOp::Le => Instruction::Le,
118                    BinaryOp::Gt => Instruction::Gt,
119                    BinaryOp::Ge => Instruction::Ge,
120                    BinaryOp::And => Instruction::And,
121                    BinaryOp::Or => Instruction::Or,
122                };
123                self.current_function.push(vm_inst);
124            }
125            IRInstruction::UnaryOp(op) => {
126                let vm_inst = match op {
127                    UnaryOp::Neg => Instruction::Neg,
128                    UnaryOp::Not => Instruction::Not,
129                };
130                self.current_function.push(vm_inst);
131            }
132            IRInstruction::Call {
133                func,
134                args_count: _,
135            } => {
136                if let Some(&index) = self.function_indices.get(func) {
137                    self.current_function.push(Instruction::Call(index));
138                } else {
139                    // External function - use CallNative
140                    self.current_function
141                        .push(Instruction::CallNative(func.clone()));
142                }
143            }
144            IRInstruction::CallIntrinsic {
145                name,
146                args_count: _,
147            } => {
148                // Map intrinsic names to capabilities
149                let capability = match name.as_str() {
150                    "voice.speak" => Some("audio.speak".to_string()),
151                    "display.chart" => Some("display.chart".to_string()),
152                    "display.image" => Some("display.image".to_string()),
153                    "net.fetch" => Some("network.fetch".to_string()),
154                    "wait.confirm" => Some("display.text".to_string()),
155                    _ => None,
156                };
157
158                self.current_function.push(Instruction::CallIntrinsic {
159                    name: name.clone(),
160                    capability,
161                });
162            }
163            IRInstruction::MakeList(count) => {
164                self.current_function.push(Instruction::MakeList(*count));
165            }
166            IRInstruction::MakeDict(count) => {
167                self.current_function.push(Instruction::MakeDict(*count));
168            }
169            IRInstruction::GetAttr(attr) => {
170                self.current_function
171                    .push(Instruction::GetAttr(attr.clone()));
172            }
173            IRInstruction::GetItem => {
174                self.current_function.push(Instruction::GetIndex);
175            }
176            IRInstruction::CallMethod { name, argc: _ } => {
177                // For now, methods are implemented as regular function calls
178                // In a real implementation, we'd need proper method dispatch
179                self.current_function
180                    .push(Instruction::CallNative(name.clone()));
181            }
182            IRInstruction::Await => {
183                self.current_function.push(Instruction::AwaitPromise);
184            }
185            IRInstruction::Dup => {
186                self.current_function.push(Instruction::Dup);
187            }
188            IRInstruction::Pop => {
189                self.current_function.push(Instruction::Pop);
190            }
191        }
192    }
193
194    fn compile_terminator(&mut self, term: &IRTerminator) {
195        match term {
196            IRTerminator::Return => {
197                self.current_function.push(Instruction::Return);
198            }
199            IRTerminator::Jump(label) => {
200                if let Some(&offset) = self.block_offsets.get(label) {
201                    self.current_function.push(Instruction::Jump(offset));
202                }
203            }
204            IRTerminator::JumpIf {
205                then_block,
206                else_block,
207            } => {
208                // Duplicate condition for both tests
209                self.current_function.push(Instruction::Dup);
210
211                if let Some(&then_offset) = self.block_offsets.get(then_block) {
212                    self.current_function.push(Instruction::JumpIf(then_offset));
213                }
214
215                if let Some(&else_offset) = self.block_offsets.get(else_block) {
216                    self.current_function.push(Instruction::Jump(else_offset));
217                }
218            }
219            IRTerminator::Match { cases, default } => {
220                // Pattern matching implementation
221                // We need to preserve the subject value for variable patterns
222
223                for (i, (pattern, label)) in cases.iter().enumerate() {
224                    // For each case, we need to:
225                    // 1. Duplicate the subject value (except for first case)
226                    // 2. Test the pattern
227                    // 3. If match, jump to case block with subject still on stack
228
229                    if i > 0 {
230                        // Duplicate the subject for next comparison
231                        self.current_function.push(Instruction::Dup);
232                    }
233
234                    // Generate pattern matching code based on pattern type
235                    match pattern {
236                        IRPattern::Literal(value) => {
237                            // For literal patterns: duplicate subject, push literal, compare
238                            self.current_function.push(Instruction::Dup);
239                            self.current_function.push(Instruction::Push(value.clone()));
240                            self.current_function.push(Instruction::Eq);
241                        }
242                        IRPattern::Variable(_) => {
243                            // Variable patterns always match, keep subject on stack
244                            self.current_function
245                                .push(Instruction::Push(Value::Bool(true)));
246                        }
247                        IRPattern::Wildcard => {
248                            // Wildcard always matches, but doesn't need the value
249                            self.current_function
250                                .push(Instruction::Push(Value::Bool(true)));
251                        }
252                        IRPattern::Constructor { .. } => {
253                            // TODO: Implement constructor pattern matching
254                            self.current_function
255                                .push(Instruction::Push(Value::Bool(false)));
256                        }
257                    }
258
259                    if let Some(&offset) = self.block_offsets.get(label) {
260                        self.current_function.push(Instruction::JumpIf(offset));
261                    }
262
263                    // Pop the subject value if this case didn't match
264                    // (except for the last case, where we might need it for default)
265                    if i < cases.len() - 1 || default.is_some() {
266                        // The subject is still on the stack, ready for next pattern
267                    } else {
268                        // Last case and no default - pop the subject
269                        self.current_function.push(Instruction::Pop);
270                    }
271                }
272
273                // Default case or cleanup
274                if let Some(default_label) = default {
275                    if let Some(&offset) = self.block_offsets.get(default_label) {
276                        self.current_function.push(Instruction::Jump(offset));
277                    }
278                } else if cases.is_empty() {
279                    // No cases at all, just pop the subject
280                    self.current_function.push(Instruction::Pop);
281                }
282            }
283        }
284    }
285
286    fn terminator_size(&self, term: &IRTerminator) -> usize {
287        match term {
288            IRTerminator::Return => 1,
289            IRTerminator::Jump(_) => 1,
290            IRTerminator::JumpIf { .. } => 3, // Dup + JumpIf + Jump
291            IRTerminator::Match { cases, default } => {
292                let mut size = 0;
293                for (i, (pattern, _)) in cases.iter().enumerate() {
294                    if i > 0 {
295                        size += 1; // Dup subject for next comparison
296                    }
297                    // Size depends on pattern type
298                    match pattern {
299                        IRPattern::Literal(_) => size += 4, // Dup + Push + Eq + JumpIf
300                        IRPattern::Variable(_) | IRPattern::Wildcard => size += 2, // Push(true) + JumpIf
301                        IRPattern::Constructor { .. } => size += 2, // Push(false) + JumpIf
302                    }
303
304                    // Pop after non-matching case (except last)
305                    if i == cases.len() - 1 && default.is_none() {
306                        size += 1; // Pop on last case if no default
307                    }
308                }
309                if default.is_some() {
310                    size += 1; // Jump to default
311                } else if cases.is_empty() {
312                    size += 1; // Pop if no cases
313                }
314                size
315            }
316        }
317    }
318}
319
320/// Compile IR module to bytecode
321pub fn ir_to_bytecode(module: &IRModule) -> Vec<Vec<Instruction>> {
322    let compiler = IrToBytecode::new();
323    compiler.compile(module)
324}
325
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a module named "test" whose only function is the entry point
    /// `main`, consisting of a single `entry` block that runs `instructions`
    /// and then returns.
    fn single_block_module(instructions: Vec<IRInstruction>) -> IRModule {
        let main_func = IRFunction {
            name: "main".to_string(),
            params: vec![],
            locals_count: 0,
            blocks: vec![IRBlock {
                label: "entry".to_string(),
                instructions,
                terminator: IRTerminator::Return,
            }],
            is_async: false,
        };

        let mut functions = HashMap::new();
        functions.insert("main".to_string(), main_func);

        IRModule {
            program: IRProgram {
                name: "test".to_string(),
                version: "1.0".to_string(),
                requires: vec![],
            },
            functions,
            entry_point: Some("main".to_string()),
        }
    }

    /// Asserts that `actual` starts with the instructions in `expected`.
    fn assert_prefix(actual: &[Instruction], expected: &[Instruction]) {
        for (position, want) in expected.iter().enumerate() {
            assert_eq!(&actual[position], want);
        }
    }

    #[test]
    fn test_simple_function() {
        let module = single_block_module(vec![IRInstruction::LoadConst(Value::Int(42))]);

        let bytecode = ir_to_bytecode(&module);

        assert_eq!(bytecode.len(), 1);
        assert_eq!(bytecode[0].len(), 2);
        assert_prefix(
            &bytecode[0],
            &[Instruction::Push(Value::Int(42)), Instruction::Return],
        );
    }

    #[test]
    fn test_arithmetic() {
        let module = single_block_module(vec![
            IRInstruction::LoadConst(Value::Int(10)),
            IRInstruction::LoadConst(Value::Int(5)),
            IRInstruction::BinaryOp(BinaryOp::Add),
        ]);

        let bytecode = ir_to_bytecode(&module);

        assert_prefix(
            &bytecode[0],
            &[
                Instruction::Push(Value::Int(10)),
                Instruction::Push(Value::Int(5)),
                Instruction::Add,
                Instruction::Return,
            ],
        );
    }
}
399}