Skip to main content

// ternlang_core/codegen/betbc.rs

1use crate::ast::*;
2use crate::vm::bet::pack_trits;
3use crate::trit::Trit;
4
/// Emits bytecode for the BET VM from the AST, one function or statement at a time.
pub struct BytecodeEmitter {
    // --- Output buffer -------------------------------------------------------
    /// Bytecode emitted so far.
    code: Vec<u8>,

    // --- Register allocation and variable scope ------------------------------
    /// Variable name → register number for the scope currently being emitted.
    /// Struct fields are stored under the mangled key `"object.field"`.
    symbols: std::collections::HashMap<String, u8>,
    /// Register counter; saved and restored around functions and control-flow
    /// statements (presumably the next index handed out by `alloc_reg`).
    next_reg: usize,
    /// Struct name → ordered list of its field names.
    pub struct_layouts: std::collections::HashMap<String, Vec<String>>,
    /// Snapshots of the local symbol table for each function, keyed by function name.
    /// Captured just before scope is restored so callers can map reg→varname after execution.
    function_symbols: std::collections::HashMap<String, std::collections::HashMap<String, u8>>,

    // --- Function addressing -------------------------------------------------
    /// Function name → bytecode address of its first instruction.
    func_addrs: std::collections::HashMap<String, u16>,
    /// Function name → positions in `code` that are overwritten with the
    /// function's address once that function is emitted.
    function_patches: std::collections::HashMap<String, Vec<usize>>,

    // --- Pending loop jumps --------------------------------------------------
    /// Operand positions of `break` jumps waiting for the enclosing loop's end address.
    break_patches: Vec<usize>,
    /// Operand positions of `continue` jumps waiting for the enclosing loop's target.
    continue_patches: Vec<usize>,

    // --- Agents --------------------------------------------------------------
    /// Agent name → numeric type id.
    agent_type_ids: std::collections::HashMap<String, u16>,
    /// `(type_id, handler_address)` pairs handed to the VM by `register_agents`.
    agent_handlers: Vec<(u16, u16)>,
}
20
21impl BytecodeEmitter {
22    pub fn new() -> Self {
23        Self {
24            code: Vec::new(),
25            symbols: std::collections::HashMap::new(),
26            func_addrs: std::collections::HashMap::new(),
27            function_patches: std::collections::HashMap::new(),
28            break_patches: Vec::new(),
29            continue_patches: Vec::new(),
30            next_reg: 0,
31            struct_layouts: std::collections::HashMap::new(),
32            agent_type_ids: std::collections::HashMap::new(),
33            agent_handlers: Vec::new(),
34            function_symbols: std::collections::HashMap::new(),
35        }
36    }
37
38    /// Returns the top-level variable-name → register-number map.
39    pub fn get_symbols(&self) -> &std::collections::HashMap<String, u8> {
40        &self.symbols
41    }
42
43    /// Returns the local symbol snapshot for a specific function (e.g. "main").
44    /// Used by `ternlang-cli --emit-symbols` to correlate VM register dumps with source variable names.
45    pub fn get_function_symbols(&self, name: &str) -> Option<&std::collections::HashMap<String, u8>> {
46        self.function_symbols.get(name)
47    }
48
49    pub fn register_agents(&self, vm: &mut crate::vm::BetVm) {
50        for &(type_id, addr) in &self.agent_handlers {
51            vm.register_agent_type(type_id, addr as usize);
52        }
53    }
54
55    /// Emit a single agent definition incrementally (used by the WASM fallback loop
56    /// when the full `parse_program` path is unavailable).
57    pub fn emit_agent_def(&mut self, agent: &crate::AgentDef) {
58        let type_id = self.agent_type_ids.len() as u16;
59        self.agent_type_ids.insert(agent.name.clone(), type_id);
60        let mut handler_addr: Option<u16> = None;
61        for method in &agent.methods {
62            let addr = self.code.len() as u16;
63            if handler_addr.is_none() {
64                handler_addr = Some(addr);
65            }
66            self.emit_function(method);
67            self.func_addrs.insert(format!("{}::{}", agent.name, method.name), addr);
68        }
69        if let Some(addr) = handler_addr {
70            self.agent_handlers.push((type_id, addr));
71        }
72    }
73
74    pub fn emit_header_jump(&mut self) -> usize {
75        let patch_pos = self.code.len() + 1;
76        self.code.push(0x0b); // TJMP
77        self.code.extend_from_slice(&[0u8, 0u8]);
78        patch_pos
79    }
80
81    pub fn patch_header_jump(&mut self, patch_pos: usize) {
82        let addr = self.code.len() as u16;
83        self.patch_u16(patch_pos, addr);
84    }
85
86    pub fn emit_program(&mut self, program: &Program) {
87        let parent_next_reg = self.next_reg;
88        for s in &program.structs {
89            let names: Vec<String> = s.fields.iter().map(|(n, _)| n.clone()).collect();
90            self.struct_layouts.insert(s.name.clone(), names);
91        }
92        for (idx, agent) in program.agents.iter().enumerate() {
93            self.agent_type_ids.insert(agent.name.clone(), idx as u16);
94        }
95
96        // PASS 1: Addresses
97        let real_code = std::mem::take(&mut self.code);
98        let real_func_addrs = std::mem::take(&mut self.func_addrs);
99        let real_agent_handlers = std::mem::take(&mut self.agent_handlers);
100        let base_addr = real_code.len() as u16;
101
102        for agent in &program.agents {
103            let type_id = self.agent_type_ids[&agent.name];
104            let mut handler_addr = None;
105            for method in &agent.methods {
106                let addr = base_addr + self.code.len() as u16;
107                if handler_addr.is_none() { handler_addr = Some(addr); }
108                self.emit_function(method);
109                // Restore correct absolute address overwritten by emit_function (TCALL-BUG fix):
110                self.func_addrs.insert(format!("{}::{}", agent.name, method.name), addr);
111            }
112            if let Some(addr) = handler_addr { self.agent_handlers.push((type_id, addr)); }
113        }
114        for func in &program.functions {
115            let addr = base_addr + self.code.len() as u16;
116            self.func_addrs.insert(func.name.clone(), addr);
117            // Ensure any global symbols or previous definitions are visible
118            self.emit_function(func);
119            // emit_function overwrites func_addrs[name] with a temp-buffer offset that
120            // omits base_addr. Restore the correct absolute address so that forward
121            // references resolved later in PASS 1 get the right TCALL target.
122            self.func_addrs.insert(func.name.clone(), addr);
123        }
124
125        let final_func_addrs = std::mem::replace(&mut self.func_addrs, real_func_addrs);
126        let final_agent_handlers = std::mem::replace(&mut self.agent_handlers, real_agent_handlers);
127        self.code = real_code;
128        self.func_addrs = final_func_addrs;
129        self.agent_handlers = final_agent_handlers;
130        self.next_reg = parent_next_reg;
131
132        // PASS 2: Real
133        for agent in &program.agents {
134            for method in &agent.methods { self.emit_function(method); }
135        }
136        for func in &program.functions { self.emit_function(func); }
137    }
138
139    pub fn emit_function(&mut self, func: &Function) {
140        let func_addr = self.code.len() as u16;
141        self.func_addrs.insert(func.name.clone(), func_addr);
142        if let Some(patches) = self.function_patches.remove(&func.name) {
143            for p in patches {
144                self.code[p..p + 2].copy_from_slice(&func_addr.to_le_bytes());
145            }
146        }
147        let parent_symbols = self.symbols.clone();
148        let parent_next_reg = self.next_reg;
149        self.next_reg = 0;
150
151        // If function has @sparseskip, we could emit a special header here.
152        // For now, it's just a marker in the AST.
153
154        for (name, ty) in func.params.iter().rev() {
155            if let Type::Named(s_name) = ty {
156                if let Some(fields) = self.struct_layouts.get(s_name).cloned() {
157                    // Structs are passed as a bundle: [field1, field2, ..., root_dummy]
158                    // We must pop root dummy first, then fields.
159                    
160                    // Pop root dummy
161                    let root_reg = self.alloc_reg();
162                    self.symbols.insert(name.clone(), root_reg);
163                    self.code.push(0x08); self.code.push(root_reg);
164
165                    // Pop fields in reverse order of how they were pushed
166                    for f_name in fields.iter().rev() {
167                        let f_reg = self.alloc_reg();
168                        let key = format!("{}.{}", name, f_name);
169                        self.symbols.insert(key, f_reg);
170                        self.code.push(0x08); self.code.push(f_reg);
171                    }
172                    continue;
173                }
174            }
175            let reg = self.alloc_reg();
176            self.symbols.insert(name.clone(), reg);
177            self.code.push(0x08); self.code.push(reg);
178        }
179        for stmt in &func.body { self.emit_stmt(stmt); }
180        // Snapshot local symbols before scope is restored — used by --emit-symbols
181        self.function_symbols.insert(func.name.clone(), self.symbols.clone());
182        self.symbols = parent_symbols;
183        self.next_reg = parent_next_reg;
184        self.code.push(0x11); // TRET
185    }
186
187    pub fn emit_stmt(&mut self, stmt: &Stmt) {
188        match stmt {
189            Stmt::Let { name, ty, value } => {
190                let mut handled = false;
191                match ty {
192                    Type::TritTensor { dims } => {
193                        // Auto-alloc for any zero-initializer (TritLiteral(0) or IntLiteral(0))
194                        let is_zero_init = matches!(value, Expr::TritLiteral(0) | Expr::IntLiteral(0));
195                        if !dims.is_empty() && !dims.contains(&0) && is_zero_init {
196                            let rows = dims[0];
197                            let cols = if dims.len() > 1 { dims[1] } else { 1 };
198                            self.code.push(0x0f); // TALLOC (trit)
199                            self.code.extend_from_slice(&(rows as u32).to_le_bytes());
200                            self.code.extend_from_slice(&(cols as u32).to_le_bytes());
201                            handled = true;
202                        }
203                    }
204                    Type::IntTensor { dims } => {
205                        let is_zero_init = matches!(value, Expr::TritLiteral(0) | Expr::IntLiteral(0));
206                        if !dims.is_empty() && !dims.contains(&0) && is_zero_init {
207                            let rows = dims[0];
208                            let cols = if dims.len() > 1 { dims[1] } else { 1 };
209                            self.code.push(0x3c); // TALLOC_Int
210                            self.code.extend_from_slice(&(rows as u32).to_le_bytes());
211                            self.code.extend_from_slice(&(cols as u32).to_le_bytes());
212                            handled = true;
213                        }
214                    }
215                    Type::FloatTensor { dims } => {
216                        let is_zero_init = matches!(value, Expr::TritLiteral(0) | Expr::IntLiteral(0));
217                        if !dims.is_empty() && !dims.contains(&0) && is_zero_init {
218                            let rows = dims[0];
219                            let cols = if dims.len() > 1 { dims[1] } else { 1 };
220                            self.code.push(0x3d); // TALLOC_Float
221                            self.code.extend_from_slice(&(rows as u32).to_le_bytes());
222                            self.code.extend_from_slice(&(cols as u32).to_le_bytes());
223                            handled = true;
224                        }
225                    }
226                    Type::Named(_) => {
227                        if let Expr::StructLiteral { fields, .. } = value {
228                            // Flatten struct fields into mangled registers
229                            for (f_name, f_val) in fields {
230                                self.emit_expr(f_val);
231                                let reg = self.alloc_reg();
232                                let key = format!("{}.{}", name, f_name);
233                                self.symbols.insert(key, reg);
234                                self.code.push(0x08); self.code.push(reg);
235                            }
236                            // Now we let the normal path emit the root variable's dummy value
237                        }
238                    }
239                    _ => {}
240                }
241                if !handled {
242                    self.emit_expr(value);
243                }
244                let reg = self.alloc_reg();
245                self.symbols.insert(name.clone(), reg);
246                self.code.push(0x08); self.code.push(reg); // TSTORE
247            }
248            Stmt::Set { name, value } => {
249                self.emit_expr(value);
250                if let Some(&reg) = self.symbols.get(name) {
251                    self.code.push(0x08); self.code.push(reg);
252                }
253            }
254            Stmt::FieldSet { object, field, value } => {
255                let key = format!("{}.{}", object, field);
256                self.emit_expr(value);
257                if let Some(&reg) = self.symbols.get(&key) {
258                    self.code.push(0x08); self.code.push(reg);
259                }
260            }
261            Stmt::IndexSet { object, row, col, value } => {
262                if let Some(&reg) = self.symbols.get(object) {
263                    self.code.push(0x09); self.code.push(reg);
264                    self.emit_expr(row);
265                    self.emit_expr(col);
266                    self.emit_expr(value);
267                    self.code.push(0x23);
268                }
269            }
270            Stmt::IfTernary { condition, on_pos, on_zero, on_neg } => {
271                let pre_reg = self.next_reg;
272                self.emit_expr(condition);
273                let cond_reg = self.alloc_reg();
274                self.code.push(0x08); self.code.push(cond_reg); // Tstore
275                
276                // Load condition for checks
277                self.code.push(0x09); self.code.push(cond_reg); // Tload
278                
279                // Check POS
280                let pos_patch = self.code.len() + 1;
281                self.code.push(0x05); self.code.extend_from_slice(&[0, 0]); // TJMP_POS
282                
283                // Check ZERO
284                let zero_patch = self.code.len() + 1;
285                self.code.push(0x06); self.code.extend_from_slice(&[0, 0]); // TJMP_ZERO
286                
287                // NEG arm: pop the condition and execute
288                self.code.push(0x0c); // TPOP
289                self.emit_stmt(on_neg);
290                let exit_patch = self.code.len() + 1;
291                self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]); // TJMP to end
292                
293                // POS arm
294                let pos_addr = self.code.len() as u16;
295                self.patch_u16(pos_patch, pos_addr);
296                self.code.push(0x0c); // TPOP
297                self.emit_stmt(on_pos);
298                let exit_pos = self.code.len() + 1;
299                self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
300                
301                // ZERO arm
302                let zero_addr = self.code.len() as u16;
303                self.patch_u16(zero_patch, zero_addr);
304                self.code.push(0x0c); // TPOP
305                self.emit_stmt(on_zero);
306                
307                let end = self.code.len() as u16;
308                self.patch_u16(exit_patch, end);
309                self.patch_u16(exit_pos, end);
310                self.next_reg = pre_reg;
311            }
312            Stmt::Match { condition, arms } => {
313                let pre_reg = self.next_reg;
314                self.emit_expr(condition);
315                let cond_reg = self.alloc_reg();
316                self.code.push(0x08); self.code.push(cond_reg); // Tstore
317
318                let mut end_patches = Vec::new();
319                let mut next_arm_patch = None;
320
321                for (pattern, stmt) in arms {
322                    if let Some(p) = next_arm_patch {
323                        let addr = self.code.len() as u16;
324                        self.patch_u16(p, addr);
325                    }
326
327                    // Load condition for this arm
328                    self.code.push(0x09); self.code.push(cond_reg); // Tload
329
330                    let match_patch;
331                    match pattern {
332                        Pattern::Trit(1) | Pattern::Int(1) => {
333                            self.code.push(0x05); // TjmpPos (peeks)
334                            match_patch = self.code.len();
335                            self.code.extend_from_slice(&[0, 0]);
336                        }
337                        Pattern::Trit(0) | Pattern::Int(0) => {
338                            self.code.push(0x06); // TjmpZero (peeks)
339                            match_patch = self.code.len();
340                            self.code.extend_from_slice(&[0, 0]);
341                        }
342                        Pattern::Trit(-1) | Pattern::Int(-1) => {
343                            self.code.push(0x07); // TjmpNeg (peeks)
344                            match_patch = self.code.len();
345                            self.code.extend_from_slice(&[0, 0]);
346                        }
347                        Pattern::Int(v) => {
348                            self.code.push(0x25); // TjmpEqInt (peeks)
349                            self.code.extend_from_slice(&v.to_le_bytes());
350                            match_patch = self.code.len();
351                            self.code.extend_from_slice(&[0, 0]);
352                        }
353                        Pattern::Trit(v) => {
354                            self.code.push(0x25); // TjmpEqInt (peeks)
355                            self.code.extend_from_slice(&(*v as i64).to_le_bytes());
356                            match_patch = self.code.len();
357                            self.code.extend_from_slice(&[0, 0]);
358                        }
359                        Pattern::Float(v) => {
360                            self.code.push(0x2a); // TjmpEqFloat (peeks)
361                            self.code.extend_from_slice(&v.to_le_bytes());
362                            match_patch = self.code.len();
363                            self.code.extend_from_slice(&[0, 0]);
364                        }
365                        Pattern::Wildcard => {
366                            // Wildcard always matches — unconditional jump to body.
367                            // Do NOT pop here: the body's shared TPOP below will clean
368                            // the TLOAD value from the stack, keeping it balanced.
369                            match_patch = self.code.len() + 1;
370                            self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]); // TJMP placeholder
371                        }
372                    }
373
374                    // Mismatch: the conditional test above PEEKS (doesn't pop), so if it
375                    // didn't jump the TLOAD result is still on the stack. Pop it before
376                    // jumping to the next arm to keep the stack balanced.
377                    // (Wildcard never reaches here — it always jumps above.)
378                    self.code.push(0x0c); // TPOP — discard unmatched arm's cond value
379                    let skip_patch = self.code.len() + 1;
380                    self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
381                    next_arm_patch = Some(skip_patch);
382
383                    // Match found: execute body
384                    let body_addr = self.code.len() as u16;
385                    self.patch_u16(match_patch, body_addr);
386                    
387                    // Body: first pop the condition we were peeking at
388                    self.code.push(0x0c); // Tpop
389                    self.emit_stmt(stmt);
390                    
391                    // After body, jump to end of match
392                    let end_patch = self.code.len() + 1;
393                    self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
394                    end_patches.push(end_patch);
395                }
396
397                if let Some(p) = next_arm_patch {
398                    let addr = self.code.len() as u16;
399                    self.patch_u16(p, addr);
400                }
401                
402                if !arms.is_empty() {
403                    // Each arm's mismatch path now does its own TPOP (see per-arm fix above),
404                    // so the stack is already clean when we reach the fallback.
405                    // VM-MATCH-001: non-exhaustive match — no arm was taken.
406                    // Push a Tend (hold/undefined) placeholder so the stack is balanced
407                    // even if the caller expects a return value from this match expression.
408                    self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Tend]));
409                }
410
411                let end_addr = self.code.len() as u16;
412                for p in end_patches { self.patch_u16(p, end_addr); }
413                self.next_reg = pre_reg;
414            }
415            Stmt::ForIn { var, iter, body } => {
416                // Save next_reg so loop-internal registers are freed after the loop ends.
417                // Without this, each for-in permanently consumes 4 registers, exhausting
418                // the register file after 6-7 loops in a single function.
419                let pre_loop_reg = self.next_reg;
420
421                self.emit_expr(iter);
422                let it_reg = self.alloc_reg();
423                self.code.push(0x08); self.code.push(it_reg);
424                self.code.push(0x09); self.code.push(it_reg);
425                self.code.push(0x24); // TSHAPE: pushes rows then cols (cols on top)
426                self.code.push(0x0c); // pop cols — iterate over rows, not cols
427                let r_reg = self.alloc_reg();
428                self.code.push(0x08); self.code.push(r_reg); // store rows as loop bound
429                let i_reg = self.alloc_reg();
430                self.code.push(0x17); self.code.extend_from_slice(&0i64.to_le_bytes());
431                self.code.push(0x08); self.code.push(i_reg);
432
433                // Use a register for the loop comparison so we avoid TDUP accumulation.
434                // Previously: TDUP + TjmpNeg/TjmpZero (peek) left 2 values on the stack per
435                // iteration, causing a stack leak that corrupted subsequent operations.
436                let cmp_reg = self.alloc_reg();
437
438                let top = self.code.len() as u16;
439                let pre_break = self.break_patches.len();
440                let pre_cont = self.continue_patches.len();
441
442                // Compute i < r → cmp_reg (stack neutral: push then immediately store)
443                self.code.push(0x09); self.code.push(i_reg);
444                self.code.push(0x09); self.code.push(r_reg);
445                self.code.push(0x14);                        // Tless → [cmp]
446                self.code.push(0x08); self.code.push(cmp_reg); // TSTORE cmp → []
447
448                // Load and test for NEG (i >= r → Reject → exit)
449                self.code.push(0x09); self.code.push(cmp_reg); // [cmp]
450                let neg = self.code.len() + 1;
451                self.code.push(0x07); self.code.extend_from_slice(&[0, 0]); // TjmpNeg → peeks
452                self.code.push(0x0c); // TPOP — clean up after failed neg check
453
454                // Load and test for ZERO (i == r → Tend → exit)
455                self.code.push(0x09); self.code.push(cmp_reg); // [cmp]
456                let zero = self.code.len() + 1;
457                self.code.push(0x06); self.code.extend_from_slice(&[0, 0]); // TjmpZero → peeks
458                self.code.push(0x0c); // TPOP — clean up after failed zero check, body runs clean
459
460                // Body: load element tensor[it, i, 0] → v_reg
461                self.code.push(0x09); self.code.push(it_reg);
462                self.code.push(0x09); self.code.push(i_reg);
463                self.code.push(0x17); self.code.extend_from_slice(&0i64.to_le_bytes());
464                self.code.push(0x22);
465                let v_reg = self.alloc_reg();
466                self.symbols.insert(var.clone(), v_reg);
467                self.code.push(0x08); self.code.push(v_reg);
468                self.emit_stmt(body);
469
470                let cont_addr = self.code.len() as u16;
471                let cs: Vec<usize> = self.continue_patches.drain(pre_cont..).collect();
472                for p in cs { self.patch_u16(p, cont_addr); }
473
474                self.code.push(0x09); self.code.push(i_reg);
475                self.code.push(0x17); self.code.extend_from_slice(&1i64.to_le_bytes());
476                self.code.push(0x18);
477                self.code.push(0x08); self.code.push(i_reg);
478                let back = self.code.len() + 1;
479                self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
480                self.patch_u16(back, top);
481
482                // Exit paths for neg/zero: the TjmpNeg/TjmpZero PEEK so the cmp value
483                // is still on the stack when they jump. Add a TPOP cleanup then TJMP end.
484                let neg_exit_addr = self.code.len() as u16;
485                self.patch_u16(neg, neg_exit_addr);
486                self.code.push(0x0c); // TPOP — clean peeked cmp
487                let neg_to_end = self.code.len() + 1;
488                self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
489
490                let zero_exit_addr = self.code.len() as u16;
491                self.patch_u16(zero, zero_exit_addr);
492                self.code.push(0x0c); // TPOP — clean peeked cmp
493                let zero_to_end = self.code.len() + 1;
494                self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
495
496                let end = self.code.len() as u16;
497                self.patch_u16(neg_to_end, end);
498                self.patch_u16(zero_to_end, end);
499                let bs: Vec<usize> = self.break_patches.drain(pre_break..).collect();
500                for p in bs { self.patch_u16(p, end); }
501
502                // Free loop registers: loop variable is out of scope, and the 4
503                // internal registers (it, r, i, v) are no longer needed.
504                self.symbols.remove(var);
505                self.next_reg = pre_loop_reg;
506            }
507            Stmt::WhileTernary { condition, on_pos, on_zero, on_neg } => {
508                let pre_reg = self.next_reg;
509                let cond_reg = self.alloc_reg();
510                let top = self.code.len() as u16;
511                let pre_break = self.break_patches.len();
512                let pre_cont = self.continue_patches.len();
513
514                self.emit_expr(condition);
515                self.code.push(0x08); self.code.push(cond_reg); // Tstore
516                
517                // Load condition for checks
518                self.code.push(0x09); self.code.push(cond_reg); // Tload
519                
520                // Check POS
521                let pos_patch = self.code.len() + 1;
522                self.code.push(0x05); self.code.extend_from_slice(&[0, 0]); // TJMP_POS
523                
524                // Check ZERO
525                let zero_patch = self.code.len() + 1;
526                self.code.push(0x06); self.code.extend_from_slice(&[0, 0]); // TJMP_ZERO
527                
528                // NEG ARM: pop and execute and EXIT (don't loop back)
529                self.code.push(0x0c); // TPOP
530                self.emit_stmt(on_neg);
531                let exit_neg = self.code.len() + 1;
532                self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]); // TJMP to end
533
534                // POS ARM: pop and execute and LOOP BACK
535                let pos_addr = self.code.len() as u16;
536                self.patch_u16(pos_patch, pos_addr);
537                self.code.push(0x0c); // TPOP
538                self.emit_stmt(on_pos);
539                let back_pos = self.code.len() + 1;
540                self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
541                self.patch_u16(back_pos, top);
542
543                // ZERO ARM: pop and execute and EXIT (don't loop back)
544                let zero_addr = self.code.len() as u16;
545                self.patch_u16(zero_patch, zero_addr);
546                self.code.push(0x0c); // TPOP
547                self.emit_stmt(on_zero);
548                
549                let end = self.code.len() as u16;
550                self.patch_u16(exit_neg, end);
551
552                let cs: Vec<usize> = self.continue_patches.drain(pre_cont..).collect();
553                for p in cs { self.patch_u16(p, top); }
554                let bs: Vec<usize> = self.break_patches.drain(pre_break..).collect();
555                for p in bs { self.patch_u16(p, end); }
556                self.next_reg = pre_reg;
557            }
558            Stmt::Loop { body } => {
559                let top = self.code.len() as u16;
560                let pre_break = self.break_patches.len();
561                let pre_cont = self.continue_patches.len();
562                self.emit_stmt(body);
563                let back = self.code.len() + 1;
564                self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
565                self.patch_u16(back, top);
566                let end = self.code.len() as u16;
567                let cs: Vec<usize> = self.continue_patches.drain(pre_cont..).collect();
568                for p in cs { self.patch_u16(p, top); }
569                let bs: Vec<usize> = self.break_patches.drain(pre_break..).collect();
570                for p in bs { self.patch_u16(p, end); }
571            }
572            Stmt::Break => {
573                let p = self.code.len() + 1;
574                self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
575                self.break_patches.push(p);
576            }
577            Stmt::Continue => {
578                let p = self.code.len() + 1;
579                self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
580                self.continue_patches.push(p);
581            }
582            Stmt::Send { target, message } => {
583                self.emit_expr(target);
584                self.emit_expr(message);
585                self.code.push(0x31); // TSEND
586            }
587            Stmt::Return(e) => { self.emit_expr(e); self.code.push(0x11); }
588            Stmt::Block(ss) => { for s in ss { self.emit_stmt(s); } }
589            Stmt::Expr(e) => { self.emit_expr(e); self.code.push(0x0c); }
590            Stmt::Decorated { directive: _, stmt } => { self.emit_stmt(stmt); }
591            _ => {}
592        }
593    }
594
595    fn emit_expr(&mut self, expr: &Expr) {
596        match expr {
597            Expr::TritLiteral(v) => {
598                self.code.push(0x01);
599                self.code.extend(pack_trits(&[Trit::from(*v)]));
600            }
601            Expr::IntLiteral(v) => {
602                self.code.push(0x17);
603                self.code.extend_from_slice(&v.to_le_bytes());
604            }
605            Expr::FloatLiteral(val) => {
606                self.code.push(0x19);
607                self.code.extend_from_slice(&val.to_le_bytes());
608            }
609            Expr::StringLiteral(val) => {
610                self.code.push(0x21); // TPUSH_STRING
611                let bytes = val.as_bytes();
612                self.code.extend_from_slice(&(bytes.len() as u16).to_le_bytes());
613                self.code.extend_from_slice(bytes);
614            }
615            Expr::Ident(name) => {
616                // COMP-BOOL-001: `true`/`false` are not keywords in the lexer — they arrive
617                // as Token::Ident. Handle them here so they produce a value instead of
618                // causing a stack underflow when no symbol matches.
619                match name.as_str() {
620                    "true" => {
621                        self.code.push(0x17); // TpushInt
622                        self.code.extend_from_slice(&1i64.to_le_bytes());
623                    }
624                    "false" => {
625                        self.code.push(0x17); // TpushInt
626                        self.code.extend_from_slice(&0i64.to_le_bytes());
627                    }
628                    // COMP-TRIT-001: trit aliases that arrive as Ident if lexer misses them
629                    "affirm" => { self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Affirm])); }
630                    "hold" | "tend" => { self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Tend])); }
631                    "reject" => { self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Reject])); }
632                    _ => {
633                        if let Some(&r) = self.symbols.get(name) {
634                            self.code.push(0x09); self.code.push(r);
635                        }
636                    }
637                }
638            }
639            Expr::BinaryOp { op, lhs, rhs } => {
640                self.emit_expr(lhs); self.emit_expr(rhs);
641                match op {
642                    BinOp::Add => self.code.push(0x02),
643                    BinOp::Mul => self.code.push(0x03),
644                    BinOp::Div => self.code.push(0x1e),
645                    BinOp::Mod => self.code.push(0x1f),
646                    BinOp::Sub => { self.code.push(0x04); self.code.push(0x02); }
647                    BinOp::Equal => self.code.push(0x16),
648                    BinOp::NotEqual => { self.code.push(0x16); self.code.push(0x04); }
649                    BinOp::And => self.code.push(0x28), // TAND = min(a,b)
650                    BinOp::Or  => self.code.push(0x29), // TOR  = max(a,b)
651                    BinOp::Less => self.code.push(0x14),
652                    BinOp::Greater => self.code.push(0x15),
653                    BinOp::LessEqual => self.code.push(0x26),
654                    BinOp::GreaterEqual => self.code.push(0x27),
655                }
656            }
657            Expr::UnaryOp { op, expr } => {
658                self.emit_expr(expr);
659                match op { UnOp::Neg => self.code.push(0x04) }
660            }
661            Expr::Call { callee, args } => {
662                match callee.as_str() {
663                    // `print` is an alias for `println` — same TPRINT opcode (0x20)
664                    "println" | "print" => {
665                        if args.is_empty() {
666                            // print newline only (not implemented, but let's push dummy)
667                        } else {
668                            for a in args {
669                                self.emit_expr(a);
670                                self.code.push(0x20); // TPRINT
671                            }
672                        }
673                        self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Tend])); // return hold()
674                    }
675                    "opent" => {
676                        if args.len() == 2 {
677                            for a in args { self.emit_expr(a); }
678                            self.code.push(0x33); // TOPENT (pushes Int handle)
679                        } else {
680                            // error but push dummy
681                            self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Tend]));
682                        }
683                    }
684                    "readt" => {
685                        if args.len() == 1 {
686                            self.emit_expr(&args[0]);
687                            self.code.push(0x34); // TREADT (pushes Trit)
688                        } else {
689                            self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Tend]));
690                        }
691                    }
692                    "writet" => {
693                        if args.len() == 2 {
694                            for a in args { self.emit_expr(a); }
695                            self.code.push(0x35); // TWRITET
696                        }
697                        self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Tend])); // push void/hold result
698                    }
699                    "consensus" => {
700
701                        for a in args { self.emit_expr(a); }
702                        if args.len() == 2 { self.code.push(0x0e); }
703                    }
704                    "length" => {
705                        if args.len() == 1 {
706                            self.emit_expr(&args[0]);
707                            self.code.push(0x24); // TSHAPE
708                            self.code.push(0x0c); // TPOP (cols)
709                        }
710                    }
711                    // VM-BUILTIN-001: `invert(t)` = ternary negation (Tneg, opcode 0x04)
712                    "invert" => {
713                        if args.len() == 1 {
714                            self.emit_expr(&args[0]);
715                            self.code.push(0x04); // Tneg
716                        }
717                    }
718                    // VM-BUILTIN-002: `len(arr)` is an alias for `length(arr)`
719                    "len" => {
720                        if args.len() == 1 {
721                            self.emit_expr(&args[0]);
722                            self.code.push(0x24); // TSHAPE
723                            self.code.push(0x0c); // TPOP (cols — TSHAPE pushes rows then cols)
724                        }
725                    }
726                    // VM-BUILTIN-001: `abs(n)` — inline: dup, push 0, less-than, branch on negative
727                    "abs" => {
728                        if args.len() == 1 {
729                            self.emit_expr(&args[0]);          // stack: [x]
730                            self.code.push(0x0a);              // TDUP   → [x, x]
731                            self.code.push(0x17);              // TpushInt 0
732                            self.code.extend_from_slice(&0i64.to_le_bytes()); // → [x, x, 0]
733                            self.code.push(0x14);              // Tless: (x < 0) → Affirm; [x, cmp]
734                            // TjmpPos (peek) to negate branch
735                            let neg_patch = self.code.len() + 1;
736                            self.code.push(0x05); self.code.extend_from_slice(&[0, 0]);
737                            // not negative: pop cmp, jump to end
738                            self.code.push(0x0c);              // TPOP → [x]
739                            let end_patch = self.code.len() + 1;
740                            self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
741                            // negate branch: pop cmp, negate x
742                            let neg_addr = self.code.len() as u16;
743                            self.patch_u16(neg_patch, neg_addr);
744                            self.code.push(0x0c);              // TPOP → [x]
745                            self.code.push(0x04);              // Tneg → [-x] (positive when x<0)
746                            let end_addr = self.code.len() as u16;
747                            self.patch_u16(end_patch, end_addr);
748                        }
749                    }
750                    // VM-BUILTIN-001: `min(a, b)` — inline with temp registers
751                    "min" => {
752                        if args.len() == 2 {
753                            let a_reg = self.alloc_reg();
754                            let b_reg = self.alloc_reg();
755                            self.emit_expr(&args[0]);
756                            self.code.push(0x08); self.code.push(a_reg); // TSTORE a
757                            self.emit_expr(&args[1]);
758                            self.code.push(0x08); self.code.push(b_reg); // TSTORE b
759                            self.code.push(0x09); self.code.push(a_reg); // TLOAD a
760                            self.code.push(0x09); self.code.push(b_reg); // TLOAD b
761                            self.code.push(0x14);                         // Tless: a < b → Affirm
762                            // TjmpPos → a is smaller, return a
763                            let a_smaller_patch = self.code.len() + 1;
764                            self.code.push(0x05); self.code.extend_from_slice(&[0, 0]);
765                            // a >= b: return b
766                            self.code.push(0x0c);              // TPOP cmp
767                            self.code.push(0x09); self.code.push(b_reg); // TLOAD b
768                            let end_patch = self.code.len() + 1;
769                            self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
770                            // a < b: return a
771                            let a_smaller_addr = self.code.len() as u16;
772                            self.patch_u16(a_smaller_patch, a_smaller_addr);
773                            self.code.push(0x0c);              // TPOP cmp
774                            self.code.push(0x09); self.code.push(a_reg); // TLOAD a
775                            let end_addr = self.code.len() as u16;
776                            self.patch_u16(end_patch, end_addr);
777                        }
778                    }
779                    // VM-BUILTIN-001: `max(a, b)` — inline with temp registers
780                    "max" => {
781                        if args.len() == 2 {
782                            let a_reg = self.alloc_reg();
783                            let b_reg = self.alloc_reg();
784                            self.emit_expr(&args[0]);
785                            self.code.push(0x08); self.code.push(a_reg); // TSTORE a
786                            self.emit_expr(&args[1]);
787                            self.code.push(0x08); self.code.push(b_reg); // TSTORE b
788                            self.code.push(0x09); self.code.push(b_reg); // TLOAD b
789                            self.code.push(0x09); self.code.push(a_reg); // TLOAD a
790                            self.code.push(0x14);                         // Tless: b < a → Affirm
791                            // TjmpPos → a is larger, return a
792                            let a_larger_patch = self.code.len() + 1;
793                            self.code.push(0x05); self.code.extend_from_slice(&[0, 0]);
794                            // b >= a: return b
795                            self.code.push(0x0c);              // TPOP cmp
796                            self.code.push(0x09); self.code.push(b_reg); // TLOAD b
797                            let end_patch = self.code.len() + 1;
798                            self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
799                            // b < a: return a
800                            let a_larger_addr = self.code.len() as u16;
801                            self.patch_u16(a_larger_patch, a_larger_addr);
802                            self.code.push(0x0c);              // TPOP cmp
803                            self.code.push(0x09); self.code.push(a_reg); // TLOAD a
804                            let end_addr = self.code.len() as u16;
805                            self.patch_u16(end_patch, end_addr);
806                        }
807                    }
808                    // `pow(base, exp)` — integer power via loop: result = 1; while exp>0 { result*=base; exp-=1; }
809                    "pow" => {
810                        if args.len() == 2 {
811                            let b_reg = self.alloc_reg(); // base
812                            let e_reg = self.alloc_reg(); // exponent
813                            let r_reg = self.alloc_reg(); // result
814                            // store base
815                            self.emit_expr(&args[0]);
816                            self.code.push(0x08); self.code.push(b_reg);
817                            // store exp
818                            self.emit_expr(&args[1]);
819                            self.code.push(0x08); self.code.push(e_reg);
820                            // result = 1
821                            self.code.push(0x17); self.code.extend_from_slice(&1i64.to_le_bytes());
822                            self.code.push(0x08); self.code.push(r_reg);
823                            // loop_start: check e > 0
824                            let loop_start = self.code.len() as u16;
825                            self.code.push(0x09); self.code.push(e_reg);  // TLOAD e
826                            self.code.push(0x17); self.code.extend_from_slice(&0i64.to_le_bytes()); // push 0
827                            self.code.push(0x15);  // Tgreater: e > 0 → Affirm
828                            // TjmpPos → jump to loop body
829                            let body_patch = self.code.len() + 1;
830                            self.code.push(0x05); self.code.extend_from_slice(&[0, 0]);
831                            // e <= 0: pop cmp, jump to end
832                            self.code.push(0x0c);
833                            let end_patch = self.code.len() + 1;
834                            self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]);
835                            // loop body:
836                            let body_addr = self.code.len() as u16;
837                            self.patch_u16(body_patch, body_addr);
838                            self.code.push(0x0c);  // TPOP cmp
839                            // r = r * b
840                            self.code.push(0x09); self.code.push(r_reg);
841                            self.code.push(0x09); self.code.push(b_reg);
842                            self.code.push(0x03);  // Tmul (handles int*int)
843                            self.code.push(0x08); self.code.push(r_reg);
844                            // e = e - 1
845                            self.code.push(0x09); self.code.push(e_reg);
846                            self.code.push(0x17); self.code.extend_from_slice(&(-1i64).to_le_bytes());
847                            self.code.push(0x18);  // TaddInt
848                            self.code.push(0x08); self.code.push(e_reg);
849                            // jump back to loop_start
850                            self.code.push(0x0b); self.code.extend_from_slice(&loop_start.to_le_bytes());
851                            // end:
852                            let end_addr = self.code.len() as u16;
853                            self.patch_u16(end_patch, end_addr);
854                            // push result
855                            self.code.push(0x09); self.code.push(r_reg);
856                        }
857                    }
858                    // `push(arr, val)` / `pop(arr)` — tensor mutation not yet implemented.
859                    // Emit argument expressions for side-effects then push a Tend stub so
860                    // callers get a value without falling through to an unresolved TCALL 0x0000
861                    // (which causes infinite recursion via jump-to-program-start).
862                    "push" => {
863                        for a in args { self.emit_expr(a); self.code.push(0x0c); } // eval + discard
864                        self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Tend])); // stub result
865                    }
866                    "pop" => {
867                        for a in args { self.emit_expr(a); self.code.push(0x0c); } // eval + discard
868                        self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Tend])); // stub result
869                    }
870                    "mul" => {
871                        for a in args { self.emit_expr(a); }
872                        if args.len() == 2 { self.code.push(0x03); }
873                    }
874                    "truth" => { self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Affirm])); }
875                    "hold" => { self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Tend])); }
876                    "conflict" => { self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Reject])); }
877                    _ => {
878                        for a in args {
879                            // If argument is a struct, we need to push all its flattened fields + root dummy
880                            let mut pushed_as_struct = false;
881                            if let Expr::Ident(name) = a {
882                                // We don't have the variable type here, but we can try to find if it's a struct
883                                // by looking for any mangled keys starting with "name.".
884                                // To get the correct field order, we'd need the struct name.
885                                // Let's try to find which struct layout matches the existing mangled keys.
886                                let mut fields_found = Vec::new();
887                                for (_s_name, s_fields) in &self.struct_layouts {
888                                    let mut all_present = true;
889                                    let mut current_regs = Vec::new();
890                                    for f in s_fields {
891                                        let key = format!("{}.{}", name, f);
892                                        if let Some(&r) = self.symbols.get(&key) {
893                                            current_regs.push(r);
894                                        } else {
895                                            all_present = false;
896                                            break;
897                                        }
898                                    }
899                                    if all_present && !s_fields.is_empty() {
900                                        fields_found = current_regs;
901                                        break;
902                                    }
903                                }
904
905                                if !fields_found.is_empty() {
906                                    for reg in fields_found {
907                                        self.code.push(0x09); self.code.push(reg); // TLOAD field
908                                    }
909                                    // Push root dummy
910                                    if let Some(&reg) = self.symbols.get(name) {
911                                        self.code.push(0x09); self.code.push(reg); // TLOAD root
912                                    }
913                                    pushed_as_struct = true;
914                                }
915                            }
916                            
917                            if !pushed_as_struct {
918                                self.emit_expr(a);
919                            }
920                        }
921                        self.code.push(0x10); // TCALL
922                        if let Some(&addr) = self.func_addrs.get(callee) {
923                            self.code.extend_from_slice(&addr.to_le_bytes());
924                        } else {
925                            let patch = self.code.len();
926                            self.code.extend_from_slice(&[0, 0]);
927                            self.function_patches.entry(callee.to_string()).or_default().push(patch);
928                        }
929                    }
930                }
931            }
932            Expr::Spawn { agent_name, .. } => {
933                if let Some(&type_id) = self.agent_type_ids.get(agent_name) {
934                    self.code.push(0x30); // TSPAWN
935                    self.code.extend_from_slice(&type_id.to_le_bytes());
936                } else {
937                    self.code.push(0x01); self.code.extend(pack_trits(&[Trit::Tend]));
938                }
939            }
940            Expr::Await { target } => {
941                self.emit_expr(target);
942                self.code.push(0x32); // TAWAIT
943            }
944            Expr::TritTensorLiteral(vs) => {
945                let rows = vs.len();
946                let cols = 1;
947                self.code.push(0x0f);
948                self.code.extend_from_slice(&(rows as u16).to_le_bytes());
949                self.code.extend_from_slice(&(cols as u16).to_le_bytes());
950                let tr = self.next_reg; self.next_reg += 1;
951                self.code.push(0x08); self.code.push(tr.try_into().unwrap());
952                for (idx, &v) in vs.iter().enumerate() {
953                    self.code.push(0x09); self.code.push(tr.try_into().unwrap());
954                    self.code.push(0x17); self.code.extend_from_slice(&(idx as i64).to_le_bytes());
955                    self.code.push(0x17); self.code.extend_from_slice(&0i64.to_le_bytes());
956                    self.code.push(0x01); self.code.extend(pack_trits(&[Trit::from(v)]));
957                    self.code.push(0x23);
958                }
959                self.code.push(0x09); self.code.push(tr.try_into().unwrap());
960            }
961            Expr::StructLiteral { fields, .. } => {
962                for (_, f_val) in fields {
963                    self.emit_expr(f_val);
964                }
965                self.code.push(0x40); // TSTRUCT
966                self.code.push(fields.len() as u8);
967                // We pop in reverse order of pushing
968                for (f_name, _) in fields.iter().rev() {
969                    self.code.push(f_name.len() as u8);
970                    self.code.extend_from_slice(f_name.as_bytes());
971                }
972            }
973            Expr::Propagate { expr } => {
974                self.emit_expr(expr);
975                self.code.push(0x0a); // TDUP
976                let patch = self.code.len() + 1;
977                self.code.push(0x07); self.code.extend_from_slice(&[0, 0]); // TJMP_NEG
978                let skip = self.code.len() + 1;
979                self.code.push(0x0b); self.code.extend_from_slice(&[0, 0]); // TJMP
980                let early_ret = self.code.len() as u16;
981                self.patch_u16(patch, early_ret);
982                self.code.push(0x11); // TRET
983                let next = self.code.len() as u16;
984                self.patch_u16(skip, next);
985            }
986            Expr::Index { object, row, col } => {
987                self.emit_expr(object); self.emit_expr(row); self.emit_expr(col);
988                self.code.push(0x22);
989            }
990            Expr::FieldAccess { object, field } => {
991                if let Expr::Ident(obj_name) = object.as_ref() {
992                    let key = format!("{}.{}", obj_name, field);
993                    if let Some(&r) = self.symbols.get(&key) {
994                        self.code.push(0x09); self.code.push(r); // TLOAD
995                    }
996                }
997            }
998            Expr::Cast { expr, .. } => {
999                // cast() is a type annotation hint only — pass inner expression through
1000                self.emit_expr(expr);
1001            }
1002            Expr::NodeId => {
1003                // Emit TNODEID (0x36): defers binding to runtime so that
1004                // `--node-addr` / vm.set_node_id() is actually respected.
1005                // Previously this emitted a hardcoded "127.0.0.1:7373" string
1006                // literal at compile time, which meant distributed modules always
1007                // announced the wrong address when deployed with a custom node addr.
1008                self.code.push(0x36); // TNODEID — pushes Value::String(vm.node_id)
1009            }
1010        }
1011    }
1012
1013    pub fn emit_entry_call(&mut self, name: &str) {
1014        if let Some(&addr) = self.func_addrs.get(name) {
1015            self.code.push(0x10); self.code.extend_from_slice(&addr.to_le_bytes());
1016        }
1017    }
1018
1019    /// Allocate the next register, returning its index as `u8` (the bytecode register width).
1020    /// Emits a stderr diagnostic if the function requires more than 255 registers — programs
1021    /// that hit this have much bigger structural problems anyway.
1022    fn alloc_reg(&mut self) -> u8 {
1023        let r = self.next_reg;
1024        self.next_reg += 1;
1025        if r > 255 {
1026            eprintln!(
1027                "[CODEGEN] Warning: register #{r} exceeds u8 range — \
1028                 this function has too many local variables (max 255). \
1029                 Split the function or reduce scope depth."
1030            );
1031        }
1032        r as u8
1033    }
1034
1035    pub fn get_agent_handlers(&self) -> Vec<(u16, usize)> {
1036        self.agent_handlers.iter().map(|&(id, addr)| (id, addr as usize)).collect()
1037    }
1038
1039    pub fn finalize(&mut self) -> Vec<u8> { std::mem::take(&mut self.code) }
1040
1041    fn patch_u16(&mut self, pos: usize, val: u16) {
1042        let b = val.to_le_bytes();
1043        self.code[pos] = b[0]; self.code[pos + 1] = b[1];
1044    }
1045}