Skip to main content

ternlang_compat/
tasm.rs

1//! `.tasm` 9-trit assembly → BET bytecode assembler
2//!
3//! Translates the balanced ternary RISC assembly dialect used in the
4//! 9-trit simulator ecosystem into BET VM bytecode that runs on ternlang.
5//!
6//! ## Trit literal syntax
7//! Non-negative trits: `0` and `1` (balanced ternary has no digit `2`; any other character is rejected)
8//! Negative trit:   `T` (stands for −1, sometimes written as `t`)
9//! Example: `10T` = 1×9 + 0×3 + (−1)×1 = 8
10//!
11//! ## Supported mnemonics
12//! ```text
13//! NOP                     — no operation
14//! HALT                    — stop execution
15//! LOAD  rd, imm           — load immediate into register (a multi-trit literal is reduced to its sign, i.e. one trit)
16//! MOV   rd, rs            — copy register
17//! ADD   rd, rs1, rs2      — rd = rs1 + rs2
18//! SUB   rd, rs1, rs2      — rd = rs1 + neg(rs2)
19//! MUL   rd, rs1, rs2      — rd = rs1 * rs2  (ternary multiply)
20//! NEG   rd, rs            — rd = neg(rs)
21//! JMP   label             — unconditional jump
22//! BEQ   rs, label         — branch if rs == 0 (hold)
23//! BLT   rs, label         — branch if rs == -1 (conflict)
24//! BGT   rs, label         — branch if rs == +1 (truth)
25//! CONS  rd, rs1, rs2      — rd = consensus(rs1, rs2)
26//! PUSH  rs                — push register onto stack
27//! POP   rd                — pop stack into register
28//! ```
29
/// Errors that can occur while assembling `.tasm` source.
///
/// Every variant carries the offending source text so the message can point
/// at what was actually written.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TasmError {
    /// The first token of an instruction line is not a supported mnemonic.
    UnknownMnemonic(String),
    /// A register operand could not be parsed or is out of range.
    InvalidRegister(String),
    /// An immediate operand is not a valid balanced ternary literal.
    InvalidImmediate(String),
    /// A jump or branch refers to a label that is never defined.
    UndefinedLabel(String),
    /// An instruction was given fewer operands than it needs.
    MissingOperand { mnemonic: String, expected: usize, got: usize },
}

impl std::fmt::Display for TasmError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TasmError::UnknownMnemonic(m) => write!(f, "Unknown mnemonic: {}", m),
            TasmError::InvalidRegister(r) => write!(f, "Invalid register: {}", r),
            TasmError::InvalidImmediate(v) => write!(f, "Invalid immediate: {}", v),
            TasmError::UndefinedLabel(l) => write!(f, "Undefined label: {}", l),
            TasmError::MissingOperand { mnemonic, expected, got } => {
                write!(f, "{}: expected {} operands, got {}", mnemonic, expected, got)
            }
        }
    }
}

// Lets callers use `?` into `Box<dyn std::error::Error>` and similar error containers.
impl std::error::Error for TasmError {}
52
53// ─────────────────────────────────────────────────────────────────────────────
54// BET opcodes (from BET-ISA-SPEC.md)
55// ─────────────────────────────────────────────────────────────────────────────
// Opcode bytes, listed in numeric order.
const OP_THALT:     u8 = 0x00;
const OP_TPUSH:     u8 = 0x01;
const OP_TADD:      u8 = 0x02;
const OP_TMUL:      u8 = 0x03;
const OP_TNEG:      u8 = 0x04;
const OP_TJMP_POS:  u8 = 0x05;
const OP_TJMP_ZERO: u8 = 0x06;
// Was 0x05, which made BLT assemble to exactly the same opcode as BGT.
// The earlier inline note gave 0x07 for NEG — NOTE(review): confirm against BET-ISA-SPEC.md.
const OP_TJMP_NEG:  u8 = 0x07;
const OP_TSTORE:    u8 = 0x08;  // TSTORE reg ← pop
const OP_TLOAD:     u8 = 0x09;  // TLOAD reg → push reg value
const OP_TJMP:      u8 = 0x0b;
const OP_TCONS:     u8 = 0x0e;
68
// Two-bit BET codes for a single trit (the pattern 0b00 is not assigned here).
const TRIT_NEG:  u8 = 0b01; // -1 (conflict)
const TRIT_POS:  u8 = 0b10; // +1 (truth)
const TRIT_ZERO: u8 = 0b11; //  0 (hold)
73
74// ─────────────────────────────────────────────────────────────────────────────
75// Trit literal parser  ("10T" → i32 balanced ternary value)
76// ─────────────────────────────────────────────────────────────────────────────
77
78/// Parse a balanced ternary literal where T (or t) means −1.
79/// Examples: "1" → 1, "T" → -1, "10T" → 8, "0" → 0
80pub fn parse_trit_literal(s: &str) -> Result<i32, TasmError> {
81    if s.is_empty() {
82        return Err(TasmError::InvalidImmediate(s.to_string()));
83    }
84    let mut result = 0i32;
85    let mut power  = 1i32;
86    for ch in s.chars().rev() {
87        let digit = match ch {
88            '0' => 0,
89            '1' => 1,
90            'T' | 't' => -1,
91            _ => return Err(TasmError::InvalidImmediate(s.to_string())),
92        };
93        result += digit * power;
94        power  *= 3;
95    }
96    Ok(result)
97}
98
99/// Clamp an i32 value to a single trit {-1, 0, +1} and encode as BET byte.
100fn trit_encode(v: i32) -> u8 {
101    match v.signum() {
102        -1 => TRIT_NEG,
103         1 => TRIT_POS,
104         _ => TRIT_ZERO,
105    }
106}
107
108/// Parse register specifier: "r0"–"r26" or bare "0"–"26".
109fn parse_reg(s: &str) -> Result<u8, TasmError> {
110    let digits = s.trim_start_matches('r').trim_start_matches('R');
111    digits.parse::<u8>().map_err(|_| TasmError::InvalidRegister(s.to_string()))
112        .and_then(|n| if n < 27 { Ok(n) } else { Err(TasmError::InvalidRegister(s.to_string())) })
113}
114
115// ─────────────────────────────────────────────────────────────────────────────
116// Assembler
117// ─────────────────────────────────────────────────────────────────────────────
118
119/// Assembles `.tasm` source code into BET VM bytecode.
120pub struct TasmAssembler {
121    /// Emitted bytecode
122    pub bytecode: Vec<u8>,
123    /// Label → byte offset table (for two-pass label resolution)
124    labels: std::collections::HashMap<String, usize>,
125    /// Unresolved label references: (patch_offset, label_name)
126    patches: Vec<(usize, String)>,
127}
128
129impl TasmAssembler {
130    pub fn new() -> Self {
131        TasmAssembler {
132            bytecode: Vec::new(),
133            labels: std::collections::HashMap::new(),
134            patches: Vec::new(),
135        }
136    }
137
138    /// Assemble `.tasm` source. Returns BET bytecode on success.
139    pub fn assemble(&mut self, source: &str) -> Result<Vec<u8>, TasmError> {
140        self.bytecode.clear();
141        self.labels.clear();
142        self.patches.clear();
143
144        // Pass 1: collect labels + emit instructions
145        for raw_line in source.lines() {
146            let line = raw_line.trim();
147            if line.is_empty() || line.starts_with(';') || line.starts_with("//") {
148                continue; // blank / comment
149            }
150
151            // Strip inline comments
152            let line = line.split(';').next().unwrap_or(line).trim();
153            let line = line.split("//").next().unwrap_or(line).trim();
154
155            // Label definition: "loop:" or ".loop"
156            if line.ends_with(':') {
157                let label = line.trim_end_matches(':').to_string();
158                self.labels.insert(label, self.bytecode.len());
159                continue;
160            }
161            if line.starts_with('.') {
162                let label = line[1..].to_string();
163                self.labels.insert(label, self.bytecode.len());
164                continue;
165            }
166
167            // Tokenise instruction
168            let tokens: Vec<&str> = line.split_whitespace()
169                .flat_map(|t| t.split(','))
170                .map(str::trim)
171                .filter(|t| !t.is_empty())
172                .collect();
173
174            if tokens.is_empty() { continue; }
175
176            self.emit_instruction(&tokens)?;
177        }
178
179        // Pass 2: resolve labels
180        for (offset, label) in &self.patches {
181            let target = self.labels.get(label)
182                .ok_or_else(|| TasmError::UndefinedLabel(label.clone()))?;
183            // Patch the two bytes at offset with little-endian u16 address
184            let addr = *target as u16;
185            self.bytecode[*offset]     = (addr & 0xFF) as u8;
186            self.bytecode[*offset + 1] = (addr >> 8)  as u8;
187        }
188
189        Ok(self.bytecode.clone())
190    }
191
192    fn emit(&mut self, byte: u8) {
193        self.bytecode.push(byte);
194    }
195
196    fn require(tokens: &[&str], mnemonic: &str, count: usize) -> Result<(), TasmError> {
197        if tokens.len() - 1 < count {
198            return Err(TasmError::MissingOperand {
199                mnemonic: mnemonic.to_string(),
200                expected: count,
201                got: tokens.len() - 1,
202            });
203        }
204        Ok(())
205    }
206
207    fn emit_jump(&mut self, opcode: u8, label: &str) {
208        self.emit(opcode);
209        // Reserve 2 bytes for the address; patch in pass 2
210        let patch_offset = self.bytecode.len();
211        self.emit(0x00);
212        self.emit(0x00);
213        self.patches.push((patch_offset, label.to_string()));
214    }
215
216    fn emit_instruction(&mut self, tokens: &[&str]) -> Result<(), TasmError> {
217        let mnemonic = tokens[0].to_uppercase();
218
219        match mnemonic.as_str() {
220            "NOP" => {
221                // No-op: push hold, pop immediately — net effect: nothing
222                // BET has no dedicated NOP; use TDUP+THALT would stop, so just skip.
223            }
224
225            "HALT" => {
226                self.emit(OP_THALT);
227            }
228
229            // LOAD rd, imm  — push trit immediate into register
230            "LOAD" => {
231                Self::require(tokens, "LOAD", 2)?;
232                let rd  = parse_reg(tokens[1])?;
233                let val = parse_trit_literal(tokens[2])?;
234                // Push the trit value, then store into register
235                self.emit(OP_TPUSH);
236                self.emit(trit_encode(val));
237                self.emit(OP_TSTORE);
238                self.emit(rd);
239            }
240
241            // MOV rd, rs  — copy rs to rd
242            "MOV" => {
243                Self::require(tokens, "MOV", 2)?;
244                let rd = parse_reg(tokens[1])?;
245                let rs = parse_reg(tokens[2])?;
246                self.emit(OP_TLOAD);
247                self.emit(rs);
248                self.emit(OP_TSTORE);
249                self.emit(rd);
250            }
251
252            // ADD rd, rs1, rs2  — rd = rs1 + rs2
253            "ADD" => {
254                Self::require(tokens, "ADD", 3)?;
255                let rd  = parse_reg(tokens[1])?;
256                let rs1 = parse_reg(tokens[2])?;
257                let rs2 = parse_reg(tokens[3])?;
258                self.emit(OP_TLOAD);  self.emit(rs1);
259                self.emit(OP_TLOAD);  self.emit(rs2);
260                self.emit(OP_TADD);
261                self.emit(OP_TSTORE); self.emit(rd);
262            }
263
264            // SUB rd, rs1, rs2  — rd = rs1 + neg(rs2)
265            "SUB" => {
266                Self::require(tokens, "SUB", 3)?;
267                let rd  = parse_reg(tokens[1])?;
268                let rs1 = parse_reg(tokens[2])?;
269                let rs2 = parse_reg(tokens[3])?;
270                self.emit(OP_TLOAD);  self.emit(rs1);
271                self.emit(OP_TLOAD);  self.emit(rs2);
272                self.emit(OP_TNEG);                   // negate rs2
273                self.emit(OP_TADD);
274                self.emit(OP_TSTORE); self.emit(rd);
275            }
276
277            // MUL rd, rs1, rs2  — rd = rs1 × rs2
278            "MUL" => {
279                Self::require(tokens, "MUL", 3)?;
280                let rd  = parse_reg(tokens[1])?;
281                let rs1 = parse_reg(tokens[2])?;
282                let rs2 = parse_reg(tokens[3])?;
283                self.emit(OP_TLOAD);  self.emit(rs1);
284                self.emit(OP_TLOAD);  self.emit(rs2);
285                self.emit(OP_TMUL);
286                self.emit(OP_TSTORE); self.emit(rd);
287            }
288
289            // NEG rd, rs  — rd = neg(rs)
290            "NEG" => {
291                Self::require(tokens, "NEG", 2)?;
292                let rd = parse_reg(tokens[1])?;
293                let rs = parse_reg(tokens[2])?;
294                self.emit(OP_TLOAD);  self.emit(rs);
295                self.emit(OP_TNEG);
296                self.emit(OP_TSTORE); self.emit(rd);
297            }
298
299            // CONS rd, rs1, rs2  — rd = consensus(rs1, rs2)
300            "CONS" => {
301                Self::require(tokens, "CONS", 3)?;
302                let rd  = parse_reg(tokens[1])?;
303                let rs1 = parse_reg(tokens[2])?;
304                let rs2 = parse_reg(tokens[3])?;
305                self.emit(OP_TLOAD);  self.emit(rs1);
306                self.emit(OP_TLOAD);  self.emit(rs2);
307                self.emit(OP_TCONS);
308                self.emit(OP_TSTORE); self.emit(rd);
309            }
310
311            // PUSH rs  — push register onto stack
312            "PUSH" => {
313                Self::require(tokens, "PUSH", 1)?;
314                let rs = parse_reg(tokens[1])?;
315                self.emit(OP_TLOAD); self.emit(rs);
316            }
317
318            // POP rd  — pop stack into register
319            "POP" => {
320                Self::require(tokens, "POP", 1)?;
321                let rd = parse_reg(tokens[1])?;
322                self.emit(OP_TSTORE); self.emit(rd);
323            }
324
325            // JMP label
326            "JMP" | "JUMP" => {
327                Self::require(tokens, "JMP", 1)?;
328                self.emit_jump(OP_TJMP, tokens[1]);
329            }
330
331            // BEQ rs, label  — branch if rs == 0
332            "BEQ" | "BZ" => {
333                Self::require(tokens, "BEQ", 2)?;
334                let rs = parse_reg(tokens[1])?;
335                self.emit(OP_TLOAD); self.emit(rs);
336                self.emit_jump(OP_TJMP_ZERO, tokens[2]);
337            }
338
339            // BLT rs, label  — branch if rs == -1
340            "BLT" | "BN" => {
341                Self::require(tokens, "BLT", 2)?;
342                let rs = parse_reg(tokens[1])?;
343                self.emit(OP_TLOAD); self.emit(rs);
344                self.emit_jump(OP_TJMP_NEG, tokens[2]);
345            }
346
347            // BGT rs, label  — branch if rs == +1
348            "BGT" | "BP" => {
349                Self::require(tokens, "BGT", 2)?;
350                let rs = parse_reg(tokens[1])?;
351                self.emit(OP_TLOAD); self.emit(rs);
352                self.emit_jump(OP_TJMP_POS, tokens[2]);
353            }
354
355            _ => return Err(TasmError::UnknownMnemonic(tokens[0].to_string())),
356        }
357
358        Ok(())
359    }
360}
361
362impl Default for TasmAssembler {
363    fn default() -> Self { Self::new() }
364}
365
366// ─────────────────────────────────────────────────────────────────────────────
367// Tests
368// ─────────────────────────────────────────────────────────────────────────────
369
#[cfg(test)]
mod tests {
    use super::*;

    /// Assemble `src` with a fresh assembler, panicking on any error.
    fn assemble_ok(src: &str) -> Vec<u8> {
        TasmAssembler::new().assemble(src).unwrap()
    }

    #[test]
    fn test_parse_trit_literal_simple() {
        assert_eq!(parse_trit_literal("1"), Ok(1));
        assert_eq!(parse_trit_literal("0"), Ok(0));
        assert_eq!(parse_trit_literal("T"), Ok(-1));
    }

    #[test]
    fn test_parse_trit_literal_multidigit() {
        // Digits are most-significant first.
        // 10T = 1×9 + 0×3 + (−1)×1 = 8
        assert_eq!(parse_trit_literal("10T"), Ok(8));
        // 1T1 = 1×9 + (−1)×3 + 1×1 = 7
        assert_eq!(parse_trit_literal("1T1"), Ok(7));
        // TTT = (−1)×9 + (−1)×3 + (−1)×1 = −13
        assert_eq!(parse_trit_literal("TTT"), Ok(-13));
    }

    #[test]
    fn test_parse_trit_literal_invalid() {
        assert!(parse_trit_literal("2").is_err());
        assert!(parse_trit_literal("").is_err());
    }

    #[test]
    fn test_assemble_halt() {
        assert_eq!(assemble_ok("HALT"), vec![0x00]);
    }

    #[test]
    fn test_assemble_load_pos() {
        // LOAD r0, 1 → TPUSH +1, TSTORE r0
        assert_eq!(assemble_ok("LOAD r0, 1"), vec![0x01, 0x02, 0x08, 0x00]);
    }

    #[test]
    fn test_assemble_load_neg() {
        // LOAD r1, T → TPUSH −1, TSTORE r1
        assert_eq!(assemble_ok("LOAD r1, T"), vec![0x01, 0x01, 0x08, 0x01]);
    }

    #[test]
    fn test_assemble_load_zero() {
        // LOAD r2, 0 → TPUSH hold, TSTORE r2
        assert_eq!(assemble_ok("LOAD r2, 0"), vec![0x01, 0x03, 0x08, 0x02]);
    }

    #[test]
    fn test_assemble_add() {
        // Exact bytes: a `contains(&0x02)` check would be satisfied by the
        // register-2 operand alone and could not detect a missing TADD.
        assert_eq!(
            assemble_ok("ADD r0, r1, r2\nHALT"),
            vec![0x09, 0x01, 0x09, 0x02, 0x02, 0x08, 0x00, 0x00]
        );
    }

    #[test]
    fn test_assemble_sub() {
        // SUB negates the second operand (top of stack) before adding.
        assert_eq!(
            assemble_ok("SUB r0, r1, r2"),
            vec![0x09, 0x01, 0x09, 0x02, 0x04, 0x02, 0x08, 0x00]
        );
    }

    #[test]
    fn test_assemble_neg() {
        assert_eq!(
            assemble_ok("NEG r0, r1\nHALT"),
            vec![0x09, 0x01, 0x04, 0x08, 0x00, 0x00]
        );
    }

    #[test]
    fn test_assemble_label_jump() {
        let src = "
; infinite loop (test label resolution)
loop:
  LOAD r0, 1
  JMP loop
";
        // `loop` sits at offset 0, so the jump operand is 0x0000.
        assert_eq!(
            assemble_ok(src),
            vec![0x01, 0x02, 0x08, 0x00, 0x0b, 0x00, 0x00]
        );
    }

    #[test]
    fn test_assemble_forward_label_is_patched() {
        // A non-zero target proves the placeholder is really overwritten:
        // JMP (3 bytes) + LOAD (4 bytes) puts `end` at offset 7.
        let src = "JMP end\nLOAD r0, 1\nend:\nHALT";
        assert_eq!(
            assemble_ok(src),
            vec![0x0b, 0x07, 0x00, 0x01, 0x02, 0x08, 0x00, 0x00]
        );
    }

    #[test]
    fn test_assemble_undefined_label() {
        let mut asm = TasmAssembler::new();
        let result = asm.assemble("JMP nonexistent");
        assert!(matches!(result, Err(TasmError::UndefinedLabel(_))));
    }

    #[test]
    fn test_assemble_unknown_mnemonic() {
        let mut asm = TasmAssembler::new();
        let result = asm.assemble("FLOATOP r0, r1");
        assert!(matches!(result, Err(TasmError::UnknownMnemonic(_))));
    }

    #[test]
    fn test_assemble_missing_operand() {
        let mut asm = TasmAssembler::new();
        assert_eq!(
            asm.assemble("ADD r0, r1"),
            Err(TasmError::MissingOperand {
                mnemonic: "ADD".to_string(),
                expected: 3,
                got: 2,
            })
        );
    }

    #[test]
    fn test_assemble_comments_ignored() {
        assert_eq!(
            assemble_ok("; this is a comment\n// also a comment\nHALT"),
            vec![0x00]
        );
    }

    #[test]
    fn test_assemble_full_program() {
        // Load +1 into r0, load -1 into r1, add into r2, halt
        let src = "
  LOAD r0, 1      ; truth
  LOAD r1, T      ; conflict
  ADD  r2, r0, r1 ; hold (1 + -1 = 0)
  HALT
";
        assert_eq!(
            assemble_ok(src),
            vec![
                0x01, 0x02, 0x08, 0x00, // LOAD r0, 1
                0x01, 0x01, 0x08, 0x01, // LOAD r1, T
                0x09, 0x00, 0x09, 0x01, 0x02, 0x08, 0x02, // ADD r2, r0, r1
                0x00, // HALT
            ]
        );
    }

    #[test]
    fn test_trit_encode() {
        assert_eq!(trit_encode(-1), 0x01);
        assert_eq!(trit_encode(0), 0x03);
        assert_eq!(trit_encode(1), 0x02);
        assert_eq!(trit_encode(5), 0x02); // positive → +1
        assert_eq!(trit_encode(-9), 0x01); // negative → -1
    }
}
510}