avm_rs/assembler/
mod.rs

1//! TEAL assembler implementation
2
3use crate::error::{AvmError, AvmResult};
4use crate::opcodes::*;
5use crate::varuint::encode_varuint;
6use std::collections::HashMap;
7
/// Assembler that turns TEAL source text into bytecode.
///
/// The state below is filled in while a program is being assembled: pragma
/// settings, the labels seen so far, and the branch operands that still need a
/// target.
#[derive(Default, Debug)]
pub struct Assembler {
    /// Program version, as set by `#pragma version`.
    version: u8,
    /// Type-tracking flag, as set by `#pragma typetrack`.
    typetrack: bool,
    /// Bytecode offset of each label, keyed by label name.
    labels: HashMap<String, usize>,
    /// Unresolved branch operands: (offset of the placeholder bytes, label name).
    forward_refs: Vec<(usize, String)>,
}
20
21impl Assembler {
22    /// Create a new assembler
23    pub fn new() -> Self {
24        Self::default()
25    }
26
27    /// Assemble TEAL source code to bytecode
28    pub fn assemble(&mut self, source: &str) -> AvmResult<Vec<u8>> {
29        let mut bytecode = Vec::new();
30        let lines: Vec<&str> = source.lines().collect();
31
32        // First pass: collect labels and generate bytecode
33        for (line_num, line) in lines.iter().enumerate() {
34            let line = line.trim();
35
36            // Skip empty lines and comments
37            if line.is_empty() || line.starts_with("//") || line.starts_with(";") {
38                continue;
39            }
40
41            // Handle inline comments (remove everything after ; or //)
42            let line = if let Some(pos) = line.find(';') {
43                line[..pos].trim()
44            } else if let Some(pos) = line.find("//") {
45                line[..pos].trim()
46            } else {
47                line
48            };
49
50            // Skip if line becomes empty after comment removal
51            if line.is_empty() {
52                continue;
53            }
54
55            // Handle pragma directives
56            if line.starts_with("#pragma") {
57                let parts: Vec<&str> = line.split_whitespace().collect();
58                if parts.len() >= 3 {
59                    match parts[1] {
60                        "version" => {
61                            self.version = parts[2].parse().map_err(|_| {
62                                AvmError::assembly_error(format!(
63                                    "Invalid version on line {}",
64                                    line_num + 1
65                                ))
66                            })?;
67                        }
68                        "typetrack" => {
69                            self.typetrack = parts[2].parse().map_err(|_| {
70                                AvmError::assembly_error(format!(
71                                    "Invalid typetrack value on line {}",
72                                    line_num + 1
73                                ))
74                            })?;
75                        }
76                        _ => {
77                            return Err(AvmError::assembly_error(format!(
78                                "Unknown pragma directive '{}' on line {}",
79                                parts[1],
80                                line_num + 1
81                            )));
82                        }
83                    }
84                } else {
85                    return Err(AvmError::assembly_error(format!(
86                        "Invalid pragma syntax on line {}",
87                        line_num + 1
88                    )));
89                }
90                continue;
91            }
92
93            // Handle labels
94            if line.ends_with(':') {
95                let label = line.strip_suffix(':').unwrap();
96                self.labels.insert(label.to_string(), bytecode.len());
97                continue;
98            }
99
100            // Parse instruction
101            let parts: Vec<&str> = line.split_whitespace().collect();
102            if parts.is_empty() {
103                continue;
104            }
105
106            let opcode = parts[0];
107            let args = &parts[1..];
108
109            self.assemble_instruction(&mut bytecode, opcode, args, line_num + 1)?;
110        }
111
112        // Second pass: resolve forward references
113        self.resolve_forward_refs(&mut bytecode)?;
114
115        Ok(bytecode)
116    }
117
118    /// Assemble a single instruction
119    fn assemble_instruction(
120        &mut self,
121        bytecode: &mut Vec<u8>,
122        opcode: &str,
123        args: &[&str],
124        line_num: usize,
125    ) -> AvmResult<()> {
126        match opcode {
127            // Arithmetic operations
128            "+" => bytecode.push(OP_PLUS),
129            "-" => bytecode.push(OP_MINUS),
130            "*" => bytecode.push(OP_MUL),
131            "/" => bytecode.push(OP_DIV),
132            "%" => bytecode.push(OP_MOD),
133            "<" => bytecode.push(OP_LT),
134            ">" => bytecode.push(OP_GT),
135            "<=" => bytecode.push(OP_LE),
136            ">=" => bytecode.push(OP_GE),
137            "==" => bytecode.push(OP_EQ),
138            "!=" => bytecode.push(OP_NE),
139            "&&" => bytecode.push(OP_AND),
140            "||" => bytecode.push(OP_OR),
141            "!" => bytecode.push(OP_NOT),
142            "|" => bytecode.push(OP_BITWISE_OR),
143            "&" => bytecode.push(OP_BITWISE_AND),
144            "^" => bytecode.push(OP_BITWISE_XOR),
145            "~" => bytecode.push(OP_BITWISE_NOT),
146
147            // Stack operations
148            "pop" => bytecode.push(OP_POP),
149            "dup" => bytecode.push(OP_DUP),
150            "dup2" => bytecode.push(OP_DUP2),
151            "dig" => {
152                bytecode.push(OP_DIG);
153                self.assemble_byte_immediate(bytecode, args, line_num)?;
154            }
155            "bury" => {
156                bytecode.push(OP_BURY);
157                self.assemble_byte_immediate(bytecode, args, line_num)?;
158            }
159            "cover" => {
160                bytecode.push(OP_COVER);
161                self.assemble_byte_immediate(bytecode, args, line_num)?;
162            }
163            "uncover" => {
164                bytecode.push(OP_UNCOVER);
165                self.assemble_byte_immediate(bytecode, args, line_num)?;
166            }
167            "swap" => bytecode.push(OP_SWAP),
168            "select" => bytecode.push(OP_SELECT),
169            "dupn" => {
170                bytecode.push(OP_DUPN);
171                self.assemble_byte_immediate(bytecode, args, line_num)?;
172            }
173            "popn" => {
174                bytecode.push(OP_POPN);
175                self.assemble_byte_immediate(bytecode, args, line_num)?;
176            }
177
178            // Flow control
179            "bnz" => {
180                bytecode.push(OP_BNZ);
181                self.assemble_branch_target(bytecode, args, line_num)?;
182            }
183            "bz" => {
184                bytecode.push(OP_BZ);
185                self.assemble_branch_target(bytecode, args, line_num)?;
186            }
187            "b" => {
188                bytecode.push(OP_B);
189                self.assemble_branch_target(bytecode, args, line_num)?;
190            }
191            "return" => bytecode.push(OP_RETURN),
192            "assert" => bytecode.push(OP_ASSERT),
193            "callsub" => {
194                bytecode.push(OP_CALLSUB);
195                self.assemble_branch_target(bytecode, args, line_num)?;
196            }
197            "retsub" => bytecode.push(OP_RETSUB),
198            "proto" => {
199                bytecode.push(OP_PROTO);
200                // Proto takes 2 byte immediates: args and returns
201                if args.len() < 2 {
202                    return Err(AvmError::assembly_error(format!(
203                        "proto requires args and returns count on line {line_num}"
204                    )));
205                }
206                self.assemble_byte_immediate(bytecode, &[args[0]], line_num)?;
207                self.assemble_byte_immediate(bytecode, &[args[1]], line_num)?;
208            }
209            "frame_dig" => {
210                bytecode.push(OP_FRAME_DIG);
211                self.assemble_byte_immediate(bytecode, args, line_num)?;
212            }
213            "frame_bury" => {
214                bytecode.push(OP_FRAME_BURY);
215                self.assemble_byte_immediate(bytecode, args, line_num)?;
216            }
217            "switch" => {
218                bytecode.push(OP_SWITCH);
219                self.assemble_branch_target(bytecode, args, line_num)?;
220            }
221            "match" => {
222                bytecode.push(OP_MATCH);
223                self.assemble_branch_target(bytecode, args, line_num)?;
224            }
225
226            // Constants (high-level syntax)
227            "int" => {
228                bytecode.push(OP_PUSHINT);
229                self.assemble_int_immediate(bytecode, args, line_num)?;
230            }
231            "byte" => {
232                bytecode.push(OP_PUSHBYTES);
233                self.assemble_bytes_immediate(bytecode, args, line_num)?;
234            }
235            "addr" => {
236                bytecode.push(OP_PUSHBYTES);
237                self.assemble_addr_immediate(bytecode, args, line_num)?;
238            }
239            "method" => {
240                bytecode.push(OP_PUSHBYTES);
241                self.assemble_method_immediate(bytecode, args, line_num)?;
242            }
243            // Low-level opcodes (for direct use if needed)
244            "pushint" => {
245                bytecode.push(OP_PUSHINT);
246                self.assemble_int_immediate(bytecode, args, line_num)?;
247            }
248            "pushbytes" => {
249                bytecode.push(OP_PUSHBYTES);
250                self.assemble_bytes_immediate(bytecode, args, line_num)?;
251            }
252
253            // Constant block opcodes
254            "intcblock" => {
255                bytecode.push(OP_INTCBLOCK);
256                self.assemble_intcblock(bytecode, args, line_num)?;
257            }
258            "intc" => {
259                bytecode.push(OP_INTC);
260                self.assemble_byte_immediate(bytecode, args, line_num)?;
261            }
262            "intc_0" => bytecode.push(OP_INTC_0),
263            "intc_1" => bytecode.push(OP_INTC_1),
264            "intc_2" => bytecode.push(OP_INTC_2),
265            "intc_3" => bytecode.push(OP_INTC_3),
266            "bytecblock" => {
267                bytecode.push(OP_BYTECBLOCK);
268                self.assemble_bytecblock(bytecode, args, line_num)?;
269            }
270            "bytec" => {
271                bytecode.push(OP_BYTEC);
272                self.assemble_byte_immediate(bytecode, args, line_num)?;
273            }
274            "bytec_0" => bytecode.push(OP_BYTEC_0),
275            "bytec_1" => bytecode.push(OP_BYTEC_1),
276            "bytec_2" => bytecode.push(OP_BYTEC_2),
277            "bytec_3" => bytecode.push(OP_BYTEC_3),
278
279            // Arguments
280            "arg" => {
281                bytecode.push(OP_ARG);
282                self.assemble_byte_immediate(bytecode, args, line_num)?;
283            }
284            "arg_0" => bytecode.push(OP_ARG_0),
285            "arg_1" => bytecode.push(OP_ARG_1),
286            "arg_2" => bytecode.push(OP_ARG_2),
287            "arg_3" => bytecode.push(OP_ARG_3),
288
289            // Utility
290            "len" => bytecode.push(OP_LEN),
291            "itob" => bytecode.push(OP_ITOB),
292            "btoi" => bytecode.push(OP_BTOI),
293            "concat" => bytecode.push(OP_CONCAT),
294            "substring" => {
295                bytecode.push(OP_SUBSTRING);
296                self.assemble_substring_args(bytecode, args, line_num)?;
297            }
298            "substring3" => bytecode.push(OP_SUBSTRING3),
299            "getbit" => bytecode.push(OP_GETBIT),
300            "setbit" => bytecode.push(OP_SETBIT),
301            "getbyte" => bytecode.push(OP_GETBYTE),
302            "setbyte" => bytecode.push(OP_SETBYTE),
303            "extract" => {
304                bytecode.push(OP_EXTRACT);
305                self.assemble_substring_args(bytecode, args, line_num)?;
306            }
307            "extract3" => bytecode.push(OP_EXTRACT3),
308            "extract_uint16" => {
309                bytecode.push(OP_EXTRACT_UINT16);
310                self.assemble_byte_immediate(bytecode, args, line_num)?;
311            }
312            "extract_uint32" => {
313                bytecode.push(OP_EXTRACT_UINT32);
314                self.assemble_byte_immediate(bytecode, args, line_num)?;
315            }
316            "extract_uint64" => {
317                bytecode.push(OP_EXTRACT_UINT64);
318                self.assemble_byte_immediate(bytecode, args, line_num)?;
319            }
320            "replace2" => {
321                bytecode.push(OP_REPLACE2);
322                self.assemble_byte_immediate(bytecode, args, line_num)?;
323            }
324            "replace3" => bytecode.push(OP_REPLACE3),
325            "base64_decode" => {
326                bytecode.push(OP_BASE64_DECODE);
327                self.assemble_byte_immediate(bytecode, args, line_num)?;
328            }
329            "json_ref" => {
330                bytecode.push(OP_JSON_REF);
331                self.assemble_byte_immediate(bytecode, args, line_num)?;
332            }
333
334            // Crypto
335            "sha256" => bytecode.push(OP_SHA256),
336            "keccak256" => bytecode.push(OP_KECCAK256),
337            "sha512_256" => bytecode.push(OP_SHA512_256),
338            "sha3_256" => bytecode.push(OP_SHA3_256),
339            "ed25519verify" => bytecode.push(OP_ED25519VERIFY),
340            "ed25519verify_bare" => bytecode.push(OP_ED25519VERIFY_BARE),
341            "vrf_verify" => bytecode.push(OP_VRF_VERIFY),
342
343            // Scratch space
344            "load" => {
345                bytecode.push(OP_LOAD);
346                self.assemble_byte_immediate(bytecode, args, line_num)?;
347            }
348            "store" => {
349                bytecode.push(OP_STORE);
350                self.assemble_byte_immediate(bytecode, args, line_num)?;
351            }
352
353            // Transaction fields
354            "txn" => {
355                bytecode.push(OP_TXN);
356                self.assemble_txn_field(bytecode, args, line_num)?;
357            }
358            "gtxn" => {
359                bytecode.push(OP_GTXN);
360                self.assemble_gtxn_args(bytecode, args, line_num)?;
361            }
362            "global" => {
363                bytecode.push(OP_GLOBAL);
364                self.assemble_global_field(bytecode, args, line_num)?;
365            }
366
367            // Application state
368            "app_opted_in" => bytecode.push(OP_APP_OPTED_IN),
369            "app_local_get" => bytecode.push(OP_APP_LOCAL_GET),
370            "app_local_get_ex" => bytecode.push(OP_APP_LOCAL_GET_EX),
371            "app_global_get" => bytecode.push(OP_APP_GLOBAL_GET),
372            "app_global_get_ex" => bytecode.push(OP_APP_GLOBAL_GET_EX),
373            "app_local_put" => bytecode.push(OP_APP_LOCAL_PUT),
374            "app_global_put" => bytecode.push(OP_APP_GLOBAL_PUT),
375            "app_local_del" => bytecode.push(OP_APP_LOCAL_DEL),
376            "app_global_del" => bytecode.push(OP_APP_GLOBAL_DEL),
377            "asset_holding_get" => bytecode.push(OP_ASSET_HOLDING_GET),
378            "asset_params_get" => bytecode.push(OP_ASSET_PARAMS_GET),
379            "app_params_get" => bytecode.push(OP_APP_PARAMS_GET),
380            "acct_params_get" => bytecode.push(OP_ACCT_PARAMS_GET),
381            "balance" => bytecode.push(OP_BALANCE),
382            "min_balance" => bytecode.push(OP_MIN_BALANCE),
383
384            // Box operations (v8+)
385            "box_create" => bytecode.push(OP_BOX_CREATE),
386            "box_extract" => bytecode.push(OP_BOX_EXTRACT),
387            "box_replace" => bytecode.push(OP_BOX_REPLACE),
388            "box_del" => bytecode.push(OP_BOX_DEL),
389            "box_len" => bytecode.push(OP_BOX_LEN),
390            "box_get" => bytecode.push(OP_BOX_GET),
391            "box_put" => bytecode.push(OP_BOX_PUT),
392            "box_splice" => bytecode.push(OP_BOX_SPLICE),
393            "box_resize" => bytecode.push(OP_BOX_RESIZE),
394
395            // Block operations
396            "block" => {
397                bytecode.push(OP_BLOCK);
398                self.assemble_byte_immediate(bytecode, args, line_num)?;
399            }
400
401            "err" => bytecode.push(OP_ERR),
402
403            _ => {
404                return Err(AvmError::assembly_error(format!(
405                    "Unknown opcode '{opcode}' on line {line_num}"
406                )));
407            }
408        }
409
410        Ok(())
411    }
412
413    /// Assemble branch target (may be forward reference)
414    fn assemble_branch_target(
415        &mut self,
416        bytecode: &mut Vec<u8>,
417        args: &[&str],
418        line_num: usize,
419    ) -> AvmResult<()> {
420        if args.is_empty() {
421            return Err(AvmError::assembly_error(format!(
422                "Missing branch target on line {line_num}"
423            )));
424        }
425
426        let target = args[0];
427
428        // Check if it's a label
429        if let Some(&addr) = self.labels.get(target) {
430            // The offset is calculated from the PC position when the branch executes
431            // At that point, PC has advanced past the entire instruction (opcode + 2 bytes)
432            // So the target calculation is: target_addr = (pc_after_instruction) + offset
433            // Therefore: offset = target_addr - pc_after_instruction
434            let pc_after_instruction = bytecode.len() + 2; // bytecode.len() is where offset goes, +2 gets past it
435            let offset = (addr as i32) - (pc_after_instruction as i32);
436            bytecode.extend_from_slice(&(offset as i16).to_be_bytes());
437        } else {
438            // Forward reference - add placeholder and record for later resolution
439            self.forward_refs.push((bytecode.len(), target.to_string()));
440            bytecode.extend_from_slice(&[0, 0]); // Placeholder
441        }
442
443        Ok(())
444    }
445
446    /// Assemble integer immediate value
447    fn assemble_int_immediate(
448        &mut self,
449        bytecode: &mut Vec<u8>,
450        args: &[&str],
451        line_num: usize,
452    ) -> AvmResult<()> {
453        if args.is_empty() {
454            return Err(AvmError::assembly_error(format!(
455                "Missing integer value on line {line_num}"
456            )));
457        }
458
459        let value = self.parse_integer(args[0], line_num)?;
460        bytecode.extend_from_slice(&value.to_be_bytes());
461        Ok(())
462    }
463
464    /// Parse integer from various formats (decimal, hex, octal, binary)
465    fn parse_integer(&self, input: &str, line_num: usize) -> AvmResult<u64> {
466        let value = if input.starts_with("0x") || input.starts_with("0X") {
467            // Hexadecimal
468            u64::from_str_radix(&input[2..], 16)
469        } else if input.starts_with("0o") || input.starts_with("0O") {
470            // Octal
471            u64::from_str_radix(&input[2..], 8)
472        } else if input.starts_with("0b") || input.starts_with("0B") {
473            // Binary
474            u64::from_str_radix(&input[2..], 2)
475        } else {
476            // Decimal
477            input.parse::<u64>()
478        };
479
480        value.map_err(|_| {
481            AvmError::assembly_error(format!("Invalid integer '{input}' on line {line_num}"))
482        })
483    }
484
485    /// Assemble byte immediate value
486    fn assemble_byte_immediate(
487        &mut self,
488        bytecode: &mut Vec<u8>,
489        args: &[&str],
490        line_num: usize,
491    ) -> AvmResult<()> {
492        if args.is_empty() {
493            return Err(AvmError::assembly_error(format!(
494                "Missing byte value on line {line_num}"
495            )));
496        }
497
498        let value: u8 = args[0].parse().map_err(|_| {
499            AvmError::assembly_error(format!(
500                "Invalid byte value '{}' on line {}",
501                args[0], line_num
502            ))
503        })?;
504
505        bytecode.push(value);
506        Ok(())
507    }
508
509    /// Assemble bytes immediate value
510    fn assemble_bytes_immediate(
511        &mut self,
512        bytecode: &mut Vec<u8>,
513        args: &[&str],
514        line_num: usize,
515    ) -> AvmResult<()> {
516        if args.is_empty() {
517            return Err(AvmError::assembly_error(format!(
518                "Missing bytes value on line {line_num}"
519            )));
520        }
521
522        let bytes = self.parse_bytes(args, line_num)?;
523
524        if bytes.len() > 255 {
525            return Err(AvmError::assembly_error(format!(
526                "Bytes too long ({} > 255) on line {}",
527                bytes.len(),
528                line_num
529            )));
530        }
531
532        bytecode.push(bytes.len() as u8);
533        bytecode.extend_from_slice(&bytes);
534        Ok(())
535    }
536
537    /// Parse bytes from various formats
538    fn parse_bytes(&self, args: &[&str], line_num: usize) -> AvmResult<Vec<u8>> {
539        if args.is_empty() {
540            return Err(AvmError::assembly_error(format!(
541                "Missing bytes value on line {line_num}"
542            )));
543        }
544
545        // Handle different byte formats
546        if args.len() >= 2 && (args[0] == "base64" || args[0] == "b64") {
547            // base64 format: byte base64 AAAA... or byte b64 AAAA...
548            let b64_data = args[1..].join("");
549            use base64::{Engine as _, engine::general_purpose};
550            general_purpose::STANDARD
551                .decode(b64_data)
552                .map_err(|_| AvmError::assembly_error(format!("Invalid base64 on line {line_num}")))
553        } else if args.len() == 1 {
554            let arg = args[0];
555            if let Some(stripped) = arg.strip_prefix("0x") {
556                // Hex format: byte 0x1234...
557                hex::decode(stripped).map_err(|_| {
558                    AvmError::assembly_error(format!(
559                        "Invalid hex bytes '{arg}' on line {line_num}"
560                    ))
561                })
562            } else if arg.starts_with('"') && arg.ends_with('"') {
563                // String literal: byte "hello"
564                let content = &arg[1..arg.len() - 1];
565                Ok(self.parse_string_literal(content)?)
566            } else {
567                // Try to parse as base32 (Algorand address)
568                self.try_parse_base32(arg, line_num)
569            }
570        } else {
571            Err(AvmError::assembly_error(format!(
572                "Invalid bytes format on line {line_num}"
573            )))
574        }
575    }
576
577    /// Parse string literal with escape sequences
578    fn parse_string_literal(&self, content: &str) -> AvmResult<Vec<u8>> {
579        let mut result = Vec::new();
580        let mut chars = content.chars();
581
582        while let Some(ch) = chars.next() {
583            if ch == '\\' {
584                match chars.next() {
585                    Some('n') => result.push(b'\n'),
586                    Some('t') => result.push(b'\t'),
587                    Some('r') => result.push(b'\r'),
588                    Some('\\') => result.push(b'\\'),
589                    Some('"') => result.push(b'"'),
590                    Some('x') => {
591                        // Hex escape: \x41
592                        let c1 = chars.next();
593                        let c2 = chars.next();
594                        if let (Some(c1), Some(c2)) = (c1, c2) {
595                            let hex = format!("{c1}{c2}");
596                            if let Ok(byte) = u8::from_str_radix(&hex, 16) {
597                                result.push(byte);
598                            } else {
599                                return Err(AvmError::assembly_error(
600                                    "Invalid hex escape sequence".to_string(),
601                                ));
602                            }
603                        } else {
604                            return Err(AvmError::assembly_error(
605                                "Invalid hex escape sequence".to_string(),
606                            ));
607                        }
608                    }
609                    Some(c) => result.push(c as u8),
610                    None => {
611                        return Err(AvmError::assembly_error(
612                            "Incomplete escape sequence".to_string(),
613                        ));
614                    }
615                }
616            } else {
617                result.push(ch as u8);
618            }
619        }
620
621        Ok(result)
622    }
623
624    /// Try to parse as base32 (Algorand address format)
625    fn try_parse_base32(&self, input: &str, line_num: usize) -> AvmResult<Vec<u8>> {
626        use base32::{Alphabet, decode};
627
628        // Algorand uses a specific base32 alphabet (RFC4648 without padding)
629        match decode(Alphabet::Rfc4648 { padding: false }, input) {
630            Some(bytes) => {
631                // Algorand addresses should be 32 bytes after decoding
632                if bytes.len() == 32 {
633                    Ok(bytes)
634                } else {
635                    Err(AvmError::assembly_error(format!(
636                        "Invalid address length: expected 32 bytes, got {} on line {line_num}",
637                        bytes.len()
638                    )))
639                }
640            }
641            None => Err(AvmError::assembly_error(format!(
642                "Invalid base32 encoding in address '{input}' on line {line_num}"
643            ))),
644        }
645    }
646
647    /// Assemble address immediate value
648    fn assemble_addr_immediate(
649        &mut self,
650        bytecode: &mut Vec<u8>,
651        args: &[&str],
652        line_num: usize,
653    ) -> AvmResult<()> {
654        if args.is_empty() {
655            return Err(AvmError::assembly_error(format!(
656                "Missing address value on line {line_num}"
657            )));
658        }
659
660        // Parse Algorand address (base32) and convert to bytes
661        let addr_bytes = self.parse_algorand_address(args[0], line_num)?;
662
663        if addr_bytes.len() > 255 {
664            return Err(AvmError::assembly_error(format!(
665                "Address too long ({} > 255) on line {}",
666                addr_bytes.len(),
667                line_num
668            )));
669        }
670
671        bytecode.push(addr_bytes.len() as u8);
672        bytecode.extend_from_slice(&addr_bytes);
673        Ok(())
674    }
675
676    /// Assemble method selector immediate value
677    fn assemble_method_immediate(
678        &mut self,
679        bytecode: &mut Vec<u8>,
680        args: &[&str],
681        line_num: usize,
682    ) -> AvmResult<()> {
683        if args.is_empty() {
684            return Err(AvmError::assembly_error(format!(
685                "Missing method signature on line {line_num}"
686            )));
687        }
688
689        // Join all args to form method signature, then compute selector
690        let method_sig = args.join(" ");
691        let selector = self.compute_method_selector(&method_sig)?;
692
693        bytecode.push(4); // Method selector is always 4 bytes
694        bytecode.extend_from_slice(&selector);
695        Ok(())
696    }
697
698    /// Parse Algorand address from base32 format
699    fn parse_algorand_address(&self, addr: &str, line_num: usize) -> AvmResult<Vec<u8>> {
700        use base32::{Alphabet, decode};
701        use sha2::{Digest, Sha512_256};
702
703        // Algorand addresses are 58 characters in base32
704        if addr.len() != 58 {
705            return Err(AvmError::assembly_error(format!(
706                "Invalid Algorand address length on line {line_num}: expected 58 characters, got {}",
707                addr.len()
708            )));
709        }
710
711        // Decode the base32 address
712        let decoded = decode(Alphabet::Rfc4648 { padding: false }, addr).ok_or_else(|| {
713            AvmError::assembly_error(format!(
714                "Invalid base32 encoding in address on line {line_num}"
715            ))
716        })?;
717
718        // Algorand addresses contain 32 bytes + 4 byte checksum = 36 bytes total
719        if decoded.len() != 36 {
720            return Err(AvmError::assembly_error(format!(
721                "Invalid decoded address length on line {line_num}: expected 36 bytes, got {}",
722                decoded.len()
723            )));
724        }
725
726        // Split address and checksum
727        let (address_bytes, checksum) = decoded.split_at(32);
728
729        // Verify checksum using SHA512-256 (last 4 bytes)
730        let mut hasher = Sha512_256::new();
731        hasher.update(address_bytes);
732        let hash = hasher.finalize();
733        let expected_checksum = &hash[hash.len() - 4..];
734
735        if checksum != expected_checksum {
736            return Err(AvmError::assembly_error(format!(
737                "Invalid address checksum on line {line_num}"
738            )));
739        }
740
741        Ok(address_bytes.to_vec())
742    }
743
744    /// Compute ARC-4 method selector from method signature
745    fn compute_method_selector(&self, method_sig: &str) -> AvmResult<[u8; 4]> {
746        use sha2::{Digest, Sha256};
747
748        // Compute SHA-256 hash of method signature
749        let mut hasher = Sha256::new();
750        hasher.update(method_sig.as_bytes());
751        let hash = hasher.finalize();
752
753        // Take first 4 bytes as method selector
754        let mut selector = [0u8; 4];
755        selector.copy_from_slice(&hash[..4]);
756        Ok(selector)
757    }
758
759    /// Assemble substring arguments
760    fn assemble_substring_args(
761        &mut self,
762        bytecode: &mut Vec<u8>,
763        args: &[&str],
764        line_num: usize,
765    ) -> AvmResult<()> {
766        if args.len() < 2 {
767            return Err(AvmError::assembly_error(format!(
768                "substring requires start and length on line {line_num}"
769            )));
770        }
771
772        let start: u8 = args[0].parse().map_err(|_| {
773            AvmError::assembly_error(format!(
774                "Invalid start value '{}' on line {}",
775                args[0], line_num
776            ))
777        })?;
778
779        let length: u8 = args[1].parse().map_err(|_| {
780            AvmError::assembly_error(format!(
781                "Invalid length value '{}' on line {}",
782                args[1], line_num
783            ))
784        })?;
785
786        bytecode.push(start);
787        bytecode.push(length);
788        Ok(())
789    }
790
791    /// Assemble transaction field
792    fn assemble_txn_field(
793        &mut self,
794        bytecode: &mut Vec<u8>,
795        args: &[&str],
796        line_num: usize,
797    ) -> AvmResult<()> {
798        if args.is_empty() {
799            return Err(AvmError::assembly_error(format!(
800                "Missing transaction field on line {line_num}"
801            )));
802        }
803
804        let field_id = match args[0] {
805            "Sender" => 0,
806            "Fee" => 1,
807            "FirstValid" => 2,
808            "FirstValidTime" => 3,
809            "LastValid" => 4,
810            "Note" => 5,
811            "Lease" => 6,
812            "Receiver" => 7,
813            "Amount" => 8,
814            "CloseRemainderTo" => 9,
815            "VotePK" => 10,
816            "SelectionPK" => 11,
817            "VoteFirst" => 12,
818            "VoteLast" => 13,
819            "VoteKeyDilution" => 14,
820            "Type" => 15,
821            "TypeEnum" => 16,
822            "XferAsset" => 17,
823            "AssetAmount" => 18,
824            "AssetSender" => 19,
825            "AssetReceiver" => 20,
826            "AssetCloseTo" => 21,
827            "GroupIndex" => 22,
828            "TxID" => 23,
829            "ApplicationID" => 24,
830            "OnCompletion" => 25,
831            "ApplicationArgs" => 26,
832            "NumAppArgs" => 27,
833            "Accounts" => 28,
834            "NumAccounts" => 29,
835            "ApprovalProgram" => 30,
836            "ClearStateProgram" => 31,
837            "RekeyTo" => 32,
838            "ConfigAsset" => 33,
839            "ConfigAssetTotal" => 34,
840            "ConfigAssetDecimals" => 35,
841            "ConfigAssetDefaultFrozen" => 36,
842            "ConfigAssetUnitName" => 37,
843            "ConfigAssetName" => 38,
844            "ConfigAssetURL" => 39,
845            "ConfigAssetMetadataHash" => 40,
846            "ConfigAssetManager" => 41,
847            "ConfigAssetReserve" => 42,
848            "ConfigAssetFreeze" => 43,
849            "ConfigAssetClawback" => 44,
850            "FreezeAsset" => 45,
851            "FreezeAssetAccount" => 46,
852            "FreezeAssetFrozen" => 47,
853            "Assets" => 48,
854            "NumAssets" => 49,
855            "Applications" => 50,
856            "NumApplications" => 51,
857            "GlobalNumUint" => 52,
858            "GlobalNumByteSlice" => 53,
859            "LocalNumUint" => 54,
860            "LocalNumByteSlice" => 55,
861            "ExtraProgramPages" => 56,
862            "Nonparticipation" => 57,
863            "Logs" => 58,
864            "NumLogs" => 59,
865            "CreatedAssetID" => 60,
866            "CreatedApplicationID" => 61,
867            "LastLog" => 62,
868            "StateProofPK" => 63,
869            "ApprovalProgramPages" => 64,
870            "NumApprovalProgramPages" => 65,
871            "ClearStateProgramPages" => 66,
872            "NumClearStateProgramPages" => 67,
873            _ => {
874                return Err(AvmError::assembly_error(format!(
875                    "Unknown transaction field '{}' on line {}",
876                    args[0], line_num
877                )));
878            }
879        };
880
881        bytecode.push(field_id);
882        Ok(())
883    }
884
885    /// Assemble group transaction arguments
886    fn assemble_gtxn_args(
887        &mut self,
888        bytecode: &mut Vec<u8>,
889        args: &[&str],
890        line_num: usize,
891    ) -> AvmResult<()> {
892        if args.len() < 2 {
893            return Err(AvmError::assembly_error(format!(
894                "gtxn requires group index and field on line {line_num}"
895            )));
896        }
897
898        let group_index: u8 = args[0].parse().map_err(|_| {
899            AvmError::assembly_error(format!(
900                "Invalid group index '{}' on line {}",
901                args[0], line_num
902            ))
903        })?;
904
905        bytecode.push(group_index);
906        self.assemble_txn_field(bytecode, &args[1..], line_num)?;
907        Ok(())
908    }
909
910    /// Assemble global field
911    fn assemble_global_field(
912        &mut self,
913        bytecode: &mut Vec<u8>,
914        args: &[&str],
915        line_num: usize,
916    ) -> AvmResult<()> {
917        if args.is_empty() {
918            return Err(AvmError::assembly_error(format!(
919                "Missing global field on line {line_num}"
920            )));
921        }
922
923        let field_id = match args[0] {
924            "MinTxnFee" => 0,
925            "MinBalance" => 1,
926            "MaxTxnLife" => 2,
927            "ZeroAddress" => 3,
928            "GroupSize" => 4,
929            "LogicSigVersion" => 5,
930            "Round" => 6,
931            "LatestTimestamp" => 7,
932            "CurrentApplicationID" => 8,
933            "CreatorAddress" => 9,
934            "CurrentApplicationAddress" => 10,
935            "GroupID" => 11,
936            "OpcodeBudget" => 12,
937            "CallerApplicationID" => 13,
938            "CallerApplicationAddress" => 14,
939            "AssetCreateMinBalance" => 15,
940            "AssetOptInMinBalance" => 16,
941            "GenesisHash" => 17,
942            _ => {
943                return Err(AvmError::assembly_error(format!(
944                    "Unknown global field '{}' on line {}",
945                    args[0], line_num
946                )));
947            }
948        };
949
950        bytecode.push(field_id);
951        Ok(())
952    }
953
954    /// Assemble integer constant block
955    fn assemble_intcblock(
956        &mut self,
957        bytecode: &mut Vec<u8>,
958        args: &[&str],
959        line_num: usize,
960    ) -> AvmResult<()> {
961        if args.is_empty() {
962            return Err(AvmError::assembly_error(format!(
963                "intcblock requires at least one integer constant on line {line_num}"
964            )));
965        }
966
967        // Write count as varuint
968        let count_bytes = encode_varuint(args.len() as u64);
969        bytecode.extend_from_slice(&count_bytes);
970
971        // Write each integer constant as varuint
972        for arg in args {
973            let value = self.parse_integer(arg, line_num)?;
974            let value_bytes = encode_varuint(value);
975            bytecode.extend_from_slice(&value_bytes);
976        }
977
978        Ok(())
979    }
980
981    /// Assemble byte constant block
982    fn assemble_bytecblock(
983        &mut self,
984        bytecode: &mut Vec<u8>,
985        args: &[&str],
986        line_num: usize,
987    ) -> AvmResult<()> {
988        if args.is_empty() {
989            return Err(AvmError::assembly_error(format!(
990                "bytecblock requires at least one byte constant on line {line_num}"
991            )));
992        }
993
994        // Write count as varuint
995        let count_bytes = encode_varuint(args.len() as u64);
996        bytecode.extend_from_slice(&count_bytes);
997
998        // Write each byte constant
999        for arg in args {
1000            let bytes = self.parse_bytes(&[arg], line_num)?;
1001
1002            // Write length as varuint followed by the bytes
1003            let length_bytes = encode_varuint(bytes.len() as u64);
1004            bytecode.extend_from_slice(&length_bytes);
1005            bytecode.extend_from_slice(&bytes);
1006        }
1007
1008        Ok(())
1009    }
1010
1011    /// Resolve forward label references
1012    fn resolve_forward_refs(&self, bytecode: &mut [u8]) -> AvmResult<()> {
1013        for (addr, label) in &self.forward_refs {
1014            let target_addr = self
1015                .labels
1016                .get(label)
1017                .ok_or_else(|| AvmError::assembly_error(format!("Undefined label: {label}")))?;
1018
1019            // The offset is calculated from the PC position when the branch executes
1020            // At that point, PC has advanced past the entire instruction (opcode + 2 bytes)
1021            // So the target calculation is: target_addr = (pc_after_instruction) + offset
1022            // Therefore: offset = target_addr - pc_after_instruction
1023            let pc_after_instruction = *addr + 2; // addr points to offset bytes, +2 gets past them
1024            let offset = (*target_addr as i32) - (pc_after_instruction as i32);
1025            let offset_bytes = (offset as i16).to_be_bytes();
1026            bytecode[*addr] = offset_bytes[0];
1027            bytecode[*addr + 1] = offset_bytes[1];
1028        }
1029
1030        Ok(())
1031    }
1032}
1033
1034/// Disassemble bytecode to TEAL source
1035pub fn disassemble(bytecode: &[u8]) -> AvmResult<String> {
1036    use crate::varuint::decode_varuint;
1037    let mut result = String::new();
1038    let mut pc = 0;
1039
1040    while pc < bytecode.len() {
1041        let opcode = bytecode[pc];
1042
1043        result.push_str(&format!("{pc:04x}: "));
1044
1045        let (instruction, size) = match opcode {
1046            OP_ERR => ("err".to_string(), 1),
1047            OP_PLUS => ("+".to_string(), 1),
1048            OP_MINUS => ("-".to_string(), 1),
1049            OP_MUL => ("*".to_string(), 1),
1050            OP_DIV => ("/".to_string(), 1),
1051            OP_MOD => ("%".to_string(), 1),
1052            OP_LT => ("<".to_string(), 1),
1053            OP_GT => (">".to_string(), 1),
1054            OP_LE => ("<=".to_string(), 1),
1055            OP_GE => (">=".to_string(), 1),
1056            OP_EQ => ("==".to_string(), 1),
1057            OP_NE => ("!=".to_string(), 1),
1058            OP_AND => ("&&".to_string(), 1),
1059            OP_OR => ("||".to_string(), 1),
1060            OP_NOT => ("!".to_string(), 1),
1061            OP_BITWISE_OR => ("|".to_string(), 1),
1062            OP_BITWISE_AND => ("&".to_string(), 1),
1063            OP_BITWISE_XOR => ("^".to_string(), 1),
1064            OP_BITWISE_NOT => ("~".to_string(), 1),
1065            OP_POP => ("pop".to_string(), 1),
1066            OP_DUP => ("dup".to_string(), 1),
1067            OP_DUP2 => ("dup2".to_string(), 1),
1068            OP_SWAP => ("swap".to_string(), 1),
1069            OP_SELECT => ("select".to_string(), 1),
1070            OP_BNZ => {
1071                if pc + 2 < bytecode.len() {
1072                    let offset = i16::from_be_bytes([bytecode[pc + 1], bytecode[pc + 2]]);
1073                    let target = (pc as i32 + 3 + offset as i32) as usize;
1074                    (format!("bnz {target:04x}"), 3)
1075                } else {
1076                    ("bnz <invalid>".to_string(), 1)
1077                }
1078            }
1079            OP_BZ => {
1080                if pc + 2 < bytecode.len() {
1081                    let offset = i16::from_be_bytes([bytecode[pc + 1], bytecode[pc + 2]]);
1082                    let target = (pc as i32 + 3 + offset as i32) as usize;
1083                    (format!("bz {target:04x}"), 3)
1084                } else {
1085                    ("bz <invalid>".to_string(), 1)
1086                }
1087            }
1088            OP_B => {
1089                if pc + 2 < bytecode.len() {
1090                    let offset = i16::from_be_bytes([bytecode[pc + 1], bytecode[pc + 2]]);
1091                    let target = (pc as i32 + 3 + offset as i32) as usize;
1092                    (format!("b {target:04x}"), 3)
1093                } else {
1094                    ("b <invalid>".to_string(), 1)
1095                }
1096            }
1097            OP_RETURN => ("return".to_string(), 1),
1098            OP_ASSERT => ("assert".to_string(), 1),
1099            OP_RETSUB => ("retsub".to_string(), 1),
1100            OP_SHA256 => ("sha256".to_string(), 1),
1101            OP_KECCAK256 => ("keccak256".to_string(), 1),
1102            OP_SHA512_256 => ("sha512_256".to_string(), 1),
1103            OP_ED25519VERIFY => ("ed25519verify".to_string(), 1),
1104            OP_LEN => ("len".to_string(), 1),
1105            OP_ITOB => ("itob".to_string(), 1),
1106            OP_BTOI => ("btoi".to_string(), 1),
1107            OP_CONCAT => ("concat".to_string(), 1),
1108            OP_SUBSTRING3 => ("substring3".to_string(), 1),
1109            OP_APP_GLOBAL_GET => ("app_global_get".to_string(), 1),
1110            OP_APP_GLOBAL_PUT => ("app_global_put".to_string(), 1),
1111            OP_APP_GLOBAL_DEL => ("app_global_del".to_string(), 1),
1112            OP_APP_LOCAL_GET => ("app_local_get".to_string(), 1),
1113            OP_APP_LOCAL_PUT => ("app_local_put".to_string(), 1),
1114            OP_APP_LOCAL_DEL => ("app_local_del".to_string(), 1),
1115            OP_BALANCE => ("balance".to_string(), 1),
1116            OP_MIN_BALANCE => ("min_balance".to_string(), 1),
1117
1118            // Constant block opcodes
1119            OP_INTCBLOCK => {
1120                let mut offset = pc + 1;
1121
1122                // Read count as varuint
1123                if let Ok((count, consumed)) = decode_varuint(&bytecode[offset..]) {
1124                    offset += consumed;
1125                    let count = count as usize;
1126                    let mut constants = Vec::new();
1127
1128                    // Read each integer constant as varuint
1129                    for _ in 0..count {
1130                        if let Ok((value, consumed)) = decode_varuint(&bytecode[offset..]) {
1131                            constants.push(value.to_string());
1132                            offset += consumed;
1133                        } else {
1134                            break;
1135                        }
1136                    }
1137
1138                    if constants.len() == count {
1139                        (format!("intcblock {}", constants.join(" ")), offset - pc)
1140                    } else {
1141                        ("intcblock <invalid>".to_string(), 1)
1142                    }
1143                } else {
1144                    ("intcblock <invalid>".to_string(), 1)
1145                }
1146            }
1147            OP_INTC => {
1148                if pc + 1 < bytecode.len() {
1149                    let index = bytecode[pc + 1];
1150                    (format!("intc {index}"), 2)
1151                } else {
1152                    ("intc <invalid>".to_string(), 1)
1153                }
1154            }
1155            OP_INTC_0 => ("intc_0".to_string(), 1),
1156            OP_INTC_1 => ("intc_1".to_string(), 1),
1157            OP_INTC_2 => ("intc_2".to_string(), 1),
1158            OP_INTC_3 => ("intc_3".to_string(), 1),
1159            OP_BYTECBLOCK => {
1160                let mut offset = pc + 1;
1161
1162                // Read count as varuint
1163                if let Ok((count, consumed)) = decode_varuint(&bytecode[offset..]) {
1164                    offset += consumed;
1165                    let count = count as usize;
1166                    let mut constants = Vec::new();
1167
1168                    // Read each byte constant (length-prefixed)
1169                    for _ in 0..count {
1170                        if let Ok((length, consumed)) = decode_varuint(&bytecode[offset..]) {
1171                            offset += consumed;
1172                            let length = length as usize;
1173
1174                            if offset + length <= bytecode.len() {
1175                                let bytes = &bytecode[offset..offset + length];
1176                                if bytes.iter().all(|&b| b.is_ascii() && !b.is_ascii_control()) {
1177                                    constants
1178                                        .push(format!("\"{}\"", String::from_utf8_lossy(bytes)));
1179                                } else {
1180                                    constants.push(format!("0x{}", hex::encode(bytes)));
1181                                }
1182                                offset += length;
1183                            } else {
1184                                break;
1185                            }
1186                        } else {
1187                            break;
1188                        }
1189                    }
1190
1191                    if constants.len() == count {
1192                        (format!("bytecblock {}", constants.join(" ")), offset - pc)
1193                    } else {
1194                        ("bytecblock <invalid>".to_string(), 1)
1195                    }
1196                } else {
1197                    ("bytecblock <invalid>".to_string(), 1)
1198                }
1199            }
1200            OP_BYTEC => {
1201                if pc + 1 < bytecode.len() {
1202                    let index = bytecode[pc + 1];
1203                    (format!("bytec {index}"), 2)
1204                } else {
1205                    ("bytec <invalid>".to_string(), 1)
1206                }
1207            }
1208            OP_BYTEC_0 => ("bytec_0".to_string(), 1),
1209            OP_BYTEC_1 => ("bytec_1".to_string(), 1),
1210            OP_BYTEC_2 => ("bytec_2".to_string(), 1),
1211            OP_BYTEC_3 => ("bytec_3".to_string(), 1),
1212
1213            // Argument opcodes
1214            OP_ARG => {
1215                if pc + 1 < bytecode.len() {
1216                    let index = bytecode[pc + 1];
1217                    (format!("arg {index}"), 2)
1218                } else {
1219                    ("arg <invalid>".to_string(), 1)
1220                }
1221            }
1222            OP_ARG_0 => ("arg_0".to_string(), 1),
1223            OP_ARG_1 => ("arg_1".to_string(), 1),
1224            OP_ARG_2 => ("arg_2".to_string(), 1),
1225            OP_ARG_3 => ("arg_3".to_string(), 1),
1226
1227            OP_PUSHINT => {
1228                if pc + 8 < bytecode.len() {
1229                    let value = u64::from_be_bytes(bytecode[pc + 1..pc + 9].try_into().unwrap());
1230                    (format!("int {value}"), 9)
1231                } else {
1232                    ("int <invalid>".to_string(), 1)
1233                }
1234            }
1235
1236            OP_PUSHBYTES => {
1237                if pc + 1 < bytecode.len() {
1238                    let len = bytecode[pc + 1] as usize;
1239                    if pc + 1 + len < bytecode.len() {
1240                        let bytes = &bytecode[pc + 2..pc + 2 + len];
1241                        if bytes.iter().all(|&b| b.is_ascii() && !b.is_ascii_control()) {
1242                            (
1243                                format!("byte \"{}\"", String::from_utf8_lossy(bytes)),
1244                                2 + len,
1245                            )
1246                        } else {
1247                            (format!("byte 0x{}", hex::encode(bytes)), 2 + len)
1248                        }
1249                    } else {
1250                        ("byte <invalid>".to_string(), 1)
1251                    }
1252                } else {
1253                    ("byte <invalid>".to_string(), 1)
1254                }
1255            }
1256
1257            _ => (format!("unknown_{opcode:02x}"), 1),
1258        };
1259
1260        result.push_str(&instruction);
1261        result.push('\n');
1262
1263        pc += size;
1264    }
1265
1266    Ok(result)
1267}