1use crate::error::{AvmError, AvmResult};
4use crate::opcodes::*;
5use crate::varuint::encode_varuint;
6use std::collections::HashMap;
7
/// Assembles textual AVM assembly into bytecode.
///
/// The assembler keeps the pragma settings it has seen together with the label
/// bookkeeping needed to patch branches whose target is defined later in the source.
#[derive(Debug, Default)]
pub struct Assembler {
    /// Value of the most recent `#pragma version` directive (0 until one is seen).
    version: u8,
    /// Value of the most recent `#pragma typetrack` directive (false until one is seen).
    typetrack: bool,
    /// Output byte offset recorded for each label name.
    labels: HashMap<String, usize>,
    /// Branch operands emitted before their label was known, stored as
    /// (offset of the 2-byte placeholder, label name).
    forward_refs: Vec<(usize, String)>,
}
20
21impl Assembler {
22 pub fn new() -> Self {
24 Self::default()
25 }
26
27 pub fn assemble(&mut self, source: &str) -> AvmResult<Vec<u8>> {
29 let mut bytecode = Vec::new();
30 let lines: Vec<&str> = source.lines().collect();
31
32 for (line_num, line) in lines.iter().enumerate() {
34 let line = line.trim();
35
36 if line.is_empty() || line.starts_with("//") || line.starts_with(";") {
38 continue;
39 }
40
41 let line = if let Some(pos) = line.find(';') {
43 line[..pos].trim()
44 } else if let Some(pos) = line.find("//") {
45 line[..pos].trim()
46 } else {
47 line
48 };
49
50 if line.is_empty() {
52 continue;
53 }
54
55 if line.starts_with("#pragma") {
57 let parts: Vec<&str> = line.split_whitespace().collect();
58 if parts.len() >= 3 {
59 match parts[1] {
60 "version" => {
61 self.version = parts[2].parse().map_err(|_| {
62 AvmError::assembly_error(format!(
63 "Invalid version on line {}",
64 line_num + 1
65 ))
66 })?;
67 }
68 "typetrack" => {
69 self.typetrack = parts[2].parse().map_err(|_| {
70 AvmError::assembly_error(format!(
71 "Invalid typetrack value on line {}",
72 line_num + 1
73 ))
74 })?;
75 }
76 _ => {
77 return Err(AvmError::assembly_error(format!(
78 "Unknown pragma directive '{}' on line {}",
79 parts[1],
80 line_num + 1
81 )));
82 }
83 }
84 } else {
85 return Err(AvmError::assembly_error(format!(
86 "Invalid pragma syntax on line {}",
87 line_num + 1
88 )));
89 }
90 continue;
91 }
92
93 if line.ends_with(':') {
95 let label = line.strip_suffix(':').unwrap();
96 self.labels.insert(label.to_string(), bytecode.len());
97 continue;
98 }
99
100 let parts: Vec<&str> = line.split_whitespace().collect();
102 if parts.is_empty() {
103 continue;
104 }
105
106 let opcode = parts[0];
107 let args = &parts[1..];
108
109 self.assemble_instruction(&mut bytecode, opcode, args, line_num + 1)?;
110 }
111
112 self.resolve_forward_refs(&mut bytecode)?;
114
115 Ok(bytecode)
116 }
117
118 fn assemble_instruction(
120 &mut self,
121 bytecode: &mut Vec<u8>,
122 opcode: &str,
123 args: &[&str],
124 line_num: usize,
125 ) -> AvmResult<()> {
126 match opcode {
127 "+" => bytecode.push(OP_PLUS),
129 "-" => bytecode.push(OP_MINUS),
130 "*" => bytecode.push(OP_MUL),
131 "/" => bytecode.push(OP_DIV),
132 "%" => bytecode.push(OP_MOD),
133 "<" => bytecode.push(OP_LT),
134 ">" => bytecode.push(OP_GT),
135 "<=" => bytecode.push(OP_LE),
136 ">=" => bytecode.push(OP_GE),
137 "==" => bytecode.push(OP_EQ),
138 "!=" => bytecode.push(OP_NE),
139 "&&" => bytecode.push(OP_AND),
140 "||" => bytecode.push(OP_OR),
141 "!" => bytecode.push(OP_NOT),
142 "|" => bytecode.push(OP_BITWISE_OR),
143 "&" => bytecode.push(OP_BITWISE_AND),
144 "^" => bytecode.push(OP_BITWISE_XOR),
145 "~" => bytecode.push(OP_BITWISE_NOT),
146
147 "pop" => bytecode.push(OP_POP),
149 "dup" => bytecode.push(OP_DUP),
150 "dup2" => bytecode.push(OP_DUP2),
151 "dig" => {
152 bytecode.push(OP_DIG);
153 self.assemble_byte_immediate(bytecode, args, line_num)?;
154 }
155 "bury" => {
156 bytecode.push(OP_BURY);
157 self.assemble_byte_immediate(bytecode, args, line_num)?;
158 }
159 "cover" => {
160 bytecode.push(OP_COVER);
161 self.assemble_byte_immediate(bytecode, args, line_num)?;
162 }
163 "uncover" => {
164 bytecode.push(OP_UNCOVER);
165 self.assemble_byte_immediate(bytecode, args, line_num)?;
166 }
167 "swap" => bytecode.push(OP_SWAP),
168 "select" => bytecode.push(OP_SELECT),
169 "dupn" => {
170 bytecode.push(OP_DUPN);
171 self.assemble_byte_immediate(bytecode, args, line_num)?;
172 }
173 "popn" => {
174 bytecode.push(OP_POPN);
175 self.assemble_byte_immediate(bytecode, args, line_num)?;
176 }
177
178 "bnz" => {
180 bytecode.push(OP_BNZ);
181 self.assemble_branch_target(bytecode, args, line_num)?;
182 }
183 "bz" => {
184 bytecode.push(OP_BZ);
185 self.assemble_branch_target(bytecode, args, line_num)?;
186 }
187 "b" => {
188 bytecode.push(OP_B);
189 self.assemble_branch_target(bytecode, args, line_num)?;
190 }
191 "return" => bytecode.push(OP_RETURN),
192 "assert" => bytecode.push(OP_ASSERT),
193 "callsub" => {
194 bytecode.push(OP_CALLSUB);
195 self.assemble_branch_target(bytecode, args, line_num)?;
196 }
197 "retsub" => bytecode.push(OP_RETSUB),
198 "proto" => {
199 bytecode.push(OP_PROTO);
200 if args.len() < 2 {
202 return Err(AvmError::assembly_error(format!(
203 "proto requires args and returns count on line {line_num}"
204 )));
205 }
206 self.assemble_byte_immediate(bytecode, &[args[0]], line_num)?;
207 self.assemble_byte_immediate(bytecode, &[args[1]], line_num)?;
208 }
209 "frame_dig" => {
210 bytecode.push(OP_FRAME_DIG);
211 self.assemble_byte_immediate(bytecode, args, line_num)?;
212 }
213 "frame_bury" => {
214 bytecode.push(OP_FRAME_BURY);
215 self.assemble_byte_immediate(bytecode, args, line_num)?;
216 }
217 "switch" => {
218 bytecode.push(OP_SWITCH);
219 self.assemble_branch_target(bytecode, args, line_num)?;
220 }
221 "match" => {
222 bytecode.push(OP_MATCH);
223 self.assemble_branch_target(bytecode, args, line_num)?;
224 }
225
226 "int" => {
228 bytecode.push(OP_PUSHINT);
229 self.assemble_int_immediate(bytecode, args, line_num)?;
230 }
231 "byte" => {
232 bytecode.push(OP_PUSHBYTES);
233 self.assemble_bytes_immediate(bytecode, args, line_num)?;
234 }
235 "addr" => {
236 bytecode.push(OP_PUSHBYTES);
237 self.assemble_addr_immediate(bytecode, args, line_num)?;
238 }
239 "method" => {
240 bytecode.push(OP_PUSHBYTES);
241 self.assemble_method_immediate(bytecode, args, line_num)?;
242 }
243 "pushint" => {
245 bytecode.push(OP_PUSHINT);
246 self.assemble_int_immediate(bytecode, args, line_num)?;
247 }
248 "pushbytes" => {
249 bytecode.push(OP_PUSHBYTES);
250 self.assemble_bytes_immediate(bytecode, args, line_num)?;
251 }
252
253 "intcblock" => {
255 bytecode.push(OP_INTCBLOCK);
256 self.assemble_intcblock(bytecode, args, line_num)?;
257 }
258 "intc" => {
259 bytecode.push(OP_INTC);
260 self.assemble_byte_immediate(bytecode, args, line_num)?;
261 }
262 "intc_0" => bytecode.push(OP_INTC_0),
263 "intc_1" => bytecode.push(OP_INTC_1),
264 "intc_2" => bytecode.push(OP_INTC_2),
265 "intc_3" => bytecode.push(OP_INTC_3),
266 "bytecblock" => {
267 bytecode.push(OP_BYTECBLOCK);
268 self.assemble_bytecblock(bytecode, args, line_num)?;
269 }
270 "bytec" => {
271 bytecode.push(OP_BYTEC);
272 self.assemble_byte_immediate(bytecode, args, line_num)?;
273 }
274 "bytec_0" => bytecode.push(OP_BYTEC_0),
275 "bytec_1" => bytecode.push(OP_BYTEC_1),
276 "bytec_2" => bytecode.push(OP_BYTEC_2),
277 "bytec_3" => bytecode.push(OP_BYTEC_3),
278
279 "arg" => {
281 bytecode.push(OP_ARG);
282 self.assemble_byte_immediate(bytecode, args, line_num)?;
283 }
284 "arg_0" => bytecode.push(OP_ARG_0),
285 "arg_1" => bytecode.push(OP_ARG_1),
286 "arg_2" => bytecode.push(OP_ARG_2),
287 "arg_3" => bytecode.push(OP_ARG_3),
288
289 "len" => bytecode.push(OP_LEN),
291 "itob" => bytecode.push(OP_ITOB),
292 "btoi" => bytecode.push(OP_BTOI),
293 "concat" => bytecode.push(OP_CONCAT),
294 "substring" => {
295 bytecode.push(OP_SUBSTRING);
296 self.assemble_substring_args(bytecode, args, line_num)?;
297 }
298 "substring3" => bytecode.push(OP_SUBSTRING3),
299 "getbit" => bytecode.push(OP_GETBIT),
300 "setbit" => bytecode.push(OP_SETBIT),
301 "getbyte" => bytecode.push(OP_GETBYTE),
302 "setbyte" => bytecode.push(OP_SETBYTE),
303 "extract" => {
304 bytecode.push(OP_EXTRACT);
305 self.assemble_substring_args(bytecode, args, line_num)?;
306 }
307 "extract3" => bytecode.push(OP_EXTRACT3),
308 "extract_uint16" => {
309 bytecode.push(OP_EXTRACT_UINT16);
310 self.assemble_byte_immediate(bytecode, args, line_num)?;
311 }
312 "extract_uint32" => {
313 bytecode.push(OP_EXTRACT_UINT32);
314 self.assemble_byte_immediate(bytecode, args, line_num)?;
315 }
316 "extract_uint64" => {
317 bytecode.push(OP_EXTRACT_UINT64);
318 self.assemble_byte_immediate(bytecode, args, line_num)?;
319 }
320 "replace2" => {
321 bytecode.push(OP_REPLACE2);
322 self.assemble_byte_immediate(bytecode, args, line_num)?;
323 }
324 "replace3" => bytecode.push(OP_REPLACE3),
325 "base64_decode" => {
326 bytecode.push(OP_BASE64_DECODE);
327 self.assemble_byte_immediate(bytecode, args, line_num)?;
328 }
329 "json_ref" => {
330 bytecode.push(OP_JSON_REF);
331 self.assemble_byte_immediate(bytecode, args, line_num)?;
332 }
333
334 "sha256" => bytecode.push(OP_SHA256),
336 "keccak256" => bytecode.push(OP_KECCAK256),
337 "sha512_256" => bytecode.push(OP_SHA512_256),
338 "sha3_256" => bytecode.push(OP_SHA3_256),
339 "ed25519verify" => bytecode.push(OP_ED25519VERIFY),
340 "ed25519verify_bare" => bytecode.push(OP_ED25519VERIFY_BARE),
341 "vrf_verify" => bytecode.push(OP_VRF_VERIFY),
342
343 "load" => {
345 bytecode.push(OP_LOAD);
346 self.assemble_byte_immediate(bytecode, args, line_num)?;
347 }
348 "store" => {
349 bytecode.push(OP_STORE);
350 self.assemble_byte_immediate(bytecode, args, line_num)?;
351 }
352
353 "txn" => {
355 bytecode.push(OP_TXN);
356 self.assemble_txn_field(bytecode, args, line_num)?;
357 }
358 "gtxn" => {
359 bytecode.push(OP_GTXN);
360 self.assemble_gtxn_args(bytecode, args, line_num)?;
361 }
362 "global" => {
363 bytecode.push(OP_GLOBAL);
364 self.assemble_global_field(bytecode, args, line_num)?;
365 }
366
367 "app_opted_in" => bytecode.push(OP_APP_OPTED_IN),
369 "app_local_get" => bytecode.push(OP_APP_LOCAL_GET),
370 "app_local_get_ex" => bytecode.push(OP_APP_LOCAL_GET_EX),
371 "app_global_get" => bytecode.push(OP_APP_GLOBAL_GET),
372 "app_global_get_ex" => bytecode.push(OP_APP_GLOBAL_GET_EX),
373 "app_local_put" => bytecode.push(OP_APP_LOCAL_PUT),
374 "app_global_put" => bytecode.push(OP_APP_GLOBAL_PUT),
375 "app_local_del" => bytecode.push(OP_APP_LOCAL_DEL),
376 "app_global_del" => bytecode.push(OP_APP_GLOBAL_DEL),
377 "asset_holding_get" => bytecode.push(OP_ASSET_HOLDING_GET),
378 "asset_params_get" => bytecode.push(OP_ASSET_PARAMS_GET),
379 "app_params_get" => bytecode.push(OP_APP_PARAMS_GET),
380 "acct_params_get" => bytecode.push(OP_ACCT_PARAMS_GET),
381 "balance" => bytecode.push(OP_BALANCE),
382 "min_balance" => bytecode.push(OP_MIN_BALANCE),
383
384 "box_create" => bytecode.push(OP_BOX_CREATE),
386 "box_extract" => bytecode.push(OP_BOX_EXTRACT),
387 "box_replace" => bytecode.push(OP_BOX_REPLACE),
388 "box_del" => bytecode.push(OP_BOX_DEL),
389 "box_len" => bytecode.push(OP_BOX_LEN),
390 "box_get" => bytecode.push(OP_BOX_GET),
391 "box_put" => bytecode.push(OP_BOX_PUT),
392 "box_splice" => bytecode.push(OP_BOX_SPLICE),
393 "box_resize" => bytecode.push(OP_BOX_RESIZE),
394
395 "block" => {
397 bytecode.push(OP_BLOCK);
398 self.assemble_byte_immediate(bytecode, args, line_num)?;
399 }
400
401 "err" => bytecode.push(OP_ERR),
402
403 _ => {
404 return Err(AvmError::assembly_error(format!(
405 "Unknown opcode '{opcode}' on line {line_num}"
406 )));
407 }
408 }
409
410 Ok(())
411 }
412
413 fn assemble_branch_target(
415 &mut self,
416 bytecode: &mut Vec<u8>,
417 args: &[&str],
418 line_num: usize,
419 ) -> AvmResult<()> {
420 if args.is_empty() {
421 return Err(AvmError::assembly_error(format!(
422 "Missing branch target on line {line_num}"
423 )));
424 }
425
426 let target = args[0];
427
428 if let Some(&addr) = self.labels.get(target) {
430 let pc_after_instruction = bytecode.len() + 2; let offset = (addr as i32) - (pc_after_instruction as i32);
436 bytecode.extend_from_slice(&(offset as i16).to_be_bytes());
437 } else {
438 self.forward_refs.push((bytecode.len(), target.to_string()));
440 bytecode.extend_from_slice(&[0, 0]); }
442
443 Ok(())
444 }
445
446 fn assemble_int_immediate(
448 &mut self,
449 bytecode: &mut Vec<u8>,
450 args: &[&str],
451 line_num: usize,
452 ) -> AvmResult<()> {
453 if args.is_empty() {
454 return Err(AvmError::assembly_error(format!(
455 "Missing integer value on line {line_num}"
456 )));
457 }
458
459 let value = self.parse_integer(args[0], line_num)?;
460 bytecode.extend_from_slice(&value.to_be_bytes());
461 Ok(())
462 }
463
464 fn parse_integer(&self, input: &str, line_num: usize) -> AvmResult<u64> {
466 let value = if input.starts_with("0x") || input.starts_with("0X") {
467 u64::from_str_radix(&input[2..], 16)
469 } else if input.starts_with("0o") || input.starts_with("0O") {
470 u64::from_str_radix(&input[2..], 8)
472 } else if input.starts_with("0b") || input.starts_with("0B") {
473 u64::from_str_radix(&input[2..], 2)
475 } else {
476 input.parse::<u64>()
478 };
479
480 value.map_err(|_| {
481 AvmError::assembly_error(format!("Invalid integer '{input}' on line {line_num}"))
482 })
483 }
484
485 fn assemble_byte_immediate(
487 &mut self,
488 bytecode: &mut Vec<u8>,
489 args: &[&str],
490 line_num: usize,
491 ) -> AvmResult<()> {
492 if args.is_empty() {
493 return Err(AvmError::assembly_error(format!(
494 "Missing byte value on line {line_num}"
495 )));
496 }
497
498 let value: u8 = args[0].parse().map_err(|_| {
499 AvmError::assembly_error(format!(
500 "Invalid byte value '{}' on line {}",
501 args[0], line_num
502 ))
503 })?;
504
505 bytecode.push(value);
506 Ok(())
507 }
508
509 fn assemble_bytes_immediate(
511 &mut self,
512 bytecode: &mut Vec<u8>,
513 args: &[&str],
514 line_num: usize,
515 ) -> AvmResult<()> {
516 if args.is_empty() {
517 return Err(AvmError::assembly_error(format!(
518 "Missing bytes value on line {line_num}"
519 )));
520 }
521
522 let bytes = self.parse_bytes(args, line_num)?;
523
524 if bytes.len() > 255 {
525 return Err(AvmError::assembly_error(format!(
526 "Bytes too long ({} > 255) on line {}",
527 bytes.len(),
528 line_num
529 )));
530 }
531
532 bytecode.push(bytes.len() as u8);
533 bytecode.extend_from_slice(&bytes);
534 Ok(())
535 }
536
537 fn parse_bytes(&self, args: &[&str], line_num: usize) -> AvmResult<Vec<u8>> {
539 if args.is_empty() {
540 return Err(AvmError::assembly_error(format!(
541 "Missing bytes value on line {line_num}"
542 )));
543 }
544
545 if args.len() >= 2 && (args[0] == "base64" || args[0] == "b64") {
547 let b64_data = args[1..].join("");
549 use base64::{Engine as _, engine::general_purpose};
550 general_purpose::STANDARD
551 .decode(b64_data)
552 .map_err(|_| AvmError::assembly_error(format!("Invalid base64 on line {line_num}")))
553 } else if args.len() == 1 {
554 let arg = args[0];
555 if let Some(stripped) = arg.strip_prefix("0x") {
556 hex::decode(stripped).map_err(|_| {
558 AvmError::assembly_error(format!(
559 "Invalid hex bytes '{arg}' on line {line_num}"
560 ))
561 })
562 } else if arg.starts_with('"') && arg.ends_with('"') {
563 let content = &arg[1..arg.len() - 1];
565 Ok(self.parse_string_literal(content)?)
566 } else {
567 self.try_parse_base32(arg, line_num)
569 }
570 } else {
571 Err(AvmError::assembly_error(format!(
572 "Invalid bytes format on line {line_num}"
573 )))
574 }
575 }
576
577 fn parse_string_literal(&self, content: &str) -> AvmResult<Vec<u8>> {
579 let mut result = Vec::new();
580 let mut chars = content.chars();
581
582 while let Some(ch) = chars.next() {
583 if ch == '\\' {
584 match chars.next() {
585 Some('n') => result.push(b'\n'),
586 Some('t') => result.push(b'\t'),
587 Some('r') => result.push(b'\r'),
588 Some('\\') => result.push(b'\\'),
589 Some('"') => result.push(b'"'),
590 Some('x') => {
591 let c1 = chars.next();
593 let c2 = chars.next();
594 if let (Some(c1), Some(c2)) = (c1, c2) {
595 let hex = format!("{c1}{c2}");
596 if let Ok(byte) = u8::from_str_radix(&hex, 16) {
597 result.push(byte);
598 } else {
599 return Err(AvmError::assembly_error(
600 "Invalid hex escape sequence".to_string(),
601 ));
602 }
603 } else {
604 return Err(AvmError::assembly_error(
605 "Invalid hex escape sequence".to_string(),
606 ));
607 }
608 }
609 Some(c) => result.push(c as u8),
610 None => {
611 return Err(AvmError::assembly_error(
612 "Incomplete escape sequence".to_string(),
613 ));
614 }
615 }
616 } else {
617 result.push(ch as u8);
618 }
619 }
620
621 Ok(result)
622 }
623
624 fn try_parse_base32(&self, input: &str, line_num: usize) -> AvmResult<Vec<u8>> {
626 use base32::{Alphabet, decode};
627
628 match decode(Alphabet::Rfc4648 { padding: false }, input) {
630 Some(bytes) => {
631 if bytes.len() == 32 {
633 Ok(bytes)
634 } else {
635 Err(AvmError::assembly_error(format!(
636 "Invalid address length: expected 32 bytes, got {} on line {line_num}",
637 bytes.len()
638 )))
639 }
640 }
641 None => Err(AvmError::assembly_error(format!(
642 "Invalid base32 encoding in address '{input}' on line {line_num}"
643 ))),
644 }
645 }
646
647 fn assemble_addr_immediate(
649 &mut self,
650 bytecode: &mut Vec<u8>,
651 args: &[&str],
652 line_num: usize,
653 ) -> AvmResult<()> {
654 if args.is_empty() {
655 return Err(AvmError::assembly_error(format!(
656 "Missing address value on line {line_num}"
657 )));
658 }
659
660 let addr_bytes = self.parse_algorand_address(args[0], line_num)?;
662
663 if addr_bytes.len() > 255 {
664 return Err(AvmError::assembly_error(format!(
665 "Address too long ({} > 255) on line {}",
666 addr_bytes.len(),
667 line_num
668 )));
669 }
670
671 bytecode.push(addr_bytes.len() as u8);
672 bytecode.extend_from_slice(&addr_bytes);
673 Ok(())
674 }
675
676 fn assemble_method_immediate(
678 &mut self,
679 bytecode: &mut Vec<u8>,
680 args: &[&str],
681 line_num: usize,
682 ) -> AvmResult<()> {
683 if args.is_empty() {
684 return Err(AvmError::assembly_error(format!(
685 "Missing method signature on line {line_num}"
686 )));
687 }
688
689 let method_sig = args.join(" ");
691 let selector = self.compute_method_selector(&method_sig)?;
692
693 bytecode.push(4); bytecode.extend_from_slice(&selector);
695 Ok(())
696 }
697
698 fn parse_algorand_address(&self, addr: &str, line_num: usize) -> AvmResult<Vec<u8>> {
700 use base32::{Alphabet, decode};
701 use sha2::{Digest, Sha512_256};
702
703 if addr.len() != 58 {
705 return Err(AvmError::assembly_error(format!(
706 "Invalid Algorand address length on line {line_num}: expected 58 characters, got {}",
707 addr.len()
708 )));
709 }
710
711 let decoded = decode(Alphabet::Rfc4648 { padding: false }, addr).ok_or_else(|| {
713 AvmError::assembly_error(format!(
714 "Invalid base32 encoding in address on line {line_num}"
715 ))
716 })?;
717
718 if decoded.len() != 36 {
720 return Err(AvmError::assembly_error(format!(
721 "Invalid decoded address length on line {line_num}: expected 36 bytes, got {}",
722 decoded.len()
723 )));
724 }
725
726 let (address_bytes, checksum) = decoded.split_at(32);
728
729 let mut hasher = Sha512_256::new();
731 hasher.update(address_bytes);
732 let hash = hasher.finalize();
733 let expected_checksum = &hash[hash.len() - 4..];
734
735 if checksum != expected_checksum {
736 return Err(AvmError::assembly_error(format!(
737 "Invalid address checksum on line {line_num}"
738 )));
739 }
740
741 Ok(address_bytes.to_vec())
742 }
743
744 fn compute_method_selector(&self, method_sig: &str) -> AvmResult<[u8; 4]> {
746 use sha2::{Digest, Sha256};
747
748 let mut hasher = Sha256::new();
750 hasher.update(method_sig.as_bytes());
751 let hash = hasher.finalize();
752
753 let mut selector = [0u8; 4];
755 selector.copy_from_slice(&hash[..4]);
756 Ok(selector)
757 }
758
759 fn assemble_substring_args(
761 &mut self,
762 bytecode: &mut Vec<u8>,
763 args: &[&str],
764 line_num: usize,
765 ) -> AvmResult<()> {
766 if args.len() < 2 {
767 return Err(AvmError::assembly_error(format!(
768 "substring requires start and length on line {line_num}"
769 )));
770 }
771
772 let start: u8 = args[0].parse().map_err(|_| {
773 AvmError::assembly_error(format!(
774 "Invalid start value '{}' on line {}",
775 args[0], line_num
776 ))
777 })?;
778
779 let length: u8 = args[1].parse().map_err(|_| {
780 AvmError::assembly_error(format!(
781 "Invalid length value '{}' on line {}",
782 args[1], line_num
783 ))
784 })?;
785
786 bytecode.push(start);
787 bytecode.push(length);
788 Ok(())
789 }
790
791 fn assemble_txn_field(
793 &mut self,
794 bytecode: &mut Vec<u8>,
795 args: &[&str],
796 line_num: usize,
797 ) -> AvmResult<()> {
798 if args.is_empty() {
799 return Err(AvmError::assembly_error(format!(
800 "Missing transaction field on line {line_num}"
801 )));
802 }
803
804 let field_id = match args[0] {
805 "Sender" => 0,
806 "Fee" => 1,
807 "FirstValid" => 2,
808 "FirstValidTime" => 3,
809 "LastValid" => 4,
810 "Note" => 5,
811 "Lease" => 6,
812 "Receiver" => 7,
813 "Amount" => 8,
814 "CloseRemainderTo" => 9,
815 "VotePK" => 10,
816 "SelectionPK" => 11,
817 "VoteFirst" => 12,
818 "VoteLast" => 13,
819 "VoteKeyDilution" => 14,
820 "Type" => 15,
821 "TypeEnum" => 16,
822 "XferAsset" => 17,
823 "AssetAmount" => 18,
824 "AssetSender" => 19,
825 "AssetReceiver" => 20,
826 "AssetCloseTo" => 21,
827 "GroupIndex" => 22,
828 "TxID" => 23,
829 "ApplicationID" => 24,
830 "OnCompletion" => 25,
831 "ApplicationArgs" => 26,
832 "NumAppArgs" => 27,
833 "Accounts" => 28,
834 "NumAccounts" => 29,
835 "ApprovalProgram" => 30,
836 "ClearStateProgram" => 31,
837 "RekeyTo" => 32,
838 "ConfigAsset" => 33,
839 "ConfigAssetTotal" => 34,
840 "ConfigAssetDecimals" => 35,
841 "ConfigAssetDefaultFrozen" => 36,
842 "ConfigAssetUnitName" => 37,
843 "ConfigAssetName" => 38,
844 "ConfigAssetURL" => 39,
845 "ConfigAssetMetadataHash" => 40,
846 "ConfigAssetManager" => 41,
847 "ConfigAssetReserve" => 42,
848 "ConfigAssetFreeze" => 43,
849 "ConfigAssetClawback" => 44,
850 "FreezeAsset" => 45,
851 "FreezeAssetAccount" => 46,
852 "FreezeAssetFrozen" => 47,
853 "Assets" => 48,
854 "NumAssets" => 49,
855 "Applications" => 50,
856 "NumApplications" => 51,
857 "GlobalNumUint" => 52,
858 "GlobalNumByteSlice" => 53,
859 "LocalNumUint" => 54,
860 "LocalNumByteSlice" => 55,
861 "ExtraProgramPages" => 56,
862 "Nonparticipation" => 57,
863 "Logs" => 58,
864 "NumLogs" => 59,
865 "CreatedAssetID" => 60,
866 "CreatedApplicationID" => 61,
867 "LastLog" => 62,
868 "StateProofPK" => 63,
869 "ApprovalProgramPages" => 64,
870 "NumApprovalProgramPages" => 65,
871 "ClearStateProgramPages" => 66,
872 "NumClearStateProgramPages" => 67,
873 _ => {
874 return Err(AvmError::assembly_error(format!(
875 "Unknown transaction field '{}' on line {}",
876 args[0], line_num
877 )));
878 }
879 };
880
881 bytecode.push(field_id);
882 Ok(())
883 }
884
885 fn assemble_gtxn_args(
887 &mut self,
888 bytecode: &mut Vec<u8>,
889 args: &[&str],
890 line_num: usize,
891 ) -> AvmResult<()> {
892 if args.len() < 2 {
893 return Err(AvmError::assembly_error(format!(
894 "gtxn requires group index and field on line {line_num}"
895 )));
896 }
897
898 let group_index: u8 = args[0].parse().map_err(|_| {
899 AvmError::assembly_error(format!(
900 "Invalid group index '{}' on line {}",
901 args[0], line_num
902 ))
903 })?;
904
905 bytecode.push(group_index);
906 self.assemble_txn_field(bytecode, &args[1..], line_num)?;
907 Ok(())
908 }
909
910 fn assemble_global_field(
912 &mut self,
913 bytecode: &mut Vec<u8>,
914 args: &[&str],
915 line_num: usize,
916 ) -> AvmResult<()> {
917 if args.is_empty() {
918 return Err(AvmError::assembly_error(format!(
919 "Missing global field on line {line_num}"
920 )));
921 }
922
923 let field_id = match args[0] {
924 "MinTxnFee" => 0,
925 "MinBalance" => 1,
926 "MaxTxnLife" => 2,
927 "ZeroAddress" => 3,
928 "GroupSize" => 4,
929 "LogicSigVersion" => 5,
930 "Round" => 6,
931 "LatestTimestamp" => 7,
932 "CurrentApplicationID" => 8,
933 "CreatorAddress" => 9,
934 "CurrentApplicationAddress" => 10,
935 "GroupID" => 11,
936 "OpcodeBudget" => 12,
937 "CallerApplicationID" => 13,
938 "CallerApplicationAddress" => 14,
939 "AssetCreateMinBalance" => 15,
940 "AssetOptInMinBalance" => 16,
941 "GenesisHash" => 17,
942 _ => {
943 return Err(AvmError::assembly_error(format!(
944 "Unknown global field '{}' on line {}",
945 args[0], line_num
946 )));
947 }
948 };
949
950 bytecode.push(field_id);
951 Ok(())
952 }
953
954 fn assemble_intcblock(
956 &mut self,
957 bytecode: &mut Vec<u8>,
958 args: &[&str],
959 line_num: usize,
960 ) -> AvmResult<()> {
961 if args.is_empty() {
962 return Err(AvmError::assembly_error(format!(
963 "intcblock requires at least one integer constant on line {line_num}"
964 )));
965 }
966
967 let count_bytes = encode_varuint(args.len() as u64);
969 bytecode.extend_from_slice(&count_bytes);
970
971 for arg in args {
973 let value = self.parse_integer(arg, line_num)?;
974 let value_bytes = encode_varuint(value);
975 bytecode.extend_from_slice(&value_bytes);
976 }
977
978 Ok(())
979 }
980
981 fn assemble_bytecblock(
983 &mut self,
984 bytecode: &mut Vec<u8>,
985 args: &[&str],
986 line_num: usize,
987 ) -> AvmResult<()> {
988 if args.is_empty() {
989 return Err(AvmError::assembly_error(format!(
990 "bytecblock requires at least one byte constant on line {line_num}"
991 )));
992 }
993
994 let count_bytes = encode_varuint(args.len() as u64);
996 bytecode.extend_from_slice(&count_bytes);
997
998 for arg in args {
1000 let bytes = self.parse_bytes(&[arg], line_num)?;
1001
1002 let length_bytes = encode_varuint(bytes.len() as u64);
1004 bytecode.extend_from_slice(&length_bytes);
1005 bytecode.extend_from_slice(&bytes);
1006 }
1007
1008 Ok(())
1009 }
1010
1011 fn resolve_forward_refs(&self, bytecode: &mut [u8]) -> AvmResult<()> {
1013 for (addr, label) in &self.forward_refs {
1014 let target_addr = self
1015 .labels
1016 .get(label)
1017 .ok_or_else(|| AvmError::assembly_error(format!("Undefined label: {label}")))?;
1018
1019 let pc_after_instruction = *addr + 2; let offset = (*target_addr as i32) - (pc_after_instruction as i32);
1025 let offset_bytes = (offset as i16).to_be_bytes();
1026 bytecode[*addr] = offset_bytes[0];
1027 bytecode[*addr + 1] = offset_bytes[1];
1028 }
1029
1030 Ok(())
1031 }
1032}
1033
1034pub fn disassemble(bytecode: &[u8]) -> AvmResult<String> {
1036 use crate::varuint::decode_varuint;
1037 let mut result = String::new();
1038 let mut pc = 0;
1039
1040 while pc < bytecode.len() {
1041 let opcode = bytecode[pc];
1042
1043 result.push_str(&format!("{pc:04x}: "));
1044
1045 let (instruction, size) = match opcode {
1046 OP_ERR => ("err".to_string(), 1),
1047 OP_PLUS => ("+".to_string(), 1),
1048 OP_MINUS => ("-".to_string(), 1),
1049 OP_MUL => ("*".to_string(), 1),
1050 OP_DIV => ("/".to_string(), 1),
1051 OP_MOD => ("%".to_string(), 1),
1052 OP_LT => ("<".to_string(), 1),
1053 OP_GT => (">".to_string(), 1),
1054 OP_LE => ("<=".to_string(), 1),
1055 OP_GE => (">=".to_string(), 1),
1056 OP_EQ => ("==".to_string(), 1),
1057 OP_NE => ("!=".to_string(), 1),
1058 OP_AND => ("&&".to_string(), 1),
1059 OP_OR => ("||".to_string(), 1),
1060 OP_NOT => ("!".to_string(), 1),
1061 OP_BITWISE_OR => ("|".to_string(), 1),
1062 OP_BITWISE_AND => ("&".to_string(), 1),
1063 OP_BITWISE_XOR => ("^".to_string(), 1),
1064 OP_BITWISE_NOT => ("~".to_string(), 1),
1065 OP_POP => ("pop".to_string(), 1),
1066 OP_DUP => ("dup".to_string(), 1),
1067 OP_DUP2 => ("dup2".to_string(), 1),
1068 OP_SWAP => ("swap".to_string(), 1),
1069 OP_SELECT => ("select".to_string(), 1),
1070 OP_BNZ => {
1071 if pc + 2 < bytecode.len() {
1072 let offset = i16::from_be_bytes([bytecode[pc + 1], bytecode[pc + 2]]);
1073 let target = (pc as i32 + 3 + offset as i32) as usize;
1074 (format!("bnz {target:04x}"), 3)
1075 } else {
1076 ("bnz <invalid>".to_string(), 1)
1077 }
1078 }
1079 OP_BZ => {
1080 if pc + 2 < bytecode.len() {
1081 let offset = i16::from_be_bytes([bytecode[pc + 1], bytecode[pc + 2]]);
1082 let target = (pc as i32 + 3 + offset as i32) as usize;
1083 (format!("bz {target:04x}"), 3)
1084 } else {
1085 ("bz <invalid>".to_string(), 1)
1086 }
1087 }
1088 OP_B => {
1089 if pc + 2 < bytecode.len() {
1090 let offset = i16::from_be_bytes([bytecode[pc + 1], bytecode[pc + 2]]);
1091 let target = (pc as i32 + 3 + offset as i32) as usize;
1092 (format!("b {target:04x}"), 3)
1093 } else {
1094 ("b <invalid>".to_string(), 1)
1095 }
1096 }
1097 OP_RETURN => ("return".to_string(), 1),
1098 OP_ASSERT => ("assert".to_string(), 1),
1099 OP_RETSUB => ("retsub".to_string(), 1),
1100 OP_SHA256 => ("sha256".to_string(), 1),
1101 OP_KECCAK256 => ("keccak256".to_string(), 1),
1102 OP_SHA512_256 => ("sha512_256".to_string(), 1),
1103 OP_ED25519VERIFY => ("ed25519verify".to_string(), 1),
1104 OP_LEN => ("len".to_string(), 1),
1105 OP_ITOB => ("itob".to_string(), 1),
1106 OP_BTOI => ("btoi".to_string(), 1),
1107 OP_CONCAT => ("concat".to_string(), 1),
1108 OP_SUBSTRING3 => ("substring3".to_string(), 1),
1109 OP_APP_GLOBAL_GET => ("app_global_get".to_string(), 1),
1110 OP_APP_GLOBAL_PUT => ("app_global_put".to_string(), 1),
1111 OP_APP_GLOBAL_DEL => ("app_global_del".to_string(), 1),
1112 OP_APP_LOCAL_GET => ("app_local_get".to_string(), 1),
1113 OP_APP_LOCAL_PUT => ("app_local_put".to_string(), 1),
1114 OP_APP_LOCAL_DEL => ("app_local_del".to_string(), 1),
1115 OP_BALANCE => ("balance".to_string(), 1),
1116 OP_MIN_BALANCE => ("min_balance".to_string(), 1),
1117
1118 OP_INTCBLOCK => {
1120 let mut offset = pc + 1;
1121
1122 if let Ok((count, consumed)) = decode_varuint(&bytecode[offset..]) {
1124 offset += consumed;
1125 let count = count as usize;
1126 let mut constants = Vec::new();
1127
1128 for _ in 0..count {
1130 if let Ok((value, consumed)) = decode_varuint(&bytecode[offset..]) {
1131 constants.push(value.to_string());
1132 offset += consumed;
1133 } else {
1134 break;
1135 }
1136 }
1137
1138 if constants.len() == count {
1139 (format!("intcblock {}", constants.join(" ")), offset - pc)
1140 } else {
1141 ("intcblock <invalid>".to_string(), 1)
1142 }
1143 } else {
1144 ("intcblock <invalid>".to_string(), 1)
1145 }
1146 }
1147 OP_INTC => {
1148 if pc + 1 < bytecode.len() {
1149 let index = bytecode[pc + 1];
1150 (format!("intc {index}"), 2)
1151 } else {
1152 ("intc <invalid>".to_string(), 1)
1153 }
1154 }
1155 OP_INTC_0 => ("intc_0".to_string(), 1),
1156 OP_INTC_1 => ("intc_1".to_string(), 1),
1157 OP_INTC_2 => ("intc_2".to_string(), 1),
1158 OP_INTC_3 => ("intc_3".to_string(), 1),
1159 OP_BYTECBLOCK => {
1160 let mut offset = pc + 1;
1161
1162 if let Ok((count, consumed)) = decode_varuint(&bytecode[offset..]) {
1164 offset += consumed;
1165 let count = count as usize;
1166 let mut constants = Vec::new();
1167
1168 for _ in 0..count {
1170 if let Ok((length, consumed)) = decode_varuint(&bytecode[offset..]) {
1171 offset += consumed;
1172 let length = length as usize;
1173
1174 if offset + length <= bytecode.len() {
1175 let bytes = &bytecode[offset..offset + length];
1176 if bytes.iter().all(|&b| b.is_ascii() && !b.is_ascii_control()) {
1177 constants
1178 .push(format!("\"{}\"", String::from_utf8_lossy(bytes)));
1179 } else {
1180 constants.push(format!("0x{}", hex::encode(bytes)));
1181 }
1182 offset += length;
1183 } else {
1184 break;
1185 }
1186 } else {
1187 break;
1188 }
1189 }
1190
1191 if constants.len() == count {
1192 (format!("bytecblock {}", constants.join(" ")), offset - pc)
1193 } else {
1194 ("bytecblock <invalid>".to_string(), 1)
1195 }
1196 } else {
1197 ("bytecblock <invalid>".to_string(), 1)
1198 }
1199 }
1200 OP_BYTEC => {
1201 if pc + 1 < bytecode.len() {
1202 let index = bytecode[pc + 1];
1203 (format!("bytec {index}"), 2)
1204 } else {
1205 ("bytec <invalid>".to_string(), 1)
1206 }
1207 }
1208 OP_BYTEC_0 => ("bytec_0".to_string(), 1),
1209 OP_BYTEC_1 => ("bytec_1".to_string(), 1),
1210 OP_BYTEC_2 => ("bytec_2".to_string(), 1),
1211 OP_BYTEC_3 => ("bytec_3".to_string(), 1),
1212
1213 OP_ARG => {
1215 if pc + 1 < bytecode.len() {
1216 let index = bytecode[pc + 1];
1217 (format!("arg {index}"), 2)
1218 } else {
1219 ("arg <invalid>".to_string(), 1)
1220 }
1221 }
1222 OP_ARG_0 => ("arg_0".to_string(), 1),
1223 OP_ARG_1 => ("arg_1".to_string(), 1),
1224 OP_ARG_2 => ("arg_2".to_string(), 1),
1225 OP_ARG_3 => ("arg_3".to_string(), 1),
1226
1227 OP_PUSHINT => {
1228 if pc + 8 < bytecode.len() {
1229 let value = u64::from_be_bytes(bytecode[pc + 1..pc + 9].try_into().unwrap());
1230 (format!("int {value}"), 9)
1231 } else {
1232 ("int <invalid>".to_string(), 1)
1233 }
1234 }
1235
1236 OP_PUSHBYTES => {
1237 if pc + 1 < bytecode.len() {
1238 let len = bytecode[pc + 1] as usize;
1239 if pc + 1 + len < bytecode.len() {
1240 let bytes = &bytecode[pc + 2..pc + 2 + len];
1241 if bytes.iter().all(|&b| b.is_ascii() && !b.is_ascii_control()) {
1242 (
1243 format!("byte \"{}\"", String::from_utf8_lossy(bytes)),
1244 2 + len,
1245 )
1246 } else {
1247 (format!("byte 0x{}", hex::encode(bytes)), 2 + len)
1248 }
1249 } else {
1250 ("byte <invalid>".to_string(), 1)
1251 }
1252 } else {
1253 ("byte <invalid>".to_string(), 1)
1254 }
1255 }
1256
1257 _ => (format!("unknown_{opcode:02x}"), 1),
1258 };
1259
1260 result.push_str(&instruction);
1261 result.push('\n');
1262
1263 pc += size;
1264 }
1265
1266 Ok(result)
1267}