swamp_vm/
string.rs

1/*
2 * Copyright (c) Peter Bjorklund. All rights reserved. https://github.com/swamp/swamp
3 * Licensed under the MIT License. See LICENSE in the project root for license information.
4 */
5use crate::memory::ExecutionMode;
6use crate::memory::Memory;
7use crate::{TrapCode, Vm, get_reg, i16_from_u8s, set_reg};
8use std::num::ParseIntError;
9use std::{mem::size_of, ptr, slice};
10use swamp_vm_isa::{
11    MAX_STRING_LEN, StringIterator, VEC_HEADER_MAGIC_CODE, VEC_HEADER_PAYLOAD_OFFSET, VecHeader,
12};
13
14impl Vm {
15    pub fn get_string_iterator_header_ptr_from_reg(
16        &self,
17        vec_iterator_reg: u8,
18    ) -> *mut StringIterator {
19        self.get_ptr_from_reg(vec_iterator_reg) as *mut StringIterator
20    }
21
22    #[inline]
23    fn get_string(&self, reg: u8) -> &str {
24        let string_header_addr = get_reg!(self, reg);
25        let header_ptr =
26            self.memory()
27                .get_heap_const_ptr(string_header_addr as usize) as *const VecHeader;
28        let header = unsafe { *header_ptr };
29        let byte_count = header.element_count;
30
31        #[cfg(feature = "debug_vm")]
32        if self.debug_operations_enabled {
33            eprintln!(
34                "get string {:X}. Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
35                string_header_addr,
36                self.memory().constant_memory_size,
37                self.memory().stack_start,
38                self.memory().stack_offset,
39                self.memory().heap_start,
40                self.memory().heap_alloc_offset
41            );
42        }
43
44        if byte_count != 0 {
45            debug_assert_eq!(
46                header.padding, VEC_HEADER_MAGIC_CODE,
47                "string is corrupt. it is saying it has length {byte_count}, left: {}, right: {VEC_HEADER_MAGIC_CODE}",
48                header.padding
49            );
50            debug_assert!(
51                header.element_count < MAX_STRING_LEN,
52                "string of strange length. it is saying it has length {byte_count}, left: {}, right: {VEC_HEADER_MAGIC_CODE}",
53                header.padding
54            );
55        }
56
57        let runes_ptr = self.memory().get_heap_const_ptr(
58            (string_header_addr as usize) + VEC_HEADER_PAYLOAD_OFFSET.0 as usize,
59        );
60
61        unsafe {
62            let bytes = slice::from_raw_parts(runes_ptr, byte_count as usize);
63
64            if byte_count > 0 {
65                let s = std::str::from_utf8(bytes).unwrap_or("INVALID_UTF8");
66                #[cfg(feature = "debug_vm")]
67                if self.debug_operations_enabled {
68                    eprintln!("String content: \"{s}\" at addr {string_header_addr:X}");
69                }
70                self.verify_string_without_mut(bytes);
71            }
72
73            std::str::from_utf8(bytes).expect("utf8 conversion error")
74        }
75    }
76
77    pub fn verify_string(&mut self, raw_bytes: &[u8]) {
78        if self.debug_operations_enabled {
79            let hex: String = raw_bytes
80                .iter()
81                .map(|b| format!("{:02X}", b))
82                .collect::<Vec<_>>()
83                .join(" ");
84            eprintln!("raw_string: '{hex}'");
85        }
86
87        if raw_bytes.contains(&0) {
88            return self.internal_trap(TrapCode::InvalidUtf8Sequence);
89        }
90    }
91
92    pub fn verify_string_without_mut(&self, raw_bytes: &[u8]) {
93        if self.debug_operations_enabled {
94            let hex: String = raw_bytes
95                .iter()
96                .map(|b| format!("{:02X}", b))
97                .collect::<Vec<_>>()
98                .join(" ");
99            eprintln!("raw_string: '{hex}'");
100        }
101        if raw_bytes.contains(&0) {
102            panic!("illegal string");
103        }
104    }
105
106    pub fn execute_string_from_bytes(&mut self, target_string_view_reg: u8, bytes_vec_reg: u8) {
107        #[cfg(feature = "debug_vm")]
108        if self.debug_operations_enabled {
109            eprintln!("=== STRING FROM BYTES ===");
110        }
111        let bytes_header_addr = get_reg!(self, bytes_vec_reg);
112        let bytes_header =
113            self.memory().get_heap_const_ptr(bytes_header_addr as usize) as *const VecHeader;
114
115        let raw_bytes = unsafe {
116            slice::from_raw_parts(
117                self.memory.get_heap_const_ptr(
118                    bytes_header_addr as usize + VEC_HEADER_PAYLOAD_OFFSET.0 as usize,
119                ),
120                (*bytes_header).element_count as usize,
121            )
122        };
123
124        #[cfg(feature = "debug_vm")]
125        self.verify_string(raw_bytes);
126
127        let converted_string_result = unsafe { str::from_utf8(&*raw_bytes) };
128        match converted_string_result {
129            Ok(converted_string) => {
130                self.create_string(target_string_view_reg, converted_string);
131            }
132            Err(_) => self.internal_trap(TrapCode::InvalidUtf8Sequence),
133        }
134    }
135
136    pub fn execute_string_storage_from_bytes(
137        &mut self,
138        target_string_storage_reg: u8,
139        bytes_vec_reg: u8,
140    ) {
141        #[cfg(feature = "debug_vm")]
142        if self.debug_operations_enabled {
143            eprintln!("=== STRING STORAGE FROM BYTES ===");
144        }
145        let bytes_header_addr = get_reg!(self, bytes_vec_reg);
146        let bytes_header =
147            self.memory().get_heap_const_ptr(bytes_header_addr as usize) as *const VecHeader;
148
149        let raw_bytes = unsafe {
150            slice::from_raw_parts(
151                self.memory.get_heap_const_ptr(
152                    bytes_header_addr as usize + VEC_HEADER_PAYLOAD_OFFSET.0 as usize,
153                ),
154                (*bytes_header).element_count as usize,
155            )
156        };
157
158        #[cfg(feature = "debug_vm")]
159        self.verify_string(raw_bytes);
160
161        let converted_string_result = unsafe { str::from_utf8(&*raw_bytes) };
162
163        match converted_string_result {
164            Ok(_converted_string) => {
165                self.execute_vec_copy(target_string_storage_reg, bytes_vec_reg);
166            }
167            Err(_) => self.internal_trap(TrapCode::InvalidUtf8Sequence),
168        }
169    }
170
171    #[inline]
172    pub fn execute_string_duplicate(&mut self, target_string_view_reg: u8, string_storage: u8) {
173        #[cfg(feature = "debug_vm")]
174        if self.debug_operations_enabled {
175            eprintln!("=== STRING DUPLICATE OPERATION ===");
176        }
177        let str_a = self.get_string(string_storage).to_string();
178
179        #[cfg(feature = "debug_vm")]
180        self.verify_string(str_a.as_bytes());
181
182        self.create_string(target_string_view_reg, &str_a);
183    }
184
185    #[inline]
186    pub fn execute_string_append(&mut self, target_string_reg: u8, string_a: u8, string_b: u8) {
187        #[cfg(feature = "debug_vm")]
188        if self.debug_operations_enabled {
189            eprintln!("=== STRING_APPEND OPERATION ===");
190            eprintln!(
191                "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
192                self.memory().constant_memory_size,
193                self.memory().stack_start,
194                self.memory().stack_offset,
195                self.memory().heap_start,
196                self.memory().heap_alloc_offset
197            );
198
199            // Debug: Print register values using the get_reg macro
200            let reg_a_value = get_reg!(self, string_a);
201            let reg_b_value = get_reg!(self, string_b);
202
203            eprintln!("String A register {string_a}: 0x{reg_a_value:X}");
204            eprintln!("String B register {string_b}: 0x{reg_b_value:X}");
205            eprintln!("Target register {target_string_reg}");
206        }
207
208        let result = {
209            let str_a = self.get_string(string_a);
210            let str_b = self.get_string(string_b);
211            str_a.to_string() + str_b
212        };
213
214        #[cfg(feature = "debug_vm")]
215        self.verify_string(result.as_bytes());
216
217        #[cfg(feature = "debug_vm")]
218        if self.debug_operations_enabled {
219            eprintln!(
220                "Concatenated string: \"{}\" (length: {})",
221                result,
222                result.len()
223            );
224        }
225
226        self.create_string(target_string_reg, &result);
227        // Debug: Print final register value
228        let final_reg_value = get_reg!(self, target_string_reg);
229        #[cfg(feature = "debug_vm")]
230        if self.debug_operations_enabled {
231            eprintln!("append: Final target register value: 0x{final_reg_value:X}");
232        }
233    }
234
235    #[inline]
236    pub fn execute_string_repeat(&mut self, target_string_reg: u8, string_a: u8, repeat_reg: u8) {
237        #[cfg(feature = "debug_vm")]
238        if self.debug_operations_enabled {
239            eprintln!("=== STRING_REPEAT OPERATION ===");
240            eprintln!(
241                "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
242                self.memory().constant_memory_size,
243                self.memory().stack_start,
244                self.memory().stack_offset,
245                self.memory().heap_start,
246                self.memory().heap_alloc_offset
247            );
248
249            // Debug: Print register values
250            let reg_a_value = get_reg!(self, string_a);
251            let repeat_value = get_reg!(self, repeat_reg);
252            eprintln!("String A register {string_a}: 0x{reg_a_value:X}");
253            eprintln!("Repeat count register {repeat_reg}: {}", repeat_value);
254            eprintln!("Target register {target_string_reg}");
255        }
256
257        // Load the input string
258        let str_a = self.get_string(string_a);
259
260        let count = get_reg!(self, repeat_reg) as usize;
261
262        // Perform the repeat
263        let result = str_a.repeat(count);
264        #[cfg(feature = "debug_vm")]
265        self.verify_string(result.as_bytes());
266
267        #[cfg(feature = "debug_vm")]
268        if self.debug_operations_enabled {
269            eprintln!(
270                "Repeated string: \"{}\" (length: {}, repeated {} times)",
271                result,
272                result.len(),
273                count
274            );
275        }
276
277        // Store the result back into the target register
278        self.create_string(target_string_reg, &result);
279
280        #[cfg(feature = "debug_vm")]
281        if self.debug_operations_enabled {
282            let final_reg_value = get_reg!(self, target_string_reg);
283            eprintln!("Final target register value: 0x{final_reg_value:X}");
284        }
285    }
286
287    #[inline]
288    pub fn execute_string_cmp(&mut self, dest_reg: u8, string_a: u8, string_b: u8) {
289        #[cfg(feature = "debug_vm")]
290        if self.debug_operations_enabled {
291            eprintln!("=== STRING_COMPARE OPERATION ===");
292            eprintln!(
293                "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
294                self.memory().constant_memory_size,
295                self.memory().stack_start,
296                self.memory().stack_offset,
297                self.memory().heap_start,
298                self.memory().heap_alloc_offset
299            );
300        }
301
302        // Debug: Print register values
303        let reg_a_value = get_reg!(self, string_a);
304        let reg_b_value = get_reg!(self, string_b);
305
306        #[cfg(feature = "debug_vm")]
307        if self.debug_operations_enabled {
308            eprintln!("String A register {string_a}: 0x{reg_a_value:X}");
309            eprintln!("String B register {string_b}: 0x{reg_b_value:X}");
310        }
311
312        let str_a = self.get_string(string_a);
313        let str_b = self.get_string(string_b);
314
315        let result = str_a == str_b;
316
317        #[cfg(feature = "debug_vm")]
318        if self.debug_operations_enabled {
319            eprintln!("String comparison result: {result}");
320        }
321
322        // Store the result
323        set_reg!(self, dest_reg, result as u32);
324    }
325
326    /// Return the same string but with quotes.
327    #[inline]
328    pub fn execute_string_starts_with(
329        &mut self,
330        dest_reg: u8,
331        source_string: u8,
332        other_string_reg: u8,
333    ) {
334        #[cfg(feature = "debug_vm")]
335        if self.debug_operations_enabled {
336            eprintln!("=== String starts with OPERATION ===");
337            eprintln!(
338                "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
339                self.memory().constant_memory_size,
340                self.memory().stack_start,
341                self.memory().stack_offset,
342                self.memory().heap_start,
343                self.memory().heap_alloc_offset
344            );
345        }
346
347        let source_str = self.get_string(source_string);
348        let other_str = self.get_string(other_string_reg);
349
350        set_reg!(self, dest_reg, source_str.starts_with(other_str))
351    }
352
353    fn str_to_int(text: &str) -> Result<i32, ParseIntError> {
354        let text = text.replace('_', "");
355        text.strip_prefix("0x").map_or_else(
356            || {
357                text.strip_prefix("-0x").map_or_else(
358                    || text.parse::<i32>(),
359                    |rest| i32::from_str_radix(rest, 16).map(|x| -x),
360                )
361            },
362            |rest| i32::from_str_radix(rest, 16),
363        )
364    }
365
366    /// Parses the string to float and returns the tuple with result
367    #[inline]
368    pub fn execute_string_to_float(&mut self, dest_tuple_reg: u8, source_string: u8) {
369        #[cfg(feature = "debug_vm")]
370        if self.debug_operations_enabled {
371            eprintln!("=== String to float OPERATION ===");
372        }
373
374        let source_str = self.get_string(source_string).to_string();
375
376        let tuple_addr = get_reg!(self, dest_tuple_reg);
377
378        let tuple_ptr = self.memory_mut().get_heap_ptr(tuple_addr as usize);
379
380        let float_value = source_str.parse::<f32>();
381
382        if let Ok(value) = float_value {
383            unsafe {
384                let fp = fixed32::Fp::from(value);
385                *(tuple_ptr as *mut u32) = fp.inner() as u32;
386                *tuple_ptr.add(4) = 0x01;
387            }
388        } else {
389            unsafe {
390                *(tuple_ptr as *mut u32) = 0;
391                *tuple_ptr.add(4) = 0x00;
392            }
393        }
394    }
395
396    #[inline]
397    pub fn execute_string_to_int(&mut self, dest_tuple_reg: u8, source_string: u8) {
398        #[cfg(feature = "debug_vm")]
399        if self.debug_operations_enabled {
400            eprintln!("=== String to int OPERATION ===");
401        }
402
403        let source_str = self.get_string(source_string).to_string();
404
405        let tuple_addr = get_reg!(self, dest_tuple_reg);
406
407        let tuple_ptr = self.memory_mut().get_heap_ptr(tuple_addr as usize);
408
409        let int_value = Self::str_to_int(&source_str);
410
411        if let Ok(value) = int_value {
412            unsafe {
413                *(tuple_ptr as *mut i32) = value;
414                *tuple_ptr.add(4) = 0x01;
415            }
416        } else {
417            unsafe {
418                *(tuple_ptr as *mut u32) = 0;
419                *tuple_ptr.add(4) = 0x00;
420            }
421        }
422    }
423
424    /// Return the same string but with quotes.
425    #[inline]
426    pub fn execute_string_to_string(&mut self, dest_reg: u8, source_string: u8) {
427        #[cfg(feature = "debug_vm")]
428        if self.debug_operations_enabled {
429            eprintln!("=== STRING_TO_STRING OPERATION ===");
430            eprintln!(
431                "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
432                self.memory().constant_memory_size,
433                self.memory().stack_start,
434                self.memory().stack_offset,
435                self.memory().heap_start,
436                self.memory().heap_alloc_offset
437            );
438
439            let source_reg_value = get_reg!(self, source_string);
440            eprintln!("Source string register {source_string}: 0x{source_reg_value:X}");
441        }
442
443        let source_str = self.get_string(source_string);
444
445        // Create the formatted string with quotes
446        let mut formatted_string = String::with_capacity(source_str.len() + 2);
447        formatted_string.push('"');
448        formatted_string.push_str(source_str);
449        formatted_string.push('"');
450
451        #[cfg(feature = "debug_vm")]
452        if self.debug_operations_enabled {
453            eprintln!(
454                "Formatted string: \"{}\" (length: {})",
455                formatted_string,
456                formatted_string.len()
457            );
458        }
459
460        self.create_string(dest_reg, &formatted_string);
461
462        let final_reg_value = get_reg!(self, dest_reg);
463
464        #[cfg(feature = "debug_vm")]
465        if self.debug_operations_enabled {
466            eprintln!("Final destination register value: 0x{final_reg_value:X}");
467        }
468    }
469
470    pub fn read_string(&self, heap_addr: u32, heap: &Memory) -> &str {
471        let string_header_ptr = heap.get_heap_const_ptr(heap_addr as usize) as *const VecHeader;
472        let string_header = unsafe { *string_header_ptr };
473
474        #[cfg(feature = "debug_vm")]
475        if self.debug_operations_enabled {
476            eprintln!(
477                "read_string: addr=0x{heap_addr:X}, capacity={}, byte_count={}, padding=0x{:X}",
478                string_header.capacity, string_header.element_count, string_header.padding
479            );
480        }
481
482        let byte_count = string_header.element_count as usize;
483
484        #[cfg(feature = "debug_vm")]
485        if string_header.element_count != 0 {
486            debug_assert_eq!(
487                string_header.padding, VEC_HEADER_MAGIC_CODE,
488                "CORRUPTION DETECTED in read_string: String header at 0x{heap_addr:X} has invalid padding 0x{:X}, should be 0x{VEC_HEADER_MAGIC_CODE:X}",
489                string_header.padding
490            );
491            debug_assert_eq!(
492                string_header.capacity, string_header.element_count,
493                "Corruption. strings should never change"
494            );
495            // TODO: just a hack for now to see if it is plausible.
496            debug_assert!(
497                byte_count < 1024,
498                "Strange. string byte_count {byte_count} is unreasonably large"
499            );
500        }
501
502        // String data follows directly after the header
503        let string_data_ptr = unsafe {
504            heap.get_heap_const_ptr(heap_addr as usize + VEC_HEADER_PAYLOAD_OFFSET.0 as usize)
505        };
506
507        unsafe {
508            let bytes = std::slice::from_raw_parts(string_data_ptr, byte_count);
509            match std::str::from_utf8(bytes) {
510                Ok(s) => s,
511                Err(e) => {
512                    panic!("ERROR: Invalid UTF-8 string data at 0x{heap_addr:X}: {e}");
513                    ""
514                }
515            }
516        }
517    }
518
519    /// Strings are immutable, can not be altered after they have been created.
520    /// They can be safely shared and the pointer can be blittable when inside composite types.
521    /// The string data is stored directly after the header in memory.
522    pub(crate) fn create_string(&mut self, dst_reg: u8, string: &str) {
523        let rune_bytes = string.as_bytes();
524        let byte_count = rune_bytes.len();
525        let cap_bytes = if byte_count == 0 { 1 } else { byte_count };
526
527        debug_assert!(
528            byte_count <= MAX_STRING_LEN as usize,
529            "String too large: {byte_count} bytes"
530        );
531
532        // Calculate total size needed: header + string data
533        // We assume that StringHeader is aligned to u32
534        let total_size = size_of::<VecHeader>() + byte_count;
535
536        let header_addr_in_heap = self.memory.heap_allocate_secret(total_size);
537
538        #[cfg(feature = "debug_vm")]
539        match self.memory.execution_mode {
540            ExecutionMode::ConstantEvaluation => {
541                // In constant evaluation, strings should be in heap which is directly after constant area
542                debug_assert!(
543                    header_addr_in_heap >= self.memory.heap_start as u32
544                        && header_addr_in_heap < self.memory.heap_alloc_offset as u32,
545                    "String allocation at 0x{header_addr_in_heap:X} is not in heap during constant evaluation",
546                );
547            }
548            ExecutionMode::NormalExecution => {
549                // In normal execution, strings should be in heap which is after stack
550                debug_assert!(
551                    header_addr_in_heap >= self.memory.heap_start as u32
552                        && header_addr_in_heap < self.memory.heap_alloc_offset as u32,
553                    "String allocation at 0x{header_addr_in_heap:X} is not in heap during normal execution",
554                );
555            }
556        }
557
558        let string_header = VecHeader {
559            capacity: cap_bytes as u16,
560            element_count: byte_count as u16,
561            element_size: 1,
562            padding: VEC_HEADER_MAGIC_CODE,
563        };
564
565        unsafe {
566            let header_ptr =
567                self.memory.get_heap_ptr(header_addr_in_heap as usize) as *mut VecHeader;
568            ptr::write(header_ptr, string_header);
569
570            let string_data_ptr = self
571                .memory
572                .get_heap_ptr(header_addr_in_heap as usize + VEC_HEADER_PAYLOAD_OFFSET.0 as usize);
573            ptr::copy_nonoverlapping(rune_bytes.as_ptr(), string_data_ptr, byte_count);
574        }
575
576        #[cfg(feature = "debug_vm")]
577        if self.debug_operations_enabled {
578            eprintln!(
579                "Creating string: '{string}', header at 0x{header_addr_in_heap:X}, capacity={byte_count}, byte_count={byte_count}, padding=0x{VEC_HEADER_MAGIC_CODE:X}"
580            );
581        }
582
583        set_reg!(self, dst_reg, header_addr_in_heap);
584    }
585
586    #[inline]
587    pub fn execute_string_iter_init(
588        &mut self,
589        target_string_iterator_header_reg: u8,
590        string_header_reg: u8,
591    ) {
592        let string_header_addr = get_reg!(self, string_header_reg);
593
594        // Check that vec header is correct
595        let string_header_ptr = self
596            .memory
597            .get_heap_const_ptr(string_header_addr as usize)
598            .cast::<VecHeader>();
599        let string_header = unsafe { &*string_header_ptr };
600
601        if string_header.padding != VEC_HEADER_MAGIC_CODE {
602            return self.internal_trap(TrapCode::MemoryCorruption);
603        }
604
605        #[cfg(feature = "debug_vm")]
606        if self.debug_operations_enabled {
607            let iter_addr = get_reg!(self, target_string_iterator_header_reg);
608            eprintln!(
609                "string_iter_init: iter_addr: {iter_addr:04X} string_header_addr:{string_header_addr:04X} element_size: {}",
610                string_header.element_size
611            );
612        }
613        let string_iterator = StringIterator {
614            string_heap_ptr: string_header_addr,
615            byte_index: 0,
616            index: 0,
617        };
618
619        let string_iterator_mut_ptr =
620            self.get_ptr_from_reg(target_string_iterator_header_reg) as *mut StringIterator;
621
622        unsafe {
623            ptr::write(string_iterator_mut_ptr, string_iterator);
624        }
625    }
626
627    #[inline]
628    pub fn execute_string_iter_next(
629        &mut self,
630        string_iterator_header_reg: u8,
631        target_variable: u8,
632        branch_offset_lower: u8,
633        branch_offset_upper: u8,
634    ) {
635        let string_iterator =
636            self.get_string_iterator_header_ptr_from_reg(string_iterator_header_reg);
637
638        unsafe {
639            let string_header_addr = (*string_iterator).string_heap_ptr;
640            let string_header_ptr = self
641                .memory
642                .get_heap_const_ptr(string_header_addr as usize)
643                .cast::<VecHeader>();
644
645            let string_header_raw_ptr = self.memory.get_heap_const_ptr(string_header_addr as usize);
646
647            let string_header = &*string_header_ptr;
648            if string_header.padding != VEC_HEADER_MAGIC_CODE {
649                return self.internal_trap(TrapCode::MemoryCorruption);
650            }
651
652            #[cfg(feature = "debug_vm")]
653            if self.debug_operations_enabled {
654                let iter_addr = get_reg!(self, string_iterator_header_reg);
655                let index = (*string_iterator).byte_index;
656                eprintln!(
657                    "string_iter_next: iter_addr: {iter_addr:04X} addr:{string_header_addr:04X} index:{index} len: {}, capacity: {}",
658                    string_header.element_count, string_header.capacity
659                );
660            }
661
662            // Check if we've reached the end
663            if (*string_iterator).byte_index >= string_header.element_count {
664                // Jump to the provided address if we're done
665                let branch_offset = i16_from_u8s!(branch_offset_lower, branch_offset_upper);
666
667                #[cfg(feature = "debug_vm")]
668                {
669                    if self.debug_operations_enabled {
670                        eprintln!("string_iter_next complete. jumping with offset {branch_offset}");
671                    }
672                }
673
674                self.pc = (self.pc as i32 + branch_offset as i32) as usize;
675
676                return;
677            }
678
679            let current_byte_index = (*string_iterator).byte_index as usize;
680            let remaining_byte_count = (string_header.element_count as usize) - current_byte_index;
681            let payload_ptr = string_header_raw_ptr.add(VEC_HEADER_PAYLOAD_OFFSET.0 as usize);
682
683            let remaining_bytes = std::slice::from_raw_parts(
684                payload_ptr.add(current_byte_index),
685                remaining_byte_count,
686            );
687
688            match std::str::from_utf8(remaining_bytes) {
689                Ok(valid_str) => {
690                    if let Some(c) = valid_str.chars().next() {
691                        // Place the decoded character (a Char - u32) into the target register
692                        // Advance the iterator by the actual byte width of the character
693                        let advancement = c.len_utf8() as u16;
694                        (*string_iterator).byte_index += advancement;
695
696                        let raw_u32 = c as u32;
697                        set_reg!(self, target_variable, raw_u32);
698                    } else {
699                        self.internal_trap(TrapCode::InvalidUtf8Sequence);
700                    }
701                }
702                Err(_) => {
703                    // The string data in the VM memory is corrupted/invalid
704                    self.internal_trap(TrapCode::InvalidUtf8Sequence);
705                }
706            }
707        }
708    }
709
710    #[inline]
711    pub fn execute_string_iter_next_pair(
712        &mut self,
713        string_iterator_header_reg: u8,
714        target_key_reg: u8,
715        target_value_reg: u8,
716        branch_offset_lower: u8,
717        branch_offset_upper: u8,
718    ) {
719        let string_iterator =
720            self.get_string_iterator_header_ptr_from_reg(string_iterator_header_reg);
721
722        unsafe {
723            let string_header_addr = (*string_iterator).string_heap_ptr;
724            let string_header_ptr = self
725                .memory
726                .get_heap_const_ptr(string_header_addr as usize)
727                .cast::<VecHeader>();
728
729            let string_header_raw_ptr = self.memory.get_heap_const_ptr(string_header_addr as usize);
730
731            let string_header = &*string_header_ptr;
732            if string_header.padding != VEC_HEADER_MAGIC_CODE {
733                return self.internal_trap(TrapCode::MemoryCorruption);
734            }
735
736            #[cfg(feature = "debug_vm")]
737            if self.debug_operations_enabled {
738                let iter_addr = get_reg!(self, string_iterator_header_reg);
739                let index = (*string_iterator).byte_index;
740                eprintln!(
741                    "string_iter_next: iter_addr: {iter_addr:04X} addr:{string_header_addr:04X} index:{index} len: {}, capacity: {}",
742                    string_header.element_count, string_header.capacity
743                );
744            }
745
746            // Check if we've reached the end
747            if (*string_iterator).byte_index >= string_header.element_count {
748                // Jump to the provided address if we're done
749                let branch_offset = i16_from_u8s!(branch_offset_lower, branch_offset_upper);
750
751                #[cfg(feature = "debug_vm")]
752                {
753                    if self.debug_operations_enabled {
754                        eprintln!("string_iter_next complete. jumping with offset {branch_offset}");
755                    }
756                }
757
758                self.pc = (self.pc as i32 + branch_offset as i32) as usize;
759
760                return;
761            }
762
763            let current_byte_index = (*string_iterator).byte_index as usize;
764            let remaining_byte_count = (string_header.element_count as usize) - current_byte_index;
765            let payload_ptr = string_header_raw_ptr.add(VEC_HEADER_PAYLOAD_OFFSET.0 as usize);
766
767            let remaining_bytes = std::slice::from_raw_parts(
768                payload_ptr.add(current_byte_index),
769                remaining_byte_count,
770            );
771
772            match std::str::from_utf8(remaining_bytes) {
773                Ok(valid_str) => {
774                    if let Some(c) = valid_str.chars().next() {
775                        // Place the decoded character (a Char - u32) into the target register
776                        // Advance the iterator by the actual byte width of the character
777                        let advancement = c.len_utf8() as u16;
778                        (*string_iterator).byte_index += advancement;
779
780                        let raw_u32 = c as u32;
781                        eprintln!(
782                            "raw: {raw_u32} advancement {advancement} -> r{target_value_reg}"
783                        );
784                        set_reg!(self, target_key_reg, (*string_iterator).index);
785                        set_reg!(self, target_value_reg, raw_u32);
786
787                        (*string_iterator).index += 1;
788                    } else {
789                        self.internal_trap(TrapCode::InvalidUtf8Sequence);
790                    }
791                }
792                Err(_) => {
793                    // The string data in the VM memory is corrupted/invalid
794                    self.internal_trap(TrapCode::InvalidUtf8Sequence);
795                }
796            }
797        }
798    }
799}