1use crate::memory::{ExecutionMode, Memory};
6use crate::{TrapCode, Vm, get_reg, i16_from_u8s, set_reg};
7use std::{mem::size_of, ptr};
8use swamp_vm_types::{
9 MAX_STRING_LEN, StringIterator, VEC_HEADER_MAGIC_CODE, VEC_HEADER_PAYLOAD_OFFSET, VecHeader,
10};
11
12impl Vm {
13 pub fn get_string_iterator_header_ptr_from_reg(
14 &self,
15 vec_iterator_reg: u8,
16 ) -> *mut StringIterator {
17 self.get_ptr_from_reg(vec_iterator_reg) as *mut StringIterator
18 }
19
20 #[inline]
21 fn get_string(&self, reg: u8) -> &str {
22 let string_header_addr = get_reg!(self, reg);
23 let header_ptr =
24 self.memory()
25 .get_heap_const_ptr(string_header_addr as usize) as *const VecHeader;
26 let header = unsafe { *header_ptr };
27 let byte_count = header.element_count;
28
29 #[cfg(feature = "debug_vm")]
30 if self.debug_operations_enabled {
31 eprintln!(
32 "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
33 self.memory().constant_memory_size,
34 self.memory().stack_start,
35 self.memory().stack_offset,
36 self.memory().heap_start,
37 self.memory().heap_alloc_offset
38 );
39 }
40
41 if byte_count != 0 {
42 debug_assert_eq!(
43 header.padding, VEC_HEADER_MAGIC_CODE,
44 "string is corrupt. it is saying it has length {byte_count}, left: {}, right: {VEC_HEADER_MAGIC_CODE}",
45 header.padding
46 );
47 debug_assert!(
48 header.element_count < MAX_STRING_LEN,
49 "string of strange length. it is saying it has length {byte_count}, left: {}, right: {VEC_HEADER_MAGIC_CODE}",
50 header.padding
51 );
52 }
53
54 let runes_ptr = self.memory().get_heap_const_ptr(
55 (string_header_addr as usize) + VEC_HEADER_PAYLOAD_OFFSET.0 as usize,
56 );
57
58 unsafe {
59 let bytes = std::slice::from_raw_parts(runes_ptr, byte_count as usize);
60
61 if byte_count > 0 {
62 let s = std::str::from_utf8(bytes).unwrap_or("INVALID_UTF8");
63 #[cfg(feature = "debug_vm")]
64 if self.debug_operations_enabled {
65 eprintln!("String content: \"{s}\"");
66 }
67 }
68
69 std::str::from_utf8_unchecked(bytes)
70 }
71 }
72 #[inline]
73 pub fn execute_string_append(&mut self, target_string_reg: u8, string_a: u8, string_b: u8) {
74 #[cfg(feature = "debug_vm")]
75 if self.debug_operations_enabled {
76 eprintln!("=== STRING_APPEND OPERATION ===");
77 eprintln!(
78 "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
79 self.memory().constant_memory_size,
80 self.memory().stack_start,
81 self.memory().stack_offset,
82 self.memory().heap_start,
83 self.memory().heap_alloc_offset
84 );
85
86 let reg_a_value = get_reg!(self, string_a);
88 let reg_b_value = get_reg!(self, string_b);
89
90 eprintln!("String A register {string_a}: 0x{reg_a_value:X}");
91 eprintln!("String B register {string_b}: 0x{reg_b_value:X}");
92 eprintln!("Target register {target_string_reg}");
93 }
94
95 let str_a = self.get_string(string_a);
96 let str_b = self.get_string(string_b);
97
98 let result = str_a.to_string() + str_b;
99
100 #[cfg(feature = "debug_vm")]
101 if self.debug_operations_enabled {
102 eprintln!(
103 "Concatenated string: \"{}\" (length: {})",
104 result,
105 result.len()
106 );
107 }
108
109 self.create_string(target_string_reg, &result);
110 let final_reg_value = get_reg!(self, target_string_reg);
112 #[cfg(feature = "debug_vm")]
113 if self.debug_operations_enabled {
114 eprintln!("Final target register value: 0x{final_reg_value:X}");
115 }
116 }
117
118 #[inline]
119 pub fn execute_string_cmp(&mut self, dest_reg: u8, string_a: u8, string_b: u8) {
120 #[cfg(feature = "debug_vm")]
121 if self.debug_operations_enabled {
122 eprintln!("=== STRING_COMPARE OPERATION ===");
123 eprintln!(
124 "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
125 self.memory().constant_memory_size,
126 self.memory().stack_start,
127 self.memory().stack_offset,
128 self.memory().heap_start,
129 self.memory().heap_alloc_offset
130 );
131 }
132
133 let reg_a_value = get_reg!(self, string_a);
135 let reg_b_value = get_reg!(self, string_b);
136
137 #[cfg(feature = "debug_vm")]
138 if self.debug_operations_enabled {
139 eprintln!("String A register {string_a}: 0x{reg_a_value:X}");
140 eprintln!("String B register {string_b}: 0x{reg_b_value:X}");
141 }
142
143 let str_a = self.get_string(string_a);
144 let str_b = self.get_string(string_b);
145
146 let result = str_a == str_b;
147
148 #[cfg(feature = "debug_vm")]
149 if self.debug_operations_enabled {
150 eprintln!("String comparison result: {result}");
151 }
152
153 set_reg!(self, dest_reg, result as u32);
155 }
156
157 #[inline]
159 pub fn execute_string_to_string(&mut self, dest_reg: u8, source_string: u8) {
160 #[cfg(feature = "debug_vm")]
161 if self.debug_operations_enabled {
162 eprintln!("=== STRING_TO_STRING OPERATION ===");
163 eprintln!(
164 "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
165 self.memory().constant_memory_size,
166 self.memory().stack_start,
167 self.memory().stack_offset,
168 self.memory().heap_start,
169 self.memory().heap_alloc_offset
170 );
171
172 let source_reg_value = get_reg!(self, source_string);
173 eprintln!("Source string register {source_string}: 0x{source_reg_value:X}");
174 }
175
176 let source_str = self.get_string(source_string);
177
178 let mut formatted_string = String::with_capacity(source_str.len() + 2);
180 formatted_string.push('"');
181 formatted_string.push_str(source_str);
182 formatted_string.push('"');
183
184 #[cfg(feature = "debug_vm")]
185 if self.debug_operations_enabled {
186 eprintln!(
187 "Formatted string: \"{}\" (length: {})",
188 formatted_string,
189 formatted_string.len()
190 );
191 }
192
193 self.create_string(dest_reg, &formatted_string);
194
195 let final_reg_value = get_reg!(self, dest_reg);
196
197 #[cfg(feature = "debug_vm")]
198 if self.debug_operations_enabled {
199 eprintln!("Final destination register value: 0x{final_reg_value:X}");
200 }
201 }
202
203 pub fn read_string(&self, heap_addr: u32, heap: &Memory) -> &str {
204 let string_header_ptr = heap.get_heap_const_ptr(heap_addr as usize) as *const VecHeader;
205 let string_header = unsafe { *string_header_ptr };
206
207 #[cfg(feature = "debug_vm")]
208 if self.debug_operations_enabled {
209 eprintln!(
210 "read_string: addr=0x{heap_addr:X}, capacity={}, byte_count={}, padding=0x{:X}",
211 string_header.capacity, string_header.element_count, string_header.padding
212 );
213 }
214
215 let byte_count = string_header.element_count as usize;
216
217 #[cfg(feature = "debug_vm")]
218 if string_header.element_count != 0 {
219 debug_assert_eq!(
220 string_header.padding, VEC_HEADER_MAGIC_CODE,
221 "CORRUPTION DETECTED in read_string: String header at 0x{heap_addr:X} has invalid padding 0x{:X}, should be 0x{VEC_HEADER_MAGIC_CODE:X}",
222 string_header.padding
223 );
224 debug_assert_eq!(
225 string_header.capacity, string_header.element_count,
226 "Corruption. strings should never change"
227 );
228 debug_assert!(
230 byte_count < 512,
231 "Strange. string byte_count {byte_count} is unreasonably large"
232 );
233 }
234
235 let string_data_ptr = unsafe {
237 heap.get_heap_const_ptr(heap_addr as usize + VEC_HEADER_PAYLOAD_OFFSET.0 as usize)
238 };
239
240 unsafe {
241 let bytes = std::slice::from_raw_parts(string_data_ptr, byte_count);
242 match std::str::from_utf8(bytes) {
243 Ok(s) => s,
244 Err(e) => {
245 panic!("ERROR: Invalid UTF-8 string data at 0x{heap_addr:X}: {e}");
246 ""
247 }
248 }
249 }
250 }
251
252 pub(crate) fn create_string(&mut self, dst_reg: u8, string: &str) {
256 let rune_bytes = string.as_bytes();
257 let byte_count = rune_bytes.len();
258 let cap_bytes = if byte_count == 0 { 1 } else { byte_count };
259
260 debug_assert!(
261 byte_count <= MAX_STRING_LEN as usize,
262 "String too large: {byte_count} bytes"
263 );
264
265 let total_size = size_of::<VecHeader>() + byte_count;
268
269 let header_addr_in_heap = self.memory.heap_allocate_secret(total_size);
270
271 #[cfg(feature = "debug_vm")]
272 match self.memory.execution_mode {
273 ExecutionMode::ConstantEvaluation => {
274 debug_assert!(
276 header_addr_in_heap >= self.memory.heap_start as u32
277 && header_addr_in_heap < self.memory.heap_alloc_offset as u32,
278 "String allocation at 0x{header_addr_in_heap:X} is not in heap during constant evaluation",
279 );
280 }
281 ExecutionMode::NormalExecution => {
282 debug_assert!(
284 header_addr_in_heap >= self.memory.heap_start as u32
285 && header_addr_in_heap < self.memory.heap_alloc_offset as u32,
286 "String allocation at 0x{header_addr_in_heap:X} is not in heap during normal execution",
287 );
288 }
289 }
290
291 let string_header = VecHeader {
292 capacity: cap_bytes as u16,
293 element_count: byte_count as u16,
294 element_size: 1,
295 padding: VEC_HEADER_MAGIC_CODE,
296 };
297
298 unsafe {
299 let header_ptr =
300 self.memory.get_heap_ptr(header_addr_in_heap as usize) as *mut VecHeader;
301 ptr::write(header_ptr, string_header);
302
303 let string_data_ptr = self
304 .memory
305 .get_heap_ptr(header_addr_in_heap as usize + VEC_HEADER_PAYLOAD_OFFSET.0 as usize);
306 ptr::copy_nonoverlapping(rune_bytes.as_ptr(), string_data_ptr, byte_count);
307 }
308
309 #[cfg(feature = "debug_vm")]
310 if self.debug_operations_enabled {
311 eprintln!(
312 "Creating string: '{string}', header at 0x{header_addr_in_heap:X}, capacity={byte_count}, byte_count={byte_count}, padding=0x{VEC_HEADER_MAGIC_CODE:X}"
313 );
314 }
315
316 set_reg!(self, dst_reg, header_addr_in_heap);
317 }
318
319 #[inline]
320 pub fn execute_string_iter_init(
321 &mut self,
322 target_string_iterator_header_reg: u8,
323 string_header_reg: u8,
324 ) {
325 let string_header_addr = get_reg!(self, string_header_reg);
326
327 let string_header_ptr = self
329 .memory
330 .get_heap_const_ptr(string_header_addr as usize)
331 .cast::<VecHeader>();
332 let string_header = unsafe { &*string_header_ptr };
333
334 if string_header.padding != VEC_HEADER_MAGIC_CODE {
335 return self.internal_trap(TrapCode::MemoryCorruption);
336 }
337 if string_header.capacity == 0 {
338 return self.internal_trap(TrapCode::VecNeverInitialized);
339 }
340
341 #[cfg(feature = "debug_vm")]
342 if self.debug_operations_enabled {
343 let iter_addr = get_reg!(self, target_string_iterator_header_reg);
344 eprintln!(
345 "string_iter_init: iter_addr: {iter_addr:04X} string_header_addr:{string_header_addr:04X} element_size: {}",
346 string_header.element_size
347 );
348 }
349 let string_iterator = StringIterator {
350 string_heap_ptr: string_header_addr,
351 byte_index: 0,
352 index: 0,
353 };
354
355 let string_iterator_mut_ptr =
356 self.get_ptr_from_reg(target_string_iterator_header_reg) as *mut StringIterator;
357
358 unsafe {
359 ptr::write(string_iterator_mut_ptr, string_iterator);
360 }
361 }
362
363 #[inline]
364 pub fn execute_string_iter_next(
365 &mut self,
366 string_iterator_header_reg: u8,
367 target_variable: u8,
368 branch_offset_lower: u8,
369 branch_offset_upper: u8,
370 ) {
371 let string_iterator =
372 self.get_string_iterator_header_ptr_from_reg(string_iterator_header_reg);
373
374 unsafe {
375 let string_header_addr = (*string_iterator).string_heap_ptr;
376 let string_header_ptr = self
377 .memory
378 .get_heap_const_ptr(string_header_addr as usize)
379 .cast::<VecHeader>();
380
381 let string_header_raw_ptr = self.memory.get_heap_const_ptr(string_header_addr as usize);
382
383 let string_header = &*string_header_ptr;
384 if string_header.padding != VEC_HEADER_MAGIC_CODE {
385 return self.internal_trap(TrapCode::MemoryCorruption);
386 }
387
388 #[cfg(feature = "debug_vm")]
389 if self.debug_operations_enabled {
390 let iter_addr = get_reg!(self, string_iterator_header_reg);
391 let index = (*string_iterator).byte_index;
392 eprintln!(
393 "string_iter_next: iter_addr: {iter_addr:04X} addr:{string_header_addr:04X} index:{index} len: {}, capacity: {}",
394 string_header.element_count, string_header.capacity
395 );
396 }
397
398 if (*string_iterator).byte_index >= string_header.element_count {
400 let branch_offset = i16_from_u8s!(branch_offset_lower, branch_offset_upper);
402
403 #[cfg(feature = "debug_vm")]
404 {
405 if self.debug_operations_enabled {
406 eprintln!("string_iter_next complete. jumping with offset {branch_offset}");
407 }
408 }
409
410 self.pc = (self.pc as i32 + branch_offset as i32) as usize;
411
412 return;
413 }
414
415 let current_byte_index = (*string_iterator).byte_index as usize;
416 let remaining_byte_count = (string_header.element_count as usize) - current_byte_index;
417 let payload_ptr = string_header_raw_ptr.add(VEC_HEADER_PAYLOAD_OFFSET.0 as usize);
418
419 let remaining_bytes = std::slice::from_raw_parts(
420 payload_ptr.add(current_byte_index),
421 remaining_byte_count,
422 );
423
424 match std::str::from_utf8(remaining_bytes) {
425 Ok(valid_str) => {
426 if let Some(c) = valid_str.chars().next() {
427 let advancement = c.len_utf8() as u16;
430 (*string_iterator).byte_index += advancement;
431
432 let raw_u32 = c as u32;
433 set_reg!(self, target_variable, raw_u32);
434 } else {
435 self.internal_trap(TrapCode::InvalidUtf8Sequence);
436 }
437 }
438 Err(_) => {
439 self.internal_trap(TrapCode::InvalidUtf8Sequence);
441 }
442 }
443 }
444 }
445
446 #[inline]
447 pub fn execute_string_iter_next_pair(
448 &mut self,
449 string_iterator_header_reg: u8,
450 target_key_reg: u8,
451 target_value_reg: u8,
452 branch_offset_lower: u8,
453 branch_offset_upper: u8,
454 ) {
455 let string_iterator =
456 self.get_string_iterator_header_ptr_from_reg(string_iterator_header_reg);
457
458 unsafe {
459 let string_header_addr = (*string_iterator).string_heap_ptr;
460 let string_header_ptr = self
461 .memory
462 .get_heap_const_ptr(string_header_addr as usize)
463 .cast::<VecHeader>();
464
465 let string_header_raw_ptr = self.memory.get_heap_const_ptr(string_header_addr as usize);
466
467 let string_header = &*string_header_ptr;
468 if string_header.padding != VEC_HEADER_MAGIC_CODE {
469 return self.internal_trap(TrapCode::MemoryCorruption);
470 }
471
472 #[cfg(feature = "debug_vm")]
473 if self.debug_operations_enabled {
474 let iter_addr = get_reg!(self, string_iterator_header_reg);
475 let index = (*string_iterator).byte_index;
476 eprintln!(
477 "string_iter_next: iter_addr: {iter_addr:04X} addr:{string_header_addr:04X} index:{index} len: {}, capacity: {}",
478 string_header.element_count, string_header.capacity
479 );
480 }
481
482 if (*string_iterator).byte_index >= string_header.element_count {
484 let branch_offset = i16_from_u8s!(branch_offset_lower, branch_offset_upper);
486
487 #[cfg(feature = "debug_vm")]
488 {
489 if self.debug_operations_enabled {
490 eprintln!("string_iter_next complete. jumping with offset {branch_offset}");
491 }
492 }
493
494 self.pc = (self.pc as i32 + branch_offset as i32) as usize;
495
496 return;
497 }
498
499 let current_byte_index = (*string_iterator).byte_index as usize;
500 let remaining_byte_count = (string_header.element_count as usize) - current_byte_index;
501 let payload_ptr = string_header_raw_ptr.add(VEC_HEADER_PAYLOAD_OFFSET.0 as usize);
502
503 let remaining_bytes = std::slice::from_raw_parts(
504 payload_ptr.add(current_byte_index),
505 remaining_byte_count,
506 );
507
508 match std::str::from_utf8(remaining_bytes) {
509 Ok(valid_str) => {
510 if let Some(c) = valid_str.chars().next() {
511 let advancement = c.len_utf8() as u16;
514 (*string_iterator).byte_index += advancement;
515
516 let raw_u32 = c as u32;
517 eprintln!(
518 "raw: {raw_u32} advancement {advancement} -> r{target_value_reg}"
519 );
520 set_reg!(self, target_key_reg, (*string_iterator).index);
521 set_reg!(self, target_value_reg, raw_u32);
522
523 (*string_iterator).index += 1;
524 } else {
525 self.internal_trap(TrapCode::InvalidUtf8Sequence);
526 }
527 }
528 Err(_) => {
529 self.internal_trap(TrapCode::InvalidUtf8Sequence);
531 }
532 }
533 }
534 }
535}