1use crate::memory::{ExecutionMode, Memory};
6use crate::{get_reg, i16_from_u8s, set_reg, TrapCode, Vm};
7use std::{mem::size_of, ptr};
8use swamp_vm_types::{
9 StringIterator, VecHeader, MAX_STRING_LEN, VEC_HEADER_MAGIC_CODE, VEC_HEADER_PAYLOAD_OFFSET,
10};
11
12impl Vm {
13 pub fn get_string_iterator_header_ptr_from_reg(
14 &self,
15 vec_iterator_reg: u8,
16 ) -> *mut StringIterator {
17 self.get_ptr_from_reg(vec_iterator_reg) as *mut StringIterator
18 }
19
20 #[inline]
21 fn get_string(&self, reg: u8) -> &str {
22 let string_header_addr = get_reg!(self, reg);
23 let header_ptr =
24 self.memory()
25 .get_heap_const_ptr(string_header_addr as usize) as *const VecHeader;
26 let header = unsafe { *header_ptr };
27 let byte_count = header.element_count;
28
29 #[cfg(feature = "debug_vm")]
30 if self.debug_operations_enabled {
31 eprintln!(
32 "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
33 self.memory().constant_memory_size,
34 self.memory().stack_start,
35 self.memory().stack_offset,
36 self.memory().heap_start,
37 self.memory().heap_alloc_offset
38 );
39 }
40
41 if byte_count != 0 {
42 debug_assert_eq!(
43 header.padding, VEC_HEADER_MAGIC_CODE,
44 "string is corrupt. it is saying it has length {byte_count}, left: {}, right: {VEC_HEADER_MAGIC_CODE}",
45 header.padding
46 );
47 debug_assert!(
48 header.element_count < MAX_STRING_LEN,
49 "string of strange length. it is saying it has length {byte_count}, left: {}, right: {VEC_HEADER_MAGIC_CODE}",
50 header.padding
51 );
52 }
53
54 let runes_ptr = self.memory().get_heap_const_ptr(
55 (string_header_addr as usize) + VEC_HEADER_PAYLOAD_OFFSET.0 as usize,
56 );
57
58 unsafe {
59 let bytes = std::slice::from_raw_parts(runes_ptr, byte_count as usize);
60
61 if byte_count > 0 {
62 let s = std::str::from_utf8(bytes).unwrap_or("INVALID_UTF8");
63 #[cfg(feature = "debug_vm")]
64 if self.debug_operations_enabled {
65 eprintln!("String content: \"{s}\"");
66 }
67 }
68
69 std::str::from_utf8_unchecked(bytes)
70 }
71 }
72 #[inline]
73 pub fn execute_string_append(&mut self, target_string_reg: u8, string_a: u8, string_b: u8) {
74 #[cfg(feature = "debug_vm")]
75 if self.debug_operations_enabled {
76 eprintln!("=== STRING_APPEND OPERATION ===");
77 eprintln!(
78 "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
79 self.memory().constant_memory_size,
80 self.memory().stack_start,
81 self.memory().stack_offset,
82 self.memory().heap_start,
83 self.memory().heap_alloc_offset
84 );
85
86 let reg_a_value = get_reg!(self, string_a);
88 let reg_b_value = get_reg!(self, string_b);
89
90 eprintln!("String A register {string_a}: 0x{reg_a_value:X}");
91 eprintln!("String B register {string_b}: 0x{reg_b_value:X}");
92 eprintln!("Target register {target_string_reg}");
93 }
94
95 let str_a = self.get_string(string_a);
96 let str_b = self.get_string(string_b);
97
98 let result = str_a.to_string() + str_b;
99
100 #[cfg(feature = "debug_vm")]
101 if self.debug_operations_enabled {
102 eprintln!(
103 "Concatenated string: \"{}\" (length: {})",
104 result,
105 result.len()
106 );
107 }
108
109 self.create_string(target_string_reg, &result);
110 let final_reg_value = get_reg!(self, target_string_reg);
112 #[cfg(feature = "debug_vm")]
113 if self.debug_operations_enabled {
114 eprintln!("Final target register value: 0x{final_reg_value:X}");
115 }
116 }
117
118 #[inline]
119 pub fn execute_string_repeat(&mut self, target_string_reg: u8, string_a: u8, repeat_reg: u8) {
120 #[cfg(feature = "debug_vm")]
121 if self.debug_operations_enabled {
122 eprintln!("=== STRING_REPEAT OPERATION ===");
123 eprintln!(
124 "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
125 self.memory().constant_memory_size,
126 self.memory().stack_start,
127 self.memory().stack_offset,
128 self.memory().heap_start,
129 self.memory().heap_alloc_offset
130 );
131
132 let reg_a_value = get_reg!(self, string_a);
134 let repeat_value = get_reg!(self, repeat_reg);
135 eprintln!("String A register {string_a}: 0x{reg_a_value:X}");
136 eprintln!("Repeat count register {repeat_reg}: {}", repeat_value);
137 eprintln!("Target register {target_string_reg}");
138 }
139
140 let str_a = self.get_string(string_a);
142
143 let count = get_reg!(self, repeat_reg) as usize;
144
145 let result = str_a.repeat(count);
147
148 #[cfg(feature = "debug_vm")]
149 if self.debug_operations_enabled {
150 eprintln!(
151 "Repeated string: \"{}\" (length: {}, repeated {} times)",
152 result,
153 result.len(),
154 count
155 );
156 }
157
158 self.create_string(target_string_reg, &result);
160
161 #[cfg(feature = "debug_vm")]
162 if self.debug_operations_enabled {
163 let final_reg_value = get_reg!(self, target_string_reg);
164 eprintln!("Final target register value: 0x{final_reg_value:X}");
165 }
166 }
167
168
169 #[inline]
170 pub fn execute_string_cmp(&mut self, dest_reg: u8, string_a: u8, string_b: u8) {
171 #[cfg(feature = "debug_vm")]
172 if self.debug_operations_enabled {
173 eprintln!("=== STRING_COMPARE OPERATION ===");
174 eprintln!(
175 "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
176 self.memory().constant_memory_size,
177 self.memory().stack_start,
178 self.memory().stack_offset,
179 self.memory().heap_start,
180 self.memory().heap_alloc_offset
181 );
182 }
183
184 let reg_a_value = get_reg!(self, string_a);
186 let reg_b_value = get_reg!(self, string_b);
187
188 #[cfg(feature = "debug_vm")]
189 if self.debug_operations_enabled {
190 eprintln!("String A register {string_a}: 0x{reg_a_value:X}");
191 eprintln!("String B register {string_b}: 0x{reg_b_value:X}");
192 }
193
194 let str_a = self.get_string(string_a);
195 let str_b = self.get_string(string_b);
196
197 let result = str_a == str_b;
198
199 #[cfg(feature = "debug_vm")]
200 if self.debug_operations_enabled {
201 eprintln!("String comparison result: {result}");
202 }
203
204 set_reg!(self, dest_reg, result as u32);
206 }
207
208 #[inline]
210 pub fn execute_string_to_string(&mut self, dest_reg: u8, source_string: u8) {
211 #[cfg(feature = "debug_vm")]
212 if self.debug_operations_enabled {
213 eprintln!("=== STRING_TO_STRING OPERATION ===");
214 eprintln!(
215 "Memory layout: constants: 0x0-0x{:X}, stack: 0x{:X}-0x{:X}, heap: 0x{:X}-0x{:X}",
216 self.memory().constant_memory_size,
217 self.memory().stack_start,
218 self.memory().stack_offset,
219 self.memory().heap_start,
220 self.memory().heap_alloc_offset
221 );
222
223 let source_reg_value = get_reg!(self, source_string);
224 eprintln!("Source string register {source_string}: 0x{source_reg_value:X}");
225 }
226
227 let source_str = self.get_string(source_string);
228
229 let mut formatted_string = String::with_capacity(source_str.len() + 2);
231 formatted_string.push('"');
232 formatted_string.push_str(source_str);
233 formatted_string.push('"');
234
235 #[cfg(feature = "debug_vm")]
236 if self.debug_operations_enabled {
237 eprintln!(
238 "Formatted string: \"{}\" (length: {})",
239 formatted_string,
240 formatted_string.len()
241 );
242 }
243
244 self.create_string(dest_reg, &formatted_string);
245
246 let final_reg_value = get_reg!(self, dest_reg);
247
248 #[cfg(feature = "debug_vm")]
249 if self.debug_operations_enabled {
250 eprintln!("Final destination register value: 0x{final_reg_value:X}");
251 }
252 }
253
254 pub fn read_string(&self, heap_addr: u32, heap: &Memory) -> &str {
255 let string_header_ptr = heap.get_heap_const_ptr(heap_addr as usize) as *const VecHeader;
256 let string_header = unsafe { *string_header_ptr };
257
258 #[cfg(feature = "debug_vm")]
259 if self.debug_operations_enabled {
260 eprintln!(
261 "read_string: addr=0x{heap_addr:X}, capacity={}, byte_count={}, padding=0x{:X}",
262 string_header.capacity, string_header.element_count, string_header.padding
263 );
264 }
265
266 let byte_count = string_header.element_count as usize;
267
268 #[cfg(feature = "debug_vm")]
269 if string_header.element_count != 0 {
270 debug_assert_eq!(
271 string_header.padding, VEC_HEADER_MAGIC_CODE,
272 "CORRUPTION DETECTED in read_string: String header at 0x{heap_addr:X} has invalid padding 0x{:X}, should be 0x{VEC_HEADER_MAGIC_CODE:X}",
273 string_header.padding
274 );
275 debug_assert_eq!(
276 string_header.capacity, string_header.element_count,
277 "Corruption. strings should never change"
278 );
279 debug_assert!(
281 byte_count < 1024,
282 "Strange. string byte_count {byte_count} is unreasonably large"
283 );
284 }
285
286 let string_data_ptr = unsafe {
288 heap.get_heap_const_ptr(heap_addr as usize + VEC_HEADER_PAYLOAD_OFFSET.0 as usize)
289 };
290
291 unsafe {
292 let bytes = std::slice::from_raw_parts(string_data_ptr, byte_count);
293 match std::str::from_utf8(bytes) {
294 Ok(s) => s,
295 Err(e) => {
296 panic!("ERROR: Invalid UTF-8 string data at 0x{heap_addr:X}: {e}");
297 ""
298 }
299 }
300 }
301 }
302
303 pub(crate) fn create_string(&mut self, dst_reg: u8, string: &str) {
307 let rune_bytes = string.as_bytes();
308 let byte_count = rune_bytes.len();
309 let cap_bytes = if byte_count == 0 { 1 } else { byte_count };
310
311 debug_assert!(
312 byte_count <= MAX_STRING_LEN as usize,
313 "String too large: {byte_count} bytes"
314 );
315
316 let total_size = size_of::<VecHeader>() + byte_count;
319
320 let header_addr_in_heap = self.memory.heap_allocate_secret(total_size);
321
322 #[cfg(feature = "debug_vm")]
323 match self.memory.execution_mode {
324 ExecutionMode::ConstantEvaluation => {
325 debug_assert!(
327 header_addr_in_heap >= self.memory.heap_start as u32
328 && header_addr_in_heap < self.memory.heap_alloc_offset as u32,
329 "String allocation at 0x{header_addr_in_heap:X} is not in heap during constant evaluation",
330 );
331 }
332 ExecutionMode::NormalExecution => {
333 debug_assert!(
335 header_addr_in_heap >= self.memory.heap_start as u32
336 && header_addr_in_heap < self.memory.heap_alloc_offset as u32,
337 "String allocation at 0x{header_addr_in_heap:X} is not in heap during normal execution",
338 );
339 }
340 }
341
342 let string_header = VecHeader {
343 capacity: cap_bytes as u16,
344 element_count: byte_count as u16,
345 element_size: 1,
346 padding: VEC_HEADER_MAGIC_CODE,
347 };
348
349 unsafe {
350 let header_ptr =
351 self.memory.get_heap_ptr(header_addr_in_heap as usize) as *mut VecHeader;
352 ptr::write(header_ptr, string_header);
353
354 let string_data_ptr = self
355 .memory
356 .get_heap_ptr(header_addr_in_heap as usize + VEC_HEADER_PAYLOAD_OFFSET.0 as usize);
357 ptr::copy_nonoverlapping(rune_bytes.as_ptr(), string_data_ptr, byte_count);
358 }
359
360 #[cfg(feature = "debug_vm")]
361 if self.debug_operations_enabled {
362 eprintln!(
363 "Creating string: '{string}', header at 0x{header_addr_in_heap:X}, capacity={byte_count}, byte_count={byte_count}, padding=0x{VEC_HEADER_MAGIC_CODE:X}"
364 );
365 }
366
367 set_reg!(self, dst_reg, header_addr_in_heap);
368 }
369
370 #[inline]
371 pub fn execute_string_iter_init(
372 &mut self,
373 target_string_iterator_header_reg: u8,
374 string_header_reg: u8,
375 ) {
376 let string_header_addr = get_reg!(self, string_header_reg);
377
378 let string_header_ptr = self
380 .memory
381 .get_heap_const_ptr(string_header_addr as usize)
382 .cast::<VecHeader>();
383 let string_header = unsafe { &*string_header_ptr };
384
385 if string_header.padding != VEC_HEADER_MAGIC_CODE {
386 return self.internal_trap(TrapCode::MemoryCorruption);
387 }
388 if string_header.capacity == 0 {
389 return self.internal_trap(TrapCode::VecNeverInitialized);
390 }
391
392 #[cfg(feature = "debug_vm")]
393 if self.debug_operations_enabled {
394 let iter_addr = get_reg!(self, target_string_iterator_header_reg);
395 eprintln!(
396 "string_iter_init: iter_addr: {iter_addr:04X} string_header_addr:{string_header_addr:04X} element_size: {}",
397 string_header.element_size
398 );
399 }
400 let string_iterator = StringIterator {
401 string_heap_ptr: string_header_addr,
402 byte_index: 0,
403 index: 0,
404 };
405
406 let string_iterator_mut_ptr =
407 self.get_ptr_from_reg(target_string_iterator_header_reg) as *mut StringIterator;
408
409 unsafe {
410 ptr::write(string_iterator_mut_ptr, string_iterator);
411 }
412 }
413
414 #[inline]
415 pub fn execute_string_iter_next(
416 &mut self,
417 string_iterator_header_reg: u8,
418 target_variable: u8,
419 branch_offset_lower: u8,
420 branch_offset_upper: u8,
421 ) {
422 let string_iterator =
423 self.get_string_iterator_header_ptr_from_reg(string_iterator_header_reg);
424
425 unsafe {
426 let string_header_addr = (*string_iterator).string_heap_ptr;
427 let string_header_ptr = self
428 .memory
429 .get_heap_const_ptr(string_header_addr as usize)
430 .cast::<VecHeader>();
431
432 let string_header_raw_ptr = self.memory.get_heap_const_ptr(string_header_addr as usize);
433
434 let string_header = &*string_header_ptr;
435 if string_header.padding != VEC_HEADER_MAGIC_CODE {
436 return self.internal_trap(TrapCode::MemoryCorruption);
437 }
438
439 #[cfg(feature = "debug_vm")]
440 if self.debug_operations_enabled {
441 let iter_addr = get_reg!(self, string_iterator_header_reg);
442 let index = (*string_iterator).byte_index;
443 eprintln!(
444 "string_iter_next: iter_addr: {iter_addr:04X} addr:{string_header_addr:04X} index:{index} len: {}, capacity: {}",
445 string_header.element_count, string_header.capacity
446 );
447 }
448
449 if (*string_iterator).byte_index >= string_header.element_count {
451 let branch_offset = i16_from_u8s!(branch_offset_lower, branch_offset_upper);
453
454 #[cfg(feature = "debug_vm")]
455 {
456 if self.debug_operations_enabled {
457 eprintln!("string_iter_next complete. jumping with offset {branch_offset}");
458 }
459 }
460
461 self.pc = (self.pc as i32 + branch_offset as i32) as usize;
462
463 return;
464 }
465
466 let current_byte_index = (*string_iterator).byte_index as usize;
467 let remaining_byte_count = (string_header.element_count as usize) - current_byte_index;
468 let payload_ptr = string_header_raw_ptr.add(VEC_HEADER_PAYLOAD_OFFSET.0 as usize);
469
470 let remaining_bytes = std::slice::from_raw_parts(
471 payload_ptr.add(current_byte_index),
472 remaining_byte_count,
473 );
474
475 match std::str::from_utf8(remaining_bytes) {
476 Ok(valid_str) => {
477 if let Some(c) = valid_str.chars().next() {
478 let advancement = c.len_utf8() as u16;
481 (*string_iterator).byte_index += advancement;
482
483 let raw_u32 = c as u32;
484 set_reg!(self, target_variable, raw_u32);
485 } else {
486 self.internal_trap(TrapCode::InvalidUtf8Sequence);
487 }
488 }
489 Err(_) => {
490 self.internal_trap(TrapCode::InvalidUtf8Sequence);
492 }
493 }
494 }
495 }
496
497 #[inline]
498 pub fn execute_string_iter_next_pair(
499 &mut self,
500 string_iterator_header_reg: u8,
501 target_key_reg: u8,
502 target_value_reg: u8,
503 branch_offset_lower: u8,
504 branch_offset_upper: u8,
505 ) {
506 let string_iterator =
507 self.get_string_iterator_header_ptr_from_reg(string_iterator_header_reg);
508
509 unsafe {
510 let string_header_addr = (*string_iterator).string_heap_ptr;
511 let string_header_ptr = self
512 .memory
513 .get_heap_const_ptr(string_header_addr as usize)
514 .cast::<VecHeader>();
515
516 let string_header_raw_ptr = self.memory.get_heap_const_ptr(string_header_addr as usize);
517
518 let string_header = &*string_header_ptr;
519 if string_header.padding != VEC_HEADER_MAGIC_CODE {
520 return self.internal_trap(TrapCode::MemoryCorruption);
521 }
522
523 #[cfg(feature = "debug_vm")]
524 if self.debug_operations_enabled {
525 let iter_addr = get_reg!(self, string_iterator_header_reg);
526 let index = (*string_iterator).byte_index;
527 eprintln!(
528 "string_iter_next: iter_addr: {iter_addr:04X} addr:{string_header_addr:04X} index:{index} len: {}, capacity: {}",
529 string_header.element_count, string_header.capacity
530 );
531 }
532
533 if (*string_iterator).byte_index >= string_header.element_count {
535 let branch_offset = i16_from_u8s!(branch_offset_lower, branch_offset_upper);
537
538 #[cfg(feature = "debug_vm")]
539 {
540 if self.debug_operations_enabled {
541 eprintln!("string_iter_next complete. jumping with offset {branch_offset}");
542 }
543 }
544
545 self.pc = (self.pc as i32 + branch_offset as i32) as usize;
546
547 return;
548 }
549
550 let current_byte_index = (*string_iterator).byte_index as usize;
551 let remaining_byte_count = (string_header.element_count as usize) - current_byte_index;
552 let payload_ptr = string_header_raw_ptr.add(VEC_HEADER_PAYLOAD_OFFSET.0 as usize);
553
554 let remaining_bytes = std::slice::from_raw_parts(
555 payload_ptr.add(current_byte_index),
556 remaining_byte_count,
557 );
558
559 match std::str::from_utf8(remaining_bytes) {
560 Ok(valid_str) => {
561 if let Some(c) = valid_str.chars().next() {
562 let advancement = c.len_utf8() as u16;
565 (*string_iterator).byte_index += advancement;
566
567 let raw_u32 = c as u32;
568 eprintln!(
569 "raw: {raw_u32} advancement {advancement} -> r{target_value_reg}"
570 );
571 set_reg!(self, target_key_reg, (*string_iterator).index);
572 set_reg!(self, target_value_reg, raw_u32);
573
574 (*string_iterator).index += 1;
575 } else {
576 self.internal_trap(TrapCode::InvalidUtf8Sequence);
577 }
578 }
579 Err(_) => {
580 self.internal_trap(TrapCode::InvalidUtf8Sequence);
582 }
583 }
584 }
585 }
586}