1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
//! Literal Compilation
//!
//! Handles compilation of literals: integers, booleans, strings, bytes, chars.
use syn::Lit;
use super::{Compiler, CompileError};
impl Compiler {
    /// Emits the bytecode for a single literal expression.
    ///
    /// Handled variants: integer, boolean, string, byte string, char and byte
    /// literals.
    ///
    /// # Errors
    ///
    /// Returns a [`CompileError`] when an integer literal cannot be parsed as
    /// a `u64`, when the literal is a float, or when it is any other variant
    /// not listed above.
    pub(crate) fn compile_literal(&mut self, lit: &Lit) -> Result<(), CompileError> {
        match lit {
            // Integers such as 42, 0xFF, 0b1010.
            Lit::Int(int_lit) => {
                let parsed = int_lit
                    .base10_parse::<u64>()
                    .map_err(|err| CompileError(format!("Invalid integer: {}", err)))?;
                // Zero goes through `emit_zero`; everything else is a constant.
                match parsed {
                    0 => self.emit_zero(),
                    nonzero => self.emit_constant(nonzero),
                }
                Ok(())
            }
            // `true` becomes the constant 1, `false` becomes zero.
            Lit::Bool(bool_lit) => {
                if bool_lit.value {
                    self.emit_constant(1);
                } else {
                    self.emit_zero();
                }
                Ok(())
            }
            // "hello": built byte by byte (STR_NEW followed by STR_PUSH).
            Lit::Str(str_lit) => self.compile_string_literal(&str_lit.value()),
            // b"hello": same construction, from the raw bytes.
            Lit::ByteStr(byte_str) => self.compile_byte_string_literal(&byte_str.value()),
            // 'a': look at the UTF-8 encoding of the character.
            Lit::Char(char_lit) => {
                let mut utf8 = [0u8; 4];
                let encoded: &str = char_lit.value().encode_utf8(&mut utf8);
                // NOTE(review): the two paths below leave different kinds of
                // value behind (a byte constant vs. whatever
                // `compile_string_literal` produces) -- confirm that consumers
                // of char values expect this.
                match encoded.as_bytes() {
                    // Single-byte encoding: push that byte as a constant.
                    &[only] => {
                        self.emit_constant(u64::from(only));
                        Ok(())
                    }
                    // Multi-byte encoding: compile it as a string instead.
                    _ => self.compile_string_literal(encoded),
                }
            }
            // b'a': the byte value as a constant.
            Lit::Byte(byte_lit) => {
                self.emit_constant(u64::from(byte_lit.value()));
                Ok(())
            }
            // The VM is integer-only, so floats are rejected.
            Lit::Float(_) => Err(CompileError(String::from(
                "Float literals not supported (VM is integer-only)",
            ))),
            // Anything else (e.g. verbatim literals).
            _ => Err(CompileError(String::from("Unsupported literal type"))),
        }
    }

    /// Emits the bytecode that builds a string from the UTF-8 bytes of `s`.
    ///
    /// The emitted sequence is the one produced by
    /// [`Self::compile_byte_string_literal`] for `s.as_bytes()`: the byte
    /// length as capacity, STR_NEW, then one DUP / constant / STR_PUSH group
    /// per byte, leaving the string address on the stack.
    pub(crate) fn compile_string_literal(&mut self, s: &str) -> Result<(), CompileError> {
        self.compile_byte_string_literal(s.as_bytes())
    }

    /// Emits the bytecode that builds a string from raw bytes (`b"..."`).
    ///
    /// Stack effect of the emitted code, per the opcode notes in this module:
    /// `[capacity] -> [str_addr]` for STR_NEW, and each pushed byte consumes a
    /// duplicated address so that `[str_addr]` is what remains at the end.
    pub(crate) fn compile_byte_string_literal(&mut self, bytes: &[u8]) -> Result<(), CompileError> {
        // Allocate with exactly as much capacity as there are bytes.
        self.emit_constant(bytes.len() as u64);
        self.emit_str_new();

        for byte in bytes.iter().copied() {
            self.emit_dup(); // [str_addr, str_addr]
            self.emit_constant(u64::from(byte)); // [str_addr, str_addr, byte]
            self.emit_str_push(); // [str_addr]
        }

        Ok(())
    }

    /// Emits the bytecode for `String::new()` or `String::with_capacity(n)`.
    ///
    /// `capacity` is `None` for `String::new()` (capacity pushed via
    /// `emit_zero`) and `Some(n)` for `String::with_capacity(n)` (capacity
    /// pushed via `emit_constant`, even when `n` is 0).
    pub(crate) fn compile_string_constructor(&mut self, capacity: Option<u64>) -> Result<(), CompileError> {
        if let Some(cap) = capacity {
            self.emit_constant(cap);
        } else {
            self.emit_zero();
        }
        self.emit_str_new();
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    /// ASCII text is encoded as one byte per character with the expected values.
    #[test]
    fn test_string_encoding() {
        let encoded: Vec<u8> = "hello".bytes().collect();
        assert_eq!(encoded, vec![104, 101, 108, 108, 111]);
    }

    /// Each of the five Japanese characters takes 3 bytes in UTF-8.
    #[test]
    fn test_utf8_encoding() {
        let greeting = "こんにちは";
        // `str::len` reports the length in bytes, not in characters.
        assert_eq!(greeting.len(), 15);
    }
}