sim_codec/implementation/
portable.rs1use sim_kernel::{CodecId, Error, Expr, NumberLiteral, Result, Symbol};
18
19pub fn encode_portable(codec: CodecId, expr: &Expr) -> Result<String> {
37 let mut out = String::new();
38 write_value(codec, expr, &mut out)?;
39 Ok(out)
40}
41
42pub fn decode_portable(codec: CodecId, source: &str) -> Result<Expr> {
45 let mut parser = Parser {
46 bytes: source.as_bytes(),
47 pos: 0,
48 codec,
49 };
50 let expr = parser.parse_value()?;
51 parser.skip_ws();
52 if parser.pos != parser.bytes.len() {
53 return Err(parser.error("trailing input after value"));
54 }
55 Ok(expr)
56}
57
58fn unsupported(codec: CodecId, form: &str) -> Error {
59 Error::CodecError {
60 codec,
61 message: format!("portable text cannot encode a non-data expression form: {form}"),
62 }
63}
64
65fn write_value(codec: CodecId, expr: &Expr, out: &mut String) -> Result<()> {
66 match expr {
67 Expr::Nil => out.push('_'),
68 Expr::Bool(true) => out.push('T'),
69 Expr::Bool(false) => out.push('F'),
70 Expr::Number(number) => {
71 out.push('N');
72 write_symbol_payload(&number.domain, out);
73 write_qstr(&number.canonical, out);
74 }
75 Expr::Symbol(symbol) => {
76 out.push('S');
77 write_symbol_payload(symbol, out);
78 }
79 Expr::String(text) => {
80 out.push('R');
81 write_qstr(text, out);
82 }
83 Expr::Bytes(bytes) => {
84 out.push('B');
85 write_qstr(&hex_encode(bytes), out);
86 }
87 Expr::List(items) => write_seq(codec, '(', ')', items, out)?,
88 Expr::Vector(items) => write_seq(codec, '[', ']', items, out)?,
89 Expr::Set(items) => {
90 out.push('%');
91 write_seq(codec, '(', ')', items, out)?;
92 }
93 Expr::Map(entries) => {
94 out.push('{');
95 for (key, value) in entries {
96 out.push(' ');
97 write_value(codec, key, out)?;
98 out.push(' ');
99 write_value(codec, value, out)?;
100 }
101 out.push_str(" }");
102 }
103 Expr::Local(_) => return Err(unsupported(codec, "local")),
104 Expr::Call { .. } => return Err(unsupported(codec, "call")),
105 Expr::Infix { .. } => return Err(unsupported(codec, "infix")),
106 Expr::Prefix { .. } => return Err(unsupported(codec, "prefix")),
107 Expr::Postfix { .. } => return Err(unsupported(codec, "postfix")),
108 Expr::Block(_) => return Err(unsupported(codec, "block")),
109 Expr::Quote { .. } => return Err(unsupported(codec, "quote")),
110 Expr::Annotated { .. } => return Err(unsupported(codec, "annotated")),
111 Expr::Extension { .. } => return Err(unsupported(codec, "extension")),
112 }
113 Ok(())
114}
115
116fn write_seq(
117 codec: CodecId,
118 open: char,
119 close: char,
120 items: &[Expr],
121 out: &mut String,
122) -> Result<()> {
123 out.push(open);
124 for item in items {
125 out.push(' ');
126 write_value(codec, item, out)?;
127 }
128 out.push(' ');
129 out.push(close);
130 Ok(())
131}
132
133fn write_symbol_payload(symbol: &Symbol, out: &mut String) {
134 match &symbol.namespace {
135 Some(namespace) => {
136 out.push('Q');
137 write_qstr(namespace, out);
138 write_qstr(&symbol.name, out);
139 }
140 None => {
141 out.push('U');
142 write_qstr(&symbol.name, out);
143 }
144 }
145}
146
/// Appends `text` to `out` as a double-quoted string.
///
/// Backslash, double quote, newline, carriage return and tab are written as
/// the two-character escapes `\\`, `\"`, `\n`, `\r` and `\t`; every other
/// character is copied through unchanged.
fn write_qstr(text: &str, out: &mut String) {
    out.push('"');
    for ch in text.chars() {
        let escape = match ch {
            '\\' => "\\\\",
            '"' => "\\\"",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            _ => {
                out.push(ch);
                continue;
            }
        };
        out.push_str(escape);
    }
    out.push('"');
}
161
/// Renders `bytes` as lowercase hexadecimal, two digits per byte, high nibble
/// first.
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut text = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        let high = usize::from(byte >> 4);
        let low = usize::from(byte & 0x0f);
        text.extend([char::from(DIGITS[high]), char::from(DIGITS[low])]);
    }
    text
}
171
172struct Parser<'a> {
173 bytes: &'a [u8],
174 pos: usize,
175 codec: CodecId,
176}
177
178impl Parser<'_> {
179 fn error(&self, message: impl Into<String>) -> Error {
180 Error::CodecError {
181 codec: self.codec,
182 message: format!(
183 "portable text decode error at byte {}: {}",
184 self.pos,
185 message.into()
186 ),
187 }
188 }
189
190 fn skip_ws(&mut self) {
191 while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_whitespace() {
192 self.pos += 1;
193 }
194 }
195
196 fn peek(&self) -> Option<u8> {
197 self.bytes.get(self.pos).copied()
198 }
199
200 fn bump(&mut self) -> Option<u8> {
201 let byte = self.peek()?;
202 self.pos += 1;
203 Some(byte)
204 }
205
206 fn expect(&mut self, byte: u8) -> Result<()> {
207 if self.bump() == Some(byte) {
208 Ok(())
209 } else {
210 Err(self.error(format!("expected '{}'", byte as char)))
211 }
212 }
213
214 fn parse_value(&mut self) -> Result<Expr> {
215 self.skip_ws();
216 match self.peek() {
217 Some(b'_') => {
218 self.pos += 1;
219 Ok(Expr::Nil)
220 }
221 Some(b'T') => {
222 self.pos += 1;
223 Ok(Expr::Bool(true))
224 }
225 Some(b'F') => {
226 self.pos += 1;
227 Ok(Expr::Bool(false))
228 }
229 Some(b'N') => {
230 self.pos += 1;
231 let domain = self.parse_symbol_payload()?;
232 let canonical = self.parse_qstr()?;
233 Ok(Expr::Number(NumberLiteral { domain, canonical }))
234 }
235 Some(b'S') => {
236 self.pos += 1;
237 Ok(Expr::Symbol(self.parse_symbol_payload()?))
238 }
239 Some(b'R') => {
240 self.pos += 1;
241 Ok(Expr::String(self.parse_qstr()?))
242 }
243 Some(b'B') => {
244 self.pos += 1;
245 let hex = self.parse_qstr()?;
246 Ok(Expr::Bytes(self.parse_hex(&hex)?))
247 }
248 Some(b'(') => Ok(Expr::List(self.parse_seq(b'(', b')')?)),
249 Some(b'[') => Ok(Expr::Vector(self.parse_seq(b'[', b']')?)),
250 Some(b'%') => {
251 self.pos += 1;
252 Ok(Expr::Set(self.parse_seq(b'(', b')')?))
253 }
254 Some(b'{') => self.parse_map(),
255 Some(other) => Err(self.error(format!("unexpected tag byte '{}'", other as char))),
256 None => Err(self.error("unexpected end of input")),
257 }
258 }
259
260 fn parse_seq(&mut self, open: u8, close: u8) -> Result<Vec<Expr>> {
261 self.expect(open)?;
262 let mut items = Vec::new();
263 loop {
264 self.skip_ws();
265 match self.peek() {
266 Some(byte) if byte == close => {
267 self.pos += 1;
268 return Ok(items);
269 }
270 None => return Err(self.error("unterminated sequence")),
271 _ => items.push(self.parse_value()?),
272 }
273 }
274 }
275
276 fn parse_map(&mut self) -> Result<Expr> {
277 self.expect(b'{')?;
278 let mut entries = Vec::new();
279 loop {
280 self.skip_ws();
281 match self.peek() {
282 Some(b'}') => {
283 self.pos += 1;
284 return Ok(Expr::Map(entries));
285 }
286 None => return Err(self.error("unterminated map")),
287 _ => {
288 let key = self.parse_value()?;
289 let value = self.parse_value()?;
290 entries.push((key, value));
291 }
292 }
293 }
294 }
295
296 fn parse_symbol_payload(&mut self) -> Result<Symbol> {
297 match self.bump() {
298 Some(b'Q') => {
299 let namespace = self.parse_qstr()?;
300 let name = self.parse_qstr()?;
301 Ok(Symbol::qualified(namespace, name))
302 }
303 Some(b'U') => Ok(Symbol::new(self.parse_qstr()?)),
304 _ => Err(self.error("expected symbol payload tag 'Q' or 'U'")),
305 }
306 }
307
308 fn parse_qstr(&mut self) -> Result<String> {
309 self.expect(b'"')?;
310 let mut bytes = Vec::new();
311 loop {
312 match self.bump() {
313 Some(b'"') => {
314 return String::from_utf8(bytes)
315 .map_err(|err| self.error(format!("invalid utf-8 in string: {err}")));
316 }
317 Some(b'\\') => match self.bump() {
318 Some(b'\\') => bytes.push(b'\\'),
319 Some(b'"') => bytes.push(b'"'),
320 Some(b'n') => bytes.push(b'\n'),
321 Some(b'r') => bytes.push(b'\r'),
322 Some(b't') => bytes.push(b'\t'),
323 _ => return Err(self.error("invalid escape sequence")),
324 },
325 Some(byte) => bytes.push(byte),
326 None => return Err(self.error("unterminated string")),
327 }
328 }
329 }
330
331 fn parse_hex(&self, hex: &str) -> Result<Vec<u8>> {
332 if !hex.len().is_multiple_of(2) {
333 return Err(self.error("byte literal has odd hex length"));
334 }
335 let bytes = hex.as_bytes();
336 let mut out = Vec::with_capacity(hex.len() / 2);
337 let mut index = 0;
338 while index < bytes.len() {
339 let hi = hex_digit(bytes[index]).ok_or_else(|| self.error("invalid hex digit"))?;
340 let lo = hex_digit(bytes[index + 1]).ok_or_else(|| self.error("invalid hex digit"))?;
341 out.push((hi << 4) | lo);
342 index += 2;
343 }
344 Ok(out)
345 }
346}
347
/// Returns the numeric value (0–15) of an ASCII hexadecimal digit in either
/// case, or `None` for any other byte.
fn hex_digit(byte: u8) -> Option<u8> {
    // Subtracting the chosen base maps the byte straight onto its value:
    // '0' -> 0, 'a' / 'A' -> 10.
    let base = match byte {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => return None,
    };
    Some(byte - base)
}