1
2use num_bigint::{BigInt, BigUint, Sign};
3
4use derive_more::From;
5
6use std::{collections::HashMap, io::{Read, Write}};
7
8use esexpr::{ESExpr, ESExprCodec};
9
10#[derive(From, Debug)]
11pub enum ParseError {
12 #[from(ignore)]
13 InvalidTokenByte(u8),
14
15 #[from(ignore)]
16 InvalidStringTableIndex,
17
18 #[from(ignore)]
19 InvalidLength,
20
21 #[from(ignore)]
22 UnexpectedKeywordToken,
23
24 #[from(ignore)]
25 UnexpectedConstructorEnd,
26
27 #[from(ignore)]
28 UnexpectedEndOfFile,
29
30 #[from(ignore)]
31 InvalidStringPool(esexpr::DecodeError),
32
33 IOError(std::io::Error),
34 Utf8Error(std::str::Utf8Error),
35
36}
37
38enum VarIntTag {
39 ConstructorStart,
40 NonNegIntValue,
41 NegIntValue,
42 StringLengthValue,
43 StringPoolValue,
44 BytesLengthValue,
45 KeywordArgument,
46}
47
48enum ExprToken {
49 ConstructorStart(usize),
50 ConstructorStartKnown(&'static str),
51 ConstructorEnd,
52 Keyword(usize),
53 IntValue(BigInt),
54 StringValue(String),
55 StringPoolValue(usize),
56 BinaryValue(Vec<u8>),
57 Float32Value(f32),
58 Float64Value(f64),
59 BooleanValue(bool),
60 NullValue(BigUint),
61 AppendStringTable,
62}
63
64
65const TAG_VARINT_MASK: u8 = 0xE0;
66const TAG_VARINT_CONSTRUCTOR_START: u8 = 0x00;
67const TAG_VARINT_NON_NEG_INT: u8 = 0x20;
68const TAG_VARINT_NEG_INT: u8 = 0x40;
69const TAG_VARINT_STRING_LENGTH: u8 = 0x60;
70const TAG_VARINT_STRING_POOL: u8 = 0x80;
71const TAG_VARINT_BYTES_LENGTH: u8 = 0xA0;
72const TAG_VARINT_KEYWORD: u8 = 0xC0;
73
74const TAG_CONSTRUCTOR_END: u8 = 0xE0;
75const TAG_TRUE: u8 = 0xE1;
76const TAG_FALSE: u8 = 0xE2;
77const TAG_NULL0: u8 = 0xE3;
78const TAG_FLOAT32: u8 = 0xE4;
79const TAG_FLOAT64: u8 = 0xE5;
80const TAG_CONSTRUCTOR_START_STRING_TABLE: u8 = 0xE6;
81const TAG_CONSTRUCTOR_START_LIST: u8 = 0xE7;
82const TAG_NULL1: u8 = 0xE8;
83const TAG_NULL2: u8 = 0xE9;
84const TAG_NULLN: u8 = 0xEA;
85const TAG_APPEND_STRING_TABLE: u8 = 0xEB;
86
87
88enum ExprPlus {
89 Expr(ESExpr),
90 Keyword(usize),
91 ConstructorEnd,
92 AppendedToStringTable,
93 EndOfFile,
94}
95
96
97struct TokenReader<R> {
98 read: R,
99}
100
101impl <R: Read> Iterator for TokenReader<R> {
102 type Item = Result<ExprToken, ParseError>;
103
104 fn next(&mut self) -> Option<Self::Item> {
105 read_token_impl(self).transpose()
106 }
107}
108
109fn read_token_impl<R: Read>(reader: &mut TokenReader<R>) -> Result<Option<ExprToken>, ParseError> {
110 let mut b: [u8; 1] = [0];
111
112 if reader.read.read(&mut b)? == 0 {
113 return Ok(None);
114 }
115
116 let b: u8 = b[0];
117
118 Ok(Some(
119 if (b & TAG_VARINT_MASK) == TAG_VARINT_MASK {
120 match b {
121 TAG_CONSTRUCTOR_END => ExprToken::ConstructorEnd,
122 TAG_TRUE => ExprToken::BooleanValue(true),
123 TAG_FALSE => ExprToken::BooleanValue(false),
124 TAG_NULL0 => ExprToken::NullValue(BigUint::ZERO),
125 TAG_NULL1 => ExprToken::NullValue(BigUint::from(1u32)),
126 TAG_NULL2 => ExprToken::NullValue(BigUint::from(2u32)),
127 TAG_NULLN => {
128 let n = read_int_full(reader)?;
129 ExprToken::NullValue(n + 3u32)
130 },
131 TAG_FLOAT32 => {
132 let buffer: [u8; 4] = read_bytes(reader)?;
133 ExprToken::Float32Value(f32::from_le_bytes(buffer))
134 },
135 TAG_FLOAT64 => {
136 let buffer: [u8; 8] = read_bytes(reader)?;
137 ExprToken::Float64Value(f64::from_le_bytes(buffer))
138 },
139 TAG_CONSTRUCTOR_START_STRING_TABLE => ExprToken::ConstructorStartKnown("string-table"),
140 TAG_CONSTRUCTOR_START_LIST => ExprToken::ConstructorStartKnown("list"),
141 TAG_APPEND_STRING_TABLE => ExprToken::AppendStringTable,
142 _ => {
143 return Err(ParseError::InvalidTokenByte(b));
144 },
145 }
146 }
147 else {
148 let tag = match b & TAG_VARINT_MASK {
149 TAG_VARINT_CONSTRUCTOR_START => VarIntTag::ConstructorStart,
150 TAG_VARINT_NON_NEG_INT => VarIntTag::NonNegIntValue,
151 TAG_VARINT_NEG_INT => VarIntTag::NegIntValue,
152 TAG_VARINT_STRING_LENGTH => VarIntTag::StringLengthValue,
153 TAG_VARINT_STRING_POOL => VarIntTag::StringPoolValue,
154 TAG_VARINT_BYTES_LENGTH => VarIntTag::BytesLengthValue,
155 TAG_VARINT_KEYWORD => VarIntTag::KeywordArgument,
156 _ => panic!("Should not be reachable"),
157 };
158
159 let mut n = read_int(reader, b)?;
160
161 match tag {
162 VarIntTag::ConstructorStart => ExprToken::ConstructorStart(get_string_table_index(n)?),
163 VarIntTag::NonNegIntValue => ExprToken::IntValue(BigInt::from_biguint(Sign::Plus, n)),
164 VarIntTag::NegIntValue => {
165 n += 1u32;
166 ExprToken::IntValue(BigInt::from_biguint(Sign::Minus, n))
167 },
168 VarIntTag::StringLengthValue => {
169 let len = get_length(n)?;
170 let mut buff = vec![0u8; len];
171 reader.read.read_exact(&mut buff)?;
172 ExprToken::StringValue(std::str::from_utf8(&buff)?.to_owned())
173 },
174 VarIntTag::StringPoolValue => ExprToken::StringPoolValue(get_string_table_index(n)?),
175 VarIntTag::BytesLengthValue => {
176 let len = get_length(n)?;
177 let mut buff = vec![0u8; len];
178 reader.read.read_exact(&mut buff)?;
179 ExprToken::BinaryValue(buff)
180 },
181 VarIntTag::KeywordArgument => ExprToken::Keyword(get_string_table_index(n)?),
182 }
183 }
184 ))
185
186}
187
188fn read_int<R: Read>(reader: &mut TokenReader<R>, initial: u8) -> Result<BigUint, ParseError> {
189 let current = initial & 0x0F;
190 let bit_offset = 4;
191 let has_next = (initial & 0x10) == 0x10;
192
193 read_int_rest(reader, current, bit_offset, has_next)
194}
195
196fn read_int_full<R: Read>(reader: &mut TokenReader<R>) -> Result<BigUint, ParseError> {
197 let current = 0;
198 let bit_offset = 0;
199 let has_next = true;
200
201 read_int_rest(reader, current, bit_offset, has_next)
202}
203
204fn read_int_rest<R: Read>(reader: &mut TokenReader<R>, mut current: u8, mut bit_offset: i32, mut has_next: bool) -> Result<BigUint, ParseError> {
205 let mut buffer = Vec::new();
206
207 while has_next {
208 let b = read_byte(reader)?;
209
210 has_next = (b & 0x80) == 0x80;
211
212 let value = b & 0x7F;
213 let low = value << bit_offset;
214 let high = if bit_offset > 1 { value >> (8 - bit_offset) } else { 0 };
215
216
217
218 current |= low;
219 bit_offset += 7;
220 if bit_offset >= 8 {
221 bit_offset -= 8;
222 buffer.push(current);
223 current = high;
224 }
225 }
226
227 if bit_offset > 0 {
228 buffer.push(current);
229 }
230
231 Ok(BigUint::from_bytes_le(&buffer))
232}
233
234
235
236fn read_bytes<R: Read, const N: usize>(reader: &mut TokenReader<R>) -> Result<[u8; N], std::io::Error> {
237 let mut b: [u8; N] = [0; N];
238 reader.read.read_exact(&mut b)?;
239 Ok(b)
240}
241
242fn read_byte<R: Read>(reader: &mut TokenReader<R>) -> Result<u8, std::io::Error> {
243 Ok(read_bytes::<R, 1>(reader)?[0])
244}
245
246fn get_string_table_index(i: BigUint) -> Result<usize, ParseError> {
247 i.try_into().map_err(|_| ParseError::InvalidStringTableIndex)
248}
249
250fn get_length(i: BigUint) -> Result<usize, ParseError> {
251 i.try_into().map_err(|_| ParseError::InvalidLength)
252}
253
254pub trait ExprParser {
255 fn try_read_next_expr(&mut self) -> Result<Option<ESExpr>, ParseError>;
256 fn read_next_expr(&mut self) -> Result<ESExpr, ParseError>;
257}
258
259struct ExprParserImpl<I> {
260 string_pool: Vec<String>,
261 iter: I,
262}
263
264impl <I: Iterator<Item=Result<ExprToken, ParseError>>> ExprParser for ExprParserImpl<I> {
265 fn try_read_next_expr(&mut self) -> Result<Option<ESExpr>, ParseError> {
266 loop {
267 return match self.read_expr_plus()? {
268 ExprPlus::Expr(expr) => Ok(Some(expr)),
269 ExprPlus::Keyword(_) => Err(ParseError::UnexpectedKeywordToken),
270 ExprPlus::ConstructorEnd => Err(ParseError::UnexpectedConstructorEnd),
271 ExprPlus::AppendedToStringTable => continue,
272 ExprPlus::EndOfFile => Ok(None),
273 }
274 }
275 }
276
277 fn read_next_expr(&mut self) -> Result<ESExpr, ParseError> {
278 self.try_read_next_expr()?.ok_or(ParseError::UnexpectedEndOfFile)
279 }
280}
281
282impl <I: Iterator<Item=Result<ExprToken, ParseError>>> ExprParserImpl<I> {
283 fn read_expr_plus(&mut self) -> Result<ExprPlus, ParseError> {
284 let Some(token) = self.iter.next().transpose()? else {
285 return Ok(ExprPlus::EndOfFile)
286 };
287
288 Ok(ExprPlus::Expr(match token {
289 ExprToken::ConstructorStart(index) => {
290 let name = self.get_string(index)?;
291 self.read_expr_constructor(name)?
292 },
293 ExprToken::ConstructorStartKnown(name) => {
294 self.read_expr_constructor(name.to_owned())?
295 }
296 ExprToken::ConstructorEnd => return Ok(ExprPlus::ConstructorEnd),
297 ExprToken::Keyword(index) => return Ok(ExprPlus::Keyword(index)),
298 ExprToken::IntValue(i) => ESExpr::Int(i),
299 ExprToken::StringValue(s) => ESExpr::Str(s),
300 ExprToken::StringPoolValue(index) => ESExpr::Str(self.get_string(index)?),
301 ExprToken::BinaryValue(b) => ESExpr::Binary(b),
302 ExprToken::Float32Value(f) => ESExpr::Float32(f),
303 ExprToken::Float64Value(d) => ESExpr::Float64(d),
304 ExprToken::BooleanValue(b) => ESExpr::Bool(b),
305 ExprToken::NullValue(level) => ESExpr::Null(level),
306 ExprToken::AppendStringTable => {
307 let new_string_table = self.read_next_expr()?;
308 let new_string_table = AppendedStringPool::decode_esexpr(new_string_table)
309 .map_err(ParseError::InvalidStringPool)?;
310
311 match new_string_table {
312 AppendedStringPool::Fixed(mut fixed_string_pool) =>
313 self.string_pool.append(&mut fixed_string_pool.strings),
314
315 AppendedStringPool::Single(s) =>
316 self.string_pool.push(s),
317 }
318
319 return Ok(ExprPlus::AppendedToStringTable)
320 }
321 }))
322 }
323
324 fn read_expr_constructor(&mut self, name: String) -> Result<ESExpr, ParseError> {
325 let mut args = Vec::new();
326 let mut kwargs = HashMap::new();
327
328 loop {
329 match self.read_expr_plus()? {
330 ExprPlus::Expr(expr) => args.push(expr),
331 ExprPlus::Keyword(index) => {
332 let kw = self.get_string(index)?;
333 let value = self.read_next_expr()?;
334 kwargs.insert(kw, value);
335 },
336 ExprPlus::ConstructorEnd => break,
337 ExprPlus::AppendedToStringTable => continue,
338 ExprPlus::EndOfFile => return Err(ParseError::UnexpectedEndOfFile),
339 }
340 }
341
342 Ok(ESExpr::Constructor { name, args, kwargs })
343 }
344
345 fn get_string(&self, i: usize) -> Result<String, ParseError> {
346 self.string_pool.get(i)
347 .map(|s| s.as_str().to_owned())
348 .ok_or(ParseError::InvalidStringTableIndex)
349 }
350}
351
352impl <I: Iterator<Item=Result<ExprToken, ParseError>>> Iterator for ExprParserImpl<I> {
353 type Item = Result<ESExpr, ParseError>;
354
355 fn next(&mut self) -> Option<Self::Item> {
356 self.try_read_next_expr().transpose()
357 }
358}
359
360pub fn parse_existing_string_pool<'a, F: Read + 'a>(f: F, string_pool: Vec<String>) -> impl ExprParser + Iterator<Item=Result<ESExpr, ParseError>> + 'a {
361 ExprParserImpl {
362 iter: TokenReader { read: f },
363 string_pool,
364 }
365}
366
367pub fn parse<'a, F: Read + 'a>(f: F) -> impl ExprParser + Iterator<Item=Result<ESExpr, ParseError>> + 'a {
368 parse_existing_string_pool(f, Vec::new())
369}
370
371
372#[derive(From, Debug)]
373pub enum GeneratorError {
374 #[from(ignore)]
375 StringNotInStringPool,
376
377 IOError(std::io::Error),
378}
379
380
381pub trait StringPool {
382 fn lookup(&mut self, s: &str) -> Option<usize>;
383}
384
385
386pub struct ExprGenerator<'a, W> {
387 out: &'a mut W,
388 string_pool: Vec<String>,
389}
390
391impl <'a, W: Write> ExprGenerator<'a, W> {
392 pub fn new(out: &'a mut W) -> Self {
393 ExprGenerator {
394 out,
395 string_pool: Vec::new(),
396 }
397 }
398
399 pub fn new_with_string_pool(out: &'a mut W, string_pool: Vec<String>) -> Self {
400 ExprGenerator {
401 out,
402 string_pool,
403 }
404 }
405
406 pub fn generate(&mut self, expr: &ESExpr) -> Result<(), GeneratorError> {
407 let old_string_pool_end = self.string_pool.len();
408
409 let mut generator = ExprGenerator {
410 out: &mut std::io::sink(),
411 string_pool: Vec::new(),
412 };
413
414 std::mem::swap(&mut self.string_pool, &mut generator.string_pool);
415 generator.generate_expr(expr)?;
417
418 std::mem::swap(&mut self.string_pool, &mut generator.string_pool);
419
420 match &self.string_pool[old_string_pool_end..] {
421 [] => {},
422 [ s ] => {
423 let s = s.to_owned();
424 self.write(TAG_APPEND_STRING_TABLE)?;
425 self.generate_expr(&ESExpr::Str(s.to_owned()))?;
426 },
427 new_strings => {
428 let sp_expr = FixedStringPool {
429 strings: new_strings.to_vec(),
430 }.encode_esexpr();
431
432 self.write(TAG_APPEND_STRING_TABLE)?;
433 self.generate_expr(&sp_expr)?;
434 }
435 }
436
437 self.generate_expr(expr)?;
438 Ok(())
439 }
440
441 fn generate_expr(&mut self, expr: &ESExpr) -> Result<(), GeneratorError> {
442 match expr {
443 ESExpr::Constructor { name, args, kwargs } => {
444 match name.as_str() {
445 "string-table" => self.write(TAG_CONSTRUCTOR_START_STRING_TABLE)?,
446 "list" => self.write(TAG_CONSTRUCTOR_START_LIST)?,
447 _ => {
448 let index = self.get_string_pool_index(&name)?;
449 self.write_int_tag(TAG_VARINT_CONSTRUCTOR_START, &BigUint::from(index))?;
450 }
451 }
452
453 for arg in args {
454 self.generate_expr(arg)?;
455 }
456
457 for (kw, value) in kwargs {
458 let index = self.get_string_pool_index(&kw)?;
459 self.write_int_tag(TAG_VARINT_KEYWORD, &BigUint::from(index))?;
460 self.generate_expr(value)?;
461 }
462
463 self.write(TAG_CONSTRUCTOR_END)?;
464 },
465 ESExpr::Bool(true) => {
466 self.write(TAG_TRUE)?;
467 },
468 ESExpr::Bool(false) => {
469 self.write(TAG_FALSE)?;
470 },
471 ESExpr::Int(i) => {
472 let (sign, mut magnitude) = i.clone().into_parts();
473
474 match sign {
475 Sign::NoSign | Sign::Plus => {
476 self.write_int_tag(TAG_VARINT_NON_NEG_INT, &magnitude)?;
477 },
478
479 Sign::Minus => {
480 magnitude -= 1usize;
481 self.write_int_tag(TAG_VARINT_NEG_INT, &magnitude)?;
482 },
483 }
484 },
485 ESExpr::Str(s) => {
486 self.write_int_tag(TAG_VARINT_STRING_LENGTH, &BigUint::from(s.len()))?;
487 self.out.write_all(&s.as_bytes())?;
488 },
489 ESExpr::Binary(b) => {
490 self.write_int_tag(TAG_VARINT_BYTES_LENGTH, &BigUint::from(b.len()))?;
491 self.out.write_all(&b)?;
492 },
493 ESExpr::Float32(f) => {
494 self.write(TAG_FLOAT32)?;
495 self.out.write_all(&f32::to_le_bytes(*f))?;
496 },
497 ESExpr::Float64(d) => {
498 self.write(TAG_FLOAT64)?;
499 self.out.write_all(&f64::to_le_bytes(*d))?;
500 },
501 ESExpr::Null(level) => {
502 if *level == BigUint::ZERO {
503 self.write(TAG_NULL0)?;
504 }
505 else if *level == BigUint::from(1u32) {
506 self.write(TAG_NULL1)?;
507 }
508 else if *level == BigUint::from(2u32) {
509 self.write(TAG_NULL2)?;
510 }
511 else {
512 self.write(TAG_NULLN)?;
513 self.write_int_full(&(level - 3u32))?;
514 }
515 },
516 }
517
518 Ok(())
519 }
520
521 fn get_string_pool_index(&mut self, s: &str) -> Result<usize, GeneratorError> {
522 if let Some(index) = self.string_pool.iter().position(|s2| s2 == s) {
523 return Ok(index);
524 }
525
526 let index = self.string_pool.len();
527 self.string_pool.push(s.to_owned());
528
529 self.write(TAG_APPEND_STRING_TABLE)?;
530 self.generate_expr(&ESExpr::Str(s.to_owned()))?;
531
532 Ok(index)
533 }
534
535 fn write_int_tag(&mut self, tag: u8, i: &BigUint) -> Result<(), GeneratorError> {
536 let buff = i.to_bytes_le();
537
538 let b0 = *buff.get(0).unwrap_or(&0);
539 let mut current = tag | (b0 & 0x0F);
540 if buff.len() < 2 && (b0 & 0xF0) == 0 {
541 self.write(current)?;
542 return Ok(());
543 }
544
545 current |= 0x10;
546 self.write(current)?;
547
548 current = b0 >> 4;
549 let bit_index = 4;
550
551 self.write_int_rest(&buff[1..], current, bit_index)
552 }
553
554 fn write_int_full(&mut self, i: &BigUint) -> Result<(), GeneratorError> {
555 if *i == BigUint::ZERO {
556 self.write(0)?;
557 return Ok(());
558 }
559
560 let buff = i.to_bytes_le();
561 let current = 0;
562 let bit_index = 0;
563
564 self.write_int_rest(&buff, current, bit_index)
565 }
566
567 fn write_int_rest(&mut self, buff: &[u8], mut current: u8, mut bit_index: i32) -> Result<(), GeneratorError> {
568 for (i, b) in buff.iter().copied().enumerate() {
569 let mut bit_index2 = 0;
570 while bit_index2 < 8 {
571 let written_bits = std::cmp::min(7 - bit_index, 8 - bit_index2);
572 current |= ((b >> bit_index2) & 0x7F) << bit_index;
573
574
575 bit_index += written_bits;
576 bit_index2 += written_bits;
577 if bit_index >= 7 {
578 if i < buff.len() - 1 || (bit_index2 < 8 && (b >> bit_index2) != 0) {
579 current |= 0x80;
580 }
581
582 self.write(current)?;
583 bit_index = 0;
584 current = 0;
585 }
586 }
587 }
588
589 if current != 0 {
590 self.write(current)?;
591 }
592
593 Ok(())
594 }
595
596 fn write(&mut self, b: u8) -> Result<(), GeneratorError> {
597 Ok(self.out.write_all(std::slice::from_ref(&b))?)
598 }
599}
600
601
602#[derive(ESExprCodec, Debug, PartialEq, Clone)]
603#[constructor = "string-table"]
604pub struct FixedStringPool {
605 #[vararg]
606 pub strings: Vec<String>,
607}
608
609impl StringPool for FixedStringPool {
610 fn lookup(&mut self, s: &str) -> Option<usize> {
611 self.strings.iter().position(|a| a == s)
612 }
613}
614
615#[derive(ESExprCodec, Debug, PartialEq, Clone)]
616pub enum AppendedStringPool {
617 #[inline_value]
618 Fixed(FixedStringPool),
619
620 #[inline_value]
621 Single(String),
622}
623
624
625#[cfg(test)]
626mod test {
627 use std::str::FromStr;
628
629 use num_bigint::BigUint;
630
631 use crate::*;
632
633 #[test]
634 fn encode_int() {
635 fn check(n: &str, enc: &[u8]) {
636 let n = BigUint::from_str(n).unwrap();
637
638 let mut buff: Vec<u8> = Vec::new();
639 let mut gen = ExprGenerator {
640 out: &mut buff,
641 string_pool: vec![],
642 };
643
644 gen.write_int_tag(TAG_VARINT_NON_NEG_INT, &n).unwrap();
645
646 assert_eq!(enc, &buff);
647
648 let mut reader = TokenReader {
649 read: &enc[1..],
650 };
651 let m = read_int(&mut reader, enc[0]).unwrap();
652 assert_eq!(n, m);
653 }
654
655 check("4", &[0x24]);
656 check("9223372036854775807", &[0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07]);
657 check("18446744073709551615", &[0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F]);
658 check("12345678901234567890", &[0x32, 0xAD, 0xE1, 0xC7, 0xF5, 0x8C, 0xD3, 0xD2, 0xDA, 0x0A]);
659 check("98765432109876543210", &[0x3A, 0xEE, 0xCF, 0xC9, 0xF2, 0xB8, 0x9A, 0x95, 0xD5, 0x55]);
660 }
661}
662