1use alloc::string::String;
2use alloc::vec::Vec;
3
4use esexpr::{ESExpr, ESExprCodec, ESExprEncodedEq};
5use num_bigint::BigUint;
6
7use crate::append_only_string_list::AppendOnlyStringList;
8
/// Errors produced while reading the binary expression format.
///
/// `IOError` is the error type reported by the underlying reader.
#[derive(Debug)]
pub enum ParseError<IOError> {
    /// A tag byte in the fixed-tag range did not match any known tag.
    InvalidTokenByte(u8),

    /// A string table index did not fit in `usize`, or no string exists at that index.
    InvalidStringTableIndex,

    /// An encoded length could not be used as a `usize` length.
    InvalidLength,

    /// A keyword token appeared where a complete expression was required.
    UnexpectedKeywordToken,

    /// A constructor-end token appeared where a complete expression was required.
    UnexpectedConstructorEnd,

    /// The input ended in the middle of a value, inside a constructor,
    /// or where an expression was required.
    UnexpectedEndOfFile,

    /// The expression following an append-string-table token could not be
    /// decoded as a string pool.
    InvalidStringPool(esexpr::DecodeError),

    /// The underlying reader reported an error.
    IOError(IOError),

    /// The bytes of a string were not valid UTF-8.
    Utf8Error(core::str::Utf8Error),
}
39
// Lets `?` lift a standard I/O error into `ParseError::IOError`.
#[cfg(feature = "std")]
impl From<std::io::Error> for ParseError<std::io::Error> {
    fn from(io_error: std::io::Error) -> Self {
        Self::IOError(io_error)
    }
}
46
47impl<IOError> From<core::str::Utf8Error> for ParseError<IOError> {
48 fn from(err: core::str::Utf8Error) -> Self {
49 ParseError::Utf8Error(err)
50 }
51}
52
53impl<IOError> From<alloc::string::FromUtf8Error> for ParseError<IOError> {
54 fn from(value: alloc::string::FromUtf8Error) -> Self {
55 ParseError::Utf8Error(value.utf8_error())
56 }
57}
58
/// A batch of strings to add to the string pool.
///
/// Per the `esexpr` attributes this is coded as a `string-table` constructor
/// whose variadic arguments are the strings.
#[derive(ESExprCodec, ESExprEncodedEq, Debug, PartialEq, Clone)]
#[esexpr(constructor = "string-table")]
struct FixedStringPool {
    /// The strings carried by the table, in order.
    #[esexpr(vararg)]
    pub strings: Vec<String>,
}
65
/// The value that follows an append-string-table token.
///
/// Both variants are marked `inline_value`, so the value is decoded either as
/// a whole [`FixedStringPool`] or as a single string.
#[derive(ESExprCodec, ESExprEncodedEq, Debug, PartialEq, Clone)]
enum AppendedStringPool {
    /// A full `string-table` whose strings are all appended to the pool.
    #[esexpr(inline_value)]
    Fixed(FixedStringPool),

    /// One string that is pushed onto the pool.
    #[esexpr(inline_value)]
    Single(String),
}
74
/// Outcome of reading one item from the token source: either a complete
/// expression or a structural marker that the caller must interpret.
enum ExprPlus<'a> {
    /// A complete expression.
    Expr(ESExpr<'a>),
    /// A keyword-argument marker carrying the string table index of the keyword name.
    Keyword(usize),
    /// The end of the enclosing constructor.
    ConstructorEnd,
    /// A string table append was processed; no expression was produced.
    AppendedToStringTable,
    /// The token source has no more tokens.
    EndOfFile,
}
82
83fn get_string_table_index<E>(i: BigUint) -> Result<usize, ParseError<E>> {
84 i.try_into().map_err(|_| ParseError::InvalidStringTableIndex)
85}
86
87fn get_length<E>(i: BigUint) -> Result<usize, ParseError<E>> {
88 i.try_into().map_err(|_| ParseError::InvalidLength)
89}
90
91fn get_string<'a, E>(string_pool: &'a AppendOnlyStringList, i: usize) -> Result<&'a str, ParseError<E>> {
92 string_pool.get(i).ok_or(ParseError::InvalidStringTableIndex)
93}
94
95macro_rules! reader_mod {
96 ($syncness: ident) => {
97 use alloc::borrow::{Cow, ToOwned};
98 use alloc::collections::BTreeMap;
99 use alloc::vec;
100
101 use esexpr::cowstr::CowStr;
102 use half::f16;
103 use num_bigint::{BigInt, Sign};
104
105 use crate::async_macros::{do_await, if_async, maybe_async};
106 use crate::format::*;
107
108 maybe_async!(
109 $syncness,
110 pub(super) fn read_token_impl<E>(reader: &mut impl Read<E>) -> Result<Option<ExprToken>, ParseError<E>> {
111 let mut b: [u8; 1] = [0];
112
113 if do_await!($syncness, reader.read(&mut b)).map_err(ParseError::IOError)? == 0 {
114 return Ok(None);
115 }
116
117 let b = b[0];
118
119 Ok(Some(if (b & TAG_VARINT_MASK) == TAG_VARINT_MASK {
120 match b {
121 TAG_CONSTRUCTOR_END => ExprToken::ConstructorEnd,
122 TAG_TRUE => ExprToken::BooleanValue(true),
123 TAG_FALSE => ExprToken::BooleanValue(false),
124 TAG_NULL0 => ExprToken::NullValue(BigUint::ZERO),
125 TAG_NULL1 => ExprToken::NullValue(BigUint::from(1u32)),
126 TAG_NULL2 => ExprToken::NullValue(BigUint::from(2u32)),
127 TAG_NULLN => {
128 let n = do_await!($syncness, read_int_full(reader))?;
129 ExprToken::NullValue(n + 3u32)
130 },
131 TAG_FLOAT16 => {
132 let buffer: [u8; 2] = do_await!($syncness, read_bytes(reader))?;
133 ExprToken::Float16Value(f16::from_le_bytes(buffer))
134 },
135 TAG_FLOAT32 => {
136 let buffer: [u8; 4] = do_await!($syncness, read_bytes(reader))?;
137 ExprToken::Float32Value(f32::from_le_bytes(buffer))
138 },
139 TAG_FLOAT64 => {
140 let buffer: [u8; 8] = do_await!($syncness, read_bytes(reader))?;
141 ExprToken::Float64Value(f64::from_le_bytes(buffer))
142 },
143 TAG_CONSTRUCTOR_START_STRING_TABLE => ExprToken::ConstructorStartKnown("string-table"),
144 TAG_CONSTRUCTOR_START_LIST => ExprToken::ConstructorStartKnown("list"),
145 TAG_APPEND_STRING_TABLE => ExprToken::AppendStringTable,
146 TAG_ARRAY16 => {
147 let n = get_length(do_await!($syncness, read_int_full(reader))?)?;
148 let mut buff = vec![0u16; n];
149 do_await!(
150 $syncness,
151 read_exact(reader, bytemuck::cast_slice_mut::<u16, u8>(&mut buff))
152 )?;
153 #[cfg(target_endian = "big")]
154 {
155 for b in buff.iter_mut() {
156 *b = b.swap_bytes();
157 }
158 }
159 ExprToken::Array16Value(buff)
160 },
161 TAG_ARRAY32 => {
162 let n = get_length(do_await!($syncness, read_int_full(reader))?)?;
163 let mut buff = vec![0u32; n];
164 do_await!(
165 $syncness,
166 read_exact(reader, bytemuck::cast_slice_mut::<u32, u8>(&mut buff))
167 )?;
168 #[cfg(target_endian = "big")]
169 {
170 for b in buff.iter_mut() {
171 *b = b.swap_bytes();
172 }
173 }
174 ExprToken::Array32Value(buff)
175 },
176 TAG_ARRAY64 => {
177 let n = get_length(do_await!($syncness, read_int_full(reader))?)?;
178 let mut buff = vec![0u64; n];
179 do_await!(
180 $syncness,
181 read_exact(reader, bytemuck::cast_slice_mut::<u64, u8>(&mut buff))
182 )?;
183 #[cfg(target_endian = "big")]
184 {
185 for b in buff.iter_mut() {
186 *b = b.swap_bytes();
187 }
188 }
189 ExprToken::Array64Value(buff)
190 },
191 TAG_ARRAY128 => {
192 let n = get_length(do_await!($syncness, read_int_full(reader))?)?;
193 let mut buff = vec![0u128; n];
194 do_await!(
195 $syncness,
196 read_exact(reader, bytemuck::cast_slice_mut::<u128, u8>(&mut buff))
197 )?;
198 #[cfg(target_endian = "big")]
199 {
200 for b in buff.iter_mut() {
201 *b = b.swap_bytes();
202 }
203 }
204 ExprToken::Array128Value(buff)
205 },
206
207 _ => {
208 return Err(ParseError::InvalidTokenByte(b));
209 },
210 }
211 }
212 else {
213 let tag = match b & TAG_VARINT_MASK {
214 TAG_VARINT_CONSTRUCTOR_START => VarIntTag::ConstructorStart,
215 TAG_VARINT_NON_NEG_INT => VarIntTag::NonNegIntValue,
216 TAG_VARINT_NEG_INT => VarIntTag::NegIntValue,
217 TAG_VARINT_STRING_LENGTH => VarIntTag::StringLengthValue,
218 TAG_VARINT_STRING_POOL => VarIntTag::StringPoolValue,
219 TAG_VARINT_ARRAY8_LENGTH => VarIntTag::Array8LengthValue,
220 TAG_VARINT_KEYWORD => VarIntTag::KeywordArgument,
221 _ => unreachable!("remaining bits have been masked"),
222 };
223
224 let mut n = do_await!($syncness, read_int(reader, b))?;
225
226 match tag {
227 VarIntTag::ConstructorStart => ExprToken::ConstructorStart(get_string_table_index(n)?),
228 VarIntTag::NonNegIntValue => ExprToken::IntValue(BigInt::from_biguint(Sign::Plus, n)),
229 VarIntTag::NegIntValue => {
230 n += 1u32;
231 ExprToken::IntValue(BigInt::from_biguint(Sign::Minus, n))
232 },
233 VarIntTag::StringLengthValue => {
234 let len = get_length(n)?;
235 let mut buff = vec![0u8; len];
236 do_await!($syncness, read_exact(reader, &mut buff))?;
237 ExprToken::StringValue(String::from_utf8(buff)?.to_owned())
238 },
239 VarIntTag::StringPoolValue => ExprToken::StringPoolValue(get_string_table_index(n)?),
240 VarIntTag::Array8LengthValue => {
241 let len = get_length(n)?;
242 let mut buff = vec![0u8; len];
243 do_await!($syncness, read_exact(reader, &mut buff))?;
244 ExprToken::Array8Value(buff)
245 },
246 VarIntTag::KeywordArgument => ExprToken::Keyword(get_string_table_index(n)?),
247 }
248 }))
249 }
250 );
251
252 maybe_async!(
253 $syncness,
254 fn read_int<E>(reader: &mut impl Read<E>, initial: u8) -> Result<BigUint, ParseError<E>> {
255 let current = initial & 0x0F;
256 let bit_offset = 4;
257 let has_next = (initial & 0x10) == 0x10;
258
259 do_await!($syncness, read_int_rest(reader, current, bit_offset, has_next))
260 }
261 );
262
263 maybe_async!(
264 $syncness,
265 fn read_int_full<E>(reader: &mut impl Read<E>) -> Result<BigUint, ParseError<E>> {
266 let current = 0;
267 let bit_offset = 0;
268 let has_next = true;
269
270 do_await!($syncness, read_int_rest(reader, current, bit_offset, has_next))
271 }
272 );
273
274 maybe_async!(
275 $syncness,
276 fn read_int_rest<E>(
277 reader: &mut impl Read<E>,
278 mut current: u8,
279 mut bit_offset: i32,
280 mut has_next: bool,
281 ) -> Result<BigUint, ParseError<E>> {
282 let mut buffer = Vec::new();
283
284 while has_next {
285 let b = do_await!($syncness, read_byte(reader))?;
286
287 has_next = (b & 0x80) == 0x80;
288
289 let value = b & 0x7F;
290 let low = value << bit_offset;
291 let high = if bit_offset > 1 {
292 value >> (8 - bit_offset)
293 }
294 else {
295 0
296 };
297
298 current |= low;
299 bit_offset += 7;
300 if bit_offset >= 8 {
301 bit_offset -= 8;
302 buffer.push(current);
303 current = high;
304 }
305 }
306
307 if bit_offset > 0 {
308 buffer.push(current);
309 }
310
311 Ok(BigUint::from_bytes_le(&buffer))
312 }
313 );
314
315 maybe_async!(
316 $syncness,
317 fn read_bytes<E, const N: usize>(reader: &mut impl Read<E>) -> Result<[u8; N], ParseError<E>> {
318 let mut b: [u8; N] = [0; N];
319 do_await!($syncness, read_exact(reader, &mut b))?;
320 Ok(b)
321 }
322 );
323
324 maybe_async!(
325 $syncness,
326 fn read_exact<E>(reader: &mut impl Read<E>, mut buf: &mut [u8]) -> Result<(), ParseError<E>> {
327 while !buf.is_empty() {
328 let n = do_await!($syncness, reader.read(buf)).map_err(ParseError::IOError)?;
329 if n == 0 {
330 return Err(ParseError::UnexpectedEndOfFile);
331 }
332
333 buf = &mut buf[n..];
334 }
335
336 Ok(())
337 }
338 );
339
340 maybe_async!(
341 $syncness,
342 fn read_byte<E>(reader: &mut impl Read<E>) -> Result<u8, ParseError<E>> {
343 Ok(do_await!($syncness, read_bytes::<E, 1>(reader))?[0])
344 }
345 );
346
/// A source of parsed expressions. `E` is the error type of the underlying reader.
pub trait ExprParser<E> {
    // Reads the next expression if there is one.
    // The implementation in this file returns `Ok(None)` once the token source
    // is exhausted. The returned expression borrows from the parser.
    maybe_async!(
        $syncness,
        fn try_read_next_expr<'a>(&'a mut self) -> Result<Option<ESExpr<'a>>, ParseError<E>>;
    );

    // Reads the next expression, which must exist.
    // The implementation in this file reports a missing expression as
    // `ParseError::UnexpectedEndOfFile`.
    maybe_async!(
        $syncness,
        fn read_next_expr<'a>(&'a mut self) -> Result<ESExpr<'a>, ParseError<E>>;
    );

    /// Yields the remaining expressions as owned (`'static`) values.
    ///
    /// Each item is the result of `try_read_next_expr` converted with
    /// `ESExpr::into_owned`; the sequence ends when that call returns `Ok(None)`.
    /// An `Err` item does not end the sequence by itself.
    fn iter_static(
        &mut self,
    ) -> if_async!(
        $syncness,
        impl Stream<Item = Result<ESExpr<'static>, ParseError<E>>>,
        impl Iterator<Item = Result<ESExpr<'static>, ParseError<E>>>
    ) {
        if_async!(
            $syncness,
            // NOTE(review): a new future is built on every poll. If it returns
            // `Pending` it is dropped, which looks like it could discard a
            // partially read expression. Confirm against the readers in use.
            stream::poll_fn(|ctx| core::pin::pin!(async {
                self.try_read_next_expr()
                    .await
                    .map(|res| res.map(ESExpr::into_owned))
                    .transpose()
            })
            .poll(ctx)),
            core::iter::from_fn(move || {
                self.try_read_next_expr()
                    .map(|res| res.map(ESExpr::into_owned))
                    .transpose()
            })
        )
    }
}
392
/// Parser state: a token source plus the string pool that tokens index into.
struct ExprParserImpl<I> {
    /// Strings referenced by index from tokens; grows when append tokens are read.
    string_pool: AppendOnlyStringList,
    /// Source of tokens (an iterator or a stream, depending on the instantiation).
    iter: I,
}
397
398 impl<E, I: IterLike<Item = Result<ExprToken, ParseError<E>>> + Unpin> ExprParser<E> for ExprParserImpl<I> {
399 maybe_async!(
400 $syncness,
401 fn try_read_next_expr<'a>(&'a mut self) -> Result<Option<ESExpr<'a>>, ParseError<E>> {
402 do_await!(
403 $syncness,
404 try_read_next_expr_impl(&mut self.iter, &self.string_pool)
405 )
406 }
407 );
408
409 maybe_async!(
410 $syncness,
411 fn read_next_expr<'a>(&'a mut self) -> Result<ESExpr<'a>, ParseError<E>> {
412 do_await!($syncness, read_next_expr_impl(&mut self.iter, &self.string_pool))
413 }
414 );
415 }
416
417 maybe_async!(
418 $syncness,
419 fn try_read_next_expr_impl<'a, E>(
420 iter: &mut (impl IterLike<Item = Result<ExprToken, ParseError<E>>> + Unpin),
421 string_pool: &'a AppendOnlyStringList,
422 ) -> Result<Option<ESExpr<'a>>, ParseError<E>> {
423 loop {
424 return match do_await!($syncness, read_expr_plus(iter, string_pool))? {
425 ExprPlus::Expr(expr) => Ok(Some(expr)),
426 ExprPlus::Keyword(_) => Err(ParseError::UnexpectedKeywordToken),
427 ExprPlus::ConstructorEnd => Err(ParseError::UnexpectedConstructorEnd),
428 ExprPlus::AppendedToStringTable => continue,
429 ExprPlus::EndOfFile => Ok(None),
430 };
431 }
432 }
433 );
434
435 maybe_async!(
436 $syncness,
437 fn read_next_expr_impl<'a, E>(
438 iter: &mut (impl IterLike<Item = Result<ExprToken, ParseError<E>>> + Unpin),
439 string_pool: &'a AppendOnlyStringList,
440 ) -> Result<ESExpr<'a>, ParseError<E>> {
441 do_await!($syncness, try_read_next_expr_impl(iter, string_pool))?.ok_or(ParseError::UnexpectedEndOfFile)
442 }
443 );
444
445 maybe_async!(
446 $syncness,
447 fn read_expr_plus<'a, 'b, E>(
448 iter: &'b mut (impl IterLike<Item = Result<ExprToken, ParseError<E>>> + Unpin),
449 string_pool: &'a AppendOnlyStringList,
450 ) -> Result<ExprPlus<'a>, ParseError<E>>
451 where
452 'a: 'b,
453 {
454 let Some(token) = do_await!($syncness, iter.next()).transpose()?
455 else {
456 return Ok(ExprPlus::EndOfFile);
457 };
458
459 let expr: ExprPlus<'a> = ExprPlus::Expr(match token {
460 ExprToken::ConstructorStart(index) => {
461 let name = get_string(string_pool, index)?;
462 do_await!(
463 $syncness,
464 read_expr_constructor(iter, string_pool, CowStr::Borrowed(name))
465 )?
466 },
467 ExprToken::ConstructorStartKnown(name) => do_await!(
468 $syncness,
469 read_expr_constructor(iter, string_pool, CowStr::Static(name))
470 )?,
471 ExprToken::ConstructorEnd => return Ok(ExprPlus::ConstructorEnd),
472 ExprToken::Keyword(index) => return Ok(ExprPlus::Keyword(index)),
473 ExprToken::IntValue(i) => ESExpr::Int(Cow::Owned(i)),
474 ExprToken::StringValue(s) => ESExpr::Str(CowStr::Owned(s)),
475 ExprToken::StringPoolValue(index) => ESExpr::Str(CowStr::Borrowed(get_string(string_pool, index)?)),
476 ExprToken::Float16Value(f) => ESExpr::Float16(f),
477 ExprToken::Float32Value(f) => ESExpr::Float32(f),
478 ExprToken::Float64Value(d) => ESExpr::Float64(d),
479 ExprToken::Array8Value(b) => ESExpr::Array8(Cow::Owned(b)),
480 ExprToken::Array16Value(b) => ESExpr::Array16(Cow::Owned(b)),
481 ExprToken::Array32Value(b) => ESExpr::Array32(Cow::Owned(b)),
482 ExprToken::Array64Value(b) => ESExpr::Array64(Cow::Owned(b)),
483 ExprToken::Array128Value(b) => ESExpr::Array128(Cow::Owned(b)),
484 ExprToken::BooleanValue(b) => ESExpr::Bool(b),
485 ExprToken::NullValue(level) => ESExpr::Null(Cow::Owned(level)),
486 ExprToken::AppendStringTable => {
487 let new_string_table = do_await!($syncness, read_next_expr_impl(iter, string_pool))?;
488 let new_string_table = AppendedStringPool::decode_esexpr(new_string_table)
489 .map_err(ParseError::InvalidStringPool)?;
490
491 match new_string_table {
492 AppendedStringPool::Fixed(mut fixed_string_pool) => {
493 string_pool.append(&mut fixed_string_pool.strings)
494 },
495
496 AppendedStringPool::Single(s) => string_pool.push(s),
497 }
498
499 return Ok(ExprPlus::AppendedToStringTable);
500 },
501 });
502
503 Ok(expr)
504 }
505 );
506
507 maybe_async!(
508 $syncness,
509 fn read_expr_constructor<'a, 'b, E>(
510 iter: &'b mut (impl IterLike<Item = Result<ExprToken, ParseError<E>>> + Unpin),
511 string_pool: &'a AppendOnlyStringList,
512 name: CowStr<'a>,
513 ) -> Result<ESExpr<'a>, ParseError<E>> {
514 let mut args = Vec::new();
515 let mut kwargs = BTreeMap::new();
516
517 loop {
518 match do_await!($syncness, read_expr_plus(iter, string_pool))? {
519 ExprPlus::Expr(expr) => args.push(expr),
520 ExprPlus::Keyword(index) => {
521 let kw = get_string(string_pool, index)?;
522 let value = do_await!($syncness, read_next_expr_impl(iter, string_pool))?;
523 kwargs.insert(CowStr::Borrowed(kw), value);
524 },
525 ExprPlus::ConstructorEnd => break,
526 ExprPlus::AppendedToStringTable => {},
527 ExprPlus::EndOfFile => return Err(ParseError::UnexpectedEndOfFile),
528 }
529 }
530
531 Ok(ESExpr::constructor(name, args, kwargs))
532 }
533 );
534
535 pub fn parse_existing_string_pool<'a, R: Read<E>, E: 'static>(
537 data: &'a mut R,
538 string_pool: Vec<String>,
539 ) -> impl ExprParser<E> {
540 ExprParserImpl {
541 iter: if_async!($syncness, Box::pin(token_reader(data)), token_reader(data)),
542 string_pool: AppendOnlyStringList::from(string_pool),
543 }
544 }
545
546 pub fn parse<E: 'static>(data: &mut impl Read<E>) -> impl ExprParser<E> {
548 parse_existing_string_pool(data, Vec::new())
549 }
550 };
551}
552
553mod reader_sync {
554 use core::iter::{self, Iterator as IterLike, Iterator};
555
556 use super::*;
557 use crate::io::Read;
558
559 fn token_reader<E, R: Read<E>>(read: &mut R) -> impl Iterator<Item = Result<ExprToken, ParseError<E>>> {
560 iter::from_fn(|| read_token_impl(read).transpose())
561 }
562
563 reader_mod!(sync);
564}
565
// Asynchronous instantiation of the reader: tokens come from a `Stream` and the
// shared `reader_mod!` body is expanded in its asynchronous form.
#[allow(async_fn_in_trait, reason = "No additional traits to add")]
mod reader_async {
    use alloc::boxed::Box;

    use futures::{Stream as IterLike, Stream, StreamExt, stream};

    use super::*;
    use crate::io::AsyncRead as Read;

    // Adapts `read` into a stream of tokens that ends when `read_token_impl`
    // reports the end of the input.
    //
    // NOTE(review): the closure builds a new `read_token_impl` future on every
    // poll. A future that returns `Pending` is dropped, which looks like it
    // could lose bytes already consumed for a partially read token. Confirm
    // against the `AsyncRead` implementations in use.
    fn token_reader<E>(read: &mut impl Read<E>) -> impl Stream<Item = Result<ExprToken, ParseError<E>>> {
        stream::poll_fn(|ctx| core::pin::pin!(async { read_token_impl(read).await.transpose() }).poll(ctx))
    }

    reader_mod!(async);
}
581
582pub use reader_async::{
583 ExprParser as ExprParserAsync,
584 parse as parse_async,
585 parse_existing_string_pool as parse_existing_string_pool_async,
586};
587pub use reader_sync::{
588 ExprParser as ExprParserSync,
589 parse as parse_sync,
590 parse_existing_string_pool as parse_existing_string_pool_sync,
591};