esexpr_binary/
reader.rs

1use alloc::string::String;
2use alloc::vec::Vec;
3
4use esexpr::{ESExpr, ESExprCodec, ESExprEncodedEq};
5use num_bigint::BigUint;
6
7use crate::append_only_string_list::AppendOnlyStringList;
8
9/// `ESExpr` binary format parse error.
10#[derive(Debug)]
11pub enum ParseError<IOError> {
12	/// Invalid token byte.
13	InvalidTokenByte(u8),
14
15	/// Invalid string table index.
16	InvalidStringTableIndex,
17
18	/// Invalid length.
19	InvalidLength,
20
21	/// Unexpected keyword token.
22	UnexpectedKeywordToken,
23
24	/// Unexpected constructor end.
25	UnexpectedConstructorEnd,
26
27	/// Unexpected end of file.
28	UnexpectedEndOfFile,
29
30	/// Invalid string pool.
31	InvalidStringPool(esexpr::DecodeError),
32
33	/// IO error.
34	IOError(IOError),
35
36	/// Utf8 error.
37	Utf8Error(core::str::Utf8Error),
38}
39
#[cfg(feature = "std")]
impl From<std::io::Error> for ParseError<std::io::Error> {
	/// Wraps a standard I/O error in [`ParseError::IOError`].
	fn from(value: std::io::Error) -> Self {
		Self::IOError(value)
	}
}
46
47impl<IOError> From<core::str::Utf8Error> for ParseError<IOError> {
48	fn from(err: core::str::Utf8Error) -> Self {
49		ParseError::Utf8Error(err)
50	}
51}
52
53impl<IOError> From<alloc::string::FromUtf8Error> for ParseError<IOError> {
54	fn from(value: alloc::string::FromUtf8Error) -> Self {
55		ParseError::Utf8Error(value.utf8_error())
56	}
57}
58
59#[derive(ESExprCodec, ESExprEncodedEq, Debug, PartialEq, Clone)]
60#[esexpr(constructor = "string-table")]
61struct FixedStringPool {
62	#[esexpr(vararg)]
63	pub strings: Vec<String>,
64}
65
66#[derive(ESExprCodec, ESExprEncodedEq, Debug, PartialEq, Clone)]
67enum AppendedStringPool {
68	#[esexpr(inline_value)]
69	Fixed(FixedStringPool),
70
71	#[esexpr(inline_value)]
72	Single(String),
73}
74
75enum ExprPlus<'a> {
76	Expr(ESExpr<'a>),
77	Keyword(usize),
78	ConstructorEnd,
79	AppendedToStringTable,
80	EndOfFile,
81}
82
83fn get_string_table_index<E>(i: BigUint) -> Result<usize, ParseError<E>> {
84	i.try_into().map_err(|_| ParseError::InvalidStringTableIndex)
85}
86
87fn get_length<E>(i: BigUint) -> Result<usize, ParseError<E>> {
88	i.try_into().map_err(|_| ParseError::InvalidLength)
89}
90
91fn get_string<'a, E>(string_pool: &'a AppendOnlyStringList, i: usize) -> Result<&'a str, ParseError<E>> {
92	string_pool.get(i).ok_or(ParseError::InvalidStringTableIndex)
93}
94
95macro_rules! reader_mod {
96	($syncness: ident) => {
97		use alloc::borrow::{Cow, ToOwned};
98		use alloc::collections::BTreeMap;
99		use alloc::vec;
100
101		use esexpr::cowstr::CowStr;
102		use half::f16;
103		use num_bigint::{BigInt, Sign};
104
105		use crate::async_macros::{do_await, if_async, maybe_async};
106		use crate::format::*;
107
108		maybe_async!(
109			$syncness,
110			pub(super) fn read_token_impl<E>(reader: &mut impl Read<E>) -> Result<Option<ExprToken>, ParseError<E>> {
111				let mut b: [u8; 1] = [0];
112
113				if do_await!($syncness, reader.read(&mut b)).map_err(ParseError::IOError)? == 0 {
114					return Ok(None);
115				}
116
117				let b = b[0];
118
119				Ok(Some(if (b & TAG_VARINT_MASK) == TAG_VARINT_MASK {
120					match b {
121						TAG_CONSTRUCTOR_END => ExprToken::ConstructorEnd,
122						TAG_TRUE => ExprToken::BooleanValue(true),
123						TAG_FALSE => ExprToken::BooleanValue(false),
124						TAG_NULL0 => ExprToken::NullValue(BigUint::ZERO),
125						TAG_NULL1 => ExprToken::NullValue(BigUint::from(1u32)),
126						TAG_NULL2 => ExprToken::NullValue(BigUint::from(2u32)),
127						TAG_NULLN => {
128							let n = do_await!($syncness, read_int_full(reader))?;
129							ExprToken::NullValue(n + 3u32)
130						},
131						TAG_FLOAT16 => {
132							let buffer: [u8; 2] = do_await!($syncness, read_bytes(reader))?;
133							ExprToken::Float16Value(f16::from_le_bytes(buffer))
134						},
135						TAG_FLOAT32 => {
136							let buffer: [u8; 4] = do_await!($syncness, read_bytes(reader))?;
137							ExprToken::Float32Value(f32::from_le_bytes(buffer))
138						},
139						TAG_FLOAT64 => {
140							let buffer: [u8; 8] = do_await!($syncness, read_bytes(reader))?;
141							ExprToken::Float64Value(f64::from_le_bytes(buffer))
142						},
143						TAG_CONSTRUCTOR_START_STRING_TABLE => ExprToken::ConstructorStartKnown("string-table"),
144						TAG_CONSTRUCTOR_START_LIST => ExprToken::ConstructorStartKnown("list"),
145						TAG_APPEND_STRING_TABLE => ExprToken::AppendStringTable,
146						TAG_ARRAY16 => {
147							let n = get_length(do_await!($syncness, read_int_full(reader))?)?;
148							let mut buff = vec![0u16; n];
149							do_await!(
150								$syncness,
151								read_exact(reader, bytemuck::cast_slice_mut::<u16, u8>(&mut buff))
152							)?;
153							#[cfg(target_endian = "big")]
154							{
155								for b in buff.iter_mut() {
156									*b = b.swap_bytes();
157								}
158							}
159							ExprToken::Array16Value(buff)
160						},
161						TAG_ARRAY32 => {
162							let n = get_length(do_await!($syncness, read_int_full(reader))?)?;
163							let mut buff = vec![0u32; n];
164							do_await!(
165								$syncness,
166								read_exact(reader, bytemuck::cast_slice_mut::<u32, u8>(&mut buff))
167							)?;
168							#[cfg(target_endian = "big")]
169							{
170								for b in buff.iter_mut() {
171									*b = b.swap_bytes();
172								}
173							}
174							ExprToken::Array32Value(buff)
175						},
176						TAG_ARRAY64 => {
177							let n = get_length(do_await!($syncness, read_int_full(reader))?)?;
178							let mut buff = vec![0u64; n];
179							do_await!(
180								$syncness,
181								read_exact(reader, bytemuck::cast_slice_mut::<u64, u8>(&mut buff))
182							)?;
183							#[cfg(target_endian = "big")]
184							{
185								for b in buff.iter_mut() {
186									*b = b.swap_bytes();
187								}
188							}
189							ExprToken::Array64Value(buff)
190						},
191						TAG_ARRAY128 => {
192							let n = get_length(do_await!($syncness, read_int_full(reader))?)?;
193							let mut buff = vec![0u128; n];
194							do_await!(
195								$syncness,
196								read_exact(reader, bytemuck::cast_slice_mut::<u128, u8>(&mut buff))
197							)?;
198							#[cfg(target_endian = "big")]
199							{
200								for b in buff.iter_mut() {
201									*b = b.swap_bytes();
202								}
203							}
204							ExprToken::Array128Value(buff)
205						},
206
207						_ => {
208							return Err(ParseError::InvalidTokenByte(b));
209						},
210					}
211				}
212				else {
213					let tag = match b & TAG_VARINT_MASK {
214						TAG_VARINT_CONSTRUCTOR_START => VarIntTag::ConstructorStart,
215						TAG_VARINT_NON_NEG_INT => VarIntTag::NonNegIntValue,
216						TAG_VARINT_NEG_INT => VarIntTag::NegIntValue,
217						TAG_VARINT_STRING_LENGTH => VarIntTag::StringLengthValue,
218						TAG_VARINT_STRING_POOL => VarIntTag::StringPoolValue,
219						TAG_VARINT_ARRAY8_LENGTH => VarIntTag::Array8LengthValue,
220						TAG_VARINT_KEYWORD => VarIntTag::KeywordArgument,
221						_ => unreachable!("remaining bits have been masked"),
222					};
223
224					let mut n = do_await!($syncness, read_int(reader, b))?;
225
226					match tag {
227						VarIntTag::ConstructorStart => ExprToken::ConstructorStart(get_string_table_index(n)?),
228						VarIntTag::NonNegIntValue => ExprToken::IntValue(BigInt::from_biguint(Sign::Plus, n)),
229						VarIntTag::NegIntValue => {
230							n += 1u32;
231							ExprToken::IntValue(BigInt::from_biguint(Sign::Minus, n))
232						},
233						VarIntTag::StringLengthValue => {
234							let len = get_length(n)?;
235							let mut buff = vec![0u8; len];
236							do_await!($syncness, read_exact(reader, &mut buff))?;
237							ExprToken::StringValue(String::from_utf8(buff)?.to_owned())
238						},
239						VarIntTag::StringPoolValue => ExprToken::StringPoolValue(get_string_table_index(n)?),
240						VarIntTag::Array8LengthValue => {
241							let len = get_length(n)?;
242							let mut buff = vec![0u8; len];
243							do_await!($syncness, read_exact(reader, &mut buff))?;
244							ExprToken::Array8Value(buff)
245						},
246						VarIntTag::KeywordArgument => ExprToken::Keyword(get_string_table_index(n)?),
247					}
248				}))
249			}
250		);
251
252		maybe_async!(
253			$syncness,
254			fn read_int<E>(reader: &mut impl Read<E>, initial: u8) -> Result<BigUint, ParseError<E>> {
255				let current = initial & 0x0F;
256				let bit_offset = 4;
257				let has_next = (initial & 0x10) == 0x10;
258
259				do_await!($syncness, read_int_rest(reader, current, bit_offset, has_next))
260			}
261		);
262
263		maybe_async!(
264			$syncness,
265			fn read_int_full<E>(reader: &mut impl Read<E>) -> Result<BigUint, ParseError<E>> {
266				let current = 0;
267				let bit_offset = 0;
268				let has_next = true;
269
270				do_await!($syncness, read_int_rest(reader, current, bit_offset, has_next))
271			}
272		);
273
274		maybe_async!(
275			$syncness,
276			fn read_int_rest<E>(
277				reader: &mut impl Read<E>,
278				mut current: u8,
279				mut bit_offset: i32,
280				mut has_next: bool,
281			) -> Result<BigUint, ParseError<E>> {
282				let mut buffer = Vec::new();
283
284				while has_next {
285					let b = do_await!($syncness, read_byte(reader))?;
286
287					has_next = (b & 0x80) == 0x80;
288
289					let value = b & 0x7F;
290					let low = value << bit_offset;
291					let high = if bit_offset > 1 {
292						value >> (8 - bit_offset)
293					}
294					else {
295						0
296					};
297
298					current |= low;
299					bit_offset += 7;
300					if bit_offset >= 8 {
301						bit_offset -= 8;
302						buffer.push(current);
303						current = high;
304					}
305				}
306
307				if bit_offset > 0 {
308					buffer.push(current);
309				}
310
311				Ok(BigUint::from_bytes_le(&buffer))
312			}
313		);
314
315		maybe_async!(
316			$syncness,
317			fn read_bytes<E, const N: usize>(reader: &mut impl Read<E>) -> Result<[u8; N], ParseError<E>> {
318				let mut b: [u8; N] = [0; N];
319				do_await!($syncness, read_exact(reader, &mut b))?;
320				Ok(b)
321			}
322		);
323
324		maybe_async!(
325			$syncness,
326			fn read_exact<E>(reader: &mut impl Read<E>, mut buf: &mut [u8]) -> Result<(), ParseError<E>> {
327				while !buf.is_empty() {
328					let n = do_await!($syncness, reader.read(buf)).map_err(ParseError::IOError)?;
329					if n == 0 {
330						return Err(ParseError::UnexpectedEndOfFile);
331					}
332
333					buf = &mut buf[n..];
334				}
335
336				Ok(())
337			}
338		);
339
340		maybe_async!(
341			$syncness,
342			fn read_byte<E>(reader: &mut impl Read<E>) -> Result<u8, ParseError<E>> {
343				Ok(do_await!($syncness, read_bytes::<E, 1>(reader))?[0])
344			}
345		);
346
347		/// An expression parser
348		pub trait ExprParser<E> {
349			maybe_async!(
350				$syncness,
351				/// Try to read the next expression.
352				///
353				/// # Errors
354				/// Returns `Err` if an error occurs during parsing.
355				fn try_read_next_expr<'a>(&'a mut self) -> Result<Option<ESExpr<'a>>, ParseError<E>>;
356			);
357
358			maybe_async!(
359				$syncness,
360				/// Read the next expression
361				///
362				/// # Errors
363				/// Returns `Err` if an error occurs during parsing, or if the end of the input is reached.
364				fn read_next_expr<'a>(&'a mut self) -> Result<ESExpr<'a>, ParseError<E>>;
365			);
366
367			/// Read all expressions, copying values when needed.
368			fn iter_static(
369				&mut self,
370			) -> if_async!(
371				$syncness,
372				impl Stream<Item = Result<ESExpr<'static>, ParseError<E>>>,
373				impl Iterator<Item = Result<ESExpr<'static>, ParseError<E>>>
374			) {
375				if_async!(
376					$syncness,
377					stream::poll_fn(|ctx| core::pin::pin!(async {
378						self.try_read_next_expr()
379							.await
380							.map(|res| res.map(ESExpr::into_owned))
381							.transpose()
382					})
383					.poll(ctx)),
384					core::iter::from_fn(move || {
385						self.try_read_next_expr()
386							.map(|res| res.map(ESExpr::into_owned))
387							.transpose()
388					})
389				)
390			}
391		}
392
393		struct ExprParserImpl<I> {
394			string_pool: AppendOnlyStringList,
395			iter: I,
396		}
397
398		impl<E, I: IterLike<Item = Result<ExprToken, ParseError<E>>> + Unpin> ExprParser<E> for ExprParserImpl<I> {
399			maybe_async!(
400				$syncness,
401				fn try_read_next_expr<'a>(&'a mut self) -> Result<Option<ESExpr<'a>>, ParseError<E>> {
402					do_await!(
403						$syncness,
404						try_read_next_expr_impl(&mut self.iter, &self.string_pool)
405					)
406				}
407			);
408
409			maybe_async!(
410				$syncness,
411				fn read_next_expr<'a>(&'a mut self) -> Result<ESExpr<'a>, ParseError<E>> {
412					do_await!($syncness, read_next_expr_impl(&mut self.iter, &self.string_pool))
413				}
414			);
415		}
416
417		maybe_async!(
418			$syncness,
419			fn try_read_next_expr_impl<'a, E>(
420				iter: &mut (impl IterLike<Item = Result<ExprToken, ParseError<E>>> + Unpin),
421				string_pool: &'a AppendOnlyStringList,
422			) -> Result<Option<ESExpr<'a>>, ParseError<E>> {
423				loop {
424					return match do_await!($syncness, read_expr_plus(iter, string_pool))? {
425						ExprPlus::Expr(expr) => Ok(Some(expr)),
426						ExprPlus::Keyword(_) => Err(ParseError::UnexpectedKeywordToken),
427						ExprPlus::ConstructorEnd => Err(ParseError::UnexpectedConstructorEnd),
428						ExprPlus::AppendedToStringTable => continue,
429						ExprPlus::EndOfFile => Ok(None),
430					};
431				}
432			}
433		);
434
435		maybe_async!(
436			$syncness,
437			fn read_next_expr_impl<'a, E>(
438				iter: &mut (impl IterLike<Item = Result<ExprToken, ParseError<E>>> + Unpin),
439				string_pool: &'a AppendOnlyStringList,
440			) -> Result<ESExpr<'a>, ParseError<E>> {
441				do_await!($syncness, try_read_next_expr_impl(iter, string_pool))?.ok_or(ParseError::UnexpectedEndOfFile)
442			}
443		);
444
445		maybe_async!(
446			$syncness,
447			fn read_expr_plus<'a, 'b, E>(
448				iter: &'b mut (impl IterLike<Item = Result<ExprToken, ParseError<E>>> + Unpin),
449				string_pool: &'a AppendOnlyStringList,
450			) -> Result<ExprPlus<'a>, ParseError<E>>
451			where
452				'a: 'b,
453			{
454				let Some(token) = do_await!($syncness, iter.next()).transpose()?
455				else {
456					return Ok(ExprPlus::EndOfFile);
457				};
458
459				let expr: ExprPlus<'a> = ExprPlus::Expr(match token {
460					ExprToken::ConstructorStart(index) => {
461						let name = get_string(string_pool, index)?;
462						do_await!(
463							$syncness,
464							read_expr_constructor(iter, string_pool, CowStr::Borrowed(name))
465						)?
466					},
467					ExprToken::ConstructorStartKnown(name) => do_await!(
468						$syncness,
469						read_expr_constructor(iter, string_pool, CowStr::Static(name))
470					)?,
471					ExprToken::ConstructorEnd => return Ok(ExprPlus::ConstructorEnd),
472					ExprToken::Keyword(index) => return Ok(ExprPlus::Keyword(index)),
473					ExprToken::IntValue(i) => ESExpr::Int(Cow::Owned(i)),
474					ExprToken::StringValue(s) => ESExpr::Str(CowStr::Owned(s)),
475					ExprToken::StringPoolValue(index) => ESExpr::Str(CowStr::Borrowed(get_string(string_pool, index)?)),
476					ExprToken::Float16Value(f) => ESExpr::Float16(f),
477					ExprToken::Float32Value(f) => ESExpr::Float32(f),
478					ExprToken::Float64Value(d) => ESExpr::Float64(d),
479					ExprToken::Array8Value(b) => ESExpr::Array8(Cow::Owned(b)),
480					ExprToken::Array16Value(b) => ESExpr::Array16(Cow::Owned(b)),
481					ExprToken::Array32Value(b) => ESExpr::Array32(Cow::Owned(b)),
482					ExprToken::Array64Value(b) => ESExpr::Array64(Cow::Owned(b)),
483					ExprToken::Array128Value(b) => ESExpr::Array128(Cow::Owned(b)),
484					ExprToken::BooleanValue(b) => ESExpr::Bool(b),
485					ExprToken::NullValue(level) => ESExpr::Null(Cow::Owned(level)),
486					ExprToken::AppendStringTable => {
487						let new_string_table = do_await!($syncness, read_next_expr_impl(iter, string_pool))?;
488						let new_string_table = AppendedStringPool::decode_esexpr(new_string_table)
489							.map_err(ParseError::InvalidStringPool)?;
490
491						match new_string_table {
492							AppendedStringPool::Fixed(mut fixed_string_pool) => {
493								string_pool.append(&mut fixed_string_pool.strings)
494							},
495
496							AppendedStringPool::Single(s) => string_pool.push(s),
497						}
498
499						return Ok(ExprPlus::AppendedToStringTable);
500					},
501				});
502
503				Ok(expr)
504			}
505		);
506
507		maybe_async!(
508			$syncness,
509			fn read_expr_constructor<'a, 'b, E>(
510				iter: &'b mut (impl IterLike<Item = Result<ExprToken, ParseError<E>>> + Unpin),
511				string_pool: &'a AppendOnlyStringList,
512				name: CowStr<'a>,
513			) -> Result<ESExpr<'a>, ParseError<E>> {
514				let mut args = Vec::new();
515				let mut kwargs = BTreeMap::new();
516
517				loop {
518					match do_await!($syncness, read_expr_plus(iter, string_pool))? {
519						ExprPlus::Expr(expr) => args.push(expr),
520						ExprPlus::Keyword(index) => {
521							let kw = get_string(string_pool, index)?;
522							let value = do_await!($syncness, read_next_expr_impl(iter, string_pool))?;
523							kwargs.insert(CowStr::Borrowed(kw), value);
524						},
525						ExprPlus::ConstructorEnd => break,
526						ExprPlus::AppendedToStringTable => {},
527						ExprPlus::EndOfFile => return Err(ParseError::UnexpectedEndOfFile),
528					}
529				}
530
531				Ok(ESExpr::constructor(name, args, kwargs))
532			}
533		);
534
535		/// Parse binary input as `ESExpr` using an existing string pool
536		pub fn parse_existing_string_pool<'a, R: Read<E>, E: 'static>(
537			data: &'a mut R,
538			string_pool: Vec<String>,
539		) -> impl ExprParser<E> {
540			ExprParserImpl {
541				iter: if_async!($syncness, Box::pin(token_reader(data)), token_reader(data)),
542				string_pool: AppendOnlyStringList::from(string_pool),
543			}
544		}
545
546		/// Parse binary input as `ESExpr`
547		pub fn parse<E: 'static>(data: &mut impl Read<E>) -> impl ExprParser<E> {
548			parse_existing_string_pool(data, Vec::new())
549		}
550	};
551}
552
553mod reader_sync {
554	use core::iter::{self, Iterator as IterLike, Iterator};
555
556	use super::*;
557	use crate::io::Read;
558
559	fn token_reader<E, R: Read<E>>(read: &mut R) -> impl Iterator<Item = Result<ExprToken, ParseError<E>>> {
560		iter::from_fn(|| read_token_impl(read).transpose())
561	}
562
563	reader_mod!(sync);
564}
565
566#[allow(async_fn_in_trait, reason = "No additional traits to add")]
567mod reader_async {
568	use alloc::boxed::Box;
569
570	use futures::{Stream as IterLike, Stream, StreamExt, stream};
571
572	use super::*;
573	use crate::io::AsyncRead as Read;
574
575	fn token_reader<E>(read: &mut impl Read<E>) -> impl Stream<Item = Result<ExprToken, ParseError<E>>> {
576		stream::poll_fn(|ctx| core::pin::pin!(async { read_token_impl(read).await.transpose() }).poll(ctx))
577	}
578
579	reader_mod!(async);
580}
581
582pub use reader_async::{
583	ExprParser as ExprParserAsync,
584	parse as parse_async,
585	parse_existing_string_pool as parse_existing_string_pool_async,
586};
587pub use reader_sync::{
588	ExprParser as ExprParserSync,
589	parse as parse_sync,
590	parse_existing_string_pool as parse_existing_string_pool_sync,
591};