// Source: surql_parser/upstream/syn/lexer/reader.rs

use crate::upstream::syn::error::SyntaxError;
use crate::upstream::syn::token::Span;
use thiserror::Error;
4#[derive(Error, Debug)]
5pub enum CharError {
6	#[error("found eof inside multi byte character")]
7	Eof,
8	#[error("string is not valid utf-8")]
9	Unicode,
10}
11impl From<CharError> for SyntaxError {
12	fn from(_: CharError) -> Self {
13		SyntaxError::new("Invalid, non valid UTF-8 bytes, in source")
14	}
15}
/// A cursor over a borrowed byte slice.
///
/// Cloning copies only the slice reference and the position, not the bytes.
#[derive(Clone, Debug)]
pub struct BytesReader<'a> {
	/// The complete input being read.
	data: &'a [u8],
	/// Index into `data` of the next byte to be read.
	current: u32,
}
21impl<'a> BytesReader<'a> {
22	pub fn new(slice: &'a [u8]) -> Self {
23		debug_assert!(
24			slice.len() < u32::MAX as usize,
25			"BytesReader got a string which was too large for lexing"
26		);
27		BytesReader {
28			data: slice,
29			current: 0,
30		}
31	}
32	#[inline]
33	pub fn remaining(&self) -> &'a [u8] {
34		&self.data[(self.current as usize)..]
35	}
36	#[inline]
37	pub fn len(&self) -> u32 {
38		self.remaining().len() as u32
39	}
40	#[inline]
41	pub fn offset(&self) -> u32 {
42		self.current
43	}
44	#[inline]
45	pub fn backup(&mut self, offset: u32) {
46		assert!(offset <= self.offset());
47		self.current = offset;
48	}
49	#[inline]
50	pub fn is_empty(&self) -> bool {
51		self.remaining().is_empty()
52	}
53	#[inline]
54	pub fn peek(&self) -> Option<u8> {
55		self.remaining().first().copied()
56	}
57	#[inline]
58	pub fn peek1(&self) -> Option<u8> {
59		self.remaining().get(1).copied()
60	}
61	#[inline]
62	pub fn eat(&mut self, c: u8) -> bool {
63		if self.peek() == Some(c) {
64			self.current += 1;
65			true
66		} else {
67			false
68		}
69	}
70	#[inline]
71	pub fn span(&self, span: Span) -> &'a [u8] {
72		&self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)]
73	}
74	#[inline]
75	pub fn span_since(&self, offset: u32) -> Span {
76		assert!(
77			offset <= self.offset(),
78			"Tried to get a span from a offset read in the future"
79		);
80		Span {
81			offset,
82			len: self.offset() - offset,
83		}
84	}
85	#[inline]
86	pub fn next_continue_byte(&mut self) -> Result<u8, CharError> {
87		const CONTINUE_BYTE_PREFIX_MASK: u8 = 0b1100_0000;
88		const CONTINUE_BYTE_MASK: u8 = 0b0011_1111;
89		let byte = self.next().ok_or(CharError::Eof)?;
90		if byte & CONTINUE_BYTE_PREFIX_MASK != 0b1000_0000 {
91			return Err(CharError::Unicode);
92		}
93		Ok(byte & CONTINUE_BYTE_MASK)
94	}
95	#[inline]
96	pub fn convert_to_char(&mut self, start: u8) -> Result<char, CharError> {
97		if start.is_ascii() {
98			return Ok(start as char);
99		}
100		self.complete_char(start)
101	}
102	#[inline]
103	pub fn complete_char(&mut self, start: u8) -> Result<char, CharError> {
104		debug_assert!(
105			!start.is_ascii(),
106			"complete_char should not be handed ascii bytes"
107		);
108		match start & 0b1111_1000 {
109			0b1100_0000 | 0b1101_0000 | 0b1100_1000 | 0b1101_1000 => {
110				let mut val = (start & 0b0001_1111) as u32;
111				val <<= 6;
112				let next = self.next_continue_byte()?;
113				val |= next as u32;
114				char::from_u32(val).ok_or(CharError::Unicode)
115			}
116			0b1110_0000 | 0b1110_1000 => {
117				let mut val = (start & 0b0000_1111) as u32;
118				val <<= 6;
119				let next = self.next_continue_byte()?;
120				val |= next as u32;
121				val <<= 6;
122				let next = self.next_continue_byte()?;
123				val |= next as u32;
124				char::from_u32(val).ok_or(CharError::Unicode)
125			}
126			0b1111_0000 => {
127				let mut val = (start & 0b0000_0111) as u32;
128				val <<= 6;
129				let next = self.next_continue_byte()?;
130				val |= next as u32;
131				val <<= 6;
132				let next = self.next_continue_byte()?;
133				val |= next as u32;
134				val <<= 6;
135				let next = self.next_continue_byte()?;
136				val |= next as u32;
137				char::from_u32(val).ok_or(CharError::Unicode)
138			}
139			_ => Err(CharError::Unicode),
140		}
141	}
142}
143impl Iterator for BytesReader<'_> {
144	type Item = u8;
145	#[inline]
146	fn next(&mut self) -> Option<Self::Item> {
147		let res = self.peek()?;
148		self.current += 1;
149		Some(res)
150	}
151	fn size_hint(&self) -> (usize, Option<usize>) {
152		let len = self.len();
153		(len as usize, Some(len as usize))
154	}
155}
156impl ExactSizeIterator for BytesReader<'_> {
157	fn len(&self) -> usize {
158		self.len() as usize
159	}
160}