surql_parser/upstream/syn/lexer/
reader.rs1use crate::upstream::syn::error::SyntaxError;
2use crate::upstream::syn::token::Span;
3use thiserror::Error;
4#[derive(Error, Debug)]
5pub enum CharError {
6 #[error("found eof inside multi byte character")]
7 Eof,
8 #[error("string is not valid utf-8")]
9 Unicode,
10}
11impl From<CharError> for SyntaxError {
12 fn from(_: CharError) -> Self {
13 SyntaxError::new("Invalid, non valid UTF-8 bytes, in source")
14 }
15}
/// A forward-reading cursor over a borrowed byte slice.
#[derive(Debug, Clone)]
pub struct BytesReader<'a> {
    /// The whole input, including bytes that were already consumed.
    data: &'a [u8],
    /// Index into `data` of the next byte to be read.
    current: u32,
}
21impl<'a> BytesReader<'a> {
22 pub fn new(slice: &'a [u8]) -> Self {
23 debug_assert!(
24 slice.len() < u32::MAX as usize,
25 "BytesReader got a string which was too large for lexing"
26 );
27 BytesReader {
28 data: slice,
29 current: 0,
30 }
31 }
32 #[inline]
33 pub fn remaining(&self) -> &'a [u8] {
34 &self.data[(self.current as usize)..]
35 }
36 #[inline]
37 pub fn len(&self) -> u32 {
38 self.remaining().len() as u32
39 }
40 #[inline]
41 pub fn offset(&self) -> u32 {
42 self.current
43 }
44 #[inline]
45 pub fn backup(&mut self, offset: u32) {
46 assert!(offset <= self.offset());
47 self.current = offset;
48 }
49 #[inline]
50 pub fn is_empty(&self) -> bool {
51 self.remaining().is_empty()
52 }
53 #[inline]
54 pub fn peek(&self) -> Option<u8> {
55 self.remaining().first().copied()
56 }
57 #[inline]
58 pub fn peek1(&self) -> Option<u8> {
59 self.remaining().get(1).copied()
60 }
61 #[inline]
62 pub fn eat(&mut self, c: u8) -> bool {
63 if self.peek() == Some(c) {
64 self.current += 1;
65 true
66 } else {
67 false
68 }
69 }
70 #[inline]
71 pub fn span(&self, span: Span) -> &'a [u8] {
72 &self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)]
73 }
74 #[inline]
75 pub fn span_since(&self, offset: u32) -> Span {
76 assert!(
77 offset <= self.offset(),
78 "Tried to get a span from a offset read in the future"
79 );
80 Span {
81 offset,
82 len: self.offset() - offset,
83 }
84 }
85 #[inline]
86 pub fn next_continue_byte(&mut self) -> Result<u8, CharError> {
87 const CONTINUE_BYTE_PREFIX_MASK: u8 = 0b1100_0000;
88 const CONTINUE_BYTE_MASK: u8 = 0b0011_1111;
89 let byte = self.next().ok_or(CharError::Eof)?;
90 if byte & CONTINUE_BYTE_PREFIX_MASK != 0b1000_0000 {
91 return Err(CharError::Unicode);
92 }
93 Ok(byte & CONTINUE_BYTE_MASK)
94 }
95 #[inline]
96 pub fn convert_to_char(&mut self, start: u8) -> Result<char, CharError> {
97 if start.is_ascii() {
98 return Ok(start as char);
99 }
100 self.complete_char(start)
101 }
102 #[inline]
103 pub fn complete_char(&mut self, start: u8) -> Result<char, CharError> {
104 debug_assert!(
105 !start.is_ascii(),
106 "complete_char should not be handed ascii bytes"
107 );
108 match start & 0b1111_1000 {
109 0b1100_0000 | 0b1101_0000 | 0b1100_1000 | 0b1101_1000 => {
110 let mut val = (start & 0b0001_1111) as u32;
111 val <<= 6;
112 let next = self.next_continue_byte()?;
113 val |= next as u32;
114 char::from_u32(val).ok_or(CharError::Unicode)
115 }
116 0b1110_0000 | 0b1110_1000 => {
117 let mut val = (start & 0b0000_1111) as u32;
118 val <<= 6;
119 let next = self.next_continue_byte()?;
120 val |= next as u32;
121 val <<= 6;
122 let next = self.next_continue_byte()?;
123 val |= next as u32;
124 char::from_u32(val).ok_or(CharError::Unicode)
125 }
126 0b1111_0000 => {
127 let mut val = (start & 0b0000_0111) as u32;
128 val <<= 6;
129 let next = self.next_continue_byte()?;
130 val |= next as u32;
131 val <<= 6;
132 let next = self.next_continue_byte()?;
133 val |= next as u32;
134 val <<= 6;
135 let next = self.next_continue_byte()?;
136 val |= next as u32;
137 char::from_u32(val).ok_or(CharError::Unicode)
138 }
139 _ => Err(CharError::Unicode),
140 }
141 }
142}
143impl Iterator for BytesReader<'_> {
144 type Item = u8;
145 #[inline]
146 fn next(&mut self) -> Option<Self::Item> {
147 let res = self.peek()?;
148 self.current += 1;
149 Some(res)
150 }
151 fn size_hint(&self) -> (usize, Option<usize>) {
152 let len = self.len();
153 (len as usize, Some(len as usize))
154 }
155}
156impl ExactSizeIterator for BytesReader<'_> {
157 fn len(&self) -> usize {
158 self.len() as usize
159 }
160}