json_flat_parser/
lexer.rs1use crate::string_from_bytes;
2
/// A single lexical token produced by the lexer.
///
/// Variants that carry text hold a `&'json str`, so a token cannot outlive
/// the input it was read from.
#[derive(Debug)]
pub enum Token<'json> {
    /// `{`
    CurlyOpen,
    /// `}`
    CurlyClose,
    /// `[`
    SquareOpen,
    /// `]`
    SquareClose,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// Text of a string literal.
    String(&'json str),
    /// Text of a number literal.
    Number(&'json str),
    /// Text of a boolean literal.
    Boolean(&'json str),
    /// The `null` literal.
    Null,
}
16
17
/// Forward-only cursor over a borrowed byte slice.
///
/// `index` is the position of the next unread byte. Slices handed out by the
/// reader borrow from the underlying input (`'json`), not from the reader.
pub struct SliceRead<'json> {
    slice: &'json [u8],
    index: usize,
}

impl<'json> SliceRead<'json> {
    /// Creates a reader positioned at the first byte of `slice`.
    pub fn new(slice: &'json [u8]) -> Self {
        SliceRead { slice, index: 0 }
    }

    /// Returns the byte under the cursor and advances by one, or `None` (without
    /// moving) when the cursor is at or past the end of the input.
    #[inline]
    pub fn next(&mut self) -> Option<u8> {
        let byte = self.slice.get(self.index).copied()?;
        self.index += 1;
        Some(byte)
    }

    /// Reads up to eight bytes as a little-endian `u64` and advances past them.
    ///
    /// Returns `(word, consumed)`. When fewer than eight bytes remain, the
    /// missing high-order bytes of `word` are zero and `consumed` is the number
    /// of bytes that were actually available (possibly 0).
    #[inline]
    pub fn next_u64(&mut self) -> (u64, usize) {
        let mut word = [0u8; 8];
        // Fast path: a full eight-byte chunk is available (including the case
        // where exactly eight bytes remain).
        if let Some(chunk) = self.slice.get(self.index..self.index + 8) {
            word.copy_from_slice(chunk);
            self.index += 8;
            return (u64::from_le_bytes(word), 8);
        }
        // Tail: fewer than eight bytes left (or the cursor is past the end).
        let tail = self.slice.get(self.index..).unwrap_or(&[]);
        word[..tail.len()].copy_from_slice(tail);
        self.index += tail.len();
        (u64::from_le_bytes(word), tail.len())
    }

    /// Returns the byte under the cursor without consuming it.
    #[inline]
    pub fn peek(&self) -> Option<u8> {
        self.slice.get(self.index).copied()
    }

    /// Returns the bytes from `start` up to (not including) the cursor.
    ///
    /// # Panics
    ///
    /// Panics if `start` is greater than the cursor position, or if the cursor
    /// lies beyond the end of the input.
    #[inline]
    pub fn slice_from(&self, start: usize) -> &'json [u8] {
        &self.slice[start..self.index]
    }

    /// Returns `true` once every byte has been consumed.
    #[inline]
    pub fn is_at_end(&self) -> bool {
        self.index >= self.slice.len()
    }

    /// Consumes `pattern` if the unread input starts with it.
    ///
    /// Returns `true` and advances past the pattern on a match; otherwise
    /// returns `false` and leaves the cursor where it was.
    #[inline]
    pub fn match_pattern(&mut self, pattern: &[u8]) -> bool {
        let matched = self
            .slice
            .get(self.index..)
            .map_or(false, |rest| rest.starts_with(pattern));
        if matched {
            self.index += pattern.len();
        }
        matched
    }

    /// Returns the complete underlying input, independent of the cursor.
    pub fn data(&self) -> &'json [u8] {
        self.slice
    }
}
88
89
90pub struct Lexer<'json> {
91 reader: SliceRead<'json>,
92}
93
94
95const MASK_OPEN_CURLY: u64 = 0x0101010101010101 * b'{' as u64;
96const MASK_CLOSE_CURLY: u64 = 0x0101010101010101 * b'}' as u64;
97const MASK_OPEN_SQUARE: u64 = 0x0101010101010101 * b'[' as u64;
98const MASK_CLOSE_SQUARE: u64 = 0x0101010101010101 * b']' as u64;
99const MASK_QUOTE: u64 = 0x0101010101010101 * b'"' as u64;
100
101impl<'json> Lexer<'json> {
102 pub fn new(input: &'json [u8]) -> Self {
103 Lexer {
104 reader: SliceRead::new(input),
105 }
106 }
107
108 #[inline]
109 pub fn consume_string_until_end_of_array(&mut self, array_start_index: usize, nested_array: bool) -> Option<&'json str> {
110 let mut square_close_count = 1;
111 if nested_array {
112 square_close_count += 1;
113 }
114 while !self.reader.is_at_end() {
115 let current_index = self.reader.index;
116 let (bytes, _) = self.reader.next_u64();
117 let comparison_square_close = MASK_CLOSE_SQUARE ^ bytes;
118 let comparison_square_open = MASK_OPEN_SQUARE ^ bytes;
119 let high_bit_mask_square_close = (((comparison_square_close >> 1) | 0x8080808080808080) - comparison_square_close) & 0x8080808080808080;
120 let high_bit_mask_square_open = (((comparison_square_open >> 1) | 0x8080808080808080) - comparison_square_open) & 0x8080808080808080;
121 if high_bit_mask_square_close == 0 && high_bit_mask_square_open == 0 {
122 continue;
123 } else {
124 let mut index = 0;
125 if high_bit_mask_square_close != 0 {
126 index = (high_bit_mask_square_close.trailing_zeros() >> 3) as usize;
127 }
128 if high_bit_mask_square_open != 0 {
129 let open_index = (high_bit_mask_square_open.trailing_zeros() >> 3) as usize;
130 if open_index < index {
131 index = open_index;
132 }
133 }
134 self.reader.index = current_index + index;
135 }
136 match self.reader.next()? {
137 b'[' => square_close_count += 1,
138 b']' => {
139 if square_close_count == 1 {
140 return string_from_bytes(&self.reader.slice[array_start_index..self.reader.index]);
141 } else {
142 square_close_count -= 1;
143 }
144 }
145 _ => {}
146 }
147 }
148 None
149 }
150
151 pub fn reader_index(&self) -> usize {
152 self.reader.index
153 }
154 pub fn reader(&mut self) -> &SliceRead<'json> {
155 &self.reader
156 }
157
158 pub fn set_reader_index(&mut self, index: usize) {
159 self.reader.index = index;
160 }
161
162 #[inline]
163 pub fn consume_string_until_end_of_object(&mut self, should_return: bool) -> Option<&'json str> {
164 let mut square_close_count = 1;
165 let start = self.reader.index - 1;
166 while !self.reader.is_at_end() {
167 let current_index = self.reader.index;
168 let (bytes, _) = self.reader.next_u64();
169 let comparison_curly_close = MASK_CLOSE_CURLY ^ bytes;
170 let comparison_curly_open = MASK_OPEN_CURLY ^ bytes;
171 let high_bit_mask_curly_close = (((comparison_curly_close >> 1) | 0x8080808080808080) - comparison_curly_close) & 0x8080808080808080;
172 let high_bit_mask_curly_open = (((comparison_curly_open >> 1) | 0x8080808080808080) - comparison_curly_open) & 0x8080808080808080;
173
174 if high_bit_mask_curly_close == 0 && high_bit_mask_curly_open == 0 {
175 continue;
176 } else {
177 let mut index = 0;
178 if high_bit_mask_curly_close != 0 {
179 index = (high_bit_mask_curly_close.trailing_zeros() >> 3) as usize;
180 }
181 if high_bit_mask_curly_open != 0 {
182 let open_index = (high_bit_mask_curly_open.trailing_zeros() >> 3) as usize;
183 if open_index < index {
184 index = open_index;
185 }
186 }
187 self.reader.index = current_index + index;
188 }
189
190 match self.reader.next()? {
191 b'{' => square_close_count += 1,
192 b'}' => {
193 if square_close_count == 1 {
194 if should_return {
195 let value = string_from_bytes(&self.reader.slice[start..self.reader.index])?;
196 return Some(value);
197 } else {
198 break;
199 }
200 } else {
201 square_close_count -= 1;
202 }
203 }
204 _ => {}
205 }
206 }
207 None
208 }
209 #[inline]
210 pub fn next_token(&mut self) -> Option<Token<'json>> {
211 loop {
212 match self.reader.next()? {
213 b'{' => return Some(Token::CurlyOpen),
214 b'}' => return Some(Token::CurlyClose),
215 b'[' => return Some(Token::SquareOpen),
216 b']' => return Some(Token::SquareClose),
217 b',' => return Some(Token::Comma),
218 b':' => return Some(Token::Colon),
219 b'-' | b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => {
220 let start = self.reader.index - 1;
221 while let Some(b) = self.reader.next() {
222 if !((0x30..=0x39).contains(&b) || b == b'.' || b == b'e' || b == b'+' || b == b'-') {
223 break;
224 }
225 }
226 self.reader.index -= 1;
227 let s = string_from_bytes(&self.reader.slice[start..self.reader.index])?;
228 return Some(Token::Number(s));
229 }
230 b'"' => {
231 let start = self.reader.index;
232 while !self.reader.is_at_end() {
233 let (bytes, read_bytes) = self.reader.next_u64();
234 let comparison = MASK_QUOTE ^ bytes;
235 let high_bit_mask1 = (((comparison >> 1) | 0x8080808080808080) - comparison) & 0x8080808080808080;
236 if high_bit_mask1 != 0 {
238 let position = (high_bit_mask1.trailing_zeros() >> 3) as usize;
239 if self.reader.slice[self.reader.index - read_bytes + position - 1] != b'\\' {
240 self.reader.index = self.reader.index - read_bytes + position + 1;
241 break;
242 } else {
243 self.reader.index = self.reader.index - read_bytes + position + 1;
244 }
245 }
246 }
247 let s = string_from_bytes(&self.reader.slice[start..self.reader.index - 1])?;
248 return Some(Token::String(s));
249 }
250 b't' if self.reader.match_pattern(b"rue") => return Some(Token::Boolean(string_from_bytes(&self.reader.slice[self.reader.index - 4..self.reader.index])?)),
251 b'f' if self.reader.match_pattern(b"alse") => return Some(Token::Boolean(string_from_bytes(&self.reader.slice[self.reader.index - 5..self.reader.index])?)),
252 b'n' if self.reader.match_pattern(b"ull") => return Some(Token::Null),
253 _ => {}
254 }
255 }
256 }
257}
258
259