bibtex_parser/parser/
simd.rs1use memchr::{memchr, memchr2};
7
8#[inline]
15#[must_use]
16pub fn find_balanced_braces(input: &[u8]) -> Option<usize> {
17 if input.is_empty() || input[0] != b'{' {
18 return None;
19 }
20
21 let mut depth = 1;
22 let mut pos = 1;
23
24 while pos < input.len() {
27 if let Some(offset) = memchr2(b'{', b'}', &input[pos..]) {
28 let idx = pos + offset;
29 if is_escaped_delimiter(input, idx) {
30 pos = idx + 1;
31 continue;
32 }
33
34 match input[idx] {
35 b'{' => {
36 depth += 1;
37 pos = idx + 1;
38 }
39 b'}' => {
40 depth -= 1;
41 if depth == 0 {
42 return Some(idx + 1); }
44 pos = idx + 1;
45 }
46 _ => unreachable!(),
47 }
48 } else {
49 return None;
51 }
52 }
53
54 None }
56
57#[inline]
63#[must_use]
64pub fn find_balanced_quotes(input: &[u8]) -> Option<usize> {
65 if input.is_empty() || input[0] != b'"' {
66 return None;
67 }
68
69 let mut pos = 1;
70
71 while pos < input.len() {
74 let offset = memchr(b'"', &input[pos..])?;
75 let idx = pos + offset;
76 if is_escaped_delimiter(input, idx) {
77 pos = idx + 1;
78 continue;
79 }
80 return Some(idx + 1); }
82
83 None }
85
86#[inline]
88#[must_use]
89pub fn find_balanced_parentheses(input: &[u8]) -> Option<usize> {
90 if input.is_empty() || input[0] != b'(' {
91 return None;
92 }
93
94 let mut depth = 1;
95 let mut pos = 1;
96
97 while pos < input.len() {
99 if let Some(offset) = memchr::memchr2(b'(', b')', &input[pos..]) {
101 let idx = pos + offset;
102
103 match input[idx] {
104 b'(' => {
105 depth += 1;
106 pos = idx + 1;
107 }
108 b')' => {
109 depth -= 1;
110 if depth == 0 {
111 return Some(idx + 1); }
113 pos = idx + 1;
114 }
115 _ => unreachable!(),
116 }
117 } else {
118 return None;
120 }
121 }
122
123 None }
125
126#[inline]
127fn is_escaped_delimiter(input: &[u8], delimiter: usize) -> bool {
128 if delimiter == 0 || input[delimiter - 1] != b'\\' {
129 return false;
130 }
131
132 let mut slash_count = 0usize;
133 let mut pos = delimiter;
134
135 while pos > 0 && input[pos - 1] == b'\\' {
136 slash_count += 1;
137 pos -= 1;
138 }
139
140 slash_count % 2 == 1
141}
142
143#[inline]
145#[must_use]
146pub fn find_entry_start(input: &[u8]) -> Option<usize> {
147 memchr::memchr(b'@', input)
148}
149
150#[inline]
152#[must_use]
153pub fn find_field_separator(input: &[u8]) -> Option<usize> {
154 memchr::memchr(b'=', input)
155}
156
157#[inline]
159#[must_use]
160pub fn find_field_end(input: &[u8]) -> Option<usize> {
161 memchr::memchr3(b',', b'}', b')', input)
162}
163
164#[inline]
167#[must_use]
168pub fn scan_identifier(input: &[u8]) -> usize {
169 let mut pos = 0;
170 let len = input.len();
171
172 while pos + 4 <= len {
174 if !is_identifier_byte(input[pos]) {
176 return pos;
177 }
178 if !is_identifier_byte(input[pos + 1]) {
179 return pos + 1;
180 }
181 if !is_identifier_byte(input[pos + 2]) {
182 return pos + 2;
183 }
184 if !is_identifier_byte(input[pos + 3]) {
185 return pos + 3;
186 }
187 pos += 4;
188 }
189
190 while pos < len && is_identifier_byte(input[pos]) {
192 pos += 1;
193 }
194
195 pos
196}
197
198#[inline]
199const fn is_identifier_byte(byte: u8) -> bool {
200 IDENT_TABLE[byte as usize] == 1
201}
202
203const IDENT_TABLE: [u8; 256] = {
204 let mut table = [0u8; 256];
205 let mut byte = b'0';
206 while byte <= b'9' {
207 table[byte as usize] = 1;
208 byte += 1;
209 }
210 byte = b'A';
211 while byte <= b'Z' {
212 table[byte as usize] = 1;
213 byte += 1;
214 }
215 byte = b'a';
216 while byte <= b'z' {
217 table[byte as usize] = 1;
218 byte += 1;
219 }
220 table[b'_' as usize] = 1;
221 table[b'-' as usize] = 1;
222 table[b':' as usize] = 1;
223 table[b'.' as usize] = 1;
224 table
225};
226
227const WS_TABLE: [u8; 256] = {
229 let mut table = [0u8; 256];
230 table[b' ' as usize] = 1;
231 table[b'\t' as usize] = 1;
232 table[b'\n' as usize] = 1;
233 table[b'\r' as usize] = 1;
234 table
235};
236
237#[inline]
240#[must_use]
241pub const fn scan_whitespace(input: &[u8]) -> usize {
242 let mut pos = 0;
243 let len = input.len();
244
245 while pos + 4 <= len {
247 if WS_TABLE[input[pos] as usize] == 0 {
249 return pos;
250 }
251 if WS_TABLE[input[pos + 1] as usize] == 0 {
252 return pos + 1;
253 }
254 if WS_TABLE[input[pos + 2] as usize] == 0 {
255 return pos + 2;
256 }
257 if WS_TABLE[input[pos + 3] as usize] == 0 {
258 return pos + 3;
259 }
260 pos += 4;
261 }
262
263 while pos < len && WS_TABLE[input[pos] as usize] == 1 {
265 pos += 1;
266 }
267
268 pos
269}
270
271#[cfg(test)]
272mod tests {
273 use super::*;
274
275 #[test]
276 fn test_balanced_braces() {
277 assert_eq!(find_balanced_braces(b"{}"), Some(2));
278 assert_eq!(find_balanced_braces(b"{hello}"), Some(7));
279 assert_eq!(find_balanced_braces(b"{nested {braces}}"), Some(17));
280 assert_eq!(find_balanced_braces(b"{escaped \\} brace}"), Some(18));
281 assert_eq!(find_balanced_braces(b"{unclosed"), None);
282 assert_eq!(find_balanced_braces(b""), None);
283 assert_eq!(find_balanced_braces(b"not starting with brace"), None);
284 }
285
286 #[test]
287 fn test_balanced_quotes() {
288 assert_eq!(find_balanced_quotes(b"\"\""), Some(2));
289 assert_eq!(find_balanced_quotes(b"\"hello\""), Some(7));
290 assert_eq!(find_balanced_quotes(b"\"escaped \\\" quote\""), Some(18));
291 assert_eq!(find_balanced_quotes(b"\"unclosed"), None);
292 assert_eq!(find_balanced_quotes(b""), None);
293 assert_eq!(find_balanced_quotes(b"not starting with quote"), None);
294 }
295
296 #[test]
297 fn test_balanced_parentheses() {
298 assert_eq!(find_balanced_parentheses(b"()"), Some(2));
299 assert_eq!(find_balanced_parentheses(b"(hello)"), Some(7));
300 assert_eq!(find_balanced_parentheses(b"(nested (parens))"), Some(17));
301 assert_eq!(find_balanced_parentheses(b"(unclosed"), None);
302 }
303}