bibtex_parser/parser/
lexer.rs1use super::{delimiter, PResult};
4use memchr;
5use winnow::prelude::*;
6use winnow::{
7 ascii::digit1,
8 combinator::{alt, opt},
9};
10
11#[inline]
13pub fn identifier<'a>(input: &mut &'a str) -> PResult<'a, &'a str> {
14 let bytes = input.as_bytes();
15 let len = super::simd::scan_identifier(bytes);
16
17 if len == 0 {
18 return super::backtrack();
19 }
20
21 let result = &input[..len];
22 *input = &input[len..];
23 Ok(result)
24}
25
/// Parses a field name from the front of `input`.
///
/// This is a direct delegation to [`identifier`]: the returned slice, the
/// amount of input consumed and the failure behaviour are exactly those of
/// [`identifier`].
#[inline]
pub fn field_name<'a>(input: &mut &'a str) -> PResult<'a, &'a str> {
    identifier(input)
}
31
32#[inline]
34pub fn balanced_braces<'a>(input: &mut &'a str) -> PResult<'a, &'a str> {
35 let original_input = *input;
36 let bytes = input.as_bytes();
37 let mut depth = 0;
38 let mut pos = 0;
39
40 while pos < bytes.len() {
42 if let Some(offset) = memchr::memchr3(b'{', b'}', b'\\', &bytes[pos..]) {
44 let idx = pos + offset;
45
46 match bytes[idx] {
48 b'{' => {
49 depth += 1;
50 pos = idx + 1;
51 }
52 b'}' => {
53 if depth == 0 {
54 let result = &original_input[..idx];
55 *input = &input[idx..];
56 return Ok(result);
57 }
58 depth -= 1;
59 pos = idx + 1;
60 }
61 b'\\' => {
62 pos = idx + 2;
64 }
65 _ => unreachable!(),
66 }
67 } else {
68 break;
70 }
71 }
72
73 super::backtrack()
74}
75
76#[inline]
78pub fn quoted_string<'a>(input: &mut &'a str) -> PResult<'a, &'a str> {
79 let bytes = input.as_bytes();
80
81 super::simd::find_balanced_quotes(bytes).map_or_else(super::backtrack, |end_pos| {
83 let result = &input[1..end_pos - 1];
85 *input = &input[end_pos..];
86 Ok(result)
87 })
88}
89
90#[inline]
92pub fn number<'a>(input: &mut &'a str) -> PResult<'a, i64> {
93 let sign = opt(alt(('+', '-'))).parse_next(input)?;
94 let digits = digit1.parse_next(input)?;
95
96 let mut num = digits.parse::<i64>().map_err(|_| super::backtrack_err())?;
97
98 if sign == Some('-') {
99 num = -num;
100 }
101
102 Ok(num)
103}
104
105#[inline]
107pub fn balanced_parentheses<'a>(input: &mut &'a str) -> PResult<'a, &'a str> {
108 let original_input = *input;
109 let bytes = input.as_bytes();
110 let mut depth = 0;
111 let mut pos = 0;
112
113 while pos < bytes.len() {
115 if let Some(offset) = memchr::memchr2(b'(', b')', &bytes[pos..]) {
117 let idx = pos + offset;
118
119 match bytes[idx] {
120 b'(' => {
121 depth += 1;
122 pos = idx + 1;
123 }
124 b')' => {
125 if depth == 0 {
126 let result = &original_input[..idx];
127 *input = &input[idx..];
128 return Ok(result);
129 }
130 depth -= 1;
131 pos = idx + 1;
132 }
133 _ => unreachable!(),
134 }
135 } else {
136 break;
138 }
139 }
140
141 super::backtrack()
142}
143
/// Advances `input` past any leading spaces, tabs, line feeds and carriage
/// returns.
///
/// Only these four ASCII characters are treated as whitespace; other
/// whitespace (vertical tab, form feed, Unicode spaces) stops the skip. The
/// function cannot fail: when nothing needs skipping `input` is unchanged, and
/// when the input is all whitespace `input` becomes empty.
#[inline]
pub fn skip_whitespace(input: &mut &str) {
    let source: &str = *input;
    *input = source.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\n' | '\r'));
}
159
/// Skips leading spaces, tabs, line feeds and carriage returns, then reports
/// the first byte that follows without consuming it.
///
/// Returns `Some(byte)` with `input` positioned at that byte, or `None` with
/// `input` set to the empty string when nothing but whitespace remains. The
/// reported value is a raw byte, so for non-ASCII text it is the first byte of
/// the character's UTF-8 encoding.
#[inline]
pub(crate) fn skip_whitespace_peek(input: &mut &str) -> Option<u8> {
    let source: &str = *input;
    let first_kept = source
        .bytes()
        .position(|byte| !matches!(byte, b' ' | b'\t' | b'\n' | b'\r'));

    match first_kept {
        Some(at) => {
            // Everything before `at` is ASCII, so `at` is a character boundary.
            *input = &source[at..];
            Some(source.as_bytes()[at])
        }
        None => {
            *input = "";
            None
        }
    }
}
178
/// Searches `haystack` for the next delimiter byte, starting at byte offset
/// `start`.
///
/// This is a thin wrapper that forwards both arguments unchanged to
/// `delimiter::find_delimiter` and returns its result: `Some((index, byte))`
/// for a hit, `None` otherwise.
///
/// NOTE(review): presumably `index` is an absolute offset into `haystack` and
/// the delimiter set is `@`, `{`, `}`, `=` and `,` — the authoritative
/// definition lives in the `delimiter` module; confirm there.
#[must_use]
pub fn scan_to_bibtex_delimiter(haystack: &[u8], start: usize) -> Option<(usize, u8)> {
    delimiter::find_delimiter(haystack, start)
}
184
/// Unit tests for the lexer primitives.
///
/// Every parser test checks both the returned value and the input that is left
/// over, because callers rely on the exact position the lexer stops at.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_identifier() {
        let mut input = "hello-world_123:test.com xxx";
        let result = identifier(&mut input).unwrap();
        assert_eq!(result, "hello-world_123:test.com");
        assert_eq!(input, " xxx");
    }

    #[test]
    fn test_field_name() {
        // `field_name` delegates to `identifier`, so it stops at the same place.
        let mut input = "hello = {world}";
        let result = field_name(&mut input).unwrap();
        assert_eq!(result, "hello");
        assert_eq!(input, " = {world}");
    }

    #[test]
    fn test_balanced_braces() {
        let mut input = "hello {nested {braces}} world} xxx";
        let result = balanced_braces(&mut input).unwrap();
        assert_eq!(result, "hello {nested {braces}} world");
        assert_eq!(input, "} xxx");
    }

    #[test]
    fn test_balanced_braces_with_spaces() {
        let mut input = "Second preamble} xxx";
        let result = balanced_braces(&mut input).unwrap();
        assert_eq!(result, "Second preamble");
        assert_eq!(input, "} xxx");
    }

    #[test]
    fn test_balanced_parentheses() {
        let mut input = "hello (nested (parens)) world) xxx";
        let result = balanced_parentheses(&mut input).unwrap();
        assert_eq!(result, "hello (nested (parens)) world");
        assert_eq!(input, ") xxx");
    }

    #[test]
    fn test_quoted_string() {
        let mut input = r#""hello \"world\"" xxx"#;
        let result = quoted_string(&mut input).unwrap();
        assert_eq!(result, r#"hello \"world\""#);
        assert_eq!(input, " xxx");

        let mut input = r#""hello {world}" xxx"#;
        let result = quoted_string(&mut input).unwrap();
        assert_eq!(result, "hello {world}");
        assert_eq!(input, " xxx");
    }

    #[test]
    fn test_number() {
        let mut input = "42 xxx";
        assert_eq!(number(&mut input).unwrap(), 42);
        assert_eq!(input, " xxx");

        let mut input = "-42 xxx";
        assert_eq!(number(&mut input).unwrap(), -42);
        assert_eq!(input, " xxx");

        let mut input = "+42 xxx";
        assert_eq!(number(&mut input).unwrap(), 42);
        assert_eq!(input, " xxx");
    }

    #[test]
    fn test_skip_whitespace() {
        let mut input = " \t\r\n  hello world ";
        skip_whitespace(&mut input);
        assert_eq!(input, "hello world ");

        // Nothing to skip: the input must be left alone.
        let mut input = "hello";
        skip_whitespace(&mut input);
        assert_eq!(input, "hello");

        // Whitespace only: everything is consumed.
        let mut input = " \n";
        skip_whitespace(&mut input);
        assert_eq!(input, "");
    }

    #[test]
    fn test_skip_whitespace_peek() {
        let mut input = "  \n@article";
        assert_eq!(skip_whitespace_peek(&mut input), Some(b'@'));
        assert_eq!(input, "@article");

        // Whitespace only: nothing to peek at and the input is exhausted.
        let mut input = " \t ";
        assert_eq!(skip_whitespace_peek(&mut input), None);
        assert_eq!(input, "");
    }

    #[test]
    fn test_scan_to_bibtex_delimiter() {
        let input = b"hello @ world { test } = value, end";

        assert_eq!(scan_to_bibtex_delimiter(input, 0), Some((6, b'@')));
        assert_eq!(scan_to_bibtex_delimiter(input, 7), Some((14, b'{')));
        assert_eq!(scan_to_bibtex_delimiter(input, 15), Some((21, b'}')));
        assert_eq!(scan_to_bibtex_delimiter(input, 22), Some((23, b'=')));
        assert_eq!(scan_to_bibtex_delimiter(input, 24), Some((30, b',')));
        assert_eq!(scan_to_bibtex_delimiter(input, 31), None);
    }
}