1use super::{lexer, value, PResult};
4use crate::model::{Entry, EntryType, Field};
5use crate::{EntryDelimiter, Value, ValueDelimiter};
6use std::borrow::Cow;
7
/// Initial capacity for per-entry field vectors. Presumably sized so a
/// typical bibliography entry never reallocates — TODO confirm against
/// real-world entry statistics.
const DEFAULT_FIELD_CAPACITY: usize = 17;
9
/// An [`Entry`] parsed together with the byte spans of its syntactic parts.
///
/// All spans are `(start, end)` byte offsets with `end` exclusive, measured
/// from the same origin as the `absolute_start` value passed to the parser
/// (i.e. offsets into the original source, not into the remaining input).
#[derive(Debug, Clone)]
pub(crate) struct LocatedEntry<'a> {
    // The parsed entry itself (type, key, fields).
    pub(crate) entry: Entry<'a>,
    // Span of the entry-type identifier (the text right after `@`).
    pub(crate) entry_type: (usize, usize),
    // Span of the citation key.
    pub(crate) key: (usize, usize),
    // Whether the entry body was wrapped in `{...}` or `(...)`.
    pub(crate) delimiter: EntryDelimiter,
    // Location information for each parsed field, in source order.
    pub(crate) fields: Vec<LocatedField>,
}
18
/// Byte spans for one `name = value` field inside an entry.
///
/// Spans follow the same absolute-offset convention as [`LocatedEntry`].
#[derive(Debug, Clone, Copy)]
pub(crate) struct LocatedField {
    // Span of the whole field, including a trailing `,` when present.
    pub(crate) whole: (usize, usize),
    // Span of the field name.
    pub(crate) name: (usize, usize),
    // Span of the value, with trailing ASCII whitespace trimmed off.
    pub(crate) value: (usize, usize),
    // How the value was delimited (`{}`, `""`, bare, or `#` concatenation).
    pub(crate) value_delimiter: ValueDelimiter,
}
26
/// Parses a single entry, first skipping any whitespace before the `@`
/// marker.
///
/// On success `input` is advanced past the entry; otherwise the parser
/// backtracks (see [`parse_entry_at`]).
#[inline]
pub fn parse_entry<'a>(input: &mut &'a str) -> PResult<'a, Entry<'a>> {
    lexer::skip_whitespace(input);
    parse_entry_at(input)
}
33
34#[inline]
36pub fn parse_entry_at<'a>(input: &mut &'a str) -> PResult<'a, Entry<'a>> {
37 match input.as_bytes().first() {
38 Some(b'@') => {
39 *input = &input[1..];
40 parse_entry_content(input)
41 }
42 _ => super::backtrack(),
43 }
44}
45
46#[inline]
47pub(crate) fn parse_entry_at_with_locations<'a>(
48 input: &mut &'a str,
49 absolute_start: usize,
50) -> PResult<'a, LocatedEntry<'a>> {
51 let root = *input;
52 match input.as_bytes().first() {
53 Some(b'@') => {
54 *input = &input[1..];
55 parse_entry_content_with_locations(input, root, absolute_start)
56 }
57 _ => super::backtrack(),
58 }
59}
60
61#[inline]
62fn parse_entry_content<'a>(input: &mut &'a str) -> PResult<'a, Entry<'a>> {
63 let entry_type_str = lexer::identifier(input)?;
64 let entry_type = EntryType::parse(entry_type_str);
65
66 lexer::skip_whitespace(input);
67
68 let closing_delimiter = match input.as_bytes().first() {
69 Some(b'{') => b'}',
70 Some(b'(') => b')',
71 _ => return super::backtrack(),
72 };
73 *input = &input[1..];
74
75 parse_entry_body(input, entry_type, closing_delimiter)
76}
77
78#[inline]
79fn parse_entry_content_with_locations<'a>(
80 input: &mut &'a str,
81 root: &'a str,
82 absolute_start: usize,
83) -> PResult<'a, LocatedEntry<'a>> {
84 let entry_type_start = source_offset(root, input, absolute_start);
85 let entry_type_str = lexer::identifier(input)?;
86 let entry_type_end = source_offset(root, input, absolute_start);
87 let entry_type = EntryType::parse(entry_type_str);
88
89 lexer::skip_whitespace(input);
90
91 let opening = match input.as_bytes().first() {
92 Some(b'{') => b'{',
93 Some(b'(') => b'(',
94 _ => return super::backtrack(),
95 };
96 let (delimiter, closing_delimiter) = match opening {
97 b'{' => (EntryDelimiter::Braces, b'}'),
98 b'(' => (EntryDelimiter::Parentheses, b')'),
99 _ => unreachable!(),
100 };
101 *input = &input[1..];
102
103 parse_entry_body_with_locations(
104 input,
105 root,
106 absolute_start,
107 entry_type,
108 (entry_type_start, entry_type_end),
109 delimiter,
110 closing_delimiter,
111 )
112}
113
114#[inline]
116fn parse_entry_body<'a>(
117 input: &mut &'a str,
118 entry_type: EntryType<'a>,
119 closing_delimiter: u8,
120) -> PResult<'a, Entry<'a>> {
121 lexer::skip_whitespace(input);
122 let key = lexer::identifier(input)?;
123
124 lexer::skip_whitespace(input);
125 expect_byte(input, b',')?;
126
127 let fields = parse_fields(input, closing_delimiter)?;
128 expect_byte(input, closing_delimiter)?;
129
130 Ok(Entry {
131 ty: entry_type,
132 key: Cow::Borrowed(key),
133 fields,
134 })
135}
136
/// Location-tracking variant of [`parse_entry_body`]: parses
/// `key, name = value, ...` up to and including `closing_delimiter`,
/// recording absolute byte spans for the key and every field.
///
/// `entry_type_location` and `delimiter` were determined by the caller and
/// are passed straight through into the resulting [`LocatedEntry`].
#[inline]
fn parse_entry_body_with_locations<'a>(
    input: &mut &'a str,
    root: &'a str,
    absolute_start: usize,
    entry_type: EntryType<'a>,
    entry_type_location: (usize, usize),
    delimiter: EntryDelimiter,
    closing_delimiter: u8,
) -> PResult<'a, LocatedEntry<'a>> {
    lexer::skip_whitespace(input);
    // Key span: offsets measured before and after the identifier is consumed.
    let key_start = source_offset(root, input, absolute_start);
    let key = lexer::identifier(input)?;
    let key_end = source_offset(root, input, absolute_start);

    lexer::skip_whitespace(input);
    expect_byte(input, b',')?;

    let (fields, field_locations) =
        parse_fields_with_locations(input, root, absolute_start, closing_delimiter)?;
    expect_byte(input, closing_delimiter)?;

    Ok(LocatedEntry {
        entry: Entry {
            ty: entry_type,
            key: Cow::Borrowed(key),
            fields,
        },
        entry_type: entry_type_location,
        key: (key_start, key_end),
        delimiter,
        fields: field_locations,
    })
}
171
172#[inline]
173fn expect_byte<'a>(input: &mut &'a str, byte: u8) -> PResult<'a, ()> {
174 match input.as_bytes().first() {
175 Some(&b) if b == byte => {
176 *input = &input[1..];
177 Ok(())
178 }
179 _ => super::backtrack(),
180 }
181}
182
183#[inline]
185fn parse_fields<'a>(input: &mut &'a str, closing_delimiter: u8) -> PResult<'a, Vec<Field<'a>>> {
186 let mut fields = Vec::with_capacity(DEFAULT_FIELD_CAPACITY);
187
188 while let Some(first) = lexer::skip_whitespace_peek(input) {
189 if first == closing_delimiter {
190 break;
191 }
192
193 let name = lexer::field_name(input)?;
194 lexer::skip_whitespace(input);
195 expect_byte(input, b'=')?;
196 lexer::skip_whitespace(input);
197 let value = value::parse_value_field(input)?;
198
199 fields.push(Field {
200 name: Cow::Borrowed(name),
201 value,
202 });
203
204 match input.as_bytes().first() {
205 Some(b',') => {
206 *input = &input[1..];
207 }
208 Some(&b) if b == closing_delimiter => {}
209 _ => return super::backtrack(),
210 }
211 }
212
213 let max_reasonable_capacity = (fields.len() * 2).max(8);
214 if fields.capacity() > max_reasonable_capacity {
215 fields.shrink_to_fit();
216 }
217
218 Ok(fields)
219}
220
/// Location-tracking variant of [`parse_fields`].
///
/// Returns the parsed fields plus a parallel vector of [`LocatedField`]s
/// (same length, same order). All offsets are absolute, i.e. relative to
/// the same origin as `absolute_start`.
#[inline]
fn parse_fields_with_locations<'a>(
    input: &mut &'a str,
    root: &'a str,
    absolute_start: usize,
    closing_delimiter: u8,
) -> PResult<'a, (Vec<Field<'a>>, Vec<LocatedField>)> {
    let mut fields = Vec::with_capacity(DEFAULT_FIELD_CAPACITY);
    let mut locations = Vec::with_capacity(DEFAULT_FIELD_CAPACITY);
    let root_bytes = root.as_bytes();

    while let Some(first) = lexer::skip_whitespace_peek(input) {
        // Stop (without consuming) at the entry's closing delimiter.
        if first == closing_delimiter {
            break;
        }

        // The field starts at its name: `name = value`.
        let field_start = source_offset(root, input, absolute_start);
        let name_start = field_start;
        let name = lexer::field_name(input)?;
        let name_end = source_offset(root, input, absolute_start);

        lexer::skip_whitespace(input);
        expect_byte(input, b'=')?;
        lexer::skip_whitespace(input);

        let value_start = source_offset(root, input, absolute_start);
        let parsed_value = value::parse_value_field(input)?;
        let value_boundary = source_offset(root, input, absolute_start);
        // The value parser may leave trailing whitespace inside the span;
        // trim it so `value` covers only the value text itself.
        let value_end = trim_ascii_whitespace_end_absolute(
            root_bytes,
            absolute_start,
            value_start,
            value_boundary,
        );
        let value_delimiter = value_delimiter_from_parse(
            &parsed_value,
            root_bytes,
            absolute_start,
            value_start,
            value_end,
        );

        // `whole` extends through the trailing comma when one is present.
        let mut whole_end = value_end;
        match input.as_bytes().first() {
            Some(b',') => {
                whole_end = source_offset(root, input, absolute_start) + 1;
                *input = &input[1..];
            }
            // Last field: leave the closing delimiter for the caller.
            Some(&b) if b == closing_delimiter => {}
            _ => return super::backtrack(),
        }

        fields.push(Field {
            name: Cow::Borrowed(name),
            value: parsed_value,
        });
        locations.push(LocatedField {
            whole: (field_start, whole_end),
            name: (name_start, name_end),
            value: (value_start, value_end),
            value_delimiter,
        });
    }

    // Release over-allocation when far fewer than DEFAULT_FIELD_CAPACITY
    // fields were parsed.
    let max_reasonable_capacity = (fields.len() * 2).max(8);
    if fields.capacity() > max_reasonable_capacity {
        fields.shrink_to_fit();
    }
    if locations.capacity() > max_reasonable_capacity {
        locations.shrink_to_fit();
    }

    Ok((fields, locations))
}
295
/// Absolute offset of the current parse position.
///
/// Relies on `input` being a suffix of `root` (both views of the same
/// buffer), so the byte count consumed so far is the length difference.
#[inline]
const fn source_offset(root: &str, input: &str, absolute_start: usize) -> usize {
    let consumed = root.len() - input.len();
    absolute_start + consumed
}
300
/// Moves `end` left past any trailing ASCII whitespace in the window
/// `bytes[start - absolute_start .. end - absolute_start]`, returning the
/// new absolute end offset (never earlier than `start`).
#[inline]
fn trim_ascii_whitespace_end_absolute(
    bytes: &[u8],
    absolute_start: usize,
    start: usize,
    end: usize,
) -> usize {
    let lo = start - absolute_start;
    let hi = end - absolute_start;
    // Last non-whitespace byte in the window; if there is none, the
    // trimmed span collapses to `start`.
    let trimmed = bytes[lo..hi]
        .iter()
        .rposition(|b| !b.is_ascii_whitespace())
        .map_or(lo, |i| lo + i + 1);
    absolute_start + trimmed
}
315
316#[inline]
317fn value_delimiter_from_parse(
318 value: &Value<'_>,
319 bytes: &[u8],
320 absolute_start: usize,
321 start: usize,
322 end: usize,
323) -> ValueDelimiter {
324 if matches!(value, Value::Concat(_)) {
325 return ValueDelimiter::Concatenation;
326 }
327
328 let start = start - absolute_start;
329 let end = end - absolute_start;
330 match bytes.get(start..end).and_then(|raw| raw.first()).copied() {
331 Some(b'{') => ValueDelimiter::Braces,
332 Some(b'"') => ValueDelimiter::Quotes,
333 _ => ValueDelimiter::Bare,
334 }
335}
336
// Unit tests for entry parsing: delimiter pairs, value forms, and
// trailing-comma handling.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Value;
    use std::borrow::Cow;

    // Braced entry mixing quoted, braced, and bare-numeric field values;
    // also exercises non-ASCII text inside a braced value.
    #[test]
    fn test_parse_simple_entry() {
        let mut input = r#"@article{einstein1905,
            author = "Albert Einstein",
            title = {Zur Elektrodynamik bewegter Körper},
            year = 1905
        }"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.ty, EntryType::Article);
        assert_eq!(entry.key, Cow::Borrowed("einstein1905"));
        assert_eq!(entry.fields.len(), 3);

        assert_eq!(entry.fields[0].name, "author");
        assert_eq!(
            entry.fields[0].value,
            Value::Literal(Cow::Borrowed("Albert Einstein"))
        );

        assert_eq!(entry.fields[1].name, "title");
        assert_eq!(
            entry.fields[1].value,
            Value::Literal(Cow::Borrowed("Zur Elektrodynamik bewegter Körper"))
        );

        assert_eq!(entry.fields[2].name, "year");
        assert_eq!(entry.fields[2].value, Value::Number(1905));
    }

    // `#`-concatenation of a variable and a quoted literal, in both orders.
    #[test]
    fn test_parse_entry_with_concatenation() {
        let mut input = r#"@misc{test,
            author = name # " et al.",
            note = "See " # url
        }"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.ty, EntryType::Misc);
        assert_eq!(entry.key, Cow::Borrowed("test"));
        assert_eq!(entry.fields.len(), 2);

        match &entry.fields[0].value {
            Value::Concat(parts) => {
                assert_eq!(parts.len(), 2);
                assert_eq!(parts[0], Value::Variable(Cow::Borrowed("name")));
                assert_eq!(parts[1], Value::Literal(Cow::Borrowed(" et al.")));
            }
            _ => panic!("Expected concatenated value"),
        }
    }

    // A comma after the last field must not produce an extra empty field.
    #[test]
    fn test_parse_entry_with_trailing_comma() {
        let mut input = r#"@book{knuth1984,
            author = "Donald Knuth",
            title = "The TeXbook",
            year = 1984,
        }"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.fields.len(), 3);
    }

    // Same entry as the simple test but wrapped in `(...)` instead of `{...}`.
    #[test]
    fn test_parse_entry_with_parentheses() {
        let mut input = r#"@article(einstein1905,
            author = "Albert Einstein",
            title = {Zur Elektrodynamik bewegter Körper},
            year = 1905
        )"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.ty, EntryType::Article);
        assert_eq!(entry.key, Cow::Borrowed("einstein1905"));
        assert_eq!(entry.fields.len(), 3);

        assert_eq!(entry.fields[0].name, "author");
        assert_eq!(
            entry.fields[0].value,
            Value::Literal(Cow::Borrowed("Albert Einstein"))
        );

        assert_eq!(entry.fields[1].name, "title");
        assert_eq!(
            entry.fields[1].value,
            Value::Literal(Cow::Borrowed("Zur Elektrodynamik bewegter Körper"))
        );

        assert_eq!(entry.fields[2].name, "year");
        assert_eq!(entry.fields[2].value, Value::Number(1905));
    }

    // Parenthesized entry whose braced value contains nested braces:
    // the inner braces must survive inside the literal.
    #[test]
    fn test_parse_entry_mixed_delimiters() {
        let mut input = r#"@book(test2024,
            title = {A Title with {Nested} Braces},
            author = "John Doe"
        )"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.ty, EntryType::Book);
        assert_eq!(entry.key, Cow::Borrowed("test2024"));
        assert_eq!(entry.fields.len(), 2);

        assert_eq!(entry.fields[0].name, "title");
        assert_eq!(
            entry.fields[0].value,
            Value::Literal(Cow::Borrowed("A Title with {Nested} Braces"))
        );

        assert_eq!(entry.fields[1].name, "author");
        assert_eq!(
            entry.fields[1].value,
            Value::Literal(Cow::Borrowed("John Doe"))
        );
    }
}