1pub mod delimiter;
46pub mod entry;
47pub mod lexer;
48pub mod simd;
49pub mod utils;
50pub mod value;
51
52use crate::{Error, Result, SourceMap, SourceSpan};
53use winnow::ascii::multispace0;
54use winnow::prelude::*;
55
56pub use entry::parse_entry;
57
/// Result type shared by the parsers in this module: a `winnow::PResult` whose error type is
/// `winnow::error::ContextError`.
///
/// The `'a` lifetime parameter does not appear in the aliased type; parser signatures use it
/// to name the lifetime of the input they borrow from.
pub type PResult<'a, O> = winnow::PResult<O, winnow::error::ContextError>;
60
61#[cold]
62#[inline(never)]
63pub(crate) fn backtrack_err() -> winnow::error::ErrMode<winnow::error::ContextError> {
64 winnow::error::ErrMode::Backtrack(winnow::error::ContextError::default())
65}
66
67#[cold]
68#[inline(never)]
69pub(crate) fn backtrack<O>() -> PResult<'static, O> {
70 Err(backtrack_err())
71}
72
73#[inline]
129pub fn parse_bibtex(input: &str) -> Result<Vec<ParsedItem<'_>>> {
130 let mut items = Vec::new();
131 parse_bibtex_stream(input, |item| {
132 items.push(item);
133 Ok(())
134 })?;
135 Ok(items)
136}
137
138#[inline]
143pub(crate) fn parse_bibtex_stream<'a, F>(input: &'a str, mut on_item: F) -> Result<()>
144where
145 F: FnMut(ParsedItem<'a>) -> Result<()>,
146{
147 let mut remaining = input;
148
149 loop {
150 lexer::skip_whitespace(&mut remaining);
152 if remaining.is_empty() {
153 break;
154 }
155
156 match parse_item(&mut remaining) {
158 Ok(item) => on_item(item)?,
159 Err(e) => {
160 let consumed = input.len() - remaining.len();
162 let (line, column) = calculate_position(input, consumed);
163
164 return Err(Error::ParseError {
165 line,
166 column,
167 message: format!("Failed to parse entry: {e}"),
168 snippet: Some(get_snippet(remaining, 40)),
169 });
170 }
171 }
172 }
173
174 Ok(())
175}
176
177#[inline]
179pub(crate) fn parse_bibtex_stream_with_spans<'a, F>(input: &'a str, mut on_item: F) -> Result<()>
180where
181 F: FnMut(ParsedItem<'a>, SourceSpan, &'a str) -> Result<()>,
182{
183 let source_map = SourceMap::anonymous(input);
184 let mut remaining = input;
185
186 loop {
187 lexer::skip_whitespace(&mut remaining);
188 if remaining.is_empty() {
189 break;
190 }
191
192 let start = input.len() - remaining.len();
193 let before_item = remaining;
194 match parse_item(&mut remaining) {
195 Ok(item) => {
196 let end = input.len() - remaining.len();
197 let span = source_map.span(start, end);
198 on_item(item, span, &input[start..end])?;
199 }
200 Err(e) => {
201 let (line, column) = calculate_position(input, start);
202
203 return Err(Error::ParseError {
204 line,
205 column,
206 message: format!("Failed to parse entry: {e}"),
207 snippet: Some(get_snippet(before_item, 40)),
208 });
209 }
210 }
211 }
212
213 Ok(())
214}
215
216#[inline]
217pub(crate) fn parse_bibtex_stream_with_entry_locations<'a, F>(
218 input: &'a str,
219 mut on_item: F,
220) -> Result<()>
221where
222 F: FnMut(LocatedParsedItem<'a>, usize, usize, &'a str) -> Result<()>,
223{
224 let mut remaining = input;
225
226 loop {
227 lexer::skip_whitespace(&mut remaining);
228 if remaining.is_empty() {
229 break;
230 }
231
232 let start = input.len() - remaining.len();
233 let before_item = remaining;
234 match parse_item_with_entry_locations(&mut remaining, start) {
235 Ok(item) => {
236 let end = input.len() - remaining.len();
237 on_item(item, start, end, &input[start..end])?;
238 }
239 Err(e) => {
240 let (line, column) = calculate_position(input, start);
241
242 return Err(Error::ParseError {
243 line,
244 column,
245 message: format!("Failed to parse entry: {e}"),
246 snippet: Some(get_snippet(before_item, 40)),
247 });
248 }
249 }
250 }
251
252 Ok(())
253}
254
/// One top-level item produced by the parser.
#[derive(Debug, Clone, PartialEq)]
pub enum ParsedItem<'a> {
    /// A regular entry (anything after `@` that is not `string`, `preamble` or `comment`).
    Entry(crate::Entry<'a>),

    /// An `@string` definition: the identifier and the value assigned to it.
    String(&'a str, crate::Value<'a>),

    /// The value of an `@preamble` block.
    Preamble(crate::Value<'a>),

    /// Comment text: either the body of an `@comment` block or free text found between items.
    Comment(&'a str),
}
316
/// Counterpart of [`ParsedItem`] produced by `parse_item_with_entry_locations`; its `Entry`
/// variant carries an `entry::LocatedEntry` instead of a plain entry.
pub(crate) enum LocatedParsedItem<'a> {
    /// A regular entry, parsed by `entry::parse_entry_at_with_locations`.
    Entry(entry::LocatedEntry<'a>),
    /// An `@string` definition: the identifier and the value assigned to it.
    String(&'a str, crate::Value<'a>),
    /// The value of an `@preamble` block.
    Preamble(crate::Value<'a>),
    /// Comment text: either the body of an `@comment` block or free text found between items.
    Comment(&'a str),
}
323
324#[inline]
326pub(crate) fn parse_item<'a>(input: &mut &'a str) -> PResult<'a, ParsedItem<'a>> {
327 let bytes = input.as_bytes();
329
330 if !bytes.is_empty() && bytes[0] != b'@' {
332 if let Some(at_pos) = delimiter::find_byte(bytes, b'@', 0) {
334 let comment = &input[..at_pos];
335 *input = &input[at_pos..];
336 return Ok(ParsedItem::Comment(comment));
337 }
338 let comment = *input;
340 *input = "";
341 return Ok(ParsedItem::Comment(comment));
342 }
343
344 let second = bytes.get(1).copied().unwrap_or_default();
347 match second | 0x20 {
348 b's' if starts_with_keyword(bytes, b"string") => {
349 parse_string(input).map(|(k, v)| ParsedItem::String(k, v))
350 }
351 b'p' if starts_with_keyword(bytes, b"preamble") => {
352 parse_preamble(input).map(ParsedItem::Preamble)
353 }
354 b'c' if starts_with_keyword(bytes, b"comment") => {
355 parse_comment(input).map(ParsedItem::Comment)
356 }
357 _ => entry::parse_entry_at(input).map(ParsedItem::Entry),
358 }
359}
360
361#[inline]
362fn parse_item_with_entry_locations<'a>(
363 input: &mut &'a str,
364 absolute_start: usize,
365) -> PResult<'a, LocatedParsedItem<'a>> {
366 let bytes = input.as_bytes();
367
368 if !bytes.is_empty() && bytes[0] != b'@' {
369 if let Some(at_pos) = delimiter::find_byte(bytes, b'@', 0) {
370 let comment = &input[..at_pos];
371 *input = &input[at_pos..];
372 return Ok(LocatedParsedItem::Comment(comment));
373 }
374 let comment = *input;
375 *input = "";
376 return Ok(LocatedParsedItem::Comment(comment));
377 }
378
379 let second = bytes.get(1).copied().unwrap_or_default();
380 match second | 0x20 {
381 b's' if starts_with_keyword(bytes, b"string") => {
382 parse_string(input).map(|(k, v)| LocatedParsedItem::String(k, v))
383 }
384 b'p' if starts_with_keyword(bytes, b"preamble") => {
385 parse_preamble(input).map(LocatedParsedItem::Preamble)
386 }
387 b'c' if starts_with_keyword(bytes, b"comment") => {
388 parse_comment(input).map(LocatedParsedItem::Comment)
389 }
390 _ => entry::parse_entry_at_with_locations(input, absolute_start)
391 .map(LocatedParsedItem::Entry),
392 }
393}
394
395#[inline(never)]
396fn starts_with_keyword(input: &[u8], keyword: &[u8]) -> bool {
397 if input.first() != Some(&b'@') || input.len() < keyword.len() + 1 {
398 return false;
399 }
400
401 for (offset, &expected) in keyword.iter().enumerate() {
402 if (input[offset + 1] | 0x20) != expected {
403 return false;
404 }
405 }
406
407 if input.len() == keyword.len() + 1 {
408 return true;
409 }
410
411 !is_identifier_char(input[keyword.len() + 1])
412}
413
/// Returns `true` for bytes treated as part of an identifier: ASCII letters and digits
/// plus `_`, `-`, `:` and `.`.
#[inline]
const fn is_identifier_char(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
421
422fn parse_string<'a>(input: &mut &'a str) -> PResult<'a, (&'a str, crate::Value<'a>)> {
424 use winnow::combinator::{alt, delimited, preceded};
425
426 preceded(
427 (multispace0, '@', utils::tag_no_case("string"), multispace0),
428 alt((
429 delimited('{', parse_string_content, '}'),
430 delimited('(', parse_string_content, ')'),
431 )),
432 )
433 .parse_next(input)
434}
435
436fn parse_string_content<'a>(input: &mut &'a str) -> PResult<'a, (&'a str, crate::Value<'a>)> {
438 use winnow::combinator::separated_pair;
439
440 separated_pair(
441 utils::ws(lexer::identifier),
442 utils::ws('='),
443 utils::ws(value::parse_value),
444 )
445 .parse_next(input)
446}
447
448fn parse_preamble<'a>(input: &mut &'a str) -> PResult<'a, crate::Value<'a>> {
450 use winnow::combinator::{alt, delimited, preceded};
451
452 preceded(
453 (
454 multispace0,
455 '@',
456 utils::tag_no_case("preamble"),
457 multispace0,
458 ),
459 alt((
460 delimited('{', parse_preamble_value, '}'),
461 delimited('(', parse_preamble_value, ')'),
462 )),
463 )
464 .parse_next(input)
465}
466
467fn parse_preamble_value<'a>(input: &mut &'a str) -> PResult<'a, crate::Value<'a>> {
469 utils::ws(value::parse_value).parse_next(input)
470}
471
472fn parse_comment<'a>(input: &mut &'a str) -> PResult<'a, &'a str> {
474 use winnow::ascii::till_line_ending;
475 use winnow::combinator::{alt, delimited, preceded};
476 use winnow::token::take_until;
477
478 alt((
479 preceded(
481 (multispace0, '@', utils::tag_no_case("comment"), multispace0),
482 alt((
483 delimited('{', lexer::balanced_braces, '}'),
484 delimited('(', lexer::balanced_parentheses, ')'),
485 )),
486 ),
487 preceded('%', till_line_ending),
489 take_until(1.., "@").verify(|s: &str| !s.trim().is_empty()),
491 ))
492 .parse_next(input)
493}
494
/// Converts the byte offset `pos` in `input` into a 1-based `(line, column)` pair.
///
/// Every character that starts before `pos` is counted: `'\n'` starts a new line and resets
/// the column to 1, any other character advances the column by one. Columns therefore count
/// characters, not bytes. An offset past the end yields the position just after the last
/// character.
fn calculate_position(input: &str, pos: usize) -> (usize, usize) {
    input
        .char_indices()
        .take_while(|&(offset, _)| offset < pos)
        .fold((1, 1), |(line, column), (_, ch)| {
            if ch == '\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
514
/// Returns at most the first `max_len` characters of `input`, followed by `...` when the
/// input was actually truncated.
///
/// `max_len` is measured in characters (Unicode scalar values), not bytes, so text
/// containing multi-byte characters only receives the `...` suffix when characters were
/// really cut off.
fn get_snippet(input: &str, max_len: usize) -> String {
    // The character at index `max_len` (if any) is the first one that does not fit; its
    // byte offset is a valid place to cut the string.
    match input.char_indices().nth(max_len) {
        Some((cut, _)) => format!("{}...", &input[..cut]),
        None => input.to_owned(),
    }
}