pub mod delimiter;
pub mod entry;
pub mod lexer;
pub mod simd;
pub mod utils;
pub mod value;
use crate::{Error, Result, SourceMap, SourceSpan};
use winnow::ascii::multispace0;
use winnow::prelude::*;
pub use entry::parse_entry;
/// Result type returned by the parsers in this module: a `winnow::PResult`
/// whose error payload is `winnow::error::ContextError`.
///
/// The lifetime parameter `'a` does not appear in the aliased type itself; the
/// parsers in this file pass the lifetime of the input they borrow from.
pub type PResult<'a, O> = winnow::PResult<O, winnow::error::ContextError>;
#[cold]
#[inline(never)]
pub(crate) fn backtrack_err() -> winnow::error::ErrMode<winnow::error::ContextError> {
winnow::error::ErrMode::Backtrack(winnow::error::ContextError::default())
}
/// Returns `Err` holding the error produced by [`backtrack_err`], for any
/// output type `O`.
#[cold]
#[inline(never)]
pub(crate) fn backtrack<O>() -> PResult<'static, O> {
    let failure = backtrack_err();
    Err(failure)
}
/// Parses `input` and returns every item found, in source order.
///
/// This is a collecting wrapper around [`parse_bibtex_stream`]: each item the
/// stream parser yields is pushed onto a vector.
///
/// # Errors
///
/// Returns whatever error [`parse_bibtex_stream`] reports; items collected
/// before the failure are discarded.
#[inline]
pub fn parse_bibtex(input: &str) -> Result<Vec<ParsedItem<'_>>> {
    let mut collected = Vec::new();
    let outcome = parse_bibtex_stream(input, |item| {
        collected.push(item);
        Ok(())
    });
    outcome.map(|()| collected)
}
/// Parses `input` item by item, handing each parsed item to `on_item`.
///
/// Whitespace between items is skipped with `lexer::skip_whitespace`; parsing
/// stops successfully once nothing but whitespace is left.
///
/// # Errors
///
/// * Any error returned by `on_item` is propagated unchanged.
/// * If an item fails to parse, an `Error::ParseError` is returned. Its
///   line/column and snippet are derived from the cursor as the item parser
///   left it after the failed attempt (not from the position where the item
///   started).
#[inline]
pub(crate) fn parse_bibtex_stream<'a, F>(input: &'a str, mut on_item: F) -> Result<()>
where
    F: FnMut(ParsedItem<'a>) -> Result<()>,
{
    let mut cursor = input;
    loop {
        lexer::skip_whitespace(&mut cursor);
        if cursor.is_empty() {
            return Ok(());
        }

        let item = match parse_item(&mut cursor) {
            Ok(item) => item,
            Err(e) => {
                // Offset of the cursor after the failed parse attempt.
                let offset = input.len() - cursor.len();
                let (line, column) = calculate_position(input, offset);
                let message = format!("Failed to parse entry: {e}");
                let snippet = Some(get_snippet(cursor, 40));
                return Err(Error::ParseError {
                    line,
                    column,
                    message,
                    snippet,
                });
            }
        };
        on_item(item)?;
    }
}
/// Parses `input` item by item, handing each item to `on_item` together with
/// its `SourceSpan` and the exact source slice it was parsed from.
///
/// Spans are produced by a `SourceMap::anonymous(input)` from the byte offsets
/// at which the item starts (after skipping leading whitespace) and ends.
///
/// # Errors
///
/// * Any error returned by `on_item` is propagated unchanged.
/// * If an item fails to parse, an `Error::ParseError` is returned whose
///   line/column and snippet refer to the start of the failing item.
#[inline]
pub(crate) fn parse_bibtex_stream_with_spans<'a, F>(input: &'a str, mut on_item: F) -> Result<()>
where
    F: FnMut(ParsedItem<'a>, SourceSpan, &'a str) -> Result<()>,
{
    let source_map = SourceMap::anonymous(input);
    let mut cursor = input;
    loop {
        lexer::skip_whitespace(&mut cursor);
        if cursor.is_empty() {
            return Ok(());
        }

        // Remember where the item begins so errors and spans can refer to it
        // regardless of how far the parser advances.
        let start = input.len() - cursor.len();
        let item_source = cursor;

        let item = match parse_item(&mut cursor) {
            Ok(item) => item,
            Err(e) => {
                let (line, column) = calculate_position(input, start);
                let message = format!("Failed to parse entry: {e}");
                let snippet = Some(get_snippet(item_source, 40));
                return Err(Error::ParseError {
                    line,
                    column,
                    message,
                    snippet,
                });
            }
        };

        let end = input.len() - cursor.len();
        let span = source_map.span(start, end);
        on_item(item, span, &input[start..end])?;
    }
}
/// Parses `input` item by item using the location-tracking item parser and
/// hands each item to `on_item` together with its start offset, end offset and
/// the source slice `&input[start..end]`.
///
/// Offsets are byte offsets into `input`; `start` is measured after leading
/// whitespace has been skipped and is also passed to the item parser as the
/// item's absolute start.
///
/// # Errors
///
/// * Any error returned by `on_item` is propagated unchanged.
/// * If an item fails to parse, an `Error::ParseError` is returned whose
///   line/column and snippet refer to the start of the failing item.
#[inline]
pub(crate) fn parse_bibtex_stream_with_entry_locations<'a, F>(
    input: &'a str,
    mut on_item: F,
) -> Result<()>
where
    F: FnMut(LocatedParsedItem<'a>, usize, usize, &'a str) -> Result<()>,
{
    let mut cursor = input;
    loop {
        lexer::skip_whitespace(&mut cursor);
        if cursor.is_empty() {
            return Ok(());
        }

        // Captured before parsing so error reports point at the item start.
        let start = input.len() - cursor.len();
        let item_source = cursor;

        let item = match parse_item_with_entry_locations(&mut cursor, start) {
            Ok(item) => item,
            Err(e) => {
                let (line, column) = calculate_position(input, start);
                let message = format!("Failed to parse entry: {e}");
                let snippet = Some(get_snippet(item_source, 40));
                return Err(Error::ParseError {
                    line,
                    column,
                    message,
                    snippet,
                });
            }
        };

        let end = input.len() - cursor.len();
        on_item(item, start, end, &input[start..end])?;
    }
}
/// A single top-level item produced by [`parse_item`].
#[derive(Debug, Clone, PartialEq)]
pub enum ParsedItem<'a> {
/// A regular entry, produced by `entry::parse_entry_at` for any `@...` item
/// that is not recognised as `@string`, `@preamble` or `@comment`.
Entry(crate::Entry<'a>),
/// An `@string` definition: the identifier and the value assigned to it.
String(&'a str, crate::Value<'a>),
/// The value of an `@preamble` item.
Preamble(crate::Value<'a>),
/// Comment text: either the body of an `@comment` item or free text that
/// appears before the next `@` in the input.
Comment(&'a str),
}
/// A single top-level item produced by `parse_item_with_entry_locations`.
///
/// Same variants as [`ParsedItem`], except that entries are represented by
/// `entry::LocatedEntry`.
pub(crate) enum LocatedParsedItem<'a> {
/// A regular entry, produced by `entry::parse_entry_at_with_locations`.
Entry(entry::LocatedEntry<'a>),
/// An `@string` definition: the identifier and the value assigned to it.
String(&'a str, crate::Value<'a>),
/// The value of an `@preamble` item.
Preamble(crate::Value<'a>),
/// Comment text: either the body of an `@comment` item or free text that
/// appears before the next `@` in the input.
Comment(&'a str),
}
/// Parses one top-level item from the front of `input`, advancing `input`
/// past whatever was consumed.
///
/// * Input that does not start with `@` is returned as a
///   `ParsedItem::Comment` covering everything up to (not including) the next
///   `@`, or the whole remaining input when there is none.
/// * `@string`, `@preamble` and `@comment` (keyword compared
///   case-insensitively by `starts_with_keyword`) are dispatched to their
///   dedicated parsers.
/// * Everything else, including empty input, is handed to
///   `entry::parse_entry_at`.
#[inline]
pub(crate) fn parse_item<'a>(input: &mut &'a str) -> PResult<'a, ParsedItem<'a>> {
    let source: &'a str = *input;
    let bytes = source.as_bytes();

    match bytes.first() {
        // Empty input or an `@`: fall through to keyword dispatch below.
        None | Some(&b'@') => {}
        // Free text before the next `@` is treated as a comment.
        Some(_) => {
            let (comment, rest) = match delimiter::find_byte(bytes, b'@', 0) {
                Some(at_pos) => (&source[..at_pos], &source[at_pos..]),
                None => (source, ""),
            };
            *input = rest;
            return Ok(ParsedItem::Comment(comment));
        }
    }

    // Lower-case the byte after `@` (0 when absent) as a cheap pre-filter
    // before the full keyword comparison.
    let marker = bytes.get(1).map_or(0, |&byte| byte) | 0x20;

    if marker == b's' && starts_with_keyword(bytes, b"string") {
        return parse_string(input).map(|(name, value)| ParsedItem::String(name, value));
    }
    if marker == b'p' && starts_with_keyword(bytes, b"preamble") {
        return parse_preamble(input).map(ParsedItem::Preamble);
    }
    if marker == b'c' && starts_with_keyword(bytes, b"comment") {
        return parse_comment(input).map(ParsedItem::Comment);
    }
    entry::parse_entry_at(input).map(ParsedItem::Entry)
}
/// Location-tracking counterpart of `parse_item`: parses one top-level item
/// from the front of `input`, advancing `input` past whatever was consumed.
///
/// `absolute_start` is forwarded to `entry::parse_entry_at_with_locations`
/// when the item is a regular entry; it is not used for the other item kinds.
///
/// * Input that does not start with `@` is returned as a
///   `LocatedParsedItem::Comment` covering everything up to (not including)
///   the next `@`, or the whole remaining input when there is none.
/// * `@string`, `@preamble` and `@comment` (keyword compared
///   case-insensitively by `starts_with_keyword`) are dispatched to their
///   dedicated parsers.
#[inline]
fn parse_item_with_entry_locations<'a>(
    input: &mut &'a str,
    absolute_start: usize,
) -> PResult<'a, LocatedParsedItem<'a>> {
    let source: &'a str = *input;
    let bytes = source.as_bytes();

    match bytes.first() {
        // Empty input or an `@`: fall through to keyword dispatch below.
        None | Some(&b'@') => {}
        // Free text before the next `@` is treated as a comment.
        Some(_) => {
            let (comment, rest) = match delimiter::find_byte(bytes, b'@', 0) {
                Some(at_pos) => (&source[..at_pos], &source[at_pos..]),
                None => (source, ""),
            };
            *input = rest;
            return Ok(LocatedParsedItem::Comment(comment));
        }
    }

    // Lower-case the byte after `@` (0 when absent) as a cheap pre-filter
    // before the full keyword comparison.
    let marker = bytes.get(1).map_or(0, |&byte| byte) | 0x20;

    if marker == b's' && starts_with_keyword(bytes, b"string") {
        return parse_string(input).map(|(name, value)| LocatedParsedItem::String(name, value));
    }
    if marker == b'p' && starts_with_keyword(bytes, b"preamble") {
        return parse_preamble(input).map(LocatedParsedItem::Preamble);
    }
    if marker == b'c' && starts_with_keyword(bytes, b"comment") {
        return parse_comment(input).map(LocatedParsedItem::Comment);
    }
    entry::parse_entry_at_with_locations(input, absolute_start).map(LocatedParsedItem::Entry)
}
/// Reports whether `input` begins with `@` followed by `keyword`, where the
/// keyword is not immediately followed by another identifier character.
///
/// Each input byte is OR-ed with `0x20` before being compared with the
/// corresponding `keyword` byte, so an all-lowercase ASCII-letter `keyword`
/// matches regardless of the input's letter case.
#[inline(never)]
fn starts_with_keyword(input: &[u8], keyword: &[u8]) -> bool {
    let rest = match input.split_first() {
        Some((&b'@', rest)) => rest,
        _ => return false,
    };
    if rest.len() < keyword.len() {
        return false;
    }

    let (candidate, tail) = rest.split_at(keyword.len());
    let keyword_matches = candidate
        .iter()
        .zip(keyword)
        .all(|(&actual, &expected)| (actual | 0x20) == expected);

    // The keyword must end the identifier: either the input stops here or the
    // next byte cannot continue an identifier.
    keyword_matches && tail.first().map_or(true, |&next| !is_identifier_char(next))
}
/// Returns `true` for bytes treated as identifier characters here: ASCII
/// letters, ASCII digits, and `_`, `-`, `:`, `.`.
#[inline]
const fn is_identifier_char(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b':' | b'.')
}
/// Parses an `@string` item and returns the `(identifier, value)` pair from
/// its body.
///
/// Accepted shape: optional whitespace, `@`, the keyword (matched by
/// `utils::tag_no_case("string")`), optional whitespace, then the body wrapped
/// in either `{ ... }` or `( ... )`.
fn parse_string<'a>(input: &mut &'a str) -> PResult<'a, (&'a str, crate::Value<'a>)> {
    use winnow::combinator::{alt, delimited, preceded};

    let header = (multispace0, '@', utils::tag_no_case("string"), multispace0);
    let braced = delimited('{', parse_string_content, '}');
    let parenthesized = delimited('(', parse_string_content, ')');

    preceded(header, alt((braced, parenthesized))).parse_next(input)
}
/// Parses the body of an `@string` item: `identifier = value`, with each of
/// the three parts wrapped in `utils::ws`.
///
/// Returns the identifier and the parsed value; the `=` is discarded.
fn parse_string_content<'a>(input: &mut &'a str) -> PResult<'a, (&'a str, crate::Value<'a>)> {
    use winnow::combinator::separated_pair;

    let name = utils::ws(lexer::identifier);
    let equals = utils::ws('=');
    let assigned = utils::ws(value::parse_value);

    separated_pair(name, equals, assigned).parse_next(input)
}
/// Parses an `@preamble` item and returns the value from its body.
///
/// Accepted shape: optional whitespace, `@`, the keyword (matched by
/// `utils::tag_no_case("preamble")`), optional whitespace, then the body
/// wrapped in either `{ ... }` or `( ... )`.
fn parse_preamble<'a>(input: &mut &'a str) -> PResult<'a, crate::Value<'a>> {
    use winnow::combinator::{alt, delimited, preceded};

    let header = (
        multispace0,
        '@',
        utils::tag_no_case("preamble"),
        multispace0,
    );
    let braced = delimited('{', parse_preamble_value, '}');
    let parenthesized = delimited('(', parse_preamble_value, ')');

    preceded(header, alt((braced, parenthesized))).parse_next(input)
}
/// Parses the body of an `@preamble` item: a single value wrapped in
/// `utils::ws`.
fn parse_preamble_value<'a>(input: &mut &'a str) -> PResult<'a, crate::Value<'a>> {
    let mut padded_value = utils::ws(value::parse_value);
    padded_value.parse_next(input)
}
/// Parses a comment and returns its text. Three forms are tried in order:
///
/// 1. An `@comment` item: optional whitespace, `@`, the keyword (matched by
///    `utils::tag_no_case("comment")`), optional whitespace, then a body in
///    `{ ... }` (via `lexer::balanced_braces`) or `( ... )` (via
///    `lexer::balanced_parentheses`). The body without its delimiters is
///    returned.
/// 2. A `%` followed by the rest of the line; the text after `%` is returned.
/// 3. At least one character of text up to the next `@`, accepted only if it
///    is not entirely whitespace.
fn parse_comment<'a>(input: &mut &'a str) -> PResult<'a, &'a str> {
    use winnow::ascii::till_line_ending;
    use winnow::combinator::{alt, delimited, preceded};
    use winnow::token::take_until;

    let braced_body = delimited('{', lexer::balanced_braces, '}');
    let parenthesized_body = delimited('(', lexer::balanced_parentheses, ')');
    let at_comment = preceded(
        (multispace0, '@', utils::tag_no_case("comment"), multispace0),
        alt((braced_body, parenthesized_body)),
    );

    let percent_line = preceded('%', till_line_ending);

    let free_text = take_until(1.., "@").verify(|s: &str| !s.trim().is_empty());

    alt((at_comment, percent_line, free_text)).parse_next(input)
}
/// Converts the byte offset `pos` within `input` into a 1-based
/// `(line, column)` pair.
///
/// Every character that starts before `pos` is counted: `'\n'` moves to the
/// next line and resets the column to 1, any other character advances the
/// column by one. Columns therefore count characters, not bytes. An offset at
/// or past the end of `input` yields the position just after the last
/// character.
fn calculate_position(input: &str, pos: usize) -> (usize, usize) {
    input
        .char_indices()
        .take_while(|&(offset, _)| offset < pos)
        .fold((1, 1), |(line, column), (_, ch)| {
            if ch == '\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
/// Returns at most the first `max_len` characters of `input`, followed by
/// `"..."` when `input` was actually truncated.
///
/// Truncation is decided by character count, the same unit used for taking
/// the prefix, so multi-byte text that fits within `max_len` characters is
/// returned unchanged instead of receiving a spurious ellipsis.
fn get_snippet(input: &str, max_len: usize) -> String {
    let mut chars = input.chars();
    let mut snippet: String = chars.by_ref().take(max_len).collect();
    // Anything left in the iterator means the input was longer than the
    // snippet, so mark the cut.
    if chars.next().is_some() {
        snippet.push_str("...");
    }
    snippet
}