Skip to main content

bibtex_parser/parser/
entry.rs

1//! Entry parsing for BibTeX
2
3use super::{lexer, value, PResult};
4use crate::model::{Entry, EntryType, Field};
5use std::borrow::Cow;
6
/// Capacity pre-allocated for the field vector that `parse_fields` builds for
/// each entry.
const DEFAULT_FIELD_CAPACITY: usize = 17;
8
9/// Parse a bibliography entry
10#[inline]
11pub fn parse_entry<'a>(input: &mut &'a str) -> PResult<'a, Entry<'a>> {
12    lexer::skip_whitespace(input);
13    parse_entry_at(input)
14}
15
16/// Parse a bibliography entry when `input` is already positioned at `@`.
17#[inline]
18pub fn parse_entry_at<'a>(input: &mut &'a str) -> PResult<'a, Entry<'a>> {
19    match input.as_bytes().first() {
20        Some(b'@') => {
21            *input = &input[1..];
22            parse_entry_content(input)
23        }
24        _ => super::backtrack(),
25    }
26}
27
28#[inline]
29fn parse_entry_content<'a>(input: &mut &'a str) -> PResult<'a, Entry<'a>> {
30    let entry_type_str = lexer::identifier(input)?;
31    let entry_type = EntryType::parse(entry_type_str);
32
33    lexer::skip_whitespace(input);
34
35    let closing_delimiter = match input.as_bytes().first() {
36        Some(b'{') => b'}',
37        Some(b'(') => b')',
38        _ => return super::backtrack(),
39    };
40    *input = &input[1..];
41
42    parse_entry_body(input, entry_type, closing_delimiter)
43}
44
45/// Parse the body of an entry (key and fields)
46#[inline]
47fn parse_entry_body<'a>(
48    input: &mut &'a str,
49    entry_type: EntryType<'a>,
50    closing_delimiter: u8,
51) -> PResult<'a, Entry<'a>> {
52    lexer::skip_whitespace(input);
53    let key = lexer::identifier(input)?;
54
55    lexer::skip_whitespace(input);
56    expect_byte(input, b',')?;
57
58    let fields = parse_fields(input, closing_delimiter)?;
59    expect_byte(input, closing_delimiter)?;
60
61    Ok(Entry {
62        ty: entry_type,
63        key: Cow::Borrowed(key),
64        fields,
65    })
66}
67
68#[inline]
69fn expect_byte<'a>(input: &mut &'a str, byte: u8) -> PResult<'a, ()> {
70    match input.as_bytes().first() {
71        Some(&b) if b == byte => {
72            *input = &input[1..];
73            Ok(())
74        }
75        _ => super::backtrack(),
76    }
77}
78
79/// Parse all fields in an entry.
80#[inline]
81fn parse_fields<'a>(input: &mut &'a str, closing_delimiter: u8) -> PResult<'a, Vec<Field<'a>>> {
82    let mut fields = Vec::with_capacity(DEFAULT_FIELD_CAPACITY);
83
84    loop {
85        let Some(first) = lexer::skip_whitespace_peek(input) else {
86            break;
87        };
88        if first == closing_delimiter {
89            break;
90        }
91
92        let name = lexer::field_name(input)?;
93        lexer::skip_whitespace(input);
94        expect_byte(input, b'=')?;
95        lexer::skip_whitespace(input);
96        let value = value::parse_value_field(input)?;
97
98        fields.push(Field {
99            name: Cow::Borrowed(name),
100            value,
101        });
102
103        match input.as_bytes().first() {
104            Some(b',') => {
105                *input = &input[1..];
106            }
107            Some(&b) if b == closing_delimiter => {}
108            _ => return super::backtrack(),
109        }
110    }
111
112    let max_reasonable_capacity = (fields.len() * 2).max(8);
113    if fields.capacity() > max_reasonable_capacity {
114        fields.shrink_to_fit();
115    }
116
117    Ok(fields)
118}
119
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Value;
    use std::borrow::Cow;

    /// Runs `parse_entry` on `source` and panics if parsing fails.
    fn parse_ok<'a>(mut source: &'a str) -> Entry<'a> {
        parse_entry(&mut source).unwrap()
    }

    /// Asserts that `field` has the given name and a literal value equal to `text`.
    fn assert_literal_field(field: &Field<'_>, name: &str, text: &str) {
        assert_eq!(field.name, name);
        assert_eq!(field.value, Value::Literal(Cow::Borrowed(text)));
    }

    /// Shared expectations for the Einstein entry, which is parsed once with
    /// braces and once with parentheses as the entry delimiters.
    fn assert_einstein_entry(entry: &Entry<'_>) {
        assert_eq!(entry.ty, EntryType::Article);
        assert_eq!(entry.key, Cow::Borrowed("einstein1905"));
        assert_eq!(entry.fields.len(), 3);

        assert_literal_field(&entry.fields[0], "author", "Albert Einstein");
        assert_literal_field(
            &entry.fields[1],
            "title",
            "Zur Elektrodynamik bewegter Körper",
        );

        assert_eq!(entry.fields[2].name, "year");
        assert_eq!(entry.fields[2].value, Value::Number(1905));
    }

    #[test]
    fn test_parse_simple_entry() {
        let entry = parse_ok(
            r#"@article{einstein1905,
            author = "Albert Einstein",
            title = {Zur Elektrodynamik bewegter Körper},
            year = 1905
        }"#,
        );

        assert_einstein_entry(&entry);
    }

    #[test]
    fn test_parse_entry_with_concatenation() {
        let entry = parse_ok(
            r#"@misc{test,
            author = name # " et al.",
            note = "See " # url
        }"#,
        );

        assert_eq!(entry.ty, EntryType::Misc);
        assert_eq!(entry.key, Cow::Borrowed("test"));
        assert_eq!(entry.fields.len(), 2);

        let Value::Concat(parts) = &entry.fields[0].value else {
            panic!("Expected concatenated value")
        };
        assert_eq!(parts.len(), 2);
        assert_eq!(parts[0], Value::Variable(Cow::Borrowed("name")));
        assert_eq!(parts[1], Value::Literal(Cow::Borrowed(" et al.")));
    }

    #[test]
    fn test_parse_entry_with_trailing_comma() {
        let entry = parse_ok(
            r#"@book{knuth1984,
            author = "Donald Knuth",
            title = "The TeXbook",
            year = 1984,
        }"#,
        );

        assert_eq!(entry.fields.len(), 3);
    }

    #[test]
    fn test_parse_entry_with_parentheses() {
        let entry = parse_ok(
            r#"@article(einstein1905,
            author = "Albert Einstein",
            title = {Zur Elektrodynamik bewegter Körper},
            year = 1905
        )"#,
        );

        assert_einstein_entry(&entry);
    }

    #[test]
    fn test_parse_entry_mixed_delimiters() {
        // The entry itself is delimited by parentheses while a field value
        // uses (nested) braces.
        let entry = parse_ok(
            r#"@book(test2024,
            title = {A Title with {Nested} Braces},
            author = "John Doe"
        )"#,
        );

        assert_eq!(entry.ty, EntryType::Book);
        assert_eq!(entry.key, Cow::Borrowed("test2024"));
        assert_eq!(entry.fields.len(), 2);

        assert_literal_field(&entry.fields[0], "title", "A Title with {Nested} Braces");
        assert_literal_field(&entry.fields[1], "author", "John Doe");
    }
}
243}