Skip to main content

bibtex_parser/parser/
entry.rs

1//! Entry parsing for BibTeX
2
3use super::{lexer, value, PResult};
4use crate::model::{Entry, EntryType, Field};
5use std::borrow::Cow;
6
/// Initial capacity reserved for the field vector built by `parse_fields`.
const DEFAULT_FIELD_CAPACITY: usize = 17;
8
9/// Parse a bibliography entry
10#[inline]
11pub fn parse_entry<'a>(input: &mut &'a str) -> PResult<'a, Entry<'a>> {
12    lexer::skip_whitespace(input);
13    parse_entry_at(input)
14}
15
16/// Parse a bibliography entry when `input` is already positioned at `@`.
17#[inline]
18pub fn parse_entry_at<'a>(input: &mut &'a str) -> PResult<'a, Entry<'a>> {
19    match input.as_bytes().first() {
20        Some(b'@') => {
21            *input = &input[1..];
22            parse_entry_content(input)
23        }
24        _ => super::backtrack(),
25    }
26}
27
28#[inline]
29fn parse_entry_content<'a>(input: &mut &'a str) -> PResult<'a, Entry<'a>> {
30    let entry_type_str = lexer::identifier(input)?;
31    let entry_type = EntryType::parse(entry_type_str);
32
33    lexer::skip_whitespace(input);
34
35    let closing_delimiter = match input.as_bytes().first() {
36        Some(b'{') => b'}',
37        Some(b'(') => b')',
38        _ => return super::backtrack(),
39    };
40    *input = &input[1..];
41
42    parse_entry_body(input, entry_type, closing_delimiter)
43}
44
45/// Parse the body of an entry (key and fields)
46#[inline]
47fn parse_entry_body<'a>(
48    input: &mut &'a str,
49    entry_type: EntryType<'a>,
50    closing_delimiter: u8,
51) -> PResult<'a, Entry<'a>> {
52    lexer::skip_whitespace(input);
53    let key = lexer::identifier(input)?;
54
55    lexer::skip_whitespace(input);
56    expect_byte(input, b',')?;
57
58    let fields = parse_fields(input, closing_delimiter)?;
59    expect_byte(input, closing_delimiter)?;
60
61    Ok(Entry {
62        ty: entry_type,
63        key: Cow::Borrowed(key),
64        fields,
65    })
66}
67
68#[inline]
69fn expect_byte<'a>(input: &mut &'a str, byte: u8) -> PResult<'a, ()> {
70    match input.as_bytes().first() {
71        Some(&b) if b == byte => {
72            *input = &input[1..];
73            Ok(())
74        }
75        _ => super::backtrack(),
76    }
77}
78
79/// Parse all fields in an entry.
80#[inline]
81fn parse_fields<'a>(input: &mut &'a str, closing_delimiter: u8) -> PResult<'a, Vec<Field<'a>>> {
82    let mut fields = Vec::with_capacity(DEFAULT_FIELD_CAPACITY);
83
84    while let Some(first) = lexer::skip_whitespace_peek(input) {
85        if first == closing_delimiter {
86            break;
87        }
88
89        let name = lexer::field_name(input)?;
90        lexer::skip_whitespace(input);
91        expect_byte(input, b'=')?;
92        lexer::skip_whitespace(input);
93        let value = value::parse_value_field(input)?;
94
95        fields.push(Field {
96            name: Cow::Borrowed(name),
97            value,
98        });
99
100        match input.as_bytes().first() {
101            Some(b',') => {
102                *input = &input[1..];
103            }
104            Some(&b) if b == closing_delimiter => {}
105            _ => return super::backtrack(),
106        }
107    }
108
109    let max_reasonable_capacity = (fields.len() * 2).max(8);
110    if fields.capacity() > max_reasonable_capacity {
111        fields.shrink_to_fit();
112    }
113
114    Ok(fields)
115}
116
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Value;
    use std::borrow::Cow;

    /// Brace-delimited entry with a quoted, a braced and a numeric value.
    #[test]
    fn test_parse_simple_entry() {
        let mut input = r#"@article{einstein1905,
            author = "Albert Einstein",
            title = {Zur Elektrodynamik bewegter Körper},
            year = 1905
        }"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.ty, EntryType::Article);
        assert_eq!(entry.key, Cow::Borrowed("einstein1905"));

        let expected = [
            ("author", Value::Literal(Cow::Borrowed("Albert Einstein"))),
            (
                "title",
                Value::Literal(Cow::Borrowed("Zur Elektrodynamik bewegter Körper")),
            ),
            ("year", Value::Number(1905)),
        ];
        assert_eq!(entry.fields.len(), expected.len());
        for (field, (name, value)) in entry.fields.iter().zip(expected.iter()) {
            assert_eq!(field.name, *name);
            assert_eq!(&field.value, value);
        }
    }

    /// Values joined with `#` are expected to come back as `Value::Concat`.
    #[test]
    fn test_parse_entry_with_concatenation() {
        let mut input = r#"@misc{test,
            author = name # " et al.",
            note = "See " # url
        }"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.ty, EntryType::Misc);
        assert_eq!(entry.key, Cow::Borrowed("test"));
        assert_eq!(entry.fields.len(), 2);

        if let Value::Concat(parts) = &entry.fields[0].value {
            assert_eq!(parts.len(), 2);
            assert_eq!(parts[0], Value::Variable(Cow::Borrowed("name")));
            assert_eq!(parts[1], Value::Literal(Cow::Borrowed(" et al.")));
        } else {
            panic!("Expected concatenated value");
        }
    }

    /// A comma after the last field must not prevent the entry from parsing.
    #[test]
    fn test_parse_entry_with_trailing_comma() {
        let mut input = r#"@book{knuth1984,
            author = "Donald Knuth",
            title = "The TeXbook",
            year = 1984,
        }"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.fields.len(), 3);
    }

    /// Same content as the simple entry, but delimited by parentheses.
    #[test]
    fn test_parse_entry_with_parentheses() {
        let mut input = r#"@article(einstein1905,
            author = "Albert Einstein",
            title = {Zur Elektrodynamik bewegter Körper},
            year = 1905
        )"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.ty, EntryType::Article);
        assert_eq!(entry.key, Cow::Borrowed("einstein1905"));

        let expected = [
            ("author", Value::Literal(Cow::Borrowed("Albert Einstein"))),
            (
                "title",
                Value::Literal(Cow::Borrowed("Zur Elektrodynamik bewegter Körper")),
            ),
            ("year", Value::Number(1905)),
        ];
        assert_eq!(entry.fields.len(), expected.len());
        for (field, (name, value)) in entry.fields.iter().zip(expected.iter()) {
            assert_eq!(field.name, *name);
            assert_eq!(&field.value, value);
        }
    }

    /// Entry uses parentheses, but field values can use braces.
    #[test]
    fn test_parse_entry_mixed_delimiters() {
        let mut input = r#"@book(test2024,
            title = {A Title with {Nested} Braces},
            author = "John Doe"
        )"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.ty, EntryType::Book);
        assert_eq!(entry.key, Cow::Borrowed("test2024"));

        let expected = [
            (
                "title",
                Value::Literal(Cow::Borrowed("A Title with {Nested} Braces")),
            ),
            ("author", Value::Literal(Cow::Borrowed("John Doe"))),
        ];
        assert_eq!(entry.fields.len(), expected.len());
        for (field, (name, value)) in entry.fields.iter().zip(expected.iter()) {
            assert_eq!(field.name, *name);
            assert_eq!(&field.value, value);
        }
    }
}