// bibtex_parser/parser/entry.rs
use super::{lexer, value, PResult};
4use crate::model::{Entry, EntryType, Field};
5use std::borrow::Cow;
6
7const DEFAULT_FIELD_CAPACITY: usize = 17;
8
9#[inline]
11pub fn parse_entry<'a>(input: &mut &'a str) -> PResult<'a, Entry<'a>> {
12 lexer::skip_whitespace(input);
13 parse_entry_at(input)
14}
15
16#[inline]
18pub fn parse_entry_at<'a>(input: &mut &'a str) -> PResult<'a, Entry<'a>> {
19 match input.as_bytes().first() {
20 Some(b'@') => {
21 *input = &input[1..];
22 parse_entry_content(input)
23 }
24 _ => super::backtrack(),
25 }
26}
27
28#[inline]
29fn parse_entry_content<'a>(input: &mut &'a str) -> PResult<'a, Entry<'a>> {
30 let entry_type_str = lexer::identifier(input)?;
31 let entry_type = EntryType::parse(entry_type_str);
32
33 lexer::skip_whitespace(input);
34
35 let closing_delimiter = match input.as_bytes().first() {
36 Some(b'{') => b'}',
37 Some(b'(') => b')',
38 _ => return super::backtrack(),
39 };
40 *input = &input[1..];
41
42 parse_entry_body(input, entry_type, closing_delimiter)
43}
44
45#[inline]
47fn parse_entry_body<'a>(
48 input: &mut &'a str,
49 entry_type: EntryType<'a>,
50 closing_delimiter: u8,
51) -> PResult<'a, Entry<'a>> {
52 lexer::skip_whitespace(input);
53 let key = lexer::identifier(input)?;
54
55 lexer::skip_whitespace(input);
56 expect_byte(input, b',')?;
57
58 let fields = parse_fields(input, closing_delimiter)?;
59 expect_byte(input, closing_delimiter)?;
60
61 Ok(Entry {
62 ty: entry_type,
63 key: Cow::Borrowed(key),
64 fields,
65 })
66}
67
68#[inline]
69fn expect_byte<'a>(input: &mut &'a str, byte: u8) -> PResult<'a, ()> {
70 match input.as_bytes().first() {
71 Some(&b) if b == byte => {
72 *input = &input[1..];
73 Ok(())
74 }
75 _ => super::backtrack(),
76 }
77}
78
79#[inline]
81fn parse_fields<'a>(input: &mut &'a str, closing_delimiter: u8) -> PResult<'a, Vec<Field<'a>>> {
82 let mut fields = Vec::with_capacity(DEFAULT_FIELD_CAPACITY);
83
84 loop {
85 let Some(first) = lexer::skip_whitespace_peek(input) else {
86 break;
87 };
88 if first == closing_delimiter {
89 break;
90 }
91
92 let name = lexer::field_name(input)?;
93 lexer::skip_whitespace(input);
94 expect_byte(input, b'=')?;
95 lexer::skip_whitespace(input);
96 let value = value::parse_value_field(input)?;
97
98 fields.push(Field {
99 name: Cow::Borrowed(name),
100 value,
101 });
102
103 match input.as_bytes().first() {
104 Some(b',') => {
105 *input = &input[1..];
106 }
107 Some(&b) if b == closing_delimiter => {}
108 _ => return super::backtrack(),
109 }
110 }
111
112 let max_reasonable_capacity = (fields.len() * 2).max(8);
113 if fields.capacity() > max_reasonable_capacity {
114 fields.shrink_to_fit();
115 }
116
117 Ok(fields)
118}
119
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Value;
    use std::borrow::Cow;

    #[test]
    fn test_parse_simple_entry() {
        let mut input = r#"@article{einstein1905,
    author = "Albert Einstein",
    title = {Zur Elektrodynamik bewegter Körper},
    year = 1905
}"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.ty, EntryType::Article);
        assert_eq!(entry.key, Cow::Borrowed("einstein1905"));
        assert_eq!(entry.fields.len(), 3);

        assert_eq!(entry.fields[0].name, "author");
        assert_eq!(
            entry.fields[0].value,
            Value::Literal(Cow::Borrowed("Albert Einstein"))
        );

        assert_eq!(entry.fields[1].name, "title");
        assert_eq!(
            entry.fields[1].value,
            Value::Literal(Cow::Borrowed("Zur Elektrodynamik bewegter Körper"))
        );

        assert_eq!(entry.fields[2].name, "year");
        assert_eq!(entry.fields[2].value, Value::Number(1905));
    }

    #[test]
    fn test_parse_entry_with_concatenation() {
        let mut input = r#"@misc{test,
    author = name # " et al.",
    note = "See " # url
}"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.ty, EntryType::Misc);
        assert_eq!(entry.key, Cow::Borrowed("test"));
        assert_eq!(entry.fields.len(), 2);

        // variable # literal
        assert_eq!(entry.fields[0].name, "author");
        match &entry.fields[0].value {
            Value::Concat(parts) => {
                assert_eq!(parts.len(), 2);
                assert_eq!(parts[0], Value::Variable(Cow::Borrowed("name")));
                assert_eq!(parts[1], Value::Literal(Cow::Borrowed(" et al.")));
            }
            _ => panic!("Expected concatenated value"),
        }

        // literal # variable — previously constructed but never verified;
        // checks that concatenation order is preserved both ways.
        assert_eq!(entry.fields[1].name, "note");
        match &entry.fields[1].value {
            Value::Concat(parts) => {
                assert_eq!(parts.len(), 2);
                assert_eq!(parts[0], Value::Literal(Cow::Borrowed("See ")));
                assert_eq!(parts[1], Value::Variable(Cow::Borrowed("url")));
            }
            _ => panic!("Expected concatenated value"),
        }
    }

    #[test]
    fn test_parse_entry_with_trailing_comma() {
        let mut input = r#"@book{knuth1984,
    author = "Donald Knuth",
    title = "The TeXbook",
    year = 1984,
}"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.fields.len(), 3);
    }

    #[test]
    fn test_parse_entry_with_parentheses() {
        let mut input = r#"@article(einstein1905,
    author = "Albert Einstein",
    title = {Zur Elektrodynamik bewegter Körper},
    year = 1905
)"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.ty, EntryType::Article);
        assert_eq!(entry.key, Cow::Borrowed("einstein1905"));
        assert_eq!(entry.fields.len(), 3);

        assert_eq!(entry.fields[0].name, "author");
        assert_eq!(
            entry.fields[0].value,
            Value::Literal(Cow::Borrowed("Albert Einstein"))
        );

        assert_eq!(entry.fields[1].name, "title");
        assert_eq!(
            entry.fields[1].value,
            Value::Literal(Cow::Borrowed("Zur Elektrodynamik bewegter Körper"))
        );

        assert_eq!(entry.fields[2].name, "year");
        assert_eq!(entry.fields[2].value, Value::Number(1905));
    }

    #[test]
    fn test_parse_entry_mixed_delimiters() {
        let mut input = r#"@book(test2024,
    title = {A Title with {Nested} Braces},
    author = "John Doe"
)"#;

        let entry = parse_entry(&mut input).unwrap();
        assert_eq!(entry.ty, EntryType::Book);
        assert_eq!(entry.key, Cow::Borrowed("test2024"));
        assert_eq!(entry.fields.len(), 2);

        assert_eq!(entry.fields[0].name, "title");
        assert_eq!(
            entry.fields[0].value,
            Value::Literal(Cow::Borrowed("A Title with {Nested} Braces"))
        );

        assert_eq!(entry.fields[1].name, "author");
        assert_eq!(
            entry.fields[1].value,
            Value::Literal(Cow::Borrowed("John Doe"))
        );
    }
}