bibtex_parser/parser/
value.rs1use super::{lexer, PResult};
4use crate::model::Value;
5use std::borrow::Cow;
6
7#[inline]
9pub fn parse_value<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
10 parse_concatenated_value(input)
11}
12
13#[inline]
18pub(crate) fn parse_value_field<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
19 parse_concatenated_value_field(input)
20}
21
22#[inline]
24fn parse_concatenated_value<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
25 let first = parse_single_value(input)?;
26
27 if !consume_concat_separator(input) {
29 return Ok(first);
30 }
31
32 let mut parts = Vec::with_capacity(3);
34 parts.push(first);
35
36 loop {
37 let part = parse_single_value(input)?;
38 parts.push(part);
39
40 if !consume_concat_separator(input) {
41 break;
42 }
43 }
44
45 Ok(Value::Concat(parts.into_boxed_slice()))
46}
47
48#[inline]
50fn parse_concatenated_value_field<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
51 let first = parse_single_value(input)?;
52
53 if !consume_concat_separator_field(input) {
54 return Ok(first);
55 }
56
57 let mut parts = Vec::with_capacity(3);
59 parts.push(first);
60
61 loop {
62 let part = parse_single_value(input)?;
63 parts.push(part);
64
65 if !consume_concat_separator_field(input) {
66 break;
67 }
68 }
69
70 Ok(Value::Concat(parts.into_boxed_slice()))
71}
72
73#[inline]
78fn consume_concat_separator(input: &mut &str) -> bool {
79 let mut probe = *input;
80 lexer::skip_whitespace(&mut probe);
81 if probe.as_bytes().first() != Some(&b'#') {
82 return false;
83 }
84
85 probe = &probe[1..];
86 lexer::skip_whitespace(&mut probe);
87 *input = probe;
88 true
89}
90
91#[inline]
96fn consume_concat_separator_field(input: &mut &str) -> bool {
97 match input.as_bytes().first() {
98 Some(b'#') => {
99 *input = &input[1..];
100 lexer::skip_whitespace(input);
101 true
102 }
103 Some(b' ' | b'\t' | b'\n' | b'\r') => {
104 lexer::skip_whitespace(input);
105 if input.as_bytes().first() == Some(&b'#') {
106 *input = &input[1..];
107 lexer::skip_whitespace(input);
108 true
109 } else {
110 false
111 }
112 }
113 Some(_) | None => false,
114 }
115}
116
117#[inline]
119fn parse_single_value<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
120 let bytes = input.as_bytes();
122 if let Some(&first) = bytes.first() {
123 match first {
124 b'"' => {
125 super::simd::find_balanced_quotes(bytes).map_or_else(super::backtrack, |end_pos| {
126 let content = &input[1..end_pos - 1];
127 *input = &input[end_pos..];
128 Ok(Value::Literal(Cow::Borrowed(content)))
129 })
130 }
131 b'{' => {
132 super::simd::find_balanced_braces(bytes).map_or_else(super::backtrack, |end_pos| {
133 let content = &input[1..end_pos - 1];
134 *input = &input[end_pos..];
135 Ok(Value::Literal(Cow::Borrowed(content)))
136 })
137 }
138 b'0'..=b'9' | b'+' | b'-' => parse_number_or_digit_string(input),
139 _ => parse_variable_value(input),
140 }
141 } else {
142 super::backtrack()
143 }
144}
145
146#[inline]
149fn parse_number_or_digit_string<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
150 let bytes = input.as_bytes();
151 let Some(&first) = bytes.first() else {
152 return super::backtrack();
153 };
154
155 let len = super::simd::scan_identifier(bytes);
156 if len == 0 {
157 return super::backtrack();
158 }
159
160 let token = &input[..len];
161 let token_bytes = token.as_bytes();
162
163 if first == b'+' || first == b'-' {
166 if token_bytes.len() <= 1 || !token_bytes[1..].iter().all(u8::is_ascii_digit) {
167 return super::backtrack();
168 }
169 let num = parse_i64_ascii(token)?;
170 *input = &input[len..];
171 return Ok(Value::Number(num));
172 }
173
174 if !first.is_ascii_digit() {
177 return super::backtrack();
178 }
179
180 *input = &input[len..];
181 if token_bytes.iter().all(u8::is_ascii_digit) {
182 let num = parse_i64_ascii(token)?;
183 Ok(Value::Number(num))
184 } else {
185 Ok(Value::Literal(Cow::Borrowed(token)))
186 }
187}
188
189#[inline]
190fn parse_i64_ascii(token: &str) -> PResult<'_, i64> {
191 let bytes = token.as_bytes();
192 let (negative, start) = match bytes.first() {
193 Some(b'-') => (true, 1),
194 Some(b'+') => (false, 1),
195 _ => (false, 0),
196 };
197
198 if start >= bytes.len() {
199 return super::backtrack();
200 }
201
202 let mut value: i64 = 0;
203 for &byte in &bytes[start..] {
204 if !byte.is_ascii_digit() {
205 return super::backtrack();
206 }
207
208 let digit = i64::from(byte - b'0');
209 value = if negative {
210 value
211 .checked_mul(10)
212 .and_then(|v| v.checked_sub(digit))
213 .ok_or_else(super::backtrack_err)?
214 } else {
215 value
216 .checked_mul(10)
217 .and_then(|v| v.checked_add(digit))
218 .ok_or_else(super::backtrack_err)?
219 };
220 }
221
222 Ok(value)
223}
224
225#[inline]
227fn parse_variable_value<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
228 let ident = lexer::identifier(input)?;
230 Ok(Value::Variable(Cow::Borrowed(ident)))
231}
232
/// Collapses every run of whitespace in `s` to a single space and drops
/// leading and trailing whitespace.
///
/// Whitespace is whatever `str::split_whitespace` treats as such. An empty
/// or all-whitespace input produces an empty string.
#[must_use]
pub fn normalize_value(s: &str) -> String {
    let mut normalized = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so a non-empty buffer means this is not
        // the first word and needs a separator in front of it.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `parse_value` on `source` and returns the parsed value together
    /// with whatever input was left unconsumed.
    fn parse_prefix(source: &str) -> (Value<'_>, &str) {
        let mut remaining = source;
        let parsed = parse_value(&mut remaining).unwrap();
        (parsed, remaining)
    }

    #[test]
    fn test_parse_quoted_value() {
        let (parsed, remaining) = parse_prefix(r#""hello world" xxx"#);
        assert_eq!(parsed, Value::Literal(Cow::Borrowed("hello world")));
        assert_eq!(remaining, " xxx");
    }

    #[test]
    fn test_parse_braced_value() {
        let (parsed, remaining) = parse_prefix("{hello world} xxx");
        assert_eq!(parsed, Value::Literal(Cow::Borrowed("hello world")));
        assert_eq!(remaining, " xxx");
    }

    #[test]
    fn test_parse_number_value() {
        let (parsed, remaining) = parse_prefix("2023 xxx");
        assert_eq!(parsed, Value::Number(2023));
        assert_eq!(remaining, " xxx");
    }

    #[test]
    fn test_parse_variable_value() {
        let (parsed, remaining) = parse_prefix("myvar xxx");
        assert_eq!(parsed, Value::Variable(Cow::Borrowed("myvar")));
        assert_eq!(remaining, " xxx");
    }

    #[test]
    fn test_parse_concatenated_value() {
        let (parsed, remaining) = parse_prefix(r#""hello" # myvar # {world} xxx"#);

        let Value::Concat(parts) = parsed else {
            panic!("Expected concatenated value");
        };

        let expected = [
            Value::Literal(Cow::Borrowed("hello")),
            Value::Variable(Cow::Borrowed("myvar")),
            Value::Literal(Cow::Borrowed("world")),
        ];
        assert_eq!(parts.len(), expected.len());
        for (actual, wanted) in parts.iter().zip(expected.iter()) {
            assert_eq!(actual, wanted);
        }

        assert_eq!(remaining, " xxx");
    }
}
291}