bibtex_parser/parser/
value.rs1use super::{lexer, PResult};
4use crate::model::Value;
5use std::borrow::Cow;
6
7#[inline]
9pub fn parse_value<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
10 parse_concatenated_value(input)
11}
12
13#[inline]
18pub(crate) fn parse_value_field<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
19 parse_concatenated_value_field(input)
20}
21
22#[inline]
24fn parse_concatenated_value<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
25 let first = parse_single_value(input)?;
26
27 if !consume_concat_separator(input) {
29 return Ok(first);
30 }
31
32 let mut parts = Vec::with_capacity(3);
34 parts.push(first);
35
36 loop {
37 let part = parse_single_value(input)?;
38 parts.push(part);
39
40 if !consume_concat_separator(input) {
41 break;
42 }
43 }
44
45 Ok(Value::Concat(parts.into_boxed_slice()))
46}
47
48#[inline]
50fn parse_concatenated_value_field<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
51 let first = parse_single_value(input)?;
52
53 if lexer::skip_whitespace_peek(input) != Some(b'#') {
54 return Ok(first);
55 }
56
57 let mut parts = Vec::with_capacity(3);
59 parts.push(first);
60
61 loop {
62 *input = &input[1..];
64 lexer::skip_whitespace(input);
65
66 let part = parse_single_value(input)?;
67 parts.push(part);
68
69 if lexer::skip_whitespace_peek(input) != Some(b'#') {
70 break;
71 }
72 }
73
74 Ok(Value::Concat(parts.into_boxed_slice()))
75}
76
77#[inline]
82fn consume_concat_separator(input: &mut &str) -> bool {
83 let mut probe = *input;
84 lexer::skip_whitespace(&mut probe);
85 if probe.as_bytes().first() != Some(&b'#') {
86 return false;
87 }
88
89 probe = &probe[1..];
90 lexer::skip_whitespace(&mut probe);
91 *input = probe;
92 true
93}
94
95#[inline]
97fn parse_single_value<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
98 let bytes = input.as_bytes();
100 if let Some(&first) = bytes.first() {
101 match first {
102 b'"' => {
103 super::simd::find_balanced_quotes(bytes).map_or_else(super::backtrack, |end_pos| {
104 let content = &input[1..end_pos - 1];
105 *input = &input[end_pos..];
106 Ok(Value::Literal(Cow::Borrowed(content)))
107 })
108 }
109 b'{' => {
110 super::simd::find_balanced_braces(bytes).map_or_else(super::backtrack, |end_pos| {
111 let content = &input[1..end_pos - 1];
112 *input = &input[end_pos..];
113 Ok(Value::Literal(Cow::Borrowed(content)))
114 })
115 }
116 b'0'..=b'9' | b'+' | b'-' => parse_number_or_digit_string(input),
117 _ => parse_variable_value(input),
118 }
119 } else {
120 super::backtrack()
121 }
122}
123
124#[inline]
127fn parse_number_or_digit_string<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
128 let bytes = input.as_bytes();
129 let Some(&first) = bytes.first() else {
130 return super::backtrack();
131 };
132
133 let len = super::simd::scan_identifier(bytes);
134 if len == 0 {
135 return super::backtrack();
136 }
137
138 let token = &input[..len];
139 let token_bytes = token.as_bytes();
140
141 if first == b'+' || first == b'-' {
144 if token_bytes.len() <= 1 || !token_bytes[1..].iter().all(u8::is_ascii_digit) {
145 return super::backtrack();
146 }
147 let num = parse_i64_ascii(token)?;
148 *input = &input[len..];
149 return Ok(Value::Number(num));
150 }
151
152 if !first.is_ascii_digit() {
155 return super::backtrack();
156 }
157
158 *input = &input[len..];
159 if token_bytes.iter().all(u8::is_ascii_digit) {
160 let num = parse_i64_ascii(token)?;
161 Ok(Value::Number(num))
162 } else {
163 Ok(Value::Literal(Cow::Borrowed(token)))
164 }
165}
166
167#[inline]
168fn parse_i64_ascii(token: &str) -> PResult<'_, i64> {
169 let bytes = token.as_bytes();
170 let (negative, start) = match bytes.first() {
171 Some(b'-') => (true, 1),
172 Some(b'+') => (false, 1),
173 _ => (false, 0),
174 };
175
176 if start >= bytes.len() {
177 return super::backtrack();
178 }
179
180 let mut value: i64 = 0;
181 for &byte in &bytes[start..] {
182 if !byte.is_ascii_digit() {
183 return super::backtrack();
184 }
185
186 let digit = i64::from(byte - b'0');
187 value = if negative {
188 value
189 .checked_mul(10)
190 .and_then(|v| v.checked_sub(digit))
191 .ok_or_else(super::backtrack_err)?
192 } else {
193 value
194 .checked_mul(10)
195 .and_then(|v| v.checked_add(digit))
196 .ok_or_else(super::backtrack_err)?
197 };
198 }
199
200 Ok(value)
201}
202
203#[inline]
205fn parse_variable_value<'a>(input: &mut &'a str) -> PResult<'a, Value<'a>> {
206 let ident = lexer::identifier(input)?;
208 Ok(Value::Variable(Cow::Borrowed(ident)))
209}
210
/// Collapses every run of whitespace in `s` into a single ASCII space and
/// removes leading and trailing whitespace.
///
/// Words are found with `str::split_whitespace`, so any Unicode whitespace
/// counts as a separator. Returns an empty string when `s` is empty or
/// contains only whitespace.
#[must_use]
pub fn normalize_value(s: &str) -> String {
    // The result can never be longer than the input, so one allocation suffices
    // and no intermediate vector of words is needed.
    let mut normalized = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
217
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `parse_value` on `source` and returns the parsed value together
    /// with whatever input was left unconsumed.
    fn parse_prefix(source: &str) -> (Value<'_>, &str) {
        let mut cursor = source;
        let parsed = parse_value(&mut cursor).unwrap();
        (parsed, cursor)
    }

    #[test]
    fn test_parse_quoted_value() {
        let (value, rest) = parse_prefix(r#""hello world" xxx"#);
        assert_eq!(value, Value::Literal(Cow::Borrowed("hello world")));
        assert_eq!(rest, " xxx");
    }

    #[test]
    fn test_parse_braced_value() {
        let (value, rest) = parse_prefix("{hello world} xxx");
        assert_eq!(value, Value::Literal(Cow::Borrowed("hello world")));
        assert_eq!(rest, " xxx");
    }

    #[test]
    fn test_parse_number_value() {
        let (value, rest) = parse_prefix("2023 xxx");
        assert_eq!(value, Value::Number(2023));
        assert_eq!(rest, " xxx");
    }

    #[test]
    fn test_parse_variable_value() {
        let (value, rest) = parse_prefix("myvar xxx");
        assert_eq!(value, Value::Variable(Cow::Borrowed("myvar")));
        assert_eq!(rest, " xxx");
    }

    #[test]
    fn test_parse_concatenated_value() {
        let (value, rest) = parse_prefix(r#""hello" # myvar # {world} xxx"#);

        let Value::Concat(parts) = value else {
            panic!("Expected concatenated value");
        };
        assert_eq!(parts.len(), 3);
        assert_eq!(parts[0], Value::Literal(Cow::Borrowed("hello")));
        assert_eq!(parts[1], Value::Variable(Cow::Borrowed("myvar")));
        assert_eq!(parts[2], Value::Literal(Cow::Borrowed("world")));

        // Trailing whitespace before unrelated text must not be consumed.
        assert_eq!(rest, " xxx");
    }
}