1use nom::{InputTake, Offset};
2use nom::branch::alt;
3use nom::bytes::complete::{is_not, tag, take_while, take_while1, take_while_m_n};
4use nom::character::complete::{anychar, char, multispace1};
5use nom::combinator::{all_consuming, map, map_opt, map_res, recognize, value, verify};
6use nom::multi::{fold_many0, many1};
7use nom::number::complete::double;
8use nom::sequence::{delimited, preceded};
9use serde_json::Number;
10
11use crate::resource_path::string::lexer_utils::{IResult, LexerError, LexerState, LocatedSpan};
12use crate::resource_path::string::token::{Token, TokenValue};
13
14
15use super::range_ex::AsRange;
16
/// Generates a lexer function `$name` that matches the fixed string
/// `$repr` and wraps the matched span in a `Token` carrying `$token_value`.
macro_rules! tag_token {
    ($name:ident, $repr:literal, $token_value:expr) => {
        fn $name(input: LocatedSpan) -> IResult<Token> {
            map(tag($repr), |span: LocatedSpan| {
                Token::new(span, $token_value)
            })(input)
        }
    };
}
27
/// One decoded piece of a string literal, produced by `lit_str_fragment`
/// and folded into an owned `String` by `lit_str`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum StringFragment<'a> {
    // A run of plain characters (no quote, no backslash), borrowed
    // straight from the input.
    Literal(LocatedSpan<'a>),

    // A single character produced by a backslash escape (`\n`, `\u{..}`, …).
    EscapedChar(char),

    // A backslash followed by whitespace; contributes nothing to the
    // decoded string (line-continuation style).
    EscapedWhitespace,
}
40
/// Wraps `parser` so that a recoverable failure (`Err::Error` /
/// `Err::Failure`) is reported as a `LexerError` into the shared
/// `LexerState` carried in the span's `extra`, after which the combinator
/// *succeeds* with `None`. On success it yields `Some(output)`.
///
/// Any other error variant (i.e. `Err::Incomplete`) is passed through.
///
/// NOTE: the `input` bound in the error arms shadows the outer `input`,
/// so the reported error range is taken from the position where the inner
/// parser failed, and parsing resumes from that same position.
fn expect<'a, F, E, T>(
    mut parser: F,
    err_msg: E,
) -> impl FnMut(LocatedSpan<'a>) -> IResult<Option<T>>
where
    F: FnMut(LocatedSpan<'a>) -> IResult<T>,
    E: ToString,
{
    use nom::error::Error as NomError;
    move |input| match parser(input) {
        Ok((remaining, output)) => Ok((remaining, Some(output))),
        Err(nom::Err::Error(NomError { input, code: _ }))
        | Err(nom::Err::Failure(NomError { input, code: _ })) => {
            let err = LexerError(input.as_range(), err_msg.to_string());
            input.extra.report_error(err);

            // Recover: report the error and continue from the failure point.
            Ok((input, None))
        }
        Err(err) => Err(err),
    }
}
64
65fn lit_bool(input: LocatedSpan) -> IResult<Token> {
66 alt((
67 map(tag("false"), |span: LocatedSpan| {
68 Token::new(span, TokenValue::LitBool(false))
69 }),
70 map(tag("true"), |span: LocatedSpan| {
71 Token::new(span, TokenValue::LitBool(true))
72 }),
73 ))(input)
74}
75
76fn ident(input: LocatedSpan) -> IResult<Token> {
78 let first = verify(anychar, |c| c.is_ascii_alphabetic() || *c == '_');
79 let rest = take_while(|c: char| c.is_ascii_alphabetic() || c.is_ascii_digit() || c == '_');
80 let ident = recognize(preceded(first, rest));
81 map(ident, |span: LocatedSpan| {
82 let fragment = span.fragment().to_string();
83 Token::new(span, TokenValue::Ident(fragment))
84 })(input)
85}
86
87fn lit_str_unicode_char(input: LocatedSpan) -> IResult<char> {
90 let parse_hex = take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit());
91 let parse_delim_hex = preceded(
93 char('u'),
94 delimited(
95 char('{'),
96 expect(parse_hex, "expected 1-6 hex digits"),
97 expect(char('}'), "expected closing brace"),
98 ),
99 );
100 let parse_u32 = map_res(parse_delim_hex, move |hex| match hex {
101 None => Err("cannot parse number"),
102 Some(hex) => match u32::from_str_radix(hex.fragment(), 16) {
103 Ok(val) => Ok(val),
104 Err(_) => Err("invalid number"),
105 },
106 });
107 map_opt(parse_u32, std::char::from_u32)(input)
108}
109
110fn lit_str_escaped_char(input: LocatedSpan) -> IResult<char> {
112 preceded(
113 char('\\'),
114 alt((
115 lit_str_unicode_char,
116 value('\n', char('n')),
117 value('\r', char('r')),
118 value('\t', char('t')),
119 value('\\', char('\\')),
120 value('/', char('/')),
121 value('"', char('"')),
122 )),
123 )(input)
124}
125
126fn lit_str_escaped_whitespace(input: LocatedSpan) -> IResult<LocatedSpan> {
129 preceded(char('\\'), multispace1)(input)
130}
131
132fn lit_str_literal(input: LocatedSpan) -> IResult<LocatedSpan> {
134 let not_quote_slash = is_not("\"\\");
135 verify(not_quote_slash, |s: &LocatedSpan| !s.is_empty())(input)
136}
137
138fn lit_str_fragment(input: LocatedSpan) -> IResult<StringFragment> {
142 alt((
143 map(lit_str_literal, StringFragment::Literal),
144 map(lit_str_escaped_char, StringFragment::EscapedChar),
145 value(
146 StringFragment::EscapedWhitespace,
147 lit_str_escaped_whitespace,
148 ),
149 ))(input)
150}
151
/// Lexes a double-quoted string literal into a `LitStr` token holding the
/// decoded (unescaped) contents.
///
/// Fragments are folded into an owned `String`; escaped whitespace is
/// dropped entirely. A missing closing quote is reported via `expect` as a
/// recoverable error, so the token is still produced. The token's span is
/// reconstructed with offset/take so it covers everything consumed,
/// including the quotes.
fn lit_str(input: LocatedSpan) -> IResult<Token> {
    let build_string = fold_many0(lit_str_fragment, String::new, |mut string, fragment| {
        match fragment {
            StringFragment::Literal(s) => string.push_str(s.fragment()),
            StringFragment::EscapedChar(c) => string.push(c),
            StringFragment::EscapedWhitespace => {}
        }
        string
    });

    let (remainder, s) = delimited(
        char('"'),
        build_string,
        expect(char('"'), "expected closing quote"),
    )(input.clone())?;
    // The span is the slice of `input` consumed by the delimited parser.
    let span_offset = input.offset(&remainder);
    let span = input.take(span_offset);
    Ok((remainder, Token::new(span, TokenValue::LitStr(s))))
}
176
177fn lit_num(input: LocatedSpan) -> IResult<Token> {
179 let num = map_opt(double, |v: f64| {
180 let n = if v == (v as u64) as f64 {
181 Some(Number::from(v as u64))
182 } else if v < 0.0 && v == (v as i64) as f64 {
183 Some(Number::from(v as i64))
184 } else {
185 Number::from_f64(v)
186 };
187 n.map(TokenValue::LitNum)
188 });
189
190 map(num, |tv: TokenValue| Token::new(input.clone(), tv))(input.clone())
191}
192
193fn whitespace(input: LocatedSpan) -> IResult<Token> {
195 let ws = take_while1(|c: char| c.is_ascii_whitespace());
196 map(ws, |span: LocatedSpan| {
197 Token::new(span, TokenValue::Whitespace)
198 })(input)
199}
200
// Single-purpose lexers for the fixed punctuation tokens.
tag_token!(scope, "::", TokenValue::Scope);
tag_token!(colon, ":", TokenValue::Colon);
tag_token!(wildcard, "*", TokenValue::Wildcard);
tag_token!(lcurly, "{", TokenValue::LCurly);
tag_token!(rcurly, "}", TokenValue::RCurly);
tag_token!(lparen, "(", TokenValue::LParen);
tag_token!(rparen, ")", TokenValue::RParen);
tag_token!(comma, ",", TokenValue::Comma);
209
210fn expr(input: LocatedSpan) -> IResult<Vec<Token>> {
211 let tokens = many1(alt((
212 lit_bool, ident, lit_str, lit_num, whitespace, scope, colon, wildcard, lcurly, rcurly,
213 lparen, rparen, comma,
214 )));
215 let (remainder, token_list) = expect(all_consuming(tokens), "expected end-of-file")(input)?;
216 Ok((remainder, token_list.unwrap_or_default()))
217}
218
219fn tokenize<'a>(raw: &'a str) -> (Vec<Token>, Vec<LexerError>) {
226 let input = LocatedSpan::<'a>::new_extra(raw, LexerState::new());
227 let (remainder, tokens) = expr(input).expect("parser cannot fail");
228 (tokens, remainder.extra.0.into_inner())
229}
230
#[cfg(test)]
mod test {
    use std::ops::Range;

    use rstest::rstest;

    use crate::resource_path::string::lexer::tokenize;
    use crate::resource_path::string::lexer_utils::LexerError;
    use crate::resource_path::string::token::TokenValue;

    // Each fixed token lexes alone with the expected span and value.
    #[rstest]
    #[case("::", 0..2, TokenValue::Scope)]
    #[case("*", 0..1, TokenValue::Wildcard)]
    #[case("{", 0..1, TokenValue::LCurly)]
    #[case("}", 0..1, TokenValue::RCurly)]
    #[case("(", 0..1, TokenValue::LParen)]
    #[case(")", 0..1, TokenValue::RParen)]
    #[case(",", 0..1, TokenValue::Comma)]
    #[case(":", 0..1, TokenValue::Colon)]
    #[case("false", 0..5, TokenValue::LitBool(false))]
    #[case("true", 0..4, TokenValue::LitBool(true))]
    fn tag_token(#[case] raw: &str, #[case] range: Range<usize>, #[case] tv: TokenValue) {
        let (tokens, errors) = tokenize(raw);
        assert!(errors.is_empty());
        assert_eq!(tokens.len(), 1);

        let actual = tokens.first().unwrap();
        assert_eq!(actual.span.range, range, "wrong range for: {}", raw);
        assert_eq!(&actual.value, &tv);
    }

    // String literals decode escapes; the second case was previously a
    // duplicate of "a" and now covers a multi-character literal instead.
    #[rstest]
    #[case("\"\"", "")]
    #[case("\"a\"", "a")]
    #[case("\"ab\"", "ab")]
    #[case("\"a\\\"\"", "a\"")]
    #[case("\"a\\nb\"", "a\nb")]
    #[case("\"\\u{61}bc\"", "abc")]
    #[case("\"a\\ \\nb\"", "a\nb")]
    fn lit_str(#[case] raw: &str, #[case] expected: &str) {
        let (tokens, errors) = tokenize(raw);
        assert!(errors.is_empty());
        assert_eq!(tokens.len(), 1);

        let actual = tokens.first().unwrap();
        assert_eq!(&actual.value, &TokenValue::LitStr(expected.to_string()));
    }

    // A missing closing quote still yields the token, plus a recovery error
    // pointing at end-of-input.
    #[test]
    fn unclosed_str_lit() {
        let (tokens, errors) = tokenize("\"abc");
        assert_eq!(tokens.len(), 1);

        let actual = tokens.first().unwrap();
        assert_eq!(&actual.value, &TokenValue::LitStr("abc".to_string()));

        let LexerError(err_range, err_msg) = errors.first().unwrap();
        assert_eq!(err_range, &(4..4));
        assert_eq!(err_msg, &"expected closing quote");
    }

    // A malformed unicode escape is reported but the decodable part of the
    // string is still produced.
    #[test]
    fn invalid_unicode_escape() {
        let (tokens, errors) = tokenize("\"\\u{61\"");
        assert_eq!(tokens.len(), 1);

        let actual = tokens.first().unwrap();
        assert_eq!(&actual.value, &TokenValue::LitStr("a".to_string()));

        let LexerError(err_range, err_msg) = errors.first().unwrap();
        assert_eq!(err_range, &(6..7));
        assert_eq!(err_msg, &"expected closing brace");
    }

    // Numeric literals narrow to the most precise serde_json::Number kind;
    // values beyond u64/i64 range (e.g. 2^65) fall back to f64.
    #[rstest]
    #[case("123", true, true, false)]
    #[case("-12", false, true, false)]
    #[case("0.12", false, false, true)]
    #[case("-0.1", false, false, true)]
    #[case("36893488147419103232", false, false, true)]
    #[case("-36893488147419103232", false, false, true)]
    fn lit_num(
        #[case] raw: &str,
        #[case] is_u64: bool,
        #[case] is_i64: bool,
        #[case] is_f64: bool,
    ) {
        let (tokens, errors) = tokenize(raw);
        assert_eq!(tokens.len(), 1);
        assert!(errors.is_empty());

        let actual = tokens.first().unwrap();
        match &actual.value {
            TokenValue::LitNum(v) => {
                assert_eq!(v.is_u64(), is_u64);
                assert_eq!(v.is_i64(), is_i64);
                assert_eq!(v.is_f64(), is_f64);
            }
            _ => panic!("token value must be a numeric literal"),
        }
    }

    // End-to-end: a small expression lexes into the expected token sequence.
    #[test]
    fn simple() {
        let (tokens, errors) = tokenize("a(b: true)::{c, d}");
        assert!(errors.is_empty());

        let token_values = [
            TokenValue::Ident("a".to_string()),
            TokenValue::LParen,
            TokenValue::Ident("b".to_string()),
            TokenValue::Colon,
            TokenValue::Whitespace,
            TokenValue::LitBool(true),
            TokenValue::RParen,
            TokenValue::Scope,
            TokenValue::LCurly,
            TokenValue::Ident("c".to_string()),
            TokenValue::Comma,
            TokenValue::Whitespace,
            TokenValue::Ident("d".to_string()),
            TokenValue::RCurly,
        ];

        assert_eq!(
            tokens.into_iter().map(|t| t.value).collect::<Vec<_>>(),
            token_values.to_vec(),
        );
    }
}