1use core::fmt;
2use std::fmt::{Display, Formatter};
3
4use logos::Logos;
5use thiserror::Error;
6
7#[derive(Error, Clone, Default, Debug, PartialEq)]
8pub enum Error {
9 #[default]
10 #[error("Unknown character")]
11 UnknownCharacter,
12}
13
14#[derive(Clone, Debug, Logos, PartialEq)]
15#[logos(skip r"[ \t\r\n\f]+")]
16#[logos(error = Error)]
17pub enum Token {
18 #[token("{")]
19 LBrace,
20 #[token("}")]
21 RBrace,
22 #[token("(")]
23 LParen,
24 #[token(")")]
25 RParen,
26 #[token(".")]
27 Dot,
28 #[token(":")]
29 Colon,
30 #[token(",")]
31 Comma,
32 #[token("=")]
33 Equal,
34 #[token("!=")]
35 NotEqual,
36 #[token(">")]
37 Greater,
38 #[token(">=")]
39 GreaterEqual,
40 #[token("<")]
41 Less,
42 #[token("<=")]
43 LessEqual,
44 #[token("~")]
45 Tilde,
46 #[token("!~")]
47 NotTilde,
48 #[regex(r"[a-zA-Z_][\w-]*", |lex| lex.slice().to_string())]
50 Identifier(String),
51 #[token("false", |_| false)]
53 #[token("true", |_| true)]
54 Bool(bool),
55 #[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex| lex.slice().parse::<f64>().unwrap())]
58 Number(f64),
59 #[regex(r#""(?:[^"]|\\")*""#, |lex| {
62 let target_slice = &lex.slice()[1..lex.slice().len() - 1];
65 escape8259::unescape(target_slice).expect("Error while unquoting")
66 }
67 )]
68 String(String),
69 #[token("null")]
70 Null,
71 EOF,
72}
73
74impl Display for Token {
75 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
76 match self {
77 Token::LBrace => '{'.fmt(f),
78 Token::RBrace => '}'.fmt(f),
79 Token::LParen => '('.fmt(f),
80 Token::RParen => ')'.fmt(f),
81 Token::Dot => '.'.fmt(f),
82 Token::Colon => ':'.fmt(f),
83 Token::Comma => ','.fmt(f),
84 Token::Equal => '='.fmt(f),
85 Token::NotEqual => "!=".fmt(f),
86 Token::Greater => '>'.fmt(f),
87 Token::GreaterEqual => ">=".fmt(f),
88 Token::Less => '<'.fmt(f),
89 Token::LessEqual => "<=".fmt(f),
90 Token::Tilde => '~'.fmt(f),
91 Token::NotTilde => "!~".fmt(f),
92 Token::Identifier(key) => key.fmt(f),
93 Token::Bool(b) => b.fmt(f),
94 Token::Number(n) => n.fmt(f),
95 Token::String(s) => s.fmt(f),
96 Token::Null => "null".fmt(f),
97 Token::EOF => "EOF".fmt(f),
98 }
99 }
100}
101
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Lexes `input` and returns its single token.
    ///
    /// Panics when the input yields no token, when the first token is a
    /// lexer error, or when anything follows the first token.
    fn get_next_token(input: &str) -> Token {
        let mut tokens = Token::lexer(input);
        let first = match tokens.next() {
            Some(outcome) => outcome.expect("Error parsing token"),
            None => panic!("There should be at least one token"),
        };
        // The input must be fully consumed by that one token.
        assert_eq!(tokens.next(), None);
        first
    }

    /// Every fixed-spelling token lexes to its matching variant.
    #[rstest]
    #[case::l_brace("{", Token::LBrace)]
    #[case::r_brace("}", Token::RBrace)]
    #[case::l_paren("(", Token::LParen)]
    #[case::r_paren(")", Token::RParen)]
    #[case::dot(".", Token::Dot)]
    #[case::colon(":", Token::Colon)]
    #[case::comma(",", Token::Comma)]
    #[case::equal("=", Token::Equal)]
    #[case::not_equal("!=", Token::NotEqual)]
    #[case::greater(">", Token::Greater)]
    #[case::greater_equal(">=", Token::GreaterEqual)]
    #[case::less("<", Token::Less)]
    #[case::less_equal("<=", Token::LessEqual)]
    #[case::tilde("~", Token::Tilde)]
    #[case::not_tilde("!~", Token::NotTilde)]
    #[case::true_token("true", Token::Bool(true))]
    #[case::false_token("false", Token::Bool(false))]
    #[case::null("null", Token::Null)]
    fn simple_token_parses(#[case] input: &str, #[case] expected: Token) {
        assert_eq!(get_next_token(input), expected);
    }

    /// Integers, decimals and exponent forms lex to `Token::Number`.
    #[rstest]
    #[case::positive("5", 5.0)]
    #[case::negative("-5", -5.0)]
    #[case::float("5.5", 5.5)]
    #[case::negative_float("-5.5", -5.5)]
    #[case::float_with_exponent("5.5e5", 5.5e5)]
    #[case::float_with_negative_exponent("5.5e-5", 5.5e-5)]
    #[case::float_with_positive_exponent("5.5e+5", 5.5e5)]
    #[case::float_with_uppercase_exponent("5.5E5", 5.5e5)]
    #[case::float_with_uppercase_positive_exponent("5.5E+5", 5.5e5)]
    #[case::float_with_uppercase_negative_exponent("5.5E-5", 5.5e-5)]
    fn number_token_parses(#[case] input: &str, #[case] expected: f64) {
        assert_eq!(get_next_token(input), Token::Number(expected));
    }

    /// A comma is not a decimal separator, so `5,5` is not a single token.
    #[test]
    #[should_panic]
    fn number_token_parse_fails_when_wrong_decimal_separator() {
        get_next_token("5,5");
    }

    /// Identifiers lex to `Token::Identifier` carrying the input verbatim.
    #[rstest]
    #[case::simple("key")]
    #[case::with_underscore("key_with_underscore")]
    #[case::with_numbers("key_with_123_numbers")]
    #[case::with_dash("key-with-dash")]
    #[case::with_dash_and_underscore("key-with-dash_and_underscore")]
    #[case::with_caps("KeyWithCaps")]
    #[case::starting_with_underscore("_key")]
    fn identifier_token_parses(#[case] input: &str) {
        assert_eq!(get_next_token(input), Token::Identifier(input.to_string()));
    }

    /// Double-quoted literals lex to `Token::String` with escapes resolved.
    #[rstest]
    #[case::simple(r#""JavaScript""#, "JavaScript")]
    #[case::with_space(r#""Java Script""#, "Java Script")]
    #[case::with_double_commas(r#""Java\"Script""#, "Java\"Script")]
    #[case::with_single_commas(r#""Java'Script""#, "Java'Script")]
    #[case::double_quoted_with_single_commas(r#""Java'Script""#, "Java'Script")]
    #[case::newline(r#""Java\nScript""#, "Java\nScript")]
    #[case::tab(r#""Java\tScript""#, "Java\tScript")]
    #[case::backslash(r#""Java\\Script""#, "Java\\Script")]
    #[case::backslash_and_quote(r#""Java\\\"Script""#, "Java\\\"Script")]
    #[case::mixed(r#""/Jav\r\n\ta\\\"Scri\"pt\n""#, "/Jav\r\n\ta\\\"Scri\"pt\n")]
    fn string_token_parses(#[case] input: &str, #[case] expected: &str) {
        assert_eq!(get_next_token(input), Token::String(expected.to_string()));
    }

    /// An escaped backslash followed by a bare quote must not lex as one
    /// valid string token.
    #[test]
    #[should_panic]
    fn string_token_parse_unescape_fails_when_malformed_escaped_input() {
        get_next_token(r#""Java\\"Script""#);
    }

    /// Single-quoted text is not a string literal.
    #[test]
    #[should_panic]
    fn string_token_parse_fails_when_single_quoted() {
        get_next_token(r#"'Java Script'"#);
    }
}