1use super::Token;
2use nom::{branch::{alt, permutation}, bytes::complete::{tag, take_until, take_while}, character::{
3 complete::{alpha1, bin_digit1, char, digit1, hex_digit1, oct_digit1},
4 one_of,
5 streaming::multispace0,
6}, combinator::{map, opt}, error::{Error, ErrorKind}, multi::many0, sequence::{delimited, pair, preceded, terminated}, IResult, Parser};
7
8fn keyword(i: &str) -> IResult<&str, Token> {
9 let (remaining, out) = alt([
10 tag(Token::ABSTRACT),
11 tag(Token::ASSERT),
12 tag(Token::BOOLEAN),
13 tag(Token::BREAK),
14 tag(Token::BYTE),
15 tag(Token::CASE),
16 tag(Token::CATCH),
17 tag(Token::CHAR),
18 tag(Token::CLASS),
19 tag(Token::CONST),
20 tag(Token::CONTINUE),
21 tag(Token::DEFAULT),
22 tag(Token::DO),
23 tag(Token::DOUBLE),
24 tag(Token::ELSE),
25 tag(Token::ENUM),
26 tag(Token::EXTENDS),
27 tag(Token::FINAL),
28 tag(Token::FINALLY),
29 tag(Token::FLOAT),
30 tag(Token::FOR),
31 tag(Token::IF),
32 tag(Token::IMPLEMENTS),
33 tag(Token::IMPORT),
34 tag(Token::INSTANCEOF),
35 tag(Token::INT),
36 tag(Token::INTERFACE),
37 tag(Token::LONG),
38 tag(Token::NATIVE),
39 tag(Token::NEW),
40 tag(Token::PACKAGE),
41 tag(Token::PRIVATE),
42 tag(Token::PROTECTED),
43 tag(Token::PUBLIC),
44 tag(Token::RETURN),
45 tag(Token::SHORT),
46 tag(Token::STATIC),
47 tag(Token::STRICTFP),
48 tag(Token::SUPER),
49 tag(Token::SWITCH),
50 tag(Token::SYNCHRONIZED),
51 tag(Token::THIS),
52 tag(Token::THROW),
53 tag(Token::THROWS),
54 tag(Token::TRANSIENT),
55 tag(Token::TRY),
56 tag(Token::VOID),
57 tag(Token::VOLATILE),
58 tag(Token::WHILE),
59 ])
60 .parse(i)?;
61 Ok((remaining, Token::from(out)))
62}
63
64fn identifier(i: &str) -> IResult<&str, Token> {
65 let (remaining, out) = alt((alpha1, tag("_"))).parse(i)?;
66 let (remaining, out2) =
67 take_while(|c: char| c.is_alphanumeric() || c == '_').parse(remaining)?;
68 Ok((remaining, Token::Identifier(out.to_string() + out2)))
69}
70
71fn boolean_literal(i: &str) -> IResult<&str, Token> {
72 map(alt((tag(Token::TRUE), tag(Token::FALSE))), |b: &str| {
73 Token::BooleanLiteral(b == Token::TRUE)
74 })
75 .parse(i)
76}
77
78fn null_literal(i: &str) -> IResult<&str, Token> {
79 map(tag(Token::NULL), |_| Token::NullLiteral).parse(i)
80}
81
82fn number<const RADIX: u8>(i: &str) -> IResult<&str, String> {
83 let d = match RADIX {
84 2 => bin_digit1,
85 8 => oct_digit1,
86 10 => digit1,
87 16 => hex_digit1,
88 _ => unreachable!(),
89 };
90 map(
91 pair(d, many0(preceded(opt(tag("_")), d))),
92 |(a, b): (&str, Vec<&str>)| a.to_owned() + b.join("").as_str(),
93 )
94 .parse(i)
95}
96
97fn integer_literal(i: &str) -> IResult<&str, Token> {
98 map(number::<10>, |s| Token::IntegerLiteral(s.parse().unwrap())).parse(i)
99}
100
101fn double_literal(i: &str) -> IResult<&str, Token> {
102 map(
103 permutation((number::<10>, tag("."), number::<10>)),
104 |(a, _, b)| {
105 let a = a.parse().unwrap_or(0f64);
106 let l = b.len();
107 let b = b.parse().unwrap_or(0f64);
108 Token::DoubleLiteral(a + b / 10.0f64.powi(l as _))
109 },
110 )
111 .parse(i)
112}
113
114fn float_literal(i: &str) -> IResult<&str, Token> {
115 map(
116 terminated(
117 permutation((number::<10>, tag("."), number::<10>)),
118 tag("f"),
119 ),
120 |(a, _, b)| {
121 let a = a.parse().unwrap_or(0f32);
122 let l = b.len();
123 let b = b.parse().unwrap_or(0f32);
124 Token::FloatLiteral(a + b / 10.0f32.powi(l as _))
125 },
126 )
127 .parse(i)
128}
129
130fn hex_literal(i: &str) -> IResult<&str, Token> {
131 map(
132 permutation((tag("0"), one_of("xX"), number::<16>)),
133 |(_, _, h)| Token::HexLiteral(u32::from_str_radix(&h, 16).unwrap_or(0)),
134 )
135 .parse(i)
136}
137
138fn oct_literal(i: &str) -> IResult<&str, Token> {
139 map(permutation((tag("0"), number::<8>)), |(_, o)| {
140 Token::OctLiteral(u32::from_str_radix(&o, 8).unwrap_or(0))
141 })
142 .parse(i)
143}
144
145fn bin_literal(i: &str) -> IResult<&str, Token> {
146 map(
147 permutation((tag("0"), one_of("bB"), number::<2>)),
148 |(_, _, b)| Token::BinLiteral(u32::from_str_radix(&b, 2).unwrap_or(0)),
149 )
150 .parse(i)
151}
152
153fn char_literal(i: &str) -> IResult<&str, Token> {
154 map(
155 delimited(char('\''), take_while(|c: char| c != '\''), char('\'')),
156 |s: &str| Token::CharLiteral(s.chars().next().unwrap()),
157 )
158 .parse(i)
159}
160
161fn string_literal(i: &str) -> IResult<&str, Token> {
162 map(
163 delimited(char('"'), take_while(|c: char| c != '"'), char('"')),
164 |s: &str| Token::StringLiteral(s.to_string()),
165 )
166 .parse(i)
167}
168
169fn separator(i: &str) -> IResult<&str, Token> {
170 let (remaining, out) = alt((
171 tag(Token::LEFT_PAREN),
172 tag(Token::RIGHT_PAREN),
173 tag(Token::LEFT_BRACE),
174 tag(Token::RIGHT_BRACE),
175 tag(Token::LEFT_BRACKET),
176 tag(Token::RIGHT_BRACKET),
177 tag(Token::SEMI_COLON),
178 tag(Token::COMMA),
179 tag(Token::DOT),
180 ))
181 .parse(i)?;
182 Ok((remaining, Token::from(out)))
183}
184
185fn operator(i: &str) -> IResult<&str, Token> {
186 let (remaining, out) = alt([
187 tag(Token::UNSIGNED_SHIFT_RIGHT_ASSIGN),
188 tag(Token::UNSIGNED_SHIFT_RIGHT),
189 tag(Token::SHIFT_RIGHT_ASSIGN),
190 tag(Token::SHIFT_LEFT_ASSIGN),
191 tag(Token::MOD_ASSIGN),
192 tag(Token::XOR_ASSIGN),
193 tag(Token::OR_ASSIGN),
194 tag(Token::AND_ASSIGN),
195 tag(Token::SLASH_ASSIGN),
196 tag(Token::STAR_ASSIGN),
197 tag(Token::MINUS_ASSIGN),
198 tag(Token::PLUS_ASSIGN),
199 tag(Token::SHIFT_LEFT),
200 tag(Token::SHIFT_RIGHT),
201 tag(Token::DOUBLE_MINUS),
202 tag(Token::DOUBLE_PLUS),
203 tag(Token::LOGICAL_OR),
204 tag(Token::LOGICAL_AND),
205 tag(Token::NOT_EQUAL),
206 tag(Token::GREATER_THAN_OR_EQUAL),
207 tag(Token::LESS_THAN_OR_EQUAL),
208 tag(Token::DOUBLE_EQUAL),
209 tag(Token::MOD),
210 tag(Token::XOR),
211 tag(Token::OR),
212 tag(Token::AND),
213 tag(Token::SLASH),
214 tag(Token::STAR),
215 tag(Token::MINUS),
216 tag(Token::PLUS),
217 tag(Token::COLON),
218 tag(Token::QUESTION),
219 tag(Token::NOT),
220 tag(Token::LOGICAL_NOT),
221 tag(Token::LESS_THAN),
222 tag(Token::GREATER_THAN),
223 tag(Token::ASSIGN),
224 tag(Token::TRIPLE_DOT),
225 tag(Token::ARROW),
226 tag(Token::DOUBLE_COLON),
227 ])
228 .parse(i)?;
229 Ok((remaining, Token::from(out)))
230}
231
232fn single_comment(i: &str) -> IResult<&str, Token> {
233 let (remaining, _) = tag("//").parse(i)?;
234 let (remaining, out) = take_while(|c| c != '\n' && c != '\r').parse(remaining)?;
235 Ok((
236 remaining,
237 Token::Comment {
238 text: out.to_string(),
239 single_line: true,
240 },
241 ))
242}
243
244fn multi_comment(i: &str) -> IResult<&str, Token> {
245 let (remaining, _) = tag("/*").parse(i)?;
246 let (remaining, out) = take_until("*/").parse(remaining)?;
247 let (remaining, _) = tag("*/").parse(remaining)?;
248 Ok((
249 remaining,
250 Token::Comment {
251 text: out.to_string(),
252 single_line: false,
253 },
254 ))
255}
256
257fn java_doc(i: &str) -> IResult<&str, Token> {
258 let (remaining, _) = tag("/**").parse(i)?;
259 let (remaining, out) = take_until("*/").parse(remaining)?;
260 let (remaining, _) = tag("*/").parse(remaining)?;
261 Ok((remaining, Token::JavaDoc(out.to_string())))
262}
263
264pub fn one_token(i: &str) -> IResult<&str, Token> {
265 let Ok((remaining, _)) = multispace0::<_, Error<_>>(i) else {
266 return Err(nom::Err::Error(Error::new("", ErrorKind::Complete)));
267 };
268
269 alt((
270 java_doc,
271 single_comment,
272 multi_comment,
273 keyword,
274 operator,
275 boolean_literal,
276 null_literal,
277 hex_literal,
278 oct_literal,
279 bin_literal,
280 float_literal,
281 double_literal,
282 integer_literal,
283 char_literal,
284 string_literal,
285 separator,
286 identifier,
287 ))
288 .parse(remaining)
289}