java_lang/tokenizer/
parser.rs

1use super::Token;
2use nom::{branch::{alt, permutation}, bytes::complete::{tag, take_until, take_while}, character::{
3    complete::{alpha1, bin_digit1, char, digit1, hex_digit1, oct_digit1},
4    one_of,
5    streaming::multispace0,
6}, combinator::{map, opt}, error::{Error, ErrorKind}, multi::many0, sequence::{delimited, pair, preceded, terminated}, IResult, Parser};
7
8fn keyword(i: &str) -> IResult<&str, Token> {
9    let (remaining, out) = alt([
10        tag(Token::ABSTRACT),
11        tag(Token::ASSERT),
12        tag(Token::BOOLEAN),
13        tag(Token::BREAK),
14        tag(Token::BYTE),
15        tag(Token::CASE),
16        tag(Token::CATCH),
17        tag(Token::CHAR),
18        tag(Token::CLASS),
19        tag(Token::CONST),
20        tag(Token::CONTINUE),
21        tag(Token::DEFAULT),
22        tag(Token::DO),
23        tag(Token::DOUBLE),
24        tag(Token::ELSE),
25        tag(Token::ENUM),
26        tag(Token::EXTENDS),
27        tag(Token::FINAL),
28        tag(Token::FINALLY),
29        tag(Token::FLOAT),
30        tag(Token::FOR),
31        tag(Token::IF),
32        tag(Token::IMPLEMENTS),
33        tag(Token::IMPORT),
34        tag(Token::INSTANCEOF),
35        tag(Token::INT),
36        tag(Token::INTERFACE),
37        tag(Token::LONG),
38        tag(Token::NATIVE),
39        tag(Token::NEW),
40        tag(Token::PACKAGE),
41        tag(Token::PRIVATE),
42        tag(Token::PROTECTED),
43        tag(Token::PUBLIC),
44        tag(Token::RETURN),
45        tag(Token::SHORT),
46        tag(Token::STATIC),
47        tag(Token::STRICTFP),
48        tag(Token::SUPER),
49        tag(Token::SWITCH),
50        tag(Token::SYNCHRONIZED),
51        tag(Token::THIS),
52        tag(Token::THROW),
53        tag(Token::THROWS),
54        tag(Token::TRANSIENT),
55        tag(Token::TRY),
56        tag(Token::VOID),
57        tag(Token::VOLATILE),
58        tag(Token::WHILE),
59    ])
60    .parse(i)?;
61    Ok((remaining, Token::from(out)))
62}
63
64fn identifier(i: &str) -> IResult<&str, Token> {
65    let (remaining, out) = alt((alpha1, tag("_"))).parse(i)?;
66    let (remaining, out2) =
67        take_while(|c: char| c.is_alphanumeric() || c == '_').parse(remaining)?;
68    Ok((remaining, Token::Identifier(out.to_string() + out2)))
69}
70
71fn boolean_literal(i: &str) -> IResult<&str, Token> {
72    map(alt((tag(Token::TRUE), tag(Token::FALSE))), |b: &str| {
73        Token::BooleanLiteral(b == Token::TRUE)
74    })
75    .parse(i)
76}
77
78fn null_literal(i: &str) -> IResult<&str, Token> {
79    map(tag(Token::NULL), |_| Token::NullLiteral).parse(i)
80}
81
82fn number<const RADIX: u8>(i: &str) -> IResult<&str, String> {
83    let d = match RADIX {
84        2 => bin_digit1,
85        8 => oct_digit1,
86        10 => digit1,
87        16 => hex_digit1,
88        _ => unreachable!(),
89    };
90    map(
91        pair(d, many0(preceded(opt(tag("_")), d))),
92        |(a, b): (&str, Vec<&str>)| a.to_owned() + b.join("").as_str(),
93    )
94    .parse(i)
95}
96
97fn integer_literal(i: &str) -> IResult<&str, Token> {
98    map(number::<10>, |s| Token::IntegerLiteral(s.parse().unwrap())).parse(i)
99}
100
101fn double_literal(i: &str) -> IResult<&str, Token> {
102    map(
103        permutation((number::<10>, tag("."), number::<10>)),
104        |(a, _, b)| {
105            let a = a.parse().unwrap_or(0f64);
106            let l = b.len();
107            let b = b.parse().unwrap_or(0f64);
108            Token::DoubleLiteral(a + b / 10.0f64.powi(l as _))
109        },
110    )
111    .parse(i)
112}
113
114fn float_literal(i: &str) -> IResult<&str, Token> {
115    map(
116        terminated(
117            permutation((number::<10>, tag("."), number::<10>)),
118            tag("f"),
119        ),
120        |(a, _, b)| {
121            let a = a.parse().unwrap_or(0f32);
122            let l = b.len();
123            let b = b.parse().unwrap_or(0f32);
124            Token::FloatLiteral(a + b / 10.0f32.powi(l as _))
125        },
126    )
127    .parse(i)
128}
129
130fn hex_literal(i: &str) -> IResult<&str, Token> {
131    map(
132        permutation((tag("0"), one_of("xX"), number::<16>)),
133        |(_, _, h)| Token::HexLiteral(u32::from_str_radix(&h, 16).unwrap_or(0)),
134    )
135    .parse(i)
136}
137
138fn oct_literal(i: &str) -> IResult<&str, Token> {
139    map(permutation((tag("0"), number::<8>)), |(_, o)| {
140        Token::OctLiteral(u32::from_str_radix(&o, 8).unwrap_or(0))
141    })
142    .parse(i)
143}
144
145fn bin_literal(i: &str) -> IResult<&str, Token> {
146    map(
147        permutation((tag("0"), one_of("bB"), number::<2>)),
148        |(_, _, b)| Token::BinLiteral(u32::from_str_radix(&b, 2).unwrap_or(0)),
149    )
150    .parse(i)
151}
152
153fn char_literal(i: &str) -> IResult<&str, Token> {
154    map(
155        delimited(char('\''), take_while(|c: char| c != '\''), char('\'')),
156        |s: &str| Token::CharLiteral(s.chars().next().unwrap()),
157    )
158    .parse(i)
159}
160
161fn string_literal(i: &str) -> IResult<&str, Token> {
162    map(
163        delimited(char('"'), take_while(|c: char| c != '"'), char('"')),
164        |s: &str| Token::StringLiteral(s.to_string()),
165    )
166    .parse(i)
167}
168
169fn separator(i: &str) -> IResult<&str, Token> {
170    let (remaining, out) = alt((
171        tag(Token::LEFT_PAREN),
172        tag(Token::RIGHT_PAREN),
173        tag(Token::LEFT_BRACE),
174        tag(Token::RIGHT_BRACE),
175        tag(Token::LEFT_BRACKET),
176        tag(Token::RIGHT_BRACKET),
177        tag(Token::SEMI_COLON),
178        tag(Token::COMMA),
179        tag(Token::DOT),
180    ))
181    .parse(i)?;
182    Ok((remaining, Token::from(out)))
183}
184
185fn operator(i: &str) -> IResult<&str, Token> {
186    let (remaining, out) = alt([
187        tag(Token::UNSIGNED_SHIFT_RIGHT_ASSIGN),
188        tag(Token::UNSIGNED_SHIFT_RIGHT),
189        tag(Token::SHIFT_RIGHT_ASSIGN),
190        tag(Token::SHIFT_LEFT_ASSIGN),
191        tag(Token::MOD_ASSIGN),
192        tag(Token::XOR_ASSIGN),
193        tag(Token::OR_ASSIGN),
194        tag(Token::AND_ASSIGN),
195        tag(Token::SLASH_ASSIGN),
196        tag(Token::STAR_ASSIGN),
197        tag(Token::MINUS_ASSIGN),
198        tag(Token::PLUS_ASSIGN),
199        tag(Token::SHIFT_LEFT),
200        tag(Token::SHIFT_RIGHT),
201        tag(Token::DOUBLE_MINUS),
202        tag(Token::DOUBLE_PLUS),
203        tag(Token::LOGICAL_OR),
204        tag(Token::LOGICAL_AND),
205        tag(Token::NOT_EQUAL),
206        tag(Token::GREATER_THAN_OR_EQUAL),
207        tag(Token::LESS_THAN_OR_EQUAL),
208        tag(Token::DOUBLE_EQUAL),
209        tag(Token::MOD),
210        tag(Token::XOR),
211        tag(Token::OR),
212        tag(Token::AND),
213        tag(Token::SLASH),
214        tag(Token::STAR),
215        tag(Token::MINUS),
216        tag(Token::PLUS),
217        tag(Token::COLON),
218        tag(Token::QUESTION),
219        tag(Token::NOT),
220        tag(Token::LOGICAL_NOT),
221        tag(Token::LESS_THAN),
222        tag(Token::GREATER_THAN),
223        tag(Token::ASSIGN),
224        tag(Token::TRIPLE_DOT),
225        tag(Token::ARROW),
226        tag(Token::DOUBLE_COLON),
227    ])
228    .parse(i)?;
229    Ok((remaining, Token::from(out)))
230}
231
232fn single_comment(i: &str) -> IResult<&str, Token> {
233    let (remaining, _) = tag("//").parse(i)?;
234    let (remaining, out) = take_while(|c| c != '\n' && c != '\r').parse(remaining)?;
235    Ok((
236        remaining,
237        Token::Comment {
238            text: out.to_string(),
239            single_line: true,
240        },
241    ))
242}
243
244fn multi_comment(i: &str) -> IResult<&str, Token> {
245    let (remaining, _) = tag("/*").parse(i)?;
246    let (remaining, out) = take_until("*/").parse(remaining)?;
247    let (remaining, _) = tag("*/").parse(remaining)?;
248    Ok((
249        remaining,
250        Token::Comment {
251            text: out.to_string(),
252            single_line: false,
253        },
254    ))
255}
256
257fn java_doc(i: &str) -> IResult<&str, Token> {
258    let (remaining, _) = tag("/**").parse(i)?;
259    let (remaining, out) = take_until("*/").parse(remaining)?;
260    let (remaining, _) = tag("*/").parse(remaining)?;
261    Ok((remaining, Token::JavaDoc(out.to_string())))
262}
263
264pub fn one_token(i: &str) -> IResult<&str, Token> {
265    let Ok((remaining, _)) = multispace0::<_, Error<_>>(i) else {
266        return Err(nom::Err::Error(Error::new("", ErrorKind::Complete)));
267    };
268
269    alt((
270        java_doc,
271        single_comment,
272        multi_comment,
273        keyword,
274        operator,
275        boolean_literal,
276        null_literal,
277        hex_literal,
278        oct_literal,
279        bin_literal,
280        float_literal,
281        double_literal,
282        integer_literal,
283        char_literal,
284        string_literal,
285        separator,
286        identifier,
287    ))
288    .parse(remaining)
289}