1use crate::error::LexerError;
11use crate::token::{Operator, Sym, Symbol, Text, Token};
12use nom::branch::alt;
13use nom::bytes::complete::{tag, take_while};
14use nom::character::complete::{alpha1, alphanumeric0, char, multispace1};
15use nom::character::one_of;
16use nom::combinator::{eof, opt, recognize};
17use nom::error::{Error, context};
18use nom::multi::many0;
19use nom::number::complete::double;
20use nom::sequence::{delimited, pair};
21use nom::{IResult, Parser};
22
23pub fn tokenize(input: &str) -> Result<Vec<Token<'_>>, LexerError> {
37 let mut input = Text::new(input);
38 let mut tokens = Vec::new();
39
40 loop {
41 let (remaining, token) = token(input).map_err(massage_nom_error)?;
42 input = remaining;
43
44 tokens.push(token);
45
46 if matches!(token.sym, Sym::Eof) {
47 break;
48 }
49 }
50
51 Ok(tokens)
52}
53
54fn massage_nom_error(err: nom::Err<Error<Text>>) -> LexerError {
55 match err {
56 nom::Err::Incomplete(_) => LexerError::IncompleteInput,
57 nom::Err::Error(err) => {
58 LexerError::InvalidSymbol(err.input.location_line(), err.input.get_column() as u32)
59 }
60 nom::Err::Failure(err) => {
61 LexerError::InvalidSymbol(err.input.location_line(), err.input.get_column() as u32)
62 }
63 }
64}
65
66fn token(input: Text) -> IResult<Text, Token> {
67 delimited(
68 skip_whitespace_and_comments,
69 parse_token,
70 skip_whitespace_and_comments,
71 )
72 .parse(input)
73}
74
75fn skip_whitespace_and_comments(input: Text) -> IResult<Text, Text> {
76 recognize(many0(alt((comment, multispace1)))).parse(input)
77}
78
79fn comment(input: Text) -> IResult<Text, Text> {
80 recognize(pair(
81 pair(tag("//"), take_while(|c: char| c != '\n' && c != '\r')),
82 opt(alt((tag("\r\n"), tag("\n"), tag("\r")))),
83 ))
84 .parse(input)
85}
86
87fn parse_token(input: Text) -> IResult<Text, Token> {
88 alt((end_of_file, symbol, operator, ident, number, string)).parse(input)
89}
90
91fn symbol(input: Text) -> IResult<Text, Token> {
92 one_of("().,:[]{}")
93 .map(|c| match c {
94 '(' => Symbol::OpenParen,
95 ')' => Symbol::CloseParen,
96 '.' => Symbol::Dot,
97 ',' => Symbol::Comma,
98 ':' => Symbol::Colon,
99 '[' => Symbol::OpenBracket,
100 ']' => Symbol::CloseBracket,
101 '{' => Symbol::OpenBrace,
102 '}' => Symbol::CloseBrace,
103 _ => unreachable!(),
104 })
105 .map(move |sym| Token {
106 sym: Sym::Symbol(sym),
107 line: input.location_line(),
108 col: input.get_column() as u32,
109 })
110 .parse(input)
111}
112
113fn end_of_file(input: Text) -> IResult<Text, Token> {
114 eof.map(|_| Token {
115 sym: Sym::Eof,
116 line: input.location_line(),
117 col: input.get_column() as u32,
118 })
119 .parse(input)
120}
121
122fn operator(input: Text) -> IResult<Text, Token> {
123 alt((operator_1, operator_2)).parse(input)
124}
125
126fn operator_1(input: Text) -> IResult<Text, Token> {
127 one_of("+-*/^")
128 .map(|c| match c {
129 '+' => Operator::Add,
130 '-' => Operator::Sub,
131 '*' => Operator::Mul,
132 '/' => Operator::Div,
133 _ => unreachable!(),
134 })
135 .map(move |op| Token {
136 sym: Sym::Operator(op),
137 line: input.location_line(),
138 col: input.get_column() as u32,
139 })
140 .parse(input)
141}
142
143fn operator_2(input: Text) -> IResult<Text, Token> {
144 one_of("<>!=")
145 .flat_map(|c| {
146 context(
147 "valid character when parsing an operator",
148 opt(char('=')).map_opt(move |eq_opt| match (c, eq_opt.is_some()) {
149 ('<', false) => Some(Operator::Lt),
150 ('<', true) => Some(Operator::Lte),
151 ('>', false) => Some(Operator::Gt),
152 ('>', true) => Some(Operator::Gte),
153 ('!', true) => Some(Operator::Neq),
154 ('=', true) => Some(Operator::Eq),
155 _ => None,
156 }),
157 )
158 })
159 .map(move |op| Token {
160 sym: Sym::Operator(op),
161 line: input.location_line(),
162 col: input.get_column() as u32,
163 })
164 .parse(input)
165}
166
167fn ident(input: Text) -> IResult<Text, Token> {
168 recognize(pair(alpha1, alphanumeric0))
169 .map(|value: Text| {
170 let sym = if value.fragment().eq_ignore_ascii_case("and") {
171 Sym::Operator(Operator::And)
172 } else if value.fragment().eq_ignore_ascii_case("or") {
173 Sym::Operator(Operator::Or)
174 } else if value.fragment().eq_ignore_ascii_case("xor") {
175 Sym::Operator(Operator::Xor)
176 } else if value.fragment().eq_ignore_ascii_case("not") {
177 Sym::Operator(Operator::Not)
178 } else if value.fragment().eq_ignore_ascii_case("contains") {
179 Sym::Operator(Operator::Contains)
180 } else if value.fragment().eq_ignore_ascii_case("as") {
181 Sym::Operator(Operator::As)
182 } else {
183 Sym::Id(value.fragment())
184 };
185
186 Token {
187 sym,
188 line: value.location_line(),
189 col: value.get_column() as u32,
190 }
191 })
192 .parse(input)
193}
194
195fn number(input: Text) -> IResult<Text, Token> {
196 double
197 .map(|value| Token {
198 sym: Sym::Number(value),
199 line: input.location_line(),
200 col: input.get_column() as u32,
201 })
202 .parse(input)
203}
204
205fn string(input: Text) -> IResult<Text, Token> {
206 delimited(char('"'), take_while(|c| c != '"'), char('"'))
207 .map(|value: Text| Token {
208 sym: Sym::String(value.fragment()),
209 line: input.location_line(),
210 col: input.get_column() as u32,
211 })
212 .parse(input)
213}