1use crate::error::LexerError;
11use crate::token::{Operator, Sym, Symbol, Text, Token};
12use nom::branch::alt;
13use nom::bytes::complete::{tag, take_while};
14use nom::character::complete::{alpha1, char, multispace1, satisfy};
15use nom::character::one_of;
16use nom::combinator::{eof, opt, recognize};
17use nom::error::{Error, context};
18use nom::multi::many0;
19use nom::number::complete::double;
20use nom::sequence::{delimited, pair};
21use nom::{AsChar, IResult, Parser};
22
23pub(crate) fn tokenize(input: &str) -> Result<Vec<Token<'_>>, LexerError> {
37 let mut input = Text::new(input);
38 let mut tokens = Vec::new();
39
40 loop {
41 let (remaining, token) = token(input).map_err(massage_nom_error)?;
42 input = remaining;
43
44 tokens.push(token);
45
46 if matches!(token.sym, Sym::Eof) {
47 break;
48 }
49 }
50
51 Ok(tokens)
52}
53
54fn massage_nom_error(err: nom::Err<Error<Text>>) -> LexerError {
55 match err {
56 nom::Err::Incomplete(_) => LexerError::IncompleteInput,
57 nom::Err::Error(err) => {
58 LexerError::InvalidSymbol(err.input.location_line(), err.input.get_column() as u32)
59 }
60 nom::Err::Failure(err) => {
61 LexerError::InvalidSymbol(err.input.location_line(), err.input.get_column() as u32)
62 }
63 }
64}
65
66fn token(input: Text) -> IResult<Text, Token> {
67 delimited(
68 skip_whitespace_and_comments,
69 parse_token,
70 skip_whitespace_and_comments,
71 )
72 .parse(input)
73}
74
75fn skip_whitespace_and_comments(input: Text) -> IResult<Text, Text> {
76 recognize(many0(alt((comment, multispace1)))).parse(input)
77}
78
79fn comment(input: Text) -> IResult<Text, Text> {
80 recognize(pair(
81 pair(tag("//"), take_while(|c: char| c != '\n' && c != '\r')),
82 opt(alt((tag("\r\n"), tag("\n"), tag("\r")))),
83 ))
84 .parse(input)
85}
86
87fn parse_token(input: Text) -> IResult<Text, Token> {
88 alt((end_of_file, symbol, operator, ident, number, string)).parse(input)
89}
90
91fn symbol(input: Text) -> IResult<Text, Token> {
92 one_of("().,:[]{}")
93 .map(|c| match c {
94 '(' => Symbol::OpenParen,
95 ')' => Symbol::CloseParen,
96 '.' => Symbol::Dot,
97 ',' => Symbol::Comma,
98 ':' => Symbol::Colon,
99 '[' => Symbol::OpenBracket,
100 ']' => Symbol::CloseBracket,
101 '{' => Symbol::OpenBrace,
102 '}' => Symbol::CloseBrace,
103 _ => unreachable!(),
104 })
105 .map(move |sym| Token {
106 sym: Sym::Symbol(sym),
107 line: input.location_line(),
108 col: input.get_column() as u32,
109 })
110 .parse(input)
111}
112
113fn end_of_file(input: Text) -> IResult<Text, Token> {
114 eof.map(|_| Token {
115 sym: Sym::Eof,
116 line: input.location_line(),
117 col: input.get_column() as u32,
118 })
119 .parse(input)
120}
121
122fn operator(input: Text) -> IResult<Text, Token> {
123 alt((operator_1, operator_2)).parse(input)
124}
125
126fn operator_1(input: Text) -> IResult<Text, Token> {
127 one_of("+-*/^")
128 .map(|c| match c {
129 '+' => Operator::Add,
130 '-' => Operator::Sub,
131 '*' => Operator::Mul,
132 '/' => Operator::Div,
133 _ => unreachable!(),
134 })
135 .map(move |op| Token {
136 sym: Sym::Operator(op),
137 line: input.location_line(),
138 col: input.get_column() as u32,
139 })
140 .parse(input)
141}
142
143fn operator_2(input: Text) -> IResult<Text, Token> {
144 one_of("<>!=")
145 .flat_map(|c| {
146 context(
147 "valid character when parsing an operator",
148 opt(char('=')).map_opt(move |eq_opt| match (c, eq_opt.is_some()) {
149 ('<', false) => Some(Operator::Lt),
150 ('<', true) => Some(Operator::Lte),
151 ('>', false) => Some(Operator::Gt),
152 ('>', true) => Some(Operator::Gte),
153 ('!', true) => Some(Operator::Neq),
154 ('=', true) => Some(Operator::Eq),
155 _ => None,
156 }),
157 )
158 })
159 .map(move |op| Token {
160 sym: Sym::Operator(op),
161 line: input.location_line(),
162 col: input.get_column() as u32,
163 })
164 .parse(input)
165}
166
167fn ident(input: Text) -> IResult<Text, Token> {
168 recognize(pair(
169 alpha1,
170 many0(satisfy(|c| AsChar::is_alphanum(c) || c == '_')),
171 ))
172 .map(|value: Text| {
173 let sym = if value.fragment().eq_ignore_ascii_case("and") {
174 Sym::Operator(Operator::And)
175 } else if value.fragment().eq_ignore_ascii_case("or") {
176 Sym::Operator(Operator::Or)
177 } else if value.fragment().eq_ignore_ascii_case("xor") {
178 Sym::Operator(Operator::Xor)
179 } else if value.fragment().eq_ignore_ascii_case("not") {
180 Sym::Operator(Operator::Not)
181 } else if value.fragment().eq_ignore_ascii_case("contains") {
182 Sym::Operator(Operator::Contains)
183 } else if value.fragment().eq_ignore_ascii_case("as") {
184 Sym::Operator(Operator::As)
185 } else {
186 Sym::Id(value.fragment())
187 };
188
189 Token {
190 sym,
191 line: value.location_line(),
192 col: value.get_column() as u32,
193 }
194 })
195 .parse(input)
196}
197
198fn number(input: Text) -> IResult<Text, Token> {
199 double
200 .map(|value| Token {
201 sym: Sym::Number(value),
202 line: input.location_line(),
203 col: input.get_column() as u32,
204 })
205 .parse(input)
206}
207
208fn string(input: Text) -> IResult<Text, Token> {
209 delimited(char('"'), take_while(|c| c != '"'), char('"'))
210 .map(|value: Text| Token {
211 sym: Sym::String(value.fragment()),
212 line: input.location_line(),
213 col: input.get_column() as u32,
214 })
215 .parse(input)
216}