1use crate::token::{Operator, Sym, Symbol, Text, Token};
11use nom::branch::alt;
12use nom::bytes::complete::take_while;
13use nom::character::complete::{alpha1, alphanumeric0, char, multispace0};
14use nom::character::one_of;
15use nom::combinator::{eof, opt, recognize};
16use nom::error::{Error, context};
17use nom::number::complete::double;
18use nom::sequence::{delimited, pair};
19use nom::{IResult, Parser};
20
21pub fn tokenize(input: &str) -> Result<Vec<Token<'_>>, nom::Err<Error<Text<'_>>>> {
35 let mut input = Text::new(input);
36 let mut tokens = Vec::new();
37
38 loop {
39 let (remaining, token) = token(input)?;
40 input = remaining;
41
42 tokens.push(token);
43
44 if matches!(token.sym, Sym::Eof) {
45 break;
46 }
47 }
48
49 Ok(tokens)
50}
51
52fn token(input: Text) -> IResult<Text, Token> {
53 delimited(
54 multispace0,
55 alt((end_of_file, symbol, operator, ident, number, string)),
56 multispace0,
57 )
58 .parse(input)
59}
60
61fn symbol(input: Text) -> IResult<Text, Token> {
62 one_of("().,:[]{}")
63 .map(|c| match c {
64 '(' => Symbol::OpenParen,
65 ')' => Symbol::CloseParen,
66 '.' => Symbol::Dot,
67 ',' => Symbol::Comma,
68 ':' => Symbol::Colon,
69 '[' => Symbol::OpenBracket,
70 ']' => Symbol::CloseBracket,
71 '{' => Symbol::OpenBrace,
72 '}' => Symbol::CloseBrace,
73 _ => unreachable!(),
74 })
75 .map(move |sym| Token {
76 sym: Sym::Symbol(sym),
77 line: input.location_line(),
78 col: input.get_column() as u32,
79 })
80 .parse(input)
81}
82
83fn end_of_file(input: Text) -> IResult<Text, Token> {
84 eof.map(|_| Token {
85 sym: Sym::Eof,
86 line: input.location_line(),
87 col: input.get_column() as u32,
88 })
89 .parse(input)
90}
91
92fn operator(input: Text) -> IResult<Text, Token> {
93 alt((operator_1, operator_2)).parse(input)
94}
95
96fn operator_1(input: Text) -> IResult<Text, Token> {
97 one_of("+-*/^")
98 .map(|c| match c {
99 '+' => Operator::Add,
100 '-' => Operator::Sub,
101 '*' => Operator::Mul,
102 '/' => Operator::Div,
103 _ => unreachable!(),
104 })
105 .map(move |op| Token {
106 sym: Sym::Operator(op),
107 line: input.location_line(),
108 col: input.get_column() as u32,
109 })
110 .parse(input)
111}
112
113fn operator_2(input: Text) -> IResult<Text, Token> {
114 one_of("<>!=")
115 .flat_map(|c| {
116 context(
117 "valid character when parsing an operator",
118 opt(char('=')).map_opt(move |eq_opt| match (c, eq_opt.is_some()) {
119 ('<', false) => Some(Operator::Lt),
120 ('<', true) => Some(Operator::Lte),
121 ('>', false) => Some(Operator::Gt),
122 ('>', true) => Some(Operator::Gte),
123 ('!', true) => Some(Operator::Neq),
124 ('=', true) => Some(Operator::Eq),
125 _ => None,
126 }),
127 )
128 })
129 .map(move |op| Token {
130 sym: Sym::Operator(op),
131 line: input.location_line(),
132 col: input.get_column() as u32,
133 })
134 .parse(input)
135}
136
137fn ident(input: Text) -> IResult<Text, Token> {
138 recognize(pair(alpha1, alphanumeric0))
139 .map(|value: Text| {
140 let sym = if value.fragment().eq_ignore_ascii_case("and") {
141 Sym::Operator(Operator::And)
142 } else if value.fragment().eq_ignore_ascii_case("or") {
143 Sym::Operator(Operator::Or)
144 } else if value.fragment().eq_ignore_ascii_case("xor") {
145 Sym::Operator(Operator::Xor)
146 } else if value.fragment().eq_ignore_ascii_case("not") {
147 Sym::Operator(Operator::Not)
148 } else {
149 Sym::Id(value.fragment())
150 };
151
152 Token {
153 sym,
154 line: value.location_line(),
155 col: value.get_column() as u32,
156 }
157 })
158 .parse(input)
159}
160
161fn number(input: Text) -> IResult<Text, Token> {
162 double
163 .map(|value| Token {
164 sym: Sym::Number(value),
165 line: input.location_line(),
166 col: input.get_column() as u32,
167 })
168 .parse(input)
169}
170
171fn string(input: Text) -> IResult<Text, Token> {
172 delimited(char('"'), take_while(|c| c != '"'), char('"'))
173 .map(|value: Text| Token {
174 sym: Sym::String(value.fragment()),
175 line: input.location_line(),
176 col: input.get_column() as u32,
177 })
178 .parse(input)
179}