1use logos::Logos;
2use std::{fs, path::Path};
3pub use token::Token;
4
5mod token;
6
/// Location of a token within the source text.
///
/// Spans produced by [`string_to_tokens`] use a line number that starts at 1
/// and a half-open column range `col_start..col_end`, also starting at 1 and
/// counted in characters rather than bytes.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct Span {
    /// Line the token is on.
    pub line: usize,
    /// Column of the first character of the token.
    pub col_start: usize,
    /// Column one past the last character of the token (exclusive end).
    pub col_end: usize,
}

/// The all-zero span; always equal to [`Span::zero`].
///
/// Defined through [`Span::zero`] so the two can never drift apart.
pub static ZERO_SPAN: Span = Span::zero();

impl Span {
    /// Returns the all-zero span (`line`, `col_start` and `col_end` all `0`).
    ///
    /// This is a `const fn`, so it can also initialise constants and statics.
    pub const fn zero() -> Self {
        Self {
            line: 0,
            col_start: 0,
            col_end: 0,
        }
    }
}
35
36#[derive(Debug, PartialEq)]
37pub struct PlacedToken {
38 pub token: Token,
39 pub span: Span,
40}
41
42pub fn string_to_tokens(content: &str) -> Vec<PlacedToken> {
43 let mut char_at_byte = vec![None; content.len()];
55 for (i, (pos, _)) in content.char_indices().enumerate() {
56 char_at_byte[pos] = Some(i + 1);
57 }
58 char_at_byte.push(Some(content.chars().count() + 1));
60
61 let mut line = 1;
65 let mut last_newline = 0;
66
67 Token::lexer(&content)
68 .spanned()
69 .map(|(token, byte_range)| {
71 let is_newline = token == Token::Newline;
72 let col_start = char_at_byte[byte_range.start].unwrap() - last_newline;
73 let col_end = char_at_byte[byte_range.end].unwrap() - last_newline;
74 let placed_token = PlacedToken {
75 token,
76 span: Span {
77 line,
78 col_start,
79 col_end,
80 },
81 };
82 if is_newline {
83 last_newline = char_at_byte[byte_range.start].unwrap();
84 line += 1;
85 }
86 placed_token
87 })
88 .collect()
89}
90
91pub fn file_to_tokens(file: &Path) -> Result<Vec<PlacedToken>, std::io::Error> {
92 Ok(string_to_tokens(&fs::read_to_string(file)?))
93}
94
#[cfg(test)]
mod tests {
    use crate::{Token, string_to_tokens};
    use logos::Logos;

    /// Lexes `s` and returns the bare tokens, without span information.
    fn lex(s: &str) -> Vec<Token> {
        Token::lexer(s).collect()
    }

    /// Lexes `s`, asserts that it yields exactly one token, and returns it.
    ///
    /// Panics when `s` produces no token or more than one.
    fn lex_once(s: &str) -> Token {
        let mut lexer = Token::lexer(s);
        let res = lexer.next().unwrap();
        assert_eq!(lexer.next(), None);
        res
    }

    /// Returns `true` when both slices have the same length and equal elements.
    fn vecs_match<T: PartialEq>(a: &[T], b: &[T]) -> bool {
        a == b
    }

    /// Asserts that the placed tokens in `$a` equal the listed
    /// `(token, line, col_start..col_end)` triples, in order.
    macro_rules! assert_placed_eq {
        ($a:expr, $( ($token:expr, $line:expr, $range:expr) ),+ $(,)? ) => {
            let a = $a;
            let b = vec![ $(
                $crate::PlacedToken {
                    token: $token,
                    span: $crate::Span {
                        line: $line,
                        col_start: $range.start,
                        col_end: $range.end,
                    }
                }
            ),*];
            if !vecs_match(&a, &b) {
                panic!("\n{:?}\ndoes not match\n{:?}", a, b);
            }
        };
    }

    #[test]
    fn simple_span() {
        assert_placed_eq!(
            string_to_tokens("1"),
            (Token::Int(1), 1, 1..2),
        );
        assert_placed_eq!(
            string_to_tokens("1\n"),
            (Token::Int(1), 1, 1..2),
            (Token::Newline, 1, 2..3),
        );
        // Columns restart at 1 after every newline.
        assert_placed_eq!(
            string_to_tokens("1\n23\n456"),
            (Token::Int(1), 1, 1..2),
            (Token::Newline, 1, 2..3),
            (Token::Int(23), 2, 1..3),
            (Token::Newline, 2, 3..4),
            (Token::Int(456), 3, 1..4),
        );
    }

    #[test]
    fn span_with_non_ascii() {
        // `ö` is two bytes in UTF-8, yet the expected spans give it a single
        // column: columns are counted in characters, not bytes.
        assert_placed_eq!(
            string_to_tokens("wow\nwöw\n"),
            (Token::Identifier(String::from("wow")), 1, 1..4),
            (Token::Newline, 1, 4..5),

            (Token::Identifier(String::from("w")), 2, 1..2),
            (Token::Error, 2, 2..3),
            (Token::Identifier(String::from("w")), 2, 3..4),
            (Token::Newline, 2, 4..5),
        );
    }

    #[test]
    fn test_lex_once() {
        lex_once("1");
    }

    // `lex_once` must reject input that lexes to more than one token.
    #[test]
    #[should_panic]
    fn test_lex_once_panic() {
        lex_once("1 2");
    }

    #[test]
    fn number() {
        assert_eq!(lex_once("1"), Token::Int(1));
        assert_eq!(lex_once("1.1"), Token::Float(1.1));
        assert_eq!(lex_once("123"), Token::Int(123));
        assert_eq!(lex_once(".1"), Token::Float(0.1));
        assert_eq!(lex_once("1."), Token::Float(1.0));
    }

    #[test]
    fn identifiers() {
        let ident_cmp = |s| assert_eq!(lex_once(s), Token::Identifier(String::from(s)));
        ident_cmp("a");
        ident_cmp("aaaaaaaa");
        ident_cmp("a1");
        ident_cmp("a_");
        ident_cmp("_a");
        ident_cmp("__");
    }

    #[test]
    fn whitespace() {
        // Surrounding whitespace must not produce tokens of its own.
        lex_once("1 ");
        lex_once(" 1");
        lex_once(" 1 ");

        assert_eq!(lex("1 2").len(), 2);
        assert_eq!(lex("1\t2").len(), 2);
        assert_eq!(lex("1 2").len(), 2);
        assert_eq!(lex("\t1 \t \t\t 2\t").len(), 2);
    }

    #[test]
    fn comment() {
        assert_eq!(lex("// a\n1").len(), 2);
        assert_eq!(lex("1// a\n2").len(), 3);
        assert_eq!(lex("1\n// a\n2").len(), 4);
    }
}