1use super::*;
3use std::collections::HashMap;
4
/// Module-local `Result` whose error type defaults to [`ErrorMessage`].
pub type Result<T, E = ErrorMessage> = std::result::Result<T, E>;
6
/// Which quote character delimited a string literal.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum StringType {
    /// `'...'`
    SingleQuoted,
    /// `"..."`
    DoubleQuoted,
}
12
/// A single lexical token. Every variant (except [`Token::Eof`]) carries the
/// exact source text it was read from in `raw`.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Token {
    /// An identifier, either written directly or as a raw `@"..."` ident.
    Ident {
        /// Exact source text (includes the `@` and quotes for raw idents).
        raw: String,
        /// The identifier's name (string contents for raw idents).
        name: String,
        /// True when written in the `@"..."` form.
        is_raw: bool,
    },
    /// One punctuation token; may be several chars (see `read_punctuation`).
    Punctuation {
        raw: String,
    },
    /// A quoted string literal.
    String {
        /// Exact source text, including the quotes and unprocessed escapes.
        raw: String,
        /// The string's value with escapes resolved.
        contents: String,
        /// Which quote style delimited it.
        typ: StringType,
    },
    /// A numeric literal, kept as unparsed text.
    Number {
        raw: String,
    },
    /// A `#`-line or `/* ... */` comment.
    Comment {
        raw: String,
        /// Comment text without the leading `#` / `/*` marker.
        contents: String,
    },
    /// Synthetic end-of-input marker.
    Eof,
}
38
impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `{:?}` on the raw &str quotes and escapes it (e.g. ident `foo`
        // displays as `"foo"`), which reads well inside error messages.
        write!(f, "{:?}", self.raw())
    }
}
44
45impl Token {
46 pub fn into_raw(self) -> String {
47 match self {
48 Token::Ident { raw, .. } => raw,
49 Token::Punctuation { raw } => raw,
50 Token::String { raw, .. } => raw,
51 Token::Number { raw } => raw,
52 Token::Comment { raw, .. } => raw,
53 Token::Eof => "<EOF>".to_owned(),
54 }
55 }
56 pub fn raw(&self) -> &str {
57 match self {
58 Token::Ident { raw, .. } => raw,
59 Token::Punctuation { raw } => raw,
60 Token::String { raw, .. } => raw,
61 Token::Number { raw } => raw,
62 Token::Comment { raw, .. } => raw,
63 Token::Eof => "<EOF>",
64 }
65 }
66 pub fn is_eof(&self) -> bool {
67 matches!(self, Self::Eof)
68 }
69 pub fn is_comment(&self) -> bool {
70 matches!(self, Self::Comment { .. })
71 }
72}
73
/// Streaming tokenizer over a character reader.
struct Lexer {
    reader: peek2::Reader<char>,
    // Next id handed out by `start_recording`; monotonically increasing,
    // never reused.
    next_recording_id: u64,
    // Active raw-text recordings keyed by id; every char consumed through
    // `Lexer::next` is appended to all of them.
    recordings: HashMap<u64, String>,
}
80
81impl Lexer {
82 fn next_token(&mut self) -> Result<SpannedToken, Error> {
83 match self.next_token_impl() {
84 Ok(result) => Ok(result),
85 Err(message) => {
86 let start = self.reader.position();
87 Err(message.at(Span {
88 filename: self.reader.filename().to_owned(),
89 start,
90 end: match self.reader.peek() {
91 Some('\n') => Position {
92 index: start.index + 1,
93 line: start.line + 1,
94 column: 1,
95 },
96 Some(_) => Position {
97 index: start.index + 1,
98 line: start.line,
99 column: start.column + 1,
100 },
101 None => start,
102 },
103 }))
104 }
105 }
106 }
107 fn next_token_impl(&mut self) -> Result<SpannedToken> {
108 self.skip_whitespace();
109 let start = self.reader.position();
110 let token = [
111 Self::read_simple_comment,
112 Self::read_long_comment,
113 Self::read_string,
114 Self::read_ident,
115 Self::read_number,
116 Self::read_punctuation,
117 ]
118 .into_iter()
119 .find_map(|f| f(self).transpose())
120 .transpose()?;
121 let token = match token {
122 None => {
123 if let Some(c) = self.reader.peek() {
124 return error!("Unexpected char {c:?}");
125 }
126 Token::Eof
127 }
128 Some(token) => token,
129 };
130 let end = self.reader.position();
131 Ok(SpannedToken {
132 token,
133 span: Span {
134 start,
135 end,
136 filename: self.reader.filename().to_owned(),
137 },
138 })
139 }
140 fn skip_whitespace(&mut self) {
141 while self.reader.peek().map_or(false, |c| c.is_whitespace()) {
142 self.next().unwrap();
143 }
144 }
145 fn skip_char(&mut self, expected: char) -> Result<()> {
146 match self.reader.peek() {
147 None => error!("expected {expected:?}, got EOF"),
148 Some(&actual) if actual == expected => {
149 self.next().unwrap();
150 Ok(())
151 }
152 Some(&actual) => error!("expected {expected:?}, got {actual:?}"),
153 }
154 }
155
156 fn read_while(&mut self, mut f: impl FnMut(char) -> bool) -> Result<String> {
157 let mut result = String::new();
158 while let Some(&c) = self.reader.peek() {
159 if f(c) {
160 result.push(c);
161 self.next().unwrap();
162 } else {
163 break;
164 }
165 }
166 Ok(result)
167 }
168}
169
/// Handle returned by `Lexer::start_recording`; hand it back to
/// `stop_recording` to retrieve the text consumed in between.
struct RecordingToken(u64);
171
172impl Lexer {
173 fn start_recording(&mut self) -> RecordingToken {
174 let id = self.next_recording_id;
175 self.next_recording_id += 1;
176 self.recordings.insert(id, String::new());
177 RecordingToken(id)
178 }
179 fn stop_recording(&mut self, token: RecordingToken) -> String {
180 self.recordings.remove(&token.0).unwrap()
181 }
182 fn next(&mut self) -> Option<char> {
183 let next = self.reader.next();
184 if let Some(c) = next {
185 for recording in self.recordings.values_mut() {
186 recording.push(c);
187 }
188 }
189 next
190 }
191}
192
193impl Lexer {
194 fn read_long_comment(&mut self) -> Result<Option<Token>> {
195 if self.reader.peek() != Some(&'/') {
196 return Ok(None);
197 }
198 if self.reader.peek2() != Some(&'*') {
199 return Ok(None);
200 }
201 let raw = self.start_recording();
202 self.skip_char('/')?;
203 self.skip_char('*')?;
204 let mut prev = ['?', '?']; Ok(Some(Token::Comment {
206 contents: self.read_while(|c| {
207 if prev == ['*', '/'] {
208 return false;
209 }
210 let [_prev1, prev2] = prev;
211 prev = [prev2, c];
212 true
213 })?,
214 raw: self.stop_recording(raw),
215 }))
216 }
217 fn read_simple_comment(&mut self) -> Result<Option<Token>> {
218 if self.reader.peek() != Some(&'#') {
219 return Ok(None);
220 }
221 let raw = self.start_recording();
222 self.skip_char('#')?;
223 Ok(Some(Token::Comment {
224 contents: self.read_while(|c| c != '\n')?,
225 raw: self.stop_recording(raw),
226 }))
227 }
228 fn read_string(&mut self) -> Result<Option<Token>> {
229 [StringType::SingleQuoted, StringType::DoubleQuoted]
230 .into_iter()
231 .find_map(|typ| self.read_string_of(typ).transpose())
232 .transpose()
233 }
234 fn read_string_of(&mut self, typ: StringType) -> Result<Option<Token>> {
235 let quote_char = match typ {
236 StringType::SingleQuoted => '\'',
237 StringType::DoubleQuoted => '"',
238 };
239 if self.reader.peek() != Some("e_char) {
240 return Ok(None);
241 }
242 let raw = self.start_recording();
243 self.skip_char(quote_char)?;
244 let mut contents = String::new();
245 while let Some(&c) = self.reader.peek() {
246 if c == quote_char {
247 break;
248 }
249 self.next().unwrap();
250 if c == '\\' {
251 contents.push(match self.next() {
252 None => return error!("Expected escaped character, got EOF"),
253 Some('n') => '\n',
254 Some('r') => '\r',
255 Some('t') => '\t',
256 Some('\\') => '\\',
257 Some('x') => {
258 let mut read_digit = || match self.next() {
259 Some(c) => match c.to_digit(16) {
260 Some(digit) => Ok(digit),
261 None => error!("Expected a hex digit, got {c:?}"),
262 },
263 None => error!("Expected a hex digit, got EOF"),
264 };
265 let digit1 = read_digit()?;
266 let digit2 = read_digit()?;
267 let char_code = digit1 * 16 + digit2;
268 char::from_u32(char_code)
269 .ok_or(error_fmt!("{char_code:?} is not a valid char code"))?
270 }
271 Some(c) => c,
272 });
273 } else {
274 contents.push(c);
275 }
276 }
277 self.skip_char(quote_char)?;
278 Ok(Some(Token::String {
279 raw: self.stop_recording(raw),
280 contents,
281 typ,
282 }))
283 }
284 fn read_ident(&mut self) -> Result<Option<Token>> {
285 let peeked = match self.reader.peek() {
286 Some(&c) => c,
287 None => return Ok(None),
288 };
289 match peeked {
290 '@' => {
291 let raw = self.start_recording();
292 self.next().unwrap();
293 let Some(Token::String { contents: name, .. }) = self.read_string()? else {
294 return error!("Expected a string token after '@' for raw identifier");
295 };
296 Ok(Some(Token::Ident {
297 name,
298 raw: self.stop_recording(raw),
299 is_raw: true,
300 }))
301 }
302 c if c.is_alphabetic() || c == '_' => {
303 let mut name = String::new();
304 while let Some(&c) = self.reader.peek() {
305 let is_good = |c: char| c.is_alphanumeric() || c == '_';
306 if is_good(c) || c == '-' && self.reader.peek2().map_or(false, |&c| is_good(c))
307 {
308 name.push(c);
309 self.next().unwrap();
310 } else {
311 break;
312 }
313 }
314 Ok(Some(Token::Ident {
315 raw: name.clone(),
316 name,
317 is_raw: false,
318 }))
319 }
320 _ => Ok(None),
321 }
322 }
323 fn read_number(&mut self) -> Result<Option<Token>> {
324 let peeked = match self.reader.peek() {
325 Some(&c) => c,
326 None => return Ok(None),
327 };
328 if !peeked.is_ascii_digit() {
329 return Ok(None);
330 }
331 let mut seen_dot = false;
332 let raw = self.read_while(|c| {
333 c.is_ascii_digit() || c == '.' && !std::mem::replace(&mut seen_dot, true) || c == '_'
334 })?;
335 Ok(Some(Token::Number { raw }))
336 }
337 fn read_punctuation(&mut self) -> Result<Option<Token>> {
338 let is_single_punctuation = |c: char| "(){}[]".contains(c);
339 let is_single_char_punctuation = |c: char| ";".contains(c);
340 match self.reader.peek() {
341 Some(&first) if is_punctuation(first) => {
342 if is_single_punctuation(first) {
343 self.next().unwrap();
344 Ok(Some(Token::Punctuation {
345 raw: first.to_string(),
346 }))
347 } else if is_single_char_punctuation(first) {
348 let raw = self.read_while(|c| c == first)?;
349 Ok(Some(Token::Punctuation { raw }))
350 } else {
351 let raw = self.read_while(|c| {
352 is_punctuation(c)
353 && !is_single_punctuation(c)
354 && !is_single_char_punctuation(c)
355 })?;
356 Ok(Some(Token::Punctuation { raw }))
357 }
358 }
359 _ => Ok(None),
360 }
361 }
362}
363
/// A char counts as punctuation when it is neither alphanumeric nor
/// whitespace, and is not one of the chars with dedicated lexical meaning:
/// `_` (idents), `'` and `"` (strings), `@` (raw idents).
pub fn is_punctuation(c: char) -> bool {
    if c.is_alphanumeric() || c.is_whitespace() {
        return false;
    }
    !matches!(c, '_' | '\'' | '"' | '@')
}
367
/// A token together with the source span it was read from.
#[derive(Debug)]
pub struct SpannedToken {
    pub token: Token,
    pub span: Span,
}
373
impl peek2::ReadableItem for SpannedToken {
    // Tokens carry their own positions, so advancing the token reader jumps
    // straight to this token's recorded start.
    fn advance_position(&self) -> peek2::AdvancePosition {
        peek2::AdvancePosition::SetTo(self.span.start)
    }
}
379
// Lets callers use a `SpannedToken` wherever a `&Token` is expected
// (e.g. calling `is_eof()` directly on a spanned token).
impl std::ops::Deref for SpannedToken {
    type Target = Token;
    fn deref(&self) -> &Self::Target {
        &self.token
    }
}
386
387pub fn lex(source: SourceFile) -> Result<peek2::Reader<SpannedToken>, Error> {
388 let filename = source.filename.clone();
389 let mut lexer = Lexer {
390 next_recording_id: 0,
391 recordings: HashMap::new(),
392 reader: peek2::Reader::read(source),
393 };
394 let mut tokens = Vec::new();
395 loop {
396 let token = lexer.next_token()?;
397 let eof = token.token.is_eof();
398 tokens.push(token);
399 if eof {
400 break;
401 }
402 }
403 Ok(peek2::Reader::new(filename, tokens))
404}