1use super::token::{Token, TokenType};
3use pulsar_utils::{
4 error::{ErrorBuilder, ErrorCode, ErrorManager, Level, Style},
5 loc::{Loc, Region, Source}
6};
7use std::{cell::RefCell, rc::Rc};
8
9pub struct Lexer {
21 loc: Loc,
22 buffer: Vec<char>,
23 error_manager: Rc<RefCell<ErrorManager>>
24}
25
/// Runs a sequence of statements as look-ahead: the value of `$self.loc` is
/// saved before the statements run and written back once they finish.
///
/// The write-back is an ordinary statement placed after the action block, so
/// it is skipped if the action leaves early (`return`, `break`, `?`). Callers
/// that need the location restored must let the block run to completion.
macro_rules! with_unwind {
    ($self:ident in $($action:tt)*) => {
        let saved_loc = $self.loc.clone();
        {
            $($action)*
        }
        $self.loc = saved_loc;
    };
}
39
40impl Lexer {
41 pub fn new(
43 source: Rc<Source>, error_manager: Rc<RefCell<ErrorManager>>
44 ) -> Self {
45 Lexer {
46 loc: Loc {
47 line: 1,
48 col: 1,
49 pos: 0,
50 source: source.clone()
51 },
52 buffer: source.contents().chars().collect(),
53 error_manager
54 }
55 }
56
57 fn current(&self) -> char {
59 self.buffer[self.loc.pos as usize]
60 }
61
62 fn is_eof(&self) -> bool {
65 (self.loc.pos as usize) == self.buffer.len()
66 }
67
68 fn advance(&mut self) {
70 if self.current() == '\n' {
71 self.loc.col = 0;
72 self.loc.line += 1;
73 }
74 self.loc.pos += 1;
75 self.loc.col += 1;
76 }
77
78 fn advance_n(&mut self, n: usize) {
80 for _ in 0..n {
81 self.advance();
82 }
83 }
84
85 fn skip(&mut self) {
87 while !self.is_eof()
88 && self.current().is_whitespace()
89 && self.current() != '\n'
90 {
91 self.advance();
92 }
93 }
94
95 fn make_token(&mut self, ty: TokenType, length: usize) -> Token {
98 let loc_copy = self.loc.clone();
99 self.advance_n(length);
100 let pos_copy = loc_copy.pos as usize;
101 let value: String =
102 self.buffer[pos_copy..pos_copy + length].iter().collect();
103 Token {
104 ty,
105 value,
106 loc: loc_copy
107 }
108 }
109
110 fn make_number_token(&mut self) -> Token {
112 let mut length = 0;
113 with_unwind! { self in
114 while !self.is_eof() && self.current().is_numeric() {
115 self.advance();
116 length += 1;
117 }
118 }
119 self.make_token(TokenType::Integer, length)
120 }
121
122 fn make_identifier_token(&mut self) -> Token {
124 let mut length = 0;
125 with_unwind! { self in
126 while !self.is_eof()
127 && (self.current().is_alphanumeric() || self.current() == '_')
128 {
129 self.advance();
130 length += 1;
131 }
132 }
133 self.make_token(TokenType::Identifier, length)
134 }
135
136 fn make_directive_token(&mut self) -> Option<Token> {
138 let mut length = 1;
139 with_unwind! { self in
140 self.advance();
141 if self.is_eof()
142 || !(self.current().is_alphanumeric() || self.current() == '_')
143 {
144 return None;
145 }
146 while !self.is_eof()
147 && (self.current().is_alphanumeric() || self.current() == '_')
148 {
149 self.advance();
150 length += 1;
151 }
152 }
153 Some(self.make_token(TokenType::Directive, length))
154 }
155}
156
/// Tries each `| literal => { token_type }` arm in order against the
/// characters at `$self.loc.pos`. On the first match it returns
/// `Some($self.make_token(token_type, len))` from the enclosing function.
/// If no arm matches, the trailing `| _ { ... }` block is evaluated.
///
/// Arms are tried top to bottom, so a literal that is a prefix of another
/// (`"-"` and `"->"`) must be listed after the longer one.
///
/// A literal that ends in an identifier character (a keyword such as
/// `"let"`) only matches when the next buffer character is not also an
/// identifier character. Without this, `letter` would lex as the keyword
/// `let` followed by `ter`.
macro_rules! lex {
    ($self:ident in $(| $token:expr => {$token_type:expr})* | _ $finally:block) => {
        $(
            {
                let literal = $token;
                // The buffer is indexed by `char`, so measure in chars too.
                let literal_length = literal.chars().count();
                let start = $self.loc.pos as usize;
                let end = start + literal_length;
                let matches_literal = end <= $self.buffer.len()
                    && $self.buffer[start..end].iter().copied().eq(literal.chars());
                let ends_like_word = literal
                    .chars()
                    .next_back()
                    .map_or(false, |c| c.is_alphanumeric() || c == '_');
                let word_continues = $self
                    .buffer
                    .get(end)
                    .map_or(false, |&c| c.is_alphanumeric() || c == '_');
                if matches_literal && !(ends_like_word && word_continues) {
                    return Some($self.make_token($token_type, literal_length));
                }
            }
        )*
        $finally
    };
}
172
173impl Iterator for Lexer {
174 type Item = Token;
175
176 fn next(&mut self) -> Option<Token> {
177 if self.is_eof() || self.error_manager.borrow().has_errors() {
178 return None;
179 }
180
181 self.skip();
182
183 lex! { self in
184 | "+" => { TokenType::Plus }
185 | "->" => { TokenType::Arrow }
186 | "-" => { TokenType::Minus }
187 | "*" => { TokenType::Times }
188 | "(" => { TokenType::LeftPar }
189 | ")" => { TokenType::RightPar }
190 | "{" => { TokenType::LeftBrace }
191 | "}" => { TokenType::RightBrace }
192 | "[" => { TokenType::LeftBracket }
193 | "]" => { TokenType::RightBracket }
194 | "<" => { TokenType::LeftAngle }
195 | ">" => { TokenType::RightAngle }
196 | "=" => { TokenType::Assign }
197 | ":" => { TokenType::Colon }
198 | "..." => { TokenType::Dots }
199 | "." => { TokenType::Dot }
200 | "," => { TokenType::Comma }
201 | "\n" => { TokenType::Newline }
202 | "func" => { TokenType::Func }
203 | "let" => { TokenType::Let }
204 | "return" => { TokenType::Return }
205 | "pure" => { TokenType::Pure }
206 | "map" => { TokenType::HardwareMap }
207 | _ {
208 if self.current().is_numeric() {
209 Some(self.make_number_token())
210 } else if self.current().is_alphabetic() || self.current() == '_' {
211 Some(self.make_identifier_token())
212 } else if self.current() == '@' {
213 self.make_directive_token()
214 } else {
215 let error = ErrorBuilder::new()
216 .of_style(Style::Primary)
217 .at_level(Level::Error)
218 .with_code(ErrorCode::UnrecognizedCharacter)
219 .at_region(&Region::unit(self.loc.clone()))
220 .message("Encountered unrecognized character".into())
221 .build();
222 self.error_manager.borrow_mut().record(error);
223 None
224 }
225 }
226 }
227 }
228}