1use crate::token::{lookup_identifier, Span, Token, TokenKind};
2
3mod lexer_test;
4pub mod token;
5
/// Cursor-style scanner over a borrowed source string.
///
/// The lexer keeps one character of the input loaded in `ch` and tracks where
/// that character sits in `input` as well as where the next one will be read.
pub struct Lexer<'a> {
    /// Source text being scanned; borrowed for the lifetime of the lexer.
    input: &'a str,
    /// Offset in `input` of the character currently held in `ch`.
    position: usize,
    /// Offset in `input` of the character that will be read next.
    read_position: usize,
    /// Character under examination; `'\0'` is used once the input is exhausted.
    ch: char,
}
12
13impl<'a> Lexer<'a> {
14 pub fn new(input: &'a str) -> Self {
15 let mut l = Lexer { input, position: 0, read_position: 0, ch: 0 as char };
16
17 l.read_char();
18 return l;
19 }
20
21 fn read_char(&mut self) {
22 if self.read_position >= self.input.len() {
23 self.ch = 0 as char
24 } else {
25 if let Some(ch) = self.input.chars().nth(self.read_position) {
26 self.ch = ch;
27 } else {
28 panic!("read out of range")
29 }
30 }
31
32 self.position = self.read_position;
33 self.read_position += 1;
34 }
35
36 fn peek_char(&self) -> char {
37 if self.read_position >= self.input.len() {
38 0 as char
39 } else {
40 if let Some(ch) = self.input.chars().nth(self.read_position) {
41 ch
42 } else {
43 panic!("read out of range")
44 }
45 }
46 }
47
48 pub fn next_token(&mut self) -> Token {
49 self.skip_ignorable();
52 let t = match self.ch {
53 '=' => {
54 if self.peek_char() == '=' {
55 self.read_char();
56 TokenKind::EQ
57 } else {
58 TokenKind::ASSIGN
59 }
60 }
61 ';' => TokenKind::SEMICOLON,
62 '(' => TokenKind::LPAREN,
63 ')' => TokenKind::RPAREN,
64 ',' => TokenKind::COMMA,
65 '+' => TokenKind::PLUS,
66 '-' => TokenKind::MINUS,
67 '!' => {
68 if self.peek_char() == '=' {
69 self.read_char();
70 TokenKind::NotEq
71 } else {
72 TokenKind::BANG
73 }
74 }
75 '*' => TokenKind::ASTERISK,
76 '/' => TokenKind::SLASH,
77 '<' => TokenKind::LT,
78 '>' => TokenKind::GT,
79 '{' => TokenKind::LBRACE,
80 '}' => TokenKind::RBRACE,
81 '[' => TokenKind::LBRACKET,
82 ':' => TokenKind::COLON,
83 ']' => TokenKind::RBRACKET,
84 '\u{0}' => TokenKind::EOF,
85 '"' => {
86 let (start, end, string) = self.read_string();
87 return Token { span: Span { start, end }, kind: TokenKind::STRING(string) };
88 }
89 _ => {
90 if is_letter(self.ch) {
91 let (start, end, identifier) = self.read_identifier();
92 return Token {
93 span: Span { start, end },
94 kind: lookup_identifier(&identifier),
95 };
96 } else if is_digit(self.ch) {
97 let (start, end, num) = self.read_number();
98 return Token { span: Span { start, end }, kind: TokenKind::INT(num) };
99 } else {
100 TokenKind::ILLEGAL
101 }
102 }
103 };
104
105 self.read_char();
106 return Token {
107 span: Span { start: self.position - 1, end: self.read_position - 1 },
108 kind: t,
109 };
110 }
111
112 fn skip_whitespace(&mut self) {
113 while self.ch.is_ascii_whitespace() {
114 self.read_char();
115 }
116 }
117
118 fn skip_ignorable(&mut self) {
119 loop {
120 self.skip_whitespace();
121 if self.ch == '/' && self.peek_char() == '/' {
122 self.skip_comments();
123 continue;
125 }
126 break;
127 }
128 }
129
130 fn skip_comments(&mut self) {
131 if self.ch == '/' && self.peek_char() == '/' {
132 self.read_char();
133 self.read_char();
134 loop {
135 self.read_char();
136 if self.ch == '\n' || self.ch == '\u{0}' {
137 if self.ch == '\n' {
139 self.read_char();
140 }
141 break;
142 }
143 }
144 }
145 }
146
147 fn read_identifier(&mut self) -> (usize, usize, String) {
148 let pos = self.position;
149 while is_letter(self.ch) {
150 self.read_char();
151 }
152
153 let x = self.input[pos..self.position].to_string();
154 return (pos, self.position, x);
155 }
156
157 fn read_number(&mut self) -> (usize, usize, i64) {
158 let pos = self.position;
159 while is_digit(self.ch) {
160 self.read_char();
161 }
162
163 let x = self.input[pos..self.position].parse().unwrap();
164
165 return (pos, self.position, x);
166 }
167
168 fn read_string(&mut self) -> (usize, usize, String) {
169 let pos = self.position + 1;
170 loop {
171 self.read_char();
172 if self.ch == '"' || self.ch == '\u{0}' {
173 break;
174 }
175 }
176
177 let x = self.input[pos..self.position].to_string();
178
179 if self.ch == '"' {
181 self.read_char();
182 }
183 return (pos - 1, self.position, x);
184 }
185}
186
/// Reports whether `c` may appear in an identifier: an ASCII letter or `_`.
fn is_letter(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}
190
/// Reports whether `c` is one of the ASCII decimal digits `0` through `9`.
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}