1use crate::token::{lookup_identifier, Span, Token, TokenKind};
2
3mod lexer_test;
4pub mod token;
5
/// Cursor-style lexer over a borrowed source string.
///
/// Derives `Debug` and `Clone` so a lexer can be printed in diagnostics and
/// snapshotted cheaply (it only holds a borrow, two offsets and a `char`).
#[derive(Debug, Clone)]
pub struct Lexer<'a> {
    /// The complete source text being tokenized.
    input: &'a str,
    /// Offset into `input` of the character currently held in `ch`.
    position: usize,
    /// Offset into `input` of the next character to be read.
    read_position: usize,
    /// The character under examination; `'\0'` once the input is exhausted.
    ch: char,
}
12
13impl<'a> Lexer<'a> {
14 pub fn new(input: &'a str) -> Self {
15 let mut l = Lexer { input, position: 0, read_position: 0, ch: 0 as char };
16
17 l.read_char();
18 return l;
19 }
20
21 fn read_char(&mut self) {
22 if self.read_position >= self.input.len() {
23 self.ch = 0 as char
24 } else {
25 if let Some(ch) = self.input.chars().nth(self.read_position) {
26 self.ch = ch;
27 } else {
28 panic!("read out of range")
29 }
30 }
31
32 self.position = self.read_position;
33 self.read_position += 1;
34 }
35
36 fn peek_char(&self) -> char {
37 if self.read_position >= self.input.len() {
38 0 as char
39 } else {
40 if let Some(ch) = self.input.chars().nth(self.read_position) {
41 ch
42 } else {
43 panic!("read out of range")
44 }
45 }
46 }
47
48 pub fn next_token(&mut self) -> Token {
49 self.skip_whitespace();
51 self.skip_comments();
52 let t = match self.ch {
53 '=' => {
54 if self.peek_char() == '=' {
55 self.read_char();
56 TokenKind::EQ
57 } else {
58 TokenKind::ASSIGN
59 }
60 }
61 ';' => TokenKind::SEMICOLON,
62 '(' => TokenKind::LPAREN,
63 ')' => TokenKind::RPAREN,
64 ',' => TokenKind::COMMA,
65 '+' => TokenKind::PLUS,
66 '-' => TokenKind::MINUS,
67 '!' => {
68 if self.peek_char() == '=' {
69 self.read_char();
70 TokenKind::NotEq
71 } else {
72 TokenKind::BANG
73 }
74 }
75 '*' => TokenKind::ASTERISK,
76 '/' => TokenKind::SLASH,
77 '<' => TokenKind::LT,
78 '>' => TokenKind::GT,
79 '{' => TokenKind::LBRACE,
80 '}' => TokenKind::RBRACE,
81 '[' => TokenKind::LBRACKET,
82 ':' => TokenKind::COLON,
83 ']' => TokenKind::RBRACKET,
84 '\u{0}' => TokenKind::EOF,
85 '"' => {
86 let (start, end, string) = self.read_string();
87 return Token { span: Span { start, end }, kind: TokenKind::STRING(string) };
88 }
89 _ => {
90 if is_letter(self.ch) {
91 let (start, end, identifier) = self.read_identifier();
92 return Token {
93 span: Span { start, end },
94 kind: lookup_identifier(&identifier),
95 };
96 } else if is_digit(self.ch) {
97 let (start, end, num) = self.read_number();
98 return Token { span: Span { start, end }, kind: TokenKind::INT(num) };
99 } else {
100 TokenKind::ILLEGAL
101 }
102 }
103 };
104
105 self.read_char();
106 return Token {
107 span: Span { start: self.position - 1, end: self.read_position - 1 },
108 kind: t,
109 };
110 }
111
112 fn skip_whitespace(&mut self) {
113 while self.ch.is_ascii_whitespace() {
114 self.read_char();
115 }
116 }
117
118 fn skip_comments(&mut self) {
119 if self.ch == '/' && self.peek_char() == '/' {
120 self.read_char();
121 self.read_char();
122 loop {
123 self.read_char();
124 if self.ch == '\n' || self.ch == '\u{0}' {
125 if self.ch == '\n' {
127 self.read_char();
128 }
129 break;
130 }
131 }
132 }
133 }
134
135 fn read_identifier(&mut self) -> (usize, usize, String) {
136 let pos = self.position;
137 while is_letter(self.ch) {
138 self.read_char();
139 }
140
141 let x = self.input[pos..self.position].to_string();
142 return (pos, self.position, x);
143 }
144
145 fn read_number(&mut self) -> (usize, usize, i64) {
146 let pos = self.position;
147 while is_digit(self.ch) {
148 self.read_char();
149 }
150
151 let x = self.input[pos..self.position].parse().unwrap();
152
153 return (pos, self.position, x);
154 }
155
156 fn read_string(&mut self) -> (usize, usize, String) {
157 let pos = self.position + 1;
158 loop {
159 self.read_char();
160 if self.ch == '"' || self.ch == '\u{0}' {
161 break;
162 }
163 }
164
165 let x = self.input[pos..self.position].to_string();
166
167 if self.ch == '"' {
169 self.read_char();
170 }
171 return (pos - 1, self.position, x);
172 }
173}
174
/// Reports whether `c` may appear in an identifier: an ASCII letter
/// (either case) or an underscore.
fn is_letter(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}
178
/// Reports whether `c` is one of the ASCII decimal digits `0` through `9`.
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}