oak_smalltalk/lexer/
mod.rs1use crate::{kind::SmalltalkKind, language::SmalltalkLanguage};
2use oak_core::{IncrementalCache, Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
3
4type State<S> = LexerState<S, SmalltalkLanguage>;
5
6#[derive(Clone)]
7pub struct SmalltalkLexer<'config> {
8 config: &'config SmalltalkLanguage,
9}
10
11impl<'config> SmalltalkLexer<'config> {
12 pub fn new(config: &'config SmalltalkLanguage) -> Self {
13 Self { config }
14 }
15
16 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
17 while state.not_at_end() {
18 let safe_point = state.get_position();
19
20 if self.skip_whitespace(state) {
21 continue;
22 }
23
24 if self.lex_newline(state) {
25 continue;
26 }
27
28 if self.lex_comment(state) {
29 continue;
30 }
31
32 if self.lex_number(state) {
33 continue;
34 }
35
36 if self.lex_identifier(state) {
37 continue;
38 }
39
40 if self.lex_punctuation(state) {
41 continue;
42 }
43
44 let start_pos = state.get_position();
46 if let Some(ch) = state.peek() {
47 state.advance(ch.len_utf8());
48 state.add_token(SmalltalkKind::Error, start_pos, state.get_position());
49 }
50
51 state.safe_check(safe_point);
52 }
53
54 let eof_pos = state.get_position();
56 state.add_token(SmalltalkKind::Eof, eof_pos, eof_pos);
57 Ok(())
58 }
59
60 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
62 let start_pos = state.get_position();
63
64 while let Some(ch) = state.peek() {
65 if ch == ' ' || ch == '\t' {
66 state.advance(ch.len_utf8());
67 }
68 else {
69 break;
70 }
71 }
72
73 if state.get_position() > start_pos {
74 state.add_token(SmalltalkKind::Whitespace, start_pos, state.get_position());
75 true
76 }
77 else {
78 false
79 }
80 }
81
82 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
84 let start_pos = state.get_position();
85
86 if let Some('\n') = state.peek() {
87 state.advance(1);
88 state.add_token(SmalltalkKind::Newline, start_pos, state.get_position());
89 true
90 }
91 else if let Some('\r') = state.peek() {
92 state.advance(1);
93 if let Some('\n') = state.peek() {
94 state.advance(1);
95 }
96 state.add_token(SmalltalkKind::Newline, start_pos, state.get_position());
97 true
98 }
99 else {
100 false
101 }
102 }
103
104 fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
106 let start_pos = state.get_position();
107
108 if let Some('"') = state.peek() {
109 state.advance(1);
110
111 while let Some(ch) = state.peek() {
112 if ch == '"' {
113 state.advance(1);
114 break;
115 }
116 state.advance(ch.len_utf8());
117 }
118
119 state.add_token(SmalltalkKind::Comment, start_pos, state.get_position());
120 true
121 }
122 else {
123 false
124 }
125 }
126
127 fn lex_identifier<S: Source>(&self, state: &mut State<S>) -> bool {
129 let start_pos = state.get_position();
130
131 if let Some(ch) = state.peek() {
132 if ch.is_alphabetic() || ch == '_' {
133 state.advance(ch.len_utf8());
134
135 while let Some(ch) = state.peek() {
136 if ch.is_alphanumeric() || ch == '_' {
137 state.advance(ch.len_utf8());
138 }
139 else {
140 break;
141 }
142 }
143
144 state.add_token(SmalltalkKind::Identifier, start_pos, state.get_position());
145 true
146 }
147 else {
148 false
149 }
150 }
151 else {
152 false
153 }
154 }
155
156 fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
158 let start_pos = state.get_position();
159
160 if let Some(ch) = state.peek() {
161 if ch.is_ascii_digit() {
162 state.advance(1);
163
164 while let Some(ch) = state.peek() {
165 if ch.is_ascii_digit() {
166 state.advance(1);
167 }
168 else {
169 break;
170 }
171 }
172
173 state.add_token(SmalltalkKind::Number, start_pos, state.get_position());
174 true
175 }
176 else {
177 false
178 }
179 }
180 else {
181 false
182 }
183 }
184
185 fn lex_punctuation<S: Source>(&self, state: &mut State<S>) -> bool {
187 let start_pos = state.get_position();
188
189 if let Some(ch) = state.peek() {
190 let kind = match ch {
191 '(' => SmalltalkKind::LeftParen,
192 ')' => SmalltalkKind::RightParen,
193 '[' => SmalltalkKind::LeftBracket,
194 ']' => SmalltalkKind::RightBracket,
195 '{' => SmalltalkKind::LeftBrace,
196 '}' => SmalltalkKind::RightBrace,
197 '.' => SmalltalkKind::Dot,
198 ';' => SmalltalkKind::Semicolon,
199 ',' => SmalltalkKind::Comma,
200 '+' => SmalltalkKind::Plus,
201 '-' => SmalltalkKind::Minus,
202 '*' => SmalltalkKind::Star,
203 '/' => SmalltalkKind::Slash,
204 '=' => SmalltalkKind::Equal,
205 '<' => SmalltalkKind::Less,
206 '>' => SmalltalkKind::Greater,
207 _ => return false,
208 };
209
210 state.advance(1);
211 state.add_token(kind, start_pos, state.get_position());
212 true
213 }
214 else {
215 false
216 }
217 }
218}
219
220impl<'config> Lexer<SmalltalkLanguage> for SmalltalkLexer<'config> {
221 fn lex_incremental(
222 &self,
223 source: impl Source,
224 changed: usize,
225 cache: IncrementalCache<SmalltalkLanguage>,
226 ) -> LexOutput<SmalltalkLanguage> {
227 let mut state = LexerState::new_with_cache(source, changed, cache);
228 let result = self.run(&mut state);
229 state.finish(result)
230 }
231}