oak_smalltalk/lexer/
mod.rs1use crate::{kind::SmalltalkSyntaxKind, language::SmalltalkLanguage};
2use oak_core::{
3 OakError,
4 lexer::{LexOutput, Lexer, LexerCache, LexerState},
5 source::{Source, TextEdit},
6};
7
8type State<'a, S> = LexerState<'a, S, SmalltalkLanguage>;
9
10#[derive(Clone)]
11pub struct SmalltalkLexer<'config> {
12 _config: &'config SmalltalkLanguage,
13}
14
15impl<'config> Lexer<SmalltalkLanguage> for SmalltalkLexer<'config> {
16 fn lex<'a, S: Source + ?Sized>(&self, source: &S, edits: &[TextEdit], cache: &'a mut impl LexerCache<SmalltalkLanguage>) -> LexOutput<SmalltalkLanguage> {
17 let relex_from = edits.iter().map(|e| e.span.start).min().unwrap_or(source.length());
18 let mut state = LexerState::new_with_cache(source, relex_from, cache);
19 if state.fully_reused() {
20 let result = Ok(());
21 return state.finish_with_cache(result, cache);
22 }
23 let result = self.run(&mut state);
24 state.finish_with_cache(result, cache)
25 }
26}
27
28impl<'config> SmalltalkLexer<'config> {
29 pub fn new(config: &'config SmalltalkLanguage) -> Self {
30 Self { _config: config }
31 }
32
33 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
34 while state.not_at_end() {
35 let safe_point = state.get_position();
36
37 if self.skip_whitespace(state) {
38 continue;
39 }
40
41 if self.lex_newline(state) {
42 continue;
43 }
44
45 if self.lex_comment(state) {
46 continue;
47 }
48
49 if self.lex_number(state) {
50 continue;
51 }
52
53 if self.lex_identifier(state) {
54 continue;
55 }
56
57 if self.lex_punctuation(state) {
58 continue;
59 }
60
61 let start_pos = state.get_position();
63 if let Some(ch) = state.peek() {
64 state.advance(ch.len_utf8());
65 state.add_token(SmalltalkSyntaxKind::Error, start_pos, state.get_position());
66 }
67
68 state.advance_if_dead_lock(safe_point);
69 }
70
71 state.add_eof();
73 Ok(())
74 }
75
76 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
78 let start_pos = state.get_position();
79
80 while let Some(ch) = state.peek() {
81 if ch == ' ' || ch == '\t' {
82 state.advance(ch.len_utf8());
83 }
84 else {
85 break;
86 }
87 }
88
89 if state.get_position() > start_pos {
90 state.add_token(SmalltalkSyntaxKind::Whitespace, start_pos, state.get_position());
91 true
92 }
93 else {
94 false
95 }
96 }
97
98 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
100 let start_pos = state.get_position();
101
102 if let Some('\n') = state.peek() {
103 state.advance(1);
104 state.add_token(SmalltalkSyntaxKind::Newline, start_pos, state.get_position());
105 true
106 }
107 else if let Some('\r') = state.peek() {
108 state.advance(1);
109 if let Some('\n') = state.peek() {
110 state.advance(1);
111 }
112 state.add_token(SmalltalkSyntaxKind::Newline, start_pos, state.get_position());
113 true
114 }
115 else {
116 false
117 }
118 }
119
120 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
122 let start_pos = state.get_position();
123
124 if let Some('"') = state.peek() {
125 state.advance(1);
126
127 while let Some(ch) = state.peek() {
128 if ch == '"' {
129 state.advance(1);
130 break;
131 }
132 state.advance(ch.len_utf8());
133 }
134
135 state.add_token(SmalltalkSyntaxKind::Comment, start_pos, state.get_position());
136 true
137 }
138 else {
139 false
140 }
141 }
142
143 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
145 let start_pos = state.get_position();
146
147 if let Some(ch) = state.peek() {
148 if ch.is_alphabetic() || ch == '_' {
149 state.advance(ch.len_utf8());
150
151 while let Some(ch) = state.peek() {
152 if ch.is_alphanumeric() || ch == '_' {
153 state.advance(ch.len_utf8());
154 }
155 else {
156 break;
157 }
158 }
159
160 state.add_token(SmalltalkSyntaxKind::Identifier, start_pos, state.get_position());
161 true
162 }
163 else {
164 false
165 }
166 }
167 else {
168 false
169 }
170 }
171
172 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
174 let start_pos = state.get_position();
175
176 if let Some(ch) = state.peek() {
177 if ch.is_ascii_digit() {
178 state.advance(1);
179
180 while let Some(ch) = state.peek() {
181 if ch.is_ascii_digit() {
182 state.advance(1);
183 }
184 else {
185 break;
186 }
187 }
188
189 state.add_token(SmalltalkSyntaxKind::Number, start_pos, state.get_position());
190 true
191 }
192 else {
193 false
194 }
195 }
196 else {
197 false
198 }
199 }
200
201 fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
203 let start_pos = state.get_position();
204
205 if let Some(ch) = state.peek() {
206 let kind = match ch {
207 '(' => SmalltalkSyntaxKind::LeftParen,
208 ')' => SmalltalkSyntaxKind::RightParen,
209 '[' => SmalltalkSyntaxKind::LeftBracket,
210 ']' => SmalltalkSyntaxKind::RightBracket,
211 '{' => SmalltalkSyntaxKind::LeftBrace,
212 '}' => SmalltalkSyntaxKind::RightBrace,
213 '.' => SmalltalkSyntaxKind::Dot,
214 ';' => SmalltalkSyntaxKind::Semicolon,
215 ',' => SmalltalkSyntaxKind::Comma,
216 '+' => SmalltalkSyntaxKind::Plus,
217 '-' => SmalltalkSyntaxKind::Minus,
218 '*' => SmalltalkSyntaxKind::Star,
219 '/' => SmalltalkSyntaxKind::Slash,
220 '=' => SmalltalkSyntaxKind::Equal,
221 '<' => SmalltalkSyntaxKind::Less,
222 '>' => SmalltalkSyntaxKind::Greater,
223 _ => return false,
224 };
225
226 state.advance(1);
227 state.add_token(kind, start_pos, state.get_position());
228 true
229 }
230 else {
231 false
232 }
233 }
234}