oak_smalltalk/lexer/
mod.rs1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::SmalltalkLanguage, lexer::token_type::SmalltalkTokenType};
5use oak_core::{
6 OakError,
7 lexer::{LexOutput, Lexer, LexerCache, LexerState},
8 source::{Source, TextEdit},
9};
10
11type State<'a, S> = LexerState<'a, S, SmalltalkLanguage>;
12
13#[derive(Clone)]
14pub struct SmalltalkLexer<'config> {
15 _config: &'config SmalltalkLanguage,
16}
17
18impl<'config> Lexer<SmalltalkLanguage> for SmalltalkLexer<'config> {
19 fn lex<'a, S: Source + ?Sized>(&self, source: &S, edits: &[TextEdit], cache: &'a mut impl LexerCache<SmalltalkLanguage>) -> LexOutput<SmalltalkLanguage> {
20 let relex_from = edits.iter().map(|e| e.span.start).min().unwrap_or(source.length());
21 let mut state = LexerState::new_with_cache(source, relex_from, cache);
22 if state.fully_reused() {
23 let result = Ok(());
24 return state.finish_with_cache(result, cache);
25 }
26 let result = self.run(&mut state);
27 state.finish_with_cache(result, cache)
28 }
29}
30
31impl<'config> SmalltalkLexer<'config> {
32 pub fn new(config: &'config SmalltalkLanguage) -> Self {
33 Self { _config: config }
34 }
35
36 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
37 while state.not_at_end() {
38 let safe_point = state.get_position();
39
40 if self.skip_whitespace(state) {
41 continue;
42 }
43
44 if self.lex_newline(state) {
45 continue;
46 }
47
48 if self.lex_comment(state) {
49 continue;
50 }
51
52 if self.lex_number(state) {
53 continue;
54 }
55
56 if self.lex_identifier(state) {
57 continue;
58 }
59
60 if self.lex_punctuation(state) {
61 continue;
62 }
63
64 let start_pos = state.get_position();
66 if let Some(ch) = state.peek() {
67 state.advance(ch.len_utf8());
68 state.add_token(SmalltalkTokenType::Error, start_pos, state.get_position());
69 }
70
71 state.advance_if_dead_lock(safe_point)
72 }
73
74 state.add_eof();
76 Ok(())
77 }
78
79 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
81 let start_pos = state.get_position();
82
83 while let Some(ch) = state.peek() {
84 if ch == ' ' || ch == '\t' {
85 state.advance(ch.len_utf8());
86 }
87 else {
88 break;
89 }
90 }
91
92 if state.get_position() > start_pos {
93 state.add_token(SmalltalkTokenType::Whitespace, start_pos, state.get_position());
94 true
95 }
96 else {
97 false
98 }
99 }
100
101 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
103 let start_pos = state.get_position();
104
105 if let Some('\n') = state.peek() {
106 state.advance(1);
107 state.add_token(SmalltalkTokenType::Newline, start_pos, state.get_position());
108 true
109 }
110 else if let Some('\r') = state.peek() {
111 state.advance(1);
112 if let Some('\n') = state.peek() {
113 state.advance(1);
114 }
115 state.add_token(SmalltalkTokenType::Newline, start_pos, state.get_position());
116 true
117 }
118 else {
119 false
120 }
121 }
122
123 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
125 let start_pos = state.get_position();
126
127 if let Some('"') = state.peek() {
128 state.advance(1);
129
130 while let Some(ch) = state.peek() {
131 if ch == '"' {
132 state.advance(1);
133 break;
134 }
135 state.advance(ch.len_utf8());
136 }
137
138 state.add_token(SmalltalkTokenType::Comment, start_pos, state.get_position());
139 true
140 }
141 else {
142 false
143 }
144 }
145
146 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
148 let start_pos = state.get_position();
149
150 if let Some(ch) = state.peek() {
151 if ch.is_alphabetic() || ch == '_' {
152 state.advance(ch.len_utf8());
153
154 while let Some(ch) = state.peek() {
155 if ch.is_alphanumeric() || ch == '_' {
156 state.advance(ch.len_utf8());
157 }
158 else {
159 break;
160 }
161 }
162
163 state.add_token(SmalltalkTokenType::Identifier, start_pos, state.get_position());
164 true
165 }
166 else {
167 false
168 }
169 }
170 else {
171 false
172 }
173 }
174
175 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
177 let start_pos = state.get_position();
178
179 if let Some(ch) = state.peek() {
180 if ch.is_ascii_digit() {
181 state.advance(1);
182
183 while let Some(ch) = state.peek() {
184 if ch.is_ascii_digit() {
185 state.advance(1);
186 }
187 else {
188 break;
189 }
190 }
191
192 state.add_token(SmalltalkTokenType::Number, start_pos, state.get_position());
193 true
194 }
195 else {
196 false
197 }
198 }
199 else {
200 false
201 }
202 }
203
204 fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
206 let start_pos = state.get_position();
207
208 if let Some(ch) = state.peek() {
209 let kind = match ch {
210 '(' => SmalltalkTokenType::LeftParen,
211 ')' => SmalltalkTokenType::RightParen,
212 '[' => SmalltalkTokenType::LeftBracket,
213 ']' => SmalltalkTokenType::RightBracket,
214 '{' => SmalltalkTokenType::LeftBrace,
215 '}' => SmalltalkTokenType::RightBrace,
216 '.' => SmalltalkTokenType::Dot,
217 ';' => SmalltalkTokenType::Semicolon,
218 ',' => SmalltalkTokenType::Comma,
219 '+' => SmalltalkTokenType::Plus,
220 '-' => SmalltalkTokenType::Minus,
221 '*' => SmalltalkTokenType::Star,
222 '/' => SmalltalkTokenType::Slash,
223 '=' => SmalltalkTokenType::Equal,
224 '<' => SmalltalkTokenType::Less,
225 '>' => SmalltalkTokenType::Greater,
226 _ => return false,
227 };
228
229 state.advance(1);
230 state.add_token(kind, start_pos, state.get_position());
231 true
232 }
233 else {
234 false
235 }
236 }
237}