oak_smalltalk/lexer/
mod.rs

1use crate::{kind::SmalltalkSyntaxKind, language::SmalltalkLanguage};
2use oak_core::{
3    OakError,
4    lexer::{LexOutput, Lexer, LexerCache, LexerState},
5    source::{Source, TextEdit},
6};
7
8type State<'a, S> = LexerState<'a, S, SmalltalkLanguage>;
9
10#[derive(Clone)]
11pub struct SmalltalkLexer<'config> {
12    _config: &'config SmalltalkLanguage,
13}
14
15impl<'config> Lexer<SmalltalkLanguage> for SmalltalkLexer<'config> {
16    fn lex<'a, S: Source + ?Sized>(&self, source: &S, edits: &[TextEdit], cache: &'a mut impl LexerCache<SmalltalkLanguage>) -> LexOutput<SmalltalkLanguage> {
17        let relex_from = edits.iter().map(|e| e.span.start).min().unwrap_or(source.length());
18        let mut state = LexerState::new_with_cache(source, relex_from, cache);
19        if state.fully_reused() {
20            let result = Ok(());
21            return state.finish_with_cache(result, cache);
22        }
23        let result = self.run(&mut state);
24        state.finish_with_cache(result, cache)
25    }
26}
27
28impl<'config> SmalltalkLexer<'config> {
29    pub fn new(config: &'config SmalltalkLanguage) -> Self {
30        Self { _config: config }
31    }
32
33    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
34        while state.not_at_end() {
35            let safe_point = state.get_position();
36
37            if self.skip_whitespace(state) {
38                continue;
39            }
40
41            if self.lex_newline(state) {
42                continue;
43            }
44
45            if self.lex_comment(state) {
46                continue;
47            }
48
49            if self.lex_number(state) {
50                continue;
51            }
52
53            if self.lex_identifier(state) {
54                continue;
55            }
56
57            if self.lex_punctuation(state) {
58                continue;
59            }
60
61            // 错误处理：如果没有匹配任何规则，跳过当前字符并标记为错误
62            let start_pos = state.get_position();
63            if let Some(ch) = state.peek() {
64                state.advance(ch.len_utf8());
65                state.add_token(SmalltalkSyntaxKind::Error, start_pos, state.get_position());
66            }
67
68            state.advance_if_dead_lock(safe_point);
69        }
70
71        // 添加 EOF token
72        state.add_eof();
73        Ok(())
74    }
75
76    /// 跳过空白字符
77    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
78        let start_pos = state.get_position();
79
80        while let Some(ch) = state.peek() {
81            if ch == ' ' || ch == '\t' {
82                state.advance(ch.len_utf8());
83            }
84            else {
85                break;
86            }
87        }
88
89        if state.get_position() > start_pos {
90            state.add_token(SmalltalkSyntaxKind::Whitespace, start_pos, state.get_position());
91            true
92        }
93        else {
94            false
95        }
96    }
97
98    /// 处理换行符
99    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
100        let start_pos = state.get_position();
101
102        if let Some('\n') = state.peek() {
103            state.advance(1);
104            state.add_token(SmalltalkSyntaxKind::Newline, start_pos, state.get_position());
105            true
106        }
107        else if let Some('\r') = state.peek() {
108            state.advance(1);
109            if let Some('\n') = state.peek() {
110                state.advance(1);
111            }
112            state.add_token(SmalltalkSyntaxKind::Newline, start_pos, state.get_position());
113            true
114        }
115        else {
116            false
117        }
118    }
119
120    /// 处理注释
121    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
122        let start_pos = state.get_position();
123
124        if let Some('"') = state.peek() {
125            state.advance(1);
126
127            while let Some(ch) = state.peek() {
128                if ch == '"' {
129                    state.advance(1);
130                    break;
131                }
132                state.advance(ch.len_utf8());
133            }
134
135            state.add_token(SmalltalkSyntaxKind::Comment, start_pos, state.get_position());
136            true
137        }
138        else {
139            false
140        }
141    }
142
143    /// 处理标识符
144    fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
145        let start_pos = state.get_position();
146
147        if let Some(ch) = state.peek() {
148            if ch.is_alphabetic() || ch == '_' {
149                state.advance(ch.len_utf8());
150
151                while let Some(ch) = state.peek() {
152                    if ch.is_alphanumeric() || ch == '_' {
153                        state.advance(ch.len_utf8());
154                    }
155                    else {
156                        break;
157                    }
158                }
159
160                state.add_token(SmalltalkSyntaxKind::Identifier, start_pos, state.get_position());
161                true
162            }
163            else {
164                false
165            }
166        }
167        else {
168            false
169        }
170    }
171
172    /// 处理数字
173    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
174        let start_pos = state.get_position();
175
176        if let Some(ch) = state.peek() {
177            if ch.is_ascii_digit() {
178                state.advance(1);
179
180                while let Some(ch) = state.peek() {
181                    if ch.is_ascii_digit() {
182                        state.advance(1);
183                    }
184                    else {
185                        break;
186                    }
187                }
188
189                state.add_token(SmalltalkSyntaxKind::Number, start_pos, state.get_position());
190                true
191            }
192            else {
193                false
194            }
195        }
196        else {
197            false
198        }
199    }
200
201    /// 处理标点符号
202    fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
203        let start_pos = state.get_position();
204
205        if let Some(ch) = state.peek() {
206            let kind = match ch {
207                '(' => SmalltalkSyntaxKind::LeftParen,
208                ')' => SmalltalkSyntaxKind::RightParen,
209                '[' => SmalltalkSyntaxKind::LeftBracket,
210                ']' => SmalltalkSyntaxKind::RightBracket,
211                '{' => SmalltalkSyntaxKind::LeftBrace,
212                '}' => SmalltalkSyntaxKind::RightBrace,
213                '.' => SmalltalkSyntaxKind::Dot,
214                ';' => SmalltalkSyntaxKind::Semicolon,
215                ',' => SmalltalkSyntaxKind::Comma,
216                '+' => SmalltalkSyntaxKind::Plus,
217                '-' => SmalltalkSyntaxKind::Minus,
218                '*' => SmalltalkSyntaxKind::Star,
219                '/' => SmalltalkSyntaxKind::Slash,
220                '=' => SmalltalkSyntaxKind::Equal,
221                '<' => SmalltalkSyntaxKind::Less,
222                '>' => SmalltalkSyntaxKind::Greater,
223                _ => return false,
224            };
225
226            state.advance(1);
227            state.add_token(kind, start_pos, state.get_position());
228            true
229        }
230        else {
231            false
232        }
233    }
234}
oak_smalltalk/lexer/mod.rs

oak_smalltalk/lexer/
mod.rs