Skip to main content

oak_wat/lexer/
mod.rs

1use crate::{kind::WatSyntaxKind, language::WatLanguage};
2use oak_core::{
3    Lexer, LexerCache, LexerState, OakError,
4    lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
5    source::{Source, TextEdit},
6};
7use std::sync::LazyLock;
8
9type State<'a, S> = LexerState<'a, S, WatLanguage>;
10
11static WAT_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12static WAT_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: ";;", block_start: "(;", block_end: ";)", nested_blocks: true });
13static WAT_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14
15#[derive(Clone)]
16pub struct WatLexer<'config> {
17    _config: &'config WatLanguage,
18}
19
20impl<'config> Lexer<WatLanguage> for WatLexer<'config> {
21    fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<WatLanguage>) -> LexOutput<WatLanguage> {
22        let mut state = State::new(text);
23        let result = self.run(&mut state);
24        state.finish_with_cache(result, cache)
25    }
26}
27
28impl<'config> WatLexer<'config> {
29    pub fn new(config: &'config WatLanguage) -> Self {
30        Self { _config: config }
31    }
32
33    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
34        while state.not_at_end() {
35            let safe_point = state.get_position();
36            if self.skip_whitespace(state) {
37                continue;
38            }
39
40            if self.skip_comment(state) {
41                continue;
42            }
43
44            if self.lex_string_literal(state) {
45                continue;
46            }
47
48            if self.lex_number_literal(state) {
49                continue;
50            }
51
52            if self.lex_identifier_or_keyword(state) {
53                continue;
54            }
55
56            if self.lex_punctuation(state) {
57                continue;
58            }
59
60            if self.lex_text(state) {
61                continue;
62            }
63
64            state.advance_if_dead_lock(safe_point);
65        }
66
67        Ok(())
68    }
69
70    /// 跳过空白字符
71    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
72        WAT_WHITESPACE.scan(state, WatSyntaxKind::Whitespace)
73    }
74
75    /// 跳过注释
76    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
77        WAT_COMMENT.scan(state, WatSyntaxKind::Comment, WatSyntaxKind::Comment)
78    }
79
80    /// 解析字符串字面量
81    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
82        WAT_STRING.scan(state, WatSyntaxKind::StringLiteral)
83    }
84
85    /// 解析数字字面量
86    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
87        let start = state.get_position();
88        if let Some(ch) = state.peek() {
89            if ch.is_ascii_digit() || ch == '-' || ch == '+' {
90                state.bump();
91                let mut is_float = false;
92                while let Some(ch) = state.peek() {
93                    if ch.is_ascii_digit() || ch == '_' {
94                        state.bump();
95                    }
96                    else if ch == '.' {
97                        is_float = true;
98                        state.bump();
99                    }
100                    else if ch == 'e' || ch == 'E' || ch == 'p' || ch == 'P' || ch == 'x' || ch == 'X' || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') {
101                        state.bump();
102                    }
103                    else {
104                        break;
105                    }
106                }
107                let kind = if is_float { WatSyntaxKind::FloatLiteral } else { WatSyntaxKind::IntegerLiteral };
108                state.add_token(kind, start, state.get_position());
109                return true;
110            }
111        }
112        false
113    }
114
115    /// 解析标识符或关键字
116    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
117        let start = state.get_position();
118        if let Some(ch) = state.peek() {
119            if ch == '$' || ch.is_ascii_alphabetic() || ch == '_' {
120                state.bump();
121                while let Some(ch) = state.peek() {
122                    if ch.is_ascii_alphanumeric() || ch == '_' || ch == '.' || ch == '$' || ch == '-' {
123                        state.bump();
124                    }
125                    else {
126                        break;
127                    }
128                }
129                let end = state.get_position();
130                let text = state.get_text_in((start..end).into());
131                let kind = if text.starts_with('$') {
132                    WatSyntaxKind::Identifier
133                }
134                else {
135                    match text.as_ref() {
136                        "module" => WatSyntaxKind::ModuleKw,
137                        "func" => WatSyntaxKind::FuncKw,
138                        "param" => WatSyntaxKind::ParamKw,
139                        "result" => WatSyntaxKind::ResultKw,
140                        "export" => WatSyntaxKind::ExportKw,
141                        "import" => WatSyntaxKind::ImportKw,
142                        "table" => WatSyntaxKind::TableKw,
143                        "memory" => WatSyntaxKind::MemoryKw,
144                        "global" => WatSyntaxKind::GlobalKw,
145                        "type" => WatSyntaxKind::TypeKw,
146                        "elem" => WatSyntaxKind::ElemKw,
147                        "data" => WatSyntaxKind::DataKw,
148                        "start" => WatSyntaxKind::StartKw,
149                        "block" => WatSyntaxKind::BlockKw,
150                        "loop" => WatSyntaxKind::LoopKw,
151                        "if" => WatSyntaxKind::IfKw,
152                        "then" => WatSyntaxKind::ThenKw,
153                        "else" => WatSyntaxKind::ElseKw,
154                        "end" => WatSyntaxKind::EndKw,
155                        "br" => WatSyntaxKind::BrKw,
156                        "br_if" => WatSyntaxKind::BrIfKw,
157                        "br_table" => WatSyntaxKind::BrTableKw,
158                        "return" => WatSyntaxKind::ReturnKw,
159                        "call" => WatSyntaxKind::CallKw,
160                        "call_indirect" => WatSyntaxKind::CallIndirectKw,
161                        "local" => WatSyntaxKind::LocalKw,
162                        "local.get" => WatSyntaxKind::LocalGetKw,
163                        "local.set" => WatSyntaxKind::LocalSetKw,
164                        "local.tee" => WatSyntaxKind::LocalTeeKw,
165                        "global.get" => WatSyntaxKind::GlobalGetKw,
166                        "global.set" => WatSyntaxKind::GlobalSetKw,
167                        "i32.load" => WatSyntaxKind::I32LoadKw,
168                        "i64.load" => WatSyntaxKind::I64LoadKw,
169                        "f32.load" => WatSyntaxKind::F32LoadKw,
170                        "f64.load" => WatSyntaxKind::F64LoadKw,
171                        "i32.store" => WatSyntaxKind::I32StoreKw,
172                        "i64.store" => WatSyntaxKind::I64StoreKw,
173                        "f32.store" => WatSyntaxKind::F32StoreKw,
174                        "f64.store" => WatSyntaxKind::F64StoreKw,
175                        "memory.size" => WatSyntaxKind::MemorySizeKw,
176                        "memory.grow" => WatSyntaxKind::MemoryGrowKw,
177                        "i32.const" => WatSyntaxKind::I32ConstKw,
178                        "i64.const" => WatSyntaxKind::I64ConstKw,
179                        "f32.const" => WatSyntaxKind::F32ConstKw,
180                        "f64.const" => WatSyntaxKind::F64ConstKw,
181                        "i32.add" => WatSyntaxKind::I32AddKw,
182                        "i64.add" => WatSyntaxKind::I64AddKw,
183                        "f32.add" => WatSyntaxKind::F32AddKw,
184                        "f64.add" => WatSyntaxKind::F64AddKw,
185                        "i32.sub" => WatSyntaxKind::I32SubKw,
186                        "i64.sub" => WatSyntaxKind::I64SubKw,
187                        "f32.sub" => WatSyntaxKind::F32SubKw,
188                        "f64.sub" => WatSyntaxKind::F64SubKw,
189                        "i32.mul" => WatSyntaxKind::I32MulKw,
190                        "i64.mul" => WatSyntaxKind::I64MulKw,
191                        "f32.mul" => WatSyntaxKind::F32MulKw,
192                        "f64.mul" => WatSyntaxKind::F64MulKw,
193                        "i32.eq" => WatSyntaxKind::I32EqKw,
194                        "i64.eq" => WatSyntaxKind::I64EqKw,
195                        "f32.eq" => WatSyntaxKind::F32EqKw,
196                        "f64.eq" => WatSyntaxKind::F64EqKw,
197                        "i32.ne" => WatSyntaxKind::I32NeKw,
198                        "i64.ne" => WatSyntaxKind::I64NeKw,
199                        "f32.ne" => WatSyntaxKind::F32NeKw,
200                        "f64.ne" => WatSyntaxKind::F64NeKw,
201                        "drop" => WatSyntaxKind::DropKw,
202                        "select" => WatSyntaxKind::SelectKw,
203                        "unreachable" => WatSyntaxKind::UnreachableKw,
204                        "nop" => WatSyntaxKind::NopKw,
205                        "i32" => WatSyntaxKind::I32Kw,
206                        "i64" => WatSyntaxKind::I64Kw,
207                        "f32" => WatSyntaxKind::F32Kw,
208                        "f64" => WatSyntaxKind::F64Kw,
209                        _ => WatSyntaxKind::Identifier,
210                    }
211                };
212                state.add_token(kind, start, end);
213                return true;
214            }
215        }
216        false
217    }
218
219    /// 解析标点符号
220    fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
221        let start = state.get_position();
222        if let Some(ch) = state.peek() {
223            let kind = match ch {
224                '(' => Some(WatSyntaxKind::LeftParen),
225                ')' => Some(WatSyntaxKind::RightParen),
226                '=' => Some(WatSyntaxKind::Eq),
227                _ => None,
228            };
229
230            if let Some(kind) = kind {
231                state.bump();
232                state.add_token(kind, start, state.get_position());
233                return true;
234            }
235        }
236        false
237    }
238
239    /// 解析普通文本
240    fn lex_text<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
241        let start = state.get_position();
242        if let Some(_ch) = state.peek() {
243            state.bump();
244            state.add_token(WatSyntaxKind::Text, start, state.get_position());
245            true
246        }
247        else {
248            false
249        }
250    }
251}