Skip to main content

oak_wat/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2/// Token type definitions.
3pub mod token_type;
4
5use crate::{language::WatLanguage, lexer::token_type::WatTokenType};
6use oak_core::{
7    Lexer, LexerCache, LexerState, OakError,
8    lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
9    source::{Source, TextEdit},
10};
11use std::sync::LazyLock;
12
13pub(crate) type State<'a, S> = LexerState<'a, S, WatLanguage>;
14
15static WAT_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
16static WAT_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: ";;", block_start: "(;", block_end: ")", nested_blocks: true });
17static WAT_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
18
19/// Lexer for the WebAssembly Text (WAT) language.
20#[derive(Clone)]
21pub struct WatLexer<'config> {
22    config: &'config WatLanguage,
23}
24
25impl<'config> Lexer<WatLanguage> for WatLexer<'config> {
26    fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<WatLanguage>) -> LexOutput<WatLanguage> {
27        let mut state = State::new(text);
28        let result = self.run(&mut state);
29        state.finish_with_cache(result, cache)
30    }
31}
32
33impl<'config> WatLexer<'config> {
34    /// Creates a new instance of the WAT lexer.
35    pub fn new(config: &'config WatLanguage) -> Self {
36        Self { config }
37    }
38
39    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
40        while state.not_at_end() {
41            let safe_point = state.get_position();
42            if self.skip_whitespace(state) {
43                continue;
44            }
45
46            if self.skip_comment(state) {
47                continue;
48            }
49
50            if self.lex_string_literal(state) {
51                continue;
52            }
53
54            if self.lex_number_literal(state) {
55                continue;
56            }
57
58            if self.lex_identifier_or_keyword(state) {
59                continue;
60            }
61
62            if self.lex_punctuation(state) {
63                continue;
64            }
65
66            if self.lex_text(state) {
67                continue;
68            }
69
70            state.advance_if_dead_lock(safe_point);
71        }
72
73        Ok(())
74    }
75
76    /// Skips whitespace characters.
77    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
78        WAT_WHITESPACE.scan(state, WatTokenType::Whitespace)
79    }
80
81    /// Skips comments.
82    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
83        WAT_COMMENT.scan(state, WatTokenType::Comment, WatTokenType::Comment)
84    }
85
86    /// Lexes string literals.
87    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
88        WAT_STRING.scan(state, WatTokenType::StringLiteral)
89    }
90
91    /// Lexes number literals.
92    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
93        let start = state.get_position();
94        if let Some(ch) = state.peek() {
95            if ch.is_ascii_digit() || ch == '-' || ch == '+' {
96                state.bump();
97                let mut is_float = false;
98                while let Some(ch) = state.peek() {
99                    if ch.is_ascii_digit() || ch == '_' {
100                        state.bump();
101                    }
102                    else if ch == '.' {
103                        is_float = true;
104                        state.bump();
105                    }
106                    else if ch == 'e' || ch == 'E' || ch == 'p' || ch == 'P' || ch == 'x' || ch == 'X' || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') {
107                        state.bump();
108                    }
109                    else {
110                        break;
111                    }
112                }
113                let kind = if is_float { WatTokenType::FloatLiteral } else { WatTokenType::IntegerLiteral };
114                state.add_token(kind, start, state.get_position());
115                return true;
116            }
117        }
118        false
119    }
120
121    /// Lexes identifiers or keywords.
122    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
123        let start = state.get_position();
124        if let Some(ch) = state.peek() {
125            if ch == '$' || ch.is_ascii_alphabetic() || ch == '_' {
126                state.bump();
127                while let Some(ch) = state.peek() {
128                    if ch.is_ascii_alphanumeric() || ch == '_' || ch == '.' || ch == '$' || ch == '-' {
129                        state.bump();
130                    }
131                    else {
132                        break;
133                    }
134                }
135                let end = state.get_position();
136                let text = state.get_text_in((start..end).into());
137                let kind = if text.starts_with('$') {
138                    WatTokenType::Identifier
139                }
140                else {
141                    match text.as_ref() {
142                        "module" => WatTokenType::ModuleKw,
143                        "func" => WatTokenType::FuncKw,
144                        "param" => WatTokenType::ParamKw,
145                        "result" => WatTokenType::ResultKw,
146                        "export" => WatTokenType::ExportKw,
147                        "import" => WatTokenType::ImportKw,
148                        "table" => WatTokenType::TableKw,
149                        "memory" => WatTokenType::MemoryKw,
150                        "global" => WatTokenType::GlobalKw,
151                        "type" => WatTokenType::TypeKw,
152                        "elem" => WatTokenType::ElemKw,
153                        "data" => WatTokenType::DataKw,
154                        "start" => WatTokenType::StartKw,
155                        "block" => WatTokenType::BlockKw,
156                        "loop" => WatTokenType::LoopKw,
157                        "if" => WatTokenType::IfKw,
158                        "then" => WatTokenType::ThenKw,
159                        "else" => WatTokenType::ElseKw,
160                        "end" => WatTokenType::EndKw,
161                        "br" => WatTokenType::BrKw,
162                        "br_if" => WatTokenType::BrIfKw,
163                        "br_table" => WatTokenType::BrTableKw,
164                        "return" => WatTokenType::ReturnKw,
165                        "call" => WatTokenType::CallKw,
166                        "call_indirect" => WatTokenType::CallIndirectKw,
167                        "local" => WatTokenType::LocalKw,
168                        "local.get" => WatTokenType::LocalGetKw,
169                        "local.set" => WatTokenType::LocalSetKw,
170                        "local.tee" => WatTokenType::LocalTeeKw,
171                        "global.get" => WatTokenType::GlobalGetKw,
172                        "global.set" => WatTokenType::GlobalSetKw,
173                        "i32.load" => WatTokenType::I32LoadKw,
174                        "i64.load" => WatTokenType::I64LoadKw,
175                        "f32.load" => WatTokenType::F32LoadKw,
176                        "f64.load" => WatTokenType::F64LoadKw,
177                        "i32.store" => WatTokenType::I32StoreKw,
178                        "i64.store" => WatTokenType::I64StoreKw,
179                        "f32.store" => WatTokenType::F32StoreKw,
180                        "f64.store" => WatTokenType::F64StoreKw,
181                        "memory.size" => WatTokenType::MemorySizeKw,
182                        "memory.grow" => WatTokenType::MemoryGrowKw,
183                        "i32.const" => WatTokenType::I32ConstKw,
184                        "i64.const" => WatTokenType::I64ConstKw,
185                        "f32.const" => WatTokenType::F32ConstKw,
186                        "f64.const" => WatTokenType::F64ConstKw,
187                        "i32.add" => WatTokenType::I32AddKw,
188                        "i64.add" => WatTokenType::I64AddKw,
189                        "f32.add" => WatTokenType::F32AddKw,
190                        "f64.add" => WatTokenType::F64AddKw,
191                        "i32.sub" => WatTokenType::I32SubKw,
192                        "i64.sub" => WatTokenType::I64SubKw,
193                        "f32.sub" => WatTokenType::F32SubKw,
194                        "f64.sub" => WatTokenType::F64SubKw,
195                        "i32.mul" => WatTokenType::I32MulKw,
196                        "i64.mul" => WatTokenType::I64MulKw,
197                        "f32.mul" => WatTokenType::F32MulKw,
198                        "f64.mul" => WatTokenType::F64MulKw,
199                        "i32.eq" => WatTokenType::I32EqKw,
200                        "i64.eq" => WatTokenType::I64EqKw,
201                        "f32.eq" => WatTokenType::F32EqKw,
202                        "f64.eq" => WatTokenType::F64EqKw,
203                        "i32.ne" => WatTokenType::I32NeKw,
204                        "i64.ne" => WatTokenType::I64NeKw,
205                        "f32.ne" => WatTokenType::F32NeKw,
206                        "f64.ne" => WatTokenType::F64NeKw,
207                        "drop" => WatTokenType::DropKw,
208                        "select" => WatTokenType::SelectKw,
209                        "unreachable" => WatTokenType::UnreachableKw,
210                        "nop" => WatTokenType::NopKw,
211                        "i32" => WatTokenType::I32Kw,
212                        "i64" => WatTokenType::I64Kw,
213                        "f32" => WatTokenType::F32Kw,
214                        "f64" => WatTokenType::F64Kw,
215                        _ => WatTokenType::Identifier,
216                    }
217                };
218                state.add_token(kind, start, end);
219                return true;
220            }
221        }
222        false
223    }
224
225    /// Lexes punctuation marks.
226    fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
227        let start = state.get_position();
228        if let Some(ch) = state.peek() {
229            let kind = match ch {
230                '(' => Some(WatTokenType::LeftParen),
231                ')' => Some(WatTokenType::RightParen),
232                '=' => Some(WatTokenType::Eq),
233                _ => None,
234            };
235
236            if let Some(kind) = kind {
237                state.bump();
238                state.add_token(kind, start, state.get_position());
239                return true;
240            }
241        }
242        false
243    }
244
245    /// Lexes plain text.
246    fn lex_text<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
247        let start = state.get_position();
248        if let Some(_ch) = state.peek() {
249            state.bump();
250            state.add_token(WatTokenType::Text, start, state.get_position());
251            true
252        }
253        else {
254            false
255        }
256    }
257}