1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::{language::WatLanguage, lexer::token_type::WatTokenType};
6use oak_core::{
7 Lexer, LexerCache, LexerState, OakError,
8 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
9 source::{Source, TextEdit},
10};
11use std::sync::LazyLock;
12
13pub(crate) type State<'a, S> = LexerState<'a, S, WatLanguage>;
14
15static WAT_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
16static WAT_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: ";;", block_start: "(;", block_end: ")", nested_blocks: true });
17static WAT_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
18
19#[derive(Clone)]
21pub struct WatLexer<'config> {
22 config: &'config WatLanguage,
23}
24
25impl<'config> Lexer<WatLanguage> for WatLexer<'config> {
26 fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<WatLanguage>) -> LexOutput<WatLanguage> {
27 let mut state = State::new(text);
28 let result = self.run(&mut state);
29 state.finish_with_cache(result, cache)
30 }
31}
32
33impl<'config> WatLexer<'config> {
34 pub fn new(config: &'config WatLanguage) -> Self {
36 Self { config }
37 }
38
39 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
40 while state.not_at_end() {
41 let safe_point = state.get_position();
42 if self.skip_whitespace(state) {
43 continue;
44 }
45
46 if self.skip_comment(state) {
47 continue;
48 }
49
50 if self.lex_string_literal(state) {
51 continue;
52 }
53
54 if self.lex_number_literal(state) {
55 continue;
56 }
57
58 if self.lex_identifier_or_keyword(state) {
59 continue;
60 }
61
62 if self.lex_punctuation(state) {
63 continue;
64 }
65
66 if self.lex_text(state) {
67 continue;
68 }
69
70 state.advance_if_dead_lock(safe_point);
71 }
72
73 Ok(())
74 }
75
76 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
78 WAT_WHITESPACE.scan(state, WatTokenType::Whitespace)
79 }
80
81 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
83 WAT_COMMENT.scan(state, WatTokenType::Comment, WatTokenType::Comment)
84 }
85
86 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
88 WAT_STRING.scan(state, WatTokenType::StringLiteral)
89 }
90
91 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
93 let start = state.get_position();
94 if let Some(ch) = state.peek() {
95 if ch.is_ascii_digit() || ch == '-' || ch == '+' {
96 state.bump();
97 let mut is_float = false;
98 while let Some(ch) = state.peek() {
99 if ch.is_ascii_digit() || ch == '_' {
100 state.bump();
101 }
102 else if ch == '.' {
103 is_float = true;
104 state.bump();
105 }
106 else if ch == 'e' || ch == 'E' || ch == 'p' || ch == 'P' || ch == 'x' || ch == 'X' || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') {
107 state.bump();
108 }
109 else {
110 break;
111 }
112 }
113 let kind = if is_float { WatTokenType::FloatLiteral } else { WatTokenType::IntegerLiteral };
114 state.add_token(kind, start, state.get_position());
115 return true;
116 }
117 }
118 false
119 }
120
121 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
123 let start = state.get_position();
124 if let Some(ch) = state.peek() {
125 if ch == '$' || ch.is_ascii_alphabetic() || ch == '_' {
126 state.bump();
127 while let Some(ch) = state.peek() {
128 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '.' || ch == '$' || ch == '-' {
129 state.bump();
130 }
131 else {
132 break;
133 }
134 }
135 let end = state.get_position();
136 let text = state.get_text_in((start..end).into());
137 let kind = if text.starts_with('$') {
138 WatTokenType::Identifier
139 }
140 else {
141 match text.as_ref() {
142 "module" => WatTokenType::ModuleKw,
143 "func" => WatTokenType::FuncKw,
144 "param" => WatTokenType::ParamKw,
145 "result" => WatTokenType::ResultKw,
146 "export" => WatTokenType::ExportKw,
147 "import" => WatTokenType::ImportKw,
148 "table" => WatTokenType::TableKw,
149 "memory" => WatTokenType::MemoryKw,
150 "global" => WatTokenType::GlobalKw,
151 "type" => WatTokenType::TypeKw,
152 "elem" => WatTokenType::ElemKw,
153 "data" => WatTokenType::DataKw,
154 "start" => WatTokenType::StartKw,
155 "block" => WatTokenType::BlockKw,
156 "loop" => WatTokenType::LoopKw,
157 "if" => WatTokenType::IfKw,
158 "then" => WatTokenType::ThenKw,
159 "else" => WatTokenType::ElseKw,
160 "end" => WatTokenType::EndKw,
161 "br" => WatTokenType::BrKw,
162 "br_if" => WatTokenType::BrIfKw,
163 "br_table" => WatTokenType::BrTableKw,
164 "return" => WatTokenType::ReturnKw,
165 "call" => WatTokenType::CallKw,
166 "call_indirect" => WatTokenType::CallIndirectKw,
167 "local" => WatTokenType::LocalKw,
168 "local.get" => WatTokenType::LocalGetKw,
169 "local.set" => WatTokenType::LocalSetKw,
170 "local.tee" => WatTokenType::LocalTeeKw,
171 "global.get" => WatTokenType::GlobalGetKw,
172 "global.set" => WatTokenType::GlobalSetKw,
173 "i32.load" => WatTokenType::I32LoadKw,
174 "i64.load" => WatTokenType::I64LoadKw,
175 "f32.load" => WatTokenType::F32LoadKw,
176 "f64.load" => WatTokenType::F64LoadKw,
177 "i32.store" => WatTokenType::I32StoreKw,
178 "i64.store" => WatTokenType::I64StoreKw,
179 "f32.store" => WatTokenType::F32StoreKw,
180 "f64.store" => WatTokenType::F64StoreKw,
181 "memory.size" => WatTokenType::MemorySizeKw,
182 "memory.grow" => WatTokenType::MemoryGrowKw,
183 "i32.const" => WatTokenType::I32ConstKw,
184 "i64.const" => WatTokenType::I64ConstKw,
185 "f32.const" => WatTokenType::F32ConstKw,
186 "f64.const" => WatTokenType::F64ConstKw,
187 "i32.add" => WatTokenType::I32AddKw,
188 "i64.add" => WatTokenType::I64AddKw,
189 "f32.add" => WatTokenType::F32AddKw,
190 "f64.add" => WatTokenType::F64AddKw,
191 "i32.sub" => WatTokenType::I32SubKw,
192 "i64.sub" => WatTokenType::I64SubKw,
193 "f32.sub" => WatTokenType::F32SubKw,
194 "f64.sub" => WatTokenType::F64SubKw,
195 "i32.mul" => WatTokenType::I32MulKw,
196 "i64.mul" => WatTokenType::I64MulKw,
197 "f32.mul" => WatTokenType::F32MulKw,
198 "f64.mul" => WatTokenType::F64MulKw,
199 "i32.eq" => WatTokenType::I32EqKw,
200 "i64.eq" => WatTokenType::I64EqKw,
201 "f32.eq" => WatTokenType::F32EqKw,
202 "f64.eq" => WatTokenType::F64EqKw,
203 "i32.ne" => WatTokenType::I32NeKw,
204 "i64.ne" => WatTokenType::I64NeKw,
205 "f32.ne" => WatTokenType::F32NeKw,
206 "f64.ne" => WatTokenType::F64NeKw,
207 "drop" => WatTokenType::DropKw,
208 "select" => WatTokenType::SelectKw,
209 "unreachable" => WatTokenType::UnreachableKw,
210 "nop" => WatTokenType::NopKw,
211 "i32" => WatTokenType::I32Kw,
212 "i64" => WatTokenType::I64Kw,
213 "f32" => WatTokenType::F32Kw,
214 "f64" => WatTokenType::F64Kw,
215 _ => WatTokenType::Identifier,
216 }
217 };
218 state.add_token(kind, start, end);
219 return true;
220 }
221 }
222 false
223 }
224
225 fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
227 let start = state.get_position();
228 if let Some(ch) = state.peek() {
229 let kind = match ch {
230 '(' => Some(WatTokenType::LeftParen),
231 ')' => Some(WatTokenType::RightParen),
232 '=' => Some(WatTokenType::Eq),
233 _ => None,
234 };
235
236 if let Some(kind) = kind {
237 state.bump();
238 state.add_token(kind, start, state.get_position());
239 return true;
240 }
241 }
242 false
243 }
244
245 fn lex_text<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
247 let start = state.get_position();
248 if let Some(_ch) = state.peek() {
249 state.bump();
250 state.add_token(WatTokenType::Text, start, state.get_position());
251 true
252 }
253 else {
254 false
255 }
256 }
257}