1use crate::{kind::WatSyntaxKind, language::WatLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
5 source::{Source, TextEdit},
6};
7use std::sync::LazyLock;
8
9type State<'a, S> = LexerState<'a, S, WatLanguage>;
10
11static WAT_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12static WAT_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: ";;", block_start: "(;", block_end: ";)", nested_blocks: true });
13static WAT_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14
15#[derive(Clone)]
16pub struct WatLexer<'config> {
17 _config: &'config WatLanguage,
18}
19
20impl<'config> Lexer<WatLanguage> for WatLexer<'config> {
21 fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<WatLanguage>) -> LexOutput<WatLanguage> {
22 let mut state = State::new(text);
23 let result = self.run(&mut state);
24 state.finish_with_cache(result, cache)
25 }
26}
27
28impl<'config> WatLexer<'config> {
29 pub fn new(config: &'config WatLanguage) -> Self {
30 Self { _config: config }
31 }
32
33 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
34 while state.not_at_end() {
35 let safe_point = state.get_position();
36 if self.skip_whitespace(state) {
37 continue;
38 }
39
40 if self.skip_comment(state) {
41 continue;
42 }
43
44 if self.lex_string_literal(state) {
45 continue;
46 }
47
48 if self.lex_number_literal(state) {
49 continue;
50 }
51
52 if self.lex_identifier_or_keyword(state) {
53 continue;
54 }
55
56 if self.lex_punctuation(state) {
57 continue;
58 }
59
60 if self.lex_text(state) {
61 continue;
62 }
63
64 state.advance_if_dead_lock(safe_point);
65 }
66
67 Ok(())
68 }
69
70 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
72 WAT_WHITESPACE.scan(state, WatSyntaxKind::Whitespace)
73 }
74
75 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
77 WAT_COMMENT.scan(state, WatSyntaxKind::Comment, WatSyntaxKind::Comment)
78 }
79
80 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
82 WAT_STRING.scan(state, WatSyntaxKind::StringLiteral)
83 }
84
85 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
87 let start = state.get_position();
88 if let Some(ch) = state.peek() {
89 if ch.is_ascii_digit() || ch == '-' || ch == '+' {
90 state.bump();
91 let mut is_float = false;
92 while let Some(ch) = state.peek() {
93 if ch.is_ascii_digit() || ch == '_' {
94 state.bump();
95 }
96 else if ch == '.' {
97 is_float = true;
98 state.bump();
99 }
100 else if ch == 'e' || ch == 'E' || ch == 'p' || ch == 'P' || ch == 'x' || ch == 'X' || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') {
101 state.bump();
102 }
103 else {
104 break;
105 }
106 }
107 let kind = if is_float { WatSyntaxKind::FloatLiteral } else { WatSyntaxKind::IntegerLiteral };
108 state.add_token(kind, start, state.get_position());
109 return true;
110 }
111 }
112 false
113 }
114
115 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
117 let start = state.get_position();
118 if let Some(ch) = state.peek() {
119 if ch == '$' || ch.is_ascii_alphabetic() || ch == '_' {
120 state.bump();
121 while let Some(ch) = state.peek() {
122 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '.' || ch == '$' || ch == '-' {
123 state.bump();
124 }
125 else {
126 break;
127 }
128 }
129 let end = state.get_position();
130 let text = state.get_text_in((start..end).into());
131 let kind = if text.starts_with('$') {
132 WatSyntaxKind::Identifier
133 }
134 else {
135 match text.as_ref() {
136 "module" => WatSyntaxKind::ModuleKw,
137 "func" => WatSyntaxKind::FuncKw,
138 "param" => WatSyntaxKind::ParamKw,
139 "result" => WatSyntaxKind::ResultKw,
140 "export" => WatSyntaxKind::ExportKw,
141 "import" => WatSyntaxKind::ImportKw,
142 "table" => WatSyntaxKind::TableKw,
143 "memory" => WatSyntaxKind::MemoryKw,
144 "global" => WatSyntaxKind::GlobalKw,
145 "type" => WatSyntaxKind::TypeKw,
146 "elem" => WatSyntaxKind::ElemKw,
147 "data" => WatSyntaxKind::DataKw,
148 "start" => WatSyntaxKind::StartKw,
149 "block" => WatSyntaxKind::BlockKw,
150 "loop" => WatSyntaxKind::LoopKw,
151 "if" => WatSyntaxKind::IfKw,
152 "then" => WatSyntaxKind::ThenKw,
153 "else" => WatSyntaxKind::ElseKw,
154 "end" => WatSyntaxKind::EndKw,
155 "br" => WatSyntaxKind::BrKw,
156 "br_if" => WatSyntaxKind::BrIfKw,
157 "br_table" => WatSyntaxKind::BrTableKw,
158 "return" => WatSyntaxKind::ReturnKw,
159 "call" => WatSyntaxKind::CallKw,
160 "call_indirect" => WatSyntaxKind::CallIndirectKw,
161 "local" => WatSyntaxKind::LocalKw,
162 "local.get" => WatSyntaxKind::LocalGetKw,
163 "local.set" => WatSyntaxKind::LocalSetKw,
164 "local.tee" => WatSyntaxKind::LocalTeeKw,
165 "global.get" => WatSyntaxKind::GlobalGetKw,
166 "global.set" => WatSyntaxKind::GlobalSetKw,
167 "i32.load" => WatSyntaxKind::I32LoadKw,
168 "i64.load" => WatSyntaxKind::I64LoadKw,
169 "f32.load" => WatSyntaxKind::F32LoadKw,
170 "f64.load" => WatSyntaxKind::F64LoadKw,
171 "i32.store" => WatSyntaxKind::I32StoreKw,
172 "i64.store" => WatSyntaxKind::I64StoreKw,
173 "f32.store" => WatSyntaxKind::F32StoreKw,
174 "f64.store" => WatSyntaxKind::F64StoreKw,
175 "memory.size" => WatSyntaxKind::MemorySizeKw,
176 "memory.grow" => WatSyntaxKind::MemoryGrowKw,
177 "i32.const" => WatSyntaxKind::I32ConstKw,
178 "i64.const" => WatSyntaxKind::I64ConstKw,
179 "f32.const" => WatSyntaxKind::F32ConstKw,
180 "f64.const" => WatSyntaxKind::F64ConstKw,
181 "i32.add" => WatSyntaxKind::I32AddKw,
182 "i64.add" => WatSyntaxKind::I64AddKw,
183 "f32.add" => WatSyntaxKind::F32AddKw,
184 "f64.add" => WatSyntaxKind::F64AddKw,
185 "i32.sub" => WatSyntaxKind::I32SubKw,
186 "i64.sub" => WatSyntaxKind::I64SubKw,
187 "f32.sub" => WatSyntaxKind::F32SubKw,
188 "f64.sub" => WatSyntaxKind::F64SubKw,
189 "i32.mul" => WatSyntaxKind::I32MulKw,
190 "i64.mul" => WatSyntaxKind::I64MulKw,
191 "f32.mul" => WatSyntaxKind::F32MulKw,
192 "f64.mul" => WatSyntaxKind::F64MulKw,
193 "i32.eq" => WatSyntaxKind::I32EqKw,
194 "i64.eq" => WatSyntaxKind::I64EqKw,
195 "f32.eq" => WatSyntaxKind::F32EqKw,
196 "f64.eq" => WatSyntaxKind::F64EqKw,
197 "i32.ne" => WatSyntaxKind::I32NeKw,
198 "i64.ne" => WatSyntaxKind::I64NeKw,
199 "f32.ne" => WatSyntaxKind::F32NeKw,
200 "f64.ne" => WatSyntaxKind::F64NeKw,
201 "drop" => WatSyntaxKind::DropKw,
202 "select" => WatSyntaxKind::SelectKw,
203 "unreachable" => WatSyntaxKind::UnreachableKw,
204 "nop" => WatSyntaxKind::NopKw,
205 "i32" => WatSyntaxKind::I32Kw,
206 "i64" => WatSyntaxKind::I64Kw,
207 "f32" => WatSyntaxKind::F32Kw,
208 "f64" => WatSyntaxKind::F64Kw,
209 _ => WatSyntaxKind::Identifier,
210 }
211 };
212 state.add_token(kind, start, end);
213 return true;
214 }
215 }
216 false
217 }
218
219 fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
221 let start = state.get_position();
222 if let Some(ch) = state.peek() {
223 let kind = match ch {
224 '(' => Some(WatSyntaxKind::LeftParen),
225 ')' => Some(WatSyntaxKind::RightParen),
226 '=' => Some(WatSyntaxKind::Eq),
227 _ => None,
228 };
229
230 if let Some(kind) = kind {
231 state.bump();
232 state.add_token(kind, start, state.get_position());
233 return true;
234 }
235 }
236 false
237 }
238
239 fn lex_text<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
241 let start = state.get_position();
242 if let Some(_ch) = state.peek() {
243 state.bump();
244 state.add_token(WatSyntaxKind::Text, start, state.get_position());
245 true
246 }
247 else {
248 false
249 }
250 }
251}