1use crate::{kind::DHallSyntaxKind, language::DHallLanguage};
2use oak_core::{
3 LexOutput, Lexer, LexerCache, LexerState, OakError,
4 lexer::{CommentConfig, StringConfig, WhitespaceConfig},
5 source::{Source, TextEdit},
6};
7use std::sync::LazyLock;
8
9static DHALL_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
10static DHALL_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "--", block_start: "{-", block_end: "-}", nested_blocks: true });
11static DHALL_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
12
13#[derive(Clone)]
14pub struct DHallLexer<'config> {
15 _config: &'config DHallLanguage,
16}
17
18impl<'config> Lexer<DHallLanguage> for DHallLexer<'config> {
19 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DHallLanguage>) -> LexOutput<DHallLanguage> {
20 let mut state = LexerState::new(source);
21 let result = self.run(&mut state);
22 state.finish_with_cache(result, cache)
23 }
24}
25
26impl<'config> DHallLexer<'config> {
27 pub fn new(config: &'config DHallLanguage) -> Self {
28 Self { _config: config }
29 }
30
31 fn run<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> Result<(), OakError> {
32 while state.not_at_end() {
33 let safe_point = state.get_position();
34 if self.skip_whitespace(state) {
35 continue;
36 }
37
38 if self.skip_comment(state) {
39 continue;
40 }
41
42 if self.lex_string_literal(state) {
43 continue;
44 }
45
46 if self.lex_number_literal(state) {
47 continue;
48 }
49
50 if self.lex_identifier_or_keyword(state) {
51 continue;
52 }
53
54 if self.lex_operators(state) {
55 continue;
56 }
57
58 if self.lex_single_char_tokens(state) {
59 continue;
60 }
61
62 state.advance_if_dead_lock(safe_point);
63 }
64
65 Ok(())
66 }
67
68 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
69 DHALL_WHITESPACE.scan(state, DHallSyntaxKind::Whitespace)
70 }
71
72 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
73 DHALL_COMMENT.scan(state, DHallSyntaxKind::Comment, DHallSyntaxKind::Comment)
74 }
75
76 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
77 DHALL_STRING.scan(state, DHallSyntaxKind::String)
78 }
79
80 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
81 let start = state.get_position();
82 let first = match state.peek() {
83 Some(c) => c,
84 None => return false,
85 };
86
87 if !first.is_ascii_digit() {
88 return false;
89 }
90
91 state.advance(1);
92 while let Some(c) = state.peek() {
93 if c.is_ascii_digit() {
94 state.advance(1);
95 }
96 else {
97 break;
98 }
99 }
100
101 state.add_token(DHallSyntaxKind::Number, start, state.get_position());
102 true
103 }
104
105 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
106 let start = state.get_position();
107 let first = match state.peek() {
108 Some(c) => c,
109 None => return false,
110 };
111
112 if !first.is_alphabetic() && first != '_' {
113 return false;
114 }
115
116 state.advance(1);
117 while let Some(c) = state.peek() {
118 if c.is_alphanumeric() || c == '_' || c == '-' || c == '/' {
119 state.advance(1);
120 }
121 else {
122 break;
123 }
124 }
125
126 let end = state.get_position();
127 let text = state.get_text_in((start..end).into());
128
129 let kind = match text.as_ref() {
130 "if" => DHallSyntaxKind::If,
131 "then" => DHallSyntaxKind::Then,
132 "else" => DHallSyntaxKind::Else,
133 "let" => DHallSyntaxKind::Let,
134 "in" => DHallSyntaxKind::In,
135 "using" => DHallSyntaxKind::Using,
136 "as" => DHallSyntaxKind::As,
137 "merge" => DHallSyntaxKind::Merge,
138 "Some" => DHallSyntaxKind::Some,
139 "None" => DHallSyntaxKind::None,
140 "with" => DHallSyntaxKind::With,
141 "forall" => DHallSyntaxKind::Forall,
142 "assert" => DHallSyntaxKind::Assert,
143 "Bool" => DHallSyntaxKind::Bool,
144 "Natural" => DHallSyntaxKind::Natural,
145 "Integer" => DHallSyntaxKind::Integer,
146 "Double" => DHallSyntaxKind::Double,
147 "Text" => DHallSyntaxKind::Text,
148 "List" => DHallSyntaxKind::List,
149 "Optional" => DHallSyntaxKind::Optional,
150 "True" => DHallSyntaxKind::True,
151 "False" => DHallSyntaxKind::False,
152 "λ" => DHallSyntaxKind::Lambda,
153 _ => DHallSyntaxKind::Identifier,
154 };
155
156 state.add_token(kind, start, end);
157 true
158 }
159
160 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
161 let start = state.get_position();
162 let text = state.rest();
163
164 let ops = [
165 ("->", DHallSyntaxKind::Arrow),
166 ("→", DHallSyntaxKind::Arrow),
167 ("=>", DHallSyntaxKind::FatArrow),
168 ("==", DHallSyntaxKind::EqualEqual),
169 ("≡", DHallSyntaxKind::EqualEqual),
170 ("!=", DHallSyntaxKind::NotEqual),
171 ("&&", DHallSyntaxKind::And),
172 ("∧", DHallSyntaxKind::And),
173 ("||", DHallSyntaxKind::Or),
174 ("∨", DHallSyntaxKind::Or),
175 ("++", DHallSyntaxKind::Append),
176 ("//", DHallSyntaxKind::Combine),
177 ("⫽", DHallSyntaxKind::Combine),
178 ("/\\", DHallSyntaxKind::CombineTypes),
179 ("⩓", DHallSyntaxKind::CombineTypes),
180 ("//\\", DHallSyntaxKind::Prefer),
181 ("∀", DHallSyntaxKind::Forall),
182 ("λ", DHallSyntaxKind::Lambda),
183 ];
184
185 for (op, kind) in ops {
186 if text.starts_with(op) {
187 state.advance(op.len());
188 state.add_token(kind, start, state.get_position());
189 return true;
190 }
191 }
192
193 false
194 }
195
196 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, DHallLanguage>) -> bool {
197 let start = state.get_position();
198 let c = match state.peek() {
199 Some(c) => c,
200 None => return false,
201 };
202
203 let kind = match c {
204 '(' => DHallSyntaxKind::LeftParen,
205 ')' => DHallSyntaxKind::RightParen,
206 '[' => DHallSyntaxKind::LeftBracket,
207 ']' => DHallSyntaxKind::RightBracket,
208 '{' => DHallSyntaxKind::LeftBrace,
209 '}' => DHallSyntaxKind::RightBrace,
210 '<' => DHallSyntaxKind::Less,
211 '>' => DHallSyntaxKind::Greater,
212 ',' => DHallSyntaxKind::Comma,
213 '.' => DHallSyntaxKind::Dot,
214 ':' => DHallSyntaxKind::Colon,
215 ';' => DHallSyntaxKind::Semicolon,
216 '=' => DHallSyntaxKind::Equal,
217 '@' => DHallSyntaxKind::At,
218 '#' => DHallSyntaxKind::Hash,
219 '?' => DHallSyntaxKind::Question,
220 '+' => DHallSyntaxKind::Plus,
221 '*' => DHallSyntaxKind::Star,
222 '/' => DHallSyntaxKind::Slash,
223 '|' => DHallSyntaxKind::Pipe,
224 '\\' => DHallSyntaxKind::Lambda,
225 _ => return false,
226 };
227
228 state.advance(1);
229 state.add_token(kind, start, state.get_position());
230 true
231 }
232}