1use crate::{kind::IniSyntaxKind, language::IniLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
5 source::Source,
6};
7
8type State<'a, S> = LexerState<'a, S, IniLanguage>;
9
10static _INI_WHITESPACE: WhitespaceConfig = WhitespaceConfig { unicode_whitespace: true };
11static _INI_COMMENT: CommentConfig = CommentConfig { line_marker: ";", block_start: "", block_end: "", nested_blocks: false };
12static _INI_STRING: StringConfig = StringConfig { quotes: &['"', '\''], escape: Some('\\') };
13
14#[derive(Clone, Debug)]
15pub struct IniLexer<'config> {
16 _config: &'config IniLanguage,
17}
18
19impl<'config> Lexer<IniLanguage> for IniLexer<'config> {
20 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<IniLanguage>) -> LexOutput<IniLanguage> {
21 let mut state: State<'_, S> = State::new(source);
22 let result = self.run(&mut state);
23 if result.is_ok() {
24 state.add_eof();
25 }
26 state.finish_with_cache(result, cache)
27 }
28}
29
30impl<'config> IniLexer<'config> {
31 pub fn new(config: &'config IniLanguage) -> Self {
32 Self { _config: config }
33 }
34
35 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
37 while state.not_at_end() {
38 let safe_point = state.get_position();
39
40 if self.skip_whitespace(state) {
41 continue;
42 }
43
44 if self.lex_newline(state) {
45 continue;
46 }
47
48 if self.skip_comment(state) {
49 continue;
50 }
51
52 if self.lex_string_literal(state) {
53 continue;
54 }
55
56 if self.lex_number_literal(state) {
57 continue;
58 }
59
60 if self.lex_identifier(state) {
61 continue;
62 }
63
64 if self.lex_punctuation(state) {
65 continue;
66 }
67
68 state.advance_if_dead_lock(safe_point);
69 }
70
71 Ok(())
72 }
73
74 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
76 let start = state.get_position();
77
78 while let Some(ch) = state.peek() {
79 if ch == ' ' || ch == '\t' || ch == '\r' {
80 state.advance(ch.len_utf8());
81 }
82 else {
83 break;
84 }
85 }
86
87 if state.get_position() > start {
88 state.add_token(IniSyntaxKind::Whitespace, start, state.get_position());
89 return true;
90 }
91 false
92 }
93
94 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
96 let start = state.get_position();
97
98 if state.current() == Some('\n') {
99 state.advance(1);
100 state.add_token(IniSyntaxKind::Newline, start, state.get_position());
101 return true;
102 }
103 false
104 }
105
106 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
108 let start = state.get_position();
109
110 if let Some(ch) = state.current() {
111 if ch == ';' || ch == '#' {
112 state.advance(1);
114
115 while let Some(ch) = state.peek() {
117 if ch != '\n' {
118 state.advance(ch.len_utf8());
119 }
120 else {
121 break;
122 }
123 }
124
125 state.add_token(IniSyntaxKind::Comment, start, state.get_position());
126 return true;
127 }
128 }
129 false
130 }
131
132 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
134 let start = state.get_position();
135
136 if let Some(quote_char) = state.current() {
137 if quote_char == '"' || quote_char == '\'' {
138 state.advance(1);
140
141 while let Some(ch) = state.peek() {
142 if ch != quote_char {
143 if ch == '\\' {
144 state.advance(1); if let Some(_) = state.peek() {
146 state.advance(1); }
148 }
149 else {
150 state.advance(ch.len_utf8());
151 }
152 }
153 else {
154 state.advance(1);
156 break;
157 }
158 }
159
160 state.add_token(IniSyntaxKind::String, start, state.get_position());
161 return true;
162 }
163 }
164 false
165 }
166
167 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
169 let start = state.get_position();
170 let first = match state.current() {
171 Some(c) => c,
172 None => return false,
173 };
174
175 if !first.is_ascii_digit() && first != '-' && first != '+' {
177 return false;
178 }
179
180 if first == '-' || first == '+' {
182 if let Some(next) = state.peek_next_n(1) {
183 if !next.is_ascii_digit() {
184 return false;
185 }
186 }
187 else {
188 return false;
189 }
190 }
191
192 state.advance(1);
193 let mut has_dot = false;
194 let mut has_exp = false;
195
196 while let Some(ch) = state.peek() {
197 if ch.is_ascii_digit() {
198 state.advance(1);
199 }
200 else if ch == '.' && !has_dot && !has_exp {
201 has_dot = true;
202 state.advance(1);
203 }
204 else if (ch == 'e' || ch == 'E') && !has_exp {
205 has_exp = true;
206 state.advance(1);
207 if let Some(sign) = state.peek() {
209 if sign == '+' || sign == '-' {
210 state.advance(1);
211 }
212 }
213 }
214 else {
215 break;
216 }
217 }
218
219 let end = state.get_position();
221 let text = state.get_text_in((start..end).into());
222
223 if text.as_ref() == "-" || text.as_ref() == "+" || text.as_ref() == "." {
225 state.set_position(start);
227 return false;
228 }
229
230 let kind = if has_dot || has_exp { IniSyntaxKind::Float } else { IniSyntaxKind::Integer };
232
233 state.add_token(kind, start, state.get_position());
234 true
235 }
236
237 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
239 let start = state.get_position();
240 let ch = match state.current() {
241 Some(c) => c,
242 None => return false,
243 };
244
245 if !(ch.is_ascii_alphabetic() || ch == '_') {
247 return false;
248 }
249
250 state.advance(1);
251 while let Some(c) = state.current() {
252 if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
253 state.advance(1);
254 }
255 else {
256 break;
257 }
258 }
259
260 let end = state.get_position();
261 let text = state.get_text_in((start..end).into());
262
263 let kind = match text.to_lowercase().as_str() {
265 "true" | "false" => IniSyntaxKind::Boolean,
266 _ => {
267 if self.is_datetime_like(text.as_ref()) {
268 IniSyntaxKind::DateTime
269 }
270 else {
271 IniSyntaxKind::Identifier
272 }
273 }
274 };
275
276 state.add_token(kind, start, state.get_position());
277 true
278 }
279
280 fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
282 let start = state.get_position();
283
284 if state.starts_with("[[") {
286 state.advance(2);
287 state.add_token(IniSyntaxKind::DoubleLeftBracket, start, state.get_position());
288 return true;
289 }
290
291 if state.starts_with("]]") {
292 state.advance(2);
293 state.add_token(IniSyntaxKind::DoubleRightBracket, start, state.get_position());
294 return true;
295 }
296
297 if let Some(ch) = state.current() {
298 let kind = match ch {
299 '{' => IniSyntaxKind::LeftBrace,
300 '}' => IniSyntaxKind::RightBrace,
301 '[' => IniSyntaxKind::LeftBracket,
302 ']' => IniSyntaxKind::RightBracket,
303 ',' => IniSyntaxKind::Comma,
304 '.' => IniSyntaxKind::Dot,
305 '=' => IniSyntaxKind::Equal,
306 _ => return false,
307 };
308
309 state.advance(ch.len_utf8());
310 state.add_token(kind, start, state.get_position());
311 return true;
312 }
313
314 false
315 }
316
317 fn is_datetime_like(&self, text: &str) -> bool {
319 if text.len() < 8 {
322 return false;
323 }
324
325 if text.contains('-') || text.contains(':') || text.contains('T') {
327 let chars: Vec<char> = text.chars().collect();
329 let mut digit_count = 0;
330 let mut separator_count = 0;
331
332 for ch in chars {
333 if ch.is_ascii_digit() {
334 digit_count += 1;
335 }
336 else if ch == '-' || ch == ':' || ch == 'T' || ch == 'Z' || ch == '+' {
337 separator_count += 1;
338 }
339 }
340
341 digit_count > separator_count && digit_count >= 6
343 }
344 else {
345 false
346 }
347 }
348}