1#![doc = include_str!("readme.md")]
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError, Source,
4 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
5};
6pub mod token_type;
7
8use crate::{language::IniLanguage, lexer::token_type::IniTokenType};
9
10type State<'a, S> = LexerState<'a, S, IniLanguage>;
11
12static _INI_WHITESPACE: WhitespaceConfig = WhitespaceConfig { unicode_whitespace: true };
13static _INI_COMMENT: CommentConfig = CommentConfig { line_marker: ";", block_start: "", block_end: "", nested_blocks: false };
14static _INI_STRING: StringConfig = StringConfig { quotes: &['"', '\''], escape: Some('\\') };
15
16#[derive(Clone, Debug)]
17pub struct IniLexer<'config> {
18 _config: &'config IniLanguage,
19}
20
21impl<'config> Lexer<IniLanguage> for IniLexer<'config> {
22 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<IniLanguage>) -> LexOutput<IniLanguage> {
23 let mut state: State<'_, S> = State::new(source);
24 let result = self.run(&mut state);
25 if result.is_ok() {
26 state.add_eof();
27 }
28 state.finish_with_cache(result, cache)
29 }
30}
31
32impl<'config> IniLexer<'config> {
33 pub fn new(config: &'config IniLanguage) -> Self {
34 Self { _config: config }
35 }
36
37 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
39 while state.not_at_end() {
40 let safe_point = state.get_position();
41
42 if self.skip_whitespace(state) {
43 continue;
44 }
45
46 if self.lex_newline(state) {
47 continue;
48 }
49
50 if self.skip_comment(state) {
51 continue;
52 }
53
54 if self.lex_string_literal(state) {
55 continue;
56 }
57
58 if self.lex_number_literal(state) {
59 continue;
60 }
61
62 if self.lex_identifier(state) {
63 continue;
64 }
65
66 if self.lex_punctuation(state) {
67 continue;
68 }
69
70 state.advance_if_dead_lock(safe_point);
71 }
72
73 Ok(())
74 }
75
76 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
78 let start = state.get_position();
79
80 while let Some(ch) = state.peek() {
81 if ch == ' ' || ch == '\t' || ch == '\r' {
82 state.advance(ch.len_utf8());
83 }
84 else {
85 break;
86 }
87 }
88
89 if state.get_position() > start {
90 state.add_token(IniTokenType::Whitespace, start, state.get_position());
91 return true;
92 }
93 false
94 }
95
96 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
98 let start = state.get_position();
99
100 if state.current() == Some('\n') {
101 state.advance(1);
102 state.add_token(IniTokenType::Newline, start, state.get_position());
103 return true;
104 }
105 false
106 }
107
108 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
110 let start = state.get_position();
111
112 if let Some(ch) = state.current() {
113 if ch == ';' || ch == '#' {
114 state.advance(1);
116
117 while let Some(ch) = state.peek() {
119 if ch != '\n' {
120 state.advance(ch.len_utf8());
121 }
122 else {
123 break;
124 }
125 }
126
127 state.add_token(IniTokenType::Comment, start, state.get_position());
128 return true;
129 }
130 }
131 false
132 }
133
134 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
136 let start = state.get_position();
137
138 if let Some(quote_char) = state.current() {
139 if quote_char == '"' || quote_char == '\'' {
140 state.advance(1);
142
143 while let Some(ch) = state.peek() {
144 if ch != quote_char {
145 if ch == '\\' {
146 state.advance(1); if let Some(_) = state.peek() {
148 state.advance(1); }
150 }
151 else {
152 state.advance(ch.len_utf8());
153 }
154 }
155 else {
156 state.advance(1);
158 break;
159 }
160 }
161
162 state.add_token(IniTokenType::String, start, state.get_position());
163 return true;
164 }
165 }
166 false
167 }
168
169 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
171 let start = state.get_position();
172 let first = match state.current() {
173 Some(c) => c,
174 None => return false,
175 };
176
177 if !first.is_ascii_digit() && first != '-' && first != '+' {
179 return false;
180 }
181
182 if first == '-' || first == '+' {
184 if let Some(next) = state.peek_next_n(1) {
185 if !next.is_ascii_digit() {
186 return false;
187 }
188 }
189 else {
190 return false;
191 }
192 }
193
194 state.advance(1);
195 let mut has_dot = false;
196 let mut has_exp = false;
197
198 while let Some(ch) = state.peek() {
199 if ch.is_ascii_digit() {
200 state.advance(1);
201 }
202 else if ch == '.' && !has_dot && !has_exp {
203 has_dot = true;
204 state.advance(1);
205 }
206 else if (ch == 'e' || ch == 'E') && !has_exp {
207 has_exp = true;
208 state.advance(1);
209 if let Some(sign) = state.peek() {
211 if sign == '+' || sign == '-' {
212 state.advance(1);
213 }
214 }
215 }
216 else {
217 break;
218 }
219 }
220
221 let end = state.get_position();
223 let text = state.get_text_in((start..end).into());
224
225 if text.as_ref() == "-" || text.as_ref() == "+" || text.as_ref() == "." {
227 state.set_position(start);
229 return false;
230 }
231
232 let kind = if has_dot || has_exp { IniTokenType::Float } else { IniTokenType::Integer };
234
235 state.add_token(kind, start, state.get_position());
236 true
237 }
238
239 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
241 let start = state.get_position();
242 let ch = match state.current() {
243 Some(c) => c,
244 None => return false,
245 };
246
247 if !(ch.is_ascii_alphabetic() || ch == '_') {
249 return false;
250 }
251
252 state.advance(1);
253 while let Some(c) = state.current() {
254 if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
255 state.advance(1);
256 }
257 else {
258 break;
259 }
260 }
261
262 let end = state.get_position();
263 let text = state.get_text_in((start..end).into());
264
265 let kind = match text.to_lowercase().as_str() {
267 "true" | "false" => IniTokenType::Boolean,
268 _ => {
269 if self.is_datetime_like(text.as_ref()) {
270 IniTokenType::DateTime
271 }
272 else {
273 IniTokenType::Identifier
274 }
275 }
276 };
277
278 state.add_token(kind, start, state.get_position());
279 true
280 }
281
282 fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
284 let start = state.get_position();
285
286 if state.starts_with("[[") {
288 state.advance(2);
289 state.add_token(IniTokenType::DoubleLeftBracket, start, state.get_position());
290 return true;
291 }
292
293 if state.starts_with("]]") {
294 state.advance(2);
295 state.add_token(IniTokenType::DoubleRightBracket, start, state.get_position());
296 return true;
297 }
298
299 if let Some(ch) = state.current() {
300 let kind = match ch {
301 '{' => IniTokenType::LeftBrace,
302 '}' => IniTokenType::RightBrace,
303 '[' => IniTokenType::LeftBracket,
304 ']' => IniTokenType::RightBracket,
305 ',' => IniTokenType::Comma,
306 '.' => IniTokenType::Dot,
307 '=' => IniTokenType::Equal,
308 _ => return false,
309 };
310
311 state.advance(ch.len_utf8());
312 state.add_token(kind, start, state.get_position());
313 return true;
314 }
315
316 false
317 }
318
319 fn is_datetime_like(&self, text: &str) -> bool {
320 text.contains('-') && text.contains(':')
322 }
323}