1#![doc = include_str!("readme.md")]
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError, Source,
4 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
5};
6pub mod token_type;
7
8use crate::{language::IniLanguage, lexer::token_type::IniTokenType};
9
10pub(crate) type State<'a, S> = LexerState<'a, S, IniLanguage>;
11
12static _INI_WHITESPACE: WhitespaceConfig = WhitespaceConfig { unicode_whitespace: true };
13static _INI_COMMENT: CommentConfig = CommentConfig { line_marker: ";", block_start: "", block_end: "", nested_blocks: false };
14static _INI_STRING: StringConfig = StringConfig { quotes: &['"', '\''], escape: Some('\\') };
15
16#[derive(Clone, Debug)]
18pub struct IniLexer<'config> {
19 config: &'config IniLanguage,
21}
22
23impl<'config> Lexer<IniLanguage> for IniLexer<'config> {
24 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<IniLanguage>) -> LexOutput<IniLanguage> {
25 let mut state: State<'_, S> = State::new(source);
26 let result = self.run(&mut state);
27 if result.is_ok() {
28 state.add_eof();
29 }
30 state.finish_with_cache(result, cache)
31 }
32}
33
34impl<'config> IniLexer<'config> {
35 pub fn new(config: &'config IniLanguage) -> Self {
37 Self { config }
38 }
39
40 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
42 while state.not_at_end() {
43 let safe_point = state.get_position();
44
45 if self.skip_whitespace(state) {
46 continue;
47 }
48
49 if self.lex_newline(state) {
50 continue;
51 }
52
53 if self.skip_comment(state) {
54 continue;
55 }
56
57 if self.lex_string_literal(state) {
58 continue;
59 }
60
61 if self.lex_number_literal(state) {
62 continue;
63 }
64
65 if self.lex_identifier(state) {
66 continue;
67 }
68
69 if self.lex_punctuation(state) {
70 continue;
71 }
72
73 state.advance_if_dead_lock(safe_point);
74 }
75
76 Ok(())
77 }
78
79 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
81 let start = state.get_position();
82
83 while let Some(ch) = state.peek() {
84 if ch == ' ' || ch == '\t' || ch == '\r' {
85 state.advance(ch.len_utf8());
86 }
87 else {
88 break;
89 }
90 }
91
92 if state.get_position() > start {
93 state.add_token(IniTokenType::Whitespace, start, state.get_position());
94 return true;
95 }
96 false
97 }
98
99 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
101 let start = state.get_position();
102
103 if state.current() == Some('\n') {
104 state.advance(1);
105 state.add_token(IniTokenType::Newline, start, state.get_position());
106 return true;
107 }
108 false
109 }
110
111 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
113 let start = state.get_position();
114
115 if let Some(ch) = state.current() {
116 if ch == ';' || ch == '#' {
117 state.advance(1);
119
120 while let Some(ch) = state.peek() {
122 if ch != '\n' {
123 state.advance(ch.len_utf8());
124 }
125 else {
126 break;
127 }
128 }
129
130 state.add_token(IniTokenType::Comment, start, state.get_position());
131 return true;
132 }
133 }
134 false
135 }
136
137 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
139 let start = state.get_position();
140
141 if let Some(quote_char) = state.current() {
142 if quote_char == '"' || quote_char == '\'' {
143 state.advance(1);
145
146 while let Some(ch) = state.peek() {
147 if ch != quote_char {
148 if ch == '\\' {
149 state.advance(1); if let Some(_) = state.peek() {
151 state.advance(1); }
153 }
154 else {
155 state.advance(ch.len_utf8());
156 }
157 }
158 else {
159 state.advance(1);
161 break;
162 }
163 }
164
165 state.add_token(IniTokenType::String, start, state.get_position());
166 return true;
167 }
168 }
169 false
170 }
171
172 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
174 let start = state.get_position();
175 let first = match state.current() {
176 Some(c) => c,
177 None => return false,
178 };
179
180 if !first.is_ascii_digit() && first != '-' && first != '+' {
182 return false;
183 }
184
185 if first == '-' || first == '+' {
187 if let Some(next) = state.peek_next_n(1) {
188 if !next.is_ascii_digit() {
189 return false;
190 }
191 }
192 else {
193 return false;
194 }
195 }
196
197 state.advance(1);
198 let mut has_dot = false;
199 let mut has_exp = false;
200
201 while let Some(ch) = state.peek() {
202 if ch.is_ascii_digit() {
203 state.advance(1);
204 }
205 else if ch == '.' && !has_dot && !has_exp {
206 has_dot = true;
207 state.advance(1);
208 }
209 else if (ch == 'e' || ch == 'E') && !has_exp {
210 has_exp = true;
211 state.advance(1);
212 if let Some(sign) = state.peek() {
214 if sign == '+' || sign == '-' {
215 state.advance(1);
216 }
217 }
218 }
219 else {
220 break;
221 }
222 }
223
224 let end = state.get_position();
226 let text = state.get_text_in((start..end).into());
227
228 if text.as_ref() == "-" || text.as_ref() == "+" || text.as_ref() == "." {
230 state.set_position(start);
232 return false;
233 }
234
235 let kind = if has_dot || has_exp { IniTokenType::Float } else { IniTokenType::Integer };
237
238 state.add_token(kind, start, state.get_position());
239 true
240 }
241
242 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
244 let start = state.get_position();
245 let ch = match state.current() {
246 Some(c) => c,
247 None => return false,
248 };
249
250 if !(ch.is_ascii_alphabetic() || ch == '_') {
252 return false;
253 }
254
255 state.advance(1);
256 while let Some(c) = state.current() {
257 if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
258 state.advance(1);
259 }
260 else {
261 break;
262 }
263 }
264
265 let end = state.get_position();
266 let text = state.get_text_in((start..end).into());
267
268 let kind = match text.to_lowercase().as_str() {
270 "true" | "false" => IniTokenType::Boolean,
271 _ => {
272 if self.is_datetime_like(text.as_ref()) {
273 IniTokenType::DateTime
274 }
275 else {
276 IniTokenType::Identifier
277 }
278 }
279 };
280
281 state.add_token(kind, start, state.get_position());
282 true
283 }
284
285 fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
287 let start = state.get_position();
288
289 if state.starts_with("[[") {
291 state.advance(2);
292 state.add_token(IniTokenType::DoubleLeftBracket, start, state.get_position());
293 return true;
294 }
295
296 if state.starts_with("]]") {
297 state.advance(2);
298 state.add_token(IniTokenType::DoubleRightBracket, start, state.get_position());
299 return true;
300 }
301
302 if let Some(ch) = state.current() {
303 let kind = match ch {
304 '{' => IniTokenType::LeftBrace,
305 '}' => IniTokenType::RightBrace,
306 '[' => IniTokenType::LeftBracket,
307 ']' => IniTokenType::RightBracket,
308 ',' => IniTokenType::Comma,
309 '.' => IniTokenType::Dot,
310 '=' => IniTokenType::Equal,
311 _ => return false,
312 };
313
314 state.advance(ch.len_utf8());
315 state.add_token(kind, start, state.get_position());
316 return true;
317 }
318
319 false
320 }
321
322 fn is_datetime_like(&self, text: &str) -> bool {
323 text.contains('-') && text.contains(':')
325 }
326}