1use crate::{TomlSyntaxKind, language::TomlLanguage};
2use oak_core::{
3 Lexer, LexerState, OakError, TextEdit,
4 lexer::{LexOutput, LexerCache},
5 source::Source,
6};
7
8type State<'a, S> = LexerState<'a, S, TomlLanguage>;
9
10#[derive(Clone, Debug)]
11pub struct TomlLexer<'config> {
12 _config: &'config TomlLanguage,
13}
14
15impl<'config> Lexer<TomlLanguage> for TomlLexer<'config> {
16 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<TomlLanguage>) -> LexOutput<TomlLanguage> {
17 let mut state = State::new(source);
18 let result = self.run(&mut state);
19 if result.is_ok() {
20 state.add_eof();
21 }
22 state.finish_with_cache(result, cache)
23 }
24}
25
26impl<'config> TomlLexer<'config> {
27 pub fn new(config: &'config TomlLanguage) -> Self {
28 Self { _config: config }
29 }
30
31 fn run<S: Source + ?Sized>(&self, state: &mut State<S>) -> Result<(), OakError> {
33 while state.not_at_end() {
34 if let Some(ch) = state.peek() {
35 match ch {
36 ' ' | '\t' | '\n' | '\r' => {
37 self.skip_whitespace(state);
38 }
39 '#' => {
40 self.skip_comment(state);
41 }
42 '"' | '\'' => {
43 self.lex_string(state);
44 }
45 '0'..='9' | '+' | '-' => {
46 self.lex_number(state);
47 }
48 '[' | ']' | '{' | '}' | ',' | '.' | '=' => {
49 self.lex_punctuation(state);
50 }
51 'a'..='z' | 'A'..='Z' | '_' => {
52 self.lex_identifier(state);
53 }
54 _ => {
55 if self.lex_punctuation(state) {
57 continue;
58 }
59 state.advance(1);
61 }
62 }
63 }
64 else {
65 break;
66 }
67 }
68 Ok(())
69 }
70
71 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
73 let start_pos = state.get_position();
74
75 while let Some(ch) = state.current() {
76 if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
77 state.advance(1);
78 }
79 else {
80 break;
81 }
82 }
83
84 if state.get_position() > start_pos {
85 state.add_token(TomlSyntaxKind::Whitespace, start_pos, state.get_position());
86 true
87 }
88 else {
89 false
90 }
91 }
92
93 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
95 if state.current() == Some('#') {
96 let start_pos = state.get_position();
97 state.advance(1);
98
99 while let Some(ch) = state.current() {
101 if ch == '\n' || ch == '\r' {
102 break;
103 }
104 state.advance(ch.len_utf8());
105 }
106
107 state.add_token(TomlSyntaxKind::Comment, start_pos, state.get_position());
108 true
109 }
110 else {
111 false
112 }
113 }
114
115 fn lex_string<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
117 match state.current() {
118 Some('"') => {
119 let start = state.get_position();
120 state.advance(1);
121
122 while let Some(ch) = state.current() {
124 if ch == '"' {
125 state.advance(1);
126 break;
127 }
128 if ch == '\\' {
129 state.advance(1); if state.current().is_some() {
131 state.advance(1);
132 }
133 }
134 else {
135 state.advance(1);
136 }
137 }
138
139 let end = state.get_position();
140 state.add_token(TomlSyntaxKind::BasicString, start, end);
141 true
142 }
143 Some('\'') => {
144 let start = state.get_position();
145 state.advance(1);
146
147 while let Some(ch) = state.current() {
149 if ch == '\'' {
150 state.advance(1);
151 break;
152 }
153 state.advance(1);
154 }
155
156 let end = state.get_position();
157 state.add_token(TomlSyntaxKind::LiteralString, start, end);
158 true
159 }
160 _ => false,
161 }
162 }
163
164 fn lex_number<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
166 if !state.current().map_or(false, |c| c.is_ascii_digit() || c == '-' || c == '+') {
167 return false;
168 }
169
170 let start = state.get_position();
171
172 if matches!(state.current(), Some('-') | Some('+')) {
174 state.advance(1);
175 }
176
177 while state.current().map_or(false, |c| c.is_ascii_digit()) {
179 state.advance(1);
180 }
181
182 let mut is_float = false;
184 if state.current() == Some('.') {
185 is_float = true;
186 state.advance(1);
187 while state.current().map_or(false, |c| c.is_ascii_digit()) {
188 state.advance(1);
189 }
190 }
191
192 let end = state.get_position();
193 let kind = if is_float { TomlSyntaxKind::Float } else { TomlSyntaxKind::Integer };
194 state.add_token(kind, start, end);
195 true
196 }
197
198 fn lex_punctuation<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
200 let start = state.get_position();
201
202 match state.current() {
203 Some('[') => {
204 state.advance(1);
205 if state.current() == Some('[') {
206 state.advance(1);
207 let end = state.get_position();
208 state.add_token(TomlSyntaxKind::DoubleLeftBracket, start, end);
209 }
210 else {
211 let end = state.get_position();
212 state.add_token(TomlSyntaxKind::LeftBracket, start, end);
213 }
214 true
215 }
216 Some(']') => {
217 state.advance(1);
218 if state.current() == Some(']') {
219 state.advance(1);
220 let end = state.get_position();
221 state.add_token(TomlSyntaxKind::DoubleRightBracket, start, end);
222 }
223 else {
224 let end = state.get_position();
225 state.add_token(TomlSyntaxKind::RightBracket, start, end);
226 }
227 true
228 }
229 Some('{') => {
230 state.advance(1);
231 let end = state.get_position();
232 state.add_token(TomlSyntaxKind::LeftBrace, start, end);
233 true
234 }
235 Some('}') => {
236 state.advance(1);
237 let end = state.get_position();
238 state.add_token(TomlSyntaxKind::RightBrace, start, end);
239 true
240 }
241 Some(',') => {
242 state.advance(1);
243 let end = state.get_position();
244 state.add_token(TomlSyntaxKind::Comma, start, end);
245 true
246 }
247 Some('.') => {
248 state.advance(1);
249 let end = state.get_position();
250 state.add_token(TomlSyntaxKind::Dot, start, end);
251 true
252 }
253 Some('=') => {
254 state.advance(1);
255 let end = state.get_position();
256 state.add_token(TomlSyntaxKind::Equal, start, end);
257 true
258 }
259 _ => false,
260 }
261 }
262
263 fn lex_identifier<S: Source + ?Sized>(&self, state: &mut State<S>) -> bool {
265 if !state.current().map_or(false, |c| c.is_ascii_alphabetic() || c == '_') {
266 return false;
267 }
268
269 let start = state.get_position();
270
271 while state.current().map_or(false, |c| c.is_ascii_alphanumeric() || c == '_' || c == '-') {
272 state.advance(1);
273 }
274
275 let end = state.get_position();
276
277 let text = state.get_text_in((start..end).into());
279 let kind = match text.as_ref() {
280 "true" | "false" => TomlSyntaxKind::Boolean,
281 _ => TomlSyntaxKind::BareKey,
282 };
283
284 state.add_token(kind, start, end);
285 true
286 }
287}