1use oak_core::{
5 lexer::{LexOutput, Lexer, LexerCache, LexerState},
6 source::{Source, TextEdit},
7};
8
9pub mod token_type;
10use crate::language::LiquidLanguage;
11use token_type::LiquidTokenType;
12
13#[derive(Debug, Clone)]
15pub struct LiquidLexer<'config> {
16 config: &'config LiquidLanguage,
18}
19
20pub(crate) type State<'a, S> = LexerState<'a, S, LiquidLanguage>;
21
22impl<'config> LiquidLexer<'config> {
23 pub fn new(config: &'config LiquidLanguage) -> Self {
25 Self { config }
26 }
27
28 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), oak_core::OakError> {
29 while state.not_at_end() {
30 let safe_point = state.get_position();
31
32 if self.skip_whitespace(state) {
33 continue;
34 }
35
36 if self.skip_comment(state) {
37 continue;
38 }
39
40 if self.lex_string(state) {
41 continue;
42 }
43
44 if self.lex_number(state) {
45 continue;
46 }
47
48 if self.lex_punctuation(state) {
49 continue;
50 }
51
52 if self.lex_identifier(state) {
53 continue;
54 }
55
56 if self.lex_html_text(state) {
57 continue;
58 }
59
60 state.advance_if_dead_lock(safe_point)
61 }
62
63 Ok(())
64 }
65
66 fn lex_html_text<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
67 let start = state.get_position();
68 while let Some(ch) = state.peek() {
69 let rest = state.rest();
70 if rest.starts_with(&self.config.variable_start) || rest.starts_with(&self.config.tag_start) || rest.starts_with(&self.config.comment_start) {
71 break;
72 }
73 state.advance(ch.len_utf8());
74 }
75 if state.get_position() > start {
76 state.add_token(LiquidTokenType::Identifier, start, state.get_position());
77 return true;
78 }
79 false
80 }
81
82 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
83 let start = state.get_position();
84 let mut found = false;
85
86 while let Some(ch) = state.peek() {
87 if ch.is_whitespace() {
88 state.advance(ch.len_utf8());
89 found = true;
90 }
91 else {
92 break;
93 }
94 }
95
96 if found {
97 state.add_token(LiquidTokenType::Whitespace, start, state.get_position());
98 }
99
100 found
101 }
102
103 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
104 let start = state.get_position();
105 if state.consume_if_starts_with(&self.config.comment_start) {
106 while state.not_at_end() {
107 if state.consume_if_starts_with(&self.config.comment_end) {
108 break;
109 }
110 if let Some(ch) = state.peek() {
111 state.advance(ch.len_utf8());
112 }
113 }
114 state.add_token(LiquidTokenType::Comment, start, state.get_position());
115 return true;
116 }
117 false
118 }
119
120 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
121 let start = state.get_position();
122
123 if let Some(quote) = state.peek() {
124 if quote == '"' || quote == '\'' {
125 state.advance(1);
126
127 while let Some(ch) = state.peek() {
128 if ch == quote {
129 state.advance(1);
130 break;
131 }
132 else if ch == '\\' {
133 state.advance(1);
134 if let Some(_) = state.peek() {
135 state.advance(1);
136 }
137 }
138 else {
139 state.advance(ch.len_utf8());
140 }
141 }
142
143 state.add_token(LiquidTokenType::String, start, state.get_position());
144 return true;
145 }
146 }
147
148 false
149 }
150
151 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
152 let start = state.get_position();
153
154 if let Some(ch) = state.peek() {
155 if ch.is_ascii_digit() {
156 state.advance(1);
157
158 while let Some(ch) = state.peek() {
159 if ch.is_ascii_digit() || ch == '.' {
160 state.advance(1);
161 }
162 else {
163 break;
164 }
165 }
166
167 state.add_token(LiquidTokenType::Number, start, state.get_position());
168 return true;
169 }
170 }
171
172 false
173 }
174
175 fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
176 let start = state.get_position();
177 let rest = state.rest();
178
179 if rest.starts_with(&self.config.variable_start) {
181 state.advance(self.config.variable_start.len());
182 state.add_token(LiquidTokenType::DoubleLeftBrace, start, state.get_position());
183 return true;
184 }
185 if rest.starts_with(&self.config.variable_end) {
186 state.advance(self.config.variable_end.len());
187 state.add_token(LiquidTokenType::DoubleRightBrace, start, state.get_position());
188 return true;
189 }
190 if rest.starts_with(&self.config.tag_start) {
191 state.advance(self.config.tag_start.len());
192 state.add_token(LiquidTokenType::LeftBracePercent, start, state.get_position());
193 return true;
194 }
195 if rest.starts_with(&self.config.tag_end) {
196 state.advance(self.config.tag_end.len());
197 state.add_token(LiquidTokenType::PercentRightBrace, start, state.get_position());
198 return true;
199 }
200
201 if let Some(ch) = state.peek() {
203 let kind = match ch {
204 '{' => LiquidTokenType::LeftBrace,
205 '}' => LiquidTokenType::RightBrace,
206 '(' => LiquidTokenType::LeftParen,
207 ')' => LiquidTokenType::RightParen,
208 '[' => LiquidTokenType::LeftBracket,
209 ']' => LiquidTokenType::RightBracket,
210 ',' => LiquidTokenType::Comma,
211 '.' => LiquidTokenType::Dot,
212 ':' => LiquidTokenType::Colon,
213 ';' => LiquidTokenType::Semicolon,
214 '|' => LiquidTokenType::Pipe,
215 '=' => LiquidTokenType::Eq,
216 '+' => LiquidTokenType::Plus,
217 '-' => LiquidTokenType::Minus,
218 '*' => LiquidTokenType::Star,
219 '/' => LiquidTokenType::Slash,
220 '%' => LiquidTokenType::Percent,
221 '!' => LiquidTokenType::Bang,
222 '?' => LiquidTokenType::Question,
223 '<' => LiquidTokenType::Lt,
224 '>' => LiquidTokenType::Gt,
225 '&' => LiquidTokenType::Amp,
226 '^' => LiquidTokenType::Caret,
227 '~' => LiquidTokenType::Tilde,
228 _ => return false,
229 };
230
231 state.advance(ch.len_utf8());
232 state.add_token(kind, start, state.get_position());
233 return true;
234 }
235
236 false
237 }
238
239 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
240 let start = state.get_position();
241
242 if let Some(ch) = state.peek() {
243 if ch.is_ascii_alphabetic() || ch == '_' {
244 state.advance(ch.len_utf8());
245
246 while let Some(ch) = state.peek() {
247 if ch.is_ascii_alphanumeric() || ch == '_' {
248 state.advance(ch.len_utf8());
249 }
250 else {
251 break;
252 }
253 }
254
255 let end = state.get_position();
256 let text = state.get_text_in((start..end).into());
257
258 let kind = match text.as_ref() {
260 "true" | "false" => LiquidTokenType::Boolean,
261 _ => LiquidTokenType::Identifier,
262 };
263 state.add_token(kind, start, end);
264 return true;
265 }
266 }
267 false
268 }
269}
270
271impl<'config> Lexer<LiquidLanguage> for LiquidLexer<'config> {
272 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<LiquidLanguage>) -> LexOutput<LiquidLanguage> {
273 let mut state = LexerState::new(source);
274 let result = self.run(&mut state);
275 if result.is_ok() {
276 state.add_eof()
277 }
278 state.finish_with_cache(result, cache)
279 }
280}