Skip to main content

oak_liquid/lexer/
mod.rs

//! Liquid lexer module.
//!
//! This module defines the lexer for Liquid templates, which is responsible for tokenizing the input.
4use oak_core::{
5    lexer::{LexOutput, Lexer, LexerCache, LexerState},
6    source::{Source, TextEdit},
7};
8
9pub mod token_type;
10use crate::language::LiquidLanguage;
11use token_type::LiquidTokenType;
12
13/// Lexer for Liquid templates
14#[derive(Debug, Clone)]
15pub struct LiquidLexer<'config> {
16    /// Language configuration
17    config: &'config LiquidLanguage,
18}
19
20pub(crate) type State<'a, S> = LexerState<'a, S, LiquidLanguage>;
21
22impl<'config> LiquidLexer<'config> {
23    /// Create a new Liquid lexer
24    pub fn new(config: &'config LiquidLanguage) -> Self {
25        Self { config }
26    }
27
28    fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), oak_core::OakError> {
29        while state.not_at_end() {
30            let safe_point = state.get_position();
31
32            if self.skip_whitespace(state) {
33                continue;
34            }
35
36            if self.skip_comment(state) {
37                continue;
38            }
39
40            if self.lex_string(state) {
41                continue;
42            }
43
44            if self.lex_number(state) {
45                continue;
46            }
47
48            if self.lex_punctuation(state) {
49                continue;
50            }
51
52            if self.lex_identifier(state) {
53                continue;
54            }
55
56            if self.lex_html_text(state) {
57                continue;
58            }
59
60            state.advance_if_dead_lock(safe_point)
61        }
62
63        Ok(())
64    }
65
66    fn lex_html_text<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
67        let start = state.get_position();
68        while let Some(ch) = state.peek() {
69            let rest = state.rest();
70            if rest.starts_with(&self.config.variable_start) || rest.starts_with(&self.config.tag_start) || rest.starts_with(&self.config.comment_start) {
71                break;
72            }
73            state.advance(ch.len_utf8());
74        }
75        if state.get_position() > start {
76            state.add_token(LiquidTokenType::Identifier, start, state.get_position());
77            return true;
78        }
79        false
80    }
81
82    fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
83        let start = state.get_position();
84        let mut found = false;
85
86        while let Some(ch) = state.peek() {
87            if ch.is_whitespace() {
88                state.advance(ch.len_utf8());
89                found = true;
90            }
91            else {
92                break;
93            }
94        }
95
96        if found {
97            state.add_token(LiquidTokenType::Whitespace, start, state.get_position());
98        }
99
100        found
101    }
102
103    fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
104        let start = state.get_position();
105        if state.consume_if_starts_with(&self.config.comment_start) {
106            while state.not_at_end() {
107                if state.consume_if_starts_with(&self.config.comment_end) {
108                    break;
109                }
110                if let Some(ch) = state.peek() {
111                    state.advance(ch.len_utf8());
112                }
113            }
114            state.add_token(LiquidTokenType::Comment, start, state.get_position());
115            return true;
116        }
117        false
118    }
119
120    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
121        let start = state.get_position();
122
123        if let Some(quote) = state.peek() {
124            if quote == '"' || quote == '\'' {
125                state.advance(1);
126
127                while let Some(ch) = state.peek() {
128                    if ch == quote {
129                        state.advance(1);
130                        break;
131                    }
132                    else if ch == '\\' {
133                        state.advance(1);
134                        if let Some(_) = state.peek() {
135                            state.advance(1);
136                        }
137                    }
138                    else {
139                        state.advance(ch.len_utf8());
140                    }
141                }
142
143                state.add_token(LiquidTokenType::String, start, state.get_position());
144                return true;
145            }
146        }
147
148        false
149    }
150
151    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
152        let start = state.get_position();
153
154        if let Some(ch) = state.peek() {
155            if ch.is_ascii_digit() {
156                state.advance(1);
157
158                while let Some(ch) = state.peek() {
159                    if ch.is_ascii_digit() || ch == '.' {
160                        state.advance(1);
161                    }
162                    else {
163                        break;
164                    }
165                }
166
167                state.add_token(LiquidTokenType::Number, start, state.get_position());
168                return true;
169            }
170        }
171
172        false
173    }
174
175    fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
176        let start = state.get_position();
177        let rest = state.rest();
178
179        // Double-character operators
180        if rest.starts_with(&self.config.variable_start) {
181            state.advance(self.config.variable_start.len());
182            state.add_token(LiquidTokenType::DoubleLeftBrace, start, state.get_position());
183            return true;
184        }
185        if rest.starts_with(&self.config.variable_end) {
186            state.advance(self.config.variable_end.len());
187            state.add_token(LiquidTokenType::DoubleRightBrace, start, state.get_position());
188            return true;
189        }
190        if rest.starts_with(&self.config.tag_start) {
191            state.advance(self.config.tag_start.len());
192            state.add_token(LiquidTokenType::LeftBracePercent, start, state.get_position());
193            return true;
194        }
195        if rest.starts_with(&self.config.tag_end) {
196            state.advance(self.config.tag_end.len());
197            state.add_token(LiquidTokenType::PercentRightBrace, start, state.get_position());
198            return true;
199        }
200
201        // Single-character operators
202        if let Some(ch) = state.peek() {
203            let kind = match ch {
204                '{' => LiquidTokenType::LeftBrace,
205                '}' => LiquidTokenType::RightBrace,
206                '(' => LiquidTokenType::LeftParen,
207                ')' => LiquidTokenType::RightParen,
208                '[' => LiquidTokenType::LeftBracket,
209                ']' => LiquidTokenType::RightBracket,
210                ',' => LiquidTokenType::Comma,
211                '.' => LiquidTokenType::Dot,
212                ':' => LiquidTokenType::Colon,
213                ';' => LiquidTokenType::Semicolon,
214                '|' => LiquidTokenType::Pipe,
215                '=' => LiquidTokenType::Eq,
216                '+' => LiquidTokenType::Plus,
217                '-' => LiquidTokenType::Minus,
218                '*' => LiquidTokenType::Star,
219                '/' => LiquidTokenType::Slash,
220                '%' => LiquidTokenType::Percent,
221                '!' => LiquidTokenType::Bang,
222                '?' => LiquidTokenType::Question,
223                '<' => LiquidTokenType::Lt,
224                '>' => LiquidTokenType::Gt,
225                '&' => LiquidTokenType::Amp,
226                '^' => LiquidTokenType::Caret,
227                '~' => LiquidTokenType::Tilde,
228                _ => return false,
229            };
230
231            state.advance(ch.len_utf8());
232            state.add_token(kind, start, state.get_position());
233            return true;
234        }
235
236        false
237    }
238
239    fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
240        let start = state.get_position();
241
242        if let Some(ch) = state.peek() {
243            if ch.is_ascii_alphabetic() || ch == '_' {
244                state.advance(ch.len_utf8());
245
246                while let Some(ch) = state.peek() {
247                    if ch.is_ascii_alphanumeric() || ch == '_' {
248                        state.advance(ch.len_utf8());
249                    }
250                    else {
251                        break;
252                    }
253                }
254
255                let end = state.get_position();
256                let text = state.get_text_in((start..end).into());
257
258                // Check if it is a boolean keyword
259                let kind = match text.as_ref() {
260                    "true" | "false" => LiquidTokenType::Boolean,
261                    _ => LiquidTokenType::Identifier,
262                };
263                state.add_token(kind, start, end);
264                return true;
265            }
266        }
267        false
268    }
269}
270
271impl<'config> Lexer<LiquidLanguage> for LiquidLexer<'config> {
272    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<LiquidLanguage>) -> LexOutput<LiquidLanguage> {
273        let mut state = LexerState::new(source);
274        let result = self.run(&mut state);
275        if result.is_ok() {
276            state.add_eof()
277        }
278        state.finish_with_cache(result, cache)
279    }
280}