1use smol_str::SmolStr;
4use std::ops::{Deref, DerefMut};
5
6use crate::{
7 error::ParseError,
8 state::State,
9 token::{Token, TokenType},
10 utils::can_be_identifier,
11};
12
13#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
15#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
16pub struct Lexer<'a> {
17 pub(crate) input: &'a str,
19
20 pub(crate) chars: Vec<char>,
22
23 pub(crate) errors: Vec<ParseError>,
26
27 pub(crate) state: State,
29}
30
31impl<'a> Lexer<'a> {
32 #[inline]
34 pub fn new(input: &'a str) -> Self {
35 Self::default().with_input(input)
36 }
37
38 #[inline]
40 pub fn with_input(mut self, input: &'a str) -> Self {
41 self.set_input(input);
42 self
43 }
44
45 #[inline]
47 pub fn set_input(&mut self, input: &'a str) {
48 self.input = input;
49 self.chars = input.chars().collect();
50 self.last_whitespace = self.skip_whitespace();
51 }
52
53 #[inline]
55 pub fn save_state(&self) -> State {
56 self.state.clone()
57 }
58
59 #[inline]
61 pub fn set_state(&mut self, state: State) {
62 self.state = state;
63 }
64
65 pub fn next_token(&mut self) -> Token {
68 if !self.errors.is_empty() {
69 let error = self.errors.remove(0);
70 let start = error.start();
71
72 return TokenType::Error(error).into_token(start, self.lexer_position, "", "");
73 }
74
75 let start = self.lexer_position;
76
77 TokenType::try_lex(self)
78 .map(|token_type| {
79 let whitespaces = self.skip_whitespace();
80 let spaces_before = self.last_whitespace.clone();
81 let spaces_after = whitespaces.clone();
82
83 self.last_whitespace = whitespaces;
84
85 token_type.into_token(start, self.lexer_position, spaces_before, spaces_after)
86 })
87 .unwrap_or_else(|| Token::END_OF_FILE)
88 }
89
90 #[inline]
92 pub fn current_char(&self) -> Option<char> {
93 self.chars.get(self.position).copied()
94 }
95
96 #[inline]
98 pub fn next_char(&self) -> Option<char> {
99 self.chars.get(self.position + 1).copied()
100 }
101
102 #[inline]
105 pub fn consume(&mut self, character: char) -> bool {
106 if self.current_char() == Some(character) {
107 self.increment_position_by_char(character);
108
109 true
110 } else {
111 false
112 }
113 }
114
115 #[inline]
118 pub fn consume_with_next(&mut self, character: char) -> bool {
119 if self.next_char() == Some(character) {
120 let current_char = self.current_char().unwrap();
121
122 self.increment_position_by_char(current_char);
123 self.increment_position_by_char(character);
124
125 true
126 } else {
127 false
128 }
129 }
130
131 pub fn consume_identifier(&mut self) -> SmolStr {
134 let start = self.position;
135 while let Some(character) = self.current_char() {
136 if can_be_identifier(character) {
137 self.increment_position_by_char(character);
138 } else {
139 break;
140 }
141 }
142
143 self.input[start..self.position].into()
144 }
145
146 pub fn skip_whitespace(&mut self) -> SmolStr {
149 let start = self.position;
150 while let Some(character) = self.current_char() {
151 if character.is_whitespace() {
152 self.increment_position_by_char(character);
153 } else {
154 break;
155 }
156 }
157
158 (start != self.position)
159 .then(|| self.input[start..self.position].into())
160 .unwrap_or_default()
161 }
162}
163
164impl Deref for Lexer<'_> {
165 type Target = State;
166
167 fn deref(&self) -> &Self::Target {
168 &self.state
169 }
170}
171
172impl DerefMut for Lexer<'_> {
173 fn deref_mut(&mut self) -> &mut Self::Target {
174 &mut self.state
175 }
176}
177
178pub trait Lexable: Sized {
180 fn try_lex(lexer: &mut Lexer) -> Option<Self>;
182}