pipeline_script/lexer/mod.rs

pub mod iter;
pub mod position;
pub mod token;

use crate::lexer::position::Position;
use crate::lexer::token::Token;
use ariadne::{Label, Report, ReportKind, Source};
use logos::{Lexer as LogosLexer, Logos, Span};

#[derive(Debug, Clone)]
pub struct Lexer {
    /// Name of the file being lexed, used in diagnostics.
    file_name: String,
    /// Underlying logos lexer over the leaked source.
    logos_lexer: LogosLexer<'static, Token>,
    /// Source text, leaked to obtain a 'static lifetime.
    source: &'static str,
    /// Byte offset at which each line starts, for line lookups.
    line_starts: Vec<usize>,
    /// Set once the Eof token has been emitted.
    is_eof: bool,
}

impl Lexer {
    /// Creates a lexer over an empty source; call `set_chars` to supply input.
    pub fn new(file_name: impl Into<String>) -> Self {
        // Leak an empty string so the logos lexer can borrow it for 'static.
        let empty_source: &'static str = Box::leak(String::new().into_boxed_str());
        let logos_lexer = Token::lexer(empty_source);

        Self {
            file_name: file_name.into(),
            logos_lexer,
            source: empty_source,
            line_starts: vec![0],
            is_eof: false,
        }
    }

    /// True once Eof has been emitted or the cursor has reached the end.
    pub fn is_eof(&self) -> bool {
        self.is_eof || self.logos_lexer.span().start >= self.source.len()
    }

    pub fn get_file_name(&self) -> String {
        self.file_name.clone()
    }

    /// Returns the 1-based `line` of the source, without its trailing
    /// newline. Out-of-range line numbers yield an empty string.
    pub fn line(&self, line: usize) -> String {
        if line == 0 || line > self.line_starts.len() {
            return String::new();
        }

        let start = self.line_starts[line - 1];
        let end = if line < self.line_starts.len() {
            self.line_starts[line]
        } else {
            self.source.len()
        };

        self.source[start..end].trim_end_matches('\n').to_string()
    }

    /// Replaces the source text and resets the lexer state.
    pub fn set_chars(&mut self, chars: Vec<char>) {
        let source_string: String = chars.into_iter().collect();
        // Leak the new source; the previous allocation is never reclaimed.
        let source: &'static str = Box::leak(source_string.into_boxed_str());
        self.source = source;
        self.logos_lexer = Token::lexer(source);
        self.compute_line_starts();
        self.is_eof = false;
    }

    /// Builds a lexer directly from a script string.
    pub fn from_script(file_name: impl Into<String>, script: impl Into<String>) -> Self {
        let script_string = script.into();
        let source: &'static str = Box::leak(script_string.into_boxed_str());
        let logos_lexer = Token::lexer(source);

        let mut lexer = Self {
            file_name: file_name.into(),
            logos_lexer,
            source,
            line_starts: Vec::new(),
            is_eof: false,
        };

        lexer.compute_line_starts();
        lexer
    }

    /// Returns the next token and its position. Emits `Token::Eof` exactly
    /// once at the end of input; afterwards, and on an unrecognized
    /// character (after printing a diagnostic), returns `None`.
    pub fn next_token(&mut self) -> Option<(Token, Position)> {
        if self.is_eof {
            return None;
        }

        match self.logos_lexer.next() {
            Some(Ok(token)) => {
                let span = self.logos_lexer.span();
                let position = self.create_position_from_span(span);
                Some((token, position))
            }
            Some(Err(_)) => {
                // Report the unrecognized character with ariadne, then stop.
                let span = self.logos_lexer.span();
                Report::build(ReportKind::Warning, (self.file_name.as_str(), span.clone()))
                    .with_message(format!("illegal character {}", self.logos_lexer.slice()))
                    .with_label(
                        Label::new((self.file_name.as_str(), span))
                            .with_message("this character cannot be parsed"),
                    )
                    .finish()
                    .print((self.file_name.as_str(), Source::from(self.source)))
                    .unwrap();
                None
            }
            None => {
                self.is_eof = true;
                let position =
                    Position::new(self.file_name.clone(), self.source.len(), self.source.len());
                Some((Token::Eof, position))
            }
        }
    }

    pub fn get_source(&self) -> &str {
        self.source
    }

    /// Records the byte offset at which each line of the source begins.
    fn compute_line_starts(&mut self) {
        self.line_starts.clear();
        self.line_starts.push(0);

        for (i, ch) in self.source.char_indices() {
            if ch == '\n' {
                self.line_starts.push(i + 1);
            }
        }
    }

    /// Converts a byte span into a `Position` carrying the file name.
    fn create_position_from_span(&self, span: Span) -> Position {
        Position::new(self.file_name.clone(), span.start, span.end)
    }
}

/// Coarse classification of tokens.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum TokenType {
    Identifier,
    Integer,
    Float,
    Symbol,
}

impl From<&Token> for TokenType {
    fn from(token: &Token) -> Self {
        match token {
            Token::Identifier(_) => TokenType::Identifier,
            Token::Int(_) => TokenType::Integer,
            Token::Float(_) => TokenType::Float,
            _ => TokenType::Symbol,
        }
    }
}

#[derive(Debug, Clone, PartialEq)]
pub enum State {
    Initial,
    Identifier,
    Number,
    Decimal,
    String,
    FormatString,
    Comment,
    MultilineComment,
    Done(TokenType),
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_basic_tokenization() {
        let mut lexer = Lexer::from_script("test.script", "let x = 42;");

        let tokens: Vec<_> = std::iter::from_fn(|| lexer.next_token())
            .map(|(token, _)| token)
            .collect();

        // tokens[0] is the leading `let` keyword token.
        assert_eq!(tokens[1], Token::Identifier("x".to_string()));
        assert_eq!(tokens[2], Token::Assign);
        assert_eq!(tokens[3], Token::Int(42));
        assert_eq!(tokens[4], Token::Eof);
    }

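    // Added sketch (uses only this module's API): the From<&Token> impl
    // collapses concrete tokens into coarse TokenType categories, and
    // anything that is not an identifier or numeric literal falls through
    // the wildcard arm to Symbol.
    #[test]
    fn test_token_type_classification() {
        assert_eq!(TokenType::from(&Token::Identifier("a".to_string())), TokenType::Identifier);
        assert_eq!(TokenType::from(&Token::Int(42)), TokenType::Integer);
        assert_eq!(TokenType::from(&Token::Assign), TokenType::Symbol);
    }
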
    #[test]
    fn test_string_parsing() {
        let mut lexer = Lexer::from_script("test.script", r#""hello world""#);

        if let Some((Token::String(s), _)) = lexer.next_token() {
            assert_eq!(s, "hello world");
        } else {
            panic!("Expected string token");
        }
    }

    #[test]
    fn test_format_string_parsing() {
        let mut lexer = Lexer::from_script("test.script", r#"f"hello {name}""#);

        if let Some((Token::FormatString(s), _)) = lexer.next_token() {
            assert_eq!(s, "hello {name}");
        } else {
            panic!("Expected format string token");
        }
    }

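    // Added sketch: exercises the 1-based line() lookup built on
    // compute_line_starts(); out-of-range line numbers return "".
    #[test]
    fn test_line_lookup() {
        let lexer = Lexer::from_script("test.script", "let a = 1;\nlet b = 2;");
        assert_eq!(lexer.line(1), "let a = 1;");
        assert_eq!(lexer.line(2), "let b = 2;");
        assert_eq!(lexer.line(0), "");
        assert_eq!(lexer.line(3), "");
    }
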
    #[test]
    fn test_comments_are_skipped() {
        let mut lexer =
            Lexer::from_script("test.script", "// comment\nlet x = 1; /* block comment */");

        let tokens: Vec<_> = std::iter::from_fn(|| lexer.next_token())
            .map(|(token, _)| token)
            .collect();

        assert_eq!(tokens[1], Token::Identifier("x".to_string()));
        assert_eq!(tokens[2], Token::Assign);
        assert_eq!(tokens[3], Token::Int(1));
    }
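
    // Added sketch: a lexer built with new() starts at EOF over its empty
    // source (the initial cursor is already at the end); set_chars() swaps
    // in fresh text and clears the EOF state so the lexer can be reused.
    #[test]
    fn test_set_chars_resets_state() {
        let mut lexer = Lexer::new("test.script");
        assert!(lexer.is_eof());

        lexer.set_chars("x".chars().collect());
        assert!(!lexer.is_eof());
        assert_eq!(lexer.next_token().map(|(t, _)| t), Some(Token::Identifier("x".to_string())));
        assert_eq!(lexer.next_token().map(|(t, _)| t), Some(Token::Eof));
        assert!(lexer.next_token().is_none());
    }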
}