pipeline_script/lexer/
token.rs

1use logos::Logos;
2use std::fmt::{Display, Formatter};
3
/// Lexical tokens of the language; the lexer itself is derived with the `logos` crate.
///
/// Naming note: in this enum `BraceLeft` / `BraceRight` are the round
/// parentheses `(` `)`, while `ParenLeft` / `ParenRight` are the curly
/// braces `{` `}`.
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(skip r"[ \t\n\f\r;]+")] // skip whitespace; note that `;` is in the class and is skipped too
#[logos(skip r"//[^\n]*")] // skip single-line comments
#[logos(skip r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/")] // skip multi-line comments
pub enum Token {
    // Reserved words: every keyword shares the `Keyword` variant and carries
    // its own source text as the payload.
    #[token("if", |lex| lex.slice().to_owned())]
    #[token("else", |lex| lex.slice().to_owned())]
    #[token("while", |lex| lex.slice().to_owned())]
    #[token("fn", |lex| lex.slice().to_owned())]
    #[token("extern", |lex| lex.slice().to_owned())]
    #[token("struct", |lex| lex.slice().to_owned())]
    #[token("return", |lex| lex.slice().to_owned())]
    #[token("let", |lex| lex.slice().to_owned())]
    #[token("match", |lex| lex.slice().to_owned())]
    #[token("import", |lex| lex.slice().to_owned())]
    #[token("const", |lex| lex.slice().to_owned())]
    #[token("continue", |lex| lex.slice().to_owned())]
    #[token("break", |lex| lex.slice().to_owned())]
    #[token("enum", |lex| lex.slice().to_owned())]
    #[token("class", |lex| lex.slice().to_owned())]
    #[token("module", |lex| lex.slice().to_owned())]
    #[token("static", |lex| lex.slice().to_owned())]
    Keyword(String),
    // Boolean literals as matched by the lexer (see also `Boolean` below).
    #[token("true")]
    True,
    #[token("false")]
    False,

    // String literal: double-quoted, backslash escapes allowed. The payload is
    // the unquoted, unescaped content produced by `parse_string`.
    #[regex(r#""([^"\\]|\\.)*""#, |lex| parse_string(lex.slice()))]
    String(String),

    // Format string: a string literal prefixed with `f`. The payload is the
    // unquoted, unescaped content produced by `parse_format_string`.
    #[regex(r#"f"([^"\\]|\\.)*""#, |lex| parse_format_string(lex.slice()))]
    FormatString(String),

    // Integer literals: optional leading `-`, decimal digits, then an optional
    // `_` and a width suffix (`10i8` and `10_i8` are both accepted). A literal
    // without a suffix is parsed as `i64`. Each callback yields `None` when the
    // digits do not fit the target type.
    #[regex(r"-?[0-9]+(_)?i8", |lex| parse_i8(lex.slice()))]
    Int8(i8),
    #[regex(r"-?[0-9]+(_)?i16", |lex| parse_i16(lex.slice()))]
    Int16(i16),
    #[regex(r"-?[0-9]+(_)?i32", |lex| parse_i32(lex.slice()))]
    Int32(i32),
    #[regex(r"-?[0-9]+(_)?i64", |lex| parse_i64(lex.slice()))]
    Int64(i64),
    #[regex(r"-?[0-9]+", |lex| lex.slice().parse::<i64>().ok())]
    Int(i64),

    // Floating-point literals: digits are required on both sides of the `.`;
    // an optional `_` may precede the `f32` / `f64` suffix. A literal without
    // a suffix is parsed as `f64`.
    #[regex(r"-?[0-9]+\.[0-9]+(_)?f32", |lex| parse_f32(lex.slice()))]
    Float32(f32),
    #[regex(r"-?[0-9]+\.[0-9]+(_)?f64", |lex| parse_f64(lex.slice()))]
    Float64(f64),
    #[regex(r"-?[0-9]+\.[0-9]+", |lex| lex.slice().parse::<f64>().ok())]
    Float(f64),
    // NOTE(review): the keyword patterns above overlap with the identifier
    // regex below; an exact keyword is presumably resolved in favour of
    // `Keyword` by the logos priority rules — confirm against the logos docs.

    // Identifier: an ASCII letter or `_`, followed by ASCII letters, digits or `_`.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned())]
    Identifier(String),

    // Punctuation and operators.
    #[token("(")]
    BraceLeft,
    #[token(")")]
    BraceRight,
    #[token("[")]
    BracketLeft,
    #[token("]")]
    BracketRight,
    #[token("{")]
    ParenLeft,
    #[token("}")]
    ParenRight,
    #[token(".")]
    Dot,
    #[token(":")]
    Colon,
    #[token("::")]
    ScopeSymbol,
    #[token("=")]
    Assign,
    #[token(",")]
    Comma,
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Star,
    #[token("/")]
    Slash,
    #[token("%")]
    Mod,
    #[token(">")]
    Greater,
    #[token("<")]
    Less,
    #[token("<=")]
    LessEqual,
    #[token(">=")]
    GreaterEqual,
    #[token("==")]
    Equal,
    #[token("!=")]
    NotEqual,
    #[token("->")]
    Arrow,
    #[token("!")]
    Not,
    #[token("&&")]
    And,
    #[token("||")]
    Or,
    #[token("|")]
    Vertical,
    #[token("@")]
    Annotation,
    #[token("&")]
    BitAnd,

    // Boolean value. It has no lexer pattern of its own; in this file it is
    // only constructed by `Token::from_keyword_or_identifier`.
    Boolean(bool),

    // End-of-file marker. It has no lexer pattern; presumably appended by the
    // code that drives the lexer — TODO confirm.
    Eof,
}
134
135/// 解析字符串字面量,去除引号并处理转义字符
136fn parse_string(s: &str) -> Option<String> {
137    let content = &s[1..s.len() - 1]; // 去除前后的引号
138    Some(unescape_string(content))
139}
140
141/// 解析格式化字符串,去除 f" 前缀和后缀引号
142fn parse_format_string(s: &str) -> Option<String> {
143    let content = &s[2..s.len() - 1]; // 去除 f" 前缀和后缀引号
144    Some(unescape_string(content))
145}
146
/// Parses an `i8`-suffixed integer literal; both `10i8` and `10_i8` are accepted.
///
/// Returns `None` when the `i8` suffix is missing or when the remaining digits
/// are not a valid `i8` (for example, out of range). Input shorter than the
/// suffix no longer panics.
fn parse_i8(s: &str) -> Option<i8> {
    let digits = s.strip_suffix("i8")?;
    // A single `_` separator may sit between the digits and the suffix.
    let digits = digits.strip_suffix('_').unwrap_or(digits);
    digits.parse().ok()
}
156
/// Parses an `i16`-suffixed integer literal; both `10i16` and `10_i16` are accepted.
///
/// Returns `None` when the `i16` suffix is missing or when the remaining
/// digits are not a valid `i16` (for example, out of range). Input shorter
/// than the suffix no longer panics.
fn parse_i16(s: &str) -> Option<i16> {
    let digits = s.strip_suffix("i16")?;
    // A single `_` separator may sit between the digits and the suffix.
    let digits = digits.strip_suffix('_').unwrap_or(digits);
    digits.parse().ok()
}
166
/// Parses an `i32`-suffixed integer literal; both `10i32` and `10_i32` are accepted.
///
/// Returns `None` when the `i32` suffix is missing or when the remaining
/// digits are not a valid `i32` (for example, out of range). Input shorter
/// than the suffix no longer panics.
fn parse_i32(s: &str) -> Option<i32> {
    let digits = s.strip_suffix("i32")?;
    // A single `_` separator may sit between the digits and the suffix.
    let digits = digits.strip_suffix('_').unwrap_or(digits);
    digits.parse().ok()
}
176
/// Parses an `i64`-suffixed integer literal; both `10i64` and `10_i64` are accepted.
///
/// Returns `None` when the `i64` suffix is missing or when the remaining
/// digits are not a valid `i64` (for example, out of range). Input shorter
/// than the suffix no longer panics.
fn parse_i64(s: &str) -> Option<i64> {
    let digits = s.strip_suffix("i64")?;
    // A single `_` separator may sit between the digits and the suffix.
    let digits = digits.strip_suffix('_').unwrap_or(digits);
    digits.parse().ok()
}
188
/// Parses an `f32`-suffixed float literal; both `10.5f32` and `10.5_f32` are accepted.
///
/// Returns `None` when the `f32` suffix is missing or when the remaining text
/// is not a valid `f32`. Input shorter than the suffix no longer panics.
fn parse_f32(s: &str) -> Option<f32> {
    let number = s.strip_suffix("f32")?;
    // A single `_` separator may sit between the number and the suffix.
    let number = number.strip_suffix('_').unwrap_or(number);
    number.parse().ok()
}
198
/// Parses an `f64`-suffixed float literal; both `10.5f64` and `10.5_f64` are accepted.
///
/// Returns `None` when the `f64` suffix is missing or when the remaining text
/// is not a valid `f64`. Input shorter than the suffix no longer panics.
fn parse_f64(s: &str) -> Option<f64> {
    let number = s.strip_suffix("f64")?;
    // A single `_` separator may sit between the number and the suffix.
    let number = number.strip_suffix('_').unwrap_or(number);
    number.parse().ok()
}
208
/// Resolves backslash escape sequences in the body of a string literal.
///
/// Recognised escapes are `\n`, `\t`, `\r`, `\\`, `\"` and `\0`. Any other
/// escaped character is kept verbatim together with its backslash, and a lone
/// trailing backslash is kept as-is.
fn unescape_string(s: &str) -> String {
    // The output is never longer than the input, so one allocation suffices.
    let mut out = String::with_capacity(s.len());
    let mut rest = s.chars();

    while let Some(current) = rest.next() {
        if current != '\\' {
            out.push(current);
            continue;
        }

        // `current` is a backslash: the character after it selects the escape.
        let resolved = match rest.next() {
            Some('n') => '\n',
            Some('t') => '\t',
            Some('r') => '\r',
            Some('\\') => '\\',
            Some('"') => '"',
            Some('0') => '\0',
            Some(unknown) => {
                // Unknown escape: keep the backslash and the character.
                out.push('\\');
                unknown
            }
            // The backslash was the last character of the input.
            None => '\\',
        };
        out.push(resolved);
    }

    out
}
236
237impl Token {
238    /// 将关键字字符串转换为对应的 Token
239    pub fn from_keyword_or_identifier(s: String) -> Self {
240        match s.as_str() {
241            "true" => Token::Boolean(true),
242            "false" => Token::Boolean(false),
243            _ => Token::Identifier(s),
244        }
245    }
246
247    /// 获取 token 的唯一标识符
248    #[allow(unused)]
249    pub fn token_id(&self) -> i8 {
250        match self {
251            Token::String(_) => 0,
252            Token::Int(_) => 1,
253            Token::Int8(_) => 37,
254            Token::Int16(_) => 38,
255            Token::Int32(_) => 39,
256            Token::Int64(_) => 40,
257            Token::Float(_) => 2,
258            Token::Float32(_) => 41,
259            Token::Float64(_) => 42,
260            Token::Identifier(_) => 3,
261            Token::Keyword(_) => 4,
262            Token::BraceLeft => 5,
263            Token::BraceRight => 6,
264            Token::BracketLeft => 7,
265            Token::BracketRight => 8,
266            Token::ParenLeft => 9,
267            Token::ParenRight => 10,
268            Token::Dot => 11,
269            Token::Comma => 12,
270            Token::Eof => 13,
271            Token::Colon => 14,
272            Token::Assign => 15,
273            Token::Plus => 16,
274            Token::Star => 17,
275            Token::Greater => 18,
276            Token::Less => 19,
277            Token::Equal => 20,
278            Token::Minus => 21,
279            Token::Slash => 22,
280            Token::Mod => 23,
281            Token::ScopeSymbol => 24,
282            Token::NotEqual => 25,
283            Token::Arrow => 26,
284            Token::Not => 27,
285            Token::And => 28,
286            Token::Vertical => 29,
287            Token::Annotation => 30,
288            Token::Or => 31,
289            Token::Boolean(_) => 32,
290            Token::FormatString(_) => 33,
291            Token::LessEqual => 34,
292            Token::GreaterEqual => 35,
293            Token::BitAnd => 36,
294            _ => -1,
295        }
296    }
297
298    /// 检查是否为冒号
299    pub fn is_colon(&self) -> bool {
300        matches!(self, Token::Colon)
301    }
302
303    /// 检查是否为左花括号
304    pub fn is_parenthesis_left(&self) -> bool {
305        matches!(self, Token::ParenLeft)
306    }
307
308    /// 检查是否为指定的关键字
309    pub fn is_keyword(&self, keyword: &str) -> bool {
310        match self {
311            Token::Keyword(k) => k == keyword,
312            _ => false,
313        }
314    }
315
316    /// 检查是否为赋值操作符
317    pub fn is_assign(&self) -> bool {
318        matches!(self, Token::Assign)
319    }
320
321    /// 获取标识符的值
322    pub fn get_identifier_value(&self) -> &str {
323        match self {
324            Token::Identifier(s) => s.as_str(),
325            _ => "",
326        }
327    }
328}
329
330impl Display for Token {
331    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
332        match self {
333            Token::String(s) => write!(f, "String({s})"),
334            Token::FormatString(s) => write!(f, "FormatString({s})"),
335            Token::Int(i) => write!(f, "Int({i})"),
336            Token::Int8(i) => write!(f, "Int8({i})"),
337            Token::Int16(i) => write!(f, "Int16({i})"),
338            Token::Int32(i) => write!(f, "Int32({i})"),
339            Token::Int64(i) => write!(f, "Int64({i})"),
340            Token::Float(fl) => write!(f, "Float({fl})"),
341            Token::Float32(fl) => write!(f, "Float32({fl})"),
342            Token::Float64(fl) => write!(f, "Float64({fl})"),
343            Token::Identifier(i) => write!(f, "Identifier({i})"),
344            Token::Boolean(b) => write!(f, "Boolean({b})"),
345            Token::Keyword(kw) => write!(f, "Keyword({kw})"),
346            Token::True => write!(f, "Boolean(true)"),
347            Token::False => write!(f, "Boolean(false)"),
348            Token::BraceLeft => write!(f, "Symbol(()"),
349            Token::BraceRight => write!(f, "Symbol())"),
350            Token::BracketLeft => write!(f, "Symbol([)"),
351            Token::BracketRight => write!(f, "Symbol(])"),
352            Token::ParenLeft => write!(f, "Symbol({{)"),
353            Token::ParenRight => write!(f, "Symbol(}})"),
354            Token::Dot => write!(f, "Symbol(.)"),
355            Token::Colon => write!(f, "Symbol(:)"),
356            Token::ScopeSymbol => write!(f, "Symbol(::)"),
357            Token::Assign => write!(f, "Symbol(=)"),
358            Token::Comma => write!(f, "Symbol(,)"),
359            Token::Plus => write!(f, "Symbol(+)"),
360            Token::Minus => write!(f, "Symbol(-)"),
361            Token::Star => write!(f, "Symbol(*)"),
362            Token::Slash => write!(f, "Symbol(/)"),
363            Token::Mod => write!(f, "Symbol(%)"),
364            Token::Greater => write!(f, "Symbol(>)"),
365            Token::Less => write!(f, "Symbol(<)"),
366            Token::LessEqual => write!(f, "Symbol(<=)"),
367            Token::GreaterEqual => write!(f, "Symbol(>=)"),
368            Token::Equal => write!(f, "Symbol(==)"),
369            Token::NotEqual => write!(f, "Symbol(!=)"),
370            Token::Arrow => write!(f, "Symbol(->)"),
371            Token::Not => write!(f, "Symbol(!)"),
372            Token::And => write!(f, "Symbol(&&)"),
373            Token::Or => write!(f, "Symbol(||)"),
374            Token::Vertical => write!(f, "Symbol(|)"),
375            Token::Annotation => write!(f, "Symbol(@)"),
376            Token::BitAnd => write!(f, "Symbol(&)"),
377            Token::Eof => write!(f, "EOF"),
378        }
379    }
380}