// pipeline_script/lexer/token.rs

use logos::Logos;
use std::fmt::{Display, Formatter};
3
4/// 词法单元定义,使用 logos 库进行词法分析
5#[derive(Logos, Debug, Clone, PartialEq)]
6#[logos(skip r"[ \t\n\f\r;]+")] // 跳过空白字符
7#[logos(skip r"//[^\n]*")] // 跳过单行注释
8#[logos(skip r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/")] // 跳过多行注释
9pub enum Token {
10    #[token("if", |lex| lex.slice().to_owned())]
11    #[token("else", |lex| lex.slice().to_owned())]
12    #[token("while", |lex| lex.slice().to_owned())]
13    #[token("fn", |lex| lex.slice().to_owned())]
14    #[token("extern", |lex| lex.slice().to_owned())]
15    #[token("struct", |lex| lex.slice().to_owned())]
16    #[token("return", |lex| lex.slice().to_owned())]
17    #[token("let", |lex| lex.slice().to_owned())]
18    #[token("match", |lex| lex.slice().to_owned())]
19    #[token("import", |lex| lex.slice().to_owned())]
20    #[token("const", |lex| lex.slice().to_owned())]
21    #[token("continue", |lex| lex.slice().to_owned())]
22    #[token("break", |lex| lex.slice().to_owned())]
23    #[token("enum", |lex| lex.slice().to_owned())]
24    #[token("class", |lex| lex.slice().to_owned())]
25    #[token("module", |lex| lex.slice().to_owned())]
26    #[token("static", |lex| lex.slice().to_owned())]
27    Keyword(String),
28    #[token("true")]
29    True,
30    #[token("false")]
31    False,
32
33    // 字符串字面量
34    #[regex(r#""([^"\\]|\\.)*""#, |lex| parse_string(lex.slice()))]
35    String(String),
36
37    // 格式化字符串(以 f 开头的字符串)
38    #[regex(r#"f"([^"\\]|\\.)*""#, |lex| parse_format_string(lex.slice()))]
39    FormatString(String),
40
41    // 整数
42    #[regex(r"-?[0-9]+", |lex| lex.slice().parse::<i64>().ok())]
43    Int(i64),
44
45    // 浮点数
46    #[regex(r"-?[0-9]+\.[0-9]+", |lex| lex.slice().parse::<f64>().ok())]
47    Float(f64),
48    // 关键字的通用表示
49    // // 统一的关键字类型
50    // 先定义关键字(优先级高)
51
52    // 标识符
53    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned())]
54    Identifier(String),
55
56    // 符号和操作符
57    #[token("(")]
58    BraceLeft,
59    #[token(")")]
60    BraceRight,
61    #[token("[")]
62    BracketLeft,
63    #[token("]")]
64    BracketRight,
65    #[token("{")]
66    ParenLeft,
67    #[token("}")]
68    ParenRight,
69    #[token(".")]
70    Dot,
71    #[token(":")]
72    Colon,
73    #[token("::")]
74    ScopeSymbol,
75    #[token("=")]
76    Assign,
77    #[token(",")]
78    Comma,
79    #[token("+")]
80    Plus,
81    #[token("-")]
82    Minus,
83    #[token("*")]
84    Star,
85    #[token("/")]
86    Slash,
87    #[token("%")]
88    Mod,
89    #[token(">")]
90    Greater,
91    #[token("<")]
92    Less,
93    #[token("<=")]
94    LessEqual,
95    #[token(">=")]
96    GreaterEqual,
97    #[token("==")]
98    Equal,
99    #[token("!=")]
100    NotEqual,
101    #[token("->")]
102    Arrow,
103    #[token("!")]
104    Not,
105    #[token("&&")]
106    And,
107    #[token("||")]
108    Or,
109    #[token("|")]
110    Vertical,
111    #[token("@")]
112    Annotation,
113    #[token("&")]
114    BitAnd,
115
116    // 布尔值(通过关键字处理)
117    Boolean(bool),
118
119    // 文件结束标记
120    Eof,
121}
122
123/// 解析字符串字面量,去除引号并处理转义字符
124fn parse_string(s: &str) -> Option<String> {
125    let content = &s[1..s.len() - 1]; // 去除前后的引号
126    Some(unescape_string(content))
127}
128
129/// 解析格式化字符串,去除 f" 前缀和后缀引号
130fn parse_format_string(s: &str) -> Option<String> {
131    let content = &s[2..s.len() - 1]; // 去除 f" 前缀和后缀引号
132    Some(unescape_string(content))
133}
134
/// Resolves backslash escape sequences in the content of a string literal.
///
/// Recognised escapes are `\n`, `\t`, `\r`, `\\`, `\"` and `\0`. Any other escaped
/// character is kept verbatim together with its backslash, and a lone backslash at the
/// very end of the input is kept as well.
fn unescape_string(s: &str) -> String {
    // Every escape shrinks or keeps its length, so the input length is an upper bound.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();

    // `while let` rather than `for`: the escape branch pulls a second character from the
    // same iterator.
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        match chars.next() {
            Some('n') => result.push('\n'),
            Some('t') => result.push('\t'),
            Some('r') => result.push('\r'),
            Some('\\') => result.push('\\'),
            Some('"') => result.push('"'),
            Some('0') => result.push('\0'),
            // Unknown escape: leave it untouched.
            Some(other) => {
                result.push('\\');
                result.push(other);
            }
            // Trailing backslash with nothing after it.
            None => result.push('\\'),
        }
    }

    result
}
162
163impl Token {
164    /// 将关键字字符串转换为对应的 Token
165    pub fn from_keyword_or_identifier(s: String) -> Self {
166        match s.as_str() {
167            "true" => Token::Boolean(true),
168            "false" => Token::Boolean(false),
169            _ => Token::Identifier(s),
170        }
171    }
172
173    /// 获取 token 的唯一标识符
174    #[allow(unused)]
175    pub fn token_id(&self) -> i8 {
176        match self {
177            Token::String(_) => 0,
178            Token::Int(_) => 1,
179            Token::Float(_) => 2,
180            Token::Identifier(_) => 3,
181            Token::Keyword(_) => 4,
182            Token::BraceLeft => 5,
183            Token::BraceRight => 6,
184            Token::BracketLeft => 7,
185            Token::BracketRight => 8,
186            Token::ParenLeft => 9,
187            Token::ParenRight => 10,
188            Token::Dot => 11,
189            Token::Comma => 12,
190            Token::Eof => 13,
191            Token::Colon => 14,
192            Token::Assign => 15,
193            Token::Plus => 16,
194            Token::Star => 17,
195            Token::Greater => 18,
196            Token::Less => 19,
197            Token::Equal => 20,
198            Token::Minus => 21,
199            Token::Slash => 22,
200            Token::Mod => 23,
201            Token::ScopeSymbol => 24,
202            Token::NotEqual => 25,
203            Token::Arrow => 26,
204            Token::Not => 27,
205            Token::And => 28,
206            Token::Vertical => 29,
207            Token::Annotation => 30,
208            Token::Or => 31,
209            Token::Boolean(_) => 32,
210            Token::FormatString(_) => 33,
211            Token::LessEqual => 34,
212            Token::GreaterEqual => 35,
213            Token::BitAnd => 36,
214            _ => -1,
215        }
216    }
217
218    /// 检查是否为冒号
219    pub fn is_colon(&self) -> bool {
220        matches!(self, Token::Colon)
221    }
222
223    /// 检查是否为左花括号
224    pub fn is_parenthesis_left(&self) -> bool {
225        matches!(self, Token::ParenLeft)
226    }
227
228    /// 检查是否为指定的关键字
229    pub fn is_keyword(&self, keyword: &str) -> bool {
230        match self {
231            Token::Keyword(k) => k == keyword,
232            _ => false,
233        }
234    }
235
236    /// 检查是否为赋值操作符
237    pub fn is_assign(&self) -> bool {
238        matches!(self, Token::Assign)
239    }
240
241    /// 获取标识符的值
242    pub fn get_identifier_value(&self) -> &str {
243        match self {
244            Token::Identifier(s) => s.as_str(),
245            _ => "",
246        }
247    }
248}
249
250impl Display for Token {
251    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
252        match self {
253            Token::String(s) => write!(f, "String({s})"),
254            Token::FormatString(s) => write!(f, "FormatString({s})"),
255            Token::Int(i) => write!(f, "Int({i})"),
256            Token::Float(fl) => write!(f, "Float({fl})"),
257            Token::Identifier(i) => write!(f, "Identifier({i})"),
258            Token::Boolean(b) => write!(f, "Boolean({b})"),
259            Token::Keyword(kw) => write!(f, "Keyword({kw})"),
260            Token::True => write!(f, "Boolean(true)"),
261            Token::False => write!(f, "Boolean(false)"),
262            Token::BraceLeft => write!(f, "Symbol(()"),
263            Token::BraceRight => write!(f, "Symbol())"),
264            Token::BracketLeft => write!(f, "Symbol([)"),
265            Token::BracketRight => write!(f, "Symbol(])"),
266            Token::ParenLeft => write!(f, "Symbol({{)"),
267            Token::ParenRight => write!(f, "Symbol(}})"),
268            Token::Dot => write!(f, "Symbol(.)"),
269            Token::Colon => write!(f, "Symbol(:)"),
270            Token::ScopeSymbol => write!(f, "Symbol(::)"),
271            Token::Assign => write!(f, "Symbol(=)"),
272            Token::Comma => write!(f, "Symbol(,)"),
273            Token::Plus => write!(f, "Symbol(+)"),
274            Token::Minus => write!(f, "Symbol(-)"),
275            Token::Star => write!(f, "Symbol(*)"),
276            Token::Slash => write!(f, "Symbol(/)"),
277            Token::Mod => write!(f, "Symbol(%)"),
278            Token::Greater => write!(f, "Symbol(>)"),
279            Token::Less => write!(f, "Symbol(<)"),
280            Token::LessEqual => write!(f, "Symbol(<=)"),
281            Token::GreaterEqual => write!(f, "Symbol(>=)"),
282            Token::Equal => write!(f, "Symbol(==)"),
283            Token::NotEqual => write!(f, "Symbol(!=)"),
284            Token::Arrow => write!(f, "Symbol(->)"),
285            Token::Not => write!(f, "Symbol(!)"),
286            Token::And => write!(f, "Symbol(&&)"),
287            Token::Or => write!(f, "Symbol(||)"),
288            Token::Vertical => write!(f, "Symbol(|)"),
289            Token::Annotation => write!(f, "Symbol(@)"),
290            Token::BitAnd => write!(f, "Symbol(&)"),
291            Token::Eof => write!(f, "EOF"),
292        }
293    }
294}