1use logos::Logos;
2use std::ops::Range;
3
4#[derive(Logos, Debug, Clone, PartialEq)]
5#[logos(skip r"[ \t\r\f]+")]
6#[logos(skip(r"#[^\n]*", allow_greedy = true))]
7pub enum TokenKind {
8 #[token("fn")] Fn,
10 #[token("let")] Let,
11 #[token("type")] Type,
12 #[token("match")] Match,
13 #[token("if")] If,
14 #[token("else")] Else,
15 #[token("return")] Return,
16 #[token("import")] Import,
17 #[token("as")] As,
18 #[token("true")] True,
19 #[token("false")] False,
20 #[token("and")] And,
21 #[token("or")] Or,
22 #[token("not")] Not,
23
24 #[token("|>")] Pipe,
26 #[token("->")] Arrow,
27 #[token("=>")] FatArrow,
28 #[token(":=")] ColonEq,
29 #[token("::")] ColonColon,
30 #[token("==")] EqEq,
31 #[token("!=")] BangEq,
32 #[token("<=")] LtEq,
33 #[token(">=")] GtEq,
34
35 #[token("...")] DotDotDot,
37
38 #[token("+")] Plus,
40 #[token("-")] Minus,
41 #[token("*")] Star,
42 #[token("/")] Slash,
43 #[token("%")] Percent,
44 #[token("<")] Lt,
45 #[token(">")] Gt,
46 #[token(".")] Dot,
47 #[token(",")] Comma,
48 #[token(";")] Semi,
49 #[token(":")] Colon,
50 #[token("?")] Question,
51 #[token("(")] LParen,
52 #[token(")")] RParen,
53 #[token("{")] LBrace,
54 #[token("}")] RBrace,
55 #[token("[")] LBracket,
56 #[token("]")] RBracket,
57 #[token("=")] Eq,
58 #[token("|")] Bar,
59 #[token("_")] Underscore,
60 #[token("\n")] Newline,
61
62 #[regex(r"[0-9][0-9_]*[eE][+-]?[0-9]+", |lex| lex.slice().replace('_', "").parse::<f64>().ok())]
64 #[regex(r"[0-9][0-9_]*\.[0-9][0-9_]*([eE][+-]?[0-9]+)?", |lex| lex.slice().replace('_', "").parse::<f64>().ok())]
65 Float(f64),
66
67 #[regex(r"[0-9][0-9_]*", |lex| lex.slice().replace('_', "").parse::<i64>().ok(), priority = 3)]
68 Int(i64),
69
70 #[regex(r#""([^"\\]|\\.)*""#, |lex| unescape(&lex.slice()[1..lex.slice().len()-1]))]
71 Str(String),
72
73 #[regex(r#"b"([^"\\]|\\.)*""#, |lex| unescape(&lex.slice()[2..lex.slice().len()-1]).map(|s| s.into_bytes()))]
74 Bytes(Vec<u8>),
75
76 #[regex(r"[a-zA-Z][a-zA-Z0-9_]*", |lex| lex.slice().to_string())]
83 #[regex(r"_[a-zA-Z0-9_]+", |lex| lex.slice().to_string())]
84 Ident(String),
85}
86
/// Decodes the backslash escapes in `s`.
///
/// Recognised escapes are `\n`, `\t`, `\r`, `\\`, `\"` and `\0`; every other
/// character is copied through unchanged.
///
/// Returns `None` when a backslash is followed by any other character, or
/// when `s` ends with a lone backslash.
fn unescape(s: &str) -> Option<String> {
    let mut decoded = String::with_capacity(s.len());
    let mut rest = s;

    // Copy each escape-free run in one go, then decode the escape after it.
    while let Some(slash_at) = rest.find('\\') {
        decoded.push_str(&rest[..slash_at]);

        // `\` is a single byte, so `slash_at + 1` is always a char boundary.
        let mut after_slash = rest[slash_at + 1..].chars();
        let replacement = match after_slash.next()? {
            'n' => '\n',
            't' => '\t',
            'r' => '\r',
            '\\' => '\\',
            '"' => '"',
            '0' => '\0',
            _ => return None,
        };
        decoded.push(replacement);
        rest = after_slash.as_str();
    }

    decoded.push_str(rest);
    Some(decoded)
}
107
108#[derive(Debug, Clone)]
109pub struct Token {
110 pub kind: TokenKind,
111 pub span: Range<usize>,
112}
113
114pub fn lex(src: &str) -> Result<Vec<Token>, LexError> {
115 let mut toks = Vec::new();
116 let mut lx = TokenKind::lexer(src);
117 while let Some(res) = lx.next() {
118 match res {
119 Ok(kind) => toks.push(Token { kind, span: lx.span() }),
120 Err(_) => {
121 return Err(LexError {
122 span: lx.span(),
123 snippet: lx.slice().to_string(),
124 });
125 }
126 }
127 }
128 Ok(toks)
129}
130
131#[derive(Debug, thiserror::Error)]
132#[error("unrecognized token `{snippet}` at {span:?}")]
133pub struct LexError {
134 pub span: Range<usize>,
135 pub snippet: String,
136}