reqlang_expr/lexer.rs

//! The lexer and associated types

use logos::Logos;
use std::ops::Range;

use crate::{
    errors::{ExprErrorS, LexicalError},
    span::Spanned,
};

/// Lexes source code into a list of spanned [`Token`]s (or lexical errors).
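///
/// A minimal doctest sketch; the `reqlang_expr::lexer` module path is
/// inferred from this file's location and may differ in the real crate:
///
/// ```
/// use reqlang_expr::lexer::{lex, Token};
///
/// let tokens = lex("foo");
/// // `foo` spans bytes 0..3 of the source.
/// assert_eq!(tokens[0].as_ref().ok(), Some(&(0, Token::identifier("foo"), 3)));
/// ```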
pub fn lex(source: &str) -> Vec<Result<(usize, Token, usize), ExprErrorS>> {
    Lexer::new(source).collect()
}

/// Lexes a `&str` source into a stream of spanned [`Token`]s.
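///
/// A hedged usage sketch (same assumed module path as [`lex`]):
///
/// ```
/// use reqlang_expr::lexer::{Lexer, Token};
///
/// let mut lexer = Lexer::new("true");
/// // Keywords win over the identifier pattern, so `true` lexes as `Token::True`.
/// assert_eq!(lexer.next().unwrap().ok(), Some((0, Token::True, 4)));
/// assert!(lexer.next().is_none());
/// ```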
#[derive(Debug)]
pub struct Lexer<'a> {
    inner: logos::Lexer<'a, Token>,
    /// A token buffered to be returned before the inner lexer is advanced again.
    pending: Option<(usize, Token, usize)>,
}

impl<'a> Lexer<'a> {
    pub fn new(source: &'a str) -> Self {
        Self {
            inner: Token::lexer(source),
            pending: None,
        }
    }
}

impl Iterator for Lexer<'_> {
    type Item = Result<(usize, Token, usize), ExprErrorS>;

    fn next(&mut self) -> Option<Self::Item> {
        // Drain any buffered token before advancing the inner lexer.
        if let Some(token) = self.pending.take() {
            return Some(Ok(token));
        }

        let token = self.inner.next()?;
        let Range { start, end } = self.inner.span();

        Some(
            token
                .map(|token| (start, token, end))
                .map_err(|(err, err_span)| (err.into(), err_span)),
        )
    }
}

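/// The set of tokens recognized by the lexer.
///
/// Whitespace is skipped. Strings are delimited by backticks, and identifiers
/// may carry a single leading `!`, `?`, `:`, or `@` sigil.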
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(error = Spanned<LexicalError>)]
#[logos(skip r"[ \t\n\f]+")]
pub enum Token {
    #[token("(")]
    LParan,

    #[token(")")]
    RParan,

    #[token(",")]
    Comma,

    #[token("<")]
    LAngle,

    #[token(">")]
    RAngle,

    #[token("->")]
    Arrow,

    #[token("Fn")]
    Fn,

    #[token("...")]
    ThreeDot,

    #[regex(r#"`[^`]*`"#, lex_string)]
    String(String),

    #[regex("[!?:@]?[a-z_][a-zA-Z0-9_]*", lex_identifier)]
    Identifier(String),

    #[regex("[A-Z][a-zA-Z0-9]*", lex_identifier)]
    Type(String),

    #[token("true")]
    True,

    #[token("false")]
    False,
}

/// Callback for identifier and type tokens: returns the matched slice verbatim.
fn lex_identifier(lexer: &mut logos::Lexer<Token>) -> String {
    lexer.slice().to_string()
}

/// Callback for string tokens: strips the surrounding backticks.
fn lex_string(lexer: &mut logos::Lexer<Token>) -> String {
    let slice = lexer.slice();
    slice[1..slice.len() - 1].to_string()
}

impl Token {
    /// Convenience constructor for [`Token::Identifier`].
    pub fn identifier(identifier: &str) -> Self {
        Token::Identifier(identifier.to_string())
    }
}
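
// A minimal test sketch of the lexer's expected spans and callbacks. Only `Ok`
// variants are asserted, so nothing is assumed about the `ExprErrorS` shape.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lexes_spanned_tokens() {
        let tokens = lex("(`hi`, :var)");

        let expected = vec![
            (0, Token::LParan, 1),
            (1, Token::String("hi".to_string()), 5),
            (5, Token::Comma, 6),
            (7, Token::identifier(":var"), 11),
            (11, Token::RParan, 12),
        ];

        let actual: Vec<_> = tokens.into_iter().filter_map(Result::ok).collect();
        assert_eq!(actual, expected);
    }
}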