parse_it/
lexer.rs

1//! Lexing for the parser.
2
3use std::hash::Hash;
4
5use regex_automata::{Anchored, Input, PatternID};
6
7pub use regex_automata::meta::Regex;
8
9use crate::LexIt;
10
/// A span in the source code.
///
/// [`LexerState::span`] fills both bounds from the same `start`/`cursor`
/// offsets that [`LexerState::lexeme`] uses to slice the input string, so
/// they are byte offsets into that string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// The start of the span, inclusive
    pub start: usize,
    /// The end of the span, exclusive
    pub end: usize,
}
19
/// Fallible conversion from a borrowed value into a `T`.
pub trait TryConvert<T> {
    /// Attempt the conversion, yielding `Some` with the converted value, or
    /// `None` when no value can be produced.
    fn try_convert(&self) -> Option<T>;
}

/// Every `Copy` type converts to itself; this conversion always succeeds.
impl<T: Copy> TryConvert<T> for T {
    fn try_convert(&self) -> Option<T> {
        // `copied` turns the `Option<&T>` into an `Option<T>` by copying the value out.
        Some(self).copied()
    }
}
31
/// Cursor position in the input.
///
/// A snapshot of the lexer's `start` and `cursor` fields as returned by
/// [`LexerState::cursor`]; hand it to [`LexerState::advance_to_cursor`] to
/// put a lexer into that position.
///
/// The derived ordering compares `cursor` first and `start` second, following
/// the field declaration order below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Cursor {
    /// Copy of the lexer's `cursor` field: the offset where its next match begins.
    cursor: usize,
    /// Copy of the lexer's `start` field: the offset where its latest match began.
    start: usize,
}
38
/// The state of a lexer walking over a borrowed input string.
///
/// Tracks where the most recent match began (`start`) and where the next
/// match will begin (`cursor`), both as byte offsets into `input`. Cloning is
/// cheap: the input is only borrowed, never copied.
#[derive(Debug, Clone)]
pub struct LexerState<'a> {
    /// Byte offset at which the most recently matched token begins.
    start: usize,
    /// Byte offset just past the most recently matched token; the next match
    /// is anchored here.
    cursor: usize,
    /// The complete input being lexed.
    input: &'a str,
}
46
47impl<'a> LexerState<'a> {
48    /// Create a new lexer state.
49    pub fn new(input: &'a str) -> Self {
50        Self {
51            start: 0,
52            cursor: 0,
53            input,
54        }
55    }
56
57    /// Run the lexer against the given regex.
58    pub fn run(&mut self, regex: &Regex) -> Option<PatternID> {
59        let input = Input::new(self.input)
60            .range(self.cursor..)
61            .anchored(Anchored::Yes);
62        let end = regex.search_half(&input)?;
63        self.start = self.cursor;
64        self.cursor = end.offset();
65        Some(end.pattern())
66    }
67
68    /// Get the lexeme of the current token.
69    pub fn lexeme(&self) -> &'a str {
70        &self.input[self.start..self.cursor]
71    }
72
73    /// Get the current cursor position.
74    pub fn cursor(&self) -> Cursor {
75        Cursor {
76            start: self.start,
77            cursor: self.cursor,
78        }
79    }
80
81    /// Get the span of the current token.
82    pub fn span(&self) -> Span {
83        Span {
84            start: self.start,
85            end: self.cursor,
86        }
87    }
88
89    /// Check if the lexer is at the end of the input.
90    pub fn is_empty(&self) -> bool {
91        self.cursor >= self.input.len()
92    }
93
94    /// Advance the lexer to the given cursor position.
95    pub fn advance_to_cursor(&mut self, cursor: Cursor) {
96        self.start = cursor.start;
97        self.cursor = cursor.cursor;
98    }
99}
100
/// A lexer for a single character.
///
/// A field-less unit struct; its [`LexIt`] implementation declares `char` as
/// the token type.
#[derive(Clone)]
pub struct CharLexer;
104
105impl LexIt for CharLexer {
106    type Token<'a> = char;
107
108    fn new() -> Self {
109        Self
110    }
111
112    fn next<'a>(&self, lexbuf: &mut LexerState<'a>) -> Option<Self::Token<'a>> {
113        thread_local! {
114            static REGEX: Regex = Regex::new(r".").unwrap();
115        }
116        REGEX.with(|regex| {
117            if lexbuf.run(regex).is_some() {
118                let lexeme = lexbuf.lexeme();
119                Some(lexeme.chars().next().unwrap())
120            } else {
121                None
122            }
123        })
124    }
125}