1use std::hash::Hash;
4
5use regex_automata::{Anchored, Input, PatternID};
6
7pub use regex_automata::meta::Regex;
8
9use crate::LexIt;
10
/// Half-open byte range `start..end` locating a lexeme inside the lexer input.
///
/// [`LexerState::span`] fills it from the bounds of the most recent match.
/// `Hash` is derived alongside `Eq` so spans can be used as map or set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Byte offset of the first byte of the lexeme.
    pub start: usize,
    /// Byte offset one past the last byte of the lexeme.
    pub end: usize,
}
19
/// Fallible conversion from a borrowed value into a `T`.
pub trait TryConvert<T> {
    /// Attempts the conversion; implementors return `None` when it cannot be
    /// performed and `Some` with the converted value otherwise.
    fn try_convert(&self) -> Option<T>;
}
25
26impl<T: Copy> TryConvert<T> for T {
27 fn try_convert(&self) -> Option<T> {
28 Some(*self)
29 }
30}
31
/// Saved lexer position: the bounds of the current lexeme.
///
/// Obtained from [`LexerState::cursor`] and restored with
/// [`LexerState::advance_to_cursor`]. The derived ordering compares `cursor`
/// first and `start` second, following field declaration order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Cursor {
    /// Offset where the current lexeme ends and the next match attempt begins.
    cursor: usize,
    /// Offset where the current lexeme begins.
    start: usize,
}
38
/// Position of a lexer within a borrowed input string.
///
/// `start..cursor` delimits the most recently matched lexeme; the next match
/// attempt begins at `cursor`.
#[derive(Debug, Clone)]
pub struct LexerState<'a> {
    /// Byte offset where the current lexeme begins.
    start: usize,
    /// Byte offset where the current lexeme ends and the next match begins.
    cursor: usize,
    /// The complete text being lexed.
    input: &'a str,
}
46
47impl<'a> LexerState<'a> {
48 pub fn new(input: &'a str) -> Self {
50 Self {
51 start: 0,
52 cursor: 0,
53 input,
54 }
55 }
56
57 pub fn run(&mut self, regex: &Regex) -> Option<PatternID> {
59 let input = Input::new(self.input)
60 .range(self.cursor..)
61 .anchored(Anchored::Yes);
62 let end = regex.search_half(&input)?;
63 self.start = self.cursor;
64 self.cursor = end.offset();
65 Some(end.pattern())
66 }
67
68 pub fn lexeme(&self) -> &'a str {
70 &self.input[self.start..self.cursor]
71 }
72
73 pub fn cursor(&self) -> Cursor {
75 Cursor {
76 start: self.start,
77 cursor: self.cursor,
78 }
79 }
80
81 pub fn span(&self) -> Span {
83 Span {
84 start: self.start,
85 end: self.cursor,
86 }
87 }
88
89 pub fn is_empty(&self) -> bool {
91 self.cursor >= self.input.len()
92 }
93
94 pub fn advance_to_cursor(&mut self, cursor: Cursor) {
96 self.start = cursor.start;
97 self.cursor = cursor.cursor;
98 }
99}
100
/// Stateless lexer whose tokens are the individual characters of the input.
#[derive(Debug, Clone, Copy, Default)]
pub struct CharLexer;
104
105impl LexIt for CharLexer {
106 type Token<'a> = char;
107
108 fn new() -> Self {
109 Self
110 }
111
112 fn next<'a>(&self, lexbuf: &mut LexerState<'a>) -> Option<Self::Token<'a>> {
113 thread_local! {
114 static REGEX: Regex = Regex::new(r".").unwrap();
115 }
116 REGEX.with(|regex| {
117 if lexbuf.run(regex).is_some() {
118 let lexeme = lexbuf.lexeme();
119 Some(lexeme.chars().next().unwrap())
120 } else {
121 None
122 }
123 })
124 }
125}