// rxp/scanner.rs
1use crate::graphviz::{DiGraph, RankDir, Style};
2use unicode_segmentation::Graphemes;
3use unicode_segmentation::UnicodeSegmentation;
4
/// A single lexical token of the regular-expression language.
///
/// Operator characters get dedicated variants; any other grapheme cluster is
/// carried verbatim as [`Token::GraphemeCluster`], borrowing from the scanned
/// source for lifetime `'a`.
#[derive(Copy, Debug, Clone, PartialEq, Eq, Hash)]
pub enum Token<'a> {
    LeftParen,
    RightParen,
    Pipe,
    Star,
    Plus,
    /// Any grapheme cluster that is not one of the operator characters.
    GraphemeCluster(&'a str),
    BackSlash,
}

impl<'a> Token<'a> {
    /// Returns the token's lexeme: the exact text it was scanned from.
    ///
    /// Operator variants map to their fixed one-character spelling, while
    /// `GraphemeCluster` returns the slice it borrowed from the source.
    /// This never panics and requires no access to the original buffer.
    pub fn lexeme(&self) -> &'a str {
        match self {
            Self::LeftParen => "(",
            Self::RightParen => ")",
            Self::Pipe => "|",
            Self::Star => "*",
            Self::Plus => "+",
            Self::GraphemeCluster(s) => s,
            Self::BackSlash => "\\",
        }
    }

    /// Returns the variant's name, e.g. for labeling debug/graphviz output.
    pub fn kind(&self) -> &'static str {
        match self {
            Self::LeftParen => "LeftParen",
            Self::RightParen => "RightParen",
            Self::Pipe => "Pipe",
            Self::Star => "Star",
            Self::Plus => "Plus",
            Self::GraphemeCluster(_) => "GraphemeCluster",
            Self::BackSlash => "BackSlash",
        }
    }
}
44
45pub struct Scanner<'a> {
46 source: &'a str,
47}
48
49impl<'a> Scanner<'a> {
50 /// Creates a new scanner wrapping a string slice.
51 pub fn new(source: &'a str) -> Self {
52 Self { source }
53 }
54
55 /// Produces an iterator of tokens from the scanner.
56 pub fn tokens(&'a self) -> Tokens<'a> {
57 Tokens {
58 graphemes: self.source.graphemes(true),
59 }
60 }
61
62 pub fn source(&'a self) -> &'a str {
63 self.source
64 }
65}
66
67impl Scanner<'_> {
68 pub fn graphviz(&self, graph_name: &str) -> String {
69 let mut digraph = DiGraph::new(graph_name);
70 digraph.rankdir(RankDir::LeftRight);
71
72 for (i, token) in self.tokens().enumerate() {
73 if let Token::GraphemeCluster(_) = &token {
74 let lexeme = token.lexeme();
75 digraph.vertex(i, Style::new().label(lexeme));
76 } else {
77 let kind = &token.kind();
78 digraph.vertex(i, Style::new().label(kind));
79 }
80
81 if i != 0 {
82 digraph.edge(i - 1, i, None);
83 }
84 }
85
86 digraph.to_string()
87 }
88}
89
/// Iterator over the tokens of a scanned source string.
///
/// Produced by [`Scanner::tokens`]; yields one [`Token`] per grapheme cluster.
pub struct Tokens<'a> {
    graphemes: Graphemes<'a>,
}
93
94impl<'a> Tokens<'a> {
95 fn next_token(&mut self) -> Option<Token<'a>> {
96 self.graphemes.next().map(|lexeme| match lexeme {
97 "(" => Token::LeftParen,
98 ")" => Token::RightParen,
99 "*" => Token::Star,
100 "+" => Token::Plus,
101 "|" => Token::Pipe,
102 "\\" => Token::BackSlash,
103 other => Token::GraphemeCluster(other),
104 })
105 }
106}
107
108impl<'a> std::iter::Iterator for Tokens<'a> {
109 type Item = Token<'a>;
110
111 fn next(&mut self) -> Option<Self::Item> {
112 self.next_token()
113 }
114}
115
#[cfg(test)]
mod test {
    // The previous tests here were commented out and targeted an obsolete
    // `Token { kind, start, length }` API. These exercise the current
    // unit-variant/`GraphemeCluster(&str)` API.
    use super::*;

    #[test]
    fn ascii_lexeme() {
        assert_eq!(Token::LeftParen.lexeme(), "(");
    }

    #[test]
    fn unicode_lexeme() {
        // A grapheme-cluster token returns its borrowed text verbatim.
        assert_eq!(Token::GraphemeCluster("💖").lexeme(), "💖");
    }

    #[test]
    fn ascii_tokens() {
        let tokens: Vec<_> = Scanner::new("ab)c").tokens().collect();

        assert_eq!(
            tokens,
            vec![
                Token::GraphemeCluster("a"),
                Token::GraphemeCluster("b"),
                Token::RightParen,
                Token::GraphemeCluster("c"),
            ]
        );
    }

    #[test]
    fn unicode_tokens() {
        // The emoji is a single (multi-byte) grapheme cluster.
        let tokens: Vec<_> = Scanner::new("ab💖*").tokens().collect();

        assert_eq!(
            tokens,
            vec![
                Token::GraphemeCluster("a"),
                Token::GraphemeCluster("b"),
                Token::GraphemeCluster("💖"),
                Token::Star,
            ]
        );
    }

    #[test]
    fn peeking() {
        let scanner = Scanner::new("a(*|");
        let mut tokens = scanner.tokens().peekable();

        assert_eq!(tokens.next(), Some(Token::GraphemeCluster("a")));

        // Peeking does not advance the iterator.
        assert_eq!(tokens.peek(), Some(&Token::LeftParen));
        assert_eq!(tokens.peek(), Some(&Token::LeftParen));

        assert_eq!(tokens.next(), Some(Token::LeftParen));
        assert_eq!(tokens.next(), Some(Token::Star));
        assert_eq!(tokens.next(), Some(Token::Pipe));
        assert_eq!(tokens.next(), None);
    }
}
256}