// rxp/scanner.rs

use crate::graphviz::{DiGraph, RankDir, Style};
use unicode_segmentation::Graphemes;
use unicode_segmentation::UnicodeSegmentation;
4
/// A single lexical token of the regular-expression language.
///
/// `GraphemeCluster` borrows its text from the source buffer, so tokens are
/// tied to the lifetime `'a` of the string they were scanned from.
#[derive(Copy, Debug, Clone, PartialEq, Eq, Hash)]
pub enum Token<'a> {
    LeftParen,
    RightParen,
    Pipe,
    Star,
    Plus,
    GraphemeCluster(&'a str),
    BackSlash,
}

impl<'a> Token<'a> {
    /// Returns the token's lexeme: the literal text it was scanned from.
    ///
    /// Fixed tokens map to their static spelling; `GraphemeCluster` returns
    /// the grapheme slice borrowed from the source buffer. This never panics —
    /// the previous note about invalid UTF-8 and a separate source buffer
    /// described an older offset-based API and no longer applies.
    pub fn lexeme(&self) -> &'a str {
        match self {
            Self::LeftParen => "(",
            Self::RightParen => ")",
            Self::Pipe => "|",
            Self::Star => "*",
            Self::Plus => "+",
            Self::GraphemeCluster(s) => s,
            Self::BackSlash => "\\",
        }
    }

    /// Returns the variant's name as a static string, useful for diagnostics
    /// and graph labels.
    pub fn kind(&self) -> &'static str {
        match self {
            Self::LeftParen => "LeftParen",
            Self::RightParen => "RightParen",
            Self::Pipe => "Pipe",
            Self::Star => "Star",
            Self::Plus => "Plus",
            Self::GraphemeCluster(_) => "GraphemeCluster",
            Self::BackSlash => "BackSlash",
        }
    }
}
44
45pub struct Scanner<'a> {
46    source: &'a str,
47}
48
49impl<'a> Scanner<'a> {
50    /// Creates a new scanner wrapping a string slice.
51    pub fn new(source: &'a str) -> Self {
52        Self { source }
53    }
54
55    /// Produces an iterator of tokens from the scanner.
56    pub fn tokens(&'a self) -> Tokens<'a> {
57        Tokens {
58            graphemes: self.source.graphemes(true),
59        }
60    }
61
62    pub fn source(&'a self) -> &'a str {
63        self.source
64    }
65}
66
67impl Scanner<'_> {
68    pub fn graphviz(&self, graph_name: &str) -> String {
69        let mut digraph = DiGraph::new(graph_name);
70        digraph.rankdir(RankDir::LeftRight);
71
72        for (i, token) in self.tokens().enumerate() {
73            if let Token::GraphemeCluster(_) = &token {
74                let lexeme = token.lexeme();
75                digraph.vertex(i, Style::new().label(lexeme));
76            } else {
77                let kind = &token.kind();
78                digraph.vertex(i, Style::new().label(kind));
79            }
80
81            if i != 0 {
82                digraph.edge(i - 1, i, None);
83            }
84        }
85
86        digraph.to_string()
87    }
88}
89
90pub struct Tokens<'a> {
91    graphemes: Graphemes<'a>,
92}
93
94impl<'a> Tokens<'a> {
95    fn next_token(&mut self) -> Option<Token<'a>> {
96        self.graphemes.next().map(|lexeme| match lexeme {
97            "(" => Token::LeftParen,
98            ")" => Token::RightParen,
99            "*" => Token::Star,
100            "+" => Token::Plus,
101            "|" => Token::Pipe,
102            "\\" => Token::BackSlash,
103            other => Token::GraphemeCluster(other),
104        })
105    }
106}
107
108impl<'a> std::iter::Iterator for Tokens<'a> {
109    type Item = Token<'a>;
110
111    fn next(&mut self) -> Option<Self::Item> {
112        self.next_token()
113    }
114}
115
#[cfg(test)]
mod test {
    use super::*;

    // The previous tests (left fully commented out) targeted an older,
    // offset-based `Token { kind, start, length }` API that no longer exists.
    // These exercise the current enum-based API instead.

    #[test]
    fn fixed_lexemes() {
        assert_eq!(Token::LeftParen.lexeme(), "(");
        assert_eq!(Token::BackSlash.lexeme(), "\\");
    }

    #[test]
    fn unicode_lexeme() {
        assert_eq!(Token::GraphemeCluster("💖").lexeme(), "💖");
    }

    #[test]
    fn ascii_tokens() {
        let scanner = Scanner::new("ab)c");
        let tokens: Vec<_> = scanner.tokens().collect();

        assert_eq!(
            tokens,
            vec![
                Token::GraphemeCluster("a"),
                Token::GraphemeCluster("b"),
                Token::RightParen,
                Token::GraphemeCluster("c"),
            ]
        );
    }

    #[test]
    fn unicode_tokens() {
        let scanner = Scanner::new("ab💖*");
        let tokens: Vec<_> = scanner.tokens().collect();

        assert_eq!(
            tokens,
            vec![
                Token::GraphemeCluster("a"),
                Token::GraphemeCluster("b"),
                Token::GraphemeCluster("💖"),
                Token::Star,
            ]
        );
    }

    #[test]
    fn peeking() {
        let scanner = Scanner::new("a(*|");
        let mut tokens = scanner.tokens().peekable();

        assert_eq!(tokens.next(), Some(Token::GraphemeCluster("a")));
        // Peeking must not consume the token.
        assert_eq!(tokens.peek(), Some(&Token::LeftParen));
        assert_eq!(tokens.peek(), Some(&Token::LeftParen));
        assert_eq!(tokens.next(), Some(Token::LeftParen));
        assert_eq!(tokens.next(), Some(Token::Star));
        assert_eq!(tokens.next(), Some(Token::Pipe));
        assert_eq!(tokens.next(), None);
    }
}
256}