// graphviz_parser/lex.rs

1use logos::Logos;
2
/// A Token represents all terminals supported by the graphviz dot format
///
/// For more info on the tokens, see
/// the graphviz language spec here: https://graphviz.org/doc/info/lang.html
#[derive(Logos, Debug, PartialEq, Clone)]
pub(crate) enum Token {
    // An identifier: either a double-quoted string (no escape handling) or a
    // bare run of alphanumerics/underscores. Logos prefers the longest match,
    // and the keyword `#[token]`s below win ties against this `#[regex]`, so
    // e.g. "graph" lexes as Token::Graph rather than Token::ID.
    #[regex("(\"[^\"]*\"|[a-zA-Z0-9_]+)")]
    ID,

    #[token("strict")]
    Strict,

    #[token("subgraph")]
    Subgraph,
    #[token("graph")]
    Graph,
    #[token("digraph")]
    Digraph,
    #[token("node")]
    Node,
    #[token("edge")]
    Edge,
    #[token("->")]
    DirectedEdge,
    #[token("--")]
    UndirectedEdge,
    // NOTE(review): these two actually match curly braces '{' / '}', not
    // parentheses; the names are kept because the parser refers to them.
    #[token("{")]
    OpenParen,
    #[token("}")]
    CloseParen,

    #[token("[")]
    OpenBracket,

    #[token("]")]
    CloseBracket,

    #[token("=")]
    Equals,

    #[token(",")]
    Comma,

    #[token(";")]
    SemiColon,

    // Newlines act as statement separators in this grammar, so they are a
    // real token rather than skipped whitespace (see clear_filler below).
    #[token("\n")]
    NewLine,

    #[token(":")]
    Colon,

    #[token("\"")]
    Quotation,

    // Catch-all for unrecognized input (pre-0.13 logos API where Error is an
    // enum variant); spaces, tabs and form feeds are skipped outright.
    // NOTE(review): '\r' is neither skipped nor tokenized, so Windows line
    // endings would lex as Error — confirm inputs use '\n' endings only.
    #[error]
    #[regex(r"[ \t\f]", logos::skip)]
    Error,
}
62use logos::Span;
63
/// The Peekable Trait extends the underlying
/// token iterator to support basic lookahead;
/// it also provides access to the span and source
/// slice of the most recently consumed token.
pub trait Peekable<'a> {
    type Item;
    /// Returns the next token without consuming it.
    fn peek(&mut self) -> Option<&Self::Item>;
    /// Byte range in the source of the most recently consumed token.
    fn span(&self) -> Span;
    /// Source text of the most recently consumed token.
    fn slice(&self) -> &'a str;
}
73
/// A lexer wrapper that supports the method
/// .peek() in addition to the standard set
/// of lexing operations
#[derive(Clone)]
pub(crate) struct PeekableLexer<'a> {
    // The wrapped logos lexer that does the actual tokenizing.
    inner_lexer: logos::Lexer<'a, Token>,
    // One-token lookahead buffer: Some(..) after peek(), drained by next().
    peeked_token: Option<Token>,
    // Snapshot of the span of the most recently consumed token (needed
    // because peeking advances the inner lexer past it).
    curr_span: Span,
    // Snapshot of the source text of the most recently consumed token.
    curr_slice: &'a str,
}
84
85impl<'a> std::fmt::Debug for PeekableLexer<'a> {
86    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
87        let mut v = self.clone();
88        while v.next().is_some() {
89            write!(f, "{} ", v.slice())?;
90        }
91        write!(f, "\n")
92    }
93}
94
95impl<'a> std::iter::Iterator for PeekableLexer<'a> {
96    type Item = Token;
97
98    /// This will consume the next token if we don't have an existing token that
99    /// has earlier been peeked, otherwise it will return the peeked token
100    fn next(&mut self) -> Option<Token> {
101        if let Some(inner_tok) = self.peeked_token.take() {
102            self.update_splice();
103            Some(inner_tok)
104        } else {
105            let token = self.inner_lexer.next();
106            self.update_splice();
107            token
108        }
109    }
110}
111
112impl<'a> Peekable<'a> for PeekableLexer<'a> {
113    type Item = Token;
114
115    fn peek(&mut self) -> Option<&Token> {
116        if self.peeked_token.is_none() {
117            self.update_splice();
118            self.peeked_token = self.inner_lexer.next();
119        }
120        self.peeked_token.as_ref()
121    }
122
123    fn span(&self) -> Span {
124        if self.peeked_token.is_none() {
125            self.inner_lexer.span()
126        } else {
127            self.curr_span.clone()
128        }
129    }
130
131    fn slice(&self) -> &'a str {
132        self.curr_slice.as_ref()
133    }
134}
135
136impl<'a> PeekableLexer<'a> {
137    /// Creates a new lexer from a raw string
138    pub fn from(ref_str: &'a str) -> Self {
139        let inner_lexer = logos::Lexer::new(ref_str);
140        Self::from_lexer(inner_lexer)
141    }
142
143    /// Constructs a new instance of the PeekableLexer
144    /// from an existing underlying lexer
145    fn from_lexer(inner_lexer: logos::Lexer<'a, Token>) -> Self {
146        let curr_span = inner_lexer.span().clone();
147        let curr_slice = inner_lexer.slice();
148        Self {
149            inner_lexer,
150            peeked_token: None,
151            curr_span,
152            curr_slice,
153        }
154    }
155
156    /// A utility method used to clear out lines that only used to delimit constructions
157    pub(crate) fn clear_filler(&mut self) {
158        while self.peek() == Some(&Token::NewLine) || self.peek() == Some(&Token::SemiColon) {
159            self.next();
160        }
161    }
162
163    fn update_splice(&mut self) {
164        self.curr_span = self.inner_lexer.span();
165        self.curr_slice = self.inner_lexer.slice();
166    }
167}
168
#[cfg(test)]
mod tests {

    use super::*;

    #[test]
    fn lexer_test_lex_basic_dotfile() {
        let test_str = "strict graph { 
                        a -- b
                        b -- a [color=blue]
                        }
        ";
        let mut lexer_sut = PeekableLexer::from(test_str);
        assert_eq!(lexer_sut.next(), Some(Token::Strict));
        assert_eq!(lexer_sut.next(), Some(Token::Graph));
        assert_eq!(lexer_sut.next(), Some(Token::OpenParen));
        assert_eq!(lexer_sut.next(), Some(Token::NewLine));
        assert_eq!(lexer_sut.next(), Some(Token::ID));
        assert_eq!(lexer_sut.next(), Some(Token::UndirectedEdge));
        assert_eq!(lexer_sut.next(), Some(Token::ID));
        assert_eq!(lexer_sut.next(), Some(Token::NewLine));
        assert_eq!(lexer_sut.next(), Some(Token::ID));
        assert_eq!(lexer_sut.next(), Some(Token::UndirectedEdge));
        assert_eq!(lexer_sut.next(), Some(Token::ID));

        assert_eq!(lexer_sut.next(), Some(Token::OpenBracket));
        assert_eq!(lexer_sut.next(), Some(Token::ID));
        assert_eq!(lexer_sut.next(), Some(Token::Equals));
        assert_eq!(lexer_sut.next(), Some(Token::ID));
        assert_eq!(lexer_sut.next(), Some(Token::CloseBracket));
        assert_eq!(lexer_sut.next(), Some(Token::NewLine));
    }

    #[test]
    fn token_test_for_id_regex() {
        let test_str = "\"___ooogabooga:asdf\"";
        let mut lxt = PeekableLexer::from(test_str);
        assert_eq!(Some(Token::ID), lxt.next());
        // Compare &str to &str directly instead of allocating a String
        // (clippy::cmp_owned).
        assert_eq!(test_str, lxt.slice());
    }

    #[test]
    fn lexer_peek_index_1_test() {
        let solution = vec!["big", "kahuna", "electric", "boogaloo"];
        // Build "big kahuna electric boogaloo " (trailing space per word) in
        // one pass, replacing the original collect-then-chars-then-flatten
        // chain (clippy::map_flatten / needless intermediate Vec).
        let test_text: String = solution.iter().map(|w| format!("{} ", w)).collect();

        let mut lexer_to_test = PeekableLexer::from(&test_text);
        for _val in solution {
            let peeked = lexer_to_test.peek().unwrap().clone();
            // `next()` already yields an owned Token; the old extra
            // `.clone()` on the Option was redundant.
            let consumed = lexer_to_test.next().unwrap();
            assert_eq!(peeked, consumed);
        }
    }

    #[test]
    fn lexer_no_semicolon_test() {
        let test_string = "
            hi
            there 
        ";
        let mut lexer = PeekableLexer::from(test_string);
        println!("{}", test_string);
        assert_eq!(lexer.next(), Some(Token::NewLine));
        assert_eq!(lexer.next(), Some(Token::ID));
        assert_eq!(lexer.next(), Some(Token::NewLine));

        assert_eq!(lexer.next(), Some(Token::ID));
        assert_eq!(lexer.next(), Some(Token::NewLine));
    }

    #[test]
    fn lexer_slice_indexing_1_test() {
        let solution = vec!["big ", "kahuna ", "electric ", "boogaloo "];
        // `concat` produces the identical string without the per-char
        // chars/flatten/collect chain (clippy::map_flatten).
        let test_text: String = solution.concat();
        let mut lexer_to_test = PeekableLexer::from(&test_text);

        for sol in solution {
            let _v = lexer_to_test.next();
            let _j = lexer_to_test.peek();
            assert_eq!(lexer_to_test.slice(), sol.trim());
        }
    }

    #[test]
    fn lexer_slice_indexing_2_test() {
        let solution = vec!["big ", "kahuna ", "electric ", "boogaloo "];
        let test_text: String = solution.concat();
        let mut lexer_to_test = PeekableLexer::from(&test_text);

        for sol in solution {
            let _v = lexer_to_test.next();
            assert_eq!(lexer_to_test.slice(), sol.trim());
        }
    }
}