Skip to main content

perspective_viewer/exprtk/
tokenize.rs

1// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
2// ┃ ██████ ██████ ██████       █      █      █      █      █ █▄  ▀███ █       ┃
3// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█  ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄  ▀█ █ ▀▀▀▀▀ ┃
4// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄   █ ▄▄▄▄▄ ┃
5// ┃ █      ██████ █  ▀█▄       █ ██████      █      ███▌▐███ ███████▄ █       ┃
6// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
7// ┃ Copyright (c) 2017, the Perspective Authors.                              ┃
8// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
9// ┃ This file is part of the Perspective library, distributed under the terms ┃
10// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
11// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
12
13mod comment;
14mod number;
15mod string;
16mod symbol;
17
18use nom::IResult;
19use nom::branch::alt;
20use nom::bytes::complete::{is_a, is_not};
21use nom::character::complete::{line_ending, space1};
22use nom::combinator::map;
23use nom::multi::many0;
24use yew::prelude::*;
25
26use self::comment::*;
27use self::number::*;
28use self::string::*;
29use self::symbol::*;
30
/// Syntax-highlightable ExprTK tokens.
///
/// We had the option of implementing this alternatively as `pub struct
/// Token(TokenType, &'a str);`, but I felt this was less ergonomic for the
/// frequent pattern matching necessary when handling enum tokens.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Token<'a> {
    /// `//`
    Comment(&'a str),

    /// ` `
    Whitespace(&'a str),

    /// `\n`
    Break(&'a str),

    /// `x`
    Symbol(&'a str),

    /// `12`
    Literal(&'a str),

    /// `+`
    Operator(&'a str),

    /// `#`
    Unknown(&'a str),

    /// `"Sales"`
    Column(&'a str),
}
62
63use Token::*;
64
impl Token<'_> {
    /// Render this token as [`Html`] for syntax highlighting.
    ///
    /// A `Break` token becomes a literal `<br/>` element; every other variant
    /// becomes a `<span>` whose class (from `class_name`) drives CSS
    /// highlighting and whose body is the token's raw source text.
    pub fn to_html(&self) -> Html {
        html! {
            if matches!(self, Break(_)) {
                <br />
            } else {
                <span class={self.class_name()}>{ self.content() }</span>
            }
        }
    }
}
76
77impl<'a> Token<'a> {
78    const fn class_name(&self) -> &'static str {
79        match self {
80            Comment(_) => "comment",
81            Whitespace(_) => "whitespace",
82            Symbol(_) => "symbol",
83            Operator(_) => "operator",
84            Unknown(_) => "unknown",
85            Break(_) => "break",
86            Literal(_) => "literal",
87            Column(_) => "column",
88        }
89    }
90
91    /// Note the use of the lifetime `'a` - this function does not work
92    /// correctly when it's signature is specified `-> &'_ str` instead, as
93    /// `self` and the `str` may have different lifetimes.
94    pub const fn content(&self) -> &'a str {
95        match self {
96            Comment(x) => x,
97            Whitespace(x) => x,
98            Symbol(x) => x,
99            Operator(x) => x,
100            Unknown(x) => x,
101            Break(x) => x,
102            Literal(x) => x,
103            Column(x) => x,
104        }
105    }
106}
107
108#[allow(clippy::redundant_closure)]
109fn parse_multiline_string<'a>(
110    sep: char,
111    lit: impl Fn(&'a str) -> Token<'a>,
112) -> impl FnMut(&'a str) -> IResult<&'a str, Vec<Token<'a>>> {
113    map(parse_string_literal(sep), move |x| {
114        x.into_iter()
115            .map(|x| lit(x))
116            .intersperse(Token::Break("\n"))
117            .collect()
118    })
119}
120
121/// Parse a string representing an ExprTK Expression Column into `Token`s. A
122/// token list is sufficient for syntax-highlighting purposes, faster than a
123/// full parser and much easier to write a renderer for.
124pub fn tokenize(input: &str) -> Vec<Token<'_>> {
125    let comment = map(parse_comment, |x| vec![Token::Comment(x)]);
126    let string = parse_multiline_string('\'', Token::Literal);
127    let column = parse_multiline_string('"', Token::Column);
128    let symbol = map(parse_symbol_literal, |x| vec![Token::Symbol(x)]);
129    let number = map(parse_number_literal, |x| vec![Token::Literal(x)]);
130    let whitespace = map(space1, |x| vec![Token::Whitespace(x)]);
131    let linebreak = map(line_ending, |x| vec![Token::Break(x)]);
132    let ops = map(is_a("+-/*^%&|=:;,.(){}[]<>\\"), |x| {
133        vec![Token::Operator(x)]
134    });
135    let unknown = map(is_not(" \t\n\r"), |x| vec![Token::Unknown(x)]);
136    let token = alt((
137        comment, string, column, symbol, number, whitespace, linebreak, ops, unknown,
138    ));
139
140    let mut expr = map(many0(token), |x| x.into_iter().flatten().collect());
141    let (rest, mut tokens) = expr(input).unwrap_or_else(|_| (input, vec![]));
142    if !rest.is_empty() {
143        tracing::warn!(
144            "Parser terminated at position {}: {}",
145            input.len() - rest.len(),
146            input
147        );
148
149        tokens.push(Token::Unknown(rest))
150    }
151
152    tokens
153}
154
#[cfg(test)]
mod tests {
    use wasm_bindgen_test::*;

    use super::*;

    // NOTE(review): the expected variants in `test_simple` and
    // `test_complex_string` were swapped relative to `tokenize`, which maps
    // single-quoted strings to `Literal` and double-quoted strings to
    // `Column` (consistent with the `Token` variant docs and with
    // `test_escape_strings` below). Fixed here.

    #[wasm_bindgen_test]
    fn test_simple() {
        let s = "123 abc 'hello' \"Sales\"";
        assert_eq!(tokenize(s), vec![
            Literal("123"),
            Whitespace(" "),
            Symbol("abc"),
            Whitespace(" "),
            Literal("'hello'"),
            Whitespace(" "),
            Column("\"Sales\"")
        ]);
    }

    #[wasm_bindgen_test]
    fn test_complex_string() {
        let s = "'this is 'a \"test of\" strings";
        assert_eq!(tokenize(s), vec![
            Literal("'this is '"),
            Symbol("a"),
            Whitespace(" "),
            Column("\"test of\""),
            Whitespace(" "),
            Symbol("strings"),
        ]);
    }

    #[wasm_bindgen_test]
    fn test_comment_newline() {
        let s = "// Title\n1 + 2";
        assert_eq!(tokenize(s), vec![
            Comment("// Title"),
            Break("\n"),
            Literal("1"),
            Whitespace(" "),
            Operator("+"),
            Whitespace(" "),
            Literal("2"),
        ]);
    }

    #[wasm_bindgen_test]
    fn test_escape_strings() {
        let s = "'test\\/'";
        assert_eq!(tokenize(s), vec![Literal("'test\\/'")]);
    }
}