perspective_viewer/exprtk/
tokenize.rs

1// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
2// ┃ ██████ ██████ ██████       █      █      █      █      █ █▄  ▀███ █       ┃
3// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█  ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄  ▀█ █ ▀▀▀▀▀ ┃
4// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄   █ ▄▄▄▄▄ ┃
5// ┃ █      ██████ █  ▀█▄       █ ██████      █      ███▌▐███ ███████▄ █       ┃
6// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
7// ┃ Copyright (c) 2017, the Perspective Authors.                              ┃
8// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
9// ┃ This file is part of the Perspective library, distributed under the terms ┃
10// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
11// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
12
13mod comment;
14mod number;
15mod string;
16mod symbol;
17
18use nom::IResult;
19use nom::branch::alt;
20use nom::bytes::complete::{is_a, is_not};
21use nom::character::complete::{line_ending, space1};
22use nom::combinator::map;
23use nom::multi::many0;
24use yew::prelude::*;
25
26use self::comment::*;
27use self::number::*;
28use self::string::*;
29use self::symbol::*;
30
/// Syntax-highlightable ExprTK tokens.
///
///  We had the option of implementing this alternatively as `pub struct
/// Token(TokenType, &'a str);`, but I felt this was less ergonomic for the
/// frequent pattern matching necessary when handling enum tokens.
///
/// Each variant borrows the slice of the original input it was parsed from.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Token<'a> {
    /// A `//` line comment (trailing newline not included).
    Comment(&'a str),
    /// A run of spaces/tabs (`space1`).
    Whitespace(&'a str),
    /// A line ending, or the `"\n"` inserted between multiline string
    /// segments; rendered as `<br/>`.
    Break(&'a str),
    /// A bare identifier, e.g. `abc` (see `parse_symbol_literal`).
    Symbol(&'a str),
    /// A number or a single-quoted string literal.
    Literal(&'a str),
    /// A run of operator/punctuation characters.
    Operator(&'a str),
    /// Fallback for any non-whitespace text no other parser accepted.
    Unknown(&'a str),
    /// A double-quoted column reference.
    Column(&'a str),
}
47
48use Token::*;
49
50impl ToHtml for Token<'_> {
51    fn to_html(&self) -> Html {
52        html! {
53            if matches!(self, Break(_)) { <br /> } else {
54                <span class={self.class_name()}>{ self.content() }</span>
55            }
56        }
57    }
58}
59
60impl<'a> Token<'a> {
61    const fn class_name(&self) -> &'static str {
62        match self {
63            Comment(_) => "comment",
64            Whitespace(_) => "whitespace",
65            Symbol(_) => "symbol",
66            Operator(_) => "operator",
67            Unknown(_) => "unknown",
68            Break(_) => "break",
69            Literal(_) => "literal",
70            Column(_) => "column",
71        }
72    }
73
74    /// Note the use of the lifetime `'a` - this function does not work
75    /// correctly when it's signature is specified `-> &'_ str` instead, as
76    /// `self` and the `str` may have different lifetimes.
77    pub const fn content(&self) -> &'a str {
78        match self {
79            Comment(x) => x,
80            Whitespace(x) => x,
81            Symbol(x) => x,
82            Operator(x) => x,
83            Unknown(x) => x,
84            Break(x) => x,
85            Literal(x) => x,
86            Column(x) => x,
87        }
88    }
89}
90
91#[allow(clippy::redundant_closure)]
92fn parse_multiline_string<'a>(
93    sep: char,
94    lit: impl Fn(&'a str) -> Token<'a>,
95) -> impl FnMut(&'a str) -> IResult<&'a str, Vec<Token<'a>>> {
96    map(parse_string_literal(sep), move |x| {
97        x.into_iter()
98            .map(|x| lit(x))
99            .intersperse(Token::Break("\n"))
100            .collect()
101    })
102}
103
/// Parse a string representing an ExprTK Expression Column into `Token`s. A
/// token list is sufficient for syntax-highlighting purposes, faster than a
/// full parser and much easier to write a renderer for.
///
/// Tokenization is best-effort: if parsing stops before the end of `input`,
/// the remainder is appended as a single `Token::Unknown` so no text is
/// ever dropped from the output.
pub fn tokenize(input: &str) -> Vec<Token<'_>> {
    let comment = map(parse_comment, |x| vec![Token::Comment(x)]);
    // Single-quoted strings are literals; double-quoted strings are column
    // references.
    let string = parse_multiline_string('\'', Token::Literal);
    let column = parse_multiline_string('"', Token::Column);
    let symbol = map(parse_symbol_literal, |x| vec![Token::Symbol(x)]);
    let number = map(parse_number_literal, |x| vec![Token::Literal(x)]);
    let whitespace = map(space1, |x| vec![Token::Whitespace(x)]);
    let linebreak = map(line_ending, |x| vec![Token::Break(x)]);
    let ops = map(is_a("+-/*^%&|=:;,.(){}[]<>\\"), |x| {
        vec![Token::Operator(x)]
    });
    let unknown = map(is_not(" \t\n\r"), |x| vec![Token::Unknown(x)]);
    // `alt` tries alternatives in declaration order, so `unknown` must stay
    // last: it is the catch-all for any non-whitespace run nothing else
    // accepts.
    let token = alt((
        comment, string, column, symbol, number, whitespace, linebreak, ops, unknown,
    ));

    // Each alternative yields a `Vec` (multiline strings expand to several
    // tokens), so the `many0` result is flattened into one list. The
    // `unwrap_or_else` is defensive: on error we fall back to an empty token
    // list with the full input as the unconsumed remainder.
    let mut expr = map(many0(token), |x| x.into_iter().flatten().collect());
    let (rest, mut tokens) = expr(input).unwrap_or_else(|_| (input, vec![]));
    if !rest.is_empty() {
        tracing::warn!(
            "Parser terminated at position {}: {}",
            input.len() - rest.len(),
            input
        );

        tokens.push(Token::Unknown(rest))
    }

    tokens
}
137
#[cfg(test)]
mod tests {
    use wasm_bindgen_test::*;

    use super::*;

    /// Single-quoted strings tokenize as `Literal` and double-quoted
    /// strings as `Column`, matching the parsers registered in `tokenize`.
    /// (The previous expectations had the two variants swapped, which also
    /// contradicted `test_escape_strings` below.)
    #[wasm_bindgen_test]
    fn test_simple() {
        let s = "123 abc 'hello' \"Sales\"";
        assert_eq!(tokenize(s), vec![
            Literal("123"),
            Whitespace(" "),
            Symbol("abc"),
            Whitespace(" "),
            Literal("'hello'"),
            Whitespace(" "),
            Column("\"Sales\"")
        ]);
    }

    /// Unterminated/adjacent quoting still yields the correct variants:
    /// `'…'` is a `Literal`, `"…"` is a `Column`.
    #[wasm_bindgen_test]
    fn test_complex_string() {
        let s = "'this is 'a \"test of\" strings";
        assert_eq!(tokenize(s), vec![
            Literal("'this is '"),
            Symbol("a"),
            Whitespace(" "),
            Column("\"test of\""),
            Whitespace(" "),
            Symbol("strings"),
        ]);
    }

    /// A comment consumes up to (not including) the newline, which becomes
    /// its own `Break` token.
    #[wasm_bindgen_test]
    fn test_comment_newline() {
        let s = "// Title\n1 + 2";
        assert_eq!(tokenize(s), vec![
            Comment("// Title"),
            Break("\n"),
            Literal("1"),
            Whitespace(" "),
            Operator("+"),
            Whitespace(" "),
            Literal("2"),
        ]);
    }

    /// Escapes inside a single-quoted literal stay inside that `Literal`.
    #[wasm_bindgen_test]
    fn test_escape_strings() {
        let s = "'test\\/'";
        assert_eq!(tokenize(s), vec![Literal("'test\\/'")]);
    }
}