mod comment;
mod number;
mod string;
mod symbol;
use nom::branch::alt;
use nom::bytes::complete::{is_a, is_not};
use nom::character::complete::{line_ending, space1};
use nom::combinator::map;
use nom::multi::many0;
use nom::IResult;
use yew::prelude::*;
use self::comment::*;
use self::number::*;
use self::string::*;
use self::symbol::*;
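/// A lexical token produced by [`tokenize`]. Each variant borrows its source
/// text from the input expression.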
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Token<'a> {
Comment(&'a str),
Whitespace(&'a str),
Break(&'a str),
Symbol(&'a str),
Literal(&'a str),
Operator(&'a str),
Unknown(&'a str),
Column(&'a str),
}
use Token::*;
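// Render a token as a styled `<span>`; `Break` tokens render as a `<br/>` instead.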
impl<'a> ToHtml for Token<'a> {
fn to_html(&self) -> Html {
html! {
if matches!(self, Break(_)) { <br /> } else {
<span class={self.class_name()}>{ self.content() }</span>
}
}
}
}
impl<'a> Token<'a> {
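    /// The CSS class applied to this token's `<span>` when rendered to HTML.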
const fn class_name(&self) -> &'static str {
match self {
Comment(_) => "comment",
Whitespace(_) => "whitespace",
Symbol(_) => "symbol",
Operator(_) => "operator",
Unknown(_) => "unknown",
Break(_) => "break",
Literal(_) => "literal",
Column(_) => "column",
}
}
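    /// The raw source text captured by this token.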
    pub const fn content(&self) -> &'a str {
        match self {
            Comment(x)
            | Whitespace(x)
            | Symbol(x)
            | Operator(x)
            | Unknown(x)
            | Break(x)
            | Literal(x)
            | Column(x) => x,
        }
    }
}
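/// Parses a string literal delimited by `sep`, mapping each line of the match
/// through `lit` and interspersing [`Token::Break`] so that multi-line
/// literals render across multiple lines.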
#[allow(clippy::redundant_closure)]
fn parse_multiline_string<'a>(
sep: char,
lit: impl Fn(&'a str) -> Token<'a>,
) -> impl FnMut(&'a str) -> IResult<&'a str, Vec<Token<'a>>> {
map(parse_string_literal(sep), move |x| {
x.into_iter()
.map(|x| lit(x))
.intersperse(Token::Break("\n"))
.collect()
})
}
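/// Splits an expression into display [`Token`]s for syntax highlighting.
/// Tokenization never fails; any trailing text the parsers cannot consume is
/// logged and captured as a single [`Token::Unknown`].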
pub fn tokenize(input: &str) -> Vec<Token<'_>> {
let comment = map(parse_comment, |x| vec![Token::Comment(x)]);
let string = parse_multiline_string('\'', Token::Literal);
let column = parse_multiline_string('"', Token::Column);
let symbol = map(parse_symbol_literal, |x| vec![Token::Symbol(x)]);
let number = map(parse_number_literal, |x| vec![Token::Literal(x)]);
let whitespace = map(space1, |x| vec![Token::Whitespace(x)]);
let linebreak = map(line_ending, |x| vec![Token::Break(x)]);
let ops = map(is_a("+-/*^%&|=:;,.(){}[]<>\\"), |x| {
vec![Token::Operator(x)]
});
let unknown = map(is_not(" \t\n\r"), |x| vec![Token::Unknown(x)]);
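    // `alt` tries these parsers in declaration order, so the catch-all
    // `unknown` parser must stay last.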
let token = alt((
comment, string, column, symbol, number, whitespace, linebreak, ops, unknown,
));
let mut expr = map(many0(token), |x| x.into_iter().flatten().collect());
let (rest, mut tokens) = expr(input).unwrap_or_else(|_| (input, vec![]));
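    // `many0` stops at the first unparseable input; keep whatever remains as a
    // single `Unknown` token so nothing is silently dropped.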
if !rest.is_empty() {
tracing::warn!(
"Parser terminated at position {}: {}",
input.len() - rest.len(),
input
);
        tokens.push(Token::Unknown(rest));
}
tokens
}
#[cfg(test)]
mod tests {
use wasm_bindgen_test::*;
use super::*;
#[wasm_bindgen_test]
fn test_simple() {
let s = "123 abc 'hello' \"Sales\"";
assert_eq!(tokenize(s), vec![
Literal("123"),
Whitespace(" "),
Symbol("abc"),
Whitespace(" "),
Column("'hello'"),
Whitespace(" "),
Literal("\"Sales\"")
]);
}
#[wasm_bindgen_test]
fn test_complex_string() {
let s = "'this is 'a \"test of\" strings";
assert_eq!(tokenize(s), vec![
Column("'this is '"),
Symbol("a"),
Whitespace(" "),
Literal("\"test of\""),
Whitespace(" "),
Symbol("strings"),
]);
}
#[wasm_bindgen_test]
fn test_comment_newline() {
let s = "// Title\n1 + 2";
assert_eq!(tokenize(s), vec![
Comment("// Title"),
Break("\n"),
Literal("1"),
Whitespace(" "),
Operator("+"),
Whitespace(" "),
Literal("2"),
]);
}
#[wasm_bindgen_test]
fn test_escape_strings() {
let s = "'test\\/'";
assert_eq!(tokenize(s), vec![Literal("'test\\/'"),]);
}
}