mod comment;
mod number;
mod string;
mod symbol;
use nom::branch::alt;
use nom::bytes::complete::{is_a, is_not};
use nom::character::complete::{line_ending, space1};
use nom::combinator::map;
use nom::multi::many0;
use yew::prelude::*;
use self::comment::*;
use self::number::*;
use self::string::*;
use self::symbol::*;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Token<'a> {
Comment(&'a str),
Whitespace(&'a str),
Break(&'a str),
Symbol(&'a str),
Literal(&'a str),
Operator(&'a str),
Unknown(&'a str),
Column(&'a str),
}
use Token::*;
#[allow(clippy::use_self)]
impl<'a> From<Token<'a>> for Html {
fn from(x: Token<'a>) -> Self {
html! {
if matches!(x, Break(_)) {
<br/>
} else {
<span class={ x.class_name() }>
{ x.content() }
</span>
}
}
}
}
impl<'a> Token<'a> {
const fn class_name(&self) -> &'static str {
match self {
Comment(_) => "comment",
Whitespace(_) => "whitespace",
Symbol(_) => "symbol",
Operator(_) => "operator",
Unknown(_) => "unknown",
Break(_) => "break",
Literal(_) => "literal",
Column(_) => "column",
}
}
pub const fn content(&self) -> &'a str {
match self {
Comment(x) => x,
Whitespace(x) => x,
Symbol(x) => x,
Operator(x) => x,
Unknown(x) => x,
Break(x) => x,
Literal(x) => x,
Column(x) => x,
}
}
}
pub fn tokenize(input: &str) -> Vec<Token<'_>> {
let comment = map(parse_comment, Token::Comment);
let string = map(parse_string_literal('"'), Token::Literal);
let column = map(parse_string_literal('\''), Token::Column);
let symbol = map(parse_symbol_literal, Token::Symbol);
let number = map(parse_number_literal, Token::Literal);
let whitespace = map(space1, Token::Whitespace);
let linebreak = map(line_ending, Token::Break);
let ops = map(is_a("+-/*^%&|=:;,.(){}[]"), Token::Operator);
let unknown = map(is_not(" \t\n\r"), Token::Unknown);
let mut expr = many0(alt((
comment, string, column, symbol, number, whitespace, linebreak, ops, unknown,
)));
let (rest, mut tokens) = expr(input).unwrap_or_else(|_| (input, vec![]));
if !rest.is_empty() {
tracing::warn!(
"Parser terminated at position {}: {}",
input.len() - rest.len(),
input
);
tokens.push(Token::Unknown(rest))
}
tokens
}
#[cfg(test)]
mod tests {
use wasm_bindgen_test::*;
use super::*;
#[wasm_bindgen_test]
fn test_simple() {
let s = "123 abc 'hello' \"Sales\"";
assert_eq!(tokenize(s), vec![
Literal("123"),
Whitespace(" "),
Symbol("abc"),
Whitespace(" "),
Column("'hello'"),
Whitespace(" "),
Literal("\"Sales\"")
]);
}
#[wasm_bindgen_test]
fn test_complex_string() {
let s = "'this is 'a \"test of\" strings";
assert_eq!(tokenize(s), vec![
Column("'this is '"),
Symbol("a"),
Whitespace(" "),
Literal("\"test of\""),
Whitespace(" "),
Symbol("strings"),
]);
}
#[wasm_bindgen_test]
fn test_comment_newline() {
let s = "// Title\n1 + 2";
assert_eq!(tokenize(s), vec![
Comment("// Title"),
Break("\n"),
Literal("1"),
Whitespace(" "),
Operator("+"),
Whitespace(" "),
Literal("2"),
]);
}
}