use logos::Logos;
use rowan::TextRange;
use std::ops::Range;
use super::cst::SyntaxKind;
/// A single lexed token: its syntactic kind together with the range of the
/// source text it covers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Token {
/// Syntactic category assigned to this token.
pub kind: SyntaxKind,
/// Range of the source text covered by this token.
pub span: TextRange,
}
impl Token {
#[inline]
pub fn new(kind: SyntaxKind, span: TextRange) -> Self {
Self { kind, span }
}
}
/// Converts a `usize` range (as reported by the lexer) into a [`TextRange`].
///
/// # Panics
///
/// Panics if either bound of `range` does not fit in a `u32`. Text offsets are
/// 32-bit, and a plain `as u32` cast would silently wrap and produce spans that
/// point at the wrong part of the source.
fn range_to_text_range(range: Range<usize>) -> TextRange {
    // Fully-qualified `TryFrom` so this compiles regardless of which edition's
    // prelude is in effect.
    let narrow = |offset: usize| -> u32 {
        <u32 as std::convert::TryFrom<usize>>::try_from(offset)
            .expect("source offset does not fit in a 32-bit text offset")
    };
    TextRange::new(narrow(range.start).into(), narrow(range.end).into())
}
/// Tokenizes `source` into a flat list of [`Token`]s.
///
/// Input the lexer cannot recognise is not dropped: each run of consecutive
/// lexer errors is merged into a single `Garbage` token that extends from the
/// first error up to the start of the next valid token, or to the end of
/// `source` if no valid token follows.
///
/// Three composite token kinds are broken into finer-grained tokens before
/// being stored:
/// - `StringLiteral` is handed to `split_string_literal`;
/// - `RegexPredicateMatch` / `RegexPredicateNoMatch` are handed to
///   `split_regex_predicate` with the corresponding operator kind.
pub fn lex(source: &str) -> Vec<Token> {
    let mut lexer = SyntaxKind::lexer(source);
    let mut out = Vec::new();
    // Start offset of the error run currently being accumulated, if any.
    let mut garbage_from: Option<usize> = None;

    while let Some(result) = lexer.next() {
        let span = lexer.span();

        let kind = match result {
            Ok(kind) => kind,
            Err(()) => {
                // Only the first error of a run fixes where the garbage begins.
                garbage_from = garbage_from.or(Some(span.start));
                continue;
            }
        };

        // A valid token terminates any pending error run.
        if let Some(from) = garbage_from.take() {
            out.push(Token::new(
                SyntaxKind::Garbage,
                range_to_text_range(from..span.start),
            ));
        }

        match kind {
            SyntaxKind::StringLiteral => split_string_literal(source, span, &mut out),
            SyntaxKind::RegexPredicateMatch => {
                split_regex_predicate(source, span, SyntaxKind::OpRegexMatch, &mut out)
            }
            SyntaxKind::RegexPredicateNoMatch => {
                split_regex_predicate(source, span, SyntaxKind::OpRegexNoMatch, &mut out)
            }
            _ => out.push(Token::new(kind, range_to_text_range(span))),
        }
    }

    // Input ended while still inside an error run: it reaches to the end of `source`.
    if let Some(from) = garbage_from {
        out.push(Token::new(
            SyntaxKind::Garbage,
            range_to_text_range(from..source.len()),
        ));
    }

    out
}
/// Splits a quoted string literal located at `span` in `source` into separate
/// tokens and appends them to `tokens`.
///
/// The emitted sequence is: opening quote, a `StrVal` for the content between
/// the quotes (omitted when the literal is no longer than two bytes), closing
/// quote. Both quote tokens use `DoubleQuote` when the literal starts with
/// `"`, and `SingleQuote` otherwise.
///
/// # Panics
///
/// Panics if `span` is empty or is not a valid slice of `source`.
fn split_string_literal(source: &str, span: Range<usize>, tokens: &mut Vec<Token>) {
    let Range {
        start: open,
        end: close,
    } = span;

    // The first character of the literal decides which quote kind is reported
    // for both delimiters.
    let delimiter = match source[open..close].chars().next().unwrap() {
        '"' => SyntaxKind::DoubleQuote,
        _ => SyntaxKind::SingleQuote,
    };

    tokens.push(Token::new(delimiter, range_to_text_range(open..open + 1)));

    // Anything longer than the two quote bytes has content in between.
    if close - open > 2 {
        let inner = open + 1..close - 1;
        tokens.push(Token::new(SyntaxKind::StrVal, range_to_text_range(inner)));
    }

    tokens.push(Token::new(delimiter, range_to_text_range(close - 1..close)));
}
/// Splits a regex predicate located at `span` in `source` into its parts and
/// appends them to `tokens`.
///
/// The first two bytes of the span become a token of kind `op_kind`. Any text
/// between the operator and the first `/` that follows it becomes a single
/// `Whitespace` token (omitted when the `/` follows immediately). The rest of
/// the span, starting at that `/`, becomes a `RegexLiteral` token.
///
/// # Panics
///
/// Panics if `span` is not a valid slice of `source`, is shorter than the
/// two-byte operator, or contains no `/` after the operator.
fn split_regex_predicate(
    source: &str,
    span: Range<usize>,
    op_kind: SyntaxKind,
    tokens: &mut Vec<Token>,
) {
    // Byte length of the operator that prefixes the predicate.
    const OP_LEN: usize = 2;

    let Range {
        start: base,
        end: limit,
    } = span;
    let predicate = &source[base..limit];

    tokens.push(Token::new(
        op_kind,
        range_to_text_range(base..base + OP_LEN),
    ));

    // Distance from the end of the operator to the opening `/` of the regex.
    let gap = predicate[OP_LEN..].find('/').unwrap();
    let regex_from = base + OP_LEN + gap;

    if gap > 0 {
        tokens.push(Token::new(
            SyntaxKind::Whitespace,
            range_to_text_range(base + OP_LEN..regex_from),
        ));
    }

    tokens.push(Token::new(
        SyntaxKind::RegexLiteral,
        range_to_text_range(regex_from..limit),
    ));
}
/// Returns the slice of `source` that `token` covers.
///
/// # Panics
///
/// Panics if the token's span is not a valid slice of `source`, e.g. when the
/// token was produced from a different string.
#[inline]
pub fn token_text<'q>(source: &'q str, token: &Token) -> &'q str {
    let covered: std::ops::Range<usize> = token.span.into();
    &source[covered]
}