plotnik_compiler/parser/
lexer.rs1use logos::Logos;
11use rowan::TextRange;
12use std::ops::Range;
13
14use super::cst::SyntaxKind;
15
16#[derive(Debug, Clone, Copy, PartialEq, Eq)]
18pub struct Token {
19 pub kind: SyntaxKind,
20 pub span: TextRange,
21}
22
23impl Token {
24 #[inline]
25 pub fn new(kind: SyntaxKind, span: TextRange) -> Self {
26 Self { kind, span }
27 }
28}
29
30fn range_to_text_range(range: Range<usize>) -> TextRange {
31 TextRange::new((range.start as u32).into(), (range.end as u32).into())
32}
33
34pub fn lex(source: &str) -> Vec<Token> {
41 let mut tokens = Vec::new();
42 let mut lexer = SyntaxKind::lexer(source);
43 let mut error_start: Option<usize> = None;
44
45 loop {
46 match lexer.next() {
47 Some(Ok(kind)) => {
48 if let Some(start) = error_start.take() {
49 let end = lexer.span().start;
50 tokens.push(Token::new(
51 SyntaxKind::Garbage,
52 range_to_text_range(start..end),
53 ));
54 }
55
56 let span = lexer.span();
57 match kind {
58 SyntaxKind::StringLiteral => {
59 split_string_literal(source, span, &mut tokens);
60 }
61 SyntaxKind::RegexPredicateMatch => {
62 split_regex_predicate(source, span, SyntaxKind::OpRegexMatch, &mut tokens);
63 }
64 SyntaxKind::RegexPredicateNoMatch => {
65 split_regex_predicate(source, span, SyntaxKind::OpRegexNoMatch, &mut tokens);
66 }
67 _ => {
68 tokens.push(Token::new(kind, range_to_text_range(span)));
69 }
70 }
71 }
72 Some(Err(())) => {
73 if error_start.is_none() {
74 error_start = Some(lexer.span().start);
75 }
76 }
77 None => {
78 if let Some(start) = error_start.take() {
79 tokens.push(Token::new(
80 SyntaxKind::Garbage,
81 range_to_text_range(start..source.len()),
82 ));
83 }
84 break;
85 }
86 }
87 }
88
89 tokens
90}
91
92fn split_string_literal(source: &str, span: Range<usize>, tokens: &mut Vec<Token>) {
94 let text = &source[span.clone()];
95 let quote_char = text.chars().next().unwrap();
96 let quote_kind = if quote_char == '"' {
97 SyntaxKind::DoubleQuote
98 } else {
99 SyntaxKind::SingleQuote
100 };
101
102 let start = span.start;
103 let end = span.end;
104
105 tokens.push(Token::new(
106 quote_kind,
107 range_to_text_range(start..start + 1),
108 ));
109
110 if end - start > 2 {
111 tokens.push(Token::new(
112 SyntaxKind::StrVal,
113 range_to_text_range(start + 1..end - 1),
114 ));
115 }
116
117 tokens.push(Token::new(quote_kind, range_to_text_range(end - 1..end)));
118}
119
120fn split_regex_predicate(
125 source: &str,
126 span: Range<usize>,
127 op_kind: SyntaxKind,
128 tokens: &mut Vec<Token>,
129) {
130 let text = &source[span.clone()];
131 let start = span.start;
132
133 tokens.push(Token::new(op_kind, range_to_text_range(start..start + 2)));
135
136 let regex_start_in_text = text[2..].find('/').unwrap() + 2;
138
139 if regex_start_in_text > 2 {
141 tokens.push(Token::new(
142 SyntaxKind::Whitespace,
143 range_to_text_range(start + 2..start + regex_start_in_text),
144 ));
145 }
146
147 tokens.push(Token::new(
149 SyntaxKind::RegexLiteral,
150 range_to_text_range(start + regex_start_in_text..span.end),
151 ));
152}
153
154#[inline]
156pub fn token_text<'q>(source: &'q str, token: &Token) -> &'q str {
157 &source[std::ops::Range::<usize>::from(token.span)]
158}