plotnik_compiler/parser/
lexer.rs1use logos::Logos;
11use rowan::TextRange;
12use std::ops::Range;
13
14use super::cst::SyntaxKind;
15
16#[derive(Debug, Clone, Copy, PartialEq, Eq)]
18pub struct Token {
19 pub kind: SyntaxKind,
20 pub span: TextRange,
21}
22
23impl Token {
24 #[inline]
25 pub fn new(kind: SyntaxKind, span: TextRange) -> Self {
26 Self { kind, span }
27 }
28}
29
30fn range_to_text_range(range: Range<usize>) -> TextRange {
31 TextRange::new((range.start as u32).into(), (range.end as u32).into())
32}
33
34pub fn lex(source: &str) -> Vec<Token> {
41 let mut tokens = Vec::new();
42 let mut lexer = SyntaxKind::lexer(source);
43 let mut error_start: Option<usize> = None;
44
45 loop {
46 match lexer.next() {
47 Some(Ok(kind)) => {
48 if let Some(start) = error_start.take() {
49 let end = lexer.span().start;
50 tokens.push(Token::new(
51 SyntaxKind::Garbage,
52 range_to_text_range(start..end),
53 ));
54 }
55
56 let span = lexer.span();
57 match kind {
58 SyntaxKind::StringLiteral => {
59 split_string_literal(source, span, &mut tokens);
60 }
61 SyntaxKind::RegexPredicateMatch => {
62 split_regex_predicate(source, span, SyntaxKind::OpRegexMatch, &mut tokens);
63 }
64 SyntaxKind::RegexPredicateNoMatch => {
65 split_regex_predicate(
66 source,
67 span,
68 SyntaxKind::OpRegexNoMatch,
69 &mut tokens,
70 );
71 }
72 _ => {
73 tokens.push(Token::new(kind, range_to_text_range(span)));
74 }
75 }
76 }
77 Some(Err(())) => {
78 if error_start.is_none() {
79 error_start = Some(lexer.span().start);
80 }
81 }
82 None => {
83 if let Some(start) = error_start.take() {
84 tokens.push(Token::new(
85 SyntaxKind::Garbage,
86 range_to_text_range(start..source.len()),
87 ));
88 }
89 break;
90 }
91 }
92 }
93
94 tokens
95}
96
97fn split_string_literal(source: &str, span: Range<usize>, tokens: &mut Vec<Token>) {
99 let text = &source[span.clone()];
100 let quote_char = text.chars().next().unwrap();
101 let quote_kind = if quote_char == '"' {
102 SyntaxKind::DoubleQuote
103 } else {
104 SyntaxKind::SingleQuote
105 };
106
107 let start = span.start;
108 let end = span.end;
109
110 tokens.push(Token::new(
111 quote_kind,
112 range_to_text_range(start..start + 1),
113 ));
114
115 if end - start > 2 {
116 tokens.push(Token::new(
117 SyntaxKind::StrVal,
118 range_to_text_range(start + 1..end - 1),
119 ));
120 }
121
122 tokens.push(Token::new(quote_kind, range_to_text_range(end - 1..end)));
123}
124
125fn split_regex_predicate(
130 source: &str,
131 span: Range<usize>,
132 op_kind: SyntaxKind,
133 tokens: &mut Vec<Token>,
134) {
135 let text = &source[span.clone()];
136 let start = span.start;
137
138 tokens.push(Token::new(op_kind, range_to_text_range(start..start + 2)));
140
141 let regex_start_in_text = text[2..].find('/').unwrap() + 2;
143
144 if regex_start_in_text > 2 {
146 tokens.push(Token::new(
147 SyntaxKind::Whitespace,
148 range_to_text_range(start + 2..start + regex_start_in_text),
149 ));
150 }
151
152 tokens.push(Token::new(
154 SyntaxKind::RegexLiteral,
155 range_to_text_range(start + regex_start_in_text..span.end),
156 ));
157}
158
159#[inline]
161pub fn token_text<'q>(source: &'q str, token: &Token) -> &'q str {
162 &source[std::ops::Range::<usize>::from(token.span)]
163}