squawk_parser/
lexed_str.rs1use std::ops;
4
5use squawk_lexer::tokenize;
6
7use crate::SyntaxKind;
8
9pub struct LexedStr<'a> {
10 text: &'a str,
11 kind: Vec<SyntaxKind>,
12 start: Vec<u32>,
13 error: Vec<LexError>,
14}
15
/// A lexing error attached to a single token.
struct LexError {
    /// Human-readable description of the problem.
    msg: String,
    /// Index of the token the error belongs to.
    token: u32,
}
20
21impl<'a> LexedStr<'a> {
22 pub fn new(text: &'a str) -> LexedStr<'a> {
25 let mut conv = Converter::new(text);
26
27 for token in tokenize(&text[conv.offset..]) {
28 let token_text = &text[conv.offset..][..token.len as usize];
29
30 conv.extend_token(&token.kind, token_text);
31 }
32
33 conv.finalize_with_eof()
34 }
35
36 pub(crate) fn len(&self) -> usize {
59 self.kind.len() - 1
60 }
61
62 pub(crate) fn kind(&self, i: usize) -> SyntaxKind {
67 assert!(i < self.len());
68 self.kind[i]
69 }
70
71 pub(crate) fn text(&self, i: usize) -> &str {
72 self.range_text(i..i + 1)
73 }
74
75 pub(crate) fn range_text(&self, r: ops::Range<usize>) -> &str {
76 assert!(r.start < r.end && r.end <= self.len());
77 let lo = self.start[r.start] as usize;
78 let hi = self.start[r.end] as usize;
79 &self.text[lo..hi]
80 }
81
82 pub fn text_range(&self, i: usize) -> ops::Range<usize> {
84 assert!(i < self.len());
85 let lo = self.start[i] as usize;
86 let hi = self.start[i + 1] as usize;
87 lo..hi
88 }
89 pub fn text_start(&self, i: usize) -> usize {
90 assert!(i <= self.len());
91 self.start[i] as usize
92 }
93 pub fn errors(&self) -> impl Iterator<Item = (usize, &str)> + '_ {
109 self.error
110 .iter()
111 .map(|it| (it.token as usize, it.msg.as_str()))
112 }
113
114 fn push(&mut self, kind: SyntaxKind, offset: usize) {
115 self.kind.push(kind);
116 self.start.push(offset as u32);
117 }
118}
119
120struct Converter<'a> {
121 res: LexedStr<'a>,
122 offset: usize,
123}
124
125impl<'a> Converter<'a> {
126 fn new(text: &'a str) -> Self {
127 Self {
128 res: LexedStr {
129 text,
130 kind: Vec::new(),
131 start: Vec::new(),
132 error: Vec::new(),
133 },
134 offset: 0,
135 }
136 }
137
138 fn finalize_with_eof(mut self) -> LexedStr<'a> {
139 self.res.push(SyntaxKind::EOF, self.offset);
140 self.res
141 }
142
143 fn push(&mut self, kind: SyntaxKind, len: usize, err: Option<&str>) {
144 self.res.push(kind, self.offset);
145 self.offset += len;
146
147 if let Some(err) = err {
148 let token = self.res.len() as u32;
149 let msg = err.to_owned();
150 self.res.error.push(LexError { msg, token });
151 }
152 }
153
154 fn extend_token(&mut self, kind: &squawk_lexer::TokenKind, token_text: &str) {
155 let mut err = "";
160
161 let syntax_kind = {
162 match kind {
163 squawk_lexer::TokenKind::LineComment => SyntaxKind::COMMENT,
164 squawk_lexer::TokenKind::BlockComment { terminated } => {
165 if !terminated {
166 err = "Missing trailing `*/` symbols to terminate the block comment";
167 }
168 SyntaxKind::COMMENT
169 }
170
171 squawk_lexer::TokenKind::Whitespace => SyntaxKind::WHITESPACE,
172 squawk_lexer::TokenKind::Ident => {
173 SyntaxKind::from_keyword(token_text).unwrap_or(SyntaxKind::IDENT)
184 }
185 squawk_lexer::TokenKind::Literal { kind, .. } => {
186 self.extend_literal(token_text.len(), kind);
187 return;
188 }
189 squawk_lexer::TokenKind::Semi => SyntaxKind::SEMICOLON,
190 squawk_lexer::TokenKind::Comma => SyntaxKind::COMMA,
191 squawk_lexer::TokenKind::Dot => SyntaxKind::DOT,
192 squawk_lexer::TokenKind::OpenParen => SyntaxKind::L_PAREN,
193 squawk_lexer::TokenKind::CloseParen => SyntaxKind::R_PAREN,
194 squawk_lexer::TokenKind::OpenBracket => SyntaxKind::L_BRACK,
195 squawk_lexer::TokenKind::CloseBracket => SyntaxKind::R_BRACK,
196 squawk_lexer::TokenKind::At => SyntaxKind::AT,
197 squawk_lexer::TokenKind::Pound => SyntaxKind::POUND,
198 squawk_lexer::TokenKind::Tilde => SyntaxKind::TILDE,
199 squawk_lexer::TokenKind::Question => SyntaxKind::QUESTION,
200 squawk_lexer::TokenKind::Colon => SyntaxKind::COLON,
201 squawk_lexer::TokenKind::Eq => SyntaxKind::EQ,
202 squawk_lexer::TokenKind::Bang => SyntaxKind::BANG,
203 squawk_lexer::TokenKind::Lt => SyntaxKind::L_ANGLE,
204 squawk_lexer::TokenKind::Gt => SyntaxKind::R_ANGLE,
205 squawk_lexer::TokenKind::Minus => SyntaxKind::MINUS,
206 squawk_lexer::TokenKind::And => SyntaxKind::AMP,
207 squawk_lexer::TokenKind::Or => SyntaxKind::PIPE,
208 squawk_lexer::TokenKind::Plus => SyntaxKind::PLUS,
209 squawk_lexer::TokenKind::Star => SyntaxKind::STAR,
210 squawk_lexer::TokenKind::Slash => SyntaxKind::SLASH,
211 squawk_lexer::TokenKind::Caret => SyntaxKind::CARET,
212 squawk_lexer::TokenKind::Percent => SyntaxKind::PERCENT,
213 squawk_lexer::TokenKind::Unknown => SyntaxKind::ERROR,
214 squawk_lexer::TokenKind::UnknownPrefix => {
215 err = "unknown literal prefix";
216 SyntaxKind::IDENT
217 }
218 squawk_lexer::TokenKind::Eof => SyntaxKind::EOF,
219 squawk_lexer::TokenKind::Backtick => SyntaxKind::BACKTICK,
220 squawk_lexer::TokenKind::PositionalParam => SyntaxKind::POSITIONAL_PARAM,
221 squawk_lexer::TokenKind::QuotedIdent { terminated } => {
222 if !terminated {
223 err = "Missing trailing \" to terminate the quoted identifier"
224 }
225 SyntaxKind::IDENT
226 }
227 }
228 };
229
230 let err = if err.is_empty() { None } else { Some(err) };
231 self.push(syntax_kind, token_text.len(), err);
232 }
233
234 fn extend_literal(&mut self, len: usize, kind: &squawk_lexer::LiteralKind) {
235 let mut err = "";
236
237 let syntax_kind = match *kind {
238 squawk_lexer::LiteralKind::Int { empty_int, base: _ } => {
239 if empty_int {
240 err = "Missing digits after the integer base prefix";
241 }
242 SyntaxKind::INT_NUMBER
243 }
244 squawk_lexer::LiteralKind::Float {
245 empty_exponent,
246 base: _,
247 } => {
248 if empty_exponent {
249 err = "Missing digits after the exponent symbol";
250 }
251 SyntaxKind::FLOAT_NUMBER
252 }
253 squawk_lexer::LiteralKind::Str { terminated } => {
254 if !terminated {
255 err = "Missing trailing `'` symbol to terminate the string literal";
256 }
257 SyntaxKind::STRING
259 }
260 squawk_lexer::LiteralKind::ByteStr { terminated } => {
261 if !terminated {
262 err = "Missing trailing `'` symbol to terminate the hex bit string literal";
263 }
264 SyntaxKind::BYTE_STRING
266 }
267 squawk_lexer::LiteralKind::BitStr { terminated } => {
268 if !terminated {
269 err = "Missing trailing `\'` symbol to terminate the bit string literal";
270 }
271 SyntaxKind::BIT_STRING
273 }
274 squawk_lexer::LiteralKind::DollarQuotedString { terminated } => {
275 if !terminated {
276 err = "Unterminated dollar quoted string literal";
278 }
279 SyntaxKind::DOLLAR_QUOTED_STRING
281 }
282 squawk_lexer::LiteralKind::UnicodeEscStr { terminated } => {
283 if !terminated {
284 err = "Missing trailing `'` symbol to terminate the unicode escape string literal";
285 }
286 SyntaxKind::BYTE_STRING
288 }
289 squawk_lexer::LiteralKind::EscStr { terminated } => {
290 if !terminated {
291 err = "Missing trailing `\'` symbol to terminate the escape string literal";
292 }
293 SyntaxKind::ESC_STRING
295 }
296 };
297
298 let err = if err.is_empty() { None } else { Some(err) };
299 self.push(syntax_kind, len, err);
300 }
301}