squawk_parser/
lexed_str.rs1use std::ops;
4
5use squawk_lexer::tokenize;
6
7use crate::SyntaxKind;
8
9pub struct LexedStr<'a> {
10 text: &'a str,
11 kind: Vec<SyntaxKind>,
12 start: Vec<u32>,
13 error: Vec<LexError>,
14}
15
/// A single diagnostic produced while lexing.
///
/// Derives the common value traits so the record can be printed in
/// diagnostics, duplicated, and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
struct LexError {
    /// Human-readable description of the problem.
    msg: String,
    /// Index of the token the error is attached to.
    token: u32,
}
20
21impl<'a> LexedStr<'a> {
22 pub fn new(text: &'a str) -> LexedStr<'a> {
25 let mut conv = Converter::new(text);
26
27 for token in tokenize(&text[conv.offset..]) {
28 let token_text = &text[conv.offset..][..token.len as usize];
29
30 conv.extend_token(&token.kind, token_text);
31 }
32
33 conv.finalize_with_eof()
34 }
35
36 pub(crate) fn len(&self) -> usize {
59 self.kind.len() - 1
60 }
61
62 pub(crate) fn kind(&self, i: usize) -> SyntaxKind {
67 assert!(i < self.len());
68 self.kind[i]
69 }
70
71 pub(crate) fn text(&self, i: usize) -> &str {
72 self.range_text(i..i + 1)
73 }
74
75 pub(crate) fn range_text(&self, r: ops::Range<usize>) -> &str {
76 assert!(r.start < r.end && r.end <= self.len());
77 let lo = self.start[r.start] as usize;
78 let hi = self.start[r.end] as usize;
79 &self.text[lo..hi]
80 }
81
82 pub fn text_range(&self, i: usize) -> ops::Range<usize> {
84 assert!(i < self.len());
85 let lo = self.start[i] as usize;
86 let hi = self.start[i + 1] as usize;
87 lo..hi
88 }
89 pub fn text_start(&self, i: usize) -> usize {
90 assert!(i <= self.len());
91 self.start[i] as usize
92 }
93 pub fn errors(&self) -> impl Iterator<Item = (usize, &str)> + '_ {
109 self.error
110 .iter()
111 .map(|it| (it.token as usize, it.msg.as_str()))
112 }
113
114 fn push(&mut self, kind: SyntaxKind, offset: usize) {
115 self.kind.push(kind);
116 self.start.push(offset as u32);
117 }
118}
119
120struct Converter<'a> {
121 res: LexedStr<'a>,
122 offset: usize,
123}
124
125impl<'a> Converter<'a> {
126 fn new(text: &'a str) -> Self {
127 Self {
128 res: LexedStr {
129 text,
130 kind: Vec::new(),
131 start: Vec::new(),
132 error: Vec::new(),
133 },
134 offset: 0,
135 }
136 }
137
138 fn finalize_with_eof(mut self) -> LexedStr<'a> {
139 self.res.push(SyntaxKind::EOF, self.offset);
140 self.res
141 }
142
143 fn push(&mut self, kind: SyntaxKind, len: usize, err: Option<&str>) {
144 self.res.push(kind, self.offset);
145 self.offset += len;
146
147 if let Some(err) = err {
148 let token = self.res.len() as u32;
149 let msg = err.to_owned();
150 self.res.error.push(LexError { msg, token });
151 }
152 }
153
154 fn extend_token(&mut self, kind: &squawk_lexer::TokenKind, token_text: &str) {
155 let mut err = "";
160
161 let syntax_kind = {
162 match kind {
163 squawk_lexer::TokenKind::LineComment => SyntaxKind::COMMENT,
164 squawk_lexer::TokenKind::BlockComment { terminated } => {
165 if !terminated {
166 err = "Missing trailing `*/` symbols to terminate the block comment";
167 }
168 SyntaxKind::COMMENT
169 }
170
171 squawk_lexer::TokenKind::Whitespace => SyntaxKind::WHITESPACE,
172 squawk_lexer::TokenKind::Ident => {
173 SyntaxKind::from_keyword(token_text).unwrap_or(SyntaxKind::IDENT)
184 }
185 squawk_lexer::TokenKind::Literal { kind, .. } => {
186 self.extend_literal(token_text.len(), kind);
187 return;
188 }
189 squawk_lexer::TokenKind::Semi => SyntaxKind::SEMICOLON,
190 squawk_lexer::TokenKind::Comma => SyntaxKind::COMMA,
191 squawk_lexer::TokenKind::Dot => SyntaxKind::DOT,
192 squawk_lexer::TokenKind::OpenParen => SyntaxKind::L_PAREN,
193 squawk_lexer::TokenKind::CloseParen => SyntaxKind::R_PAREN,
194 squawk_lexer::TokenKind::OpenBracket => SyntaxKind::L_BRACK,
195 squawk_lexer::TokenKind::CloseBracket => SyntaxKind::R_BRACK,
196 squawk_lexer::TokenKind::OpenCurly => SyntaxKind::L_CURLY,
197 squawk_lexer::TokenKind::CloseCurly => SyntaxKind::R_CURLY,
198 squawk_lexer::TokenKind::At => SyntaxKind::AT,
199 squawk_lexer::TokenKind::Pound => SyntaxKind::POUND,
200 squawk_lexer::TokenKind::Tilde => SyntaxKind::TILDE,
201 squawk_lexer::TokenKind::Question => SyntaxKind::QUESTION,
202 squawk_lexer::TokenKind::Colon => SyntaxKind::COLON,
203 squawk_lexer::TokenKind::Eq => SyntaxKind::EQ,
204 squawk_lexer::TokenKind::Bang => SyntaxKind::BANG,
205 squawk_lexer::TokenKind::Lt => SyntaxKind::L_ANGLE,
206 squawk_lexer::TokenKind::Gt => SyntaxKind::R_ANGLE,
207 squawk_lexer::TokenKind::Minus => SyntaxKind::MINUS,
208 squawk_lexer::TokenKind::And => SyntaxKind::AMP,
209 squawk_lexer::TokenKind::Or => SyntaxKind::PIPE,
210 squawk_lexer::TokenKind::Plus => SyntaxKind::PLUS,
211 squawk_lexer::TokenKind::Star => SyntaxKind::STAR,
212 squawk_lexer::TokenKind::Slash => SyntaxKind::SLASH,
213 squawk_lexer::TokenKind::Caret => SyntaxKind::CARET,
214 squawk_lexer::TokenKind::Percent => SyntaxKind::PERCENT,
215 squawk_lexer::TokenKind::Unknown => SyntaxKind::ERROR,
216 squawk_lexer::TokenKind::UnknownPrefix => {
217 err = "unknown literal prefix";
218 SyntaxKind::IDENT
219 }
220 squawk_lexer::TokenKind::Eof => SyntaxKind::EOF,
221 squawk_lexer::TokenKind::Backtick => SyntaxKind::BACKTICK,
222 squawk_lexer::TokenKind::PositionalParam => SyntaxKind::POSITIONAL_PARAM,
223 squawk_lexer::TokenKind::QuotedIdent { terminated } => {
224 if !terminated {
225 err = "Missing trailing \" to terminate the quoted identifier"
226 }
227 SyntaxKind::IDENT
228 }
229 }
230 };
231
232 let err = if err.is_empty() { None } else { Some(err) };
233 self.push(syntax_kind, token_text.len(), err);
234 }
235
236 fn extend_literal(&mut self, len: usize, kind: &squawk_lexer::LiteralKind) {
237 let mut err = "";
238
239 let syntax_kind = match *kind {
240 squawk_lexer::LiteralKind::Int { empty_int, base: _ } => {
241 if empty_int {
242 err = "Missing digits after the integer base prefix";
243 }
244 SyntaxKind::INT_NUMBER
245 }
246 squawk_lexer::LiteralKind::Float {
247 empty_exponent,
248 base: _,
249 } => {
250 if empty_exponent {
251 err = "Missing digits after the exponent symbol";
252 }
253 SyntaxKind::FLOAT_NUMBER
254 }
255 squawk_lexer::LiteralKind::Str { terminated } => {
256 if !terminated {
257 err = "Missing trailing `'` symbol to terminate the string literal";
258 }
259 SyntaxKind::STRING
261 }
262 squawk_lexer::LiteralKind::ByteStr { terminated } => {
263 if !terminated {
264 err = "Missing trailing `'` symbol to terminate the hex bit string literal";
265 }
266 SyntaxKind::BYTE_STRING
268 }
269 squawk_lexer::LiteralKind::BitStr { terminated } => {
270 if !terminated {
271 err = "Missing trailing `\'` symbol to terminate the bit string literal";
272 }
273 SyntaxKind::BIT_STRING
275 }
276 squawk_lexer::LiteralKind::DollarQuotedString { terminated } => {
277 if !terminated {
278 err = "Unterminated dollar quoted string literal";
280 }
281 SyntaxKind::DOLLAR_QUOTED_STRING
283 }
284 squawk_lexer::LiteralKind::UnicodeEscStr { terminated } => {
285 if !terminated {
286 err = "Missing trailing `'` symbol to terminate the unicode escape string literal";
287 }
288 SyntaxKind::UNICODE_ESC_STRING
290 }
291 squawk_lexer::LiteralKind::EscStr { terminated } => {
292 if !terminated {
293 err = "Missing trailing `\'` symbol to terminate the escape string literal";
294 }
295 SyntaxKind::ESC_STRING
297 }
298 };
299
300 let err = if err.is_empty() { None } else { Some(err) };
301 self.push(syntax_kind, len, err);
302 }
303}