surql_parser/upstream/syn/lexer/
ident.rs1use super::unicode::is_identifier_continue;
2use crate::upstream::syn::error::{SyntaxError, bail, syntax_error};
3use crate::upstream::syn::lexer::keywords::KEYWORDS;
4use crate::upstream::syn::lexer::{BytesReader, Lexer};
5use crate::upstream::syn::token::{Span, Token, TokenKind};
6use unicase::UniCase;
/// UTF-8 encoding of the opening identifier bracket `⟨`, computed at compile
/// time so the byte values never have to be spelled out by hand.
const BRACKET_CHARACTERS: [u8; 3] = {
    let mut encoded = [0u8; 3];
    let written = '⟨'.encode_utf8(&mut encoded).len();
    // Compile-time guard: the array length above is only right if the
    // character really occupies three bytes.
    assert!(written == 3);
    encoded
};

/// First byte of [`BRACKET_CHARACTERS`]. The closing bracket `⟩` encodes with
/// the same first byte, so a match on this byte alone does not tell the two
/// apart; the full character has to be decoded to know which one was read.
const BRACKET_START_CHARACTER: u8 = BRACKET_CHARACTERS[0];
15impl Lexer<'_> {
16 pub fn unescape_ident_span<'a>(
17 str: &'a str,
18 span: Span,
19 buffer: &'a mut Vec<u8>,
20 ) -> Result<&'a str, SyntaxError> {
21 let mut reader = BytesReader::new(str.as_bytes());
22 match reader.next() {
23 Some(b'`') => Self::unescape_backtick_span(reader, span, buffer),
24 Some(BRACKET_START_CHARACTER) => Self::unescape_bracket_span(reader, span, buffer),
25 _ => Ok(str),
26 }
27 }
28 fn unescape_backtick_span<'a>(
29 mut reader: BytesReader,
30 span: Span,
31 buffer: &'a mut Vec<u8>,
32 ) -> Result<&'a str, SyntaxError> {
33 buffer.clear();
34 loop {
35 let before = reader.offset();
36 let x = reader.next().expect("lexer validated input");
37 match x {
38 b'\\' => {
39 Self::lex_common_escape_sequence(&mut reader, span, before, buffer)?;
40 }
41 b'`' => break,
42 x => {
43 buffer.push(x);
44 }
45 }
46 }
47 Ok(unsafe { std::str::from_utf8_unchecked(buffer) })
48 }
49 fn unescape_bracket_span<'a>(
50 mut reader: BytesReader,
51 span: Span,
52 buffer: &'a mut Vec<u8>,
53 ) -> Result<&'a str, SyntaxError> {
54 buffer.clear();
55 assert_eq!(
56 reader
57 .complete_char(BRACKET_START_CHARACTER)
58 .expect("valid character"),
59 '⟨'
60 );
61 loop {
62 let before = reader.offset();
63 let x = reader.next().expect("lexer validated input");
64 match x {
65 b'\\' => {
66 Self::lex_common_escape_sequence(&mut reader, span, before, buffer)?;
67 }
68 x if !x.is_ascii() => {
69 let c = reader.complete_char(x).expect("valid character");
70 if c == '⟩' {
71 break;
72 } else {
73 let mut char_buffer = [0u8; 4];
74 buffer.extend_from_slice(c.encode_utf8(&mut char_buffer).as_bytes());
75 }
76 }
77 x => {
78 buffer.push(x);
79 }
80 }
81 }
82 Ok(unsafe { std::str::from_utf8_unchecked(buffer) })
83 }
84 pub(super) fn lex_param(&mut self) -> Token {
89 loop {
90 if let Some(x) = self.reader.peek()
91 && (x.is_ascii_alphanumeric() || x == b'_')
92 {
93 self.reader.next();
94 continue;
95 }
96 return self.finish_token(TokenKind::Parameter);
97 }
98 }
99 pub(super) fn lex_surrounded_param(&mut self, is_backtick: bool) -> Token {
100 match self.lex_surrounded_ident_err(is_backtick) {
101 Ok(_) => self.finish_token(TokenKind::Parameter),
102 Err(e) => self.invalid_token(e),
103 }
104 }
105 pub(super) fn lex_ident_from_next_byte(&mut self, start: u8) -> Token {
112 debug_assert!(matches!(start, b'a'..= b'z' | b'A'..= b'Z' | b'_'));
113 self.lex_ident()
114 }
115 pub(super) fn lex_ident(&mut self) -> Token {
119 loop {
120 if let Some(x) = self.reader.peek()
121 && is_identifier_continue(x)
122 {
123 self.reader.next();
124 continue;
125 }
126 let str = self.span_str(self.current_span());
127 if let Some(x) = KEYWORDS.get(&UniCase::ascii(str)).copied() {
128 if x != TokenKind::Identifier {
129 return self.finish_token(x);
130 }
131 } else if str == "NaN" {
132 return self.finish_token(TokenKind::NaN);
133 } else if str == "Infinity" {
134 return self.finish_token(TokenKind::Infinity);
135 }
136 return self.finish_token(TokenKind::Identifier);
137 }
138 }
139 pub(super) fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token {
141 match self.lex_surrounded_ident_err(is_backtick) {
142 Ok(_) => self.finish_token(TokenKind::Identifier),
143 Err(e) => self.invalid_token(e),
144 }
145 }
146 pub(super) fn lex_surrounded_ident_err(
148 &mut self,
149 is_backtick: bool,
150 ) -> Result<(), SyntaxError> {
151 let start_span = self.current_span();
152 loop {
153 let Some(x) = self.reader.next() else {
154 let end_char = if is_backtick { '`' } else { '⟩' };
155 let error = syntax_error!(
156 "Unexpected end of file, expected identifier to end with `{end_char}`",
157 @ self.current_span()
158 );
159 return Err(error);
160 };
161 match x {
162 b'`' if is_backtick => {
163 return Ok(());
164 }
165 b'\\' => {
166 let Some(next) = self.reader.next() else {
167 bail!(
168 "Unexpected end of file, expected identifier to end.", @
169 start_span => "Identifier starting here."
170 );
171 };
172 if !next.is_ascii() {
173 self.reader.complete_char(next)?;
174 }
175 }
176 BRACKET_START_CHARACTER if !is_backtick => {
177 if self.reader.complete_char(BRACKET_START_CHARACTER)? == '⟩' {
178 return Ok(());
179 }
180 }
181 x => {
182 if !x.is_ascii() {
183 self.reader.complete_char(x)?;
184 }
185 }
186 }
187 }
188 }
189}