Skip to main content

surql_parser/upstream/syn/token/
mod.rs

1//! Module specifying the token representation of the parser.
2use std::fmt;
3use std::hash::Hash;
4use std::ops::Range;
5mod keyword;
6pub(crate) use keyword::Keyword;
7pub(crate) use keyword::keyword_t;
8mod mac;
9use crate::upstream::sql::Algorithm;
10use crate::upstream::sql::language::Language;
11pub(crate) use mac::t;
/// A location in the source passed to the lexer.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub struct Span {
	/// Offset in bytes.
	pub offset: u32,
	/// The amount of bytes this location encompasses.
	pub len: u32,
}
impl Span {
	/// Create a new empty span.
	pub const fn empty() -> Self {
		Span { offset: 0, len: 0 }
	}
	/// Returns `true` if this span encompasses zero bytes.
	pub fn is_empty(&self) -> bool {
		self.len == 0
	}
	/// Create a span from a half-open byte range.
	///
	/// An inverted range (`start > end`) contains no bytes, so it yields an
	/// empty span located at `r.start` instead of underflowing.
	pub fn from_range(r: Range<u32>) -> Self {
		Span {
			offset: r.start,
			len: r.end.saturating_sub(r.start),
		}
	}
	/// Convert this span into a half-open byte range.
	///
	/// The end of the range is clamped to `u32::MAX` if `offset + len` does
	/// not fit in a `u32`.
	pub fn to_range(&self) -> Range<u32> {
		self.offset..self.after_offset()
	}
	/// Create a span that covers the range of both spans as well as any
	/// space in between.
	pub fn covers(self, other: Span) -> Span {
		let start = self.offset.min(other.offset);
		// Both end offsets are clamped, and each is at least its own start
		// offset, so `end >= start` always holds and the subtraction is safe.
		let end = self.after_offset().max(other.after_offset());
		Span {
			offset: start,
			len: end - start,
		}
	}
	/// Shift this span forward by `other.offset`, keeping its length.
	///
	/// This re-bases a span whose offset is expressed relative to the start of
	/// `other`. The resulting offset is clamped to `u32::MAX`.
	pub fn as_within(mut self, other: Span) -> Span {
		self.offset = self.offset.saturating_add(other.offset);
		self
	}
	/// Returns the empty span located directly after the end of this span.
	pub fn after(self) -> Span {
		Span {
			offset: self.after_offset(),
			len: 0,
		}
	}
	/// Returns the byte offset directly after the end of this span, clamped to
	/// `u32::MAX`.
	pub fn after_offset(self) -> u32 {
		self.offset.saturating_add(self.len)
	}
	/// Returns if the given span is the next span after this one.
	pub fn is_followed_by(&self, other: &Self) -> bool {
		// Widen to `u64` so the end offset is exact even when `offset + len`
		// does not fit in a `u32` (or in a 32-bit `usize`).
		u64::from(other.offset) == u64::from(self.offset) + u64::from(self.len)
	}
	/// Returns if this span immediately follows the given.
	pub fn follows_from(&self, other: &Self) -> bool {
		other.is_followed_by(self)
	}
}
/// The operator tokens recognised by the lexer.
///
/// The enum is `#[repr(u8)]`, so the declaration order fixes each variant's
/// discriminant; keep new variants at the end unless renumbering is intended.
#[repr(u8)]
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum Operator {
	/// `!`
	Not,
	/// `+`
	Add,
	/// `-`
	Subtract,
	/// `÷`
	Divide,
	/// `×` or `∙`
	Mult,
	/// `%`
	Modulo,
	/// `||`
	Or,
	/// `&&`
	And,
	/// `<=`
	LessEqual,
	/// `>=`
	GreaterEqual,
	/// `*`
	Star,
	/// `**`
	Power,
	/// `=`
	Equal,
	/// `==`
	Exact,
	/// `!=`
	NotEqual,
	/// `*=`
	AllEqual,
	/// `?=`
	AnyEqual,
	/// `~`
	Like,
	/// `!~`
	NotLike,
	/// `*~`
	AllLike,
	/// `?~`
	AnyLike,
	/// `∋`
	Contains,
	/// `∌`
	NotContains,
	/// `⊇`
	ContainsAll,
	/// `⊃`
	ContainsAny,
	/// `⊅`
	ContainsNone,
	/// `∈`
	Inside,
	/// `∉`
	NotInside,
	/// `⊆`
	AllInside,
	/// `⊂`
	AnyInside,
	/// `⊄`
	NoneInside,
	/// `@@`, or with a number between the at-signs such as `@123@`
	Matches,
	/// `+=`
	Inc,
	/// `-=`
	Dec,
	/// `+?=`
	Ext,
	/// `?:`
	Tco,
	/// `??`
	Nco,
	/// `<|`
	KnnOpen,
}
impl Operator {
	/// Returns the textual form of the operator.
	///
	/// Where a variant has more than one spelling a single one is returned:
	/// `Mult` is rendered as `×` and `Matches` as `@@`.
	fn as_str(&self) -> &'static str {
		// Arms are listed in declaration order to make omissions easy to spot.
		match *self {
			Self::Not => "!",
			Self::Add => "+",
			Self::Subtract => "-",
			Self::Divide => "÷",
			Self::Mult => "×",
			Self::Modulo => "%",
			Self::Or => "||",
			Self::And => "&&",
			Self::LessEqual => "<=",
			Self::GreaterEqual => ">=",
			Self::Star => "*",
			Self::Power => "**",
			Self::Equal => "=",
			Self::Exact => "==",
			Self::NotEqual => "!=",
			Self::AllEqual => "*=",
			Self::AnyEqual => "?=",
			Self::Like => "~",
			Self::NotLike => "!~",
			Self::AllLike => "*~",
			Self::AnyLike => "?~",
			Self::Contains => "∋",
			Self::NotContains => "∌",
			Self::ContainsAll => "⊇",
			Self::ContainsAny => "⊃",
			Self::ContainsNone => "⊅",
			Self::Inside => "∈",
			Self::NotInside => "∉",
			Self::AllInside => "⊆",
			Self::AnyInside => "⊂",
			Self::NoneInside => "⊄",
			Self::Matches => "@@",
			Self::Inc => "+=",
			Self::Dec => "-=",
			Self::Ext => "+?=",
			Self::Tco => "?:",
			Self::Nco => "??",
			Self::KnnOpen => "<|",
		}
	}
}
/// A delimiting token, denoting the start or end of a certain production.
///
/// The variant only names the kind of bracket pair; it does not say whether
/// the token is the opening or the closing half.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum Delim {
	/// `()`
	Paren,
	/// `[]`
	Bracket,
	/// `{}`
	Brace,
}
/// The distance names which have a dedicated token.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum DistanceKind {
	/// `CHEBYSHEV`
	Chebyshev,
	/// `COSINE`
	Cosine,
	/// `EUCLIDEAN`
	Euclidean,
	/// `HAMMING`
	Hamming,
	/// `JACCARD`
	Jaccard,
	/// `MANHATTAN`
	Manhattan,
	/// `MINKOWSKI`
	Minkowski,
	/// `PEARSON`
	Pearson,
}
impl DistanceKind {
	/// Returns the upper-case name of the distance.
	pub fn as_str(&self) -> &'static str {
		match *self {
			Self::Chebyshev => "CHEBYSHEV",
			Self::Cosine => "COSINE",
			Self::Euclidean => "EUCLIDEAN",
			Self::Hamming => "HAMMING",
			Self::Jaccard => "JACCARD",
			Self::Manhattan => "MANHATTAN",
			Self::Minkowski => "MINKOWSKI",
			Self::Pearson => "PEARSON",
		}
	}
}
/// The vector element type names which have a dedicated token.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum VectorTypeKind {
	/// `F64`
	F64,
	/// `F32`
	F32,
	/// `I64`
	I64,
	/// `I32`
	I32,
	/// `I16`
	I16,
}
impl VectorTypeKind {
	/// Returns the upper-case name of the vector type.
	pub fn as_str(&self) -> &'static str {
		match *self {
			VectorTypeKind::F64 => "F64",
			VectorTypeKind::F32 => "F32",
			VectorTypeKind::I64 => "I64",
			VectorTypeKind::I32 => "I32",
			VectorTypeKind::I16 => "I16",
		}
	}
}
250impl Algorithm {
251	pub fn as_str(&self) -> &'static str {
252		match self {
253			Self::EdDSA => "EDDSA",
254			Self::Es256 => "ES256",
255			Self::Es384 => "ES384",
256			Self::Es512 => "ES512",
257			Self::Hs256 => "HS256",
258			Self::Hs384 => "HS384",
259			Self::Hs512 => "HS512",
260			Self::Ps256 => "PS256",
261			Self::Ps384 => "PS384",
262			Self::Ps512 => "PS512",
263			Self::Rs256 => "RS256",
264			Self::Rs384 => "RS384",
265			Self::Rs512 => "RS512",
266		}
267	}
268}
/// The kind of string literal a string-opening token starts.
///
/// Each prefix comes in a single-quoted and a double-quoted (`…Double`)
/// flavour.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum StringKind {
	/// `'`
	Plain,
	/// `"`
	PlainDouble,
	/// `r'`
	RecordId,
	/// `r"`
	RecordIdDouble,
	/// `u'`
	Uuid,
	/// `u"`
	UuidDouble,
	/// `d'`
	DateTime,
	/// `d"`
	DateTimeDouble,
	/// `b'`
	Bytes,
	/// `b"`
	BytesDouble,
	/// `f'`
	File,
	/// `f"`
	FileDouble,
}
impl StringKind {
	/// Returns a short description of the literal, including its article
	/// (e.g. `"a strand"`).
	///
	/// The quote flavour does not affect the description.
	pub fn as_str(&self) -> &'static str {
		use StringKind::*;
		match *self {
			Plain | PlainDouble => "a strand",
			RecordId | RecordIdDouble => "a record-id strand",
			Uuid | UuidDouble => "a uuid",
			DateTime | DateTimeDouble => "a datetime",
			Bytes | BytesDouble => "a bytestring",
			File | FileDouble => "a file",
		}
	}
}
308/// The type of token
309#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
310pub enum TokenKind {
311	Keyword(Keyword),
312	Algorithm(Algorithm),
313	Language(Language),
314	Distance(DistanceKind),
315	VectorType(VectorTypeKind),
316	Operator(Operator),
317	OpenDelim(Delim),
318	CloseDelim(Delim),
319	/// a token denoting the opening of a string, i.e. `r"`
320	String(StringKind),
321	/// A parameter like `$name`.
322	Parameter,
323	Identifier,
324	/// `<`
325	LeftChefron,
326	/// `>`
327	RightChefron,
328	/// `*`
329	Star,
330	/// `?`
331	Question,
332	/// `$`
333	Dollar,
334	/// `->`
335	ArrowRight,
336	/// '/'
337	ForwardSlash,
338	/// `.`
339	Dot,
340	/// `..`
341	DotDot,
342	/// `...` or `…`
343	DotDotDot,
344	/// `;`
345	SemiColon,
346	/// `::`
347	PathSeperator,
348	/// `:`
349	Colon,
350	/// `,`
351	Comma,
352	/// `|`
353	Vert,
354	/// `@`
355	At,
356	/// A token which indicates the end of the file.
357	Eof,
358	/// A token consiting of one or more ascii digits.
359	Digits,
360	/// The Not-A-Number number token.
361	NaN,
362	/// The infinity number token.
363	Infinity,
364	/// A token which could not be properly lexed.
365	Invalid,
366}
367impl fmt::Display for TokenKind {
368	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
369		f.write_str(self.as_str())
370	}
371}
372/// An assertion statically checking that the size of Tokenkind remains two
373/// bytes
374const _TOKEN_KIND_SIZE_ASSERT: [(); 2] = [(); std::mem::size_of::<TokenKind>()];
375impl TokenKind {
376	pub fn has_data(&self) -> bool {
377		matches!(self, TokenKind::Identifier)
378	}
379	fn algorithm_as_str(alg: Algorithm) -> &'static str {
380		match alg {
381			Algorithm::EdDSA => "EDDSA",
382			Algorithm::Es256 => "ES256",
383			Algorithm::Es384 => "ES384",
384			Algorithm::Es512 => "ES512",
385			Algorithm::Hs256 => "HS256",
386			Algorithm::Hs384 => "HS384",
387			Algorithm::Hs512 => "HS512",
388			Algorithm::Ps256 => "PS256",
389			Algorithm::Ps384 => "PS384",
390			Algorithm::Ps512 => "PS512",
391			Algorithm::Rs256 => "RS256",
392			Algorithm::Rs384 => "RS384",
393			Algorithm::Rs512 => "RS512",
394		}
395	}
396	pub fn as_str(&self) -> &'static str {
397		match *self {
398			TokenKind::Keyword(x) => x.as_str(),
399			TokenKind::Operator(x) => x.as_str(),
400			TokenKind::Algorithm(x) => Self::algorithm_as_str(x),
401			TokenKind::Language(x) => x.as_str(),
402			TokenKind::Distance(x) => x.as_str(),
403			TokenKind::VectorType(x) => x.as_str(),
404			TokenKind::OpenDelim(Delim::Paren) => "(",
405			TokenKind::OpenDelim(Delim::Brace) => "{",
406			TokenKind::OpenDelim(Delim::Bracket) => "[",
407			TokenKind::CloseDelim(Delim::Paren) => ")",
408			TokenKind::CloseDelim(Delim::Brace) => "}",
409			TokenKind::CloseDelim(Delim::Bracket) => "]",
410			TokenKind::Parameter => "a parameter",
411			TokenKind::Identifier => "an identifier",
412			TokenKind::LeftChefron => "<",
413			TokenKind::RightChefron => ">",
414			TokenKind::Star => "*",
415			TokenKind::Dollar => "$",
416			TokenKind::Question => "?",
417			TokenKind::ArrowRight => "->",
418			TokenKind::ForwardSlash => "/",
419			TokenKind::Dot => ".",
420			TokenKind::DotDot => "..",
421			TokenKind::DotDotDot => "...",
422			TokenKind::SemiColon => ";",
423			TokenKind::PathSeperator => "::",
424			TokenKind::Colon => ":",
425			TokenKind::Comma => ",",
426			TokenKind::Vert => "|",
427			TokenKind::At => "@",
428			TokenKind::Invalid => "Invalid",
429			TokenKind::Eof => "Eof",
430			TokenKind::String(x) => x.as_str(),
431			TokenKind::Digits => "a number",
432			TokenKind::NaN => "NaN",
433			TokenKind::Infinity => "Infinity",
434		}
435	}
436}
437#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
438pub struct Token {
439	pub kind: TokenKind,
440	pub span: Span,
441}
442impl Token {
443	pub const fn invalid() -> Token {
444		Token {
445			kind: TokenKind::Invalid,
446			span: Span::empty(),
447		}
448	}
449	/// Returns if the token is `end of file`.
450	pub fn is_eof(&self) -> bool {
451		matches!(self.kind, TokenKind::Eof)
452	}
453}
/// A compound token which lexes a JavaScript function body.
pub struct JavaScript;