//! Types and functionality related to the lexer.
//!
//! This module contains the structs and enums used to represent tokens, and the [`parse()`] function which returns
//! a result containing a [`TokenStream`] and [`Metadata`]. In line with the specification, the lexer correctly
//! detects the GLSL version and switches grammar accordingly. There is also an alternative [`parse_with_version()`]
//! function which allows assuming the GLSL version rather than detecting it on-the-fly. The [`preprocessor`]
//! submodule contains types used to represent tokens within preprocessor directives.
//!
//! The way spans are counted can differ depending on your needs. By default, the spans count offsets between
//! individual [`char`]s, but there are alternate functions that assume different encodings:
//! - [`parse_with_utf_16_offsets()`],
//! - [`parse_with_utf_16_offsets_and_version()`],
//! - [`parse_with_utf_8_offsets()`],
//! - [`parse_with_utf_8_offsets_and_version()`].
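//!
//! A minimal sketch of the difference (`é` lies outside GLSL's allowed character set, so it lexes as an
//! [`Invalid`](Token::Invalid) token; it is used here purely for illustration):
//!
//! ```rust
//! # use glast::lexer::{parse_with_utf_8_offsets_and_version, parse_with_utf_16_offsets_and_version};
//! let src = "int é;";
//! let (utf8_tokens, _) = parse_with_utf_8_offsets_and_version(src, glast::GlslVersion::_450);
//! let (utf16_tokens, _) = parse_with_utf_16_offsets_and_version(src, glast::GlslVersion::_450);
//! // `é` is two `utf-8` code units but only one `utf-16` code unit, so the two token
//! // streams contain identical tokens whilst the span offsets after the `é` differ.
//! ```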
//!
//! # Lexer
//! This lexer uses the "maximal munch" principle to greedily create tokens. This means the longest possible valid
//! token is always produced. Some examples:
//!
//! ```text
//! i---7      becomes (i) (--) (-) (7)
//! i----7     becomes (i) (--) (--) (7)
//! i-----7    becomes (i) (--) (--) (-) (7)
//! i-- - --7  becomes (i) (--) (-) (--) (7)
//! ```
//! The longest possible tokens are produced even if they form an invalid expression. For example, `i----7`
//! could have been a valid GLSL expression if it were parsed as `(i) (--) (-) (-) (7)`, but this behaviour is not
//! exhibited as that would require knowing the context, and the lexer is not context-aware.
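//!
//! A quick sketch of this in action (using [`parse_with_version()`] so that no version directive is needed):
//!
//! ```rust
//! # use glast::lexer::parse_with_version;
//! let (tokens, _) = parse_with_version("i----7", glast::GlslVersion::_450);
//! // (i) (--) (--) (7), i.e. exactly four tokens.
//! assert_eq!(tokens.len(), 4);
//! ```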
//!
//! For a BNF notation of the official lexer grammar, see
//! [this](https://github.com/KubaP/glsl-lsp/blob/release/glast/docs/lexer_grammar.bnf) file.
//!
//! # Differences in behaviour
//! Since this crate is part of a larger effort to provide an LSP implementation, it is designed to handle errors
//! in a UX-friendly manner. This means that there are some minor differences between the behaviour of this lexer
//! and of a lexer as specified by the GLSL specification. The differences are listed below:
//!
//! - When the lexer comes across a character which is not part of the allowed character set, it emits the
//!   [`Invalid`](Token::Invalid) token. The specification has no such token; it just mentions that a character
//!   outside of the allowed character set must produce a compile-time error.
//! - When the lexer comes across a block comment which does not have a closing delimiter (and therefore runs to
//!   the end-of-file), it still produces a [`BlockComment`](Token::BlockComment) token with the `contains_eof`
//!   field set to `true`. The specification does not mention what should technically happen in such a case, but
//!   compilers seem to produce a compile-time error.
//! - The lexer treats any number that matches the pattern `0[0-9]+` as an octal number. The specification says
//!   that an octal number can only contain the digits `0-7`. This choice was made to produce better errors; the
//!   entire span `009` is highlighted as an invalid octal number token, rather than producing a more confusing
//!   error about two consecutive number tokens (`00` and `9`).
//! - The lexer treats any identifier immediately after a number (without separating whitespace) as a suffix. The
//!   specification only defines the `u|U` suffix as valid for integers, and the `f|F` & `lf|LF` suffixes as valid
//!   for floating-point numbers. Anything afterwards should be treated as a new token, so this would be valid:
//!   `#define TEST +5 \n uint i = 5uTEST`. This crate does not currently implement that behaviour, so for now the
//!   lexer treats the entire `uTEST` as the suffix instead.
//!
//! See the [`preprocessor`] submodule for an overview of the lexer's behaviour for each individual preprocessor
//! directive.
//!
//! To be certain that the source is valid, these cases (apart from the macro issue) must be checked afterwards by
//! iterating over the [`TokenStream`]. The parsing functions provided in this crate do this for you, but if you
//! are performing your own manipulation you must perform these checks yourself.
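//!
//! A minimal sketch of such a check (which conditions matter depends on your use case):
//!
//! ```rust
//! # use glast::lexer::{parse_with_version, Token};
//! let (tokens, _) = parse_with_version("int i = 5; @", glast::GlslVersion::_450);
//! let is_valid = tokens.iter().all(|(token, _)| match token {
//! 	// A character outside of the allowed character set is a compile-time error.
//! 	Token::Invalid(_) => false,
//! 	// An unterminated block comment is treated as an error by compilers.
//! 	Token::BlockComment { contains_eof, .. } => !*contains_eof,
//! 	_ => true,
//! });
//! assert!(!is_valid); // `@` is not part of the allowed character set.
//! ```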
//!
//! One potential improvement would be to put the alternate, specification-conforming behaviour behind a flag
//! (i.e. stop parsing after encountering an error). This is currently not a priority, but if you would like such
//! functionality, please file an issue on the GitHub repository to show interest. An alternative would be to set
//! a flag in the `Metadata` which signifies whether any errors were encountered.

pub mod preprocessor;

use crate::{GlslVersion, Span, SpanEncoding, Spanned};

/// A vector of tokens representing a GLSL source string.
pub type TokenStream = Vec<Spanned<Token>>;

/// Parses a GLSL source string into a token stream.
///
/// This function detects the GLSL version and switches grammar in line with the specification. If this behaviour
/// is undesirable, see the [`parse_with_version()`] function. Note that currently most GLSL versions are
/// unsupported.
///
/// This function creates spans based on `utf-32` code units. See the documentation for [`Span`] for more details.
/// **This does not mean that the source string is `utf-32` encoded; it is still a normal `utf-8` Rust string.**
///
/// # Examples
/// Parse a simple GLSL expression:
/// ```rust
/// # use glast::lexer::parse;
/// let src = r#"
/// ##version 450
/// int i = 5.0 + 1;
/// "#;
/// let (token_stream, metadata) = parse(&src).unwrap();
/// ```
pub fn parse(source: &str) -> Result<(TokenStream, Metadata), ParseErr> {
	let mut lexer: Lexer<Utf32> = Lexer::new(source, SpanEncoding::Utf32);
	let tokens = parse_tokens(&mut lexer, false, false);
	match lexer.metadata.version {
		GlslVersion::Unsupported => {
			Err(ParseErr::UnsupportedVersion(lexer.metadata.version))
		}
		_ => Ok((tokens, lexer.metadata)),
	}
}

/// Parses a GLSL source string into a token stream, assuming a specific GLSL version.
///
/// Unlike the [`parse()`] function, which returns an error if an unsupported GLSL version was detected, this
/// function assumes the specified version and will always return something (though if the version is incorrectly
/// specified, the resulting token stream can be semantically incorrect).
///
/// This function creates spans based on `utf-32` code units. See the documentation for [`Span`] for more details.
/// **This does not mean that the source string is `utf-32` encoded; it is still a normal `utf-8` Rust string.**
///
/// # Examples
/// Parse a simple GLSL expression:
/// ```rust
/// # use glast::lexer::parse_with_version;
/// let src = r#"
/// int i = 5.0 + 1;
/// "#;
/// let (token_stream, metadata) = parse_with_version(&src, glast::GlslVersion::_450);
/// ```
pub fn parse_with_version(
	source: &str,
	version: GlslVersion,
) -> (TokenStream, Metadata) {
	let mut lexer: Lexer<Utf32> = Lexer::new(source, SpanEncoding::Utf32);
	lexer.metadata.version = version;
	let tokens = parse_tokens(&mut lexer, false, true);
	(tokens, lexer.metadata)
}

/// Parses a GLSL source string into a token stream.
///
/// This function behaves exactly like [`parse()`], **however**, spans of tokens are based on `utf-16` code units.
/// See the documentation for [`Span`] for more details. **This does not mean that the source string
/// is `utf-16` encoded; it is still a normal `utf-8` Rust string.**
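///
/// # Examples
/// A sketch mirroring the [`parse()`] example:
/// ```rust
/// # use glast::lexer::parse_with_utf_16_offsets;
/// let src = r#"
/// ##version 450
/// int i = 5.0 + 1;
/// "#;
/// let (token_stream, metadata) = parse_with_utf_16_offsets(&src).unwrap();
/// ```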
pub fn parse_with_utf_16_offsets(
	source: &str,
) -> Result<(TokenStream, Metadata), ParseErr> {
	let mut lexer: Lexer<Utf16> = Lexer::new(source, SpanEncoding::Utf16);
	let tokens = parse_tokens(&mut lexer, false, false);
	match lexer.metadata.version {
		GlslVersion::Unsupported => {
			Err(ParseErr::UnsupportedVersion(lexer.metadata.version))
		}
		_ => Ok((tokens, lexer.metadata)),
	}
}

/// Parses a GLSL source string into a token stream, assuming a specific GLSL version.
///
/// This function behaves exactly like [`parse_with_version()`], **however**, spans of tokens are based on `utf-16`
/// code units. See the documentation for [`Span`] for a side-by-side comparison. **This does not mean that the
/// source string is `utf-16` encoded; it is still a normal `utf-8` Rust string.**
pub fn parse_with_utf_16_offsets_and_version(
	source: &str,
	version: GlslVersion,
) -> (TokenStream, Metadata) {
	let mut lexer: Lexer<Utf16> = Lexer::new(source, SpanEncoding::Utf16);
	lexer.metadata.version = version;
	let tokens = parse_tokens(&mut lexer, false, true);
	(tokens, lexer.metadata)
}

/// Parses a GLSL source string into a token stream.
///
/// This function behaves exactly like [`parse()`], **however**, spans of tokens are based on `utf-8` code units.
/// See the documentation for [`Span`] for more details.
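///
/// # Examples
/// A sketch mirroring the [`parse()`] example:
/// ```rust
/// # use glast::lexer::parse_with_utf_8_offsets;
/// let src = r#"
/// ##version 450
/// int i = 5.0 + 1;
/// "#;
/// let (token_stream, metadata) = parse_with_utf_8_offsets(&src).unwrap();
/// ```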
pub fn parse_with_utf_8_offsets(
	source: &str,
) -> Result<(TokenStream, Metadata), ParseErr> {
	let mut lexer: Lexer<Utf8> = Lexer::new(source, SpanEncoding::Utf8);
	let tokens = parse_tokens(&mut lexer, false, false);
	match lexer.metadata.version {
		GlslVersion::Unsupported => {
			Err(ParseErr::UnsupportedVersion(lexer.metadata.version))
		}
		_ => Ok((tokens, lexer.metadata)),
	}
}

/// Parses a GLSL source string into a token stream, assuming a specific GLSL version.
///
/// This function behaves exactly like [`parse_with_version()`], **however**, spans of tokens are based on `utf-8`
/// code units. See the documentation for [`Span`] for more details.
pub fn parse_with_utf_8_offsets_and_version(
	source: &str,
	version: GlslVersion,
) -> (TokenStream, Metadata) {
	let mut lexer: Lexer<Utf8> = Lexer::new(source, SpanEncoding::Utf8);
	lexer.metadata.version = version;
	let tokens = parse_tokens(&mut lexer, false, true);
	(tokens, lexer.metadata)
}

/// The error type for lexer parsing operations.
#[derive(Debug)]
pub enum ParseErr {
	/// The source string contains an unsupported GLSL version.
	UnsupportedVersion(GlslVersion),
}

/// Metadata about the GLSL source string.
///
/// This is returned by the lexer along with the [`TokenStream`] and describes certain properties of the source,
/// such as whether the source contains any conditional compilation directives. These properties can be useful in
/// order to optimize later processing steps, such as skipping a large chunk of code if a certain condition is not
/// met.
///
/// The purpose of this struct is to hold structured data that gets extracted out and checked against if needed.
/// Hence, this struct is marked as `#[non_exhaustive]` and new fields may be added at any time without causing a
/// breaking change.
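///
/// # Examples
/// A sketch of using a property to skip unnecessary work:
/// ```rust
/// # use glast::lexer::parse_with_version;
/// let (_, metadata) = parse_with_version("int i = 1;", glast::GlslVersion::_450);
/// if !metadata.contains_conditional_directives {
/// 	// No conditional compilation directives: later passes can take a fast path.
/// }
/// ```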
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct Metadata {
	/// The type of encoding of spans.
	pub span_encoding: SpanEncoding,
	/// The detected GLSL version of the source string. In accordance with the specification, this is only set when
	/// the lexer encounters a valid `#version` directive as the first token in the source string (barring any
	/// whitespace).
	pub version: GlslVersion,
	/// Whether the GLSL source string contains any conditional compilation directives.
	pub contains_conditional_directives: bool,
}

/// A token representing a unit of text in the GLSL source string.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
	/// A number, e.g. `1`, `517u`, `0xA9C`, `07113`, `7.3e-2`, `.015LF`.
	Num {
		/// The type of number.
		type_: NumType,
		/// The numeric contents (excluding any prefixes or suffixes).
		num: String,
		/// An optional suffix after the numeric contents.
		suffix: Option<String>,
	},
	/// A boolean, either `true` or `false`.
	Bool(bool),
	/// An identifier, e.g. `foo_bar`, `_900_a`.
	Ident(String),
	/// A preprocessor directive, e.g. `#version 450 core`, `#define FOO 42`, `#ifdef TOGGLE`. This token cannot
	/// exist within the body of a macro.
	Directive(preprocessor::TokenStream),
	/// The `##` punctuation symbol. This token is only emitted when parsing the body of a `#define` preprocessor
	/// directive.
	MacroConcat,
	/// A line comment, e.g. `// comment`.
	LineComment(String),
	/// A block comment, e.g. `/* comment */`.
	BlockComment {
		str: String,
		/// Only `true` if this comment is missing the closing delimiter.
		contains_eof: bool,
	},
	/// An invalid character, e.g. `@`, `"`, `'`.
	Invalid(char),
	/* Keywords */
	/// The `if` keyword.
	If,
	/// The `else` keyword.
	Else,
	/// The `for` keyword.
	For,
	/// The `do` keyword.
	Do,
	/// The `while` keyword.
	While,
	/// The `continue` keyword.
	Continue,
	/// The `switch` keyword.
	Switch,
	/// The `case` keyword.
	Case,
	/// The `default` keyword.
	Default,
	/// The `break` keyword.
	Break,
	/// The `return` keyword.
	Return,
	/// The `discard` keyword.
	Discard,
	/// The `struct` keyword.
	Struct,
	/// The `subroutine` keyword.
	Subroutine,
	/// A reserved keyword, e.g. `class`, `public`, `typedef`, `union`.
	Reserved(String),
	/* Qualifiers */
	/// The `const` keyword.
	Const,
	/// The `in` keyword.
	In,
	/// The `out` keyword.
	Out,
	/// The `inout` keyword.
	InOut,
	/// The `attribute` keyword.
	Attribute,
	/// The `uniform` keyword.
	Uniform,
	/// The `varying` keyword.
	Varying,
	/// The `buffer` keyword.
	Buffer,
	/// The `shared` keyword.
	Shared,
	/// The `centroid` keyword.
	Centroid,
	/// The `sample` keyword.
	Sample,
	/// The `patch` keyword.
	Patch,
	/// The `layout` keyword.
	Layout,
	/// The `flat` keyword.
	Flat,
	/// The `smooth` keyword.
	Smooth,
	/// The `noperspective` keyword.
	NoPerspective,
	/// The `highp` keyword.
	HighP,
	/// The `mediump` keyword.
	MediumP,
	/// The `lowp` keyword.
	LowP,
	/// The `invariant` keyword.
	Invariant,
	/// The `precise` keyword.
	Precise,
	/// The `coherent` keyword.
	Coherent,
	/// The `volatile` keyword.
	Volatile,
	/// The `restrict` keyword.
	Restrict,
	/// The `readonly` keyword.
	Readonly,
	/// The `writeonly` keyword.
	Writeonly,
	/* Punctuation tokens */
	/// A punctuation token.
	Op(OpTy),
	/// A comma `,`.
	Comma,
	/// A dot `.`.
	Dot,
	/// A semi-colon `;`.
	Semi,
	/// A colon `:`.
	Colon,
	/// A question mark `?`.
	Question,
	/// An opening parenthesis `(`.
	LParen,
	/// A closing parenthesis `)`.
	RParen,
	/// An opening bracket `[`.
	LBracket,
	/// A closing bracket `]`.
	RBracket,
	/// An opening brace `{`.
	LBrace,
	/// A closing brace `}`.
	RBrace,
}

/// The type/notation of a number token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NumType {
	/// A decimal is any number beginning with `1-9` without a decimal point or an exponent, or just the digit `0`
	/// on its own.
	Dec,
	/// An octal is any number beginning with `0` without a decimal point or an exponent.
	Oct,
	/// A hexadecimal is any number beginning with `0x` without a decimal point or an exponent.
	Hex,
	/// A float is any number that contains a decimal point or an exponent.
	Float,
}

/// A mathematical/comparison operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OpTy {
	/* Maths */
	/// The `+` symbol.
	Add,
	/// The `-` symbol.
	Sub,
	/// The `*` symbol.
	Mul,
	/// The `/` symbol.
	Div,
	/// The `%` symbol.
	Rem,
	/// The `&` symbol.
	And,
	/// The `|` symbol.
	Or,
	/// The `^` symbol.
	Xor,
	/// The `<<` symbol.
	LShift,
	/// The `>>` symbol.
	RShift,
	/// The `~` symbol.
	Flip,
	/// The `=` symbol.
	Eq,
	/// The `++` symbol.
	AddAdd,
	/// The `--` symbol.
	SubSub,
	/// The `+=` symbol.
	AddEq,
	/// The `-=` symbol.
	SubEq,
	/// The `*=` symbol.
	MulEq,
	/// The `/=` symbol.
	DivEq,
	/// The `%=` symbol.
	RemEq,
	/// The `&=` symbol.
	AndEq,
	/// The `|=` symbol.
	OrEq,
	/// The `^=` symbol.
	XorEq,
	/// The `<<=` symbol.
	LShiftEq,
	/// The `>>=` symbol.
	RShiftEq,
	/* Comparison */
	/// The `==` symbol.
	EqEq,
	/// The `!=` symbol.
	NotEq,
	/// The `!` symbol.
	Not,
	/// The `>` symbol.
	Gt,
	/// The `<` symbol.
	Lt,
	/// The `>=` symbol.
	Ge,
	/// The `<=` symbol.
	Le,
	/// The `&&` symbol.
	AndAnd,
	/// The `||` symbol.
	OrOr,
	/// The `^^` symbol.
	XorXor,
}

impl Token {
	/// Produces a syntax token corresponding to the type of this lexer token. This performs simple,
	/// non-semantically-aware colouring.
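	///
	/// # Examples
	/// A sketch (assuming `SyntaxType` is reachable at `glast::syntax::SyntaxType`):
	/// ```rust
	/// # use glast::lexer::Token;
	/// # use glast::syntax::SyntaxType;
	/// assert!(matches!(Token::If.non_semantic_colour(), SyntaxType::Keyword));
	/// assert!(matches!(Token::Comma.non_semantic_colour(), SyntaxType::Punctuation));
	/// ```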
	pub fn non_semantic_colour(&self) -> crate::syntax::SyntaxType {
		use crate::syntax::SyntaxType;
		match self {
			Token::Num { .. } => SyntaxType::Number,
			Token::Bool(_) => SyntaxType::Boolean,
			Token::Ident(_) => SyntaxType::Ident,
			Token::Directive(_) => SyntaxType::Directive,
			Token::MacroConcat => SyntaxType::DirectiveConcat,
			Token::LineComment(_) | Token::BlockComment { .. } => {
				SyntaxType::Comment
			}
			Token::Invalid(_) => SyntaxType::Invalid,
			Token::If
			| Token::Else
			| Token::For
			| Token::Do
			| Token::While
			| Token::Continue
			| Token::Switch
			| Token::Case
			| Token::Default
			| Token::Break
			| Token::Return
			| Token::Discard
			| Token::Struct
			| Token::Subroutine
			| Token::Reserved(_)
			| Token::Const
			| Token::In
			| Token::Out
			| Token::InOut
			| Token::Attribute
			| Token::Uniform
			| Token::Varying
			| Token::Buffer
			| Token::Shared
			| Token::Centroid
			| Token::Sample
			| Token::Patch
			| Token::Layout
			| Token::Flat
			| Token::Smooth
			| Token::NoPerspective
			| Token::HighP
			| Token::MediumP
			| Token::LowP
			| Token::Invariant
			| Token::Precise
			| Token::Coherent
			| Token::Volatile
			| Token::Restrict
			| Token::Readonly
			| Token::Writeonly => SyntaxType::Keyword,
			Token::Op(_) => SyntaxType::Operator,
			Token::Comma
			| Token::Dot
			| Token::Semi
			| Token::Colon
			| Token::Question
			| Token::LParen
			| Token::RParen
			| Token::LBracket
			| Token::RBracket
			| Token::LBrace
			| Token::RBrace => SyntaxType::Punctuation,
		}
	}

	/// Returns whether the current token is a keyword which can start a statement.
	pub fn can_start_statement(&self) -> bool {
		match self {
			Self::If
			| Self::For
			| Self::Do
			| Self::While
			| Self::Continue
			| Self::Switch
			| Self::Break
			| Self::Return
			| Self::Discard
			| Self::Struct
			| Self::Subroutine
			| Self::Const
			| Self::In
			| Self::Out
			| Self::InOut
			| Self::Attribute
			| Self::Uniform
			| Self::Varying
			| Self::Buffer
			| Self::Shared
			| Self::Centroid
			| Self::Sample
			| Self::Patch
			| Self::Layout
			| Self::Flat
			| Self::Smooth
			| Self::NoPerspective
			| Self::HighP
			| Self::MediumP
			| Self::LowP
			| Self::Invariant
			| Self::Precise
			| Self::Coherent
			| Self::Volatile
			| Self::Restrict
			| Self::Readonly
			| Self::Writeonly => true,
			_ => false,
		}
	}
}

impl std::fmt::Display for Token {
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		match self {
			Token::Num { type_, num, suffix } => {
				match type_ {
					NumType::Dec => {}
					NumType::Oct => write!(f, "0")?,
					NumType::Hex => write!(f, "0x")?,
					NumType::Float => {}
				}
				write!(f, "{num}")?;
				if let Some(suffix) = suffix {
					write!(f, "{suffix}")
				} else {
					Ok(())
				}
			}
			Token::Bool(b) => write!(f, "{b}"),
			Token::Ident(s) => write!(f, "{s}"),
			Token::Directive(_) => write!(f, "DIRECTIVE"),
			Token::MacroConcat => write!(f, "##"),
			Token::LineComment(s) => write!(f, "//{s}"),
			Token::BlockComment { str, contains_eof } => {
				write!(f, "/*{str}")?;
				// Only print the closing delimiter if the comment actually had one.
				if !*contains_eof {
					write!(f, "*/")
				} else {
					Ok(())
				}
			}
			Token::Invalid(c) => write!(f, "{c}"),
			Token::If => write!(f, "if"),
			Token::Else => write!(f, "else"),
			Token::For => write!(f, "for"),
			Token::Do => write!(f, "do"),
			Token::While => write!(f, "while"),
			Token::Continue => write!(f, "continue"),
			Token::Switch => write!(f, "switch"),
			Token::Case => write!(f, "case"),
			Token::Default => write!(f, "default"),
			Token::Break => write!(f, "break"),
			Token::Return => write!(f, "return"),
			Token::Discard => write!(f, "discard"),
			Token::Struct => write!(f, "struct"),
			Token::Subroutine => write!(f, "subroutine"),
			Token::Reserved(s) => write!(f, "{s}"),
			Token::Const => write!(f, "const"),
			Token::In => write!(f, "in"),
			Token::Out => write!(f, "out"),
			Token::InOut => write!(f, "inout"),
			Token::Attribute => write!(f, "attribute"),
			Token::Uniform => write!(f, "uniform"),
			Token::Varying => write!(f, "varying"),
			Token::Buffer => write!(f, "buffer"),
			Token::Shared => write!(f, "shared"),
			Token::Centroid => write!(f, "centroid"),
			Token::Sample => write!(f, "sample"),
			Token::Patch => write!(f, "patch"),
			Token::Layout => write!(f, "layout"),
			Token::Flat => write!(f, "flat"),
			Token::Smooth => write!(f, "smooth"),
			Token::NoPerspective => write!(f, "noperspective"),
			Token::HighP => write!(f, "highp"),
			Token::MediumP => write!(f, "mediump"),
			Token::LowP => write!(f, "lowp"),
			Token::Invariant => write!(f, "invariant"),
			Token::Precise => write!(f, "precise"),
			Token::Coherent => write!(f, "coherent"),
			Token::Volatile => write!(f, "volatile"),
			Token::Restrict => write!(f, "restrict"),
			Token::Readonly => write!(f, "readonly"),
			Token::Writeonly => write!(f, "writeonly"),
			Token::Op(op) => match op {
				OpTy::Add => write!(f, "+"),
				OpTy::Sub => write!(f, "-"),
				OpTy::Mul => write!(f, "*"),
				OpTy::Div => write!(f, "/"),
				OpTy::Rem => write!(f, "%"),
				OpTy::And => write!(f, "&"),
				OpTy::Or => write!(f, "|"),
				OpTy::Xor => write!(f, "^"),
				OpTy::LShift => write!(f, "<<"),
				OpTy::RShift => write!(f, ">>"),
				OpTy::Flip => write!(f, "~"),
				OpTy::Eq => write!(f, "="),
				OpTy::AddAdd => write!(f, "++"),
				OpTy::SubSub => write!(f, "--"),
				OpTy::AddEq => write!(f, "+="),
				OpTy::SubEq => write!(f, "-="),
				OpTy::MulEq => write!(f, "*="),
				OpTy::DivEq => write!(f, "/="),
				OpTy::RemEq => write!(f, "%="),
				OpTy::AndEq => write!(f, "&="),
				OpTy::OrEq => write!(f, "|="),
				OpTy::XorEq => write!(f, "^="),
				OpTy::LShiftEq => write!(f, "<<="),
				OpTy::RShiftEq => write!(f, ">>="),
				OpTy::EqEq => write!(f, "=="),
				OpTy::NotEq => write!(f, "!="),
				OpTy::Not => write!(f, "!"),
				OpTy::Gt => write!(f, ">"),
				OpTy::Lt => write!(f, "<"),
				OpTy::Ge => write!(f, ">="),
				OpTy::Le => write!(f, "<="),
				OpTy::AndAnd => write!(f, "&&"),
				OpTy::OrOr => write!(f, "||"),
				OpTy::XorXor => write!(f, "^^"),
			},
			Token::Comma => write!(f, ","),
			Token::Dot => write!(f, "."),
			Token::Semi => write!(f, ";"),
			Token::Colon => write!(f, ":"),
			Token::Question => write!(f, "?"),
			Token::LParen => write!(f, "("),
			Token::RParen => write!(f, ")"),
			Token::LBracket => write!(f, "["),
			Token::RBracket => write!(f, "]"),
			Token::LBrace => write!(f, "{{"),
			Token::RBrace => write!(f, "}}"),
		}
	}
}

/// Parses GLSL tokens, continuing off from the current position of the lexer.
///
/// - `parsing_define_body` - Whether we are parsing the body of a `#define` preprocessor directive, which slightly
///   changes the behaviour of the lexer.
/// - `hardcoded_version` - Whether the lexer has a hardcoded version. If `true`, this disables the dynamic
///   on-the-fly version changing when an appropriate version directive is encountered.
///
/// TODO: Track spans of line-continuators.
fn parse_tokens<C: Char>(
	lexer: &mut Lexer<C>,
	parsing_define_body: bool,
	hardcoded_version: bool,
) -> TokenStream {
	let mut tokens = Vec::new();

	// This is a flag as to whether we can start parsing a directive if we encounter a `#` symbol.
	// After an EOL or end of block comment this is set to `true`. Any branch other than the whitespace branch sets
	// this to `false`. This makes it easy to keep track of when we are allowed to parse a directive, since they
	// must exist at the start of a line barring any whitespace.
	let mut can_start_directive = true;

	// Any time we want to test the next character, we first `peek()` to see what it is. If it is valid in whatever
	// branch we are in, we can `advance()` the lexer to the next character and repeat the process. If it is
	// invalid (and hence we want to finish this branch and try another one), we don't `advance()` the lexer
	// because we don't want to consume this character; we want to test it against the other branches.
	//
	// Any time we reach an EOL, we don't bother checking what type it is. If it's `\n` then any check consumes it
	// and the next iteration of the loop starts a new token. If it's `\r\n` then the next iteration will consume
	// the `\n`, after which we do _another_ iteration to start a new token.
	let mut buffer = String::new();

	// This flag is set to `true` when we encounter our first directive. It lets us detect whether the first
	// directive is a version directive; if it is, and it contains a valid GLSL version number, we can set the
	// version number of the lexer.
	let mut parsed_directive_yet = false;
	'outer: while !lexer.is_done() {
		let buffer_start = lexer.position();
		// Peek the current character.
		let mut current = match lexer.peek() {
			Some(c) => c,
			None => {
				break;
			}
		};

		if parsing_define_body && (current == '\r' || current == '\n') {
			// We are parsing the body of a `#define` macro. An EOL signifies the end of the body, and a return to
			// the normal lexer behaviour.
			return tokens;
		}

		if is_word_start(&current) {
			can_start_directive = false;
			buffer.push(current);
			lexer.advance();

			'word: loop {
				// Peek the current character.
				current = match lexer.peek() {
					Some(c) => c,
					None => {
						// We have reached the end of the source string, and therefore the end of the word.
						tokens.push((
							match_word(std::mem::take(&mut buffer)),
							Span {
								start: buffer_start,
								end: lexer.position(),
							},
						));
						break 'word;
					}
				};

				// Check if it can be part of a word.
				if is_word(&current) {
					// The character can be part of a word, so consume it and continue looping.
					buffer.push(current);
					lexer.advance();
				} else {
					// The character can't be part of a word, so we can produce a token and exit this loop without
					// consuming it.
					tokens.push((
						match_word(std::mem::take(&mut buffer)),
						Span {
							start: buffer_start,
							end: lexer.position(),
						},
					));
					break 'word;
				}
			}
		} else if is_number_start(&current) {
			/// The current state when parsing a number.
			#[derive(Debug, Clone, Copy, PartialEq, Eq)]
			enum NumState {
				/// Parsing either an octal or decimal or a floating point number (depending on what follows).
				Zero,
				/// Parsing a hexadecimal number.
				Hex,
				/// Parsing a decimal number.
				Dec,
				/// Parsing a decimal floating point number.
				Float,
			}

			can_start_directive = false;

			// We don't need to worry about having a word character before this first digit character because if
			// there was a word character before, this digit character would have been parsed as part of the word
			// in the first place, so this branch would not execute.

			let mut num_buffer = String::new();
			let mut suffix_buffer = None;

			// If we begin with [1-9], we know it's 100% a decimal number. If we begin with `0x`, we know it's 100%
			// a hexadecimal number and we can ignore this prefix as it's not part of the number itself.
			//
			// If we begin with a `0`, however, this can either be:
			// - an octal number (and we need to ignore this prefix later down the line) or,
			// - a decimal number `0` assuming the number ends at the next character or,
			// - a floating point which can have a variable amount of `0`s before the decimal point.
			//
			// If we begin with a `.`, we 100% know it's a floating point if there's at least one [0-9] digit
			// afterwards, otherwise this is just a dot token.
			let mut state = if lexer.take_pat("0x") {
				NumState::Hex
			} else if lexer.take_pat("0X") {
				NumState::Hex
			} else if current == '0' {
				// We have a `0`, so either an octal number or a decimal `0` or a floating point.
				num_buffer.push(current);
				lexer.advance();
				NumState::Zero
			} else if current == '.' {
				if let Some(lookahead) = lexer.lookahead_1() {
					if lookahead.is_ascii_digit() {
						// We have a `.` followed by a character that is a floating point digit.
						num_buffer.push(current);
						lexer.advance();
						NumState::Float
					} else {
						// We have a `.` followed by a character that is not a digit, so this must be a punctuation
						// token. We consume the character because otherwise we'd end up back in this branch again.
						lexer.advance();
						tokens.push((
							Token::Dot,
							Span {
								start: buffer_start,
								end: lexer.position(),
							},
						));
						continue;
					}
				} else {
					// We have a `.` followed by the end of the source string, so this must be a punctuation token.
					// We consume the character because otherwise we'd end up back in this branch again.
					lexer.advance();
					tokens.push((
						Token::Dot,
						Span {
							start: buffer_start,
							end: lexer.position(),
						},
					));
					continue;
				}
			} else {
				// We have a [1-9] digit, so a decimal number.
				num_buffer.push(current);
				lexer.advance();
				NumState::Dec
			};

			'number: loop {
				// Peek the current character.
				current = match lexer.peek() {
					Some(c) => c,
					None => {
						// We have reached the end of the source string, and therefore the end of the number.
						let type_ = match state {
							NumState::Hex => NumType::Hex,
							NumState::Zero => {
								if num_buffer.as_str() == "0" {
									NumType::Dec
								} else {
									num_buffer.remove(0);
									NumType::Oct
								}
							}
							NumState::Dec => NumType::Dec,
							NumState::Float => NumType::Float,
						};
						tokens.push((
							Token::Num {
								num: num_buffer,
								suffix: suffix_buffer,
								type_,
							},
							Span {
								start: buffer_start,
								end: lexer.position(),
							},
						));
						break 'number;
					}
				};

				if current == '.' && state == NumState::Hex {
					// If we encounter a `.` and we are parsing a hexadecimal number, that means we've reached the
					// end of this number, and the `.` is a punctuation symbol. We consume the character because
					// otherwise we'd end up back in this branch again.
					tokens.push((
						Token::Num {
							num: num_buffer,
							suffix: suffix_buffer,
							type_: NumType::Hex,
						},
						Span {
							start: buffer_start,
							end: lexer.position(),
						},
					));
					tokens.push((
						Token::Dot,
						Span {
							start: lexer.position(),
							end: lexer.position() + 1,
						},
					));
					lexer.advance();
					break 'number;
				}
				if current == '.' && suffix_buffer.is_some() {
					// If we have finished parsing the digits and are now parsing the suffix, that means we've
					// reached the end of the number and this `.` is a punctuation symbol. We consume the character
					// because otherwise we'd end up back in this branch again.
					let type_ = match state {
						NumState::Hex => NumType::Hex,
						NumState::Zero => {
							if num_buffer.as_str() == "0" {
								NumType::Dec
							} else {
								num_buffer.remove(0);
								NumType::Oct
							}
						}
						NumState::Dec => NumType::Dec,
						NumState::Float => NumType::Float,
					};
					tokens.push((
						Token::Num {
							num: num_buffer,
							suffix: suffix_buffer,
							type_,
						},
						Span {
							start: buffer_start,
							end: lexer.position(),
						},
					));
					tokens.push((
						Token::Dot,
						Span {
							start: lexer.position(),
							end: lexer.position() + 1,
						},
					));
					lexer.advance();
					break 'number;
				}
				if current == '.'
					&& (state == NumState::Dec || state == NumState::Zero)
				{
					// If we are still parsing the digits of a number beginning with [0-9] and haven't reached a
					// suffix yet, and haven't encountered a `.` yet either, that means this number is a floating
					// point.
					state = NumState::Float;
					num_buffer.push(current);
					lexer.advance();
					continue 'number;
				}
				if current == '.' && state == NumState::Float {
					// If we are still parsing the digits and haven't reached a suffix yet, and have already
					// encountered a `.` before, that means we've reached the end of the number and this `.` is a
					// punctuation symbol. We consume the character because otherwise we'd end up back in this
					// branch again.
					let type_ = match state {
						NumState::Hex => NumType::Hex,
						NumState::Zero => {
							if num_buffer.as_str() == "0" {
								NumType::Dec
							} else {
								num_buffer.remove(0);
								NumType::Oct
							}
						}
						NumState::Dec => NumType::Dec,
						NumState::Float => NumType::Float,
					};
					tokens.push((
						Token::Num {
							num: num_buffer,
							suffix: suffix_buffer,
							type_,
						},
						Span {
							start: buffer_start,
							end: lexer.position(),
						},
					));
					tokens.push((
						Token::Dot,
						Span {
							start: lexer.position(),
							end: lexer.position() + 1,
						},
					));
					lexer.advance();
					break 'number;
				}

				if current == 'e' {
					// Note: In the case we encounter an `e` followed by nothing after, that can only be a suffix,
					// so the logic below will deal with that.
					if let Some(lookahead) = lexer.lookahead_1() {
						if lookahead.is_ascii_digit() {
							// We have an `e` followed by a digit, so this is an exponent notation rather than a
							// suffix.
							num_buffer.push(current);
							lexer.advance();
							// If the number isn't already a float, then an exponent makes it one.
							state = NumState::Float;
							continue 'number;
						} else if lookahead == '+' || lookahead == '-' {
							// We have an `e` followed by a `+`/`-`, so this _may_ be an exponent notation depending
							// on whether a digit follows.
							if let Some(lookahead_2) = lexer.lookahead_2() {
								if lookahead_2.is_ascii_digit() {
									// We have an `e+`/`e-` followed by a digit, so this is an exponent notation rather
									// than a suffix.
									num_buffer.push(current);
									num_buffer.push(lookahead);
									lexer.advance();
									lexer.advance();
									// If the number isn't already a float, then an exponent makes it one.
									state = NumState::Float;
									continue 'number;
								} else {
									// We have an `e` followed by a `+`/`-` and something that's not a digit after, so
									// this becomes a suffix.
									lexer.advance();
									suffix_buffer = Some(String::from(current));
									let type_ = match state {
										NumState::Hex => NumType::Hex,
										NumState::Zero => {
											if num_buffer.as_str() == "0" {
												NumType::Dec
											} else {
												num_buffer.remove(0);
												NumType::Oct
											}
										}
										NumState::Dec => NumType::Dec,
										NumState::Float => NumType::Float,
									};
									tokens.push((
										Token::Num {
											num: num_buffer,
											suffix: suffix_buffer,
											type_,
										},
										Span {
											start: buffer_start,
											end: lexer.position(),
										},
									));
									break 'number;
								}
							} else {
								// We have an `e` followed by a `+`/`-` and nothing after, so this becomes a suffix.
								suffix_buffer = Some(String::from(current));
								lexer.advance();
								let type_ = match state {
									NumState::Hex => NumType::Hex,
									NumState::Zero => {
										if num_buffer.as_str() == "0" {
											NumType::Dec
										} else {
											num_buffer.remove(0);
											NumType::Oct
										}
									}
									NumState::Dec => NumType::Dec,
									NumState::Float => NumType::Float,
								};
								tokens.push((
									Token::Num {
										num: num_buffer,
										suffix: suffix_buffer,
										type_,
									},
									Span {
										start: buffer_start,
										end: lexer.position(),
									},
								));
								break 'number;
							}
						}
					}
				}

				// We want to check for any word characters (and digits of course). This is to follow the spec.
				//
				// Something like `51ufoo` should be parsed as a decimal integer `51` with an invalid postfix
				// `ufoo`, hence why we must be greedy and pick up _any_ word characters.
				if current.is_ascii_hexdigit() || is_word(&current) {
					match state {
						NumState::Zero | NumState::Dec | NumState::Float => {
							if !current.is_ascii_digit()
								&& suffix_buffer.is_none()
							{
								// We have reached the beginning of a word, so flag that we are now parsing the
								// suffix.
								suffix_buffer = Some(String::new());
							}
						}
						NumState::Hex => {
							if !current.is_ascii_hexdigit()
								&& suffix_buffer.is_none()
							{
								// We have reached the beginning of a word, so flag that we are now parsing the
								// suffix.
								suffix_buffer = Some(String::new());
							}
						}
					}

					// Append the character to the appropriate buffer.
					if let Some(suffix) = &mut suffix_buffer {
						suffix.push(current);
					} else {
						num_buffer.push(current);
					}

					lexer.advance();
				} else {
					// The character can't be part of a number, so we can produce a token and exit this loop
					// without consuming it.
					let type_ = match state {
						NumState::Hex => NumType::Hex,
						NumState::Zero => {
							if num_buffer.as_str() == "0" {
								NumType::Dec
							} else {
								num_buffer.remove(0);
								NumType::Oct
							}
						}
						NumState::Dec => NumType::Dec,
						NumState::Float => NumType::Float,
					};
					tokens.push((
						Token::Num {
							num: num_buffer,
							suffix: suffix_buffer,
							type_,
						},
						Span {
							start: buffer_start,
							end: lexer.position(),
						},
					));
					break 'number;
				}
			}
		} else if is_punctuation_start(&current) {
			can_start_directive = false;

			if lexer.take_pat("//") {
				// If we have a `//`, that means this is a comment until the EOL.
				'line_comment: loop {
					// Peek the current character.
					current = match lexer.peek() {
						Some(c) => c,
						None => {
							// We have reached the end of the source string, and therefore the end of the comment.
							tokens.push((
								Token::LineComment(std::mem::take(&mut buffer)),
								Span {
									start: buffer_start,
									end: lexer.position(),
								},
							));
							break 'line_comment;
						}
					};

					if current == '\r' || current == '\n' {
						// We have an EOL without a line-continuator, so this is the end of the comment.
						tokens.push((
							Token::LineComment(std::mem::take(&mut buffer)),
							Span {
								start: buffer_start,
								end: lexer.position(),
							},
						));
						break 'line_comment;
					} else {
						// Any other character is just added to the comment buffer.
						buffer.push(current);
						lexer.advance();
					}
				}
			} else if lexer.take_pat("/*") {
				// If we have a `/*`, that means this is a comment until the first `*/`.
				'comment: loop {
					// Test if the end delimiter is here.
					if lexer.take_pat("*/") {
						tokens.push((
							Token::BlockComment {
								str: std::mem::take(&mut buffer),
								contains_eof: false,
							},
							Span {
								start: buffer_start,
								end: lexer.position(),
							},
						));
						can_start_directive = true;
						break 'comment;
					}

					// Continue pushing any characters into the buffer.
					if let Some(char) = lexer.next() {
						buffer.push(char);
					} else {
						// We have reached the end of the source string, and therefore the end of the comment. This
						// comment therefore contains the EOF and hence is not valid.
						tokens.push((
							Token::BlockComment {
								str: std::mem::take(&mut buffer),
								contains_eof: true,
							},
							Span {
								start: buffer_start,
								end: lexer.position(),
							},
						));
						break 'comment;
					}
				}
			} else {
				tokens.push((
					match_punctuation(lexer),
					Span {
						start: buffer_start,
						end: lexer.position(),
					},
				));
			}
		} else if current.is_whitespace() {
			// Check for an EOL, to reset the directive parsing flag.
			if current == '\r' || current == '\n' {
				can_start_directive = true;
			}
			// We ignore whitespace characters.
			lexer.advance();
		} else if can_start_directive && current == '#' && !parsing_define_body
		{
			// The first time we come across a directive, we want to check whether it is the first token other than
			// whitespace or comments. If so, and if it turns out we are parsing a version directive, we change the
			// version number of the lexer to the parsed value.
			let mut first_directive = false;
			if !parsed_directive_yet {
				first_directive = true;
				for (token, _) in &tokens {
					match token {
						Token::LineComment(_) | Token::BlockComment { .. } => {}
						_ => {
							first_directive = false;
							break;
						}
					}
				}
				parsed_directive_yet = true;
			};

			// If we are parsing a directive string, then the only difference in behaviour is that we don't start a
			// new directive within the existing directive. This means the `#` character will be treated as an
			// invalid character instead.
			let directive_start = lexer.position();
			lexer.advance();

			// Consume whitespace since any whitespace between the `#` and `<keyword>` is ignored.
			loop {
				current = match lexer.peek() {
					Some(c) => c,
					None => {
						// We have reached the end of the source string, and hence the end of this directive.
						tokens.push((
							Token::Directive(preprocessor::TokenStream::Empty),
							Span {
								start: directive_start,
								end: lexer.position(),
							},
						));
						break 'outer;
					}
				};

				if current == '\r' || current == '\n' {
					// We have an EOL without a line-continuator, which marks the end of this directive.
					tokens.push((
						Token::Directive(preprocessor::TokenStream::Empty),
						Span {
							start: directive_start,
							end: lexer.position(),
						},
					));
					continue 'outer;
				}

				if current.is_ascii_whitespace() {
					lexer.advance();
					continue;
				} else {
					break;
				}
			}

			if !is_word_start(&current) {
				// We have a directive which doesn't begin with a word, which is invalid.
				let content_start = lexer.position();
				'content: loop {
					// Peek the current character.
					current = match lexer.peek() {
						Some(c) => c,
						None => {
							tokens.push((
								Token::Directive(
									preprocessor::TokenStream::Invalid {
										content: (
											std::mem::take(&mut buffer),
											Span {
												start: content_start,
												end: lexer.position(),
											},
										),
									},
								),
								Span {
									start: directive_start,
									end: lexer.position(),
								},
							));
							break 'outer;
						}
					};

					if current == '\r' || current == '\n' {
						// We have an EOL without a line-continuator, which marks the end of this directive.
						break 'content;
					} else {
						// Any other character is just added to the content buffer.
						buffer.push(current);
						lexer.advance();
					}
				}

				tokens.push((
					Token::Directive(preprocessor::TokenStream::Invalid {
						content: (
							std::mem::take(&mut buffer),
							Span {
								start: content_start,
								end: lexer.position(),
							},
						),
					}),
					Span {
						start: directive_start,
						end: lexer.position(),
					},
				));
				continue 'outer;
			}

			// Consume the first word, which is the name of the directive.
			let directive_kw_start = lexer.position();
			buffer.push(current);
			lexer.advance();
			'directive_name: loop {
				// Peek the current character.
				current = match lexer.peek() {
					Some(c) => c,
					None => {
						// We have reached the end of the source string, and hence of this directive.
						tokens.push((
							Token::Directive(preprocessor::construct_empty(
								lexer,
								buffer,
								Span {
									start: directive_kw_start,
									end: lexer.position(),
								},
							)),
							Span {
								start: directive_start,
								end: lexer.position(),
							},
						));
						break 'outer;
					}
				};

				// Check if it can be part of a word.
				if is_word(&current) {
					// The character can be part of a word, so consume it and continue looping.
					buffer.push(current);
					lexer.advance();
				} else {
					break 'directive_name;
				}
			}

			let directive_kw_span = Span {
				start: directive_kw_start,
				end: lexer.position(),
			};

			// Consume the rest of the directive, and create appropriate tokens depending on the directive keyword.
			match buffer.as_ref() {
				"version" => {
					let (stream, version) = preprocessor::parse_version(
						lexer,
						directive_kw_span,
						first_directive,
					);
					tokens.push((
						Token::Directive(stream),
						Span {
							start: directive_start,
							end: lexer.position(),
						},
					));
					if first_directive && !hardcoded_version {
						if let Some(version) = version {
							if version == GlslVersion::Unsupported {
								break 'outer;
							} else {
								lexer.metadata.version = version;
							}
						}
					}
				}
				"extension" => tokens.push((
					Token::Directive(preprocessor::parse_extension(
						lexer,
						directive_kw_span,
					)),
					Span {
						start: directive_start,
						end: lexer.position(),
					},
				)),
				"line" => tokens.push((
					Token::Directive(preprocessor::parse_line(
						lexer,
						directive_kw_span,
					)),
					Span {
						start: directive_start,
						end: lexer.position(),
					},
				)),
				"define" => {
					tokens.push((
						Token::Directive(preprocessor::TokenStream::Define {
							kw: directive_kw_span,
							ident_tokens: preprocessor::parse_define(lexer),
							body_tokens: parse_tokens(
								lexer,
								true,
								hardcoded_version,
							),
						}),
						Span {
							start: directive_start,
							end: lexer.position(),
						},
					));
				}
				"undef" => tokens.push((
					Token::Directive(preprocessor::parse_undef(
						lexer,
						directive_kw_span,
					)),
					Span {
						start: directive_start,
						end: lexer.position(),
					},
				)),
				"ifdef" | "ifndef" | "if" | "elif" | "else" | "endif" => {
					lexer.metadata.contains_conditional_directives = true;
					tokens.push((
						Token::Directive(preprocessor::parse_condition(
							lexer,
							&buffer,
							directive_kw_span,
						)),
						Span {
							start: directive_start,
							end: lexer.position(),
						},
					));
				}
				"error" => {
					buffer.clear();
					let content_start = lexer.position();

					'content: loop {
						// Peek the current character.
						current = match lexer.peek() {
							Some(c) => c,
							None => {
								// We have reached the end of the source string, and therefore the end of this
								// directive.
								break 'content;
							}
						};

						if current == '\r' || current == '\n' {
							// We have an EOL without a line-continuator, which marks the end of this directive.
							break 'content;
						} else {
							// Any other character is just added to the content buffer.
							buffer.push(current);
							lexer.advance();
						}
					}

					tokens.push((
						Token::Directive(preprocessor::TokenStream::Error {
							kw: directive_kw_span,
							message: Some((
								std::mem::take(&mut buffer),
								Span {
									start: content_start,
									end: lexer.position(),
								},
							)),
						}),
						Span {
							start: directive_start,
							end: lexer.position(),
						},
					));
				}
				"pragma" => {
					buffer.clear();
					let content_start = lexer.position();

					'content: loop {
						// Peek the current character.
						current = match lexer.peek() {
							Some(c) => c,
							None => {
								// We have reached the end of the source string, and therefore the end of this
								// directive.
								break 'content;
							}
						};

						if current == '\r' || current == '\n' {
							// We have an EOL without a line-continuator, which marks the end of this directive.
							break 'content;
						} else {
							// Any other character is just added to the content buffer.
							buffer.push(current);
							lexer.advance();
						}
					}

					tokens.push((
						Token::Directive(preprocessor::TokenStream::Pragma {
							kw: directive_kw_span,
							options: Some((
								std::mem::take(&mut buffer),
								Span {
									start: content_start,
									end: lexer.position(),
								},
							)),
						}),
						Span {
							start: directive_start,
							end: lexer.position(),
						},
					));
				}
				_ => {
					let kw = (std::mem::take(&mut buffer), directive_kw_span);
					let content_start = lexer.position();

					'content: loop {
						// Peek the current character.
						current = match lexer.peek() {
							Some(c) => c,
							None => {
								// We have reached the end of the source string, and therefore the end of this
								// directive.
								break 'content;
							}
						};

						if current == '\r' || current == '\n' {
							// We have an EOL without a line-continuator, which marks the end of this directive.
							break 'content;
						} else {
							// Any other character is just added to the content buffer.
							buffer.push(current);
							lexer.advance();
						}
					}

					tokens.push((
						Token::Directive(preprocessor::TokenStream::Custom {
							kw,
							content: Some((
								std::mem::take(&mut buffer),
								Span {
									start: content_start,
									end: lexer.position(),
								},
							)),
						}),
						Span {
							start: directive_start,
							end: lexer.position(),
						},
					));
				}
			}
			buffer.clear();
		} else if current == '#' && parsing_define_body {
			// Look for a `##`, which is valid within the body of a `#define` macro.
			if lexer.take_pat("##") {
				tokens.push((
					Token::MacroConcat,
					Span {
						start: buffer_start,
						end: lexer.position(),
					},
				));
			} else {
				lexer.advance();
				tokens.push((
					Token::Invalid(current),
					Span {
						start: buffer_start,
						end: lexer.position(),
					},
				));
			}
		} else {
			// This character isn't valid to start any token.
			lexer.advance();
			tokens.push((
				Token::Invalid(current),
				Span {
					start: buffer_start,
					end: lexer.position(),
				},
			));
		}
	}

	tokens
}

/// This trait allows us to monomorphize the lexer over different code units.
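///
/// For example, `'é'` is 2 code units long under [`Utf8`] but only 1 under [`Utf16`] and [`Utf32`];
/// the differing [`offset()`](Char::offset) values are what make the span offsets differ between the
/// `parse*()` function variants.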
1679pub(crate) trait Char {
1680	fn offset(c: char) -> usize;
1681}
1682
1683pub(crate) struct Utf8;
1684impl Char for Utf8 {
1685	#[inline]
1686	fn offset(c: char) -> usize {
1687		c.len_utf8()
1688	}
1689}
1690
1691pub(crate) struct Utf16;
1692impl Char for Utf16 {
1693	#[inline]
1694	fn offset(c: char) -> usize {
1695		c.len_utf16()
1696	}
1697}
1698
1699pub(crate) struct Utf32;
1700impl Char for Utf32 {
1701	#[inline]
1702	fn offset(_: char) -> usize {
		// Any `char` is always exactly 1 code unit in utf-32.
1704		1
1705	}
1706}
1707
1708/// A lexer which allows stepping through a GLSL source string character by character.
1709///
/// It provides helper methods for matching patterns and for correctly handling constructs such as
/// line-continuators, which a naive character-by-character iteration would mishandle.
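///
/// For example, stepping through the source `ab\` `<\n>` `cd` with [`next()`](Self::next()) yields the
/// characters as if the line-continuator were absent:
///
/// ```text
/// "ab\\\ncd"  =>  'a' 'b' 'c' 'd'
/// ```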
1712pub(crate) struct Lexer<C> {
1713	/// The source string stored as a vector of characters.
1714	chars: Vec<char>,
1715	/// The index of the current character.
1716	cursor: usize,
1717	/// The offset, in code units, from the start of the character vector.
1718	offset: usize,
1719	/// Metadata about this source string.
1720	metadata: Metadata,
1721	_marker: std::marker::PhantomData<C>,
1722}
1723
1724impl<C: Char> Lexer<C> {
1725	/// Constructs a new lexer.
1726	pub(crate) fn new(source: &str, span_encoding: SpanEncoding) -> Self {
1727		let mut lexer = Lexer {
1728			// Iterating over individual characters is guaranteed to produce correct behaviour because GLSL source
1729			// strings must use the UTF-8 encoding as per the specification.
1730			chars: source.chars().collect(),
1731			cursor: 0,
1732			offset: 0,
1733			metadata: Metadata {
1734				span_encoding,
1735				version: Default::default(),
1736				contains_conditional_directives: false,
1737			},
1738			_marker: Default::default(),
1739		};
1740
		// Deal with a line-continuation character if it's the first thing in the source file. If we didn't do
		// this, the first call to `peek()` in the lexing loop could return a `\` even though it is part of a
		// valid line-continuator and should have been skipped.
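		// E.g. for the source `\<\n>int`, the cursor starts on the `i` with an offset of 2.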
1744		let i = lexer.take_line_continuator(0);
1745		lexer.cursor = i;
1746		lexer.offset = i;
1747
1748		lexer
1749	}
1750
1751	/// Returns the current character under the cursor, without advancing the cursor.
1752	fn peek(&self) -> Option<char> {
		self.chars.get(self.cursor).copied()
1754	}
1755
	/// Peeks the next character without advancing the cursor; i.e. returns the character at `cursor + 1`,
	/// taking into account a possible line-continuator.
	fn lookahead_1(&self) -> Option<char> {
		let pos = self.cursor + 1 + self.take_line_continuator(self.cursor + 1);
		self.chars.get(pos).copied()
1761	}
1762
	/// Peeks the character after the next one without advancing the cursor; i.e. returns the character at
	/// `cursor + 2`, taking into account possible line-continuators.
	fn lookahead_2(&self) -> Option<char> {
		let pos = self.cursor + 1 + self.take_line_continuator(self.cursor + 1);
		let pos = pos + 1 + self.take_line_continuator(pos + 1);
		self.chars.get(pos).copied()
1769	}
1770
1771	/// Advances the cursor by one.
1772	fn advance(&mut self) {
		if let Some(c) = self.peek() {
			self.offset += C::offset(c);
		}
1779		self.cursor += 1;
1780
1781		let i = self.take_line_continuator(self.cursor);
1782		self.offset += i;
1783		self.cursor += i;
1784	}
1785
1786	/// Returns the current character under the cursor and advances the cursor by one.
1787	///
1788	/// This is equivalent to calling [`peek()`](Self::peek()) followed by [`advance()`](Self::advance()).
1789	fn next(&mut self) -> Option<char> {
1790		let c = self.peek();
1791		// If we are successful in getting the character, advance the cursor.
1792		if c.is_some() {
1793			self.advance();
1794		}
1795		c
1796	}
1797
1798	/// Tries to match a pattern starting at the current character under the cursor.
1799	///
1800	/// If the match is successful, `true` is returned and the cursor is advanced to consume the pattern. If the
1801	/// match is unsuccessful, `false` is returned and the cursor stays in place. This method correctly deals with
1802	/// potential line-continuation characters within the source string that may exist within the pattern.
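	///
	/// A sketch of the expected behaviour, assuming the cursor sits at the start of `<<=5`:
	///
	/// ```text
	/// lexer.take_pat("<<=") == true   // cursor advanced past the `<<=`
	/// lexer.take_pat("&&")  == false  // cursor left on the `5`
	/// ```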
1803	fn take_pat(&mut self, pat: &str) -> bool {
1804		let pat = pat.chars().collect::<Vec<_>>();
1805		let pat_len = pat.len();
1806		let mut pat_count = 0;
1807
1808		// Store the current position before we check the pattern, so that we can rollback to this position if the
1809		// match fails.
1810		let starting_position = self.cursor;
1811		let starting_offset = self.offset;
1812
1813		// If the pattern fits within the remaining length of the string, compare.
1814		if self.chars.len() >= self.cursor + pat_len {
1815			while self.peek().is_some() {
1816				// If we have consumed the entire pattern, that means the pattern has matched and we can break out
1817				// of the loop.
1818				if pat_count == pat_len {
1819					break;
1820				}
1821
1822				// Check that the characters match.
1823				if self.peek().unwrap() != pat[pat_count] {
1824					self.cursor = starting_position;
1825					self.offset = starting_offset;
1826					return false;
1827				}
1828
1829				self.advance();
1830				pat_count += 1;
1831			}
1832
			// The loop can also exit because the source string ran out before the whole pattern was consumed
			// (line-continuators can consume extra characters beyond the initial length check), in which case
			// we must rollback and fail the match.
			if pat_count != pat_len {
				self.cursor = starting_position;
				self.offset = starting_offset;
				return false;
			}

			return true;
1834		}
1835
1836		false
1837	}
1838
1839	/// Returns the position of the cursor.
1840	fn position(&self) -> usize {
1841		self.offset
1842	}
1843
1844	/// Returns whether this lexer has reached the end of the GLSL source string.
1845	fn is_done(&self) -> bool {
1846		// We check that the cursor is equal to the length, because that means we have gone past the last character
1847		// of the string, and hence, that we are done.
1848		self.cursor == self.chars.len()
1849	}
1850
1851	/// Returns the cursor advancement value necessary to consume a line-continuator, if one is present.
1852	///
	/// This takes a cursor position as `idx`. A separate parameter is needed (rather than accessing
	/// `self.cursor`) because the `lookahead_*()` methods must probe positions ahead of the cursor without
	/// moving it.
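	///
	/// The recognised sequences, and the advancement each one produces, are:
	///
	/// ```text
	/// \ <\n>         => 2
	/// \ <\r> <\n>    => 3
	/// \ <\r>         => 2
	/// \ <eof>        => 1
	/// anything else  => 0
	/// ```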
1855	fn take_line_continuator(&self, idx: usize) -> usize {
1856		let current = match self.chars.get(idx) {
1857			Some(c) => *c,
1858			None => return 0,
1859		};
1860
1861		// Line-continuators need to begin with `\`.
1862		if current != '\\' {
1863			return 0;
1864		}
1865
1866		if let Some(lookahead) = self.chars.get(idx + 1) {
1867			if *lookahead == '\n' {
1868				// We have a `\<\n>`.
1869				2
1870			} else if *lookahead == '\r' {
1871				if let Some(lookahead_2) = self.chars.get(idx + 2) {
1872					if *lookahead_2 == '\n' {
1873						// We have a `\<\r><\n>`.
1874						3
1875					} else {
1876						// We have a `\<\r><something>`, where `<something>` is on the next line.
1877						2
1878					}
1879				} else {
1880					// We have a `\<\r><eof>`.
1881					2
1882				}
1883			} else if *lookahead == '\\' {
1884				// We have `\\`; this is a syntax error.
1885				// TODO: Syntax error
1886				0
1887			} else {
1888				// We have a `\` followed by a non-eol character; this is a syntax error.
1889				// TODO: Syntax error.
1890				0
1891			}
1892		} else {
			// We have a `\<eof>`, so we might as well treat this as a line-continuator.
1894			1
1895		}
1896	}
1897}
1898
1899/// Returns whether the character is allowed to start a word.
1900fn is_word_start(c: &char) -> bool {
1901	c.is_ascii_alphabetic() || *c == '_'
1902}
1903
1904/// Returns whether the character is allowed to be part of a word.
1905fn is_word(c: &char) -> bool {
1906	c.is_ascii_alphanumeric() || *c == '_'
1907}
1908
1909/// Returns whether the character is allowed to start a number.
1910fn is_number_start(c: &char) -> bool {
1911	c.is_ascii_digit() || *c == '.'
1912}
1913
1914/// Returns whether the character is allowed to start a punctuation token.
1915///
1916/// Note that whilst the `.` is a punctuation token, it gets caught by the `is_number_start()` branch since that
1917/// executes first.
1918fn is_punctuation_start(c: &char) -> bool {
1919	match c {
1920		'=' | ',' | '.' | ';' | '(' | ')' | '[' | ']' | '{' | '}' | ':'
1921		| '+' | '-' | '*' | '/' | '%' | '>' | '<' | '!' | '~' | '?' | '&'
1922		| '|' | '^' => true,
1923		_ => false,
1924	}
1925}
1926
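/// Attempts to consume the pattern `$str` from the lexer; on success, immediately returns `$token` from the
/// enclosing function.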
1927macro_rules! match_op {
1928	($lexer:ident, $str:expr, $token:expr) => {
1929		if $lexer.take_pat($str) {
1930			return $token;
1931		}
1932	};
1933}
1934
1935/// Matches a punctuation symbol.
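///
/// The `match_op!` invocations below are ordered longest-pattern-first so that the longest possible token
/// always wins; e.g. for the input `<<=`, the three-character pattern is tried (and matches) before `<<` or
/// `<` ever get a chance.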
1936fn match_punctuation<C: Char>(lexer: &mut Lexer<C>) -> Token {
1937	match_op!(lexer, "<<=", Token::Op(OpTy::LShiftEq));
1938	match_op!(lexer, ">>=", Token::Op(OpTy::RShiftEq));
1939	match_op!(lexer, "==", Token::Op(OpTy::EqEq));
1940	match_op!(lexer, "!=", Token::Op(OpTy::NotEq));
1941	match_op!(lexer, ">=", Token::Op(OpTy::Ge));
1942	match_op!(lexer, "<=", Token::Op(OpTy::Le));
1943	match_op!(lexer, "&&", Token::Op(OpTy::AndAnd));
1944	match_op!(lexer, "||", Token::Op(OpTy::OrOr));
1945	match_op!(lexer, "++", Token::Op(OpTy::AddAdd));
1946	match_op!(lexer, "--", Token::Op(OpTy::SubSub));
1947	match_op!(lexer, "<<", Token::Op(OpTy::LShift));
1948	match_op!(lexer, ">>", Token::Op(OpTy::RShift));
1949	match_op!(lexer, "+=", Token::Op(OpTy::AddEq));
1950	match_op!(lexer, "-=", Token::Op(OpTy::SubEq));
1951	match_op!(lexer, "*=", Token::Op(OpTy::MulEq));
1952	match_op!(lexer, "/=", Token::Op(OpTy::DivEq));
1953	match_op!(lexer, "%=", Token::Op(OpTy::RemEq));
1954	match_op!(lexer, "&=", Token::Op(OpTy::AndEq));
1955	match_op!(lexer, "|=", Token::Op(OpTy::OrEq));
1956	match_op!(lexer, "^^", Token::Op(OpTy::XorXor));
1957	match_op!(lexer, "^=", Token::Op(OpTy::XorEq));
1958	match_op!(lexer, "=", Token::Op(OpTy::Eq));
1959	match_op!(lexer, ";", Token::Semi);
1960	match_op!(lexer, ".", Token::Dot);
1961	match_op!(lexer, ",", Token::Comma);
1962	match_op!(lexer, "(", Token::LParen);
1963	match_op!(lexer, ")", Token::RParen);
1964	match_op!(lexer, "[", Token::LBracket);
1965	match_op!(lexer, "]", Token::RBracket);
1966	match_op!(lexer, "{", Token::LBrace);
1967	match_op!(lexer, "}", Token::RBrace);
1968	match_op!(lexer, "+", Token::Op(OpTy::Add));
1969	match_op!(lexer, "-", Token::Op(OpTy::Sub));
1970	match_op!(lexer, "*", Token::Op(OpTy::Mul));
1971	match_op!(lexer, "/", Token::Op(OpTy::Div));
1972	match_op!(lexer, ">", Token::Op(OpTy::Gt));
1973	match_op!(lexer, "<", Token::Op(OpTy::Lt));
1974	match_op!(lexer, "!", Token::Op(OpTy::Not));
1975	match_op!(lexer, "~", Token::Op(OpTy::Flip));
1976	match_op!(lexer, "?", Token::Question);
1977	match_op!(lexer, ":", Token::Colon);
1978	match_op!(lexer, "%", Token::Op(OpTy::Rem));
1979	match_op!(lexer, "&", Token::Op(OpTy::And));
1980	match_op!(lexer, "|", Token::Op(OpTy::Or));
1981	match_op!(lexer, "^", Token::Op(OpTy::Xor));
	unreachable!("[lexer::match_punctuation] Exhausted all of the patterns without matching anything!");
1983}
1984
/// Matches a word against the `true`/`false` literals, keywords and reserved words, in that order of
/// precedence, falling back to an identifier.
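///
/// ```text
/// "false"  => Token::Bool(false)
/// "while"  => Token::While
/// "goto"   => Token::Reserved("goto")
/// "my_var" => Token::Ident("my_var")
/// ```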
1986fn match_word(str: String) -> Token {
1987	match str.as_ref() {
1988		// Booleans
1989		"true" => Token::Bool(true),
1990		"false" => Token::Bool(false),
1991		// Keywords
1992		"if" => Token::If,
1993		"else" => Token::Else,
1994		"for" => Token::For,
1995		"do" => Token::Do,
1996		"while" => Token::While,
1997		"continue" => Token::Continue,
1998		"switch" => Token::Switch,
1999		"case" => Token::Case,
2000		"default" => Token::Default,
2001		"break" => Token::Break,
2002		"return" => Token::Return,
2003		"discard" => Token::Discard,
2004		"struct" => Token::Struct,
2005		"subroutine" => Token::Subroutine,
2006		"const" => Token::Const,
2007		"in" => Token::In,
2008		"out" => Token::Out,
2009		"inout" => Token::InOut,
2010		"attribute" => Token::Attribute,
2011		"uniform" => Token::Uniform,
2012		"varying" => Token::Varying,
2013		"buffer" => Token::Buffer,
2014		"shared" => Token::Shared,
2015		"centroid" => Token::Centroid,
2016		"sample" => Token::Sample,
2017		"patch" => Token::Patch,
2018		"layout" => Token::Layout,
2019		"flat" => Token::Flat,
2020		"smooth" => Token::Smooth,
2021		"noperspective" => Token::NoPerspective,
2022		"highp" => Token::HighP,
2023		"mediump" => Token::MediumP,
2024		"lowp" => Token::LowP,
2025		"invariant" => Token::Invariant,
2026		"precise" => Token::Precise,
2027		"coherent" => Token::Coherent,
2028		"volatile" => Token::Volatile,
2029		"restrict" => Token::Restrict,
2030		"readonly" => Token::Readonly,
2031		"writeonly" => Token::Writeonly,
2032		// Reserved
2033		"common" | "partition" | "active" | "asm" | "class" | "union"
2034		| "enum" | "typedef" | "template" | "this" | "resource" | "goto"
2035		| "inline" | "noinline" | "public" | "static" | "extern"
2036		| "external" | "interface" | "long" | "short" | "half" | "fixed"
2037		| "unsigned" | "superp" | "input" | "output" | "hvec2" | "hvec3"
2038		| "hvec4" | "fvec2" | "fvec3" | "fvec4" | "sampler3DRect"
2039		| "filter" | "sizeof" | "cast" | "namespace" | "using" => {
2040			Token::Reserved(str)
2041		}
2042		// Identifier
2043		_ => Token::Ident(str),
2044	}
2045}
2046
2047#[cfg(test)]
2048mod tests {
2049	use super::{NumType, OpTy, Token};
2050	use crate::span;
2051
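	/// Asserts whether the token output of the `parse_with_version()` function matches the right hand side,
	/// including the span information.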
2052	macro_rules! assert_tokens2 {
2053		($src:expr, $($token:expr),*) => {
2054			let (tokens, _metadata) = crate::lexer::parse_with_version($src, crate::GlslVersion::_450);
2055			assert_eq!(tokens, vec![
2056				$(
2057					$token,
2058				)*
2059			])
2060		};
2061	}
2062
2063	#[test]
2064	fn span_comparisons() {
2065		let src = "a 𐐀 c";
2066		let (tokens, _metadata) =
2067			super::parse_with_version(src, crate::GlslVersion::_450);
2068		assert_eq!(
2069			tokens,
2070			vec![
2071				(Token::Ident("a".into()), span(0, 1)),
2072				(Token::Invalid('𐐀'), span(2, 3)),
2073				(Token::Ident("c".into()), span(4, 5))
2074			]
2075		);
2076
2077		let (tokens, _metadata) = super::parse_with_utf_16_offsets_and_version(
2078			src,
2079			crate::GlslVersion::_450,
2080		);
2081		assert_eq!(
2082			tokens,
2083			vec![
2084				(Token::Ident("a".into()), span(0, 1)),
2085				(Token::Invalid('𐐀'), span(2, 4)),
2086				(Token::Ident("c".into()), span(5, 6))
2087			]
2088		);
2089
2090		let (tokens, _metadata) = super::parse_with_utf_8_offsets_and_version(
2091			src,
2092			crate::GlslVersion::_450,
2093		);
2094		assert_eq!(
2095			tokens,
2096			vec![
2097				(Token::Ident("a".into()), span(0, 1)),
2098				(Token::Invalid('𐐀'), span(2, 6)),
2099				(Token::Ident("c".into()), span(7, 8))
2100			]
2101		);
2102	}
2103
2104	#[test]
2105	fn spans() {
2106		// Identifiers/keywords
2107		assert_tokens2!("return", (Token::Return, span(0, 6)));
2108		assert_tokens2!("break ", (Token::Break, span(0, 5)));
2109		assert_tokens2!(
2110			"return break",
2111			(Token::Return, span(0, 6)),
2112			(Token::Break, span(7, 12))
2113		);
2114		// Punctuation
2115		assert_tokens2!(";", (Token::Semi, span(0, 1)));
2116		assert_tokens2!(": ", (Token::Colon, span(0, 1)));
2117		assert_tokens2!(
2118			"; :",
2119			(Token::Semi, span(0, 1)),
2120			(Token::Colon, span(2, 3))
2121		);
2122		// Comments
2123		assert_tokens2!(
2124			"// comment",
2125			(Token::LineComment(" comment".into()), span(0, 10))
2126		);
2127		assert_tokens2!(
2128			"/* a */",
2129			(
2130				Token::BlockComment {
2131					str: " a ".into(),
2132					contains_eof: false
2133				},
2134				span(0, 7)
2135			)
2136		);
2137		assert_tokens2!(
2138			"/* a",
2139			(
2140				Token::BlockComment {
2141					str: " a".into(),
2142					contains_eof: true
2143				},
2144				span(0, 4)
2145			)
2146		);
		// Directives (spans are exercised in the `preproc` module below)
2150		// Invalid
2151		assert_tokens2!("@", (Token::Invalid('@'), span(0, 1)));
2152		assert_tokens2!("¬", (Token::Invalid('¬'), span(0, 1)));
2153		assert_tokens2!(
2154			"@  ¬",
2155			(Token::Invalid('@'), span(0, 1)),
2156			(Token::Invalid('¬'), span(3, 4))
2157		);
2158		// Numbers
2159		assert_tokens2!(".", (Token::Dot, span(0, 1)));
2160		assert_tokens2!(". ", (Token::Dot, span(0, 1)));
2161		assert_tokens2!(
2162			"0xF.",
2163			(
2164				Token::Num {
2165					num: "F".into(),
2166					suffix: None,
2167					type_: NumType::Hex
2168				},
2169				span(0, 3)
2170			),
2171			(Token::Dot, span(3, 4))
2172		);
2173		assert_tokens2!(
2174			"123u.",
2175			(
2176				Token::Num {
2177					num: "123".into(),
2178					suffix: Some("u".into()),
2179					type_: NumType::Dec
2180				},
2181				span(0, 4)
2182			),
2183			(Token::Dot, span(4, 5))
2184		);
2185		assert_tokens2!(
2186			"1.2.",
2187			(
2188				Token::Num {
2189					num: "1.2".into(),
2190					suffix: None,
2191					type_: NumType::Float
2192				},
2193				span(0, 3)
2194			),
2195			(Token::Dot, span(3, 4))
2196		);
2197		assert_tokens2!(
2198			"1e",
2199			(
2200				Token::Num {
2201					num: "1".into(),
2202					suffix: Some("e".into()),
2203					type_: NumType::Dec
2204				},
2205				span(0, 2)
2206			)
2207		);
2208		assert_tokens2!(
2209			"123 ",
2210			(
2211				Token::Num {
2212					num: "123".into(),
2213					suffix: None,
2214					type_: NumType::Dec
2215				},
2216				span(0, 3)
2217			)
2218		);
2219		assert_tokens2!(
2220			"1e+=",
2221			(
2222				Token::Num {
2223					num: "1".into(),
2224					suffix: Some("e".into()),
2225					type_: NumType::Dec
2226				},
2227				span(0, 2)
2228			),
2229			(Token::Op(OpTy::AddEq), span(2, 4))
2230		);
2231		assert_tokens2!(
2232			"1e+",
2233			(
2234				Token::Num {
2235					num: "1".into(),
2236					suffix: Some("e".into()),
2237					type_: NumType::Dec
2238				},
2239				span(0, 2)
2240			),
2241			(Token::Op(OpTy::Add), span(2, 3))
2242		);
2243	}
2244
	/// Asserts whether the token output of the `parse_with_version()` function matches the right hand side;
	/// this ignores the span information.
2247	macro_rules! assert_tokens {
2248		($src:expr, $($token:expr),*) => {
2249			let output = crate::lexer::parse_with_version($src, crate::GlslVersion::_450).0.into_iter().map(|(t, _)| t).collect::<Vec<_>>();
2250			assert_eq!(output, vec![
2251				$(
2252					$token,
2253				)*
2254			])
2255		};
2256	}
2257
2258	#[test]
2259	fn identifiers() {
2260		assert_tokens!("ident", Token::Ident("ident".into()));
2261		assert_tokens!("gl_something", Token::Ident("gl_something".into()));
2262		assert_tokens!("id_145", Token::Ident("id_145".into()));
2263		assert_tokens!("_9ga", Token::Ident("_9ga".into()));
2264
2265		// Broken by line continuator
2266		assert_tokens!("my_\\\r\nident", Token::Ident("my_ident".into()));
2267		assert_tokens!("_\\\n9ga", Token::Ident("_9ga".into()));
2268	}
2269
2270	#[test]
2271	fn keywords() {
2272		assert_tokens!("true", Token::Bool(true));
2273		assert_tokens!("false", Token::Bool(false));
2274		assert_tokens!("if", Token::If);
2275		assert_tokens!("else", Token::Else);
2276		assert_tokens!("for", Token::For);
2277		assert_tokens!("do", Token::Do);
2278		assert_tokens!("while", Token::While);
2279		assert_tokens!("continue", Token::Continue);
2280		assert_tokens!("switch", Token::Switch);
2281		assert_tokens!("case", Token::Case);
2282		assert_tokens!("default", Token::Default);
2283		assert_tokens!("break", Token::Break);
2284		assert_tokens!("return", Token::Return);
2285		assert_tokens!("discard", Token::Discard);
2286		assert_tokens!("struct", Token::Struct);
2287		assert_tokens!("subroutine", Token::Subroutine);
2288		assert_tokens!("const", Token::Const);
2289		assert_tokens!("in", Token::In);
2290		assert_tokens!("out", Token::Out);
2291		assert_tokens!("inout", Token::InOut);
2292		assert_tokens!("attribute", Token::Attribute);
2293		assert_tokens!("uniform", Token::Uniform);
2294		assert_tokens!("varying", Token::Varying);
2295		assert_tokens!("buffer", Token::Buffer);
2296		assert_tokens!("shared", Token::Shared);
2297		assert_tokens!("centroid", Token::Centroid);
2298		assert_tokens!("sample", Token::Sample);
2299		assert_tokens!("patch", Token::Patch);
2300		assert_tokens!("layout", Token::Layout);
2301		assert_tokens!("flat", Token::Flat);
2302		assert_tokens!("smooth", Token::Smooth);
2303		assert_tokens!("noperspective", Token::NoPerspective);
2304		assert_tokens!("highp", Token::HighP);
2305		assert_tokens!("mediump", Token::MediumP);
2306		assert_tokens!("lowp", Token::LowP);
2307		assert_tokens!("invariant", Token::Invariant);
2308		assert_tokens!("precise", Token::Precise);
2309		assert_tokens!("coherent", Token::Coherent);
2310		assert_tokens!("volatile", Token::Volatile);
2311		assert_tokens!("restrict", Token::Restrict);
2312		assert_tokens!("readonly", Token::Readonly);
2313		assert_tokens!("writeonly", Token::Writeonly);
2314		// Reserved
2315		assert_tokens!("common", Token::Reserved("common".into()));
2316		assert_tokens!("partition", Token::Reserved("partition".into()));
2317		assert_tokens!("active", Token::Reserved("active".into()));
2318		assert_tokens!("asm", Token::Reserved("asm".into()));
2319		assert_tokens!("class", Token::Reserved("class".into()));
2320		assert_tokens!("union", Token::Reserved("union".into()));
2321		assert_tokens!("enum", Token::Reserved("enum".into()));
2322		assert_tokens!("typedef", Token::Reserved("typedef".into()));
2323		assert_tokens!("template", Token::Reserved("template".into()));
2324		assert_tokens!("this", Token::Reserved("this".into()));
2325		assert_tokens!("resource", Token::Reserved("resource".into()));
2326		assert_tokens!("goto", Token::Reserved("goto".into()));
2327		assert_tokens!("inline", Token::Reserved("inline".into()));
2328		assert_tokens!("noinline", Token::Reserved("noinline".into()));
2329		assert_tokens!("public", Token::Reserved("public".into()));
2330		assert_tokens!("static", Token::Reserved("static".into()));
2331		assert_tokens!("extern", Token::Reserved("extern".into()));
2332		assert_tokens!("external", Token::Reserved("external".into()));
2333		assert_tokens!("interface", Token::Reserved("interface".into()));
2334		assert_tokens!("long", Token::Reserved("long".into()));
2335		assert_tokens!("short", Token::Reserved("short".into()));
2336		assert_tokens!("half", Token::Reserved("half".into()));
2337		assert_tokens!("fixed", Token::Reserved("fixed".into()));
2338		assert_tokens!("unsigned", Token::Reserved("unsigned".into()));
2339		assert_tokens!("superp", Token::Reserved("superp".into()));
2340		assert_tokens!("input", Token::Reserved("input".into()));
2341		assert_tokens!("output", Token::Reserved("output".into()));
2342		assert_tokens!("hvec2", Token::Reserved("hvec2".into()));
2343		assert_tokens!("hvec3", Token::Reserved("hvec3".into()));
2344		assert_tokens!("hvec4", Token::Reserved("hvec4".into()));
2345		assert_tokens!("fvec2", Token::Reserved("fvec2".into()));
2346		assert_tokens!("fvec3", Token::Reserved("fvec3".into()));
2347		assert_tokens!("fvec4", Token::Reserved("fvec4".into()));
2348		assert_tokens!(
2349			"sampler3DRect",
2350			Token::Reserved("sampler3DRect".into())
2351		);
2352		assert_tokens!("filter", Token::Reserved("filter".into()));
2353		assert_tokens!("sizeof", Token::Reserved("sizeof".into()));
2354		assert_tokens!("cast", Token::Reserved("cast".into()));
2355		assert_tokens!("namespace", Token::Reserved("namespace".into()));
2356		assert_tokens!("using", Token::Reserved("using".into()));
2357
2358		// Broken by line continuator
2359		assert_tokens!("tr\\\rue", Token::Bool(true));
2360		assert_tokens!("dis\\\ncard", Token::Discard);
2361		assert_tokens!("sub\\\r\nroutine", Token::Subroutine);
2362	}
2363
2364	#[test]
2365	fn punctuation() {
2366		assert_tokens!(";", Token::Semi);
2367		assert_tokens!(".", Token::Dot);
2368		assert_tokens!(",", Token::Comma);
2369		assert_tokens!("(", Token::LParen);
2370		assert_tokens!(")", Token::RParen);
2371		assert_tokens!("[", Token::LBracket);
2372		assert_tokens!("]", Token::RBracket);
2373		assert_tokens!("{", Token::LBrace);
2374		assert_tokens!("}", Token::RBrace);
2375		assert_tokens!(":", Token::Colon);
2376		assert_tokens!("=", Token::Op(OpTy::Eq));
2377		assert_tokens!("+", Token::Op(OpTy::Add));
2378		assert_tokens!("-", Token::Op(OpTy::Sub));
2379		assert_tokens!("*", Token::Op(OpTy::Mul));
2380		assert_tokens!("/", Token::Op(OpTy::Div));
2381		assert_tokens!(">", Token::Op(OpTy::Gt));
2382		assert_tokens!("<", Token::Op(OpTy::Lt));
2383		assert_tokens!("!", Token::Op(OpTy::Not));
2384		assert_tokens!("~", Token::Op(OpTy::Flip));
2385		assert_tokens!("?", Token::Question);
2386		assert_tokens!("%", Token::Op(OpTy::Rem));
2387		assert_tokens!("&", Token::Op(OpTy::And));
2388		assert_tokens!("|", Token::Op(OpTy::Or));
2389		assert_tokens!("^", Token::Op(OpTy::Xor));
2390		assert_tokens!("==", Token::Op(OpTy::EqEq));
2391		assert_tokens!("!=", Token::Op(OpTy::NotEq));
2392		assert_tokens!(">=", Token::Op(OpTy::Ge));
2393		assert_tokens!("<=", Token::Op(OpTy::Le));
2394		assert_tokens!("&&", Token::Op(OpTy::AndAnd));
2395		assert_tokens!("||", Token::Op(OpTy::OrOr));
2396		assert_tokens!("^^", Token::Op(OpTy::XorXor));
2397		assert_tokens!("++", Token::Op(OpTy::AddAdd));
2398		assert_tokens!("--", Token::Op(OpTy::SubSub));
2399		assert_tokens!("<<", Token::Op(OpTy::LShift));
2400		assert_tokens!(">>", Token::Op(OpTy::RShift));
2401		assert_tokens!("+=", Token::Op(OpTy::AddEq));
2402		assert_tokens!("-=", Token::Op(OpTy::SubEq));
2403		assert_tokens!("*=", Token::Op(OpTy::MulEq));
2404		assert_tokens!("/=", Token::Op(OpTy::DivEq));
2405		assert_tokens!("%=", Token::Op(OpTy::RemEq));
2406		assert_tokens!("&=", Token::Op(OpTy::AndEq));
2407		assert_tokens!("|=", Token::Op(OpTy::OrEq));
2408		assert_tokens!("^=", Token::Op(OpTy::XorEq));
2409		assert_tokens!("<<=", Token::Op(OpTy::LShiftEq));
2410		assert_tokens!(">>=", Token::Op(OpTy::RShiftEq));
2411
2412		// Broken by line continuator
2413		assert_tokens!("!\\\n=", Token::Op(OpTy::NotEq));
2414		assert_tokens!("+\\\r=", Token::Op(OpTy::AddEq));
2415		assert_tokens!("=\\\n=", Token::Op(OpTy::EqEq));
2416		assert_tokens!(">>\\\r\n=", Token::Op(OpTy::RShiftEq));
2417	}
2418
2419	#[test]
2420	#[rustfmt::skip]
2421	fn comments() {
2422		// Line comments
2423		assert_tokens!("// a comment", Token::LineComment(" a comment".into()));
2424		assert_tokens!("//a comment", Token::LineComment("a comment".into()));
2425
2426		// Broken by line continuator
2427		assert_tokens!("// a comment \\\rcontinuation", Token::LineComment(" a comment continuation".into()));
2428		assert_tokens!("//a comment\\\ncontinuation", Token::LineComment("a commentcontinuation".into()));
2429		assert_tokens!("//a comment \\\r\ncontinuation", Token::LineComment("a comment continuation".into()));
2430		assert_tokens!("/\\\r/ a comment", Token::LineComment(" a comment".into()));
2431		assert_tokens!("/\\\r\n/ a comment", Token::LineComment(" a comment".into()));
2432		assert_tokens!("//\\\n a comment", Token::LineComment(" a comment".into()));
2433
2434		// Multi-line comments
2435		assert_tokens!("/* a comment */", Token::BlockComment{ str: " a comment ".into(), contains_eof: false});
2436		assert_tokens!("/*a comment*/", Token::BlockComment{ str: "a comment".into(), contains_eof: false});
2437		assert_tokens!("/* <Ll#,;#l,_!\"^$!6 */", Token::BlockComment{ str: " <Ll#,;#l,_!\"^$!6 ".into(), contains_eof: false});
2438		assert_tokens!("/* open-ended comment", Token::BlockComment{ str: " open-ended comment".into(), contains_eof: true});
2439
2440		// Broken by line continuator
2441		assert_tokens!("/\\\r* a comment */", Token::BlockComment{ str: " a comment ".into(), contains_eof: false});
2442		assert_tokens!("/\\\n*a comment*\\\r\n/", Token::BlockComment{ str: "a comment".into(), contains_eof: false});
2443	}
2444
2445	#[test]
2446	#[rustfmt::skip]
	fn integers() {
2448		// Zero
2449		assert_tokens!("0", Token::Num{num: "0".into(), suffix: None, type_: NumType::Dec});
2450		// Zero with suffix
2451		assert_tokens!("0u", Token::Num{num: "0".into(), suffix: Some("u".into()), type_: NumType::Dec});
2452		// Decimal
2453		assert_tokens!("1", Token::Num{num: "1".into(), suffix: None, type_: NumType::Dec});
2454		assert_tokens!("123456", Token::Num{num: "123456".into(), suffix: None, type_: NumType::Dec});
2455		assert_tokens!("100008", Token::Num{num: "100008".into(), suffix: None,  type_: NumType::Dec});
2456		// Decimal with suffix
2457		assert_tokens!("1u", Token::Num{num: "1".into(), suffix: Some("u".into()), type_: NumType::Dec});
2458		assert_tokens!("123456u", Token::Num{num: "123456".into(), suffix: Some("u".into()), type_: NumType::Dec});
2459		assert_tokens!("100008u", Token::Num{num: "100008".into(), suffix: Some("u".into()),  type_: NumType::Dec});
2460		// Octal
2461		assert_tokens!("00", Token::Num{num: "0".into(), suffix: None,  type_: NumType::Oct});
2462		assert_tokens!("01715", Token::Num{num: "1715".into(), suffix: None,  type_: NumType::Oct});
2463		assert_tokens!("09183", Token::Num{num: "9183".into(), suffix: None, type_: NumType::Oct});
2464		// Octal with suffix
2465		assert_tokens!("00u", Token::Num{num: "0".into(), suffix: Some("u".into()),  type_: NumType::Oct});
2466		assert_tokens!("01715u", Token::Num{num: "1715".into(), suffix: Some("u".into()),  type_: NumType::Oct});
2467		assert_tokens!("09183u", Token::Num{num: "9183".into(), suffix: Some("u".into()), type_: NumType::Oct});
2468		// Hexadecimal
2469		assert_tokens!("0x", Token::Num{num: "".into(), suffix: None, type_: NumType::Hex});
2470		assert_tokens!("0x91fa", Token::Num{num: "91fa".into(), suffix: None,  type_: NumType::Hex});
2471		assert_tokens!("0x00F", Token::Num{num: "00F".into(), suffix: None,  type_: NumType::Hex});
2472		// Hexadecimal with suffix
2473		assert_tokens!("0xu", Token::Num{num: "".into(), suffix: Some("u".into()), type_: NumType::Hex});
2474		assert_tokens!("0x91fau", Token::Num{num: "91fa".into(), suffix: Some("u".into()),  type_: NumType::Hex});
2475		assert_tokens!("0x00Fu", Token::Num{num: "00F".into(), suffix: Some("u".into()),  type_: NumType::Hex});
2476		
2477		// Broken by line continuator
2478		assert_tokens!("123\\\r456", Token::Num{num: "123456".into(), suffix: None, type_: NumType::Dec});
2479		assert_tokens!("12\\\n3456u", Token::Num{num: "123456".into(), suffix: Some("u".into()), type_: NumType::Dec});
2480		assert_tokens!("0171\\\n5", Token::Num{num: "1715".into(), suffix: None,  type_: NumType::Oct});
2481		assert_tokens!("0x91\\\r\nfa", Token::Num{num: "91fa".into(), suffix: None,  type_: NumType::Hex});
2482		assert_tokens!("0x\\\r91fau", Token::Num{num: "91fa".into(), suffix: Some("u".into()),  type_: NumType::Hex});
2483		assert_tokens!("0x\\\nu", Token::Num{num: "".into(), suffix: Some("u".into()), type_: NumType::Hex});
2484	}
2485
2486	#[test]
2487	#[rustfmt::skip]
2488	fn floats() {
2489		// Zeroes
2490		assert_tokens!("0.0", Token::Num{num: "0.0".into(), suffix: None, type_: NumType::Float});
2491		assert_tokens!("0.", Token::Num{num: "0.".into(), suffix: None, type_: NumType::Float});
2492		assert_tokens!(".0", Token::Num{num: ".0".into(), suffix: None, type_: NumType::Float});
2493		// Zeroes with suffix
2494		assert_tokens!("0.0lf", Token::Num{num: "0.0".into(), suffix: Some("lf".into()), type_: NumType::Float});
2495		assert_tokens!("0.lf", Token::Num{num: "0.".into(), suffix: Some("lf".into()), type_: NumType::Float});
2496		assert_tokens!(".0lf", Token::Num{num: ".0".into(), suffix: Some("lf".into()), type_: NumType::Float});
2497		// Zeroes with exponent
2498		assert_tokens!("0e7", Token::Num{num: "0e7".into(), suffix: None, type_: NumType::Float});
2499		assert_tokens!("0e+7", Token::Num{num: "0e+7".into(), suffix: None, type_: NumType::Float});
2500		assert_tokens!("0e-7", Token::Num{num: "0e-7".into(), suffix: None, type_: NumType::Float});
2501		assert_tokens!("0.0e7", Token::Num{num: "0.0e7".into(), suffix: None, type_: NumType::Float});
2502		assert_tokens!("0.0e+7", Token::Num{num: "0.0e+7".into(), suffix: None, type_: NumType::Float});
2503		assert_tokens!("0.0e-7", Token::Num{num: "0.0e-7".into(), suffix: None, type_: NumType::Float});
2504		assert_tokens!("0.e7", Token::Num{num: "0.e7".into(), suffix: None, type_: NumType::Float});
2505		assert_tokens!("0.e+7", Token::Num{num: "0.e+7".into(), suffix: None, type_: NumType::Float});
2506		assert_tokens!("0.e-7", Token::Num{num: "0.e-7".into(), suffix: None, type_: NumType::Float});
2507		assert_tokens!(".0e7", Token::Num{num: ".0e7".into(), suffix: None, type_: NumType::Float});
2508		assert_tokens!(".0e+7", Token::Num{num: ".0e+7".into(), suffix: None, type_: NumType::Float});
2509		assert_tokens!(".0e-7", Token::Num{num: ".0e-7".into(), suffix: None, type_: NumType::Float});
2510		// Zeroes with exponent and suffix
2511		assert_tokens!("0e7lf", Token::Num{num: "0e7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2512		assert_tokens!("0e+7lf", Token::Num{num: "0e+7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2513		assert_tokens!("0e-7lf", Token::Num{num: "0e-7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2514		assert_tokens!("0.0e7lf", Token::Num{num: "0.0e7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2515		assert_tokens!("0.0e+7lf", Token::Num{num: "0.0e+7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2516		assert_tokens!("0.0e-7lf", Token::Num{num: "0.0e-7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2517		assert_tokens!("0.e7lf", Token::Num{num: "0.e7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2518		assert_tokens!("0.e+7lf", Token::Num{num: "0.e+7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2519		assert_tokens!("0.e-7lf", Token::Num{num: "0.e-7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2520		assert_tokens!(".0e7lf", Token::Num{num: ".0e7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2521		assert_tokens!(".0e+7lf", Token::Num{num: ".0e+7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2522		assert_tokens!(".0e-7lf", Token::Num{num: ".0e-7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2523		// Digits
2524		assert_tokens!("1.0", Token::Num{num: "1.0".into(), suffix: None, type_: NumType::Float});
2525		assert_tokens!("1.1", Token::Num{num: "1.1".into(), suffix: None, type_: NumType::Float});
2526		assert_tokens!("1.", Token::Num{num: "1.".into(), suffix: None, type_: NumType::Float});
2527		assert_tokens!(".1", Token::Num{num: ".1".into(), suffix: None, type_: NumType::Float});
2528		// Digits with suffix
2529		assert_tokens!("1.0lf", Token::Num{num: "1.0".into(), suffix: Some("lf".into()), type_: NumType::Float});
2530		assert_tokens!("1.1lf", Token::Num{num: "1.1".into(), suffix: Some("lf".into()), type_: NumType::Float});
2531		assert_tokens!("1.lf", Token::Num{num: "1.".into(), suffix: Some("lf".into()), type_: NumType::Float});
2532		assert_tokens!(".1lf", Token::Num{num: ".1".into(), suffix: Some("lf".into()), type_: NumType::Float});
2533		// Digits with exponent
2534		assert_tokens!("1e7", Token::Num{num: "1e7".into(), suffix: None, type_: NumType::Float});
2535		assert_tokens!("1e+7", Token::Num{num: "1e+7".into(), suffix: None, type_: NumType::Float});
2536		assert_tokens!("1e-7", Token::Num{num: "1e-7".into(), suffix: None, type_: NumType::Float});
2537		assert_tokens!("1.0e7", Token::Num{num: "1.0e7".into(), suffix: None, type_: NumType::Float});
2538		assert_tokens!("1.0e+7", Token::Num{num: "1.0e+7".into(), suffix: None, type_: NumType::Float});
2539		assert_tokens!("1.0e-7", Token::Num{num: "1.0e-7".into(), suffix: None, type_: NumType::Float});
2540		assert_tokens!("1.1e7", Token::Num{num: "1.1e7".into(), suffix: None, type_: NumType::Float});
2541		assert_tokens!("1.1e+7", Token::Num{num: "1.1e+7".into(), suffix: None, type_: NumType::Float});
2542		assert_tokens!("1.1e-7", Token::Num{num: "1.1e-7".into(), suffix: None, type_: NumType::Float});
2543		assert_tokens!("1.e7", Token::Num{num: "1.e7".into(), suffix: None, type_: NumType::Float});
2544		assert_tokens!("1.e+7", Token::Num{num: "1.e+7".into(), suffix: None, type_: NumType::Float});
2545		assert_tokens!("1.e-7", Token::Num{num: "1.e-7".into(), suffix: None, type_: NumType::Float});
2546		assert_tokens!(".1e7", Token::Num{num: ".1e7".into(), suffix: None, type_: NumType::Float});
2547		assert_tokens!(".1e+7", Token::Num{num: ".1e+7".into(), suffix: None, type_: NumType::Float});
2548		assert_tokens!(".1e-7", Token::Num{num: ".1e-7".into(), suffix: None, type_: NumType::Float});
2549		// Digits with exponent and suffix
2550		assert_tokens!("1e7lf", Token::Num{num: "1e7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2551		assert_tokens!("1e+7lf", Token::Num{num: "1e+7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2552		assert_tokens!("1e-7lf", Token::Num{num: "1e-7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2553		assert_tokens!("1.0e7lf", Token::Num{num: "1.0e7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2554		assert_tokens!("1.0e+7lf", Token::Num{num: "1.0e+7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2555		assert_tokens!("1.0e-7lf", Token::Num{num: "1.0e-7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2556		assert_tokens!("1.1e7lf", Token::Num{num: "1.1e7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2557		assert_tokens!("1.1e+7lf", Token::Num{num: "1.1e+7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2558		assert_tokens!("1.1e-7lf", Token::Num{num: "1.1e-7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2559		assert_tokens!("1.e7lf", Token::Num{num: "1.e7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2560		assert_tokens!("1.e+7lf", Token::Num{num: "1.e+7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2561		assert_tokens!("1.e-7lf", Token::Num{num: "1.e-7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2562		assert_tokens!(".1e7lf", Token::Num{num: ".1e7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2563		assert_tokens!(".1e+7lf", Token::Num{num: ".1e+7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2564		assert_tokens!(".1e-7lf", Token::Num{num: ".1e-7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2565		
2566		// Broken by line continuator
2567		assert_tokens!("0.\\\r0", Token::Num{num: "0.0".into(), suffix: None, type_: NumType::Float});
2568		assert_tokens!(".\\\n0", Token::Num{num: ".0".into(), suffix: None, type_: NumType::Float});
2569		assert_tokens!(".0\\\nlf", Token::Num{num: ".0".into(), suffix: Some("lf".into()), type_: NumType::Float});
2570		assert_tokens!("0.\\\r\nlf", Token::Num{num: "0.".into(), suffix: Some("lf".into()), type_: NumType::Float});
2571		assert_tokens!("0e\\\r7", Token::Num{num: "0e7".into(), suffix: None, type_: NumType::Float});
2572		assert_tokens!("0e\\\r\n-7", Token::Num{num: "0e-7".into(), suffix: None, type_: NumType::Float});
2573		assert_tokens!(".0\\\r\ne+7", Token::Num{num: ".0e+7".into(), suffix: None, type_: NumType::Float});
2574		assert_tokens!("1.0e-\\\n7lf", Token::Num{num: "1.0e-7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2575		assert_tokens!(".1\\\re-7lf", Token::Num{num: ".1e-7".into(), suffix: Some("lf".into()), type_: NumType::Float});
2576	}
2577
2578	#[test]
2579	#[rustfmt::skip]
	fn illegal() {
		// Note: when these characters appear inside a preprocessor directive they are consumed as part of the
		// directive's content string, and any errors only surface once that string is itself tokenised.
		// Outside of directives they immediately produce `Invalid` tokens, as asserted below.
2583		assert_tokens!("@", Token::Invalid('@'));
2584		assert_tokens!("¬", Token::Invalid('¬'));
2585		assert_tokens!("`", Token::Invalid('`'));
2586		assert_tokens!("¦", Token::Invalid('¦'));
2587		assert_tokens!("'", Token::Invalid('\''));
2588		assert_tokens!("\"", Token::Invalid('"'));
2589		assert_tokens!("£", Token::Invalid('£'));
2590		assert_tokens!("$", Token::Invalid('$'));
2591		assert_tokens!("€", Token::Invalid('€'));
2592	}
2593
2594	#[cfg(test)]
2595	mod preproc {
2596		use super::super::{
2597			preprocessor::{
2598				ConditionToken, DefineToken, ExtensionToken, LineToken,
2599				TokenStream, UndefToken, VersionToken,
2600			},
2601			Token,
2602		};
2603		use crate::span;
2604
2605		#[test]
2606		fn empty() {
2607			assert_tokens2!(
2608				"#",
2609				(Token::Directive(TokenStream::Empty), span(0, 1))
2610			);
2611
2612			assert_tokens2!(
2613				"#    ",
2614				(Token::Directive(TokenStream::Empty), span(0, 5))
2615			);
2616		}
2617
2618		#[test]
2619		fn custom() {
2620			assert_tokens2!(
2621				"#custom",
2622				(
2623					Token::Directive(TokenStream::Custom {
2624						kw: ("custom".into(), span(1, 7)),
2625						content: None
2626					}),
2627					span(0, 7)
2628				)
2629			);
2630
2631			assert_tokens2!(
2632				"# custom      ",
2633				(
2634					Token::Directive(TokenStream::Custom {
2635						kw: ("custom".into(), span(2, 8)),
2636						content: Some(("      ".into(), span(8, 14)))
2637					}),
2638					span(0, 14)
2639				)
2640			);
2641
2642			assert_tokens2!(
2643				"#custom foobar 5 @;#",
2644				(
2645					Token::Directive(TokenStream::Custom {
2646						kw: ("custom".into(), span(1, 7)),
2647						content: Some((" foobar 5 @;#".into(), span(7, 20)))
2648					}),
2649					span(0, 20)
2650				)
2651			);
2652
2653			assert_tokens2!(
2654				"# custom-5 bar",
2655				(
2656					Token::Directive(TokenStream::Custom {
2657						kw: ("custom".into(), span(2, 8)),
2658						content: Some(("-5 bar".into(), span(8, 14))),
2659					}),
2660					span(0, 14)
2661				)
2662			);
2663		}
2664
2665		#[test]
2666		fn invalid() {
2667			assert_tokens2!(
2668				"# # 55 @ `!",
2669				(
2670					Token::Directive(TokenStream::Invalid {
2671						content: ("# 55 @ `!".into(), span(2, 11))
2672					}),
2673					span(0, 11)
2674				)
2675			);
2676		}
2677
2678		#[test]
2679		fn version() {
2680			assert_tokens2!(
2681				"#version",
2682				(
2683					Token::Directive(TokenStream::Version {
2684						kw: span(1, 8),
2685						tokens: vec![]
2686					}),
2687					span(0, 8)
2688				)
2689			);
2690
2691			assert_tokens2!(
2692				"#version 450 core",
2693				(
2694					Token::Directive(TokenStream::Version {
2695						kw: span(1, 8),
2696						tokens: vec![
2697							(VersionToken::Num(450), span(9, 12)),
2698							(VersionToken::Word("core".into()), span(13, 17)),
2699						]
2700					}),
2701					span(0, 17)
2702				)
2703			);
2704
2705			assert_tokens2!(
2706				"#   version 330 es",
2707				(
2708					Token::Directive(TokenStream::Version {
2709						kw: span(4, 11),
2710						tokens: vec![
2711							(VersionToken::Num(330), span(12, 15)),
2712							(VersionToken::Word("es".into()), span(16, 18)),
2713						]
2714					}),
2715					span(0, 18)
2716				)
2717			);
2718
2719			assert_tokens2!(
2720				"#version foobar     ",
2721				(
2722					Token::Directive(TokenStream::Version {
2723						kw: span(1, 8),
2724						tokens: vec![(
2725							VersionToken::Word("foobar".into()),
2726							span(9, 15)
2727						)]
2728					}),
2729					span(0, 20)
2730				)
2731			);
2732
2733			assert_tokens2!(
2734				"# version 100compatability ##@;",
2735				(
2736					Token::Directive(TokenStream::Version {
2737						kw: span(2, 9),
2738						tokens: vec![
2739							(
2740								VersionToken::InvalidNum(
2741									"100compatability".into()
2742								),
2743								span(10, 26)
2744							),
2745							(VersionToken::Invalid('#'), span(27, 28)),
2746							(VersionToken::Invalid('#'), span(28, 29)),
2747							(VersionToken::Invalid('@'), span(29, 30)),
2748							(VersionToken::Invalid(';'), span(30, 31))
2749						]
2750					}),
2751					span(0, 31)
2752				)
2753			);
2754		}
2755
2756		#[test]
2757		fn extension() {
2758			assert_tokens2!(
2759				"#extension",
2760				(
2761					Token::Directive(TokenStream::Extension {
2762						kw: span(1, 10),
2763						tokens: vec![]
2764					}),
2765					span(0, 10)
2766				)
2767			);
2768			assert_tokens2!(
2769				"#  extension foobar : enable",
2770				(
2771					Token::Directive(TokenStream::Extension {
2772						kw: span(3, 12),
2773						tokens: vec![
2774							(
2775								ExtensionToken::Word("foobar".into()),
2776								span(13, 19)
2777							),
2778							(ExtensionToken::Colon, span(20, 21)),
2779							(
2780								ExtensionToken::Word("enable".into()),
2781								span(22, 28)
2782							)
2783						]
2784					}),
2785					span(0, 28)
2786				)
2787			);
2788			assert_tokens2!(
2789				"#extension: 600   ",
2790				(
2791					Token::Directive(TokenStream::Extension {
2792						kw: span(1, 10),
2793						tokens: vec![
2794							(ExtensionToken::Colon, span(10, 11)),
2795							(ExtensionToken::Invalid('6'), span(12, 13)),
2796							(ExtensionToken::Invalid('0'), span(13, 14)),
2797							(ExtensionToken::Invalid('0'), span(14, 15))
2798						]
2799					}),
2800					span(0, 18)
2801				)
2802			);
2803		}
2804
2805		#[test]
2806		fn line() {
2807			assert_tokens2!(
2808				"#line",
2809				(
2810					Token::Directive(TokenStream::Line {
2811						kw: span(1, 5),
2812						tokens: vec![]
2813					}),
2814					span(0, 5)
2815				)
2816			);
2817
2818			assert_tokens2!(
2819				"# line 5 1007",
2820				(
2821					Token::Directive(TokenStream::Line {
2822						kw: span(2, 6),
2823						tokens: vec![
2824							(LineToken::Num(5), span(7, 8)),
2825							(LineToken::Num(1007), span(9, 13))
2826						]
2827					}),
2828					span(0, 13)
2829				)
2830			);
2831
2832			assert_tokens2!(
2833				"#line FOO",
2834				(
2835					Token::Directive(TokenStream::Line {
2836						kw: span(1, 5),
2837						tokens: vec![(
2838							LineToken::Ident("FOO".into()),
2839							span(6, 9)
2840						)]
2841					}),
2842					span(0, 9)
2843				)
2844			);
2845
2846			assert_tokens2!(
2847				"#  line  734abc     ",
2848				(
2849					Token::Directive(TokenStream::Line {
2850						kw: span(3, 7),
2851						tokens: vec![(
2852							LineToken::InvalidNum("734abc".into()),
2853							span(9, 15)
2854						)]
2855					}),
2856					span(0, 20)
2857				)
2858			);
2859		}
2860
2861		#[test]
2862		fn define() {
2863			use super::{NumType, OpTy};
2864
2865			// Object-like
2866			assert_tokens2!(
2867				"#define",
2868				(
2869					Token::Directive(TokenStream::Define {
2870						kw: span(1, 7),
2871						ident_tokens: vec![],
2872						body_tokens: vec![],
2873					}),
2874					span(0, 7)
2875				)
2876			);
2877
2878			assert_tokens2!(
2879				"#define foobar",
2880				(
2881					Token::Directive(TokenStream::Define {
2882						kw: span(1, 7),
2883						ident_tokens: vec![(
2884							DefineToken::Ident("foobar".into()),
2885							span(8, 14)
2886						)],
2887						body_tokens: vec![],
2888					}),
2889					span(0, 14)
2890				)
2891			);
2892
2893			assert_tokens2!(
2894				"#  define FOO 5   ",
2895				(
2896					Token::Directive(TokenStream::Define {
2897						kw: span(3, 9),
2898						ident_tokens: vec![(
2899							DefineToken::Ident("FOO".into()),
2900							span(10, 13)
2901						)],
2902						body_tokens: vec![(
2903							Token::Num {
2904								type_: NumType::Dec,
2905								num: "5".into(),
2906								suffix: None
2907							},
2908							span(14, 15)
2909						)]
2910					}),
2911					span(0, 18)
2912				)
2913			);
2914
2915			assert_tokens2!(
2916				"#define FOO_5  if [bar##0x6}",
2917				(
2918					Token::Directive(TokenStream::Define {
2919						kw: span(1, 7),
2920						ident_tokens: vec![(
2921							DefineToken::Ident("FOO_5".into()),
2922							span(8, 13)
2923						)],
2924						body_tokens: vec![
2925							(Token::If, span(15, 17)),
2926							(Token::LBracket, span(18, 19)),
2927							(Token::Ident("bar".into()), span(19, 22)),
2928							(Token::MacroConcat, span(22, 24)),
2929							(
2930								Token::Num {
2931									type_: NumType::Hex,
2932									num: "6".into(),
2933									suffix: None
2934								},
2935								span(24, 27)
2936							),
2937							(Token::RBrace, span(27, 28))
2938						]
2939					}),
2940					span(0, 28)
2941				)
2942			);
2943
2944			assert_tokens2!(
2945				"#define baz ( )",
2946				(
2947					Token::Directive(TokenStream::Define {
2948						kw: span(1, 7),
2949						ident_tokens: vec![(
2950							DefineToken::Ident("baz".into()),
2951							span(8, 11)
2952						),],
2953						body_tokens: vec![
2954							(Token::LParen, span(12, 13)),
2955							(Token::RParen, span(14, 15))
2956						]
2957					}),
2958					span(0, 15)
2959				)
2960			);
2961
2962			assert_tokens2!(
2963				"#define 5 @@ ` ",
2964				(
2965					Token::Directive(TokenStream::Define {
2966						kw: span(1, 7),
2967						ident_tokens: vec![],
2968						body_tokens: vec![
2969							(
2970								Token::Num {
2971									type_: NumType::Dec,
2972									num: "5".into(),
2973									suffix: None
2974								},
2975								span(8, 9)
2976							),
2977							(Token::Invalid('@'), span(10, 11)),
2978							(Token::Invalid('@'), span(11, 12)),
2979							(Token::Invalid('`'), span(13, 14)),
2980						]
2981					}),
2982					span(0, 15)
2983				)
2984			);
2985
2986			// Function-like
2987			assert_tokens2!(
2988				"#define FOOBAR()",
2989				(
2990					Token::Directive(TokenStream::Define {
2991						kw: span(1, 7),
2992						ident_tokens: vec![
2993							(DefineToken::Ident("FOOBAR".into()), span(8, 14)),
2994							(DefineToken::LParen, span(14, 15)),
2995							(DefineToken::RParen, span(15, 16)),
2996						],
2997						body_tokens: vec![]
2998					}),
2999					span(0, 16)
3000				)
3001			);
3002
3003			assert_tokens2!(
3004				"#define baz( )",
3005				(
3006					Token::Directive(TokenStream::Define {
3007						kw: span(1, 7),
3008						ident_tokens: vec![
3009							(DefineToken::Ident("baz".into()), span(8, 11)),
3010							(DefineToken::LParen, span(11, 12)),
3011							(DefineToken::RParen, span(13, 14))
3012						],
3013						body_tokens: vec![]
3014					}),
3015					span(0, 14)
3016				)
3017			);
3018
3019			assert_tokens2!(
3020				"#define FOOBAR( a, b)",
3021				(
3022					Token::Directive(TokenStream::Define {
3023						kw: span(1, 7),
3024						ident_tokens: vec![
3025							(DefineToken::Ident("FOOBAR".into()), span(8, 14)),
3026							(DefineToken::LParen, span(14, 15)),
3027							(DefineToken::Ident("a".into()), span(16, 17)),
3028							(DefineToken::Comma, span(17, 18)),
3029							(DefineToken::Ident("b".into()), span(19, 20)),
3030							(DefineToken::RParen, span(20, 21)),
3031						],
3032						body_tokens: vec![]
3033					}),
3034					span(0, 21)
3035				)
3036			);
3037
3038			assert_tokens2!(
3039				"#define FOOBAR( a # @@",
3040				(
3041					Token::Directive(TokenStream::Define {
3042						kw: span(1, 7),
3043						ident_tokens: vec![
3044							(DefineToken::Ident("FOOBAR".into()), span(8, 14)),
3045							(DefineToken::LParen, span(14, 15)),
3046							(DefineToken::Ident("a".into()), span(16, 17)),
3047							(DefineToken::Invalid('#'), span(18, 19)),
3048							(DefineToken::Invalid('@'), span(20, 21)),
3049							(DefineToken::Invalid('@'), span(21, 22)),
3050						],
3051						body_tokens: vec![]
3052					}),
3053					span(0, 22)
3054				)
3055			);
3056
3057			assert_tokens2!(
3058				"#define FOOBAR( a)  if [0x7u## %!",
3059				(
3060					Token::Directive(TokenStream::Define {
3061						kw: span(1, 7),
3062						ident_tokens: vec![
3063							(DefineToken::Ident("FOOBAR".into()), span(8, 14)),
3064							(DefineToken::LParen, span(14, 15)),
3065							(DefineToken::Ident("a".into()), span(16, 17)),
3066							(DefineToken::RParen, span(17, 18)),
3067						],
3068						body_tokens: vec![
3069							(Token::If, span(20, 22)),
3070							(Token::LBracket, span(23, 24)),
3071							(
3072								Token::Num {
3073									type_: NumType::Hex,
3074									num: "7".into(),
3075									suffix: Some("u".into())
3076								},
3077								span(24, 28)
3078							),
3079							(Token::MacroConcat, span(28, 30)),
3080							(Token::Op(OpTy::Rem), span(31, 32)),
3081							(Token::Op(OpTy::Not), span(32, 33)),
3082						]
3083					}),
3084					span(0, 33)
3085				)
3086			);
3087		}
3088
3089		#[test]
3090		fn undef() {
3091			assert_tokens2!(
3092				"#undef",
3093				(
3094					Token::Directive(TokenStream::Undef {
3095						kw: span(1, 6),
3096						tokens: vec![]
3097					}),
3098					span(0, 6)
3099				)
3100			);
3101
3102			assert_tokens2!(
3103				"# undef foo ",
3104				(
3105					Token::Directive(TokenStream::Undef {
3106						kw: span(2, 7),
3107						tokens: vec![(
3108							UndefToken::Ident("foo".into()),
3109							span(8, 11)
3110						)]
3111					}),
3112					span(0, 12)
3113				)
3114			);
3115
3116			assert_tokens2!(
3117				"#    undef foobar @ `` 4    ",
3118				(
3119					Token::Directive(TokenStream::Undef {
3120						kw: span(5, 10),
3121						tokens: vec![
3122							(UndefToken::Ident("foobar".into()), span(11, 17)),
3123							(UndefToken::Invalid('@'), span(18, 19)),
3124							(UndefToken::Invalid('`'), span(20, 21)),
3125							(UndefToken::Invalid('`'), span(21, 22)),
3126							(UndefToken::Invalid('4'), span(23, 24)),
3127						]
3128					}),
3129					span(0, 28)
3130				)
3131			);
3132		}
3133
3134		#[test]
3135		fn conditional() {
3136			assert_tokens2!(
3137				"#if",
3138				(
3139					Token::Directive(TokenStream::If {
3140						kw: span(1, 3),
3141						tokens: vec![]
3142					}),
3143					span(0, 3)
3144				)
3145			);
3146
3147			assert_tokens2!(
3148				"# if FOO > 5",
3149				(
3150					Token::Directive(TokenStream::If {
3151						kw: span(2, 4),
3152						tokens: vec![
3153							(ConditionToken::Ident("FOO".into()), span(5, 8)),
3154							(ConditionToken::Gt, span(9, 10)),
3155							(ConditionToken::Num(5), span(11, 12))
3156						]
3157					}),
3158					span(0, 12)
3159				)
3160			);
3161
3162			assert_tokens2!(
3163				"#if 5001bar",
3164				(
3165					Token::Directive(TokenStream::If {
3166						kw: span(1, 3),
3167						tokens: vec![(
3168							ConditionToken::InvalidNum("5001bar".into()),
3169							span(4, 11)
3170						)]
3171					}),
3172					span(0, 11)
3173				)
3174			);
3175
3176			assert_tokens2!(
3177				"#if (defined foobar) && 5 <8",
3178				(
3179					Token::Directive(TokenStream::If {
3180						kw: span(1, 3),
3181						tokens: vec![
3182							(ConditionToken::LParen, span(4, 5)),
3183							(ConditionToken::Defined, span(5, 12)),
3184							(
3185								ConditionToken::Ident("foobar".into()),
3186								span(13, 19)
3187							),
3188							(ConditionToken::RParen, span(19, 20)),
3189							(ConditionToken::AndAnd, span(21, 23)),
3190							(ConditionToken::Num(5), span(24, 25)),
3191							(ConditionToken::Lt, span(26, 27)),
3192							(ConditionToken::Num(8), span(27, 28))
3193						]
3194					}),
3195					span(0, 28)
3196				)
3197			);
3198			assert_tokens2!(
3199				"#if baz @ ## :   ",
3200				(
3201					Token::Directive(TokenStream::If {
3202						kw: span(1, 3),
3203						tokens: vec![
3204							(ConditionToken::Ident("baz".into()), span(4, 7)),
3205							(ConditionToken::Invalid('@'), span(8, 9)),
3206							(ConditionToken::Invalid('#'), span(10, 11)),
3207							(ConditionToken::Invalid('#'), span(11, 12)),
3208							(ConditionToken::Invalid(':'), span(13, 14)),
3209						]
3210					}),
3211					span(0, 17)
3212				)
3213			);
3214		}
3215
3216		#[test]
3217		fn error() {
3218			assert_tokens2!(
3219				"#error",
3220				(
3221					Token::Directive(TokenStream::Error {
3222						kw: span(1, 6),
3223						message: None
3224					}),
3225					span(0, 6)
3226				)
3227			);
3228
3229			assert_tokens2!(
3230				"# error foo bar ## @ ;      ",
3231				(
3232					Token::Directive(TokenStream::Error {
3233						kw: span(2, 7),
3234						message: Some((
3235							" foo bar ## @ ;      ".into(),
3236							span(7, 28)
3237						))
3238					}),
3239					span(0, 28)
3240				)
3241			);
3242		}
3243
3244		#[test]
3245		fn pragma() {
3246			assert_tokens2!(
3247				"#pragma",
3248				(
3249					Token::Directive(TokenStream::Pragma {
3250						kw: span(1, 7),
3251						options: None
3252					}),
3253					span(0, 7)
3254				)
3255			);
3256
3257			assert_tokens2!(
3258				"# pragma foo bar ## @ ;      ",
3259				(
3260					Token::Directive(TokenStream::Pragma {
3261						kw: span(2, 8),
3262						options: Some((
3263							" foo bar ## @ ;      ".into(),
3264							span(8, 29)
3265						))
3266					}),
3267					span(0, 29)
3268				)
3269			);
3270		}
3271	}
3272}