1use std::{error::Error, fmt};
2
3use crate::{
4 CompilerErrorCode, Keyword, MAX_TOKEN_LENGTH, Token, TokenKind, nwscript_string_hash_bytes,
5 source::{SourceFile, SourceId, Span},
6};
7
8#[derive(#[automatically_derived]
impl ::core::fmt::Debug for LexerError {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field3_finish(f, "LexerError",
"code", &self.code, "span", &self.span, "message", &&self.message)
}
}Debug, #[automatically_derived]
impl ::core::clone::Clone for LexerError {
#[inline]
fn clone(&self) -> LexerError {
LexerError {
code: ::core::clone::Clone::clone(&self.code),
span: ::core::clone::Clone::clone(&self.span),
message: ::core::clone::Clone::clone(&self.message),
}
}
}Clone, #[automatically_derived]
impl ::core::cmp::PartialEq for LexerError {
#[inline]
fn eq(&self, other: &LexerError) -> bool {
self.code == other.code && self.span == other.span &&
self.message == other.message
}
}PartialEq, #[automatically_derived]
impl ::core::cmp::Eq for LexerError {
#[inline]
#[doc(hidden)]
#[coverage(off)]
fn assert_fields_are_eq(&self) {
let _: ::core::cmp::AssertParamIsEq<CompilerErrorCode>;
let _: ::core::cmp::AssertParamIsEq<Span>;
let _: ::core::cmp::AssertParamIsEq<String>;
}
}Eq)]
10pub struct LexerError {
11 pub code: CompilerErrorCode,
13 pub span: Span,
15 pub message: String,
17}
18
19impl LexerError {
20 fn new(
21 code: CompilerErrorCode,
22 source_id: SourceId,
23 start: usize,
24 end: usize,
25 message: impl Into<String>,
26 ) -> Self {
27 Self {
28 code,
29 span: Span::new(source_id, start, end),
30 message: message.into(),
31 }
32 }
33}
34
35impl fmt::Display for LexerError {
36 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37 f.write_fmt(format_args!("{0} ({1})", self.message, self.code.code()))write!(f, "{} ({})", self.message, self.code.code())
38 }
39}
40
// Marker impl: `LexerError` relies on the default `std::error::Error` methods
// and on its `Debug`/`Display` implementations.
impl Error for LexerError {}
42
43#[derive(#[automatically_derived]
impl<'a> ::core::fmt::Debug for Lexer<'a> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field3_finish(f, "Lexer",
"source_id", &self.source_id, "input", &self.input, "position",
&&self.position)
}
}Debug, #[automatically_derived]
impl<'a> ::core::clone::Clone for Lexer<'a> {
#[inline]
fn clone(&self) -> Lexer<'a> {
Lexer {
source_id: ::core::clone::Clone::clone(&self.source_id),
input: ::core::clone::Clone::clone(&self.input),
position: ::core::clone::Clone::clone(&self.position),
}
}
}Clone)]
45pub struct Lexer<'a> {
46 source_id: SourceId,
47 input: &'a [u8],
48 position: usize,
49}
50
51impl<'a> Lexer<'a> {
52 #[must_use]
54 pub fn new(source_id: SourceId, input: &'a [u8]) -> Self {
55 Self {
56 source_id,
57 input,
58 position: 0,
59 }
60 }
61
62 pub fn lex_all(&mut self) -> Result<Vec<Token>, LexerError> {
68 let mut tokens = Vec::new();
69 loop {
70 self.skip_trivia();
71 let token = self.next_token()?;
72 let is_eof = token.kind == TokenKind::Eof;
73 tokens.push(token);
74 if is_eof {
75 break;
76 }
77 }
78 Ok(tokens)
79 }
80
81 fn next_token(&mut self) -> Result<Token, LexerError> {
82 if self.position >= self.input.len() {
83 return Ok(Token::new(
84 TokenKind::Eof,
85 Span::new(self.source_id, self.position, self.position),
86 "",
87 ));
88 }
89
90 if self.starts_with_raw_string() {
91 return self.lex_raw_string();
92 }
93 if self.starts_with_hashed_string() {
94 return self.lex_hashed_string();
95 }
96
97 let start = self.position;
98 let current = self.current_byte();
99 match current {
100 Some(b'0'..=b'9') => self.lex_number(),
101 Some(b'.') if self.peek_byte(1).is_some_and(|next| next.is_ascii_digit()) => {
102 self.lex_number()
103 }
104 Some(b'a'..=b'z' | b'A'..=b'Z' | b'_') => self.lex_identifier(),
105 Some(b'#') => self.lex_hash_identifier_or_error(),
106 Some(b'"') => self.lex_string(),
107 Some(_) => self.lex_punctuation(start),
108 None => Ok(Token::new(
109 TokenKind::Eof,
110 Span::new(self.source_id, self.position, self.position),
111 "",
112 )),
113 }
114 }
115
116 fn skip_trivia(&mut self) {
117 loop {
118 if self.position >= self.input.len() {
119 return;
120 }
121
122 match self.current_byte() {
123 Some(b' ' | b'\t' | b'\n' | b'\r') => {
124 self.position += 1;
125 }
126 Some(b'/') if self.peek_byte(1) == Some(b'/') => {
127 self.position += 2;
128 while let Some(byte) = self.current_byte() {
129 if byte == b'\n' {
130 break;
131 }
132 self.position += 1;
133 }
134 }
135 Some(b'/') if self.peek_byte(1) == Some(b'*') => {
136 self.position += 2;
137 while self.position < self.input.len() {
138 if self.current_byte() == Some(b'*') && self.peek_byte(1) == Some(b'/') {
139 self.position += 2;
140 break;
141 }
142 self.position += 1;
143 }
144 }
145 _ => return,
146 }
147 }
148 }
149
150 fn lex_number(&mut self) -> Result<Token, LexerError> {
151 let start = self.position;
152 let mut text = String::new();
153 let mut kind = TokenKind::Integer;
154
155 if self.current_byte() == Some(b'.') {
156 kind = TokenKind::Float;
157 text.push('0');
158 text.push('.');
159 self.position += 1;
160 self.consume_ascii_digits(&mut text);
161 self.consume_float_suffix_if_present(&mut kind);
162 return self.finish_token(kind, start, self.position, text);
163 }
164
165 self.consume_ascii_digits(&mut text);
166
167 if text == "0" {
168 match self.current_byte() {
169 Some(b'x' | b'X') => {
170 kind = TokenKind::HexInteger;
171 if let Some(prefix) = self.bump_byte() {
172 text.push(char::from(prefix));
173 }
174 while let Some(byte) = self.current_byte() {
175 if byte.is_ascii_hexdigit() {
176 let lowered = if (b'A'..=b'F').contains(&byte) {
177 byte + 32
178 } else {
179 byte
180 };
181 text.push(char::from(lowered));
182 self.position += 1;
183 } else {
184 break;
185 }
186 }
187 return self.finish_token(kind, start, self.position, text);
188 }
189 Some(b'b' | b'B') => {
190 kind = TokenKind::BinaryInteger;
191 if let Some(prefix) = self.bump_byte() {
192 text.push(char::from(prefix));
193 }
194 while let Some(byte) = self.current_byte() {
195 if #[allow(non_exhaustive_omitted_patterns)] match byte {
b'0' | b'1' => true,
_ => false,
}matches!(byte, b'0' | b'1') {
196 text.push(char::from(byte));
197 self.position += 1;
198 } else {
199 break;
200 }
201 }
202 return self.finish_token(kind, start, self.position, text);
203 }
204 Some(b'o' | b'O') => {
205 kind = TokenKind::OctalInteger;
206 if let Some(prefix) = self.bump_byte() {
207 text.push(char::from(prefix));
208 }
209 while let Some(byte) = self.current_byte() {
210 if (b'0'..=b'7').contains(&byte) {
211 text.push(char::from(byte));
212 self.position += 1;
213 } else {
214 break;
215 }
216 }
217 return self.finish_token(kind, start, self.position, text);
218 }
219 _ => {}
220 }
221 }
222
223 if self.current_byte() == Some(b'.') {
224 kind = TokenKind::Float;
225 text.push('.');
226 self.position += 1;
227 self.consume_ascii_digits(&mut text);
228 }
229
230 self.consume_float_suffix_if_present(&mut kind);
231 self.finish_token(kind, start, self.position, text)
232 }
233
234 fn lex_identifier(&mut self) -> Result<Token, LexerError> {
235 let start = self.position;
236 let mut text = String::new();
237 while let Some(byte) = self.current_byte() {
238 if is_identifier_continue(byte) {
239 text.push(char::from(byte));
240 self.position += 1;
241 } else {
242 break;
243 }
244 }
245 self.finish_identifier_like_token(start, self.position, text)
246 }
247
248 fn lex_hash_identifier_or_error(&mut self) -> Result<Token, LexerError> {
249 let start = self.position;
250 self.position += 1;
251
252 if !self.current_byte().is_some_and(is_identifier_start) {
253 return Err(LexerError::new(
254 CompilerErrorCode::EllipsisInIdentifier,
255 self.source_id,
256 start,
257 self.position,
258 "invalid preprocessor-like identifier",
259 ));
260 }
261
262 let mut text = String::from("#");
263 while let Some(byte) = self.current_byte() {
264 if is_identifier_continue(byte) {
265 text.push(char::from(byte));
266 self.position += 1;
267 } else {
268 break;
269 }
270 }
271 self.finish_identifier_like_token(start, self.position, text)
272 }
273
274 fn finish_identifier_like_token(
275 &self,
276 start: usize,
277 end: usize,
278 text: String,
279 ) -> Result<Token, LexerError> {
280 if let Some(keyword) = Keyword::from_lexeme(&text) {
281 return self.finish_token(TokenKind::Keyword(keyword), start, end, text);
282 }
283 if text.starts_with('#') {
284 return Err(LexerError::new(
285 CompilerErrorCode::EllipsisInIdentifier,
286 self.source_id,
287 start,
288 end,
289 ::alloc::__export::must_use({
::alloc::fmt::format(format_args!("unknown preprocessor-like identifier {0:?}",
text))
})format!("unknown preprocessor-like identifier {text:?}"),
290 ));
291 }
292 self.finish_token(TokenKind::Identifier, start, end, text)
293 }
294
295 fn lex_string(&mut self) -> Result<Token, LexerError> {
296 let start = self.position;
297 self.position += 1;
298 let mut text = String::new();
299
300 while let Some(byte) = self.current_byte() {
301 match byte {
302 b'\n' => {
303 return Err(LexerError::new(
304 CompilerErrorCode::UnterminatedStringConstant,
305 self.source_id,
306 start,
307 self.position,
308 "unterminated string constant",
309 ));
310 }
311 b'"' => {
312 self.position += 1;
313 return self.finish_token(TokenKind::String, start, self.position, text);
314 }
315 b'\\' => {
316 let next = self.peek_byte(1);
317 match next {
318 Some(b'n') => {
319 text.push('\n');
320 self.position += 2;
321 }
322 Some(b'\\') => {
323 text.push('\\');
324 self.position += 2;
325 }
326 Some(b'"') => {
327 text.push('"');
328 self.position += 2;
329 }
330 Some(b'x') => {
331 let first = self.peek_byte(2);
332 let second = self.peek_byte(3);
333 if first.is_none() || second.is_none() {
334 return Err(LexerError::new(
335 CompilerErrorCode::UnterminatedStringConstant,
336 self.source_id,
337 start,
338 self.input.len(),
339 "unterminated hexadecimal string escape",
340 ));
341 }
342 let value = parse_upstream_hex_escape(
343 first.unwrap_or_default(),
344 second.unwrap_or_default(),
345 );
346 text.push(char::from(value));
347 self.position += 4;
348 }
349 Some(_) => {
350 self.position += 1;
351 }
352 None => {
353 return Err(LexerError::new(
354 CompilerErrorCode::UnterminatedStringConstant,
355 self.source_id,
356 start,
357 self.input.len(),
358 "unterminated string constant",
359 ));
360 }
361 }
362 }
363 _ => {
364 text.push(byte_to_text_char(byte));
365 self.position += 1;
366 }
367 }
368 }
369
370 Err(LexerError::new(
371 CompilerErrorCode::UnterminatedStringConstant,
372 self.source_id,
373 start,
374 self.input.len(),
375 "unterminated string constant",
376 ))
377 }
378
379 fn lex_raw_string(&mut self) -> Result<Token, LexerError> {
380 let start = self.position;
381 self.position += 2;
382 let mut text = String::new();
383
384 while let Some(byte) = self.current_byte() {
385 if byte == b'"' {
386 if self.peek_byte(1) == Some(b'"') {
387 text.push('"');
388 self.position += 2;
389 continue;
390 }
391
392 self.position += 1;
393 return self.finish_token(TokenKind::String, start, self.position, text);
394 }
395
396 text.push(byte_to_text_char(byte));
397 self.position += 1;
398 }
399
400 Err(LexerError::new(
401 CompilerErrorCode::UnterminatedStringConstant,
402 self.source_id,
403 start,
404 self.input.len(),
405 "unterminated raw string constant",
406 ))
407 }
408
409 fn lex_hashed_string(&mut self) -> Result<Token, LexerError> {
410 let start = self.position;
411 self.position += 2;
412 let mut cooked_bytes = Vec::new();
413
414 while let Some(byte) = self.current_byte() {
415 match byte {
416 b'\n' => {
417 return Err(LexerError::new(
418 CompilerErrorCode::UnterminatedStringConstant,
419 self.source_id,
420 start,
421 self.position,
422 "unterminated hashed string constant",
423 ));
424 }
425 b'"' => {
426 self.position += 1;
427 let lowered = ::alloc::__export::must_use({
::alloc::fmt::format(format_args!("0x{0:x}",
nwscript_string_hash_bytes(&cooked_bytes).cast_unsigned()))
})format!(
428 "0x{:x}",
429 nwscript_string_hash_bytes(&cooked_bytes).cast_unsigned()
430 );
431 return self.finish_token(TokenKind::HexInteger, start, self.position, lowered);
432 }
433 b'\\' => {
434 let next = self.peek_byte(1);
435 match next {
436 Some(b'n') => {
437 cooked_bytes.push(b'\n');
438 self.position += 2;
439 }
440 Some(b'\\') => {
441 cooked_bytes.push(b'\\');
442 self.position += 2;
443 }
444 Some(b'"') => {
445 cooked_bytes.push(b'"');
446 self.position += 2;
447 }
448 Some(b'x') => {
449 let first = self.peek_byte(2);
450 let second = self.peek_byte(3);
451 if first.is_none() || second.is_none() {
452 return Err(LexerError::new(
453 CompilerErrorCode::UnterminatedStringConstant,
454 self.source_id,
455 start,
456 self.input.len(),
457 "unterminated hexadecimal hashed-string escape",
458 ));
459 }
460 let value = parse_upstream_hex_escape(
461 first.unwrap_or_default(),
462 second.unwrap_or_default(),
463 );
464 cooked_bytes.push(value);
465 self.position += 4;
466 }
467 Some(_) => {
468 self.position += 1;
469 }
470 None => {
471 return Err(LexerError::new(
472 CompilerErrorCode::UnterminatedStringConstant,
473 self.source_id,
474 start,
475 self.input.len(),
476 "unterminated hashed string constant",
477 ));
478 }
479 }
480 }
481 _ => {
482 cooked_bytes.push(byte);
483 self.position += 1;
484 }
485 }
486 }
487
488 Err(LexerError::new(
489 CompilerErrorCode::UnterminatedStringConstant,
490 self.source_id,
491 start,
492 self.input.len(),
493 "unterminated hashed string constant",
494 ))
495 }
496
497 fn lex_punctuation(&mut self, start: usize) -> Result<Token, LexerError> {
498 if self.slice_eq(start, start + 4, b">>>=") {
499 self.position += 4;
500 return self.finish_token(
501 TokenKind::AssignUnsignedShiftRight,
502 start,
503 self.position,
504 ">>>=".to_string(),
505 );
506 }
507
508 if let Some((kind, text)) = [
509 (TokenKind::UnsignedShiftRight, ">>>"),
510 (TokenKind::AssignShiftRight, ">>="),
511 (TokenKind::AssignShiftLeft, "<<="),
512 ]
513 .into_iter()
514 .find(|(_, text)| self.slice_eq(start, start + text.len(), text.as_bytes()))
515 {
516 let width = text.len();
517 self.position += width;
518 return self.finish_token(kind, start, self.position, text.to_string());
519 }
520
521 if let Some((kind, text)) = [
522 (TokenKind::LogicalAnd, "&&"),
523 (TokenKind::LogicalOr, "||"),
524 (TokenKind::GreaterEqual, ">="),
525 (TokenKind::LessEqual, "<="),
526 (TokenKind::NotEqual, "!="),
527 (TokenKind::EqualEqual, "=="),
528 (TokenKind::ShiftLeft, "<<"),
529 (TokenKind::ShiftRight, ">>"),
530 (TokenKind::Increment, "++"),
531 (TokenKind::Decrement, "--"),
532 (TokenKind::AssignMinus, "-="),
533 (TokenKind::AssignPlus, "+="),
534 (TokenKind::AssignMultiply, "*="),
535 (TokenKind::AssignDivide, "/="),
536 (TokenKind::AssignModulus, "%="),
537 (TokenKind::AssignAnd, "&="),
538 (TokenKind::AssignXor, "^="),
539 (TokenKind::AssignOr, "|="),
540 ]
541 .into_iter()
542 .find(|(_, text)| self.slice_eq(start, start + text.len(), text.as_bytes()))
543 {
544 let width = text.len();
545 self.position += width;
546 return self.finish_token(kind, start, self.position, text.to_string());
547 }
548
549 if let Some((kind, ch)) = self.current_byte().and_then(|byte| {
550 let kind = match byte {
551 b'/' => TokenKind::Divide,
552 b'*' => TokenKind::Multiply,
553 b'&' => TokenKind::BooleanAnd,
554 b'|' => TokenKind::InclusiveOr,
555 b'-' => TokenKind::Minus,
556 b'{' => TokenKind::LeftBrace,
557 b'}' => TokenKind::RightBrace,
558 b'(' => TokenKind::LeftParen,
559 b')' => TokenKind::RightParen,
560 b'[' => TokenKind::LeftSquareBracket,
561 b']' => TokenKind::RightSquareBracket,
562 b'<' => TokenKind::LessThan,
563 b'>' => TokenKind::GreaterThan,
564 b'!' => TokenKind::BooleanNot,
565 b'=' => TokenKind::Assign,
566 b'+' => TokenKind::Plus,
567 b'%' => TokenKind::Modulus,
568 b';' => TokenKind::Semicolon,
569 b',' => TokenKind::Comma,
570 b'^' => TokenKind::ExclusiveOr,
571 b'~' => TokenKind::Tilde,
572 b'.' => TokenKind::StructurePartSpecify,
573 b'?' => TokenKind::QuestionMark,
574 b':' => TokenKind::Colon,
575 _ => return None,
576 };
577 Some((kind, char::from(byte)))
578 }) {
579 self.position += 1;
580 return self.finish_token(kind, start, self.position, ch.to_string());
581 }
582
583 Err(LexerError::new(
584 CompilerErrorCode::UnexpectedCharacter,
585 self.source_id,
586 start,
587 start.saturating_add(1),
588 ::alloc::__export::must_use({
::alloc::fmt::format(format_args!("unexpected character {0:?}",
self.current_byte().map_or('\0', char::from)))
})format!(
589 "unexpected character {:?}",
590 self.current_byte().map_or('\0', char::from)
591 ),
592 ))
593 }
594
595 fn finish_token(
596 &self,
597 kind: TokenKind,
598 start: usize,
599 end: usize,
600 text: String,
601 ) -> Result<Token, LexerError> {
602 if text.len() > MAX_TOKEN_LENGTH {
603 return Err(LexerError::new(
604 CompilerErrorCode::TokenTooLong,
605 self.source_id,
606 start,
607 end,
608 ::alloc::__export::must_use({
::alloc::fmt::format(format_args!("token exceeds maximum length of {0} bytes",
MAX_TOKEN_LENGTH))
})format!("token exceeds maximum length of {MAX_TOKEN_LENGTH} bytes"),
609 ));
610 }
611 Ok(Token::new(
612 kind,
613 Span::new(self.source_id, start, end),
614 text,
615 ))
616 }
617
618 fn starts_with_raw_string(&self) -> bool {
619 #[allow(non_exhaustive_omitted_patterns)] match (self.current_byte(),
self.peek_byte(1)) {
(Some(b'r' | b'R'), Some(b'"')) => true,
_ => false,
}matches!(
620 (self.current_byte(), self.peek_byte(1)),
621 (Some(b'r' | b'R'), Some(b'"'))
622 )
623 }
624
625 fn starts_with_hashed_string(&self) -> bool {
626 #[allow(non_exhaustive_omitted_patterns)] match (self.current_byte(),
self.peek_byte(1)) {
(Some(b'h' | b'H'), Some(b'"')) => true,
_ => false,
}matches!(
627 (self.current_byte(), self.peek_byte(1)),
628 (Some(b'h' | b'H'), Some(b'"'))
629 )
630 }
631
632 fn consume_ascii_digits(&mut self, output: &mut String) {
633 while let Some(byte) = self.current_byte() {
634 if byte.is_ascii_digit() {
635 output.push(char::from(byte));
636 self.position += 1;
637 } else {
638 break;
639 }
640 }
641 }
642
643 fn consume_float_suffix_if_present(&mut self, kind: &mut TokenKind) {
644 if self.current_byte() == Some(b'f') {
645 *kind = TokenKind::Float;
646 self.position += 1;
647 }
648 }
649
650 fn current_byte(&self) -> Option<u8> {
651 self.input.get(self.position).copied()
652 }
653
654 fn peek_byte(&self, ahead: usize) -> Option<u8> {
655 self.input.get(self.position.saturating_add(ahead)).copied()
656 }
657
658 fn bump_byte(&mut self) -> Option<u8> {
659 let byte = self.current_byte()?;
660 self.position += 1;
661 Some(byte)
662 }
663
664 fn slice_eq(&self, start: usize, end: usize, expected: &[u8]) -> bool {
665 self.input.get(start..end) == Some(expected)
666 }
667}
668
669pub fn lex_source(source: &SourceFile) -> Result<Vec<Token>, LexerError> {
675 Lexer::new(source.id, source.bytes()).lex_all()
676}
677
678pub fn lex_bytes(source_id: SourceId, input: &[u8]) -> Result<Vec<Token>, LexerError> {
684 Lexer::new(source_id, input).lex_all()
685}
686
687pub fn lex_text(source_id: SourceId, input: &str) -> Result<Vec<Token>, LexerError> {
693 lex_bytes(source_id, input.as_bytes())
694}
695
/// Returns `true` for bytes that may begin an identifier: ASCII letters and `_`.
fn is_identifier_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
699
/// Returns `true` for bytes that may continue an identifier: ASCII letters, digits and `_`.
fn is_identifier_continue(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
703
704fn parse_upstream_hex_escape(first: u8, second: u8) -> u8 {
705 let first = hex_nibble(first);
706 let second = hex_nibble(second);
707 match (first, second) {
708 (Some(high), Some(low)) => (high << 4) | low,
709 (Some(value), None) => value,
710 (None, _) => 0,
711 }
712}
713
/// Returns the value (0–15) of an ASCII hexadecimal digit, or `None` for any other byte.
///
/// Both lowercase and uppercase `a`–`f` are accepted.
fn hex_nibble(byte: u8) -> Option<u8> {
    let value = char::from(byte).to_digit(16)?;
    u8::try_from(value).ok()
}
722
/// Maps a raw byte to the `char` with the same code point (U+0000–U+00FF).
///
/// This lets non-UTF-8 source bytes be carried inside token text.
fn byte_to_text_char(byte: u8) -> char {
    char::from(byte)
}
726
#[cfg(test)]
mod tests {
    use crate::{
        Keyword, SourceFile, SourceId, Token, TokenKind, lex_bytes, lex_source, lex_text,
        nwscript_string_hash_bytes,
    };

    /// Projects a lexing result onto `(kind, text)` pairs; `None` when lexing failed.
    fn kind_text_pairs<E>(result: Result<Vec<Token>, E>) -> Option<Vec<(TokenKind, String)>> {
        let tokens = result.ok()?;
        Some(
            tokens
                .into_iter()
                .map(|token| (token.kind, token.text))
                .collect(),
        )
    }

    /// Shorthand for an expected `(kind, text)` pair.
    fn pair(kind: TokenKind, text: &str) -> (TokenKind, String) {
        (kind, text.to_string())
    }

    #[test]
    fn lexes_upstream_keyword_table_entries() {
        let source = SourceFile::new(
            SourceId::new(1),
            "keywords.nss",
            "if #include #define OBJECT_SELF JSON_TRUE __FILE__ ENGINE_STRUCTURE_0",
        );

        let kinds = lex_source(&source)
            .ok()
            .map(|tokens| tokens.into_iter().map(|token| token.kind).collect::<Vec<_>>());

        let expected = vec![
            TokenKind::Keyword(Keyword::If),
            TokenKind::Keyword(Keyword::Include),
            TokenKind::Keyword(Keyword::Define),
            TokenKind::Keyword(Keyword::ObjectSelf),
            TokenKind::Keyword(Keyword::JsonTrue),
            TokenKind::Keyword(Keyword::FileMacro),
            TokenKind::Keyword(Keyword::EngineStructureDefinition),
            TokenKind::Eof,
        ];
        assert_eq!(kinds, Some(expected));
    }

    #[test]
    fn lexes_comments_numbers_and_operators() {
        let pairs = kind_text_pairs(lex_text(
            SourceId::new(2),
            "// header\n0xAB 0b10 0o77 .42 5.f 6f >>= >>>= && ||",
        ));

        let expected = vec![
            pair(TokenKind::HexInteger, "0xab"),
            pair(TokenKind::BinaryInteger, "0b10"),
            pair(TokenKind::OctalInteger, "0o77"),
            pair(TokenKind::Float, "0.42"),
            pair(TokenKind::Float, "5."),
            pair(TokenKind::Float, "6"),
            pair(TokenKind::AssignShiftRight, ">>="),
            pair(TokenKind::AssignUnsignedShiftRight, ">>>="),
            pair(TokenKind::LogicalAnd, "&&"),
            pair(TokenKind::LogicalOr, "||"),
            pair(TokenKind::Eof, ""),
        ];
        assert_eq!(pairs, Some(expected));
    }

    #[test]
    fn lexes_strings_raw_strings_and_hashed_strings() {
        let pairs = kind_text_pairs(lex_text(
            SourceId::new(3),
            "\"a\\n\\\"\\\\\\x41\" r\"alpha\"\"beta\" h\"tag\\x3f\"",
        ));

        let hashed = format!("0x{:x}", nwscript_string_hash_bytes(b"tag?") as u32);
        let expected = vec![
            pair(TokenKind::String, "a\n\"\\A"),
            pair(TokenKind::String, "alpha\"beta"),
            (TokenKind::HexInteger, hashed),
            pair(TokenKind::Eof, ""),
        ];
        assert_eq!(pairs, Some(expected));
    }

    #[test]
    fn lowers_hashed_strings_to_exact_upstream_hex_integers() {
        let pairs = kind_text_pairs(lex_text(
            SourceId::new(5),
            "h\"hello\" H\"\" h\"\\\"\\n\\\\\\xFF\\x80\"",
        ));

        let escaped_hash = format!(
            "0x{:x}",
            nwscript_string_hash_bytes(&[b'"', b'\n', b'\\', 0xff, 0x80]) as u32
        );
        let expected = vec![
            pair(TokenKind::HexInteger, "0xf9cc2afc"),
            pair(TokenKind::HexInteger, "0x0"),
            (TokenKind::HexInteger, escaped_hash),
            pair(TokenKind::Eof, ""),
        ];
        assert_eq!(pairs, Some(expected));
    }

    #[test]
    fn rejects_unknown_hash_prefixed_identifier_like_upstream() {
        let code = lex_text(SourceId::new(4), "#pragma")
            .err()
            .map(|error| error.code);

        assert_eq!(code, Some(crate::CompilerErrorCode::EllipsisInIdentifier));
    }

    #[test]
    fn lexes_non_utf8_string_bytes_without_rejecting_source() {
        let codepoints = lex_bytes(SourceId::new(6), b"\"a\x93\xff\"")
            .ok()
            .and_then(|tokens| {
                tokens
                    .into_iter()
                    .find(|token| token.kind == TokenKind::String)
            })
            .map(|token| token.text.chars().map(|ch| ch as u32).collect::<Vec<_>>());

        assert_eq!(codepoints, Some(vec![0x61, 0x93, 0xff]));
    }
}