use core::mem;
use extension_trait::extension_trait;
use num_bigint::BigUint;
use sway_ast::literal::{LitChar, LitInt, LitIntType, LitString, Literal};
use sway_ast::token::{
    Comment, CommentKind, CommentedGroup, CommentedTokenStream, CommentedTokenTree, DocComment,
    DocStyle, Punct, Spacing, TokenStream,
};
use sway_error::error::CompileError;
use sway_error::handler::{ErrorEmitted, Handler};
use sway_error::lex_error::{LexError, LexErrorKind};
use sway_types::span::Source;
use sway_types::{
    ast::{Delimiter, PunctKind},
    Ident, SourceId, Span, Spanned,
};
use unicode_bidi::format_chars::{ALM, FSI, LRE, LRI, LRM, LRO, PDF, PDI, RLE, RLI, RLM, RLO};
use unicode_xid::UnicodeXID;

#[extension_trait]
impl CharExt for char {
    fn as_open_delimiter(self) -> Option<Delimiter> {
        match self {
            '(' => Some(Delimiter::Parenthesis),
            '{' => Some(Delimiter::Brace),
            '[' => Some(Delimiter::Bracket),
            _ => None,
        }
    }

    fn as_close_delimiter(self) -> Option<Delimiter> {
        match self {
            ')' => Some(Delimiter::Parenthesis),
            '}' => Some(Delimiter::Brace),
            ']' => Some(Delimiter::Bracket),
            _ => None,
        }
    }

    fn as_punct_kind(self) -> Option<PunctKind> {
        match self {
            ';' => Some(PunctKind::Semicolon),
            ':' => Some(PunctKind::Colon),
            '/' => Some(PunctKind::ForwardSlash),
            ',' => Some(PunctKind::Comma),
            '*' => Some(PunctKind::Star),
            '+' => Some(PunctKind::Add),
            '-' => Some(PunctKind::Sub),
            '<' => Some(PunctKind::LessThan),
            '>' => Some(PunctKind::GreaterThan),
            '=' => Some(PunctKind::Equals),
            '.' => Some(PunctKind::Dot),
            '!' => Some(PunctKind::Bang),
            '%' => Some(PunctKind::Percent),
            '&' => Some(PunctKind::Ampersand),
            '^' => Some(PunctKind::Caret),
            '|' => Some(PunctKind::Pipe),
            '_' => Some(PunctKind::Underscore),
            '#' => Some(PunctKind::Sharp),
            _ => None,
        }
    }
}

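/// Iterator over the characters of `src`, yielding each `char` together with its byte
/// offset into the source text.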
struct CharIndicesInner<'a> {
    src: &'a str,
    position: usize,
}

impl Iterator for CharIndicesInner<'_> {
    type Item = (usize, char);

    fn next(&mut self) -> Option<(usize, char)> {
        let mut char_indices = self.src[self.position..].char_indices();
        let (_, c) = char_indices.next()?;
        let ret = (self.position, c);
        match char_indices.next() {
            Some((char_width, _)) => self.position += char_width,
            None => self.position = self.src.len(),
        };
        Some(ret)
    }
}

type CharIndices<'a> = std::iter::Peekable<CharIndicesInner<'a>>;
type Result<T> = core::result::Result<T, ErrorEmitted>;

struct Lexer<'l> {
    handler: &'l Handler,
    src: &'l Source,
    source_id: &'l Option<SourceId>,
    stream: &'l mut CharIndices<'l>,
}

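/// Lexes `src[start..end]` into a [`TokenStream`], discarding comments.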
pub fn lex(
    handler: &Handler,
    src: Source,
    start: usize,
    end: usize,
    source_id: Option<SourceId>,
) -> Result<TokenStream> {
    lex_commented(handler, src, start, end, &source_id).map(|stream| stream.strip_comments())
}

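/// Returns `true` if `s` is a valid identifier or a `::`-separated path of valid
/// identifiers, optionally starting with a leading `::`.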
pub fn is_valid_identifier_or_path(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }

    let mut input = s;
    if let Some(rest) = input.strip_prefix("::") {
        input = rest;
        if input.is_empty() {
            return false;
        }
    }

    for segment in input.split("::") {
        if !is_valid_identifier(segment) {
            return false;
        }
    }

    true
}

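/// Returns `true` if `ident` is a valid identifier: not empty, not a lone `_`, not
/// starting with `__`, beginning with an XID-start character or `_`, and consisting of
/// XID-continue characters thereafter.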
fn is_valid_identifier(ident: &str) -> bool {
    if ident.is_empty() || ident == "_" || ident.starts_with("__") {
        return false;
    }

    let mut chars = ident.chars();
    let first = chars.next().unwrap();

    if !(first.is_xid_start() || first == '_') {
        return false;
    }

    chars.all(|c| c.is_xid_continue())
}

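/// Lexes `src[start..end]` into a [`CommentedTokenStream`], preserving comments and doc
/// comments as token trees alongside the regular tokens.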
pub fn lex_commented(
    handler: &Handler,
    src: Source,
    start: usize,
    end: usize,
    source_id: &Option<SourceId>,
) -> Result<CommentedTokenStream> {
    let stream = &mut CharIndicesInner {
        src: &src.text[..end],
        position: start,
    }
    .peekable();
    let mut l = Lexer {
        handler,
        src: &src,
        source_id,
        stream,
    };
    let mut file_start_offset: usize = 0;

    let mut parent_token_trees = Vec::new();
    let mut token_trees = Vec::new();
    while let Some((mut index, mut character)) = l.stream.next() {
        if character.is_whitespace() {
            if index - file_start_offset == 0 {
                file_start_offset += character.len_utf8();
            }
            continue;
        }
        if character == '/' {
            match l.stream.peek() {
                Some((_, '/')) => {
                    let search_end = token_trees
                        .last()
                        .map(|tt| {
                            if let CommentedTokenTree::Tree(t) = tt {
                                t.span().end()
                            } else {
                                0
                            }
                        })
                        .unwrap_or_default();

                    let has_newline = src.text[search_end..index]
                        .chars()
                        .rev()
                        .take_while(|c| c.is_whitespace())
                        .filter(|&c| c == '\n')
                        .count()
                        > 0;
                    let start_of_file_found = search_end == 0 && index == 0;

                    let comment_kind = if has_newline || start_of_file_found {
                        CommentKind::Newlined
                    } else {
                        CommentKind::Trailing
                    };

                    let ctt = lex_line_comment(&mut l, end, index, comment_kind);
                    token_trees.push(ctt);
                    continue;
                }
                Some((_, '*')) => {
                    if let Some(token) = lex_block_comment(&mut l, index) {
                        token_trees.push(token);
                    }
                    continue;
                }
                Some(_) | None => {}
            }
        }

        if character.is_xid_start() || character == '_' {
            let is_raw_ident = character == 'r' && matches!(l.stream.peek(), Some((_, '#')));
            if is_raw_ident {
                l.stream.next();
                if let Some((next_index, next_character)) = l.stream.next() {
                    character = next_character;
                    index = next_index;
                }
                if !(character.is_xid_start() || character == '_') {
                    let kind = LexErrorKind::InvalidCharacter {
                        position: index,
                        character,
                    };
                    let span = span_one(&l, index, character);
                    error(l.handler, LexError { kind, span });
                    continue;
                }
            }

            let not_is_single_underscore = character != '_'
                || l.stream
                    .peek()
                    .is_some_and(|(_, next)| next.is_xid_continue());
            if not_is_single_underscore {
                while l.stream.next_if(|(_, c)| c.is_xid_continue()).is_some() {}
                let ident = Ident::new_with_raw(span_until(&mut l, index), is_raw_ident);
                token_trees.push(CommentedTokenTree::Tree(ident.into()));
                continue;
            }
        }
        if let Some(delimiter) = character.as_open_delimiter() {
            let token_trees = mem::take(&mut token_trees);
            parent_token_trees.push((token_trees, index, delimiter));
            continue;
        }
        if let Some(close_delimiter) = character.as_close_delimiter() {
            match parent_token_trees.pop() {
                None => {
                    let kind = LexErrorKind::UnexpectedCloseDelimiter {
                        position: index,
                        close_delimiter,
                    };
                    let span = span_one(&l, index, character);
                    error(l.handler, LexError { kind, span });
                }
                Some((parent, open_index, open_delimiter)) => {
                    if open_delimiter != close_delimiter {
                        let kind = LexErrorKind::MismatchedDelimiters {
                            open_position: open_index,
                            close_position: index,
                            open_delimiter,
                            close_delimiter,
                        };
                        let span = span_one(&l, index, character);
                        error(l.handler, LexError { kind, span });
                    }
                    token_trees = lex_close_delimiter(
                        &mut l,
                        index,
                        parent,
                        token_trees,
                        open_index,
                        open_delimiter,
                    );
                }
            }
            continue;
        }
        if let Some(token) = lex_string(&mut l, index, character)? {
            token_trees.push(token);
            continue;
        }
        if let Some(token) = lex_char(&mut l, index, character)? {
            token_trees.push(token);
            continue;
        }
        if let Some(token) = lex_int_lit(&mut l, index, character)? {
            token_trees.push(token);
            continue;
        }
        if let Some(token) = lex_punctuation(&mut l, index, character) {
            token_trees.push(token);
            continue;
        }

        let kind = LexErrorKind::InvalidCharacter {
            position: index,
            character,
        };
        let span = span_one(&l, index, character);
        error(l.handler, LexError { kind, span });
        continue;
    }

    while let Some((parent, open_index, open_delimiter)) = parent_token_trees.pop() {
        let kind = LexErrorKind::UnclosedDelimiter {
            open_position: open_index,
            open_delimiter,
        };
        let span = span_one(&l, open_index, open_delimiter.as_open_char());
        error(l.handler, LexError { kind, span });

        token_trees = lex_close_delimiter(
            &mut l,
            src.text.len(),
            parent,
            token_trees,
            open_index,
            open_delimiter,
        );
    }
    Ok(CommentedTokenStream {
        token_trees,
        full_span: span(&l, start, end),
    })
}

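/// Wraps the token trees collected since the matching open delimiter into a group,
/// pushes it onto the parent token-tree list, and returns that list as the new current
/// one.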
fn lex_close_delimiter(
    l: &mut Lexer<'_>,
    index: usize,
    mut parent: Vec<CommentedTokenTree>,
    token_trees: Vec<CommentedTokenTree>,
    open_index: usize,
    delimiter: Delimiter,
) -> Vec<CommentedTokenTree> {
    let start_index = open_index + delimiter.as_open_char().len_utf8();
    let full_span = span(l, start_index, index);
    let group = CommentedGroup {
        token_stream: CommentedTokenStream {
            token_trees,
            full_span,
        },
        delimiter,
        span: span_until(l, open_index),
    };
    parent.push(CommentedTokenTree::Tree(group.into()));
    parent
}

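/// Lexes a `//` line comment up to the next newline (or EOF), classifying it as a plain
/// comment or as an inner/outer doc comment based on its third and fourth characters.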
fn lex_line_comment(
    l: &mut Lexer<'_>,
    end: usize,
    index: usize,
    comment_kind: CommentKind,
) -> CommentedTokenTree {
    let _ = l.stream.next();

    let end = l
        .stream
        .find(|(_, character)| *character == '\n')
        .map_or(end, |(end, _)| end);
    let sp = span(l, index, end);

    let doc_style = match (sp.as_str().chars().nth(2), sp.as_str().chars().nth(3)) {
        (Some('!'), _) => Some(DocStyle::Inner),
        (Some('/'), Some('/')) => None,
        (Some('/'), _) => Some(DocStyle::Outer),
        _ => None,
    };

    if let Some(doc_style) = doc_style {
        let doc_comment = DocComment {
            span: sp,
            doc_style,
            content_span: span(l, index + 3, end),
        };
        CommentedTokenTree::Tree(doc_comment.into())
    } else {
        Comment {
            span: sp,
            comment_kind,
        }
        .into()
    }
}

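/// Lexes a `/* ... */` block comment, handling nested block comments; returns `None`
/// and emits an error if the comment is still unclosed at end of input.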
fn lex_block_comment(l: &mut Lexer<'_>, index: usize) -> Option<CommentedTokenTree> {
    let _ = l.stream.next();
    let mut unclosed_indices = vec![index];

    let unclosed_multiline_comment = |l: &Lexer<'_>, unclosed_indices: Vec<_>| {
        let span = span(l, *unclosed_indices.last().unwrap(), l.src.text.len() - 1);
        let kind = LexErrorKind::UnclosedMultilineComment { unclosed_indices };
        error(l.handler, LexError { kind, span });
        None
    };

    let mut comment_kind = CommentKind::Inlined;

    loop {
        match l.stream.next() {
            None => return unclosed_multiline_comment(l, unclosed_indices),
            Some((_, '*')) => match l.stream.next() {
                None => return unclosed_multiline_comment(l, unclosed_indices),
                Some((slash_ix, '/')) => {
                    let start = unclosed_indices.pop().unwrap();
                    if unclosed_indices.is_empty() {
                        let end = slash_ix + '/'.len_utf8();
                        let span = span(l, start, end);
                        return Some(Comment { span, comment_kind }.into());
                    }
                }
                Some(_) => {}
            },
            Some((next_index, '/')) => match l.stream.next() {
                None => return unclosed_multiline_comment(l, unclosed_indices),
                Some((_, '*')) => unclosed_indices.push(next_index),
                Some(_) => {}
            },
            Some((_, '\n')) => {
                comment_kind = CommentKind::Multilined;
            }
            Some(_) => {}
        }
    }
}

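/// Lexes a double-quoted string literal, processing escape codes and rejecting Unicode
/// text-direction control characters; returns `Ok(None)` if `character` is not `"`.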
fn lex_string(
    l: &mut Lexer<'_>,
    index: usize,
    character: char,
) -> Result<Option<CommentedTokenTree>> {
    if character != '"' {
        return Ok(None);
    }
    let mut parsed = String::new();
    loop {
        let unclosed_string_lit = |l: &Lexer<'_>, end| {
            error(
                l.handler,
                LexError {
                    kind: LexErrorKind::UnclosedStringLiteral { position: index },
                    span: span(l, index, end),
                },
            )
        };
        let (next_index, next_character) = l.stream.next().ok_or_else(|| {
            let mut end = l.src.text.len() - 1;
            while !l.src.text.is_char_boundary(end) {
                end -= 1;
            }
            unclosed_string_lit(l, end)
        })?;
        parsed.push(match next_character {
            '\\' => parse_escape_code(l)
                .map_err(|e| e.unwrap_or_else(|| unclosed_string_lit(l, l.src.text.len())))?,
            '"' => break,
            ALM | FSI | LRE | LRI | LRM | LRO | PDF | PDI | RLE | RLI | RLM | RLO => {
                let kind = LexErrorKind::UnicodeTextDirInLiteral {
                    position: next_index,
                    character: next_character,
                };
                let span = span_one(l, next_index, next_character);
                error(l.handler, LexError { span, kind });
                continue;
            }
            _ => next_character,
        });
    }
    let span = span_until(l, index);
    let literal = Literal::String(LitString { span, parsed });
    Ok(Some(CommentedTokenTree::Tree(literal.into())))
}

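/// Lexes a single-quoted character literal; if more than one character appears before
/// the closing quote, an error is emitted and the contents are recovered as a string
/// literal instead.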
fn lex_char(
    l: &mut Lexer<'_>,
    index: usize,
    character: char,
) -> Result<Option<CommentedTokenTree>> {
    let is_quote = |c| c == '\'';
    if !is_quote(character) {
        return Ok(None);
    }

    let unclosed_char_lit = |l: &Lexer<'_>| {
        let err = LexError {
            kind: LexErrorKind::UnclosedCharLiteral { position: index },
            span: span(l, index, l.src.text.len()),
        };
        error(l.handler, err)
    };
    let next = |l: &mut Lexer<'_>| l.stream.next().ok_or_else(|| unclosed_char_lit(l));
    let escape = |l: &mut Lexer<'_>, next_char| {
        if next_char == '\\' {
            parse_escape_code(l).map_err(|e| e.unwrap_or_else(|| unclosed_char_lit(l)))
        } else {
            Ok(next_char)
        }
    };

    let (next_index, next_char) = next(l)?;
    if let ALM | FSI | LRE | LRI | LRM | LRO | PDF | PDI | RLE | RLI | RLM | RLO = next_char {
        let kind = LexErrorKind::UnicodeTextDirInLiteral {
            position: next_index,
            character: next_char,
        };
        let span = span_one(l, next_index, next_char);
        error(l.handler, LexError { span, kind });
    }

    let parsed = escape(l, next_char)?;

    let (next_index, next_char) = next(l)?;
    let sp = span_until(l, index);

    let literal = if !is_quote(next_char) {
        let mut string = String::new();
        string.push(parsed);
        string.push(escape(l, next_char)?);
        loop {
            let (_, next_char) = next(l)?;
            if is_quote(next_char) {
                break;
            }
            string.push(next_char);
        }

        error(
            l.handler,
            LexError {
                kind: LexErrorKind::ExpectedCloseQuote {
                    position: next_index,
                },
                span: span(l, next_index, next_index + string.len()),
            },
        );

        Literal::String(LitString {
            span: sp,
            parsed: string,
        })
    } else {
        Literal::Char(LitChar { span: sp, parsed })
    };

    Ok(Some(CommentedTokenTree::Tree(literal.into())))
}

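/// Parses the escape sequence following a `\` in a string or character literal.
/// Returns `Err(None)` if the input ends prematurely and `Err(Some(_))` if an error was
/// already emitted for an invalid escape.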
fn parse_escape_code(l: &mut Lexer<'_>) -> core::result::Result<char, Option<ErrorEmitted>> {
    let error = |kind, span| Err(Some(error(l.handler, LexError { kind, span })));

    match l.stream.next() {
        None => Err(None),
        Some((_, '"')) => Ok('"'),
        Some((_, '\'')) => Ok('\''),
        Some((_, 'n')) => Ok('\n'),
        Some((_, 'r')) => Ok('\r'),
        Some((_, 't')) => Ok('\t'),
        Some((_, '\\')) => Ok('\\'),
        Some((_, '0')) => Ok('\0'),
        Some((index, 'x')) => {
            let (high, low) = match (l.stream.next(), l.stream.next()) {
                (Some((_, high)), Some((_, low))) => (high, low),
                _ => return Err(None),
            };
            let (high, low) = match (high.to_digit(16), low.to_digit(16)) {
                (Some(high), Some(low)) => (high, low),
                _ => return error(LexErrorKind::InvalidHexEscape, span_until(l, index)),
            };
            let parsed_character = char::from_u32((high << 4) | low).unwrap();
            Ok(parsed_character)
        }
        Some((index, 'u')) => {
            match l.stream.next() {
                None => return Err(None),
                Some((_, '{')) => (),
                Some((_, unexpected_char)) => {
                    let span = span_one(l, index, unexpected_char);
                    let kind = LexErrorKind::UnicodeEscapeMissingBrace { position: index };
                    return error(kind, span);
                }
            }
            let mut digits_start_position_opt = None;
            let mut char_value = BigUint::from(0u32);
            let digits_end_position = loop {
                let (position, digit) = match l.stream.next() {
                    None => return Err(None),
                    Some((position, '}')) => break position,
                    Some((position, digit)) => (position, digit),
                };
                if digits_start_position_opt.is_none() {
                    digits_start_position_opt = Some(position);
                };
                let digit = match digit.to_digit(16) {
                    None => {
                        let span = span_one(l, position, digit);
                        let kind = LexErrorKind::InvalidUnicodeEscapeDigit { position };
                        return error(kind, span);
                    }
                    Some(digit) => digit,
                };
                char_value *= 16u32;
                char_value += digit;
            };
            let digits_start_position = digits_start_position_opt.unwrap_or(digits_end_position);
            let char_value = match u32::try_from(char_value) {
                Err(..) => {
                    let span = span(l, digits_start_position, digits_end_position);
                    let kind = LexErrorKind::UnicodeEscapeOutOfRange { position: index };
                    return error(kind, span);
                }
                Ok(char_value) => char_value,
            };
            let parsed_character = match char::from_u32(char_value) {
                None => {
                    let span_all = span_until(l, index);
                    let kind = LexErrorKind::UnicodeEscapeInvalidCharValue { span: span_all };
                    let span = span(l, digits_start_position, digits_end_position);
                    return error(kind, span);
                }
                Some(parsed_character) => parsed_character,
            };
            Ok(parsed_character)
        }
        Some((index, unexpected_char)) => error(
            LexErrorKind::InvalidEscapeCode { position: index },
            span_one(l, index, unexpected_char),
        ),
    }
}

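/// Lexes an integer literal, either decimal or `0x`/`0o`/`0b` prefixed, together with an
/// optional type suffix; returns `Ok(None)` if `character` is not a digit.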
fn lex_int_lit(
    l: &mut Lexer<'_>,
    index: usize,
    character: char,
) -> Result<Option<CommentedTokenTree>> {
    let digit = match character.to_digit(10) {
        None => return Ok(None),
        Some(d) => d,
    };

    let decimal_int_lit = |l, digit: u32| {
        let mut big_uint = BigUint::from(digit);
        let end_opt = parse_digits(&mut big_uint, l, 10);
        (big_uint, end_opt)
    };
    let (big_uint, end_opt) = if digit == 0 {
        let prefixed_int_lit = |l: &mut Lexer<'_>, radix| {
            let _ = l.stream.next();
            let d = l.stream.next();
            let incomplete_int_lit = |end| {
                let kind = match radix {
                    16 => LexErrorKind::IncompleteHexIntLiteral { position: index },
                    8 => LexErrorKind::IncompleteOctalIntLiteral { position: index },
                    2 => LexErrorKind::IncompleteBinaryIntLiteral { position: index },
                    _ => unreachable!(),
                };
                let span = span(l, index, end);
                error(l.handler, LexError { kind, span })
            };
            let (digit_pos, digit) = d.ok_or_else(|| incomplete_int_lit(l.src.text.len()))?;
            let radix_digit = digit
                .to_digit(radix)
                .ok_or_else(|| incomplete_int_lit(digit_pos))?;
            let mut big_uint = BigUint::from(radix_digit);
            let end_opt = parse_digits(&mut big_uint, l, radix);
            Ok((big_uint, end_opt))
        };

        match l.stream.peek() {
            Some((_, 'x')) => prefixed_int_lit(l, 16)?,
            Some((_, 'o')) => prefixed_int_lit(l, 8)?,
            Some((_, 'b')) => prefixed_int_lit(l, 2)?,
            Some((_, '_' | '0'..='9')) => decimal_int_lit(l, 0),
            Some(&(next_index, _)) => (BigUint::from(0u32), Some(next_index)),
            None => (BigUint::from(0u32), None),
        }
    } else {
        decimal_int_lit(l, digit)
    };

    let ty_opt = lex_int_ty_opt(l)?;

    let literal = Literal::Int(LitInt {
        span: span(l, index, end_opt.unwrap_or(l.src.text.len())),
        parsed: big_uint,
        ty_opt,
        is_generated_b256: false,
    });

    Ok(Some(CommentedTokenTree::Tree(literal.into())))
}

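/// Lexes the optional type suffix (e.g. `u8`, `u64`) immediately following an integer
/// literal, emitting an error for unrecognized suffixes.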
fn lex_int_ty_opt(l: &mut Lexer<'_>) -> Result<Option<(LitIntType, Span)>> {
    let (suffix_start_position, c) = match l.stream.next_if(|(_, c)| c.is_xid_continue()) {
        None => return Ok(None),
        Some(x) => x,
    };
    let mut suffix = String::from(c);
    let suffix_end_position = loop {
        match l.stream.peek() {
            Some((_, c)) if c.is_xid_continue() => {
                suffix.push(*c);
                let _ = l.stream.next();
            }
            Some((pos, _)) => break *pos,
            None => break l.src.text.len(),
        }
    };
    let ty = match parse_int_suffix(&suffix) {
        Some(s) => s,
        None => {
            let span = span(l, suffix_start_position, suffix_end_position);
            let kind = LexErrorKind::InvalidIntSuffix {
                suffix: Ident::new(span.clone()),
            };
            error(l.handler, LexError { kind, span });
            return Ok(None);
        }
    };
    let span = span_until(l, suffix_start_position);
    Ok(Some((ty, span)))
}

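/// Maps a recognized integer suffix string to its [`LitIntType`], or `None` if the
/// suffix is not one of the supported widths.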
pub fn parse_int_suffix(suffix: &str) -> Option<LitIntType> {
    Some(match suffix {
        "u8" => LitIntType::U8,
        "u16" => LitIntType::U16,
        "u32" => LitIntType::U32,
        "u64" => LitIntType::U64,
        "u256" => LitIntType::U256,
        "i8" => LitIntType::I8,
        "i16" => LitIntType::I16,
        "i32" => LitIntType::I32,
        "i64" => LitIntType::I64,
        _ => return None,
    })
}

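/// Consumes digits (and `_` separators) in the given radix, accumulating them into
/// `big_uint`; returns the byte offset of the first non-digit character, or `None` at
/// end of input.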
fn parse_digits(big_uint: &mut BigUint, l: &mut Lexer<'_>, radix: u32) -> Option<usize> {
    loop {
        match l.stream.peek() {
            None => break None,
            Some((_, '_')) => {
                let _ = l.stream.next();
            }
            Some(&(index, character)) => match character.to_digit(radix) {
                None => break Some(index),
                Some(digit) => {
                    let _ = l.stream.next();
                    *big_uint *= radix;
                    *big_uint += digit;
                }
            },
        };
    }
}

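/// Lexes a single punctuation character, marking its spacing as `Joint` when the next
/// character is also punctuation and `Alone` otherwise.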
fn lex_punctuation(l: &mut Lexer<'_>, index: usize, character: char) -> Option<CommentedTokenTree> {
    let punct = Punct {
        kind: character.as_punct_kind()?,
        spacing: match l.stream.peek() {
            Some((_, next_character)) if next_character.as_punct_kind().is_some() => Spacing::Joint,
            _ => Spacing::Alone,
        },
        span: span_until(l, index),
    };
    Some(CommentedTokenTree::Tree(punct.into()))
}

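/// Builds a span from `start` up to the current position of the lexer's stream (the
/// next unconsumed character, or the end of the source).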
fn span_until(l: &mut Lexer<'_>, start: usize) -> Span {
    let end = l.stream.peek().map_or(l.src.text.len(), |(end, _)| *end);
    span(l, start, end)
}

fn span_one(l: &Lexer<'_>, start: usize, c: char) -> Span {
    span(l, start, start + c.len_utf8())
}

fn span(l: &Lexer<'_>, start: usize, end: usize) -> Span {
    Span::new(l.src.clone(), start, end, *l.source_id).unwrap()
}

fn error(handler: &Handler, error: LexError) -> ErrorEmitted {
    handler.emit_err(CompileError::Lex { error })
}

#[cfg(test)]
mod tests {
    use super::*;
    use assert_matches::assert_matches;
    use sway_ast::{
        literal::{LitChar, Literal},
        token::{
            Comment, CommentKind, CommentedTokenTree, CommentedTree, DocComment, DocStyle,
            TokenTree,
        },
    };
    use sway_error::{
        error::CompileError,
        handler::Handler,
        lex_error::{LexError, LexErrorKind},
    };

    #[test]
    fn lex_bidi() {
        let input = "
        script;
        use std::string::String;
        fn main() {
            let a = String::from_ascii_str(\"fuel\");
            let b = String::from_ascii_str(\"fuel\u{202E}\u{2066}// Same string again\u{2069}\u{2066}\");
            if a.as_bytes() == b.as_bytes() {
                log(\"same\");
            } else {
                log(\"different\");
            }
            let lrm = '\u{202E}';
            log(lrm);
        }
        ";
        let start = 0;
        let end = input.len();
        let path = None;
        let handler = Handler::default();
        let _stream = lex_commented(&handler, input.into(), start, end, &path).unwrap();
        let (errors, warnings, infos) = handler.consume();
        assert_eq!(infos.len(), 0);
        assert_eq!(warnings.len(), 0);
        assert_eq!(errors.len(), 5);
        for err in errors {
            assert_matches!(
                err,
                CompileError::Lex {
                    error: LexError {
                        span: _,
                        kind: LexErrorKind::UnicodeTextDirInLiteral {
                            position: _,
                            character: _
                        }
                    }
                }
            );
        }
    }

    #[test]
    fn lex_commented_token_stream() {
        let input = r#"
        //
        // Single-line comment.
        struct Foo {
            /* multi-
             * line-
             * comment */
            bar: i32, // trailing comment
        }
        "#;
        let start = 0;
        let end = input.len();
        let path = None;
        let handler = Handler::default();
        let stream = lex_commented(&handler, input.into(), start, end, &path).unwrap();
        assert!(handler.consume().0.is_empty());
        let mut tts = stream.token_trees().iter();
        assert_eq!(tts.next().unwrap().span().as_str(), "//");
        assert_eq!(
            tts.next().unwrap().span().as_str(),
            "// Single-line comment."
        );
        assert_eq!(tts.next().unwrap().span().as_str(), "struct");
        assert_eq!(tts.next().unwrap().span().as_str(), "Foo");
        {
            let group = match tts.next() {
                Some(CommentedTokenTree::Tree(CommentedTree::Group(group))) => group,
                _ => panic!("expected group"),
            };
            let mut tts = group.token_stream.token_trees().iter();
            assert_eq!(
                tts.next().unwrap().span().as_str(),
                "/* multi-\n             * line-\n             * comment */",
            );
            assert_eq!(tts.next().unwrap().span().as_str(), "bar");
            assert_eq!(tts.next().unwrap().span().as_str(), ":");
            assert_eq!(tts.next().unwrap().span().as_str(), "i32");
            assert_eq!(tts.next().unwrap().span().as_str(), ",");
            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Trailing,
                })) if span.as_str() == "// trailing comment"
            );
            assert!(tts.next().is_none());
        }
        assert!(tts.next().is_none());
    }

    #[test]
    fn lex_comments_check_comment_kind() {
        let input = r#"
        // CommentKind::Newlined
        abi Foo {
            // CommentKind::Newlined
            fn bar(); // CommentKind::Trailing
            // CommentKind::Newlined
        }
        "#;
        let start = 0;
        let end = input.len();
        let path = None;
        let handler = Handler::default();
        let stream = lex_commented(&handler, input.into(), start, end, &path).unwrap();
        assert!(handler.consume().0.is_empty());
        let mut tts = stream.token_trees().iter();

        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Newlined,
            })) if span.as_str() == "// CommentKind::Newlined"
        );
        assert_eq!(tts.next().unwrap().span().as_str(), "abi");
        assert_eq!(tts.next().unwrap().span().as_str(), "Foo");

        {
            let group = match tts.next() {
                Some(CommentedTokenTree::Tree(CommentedTree::Group(group))) => group,
                _ => panic!("expected group"),
            };
            let mut tts = group.token_stream.token_trees().iter();

            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Newlined,
                })) if span.as_str() == "// CommentKind::Newlined"
            );
            assert_eq!(tts.next().unwrap().span().as_str(), "fn");
            assert_eq!(tts.next().unwrap().span().as_str(), "bar");
            assert_eq!(tts.next().unwrap().span().as_str(), "()");
            assert_eq!(tts.next().unwrap().span().as_str(), ";");
            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Trailing,
                })) if span.as_str() == "// CommentKind::Trailing"
            );
            assert_matches!(
                tts.next(),
                Some(CommentedTokenTree::Comment(Comment {
                    span,
                    comment_kind: CommentKind::Newlined,
                })) if span.as_str() == "// CommentKind::Newlined"
            );
            assert!(tts.next().is_none());
        }
    }

    #[test]
    fn lex_doc_comments() {
        let input = r#"
        //none
        ////none
        //!inner
        //! inner
        ///outer
        /// outer
        "#;
        let start = 0;
        let end = input.len();
        let path = None;
        let handler = Handler::default();
        let stream = lex_commented(&handler, input.into(), start, end, &path).unwrap();
        assert!(handler.consume().0.is_empty());
        let mut tts = stream.token_trees().iter();
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Newlined,
            })) if span.as_str() == "//none"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Comment(Comment {
                span,
                comment_kind: CommentKind::Newlined,
            })) if span.as_str() == "////none"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                doc_style: DocStyle::Inner,
                span,
                content_span
            }))) if span.as_str() == "//!inner" && content_span.as_str() == "inner"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                doc_style: DocStyle::Inner,
                span,
                content_span
            }))) if span.as_str() == "//! inner" && content_span.as_str() == " inner"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                doc_style: DocStyle::Outer,
                span,
                content_span
            }))) if span.as_str() == "///outer" && content_span.as_str() == "outer"
        );
        assert_matches!(
            tts.next(),
            Some(CommentedTokenTree::Tree(CommentedTree::DocComment(DocComment {
                doc_style: DocStyle::Outer,
                span,
                content_span
            }))) if span.as_str() == "/// outer" && content_span.as_str() == " outer"
        );
        assert_eq!(tts.next(), None);
    }

    #[test]
    fn lex_char_escaped_quote() {
        let input = r"
            '\''
        ";
        let handler = Handler::default();
        let stream = lex(&handler, input.into(), 0, input.len(), None).unwrap();
        assert!(handler.consume().0.is_empty());
        let mut tts = stream.token_trees().iter();
        assert_matches!(
            tts.next(),
            Some(TokenTree::Literal(Literal::Char(LitChar {
                parsed: '\'',
                ..
            })))
        );
        assert_eq!(tts.next(), None);
    }

    use super::is_valid_identifier_or_path as valid;

    #[test]
    fn accepts_simple_identifiers() {
        assert!(valid("foo"));
        assert!(valid("Foo"));
        assert!(valid("_foo"));
        assert!(valid("foo123"));
        assert!(valid("føø"));
    }

    #[test]
    fn rejects_invalid_identifiers() {
        assert!(!valid(""));
        assert!(!valid("_"));
        assert!(!valid("__"));
        assert!(!valid("__invalid"));
        assert!(!valid(":foo"));
        assert!(!valid("foo:bar"));
    }

    #[test]
    fn accepts_paths() {
        assert!(valid("foo::bar"));
        assert!(valid("_foo::_bar"));
        assert!(valid("foo_bar::baz123"));
        assert!(valid("::some_module::in_the_same::package"));
    }

    #[test]
    fn rejects_malformed_paths() {
        assert!(!valid("foo:bar:baz"));
        assert!(!valid("foo::"));
        assert!(!valid("::"));
        assert!(!valid("foo:::bar"));
        assert!(!valid("foo::__bad"));
    }
}