1use crate::fallback::{
2 is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream,
3 TokenStreamBuilder,
4};
5use crate::{Delimiter, Punct, Spacing, TokenTree};
6use core::char;
7use core::str::{Bytes, CharIndices, Chars};
8
/// A lightweight, copyable view of the input that has not been lexed yet.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) struct Cursor<'a> {
    /// The remaining, not yet consumed, input text.
    pub rest: &'a str,
    /// Position of `rest`, advanced by the number of characters (not bytes)
    /// consumed. Only compiled in when `span_locations` is enabled.
    #[cfg(span_locations)]
    pub off: u32,
}
15
16impl<'a> Cursor<'a> {
17 pub fn advance(&self, bytes: usize) -> Cursor<'a> {
18 let (_front, rest) = self.rest.split_at(bytes);
19 Cursor {
20 rest,
21 #[cfg(span_locations)]
22 off: self.off + _front.chars().count() as u32,
23 }
24 }
25
26 pub fn starts_with(&self, s: &str) -> bool {
27 self.rest.starts_with(s)
28 }
29
30 pub fn starts_with_char(&self, ch: char) -> bool {
31 self.rest.starts_with(ch)
32 }
33
34 pub fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
35 where
36 Pattern: FnMut(char) -> bool,
37 {
38 self.rest.starts_with(f)
39 }
40
41 pub fn is_empty(&self) -> bool {
42 self.rest.is_empty()
43 }
44
45 fn len(&self) -> usize {
46 self.rest.len()
47 }
48
49 fn as_bytes(&self) -> &'a [u8] {
50 self.rest.as_bytes()
51 }
52
53 fn bytes(&self) -> Bytes<'a> {
54 self.rest.bytes()
55 }
56
57 fn chars(&self) -> Chars<'a> {
58 self.rest.chars()
59 }
60
61 fn char_indices(&self) -> CharIndices<'a> {
62 self.rest.char_indices()
63 }
64
65 fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
66 if self.starts_with(tag) {
67 Ok(self.advance(tag.len()))
68 } else {
69 Err(Reject)
70 }
71 }
72}
73
/// Marker error returned by the parsing functions in this file when the input
/// does not match what they expect.
pub(crate) struct Reject;
/// Result of a parsing step: the cursor after the consumed input together
/// with the parsed value, or `Reject`.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
76
77fn skip_whitespace(input: Cursor) -> Cursor {
78 let mut s = input;
79
80 while !s.is_empty() {
81 let byte = s.as_bytes()[0];
82 if byte == b'/' {
83 if s.starts_with("//")
84 && (!s.starts_with("///") || s.starts_with("////"))
85 && !s.starts_with("//!")
86 {
87 let (cursor, _) = take_until_newline_or_eof(s);
88 s = cursor;
89 continue;
90 } else if s.starts_with("/**/") {
91 s = s.advance(4);
92 continue;
93 } else if s.starts_with("/*")
94 && (!s.starts_with("/**") || s.starts_with("/***"))
95 && !s.starts_with("/*!")
96 {
97 match block_comment(s) {
98 Ok((rest, _)) => {
99 s = rest;
100 continue;
101 }
102 Err(Reject) => return s,
103 }
104 }
105 }
106 match byte {
107 b' ' | 0x09..=0x0d => {
108 s = s.advance(1);
109 continue;
110 }
111 b if b.is_ascii() => {}
112 _ => {
113 let ch = s.chars().next().unwrap();
114 if is_whitespace(ch) {
115 s = s.advance(ch.len_utf8());
116 continue;
117 }
118 }
119 }
120 return s;
121 }
122 s
123}
124
125fn block_comment(input: Cursor) -> PResult<&str> {
126 if !input.starts_with("/*") {
127 return Err(Reject);
128 }
129
130 let mut depth = 0usize;
131 let bytes = input.as_bytes();
132 let mut i = 0usize;
133 let upper = bytes.len() - 1;
134
135 while i < upper {
136 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
137 depth += 1;
138 i += 1; } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
140 depth -= 1;
141 if depth == 0 {
142 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
143 }
144 i += 1; }
146 i += 1;
147 }
148
149 Err(Reject)
150}
151
/// Reports whether `ch` counts as whitespace for the lexer: anything
/// `char::is_whitespace` accepts, plus U+200E and U+200F.
fn is_whitespace(ch: char) -> bool {
    matches!(ch, '\u{200e}' | '\u{200f}') || ch.is_whitespace()
}
156
157fn word_break(input: Cursor) -> Result<Cursor, Reject> {
158 match input.chars().next() {
159 Some(ch) if is_ident_continue(ch) => Err(Reject),
160 Some(_) | None => Ok(input),
161 }
162}
163
/// Lexes the whole of `input` into a `TokenStream`.
///
/// Delimited groups are handled with an explicit stack rather than
/// recursion: an opening delimiter pushes the tokens collected so far, and
/// the matching closing delimiter pops them and appends the finished group.
/// Returns a `LexError` on an unrecognized token, an unbalanced or mismatched
/// closing delimiter, or end of input while a group is still open.
pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
    // Tokens of the group currently being built (or of the top level).
    let mut trees = TokenStreamBuilder::new();
    // One frame per open delimiter: the delimiter and the enclosing builder
    // (plus the opening offset when `span_locations` is enabled).
    let mut stack = Vec::new();

    loop {
        input = skip_whitespace(input);

        // Doc comments push their own tokens directly into `trees`.
        if let Ok((rest, ())) = doc_comment(input, &mut trees) {
            input = rest;
            continue;
        }

        #[cfg(span_locations)]
        let lo = input.off;

        let first = match input.bytes().next() {
            Some(first) => first,
            // End of input: fine at top level, an error inside an open group.
            None => match stack.last() {
                None => return Ok(trees.build()),
                #[cfg(span_locations)]
                Some((lo, _frame)) => {
                    return Err(LexError {
                        span: Span { lo: *lo, hi: *lo },
                    })
                }
                #[cfg(not(span_locations))]
                Some(_frame) => return Err(LexError { span: Span {} }),
            },
        };

        if let Some(open_delimiter) = match first {
            b'(' => Some(Delimiter::Parenthesis),
            b'[' => Some(Delimiter::Bracket),
            b'{' => Some(Delimiter::Brace),
            _ => None,
        } {
            // Open a group: stash the current builder and start a fresh one.
            input = input.advance(1);
            let frame = (open_delimiter, trees);
            #[cfg(span_locations)]
            let frame = (lo, frame);
            stack.push(frame);
            trees = TokenStreamBuilder::new();
        } else if let Some(close_delimiter) = match first {
            b')' => Some(Delimiter::Parenthesis),
            b']' => Some(Delimiter::Bracket),
            b'}' => Some(Delimiter::Brace),
            _ => None,
        } {
            // Close a group: there must be an open frame of the same kind.
            let frame = match stack.pop() {
                Some(frame) => frame,
                None => return Err(lex_error(input)),
            };
            #[cfg(span_locations)]
            let (lo, frame) = frame;
            let (open_delimiter, outer) = frame;
            if open_delimiter != close_delimiter {
                return Err(lex_error(input));
            }
            input = input.advance(1);
            let mut g = Group::new(open_delimiter, trees.build());
            g.set_span(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: input.off,
            });
            // Resume the enclosing builder and append the finished group.
            trees = outer;
            trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
        } else {
            // Anything else must be a single leaf token.
            let (rest, mut tt) = match leaf_token(input) {
                Ok((rest, tt)) => (rest, tt),
                Err(Reject) => return Err(lex_error(input)),
            };
            tt.set_span(crate::Span::_new_fallback(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: rest.off,
            }));
            trees.push_token_from_parser(tt);
            input = rest;
        }
    }
}
248
/// Builds a `LexError` whose span is the empty range at `cursor`'s position
/// (or a field-less span when `span_locations` is disabled).
fn lex_error(cursor: Cursor) -> LexError {
    // Without span locations the cursor is otherwise unused.
    #[cfg(not(span_locations))]
    let _ = cursor;
    LexError {
        span: Span {
            #[cfg(span_locations)]
            lo: cursor.off,
            #[cfg(span_locations)]
            hi: cursor.off,
        },
    }
}
261
262fn leaf_token(input: Cursor) -> PResult<TokenTree> {
263 if let Ok((input, l)) = literal(input) {
264 Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
266 } else if let Ok((input, p)) = punct(input) {
267 Ok((input, TokenTree::Punct(p)))
268 } else if let Ok((input, i)) = ident(input) {
269 Ok((input, TokenTree::Ident(i)))
270 } else {
271 Err(Reject)
272 }
273}
274
275fn ident(input: Cursor) -> PResult<crate::Ident> {
276 if [
277 "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
278 ]
279 .iter()
280 .any(|prefix| input.starts_with(prefix))
281 {
282 Err(Reject)
283 } else {
284 ident_any(input)
285 }
286}
287
288fn ident_any(input: Cursor) -> PResult<crate::Ident> {
289 let raw = input.starts_with("r#");
290 let rest = input.advance((raw as usize) << 1);
291
292 let (rest, sym) = ident_not_raw(rest)?;
293
294 if !raw {
295 let ident = crate::Ident::new(sym, crate::Span::call_site());
296 return Ok((rest, ident));
297 }
298
299 match sym {
300 "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
301 _ => {}
302 }
303
304 let ident = crate::Ident::_new_raw(sym, crate::Span::call_site());
305 Ok((rest, ident))
306}
307
308fn ident_not_raw(input: Cursor) -> PResult<&str> {
309 let mut chars = input.char_indices();
310
311 match chars.next() {
312 Some((_, ch)) if is_ident_start(ch) => {}
313 _ => return Err(Reject),
314 }
315
316 let mut end = input.len();
317 for (i, ch) in chars {
318 if !is_ident_continue(ch) {
319 end = i;
320 break;
321 }
322 }
323
324 Ok((input.advance(end), &input.rest[..end]))
325}
326
327pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
328 let rest = literal_nocapture(input)?;
329 let end = input.len() - rest.len();
330 Ok((rest, Literal::_new(input.rest[..end].to_string())))
331}
332
333fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
334 if let Ok(ok) = string(input) {
335 Ok(ok)
336 } else if let Ok(ok) = byte_string(input) {
337 Ok(ok)
338 } else if let Ok(ok) = c_string(input) {
339 Ok(ok)
340 } else if let Ok(ok) = byte(input) {
341 Ok(ok)
342 } else if let Ok(ok) = character(input) {
343 Ok(ok)
344 } else if let Ok(ok) = float(input) {
345 Ok(ok)
346 } else if let Ok(ok) = int(input) {
347 Ok(ok)
348 } else {
349 Err(Reject)
350 }
351}
352
353fn literal_suffix(input: Cursor) -> Cursor {
354 match ident_not_raw(input) {
355 Ok((input, _)) => input,
356 Err(Reject) => input,
357 }
358}
359
360fn string(input: Cursor) -> Result<Cursor, Reject> {
361 if let Ok(input) = input.parse("\"") {
362 cooked_string(input)
363 } else if let Ok(input) = input.parse("r") {
364 raw_string(input)
365 } else {
366 Err(Reject)
367 }
368}
369
370fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
371 let mut chars = input.char_indices();
372
373 while let Some((i, ch)) = chars.next() {
374 match ch {
375 '"' => {
376 let input = input.advance(i + 1);
377 return Ok(literal_suffix(input));
378 }
379 '\r' => match chars.next() {
380 Some((_, '\n')) => {}
381 _ => break,
382 },
383 '\\' => match chars.next() {
384 Some((_, 'x')) => {
385 backslash_x_char(&mut chars)?;
386 }
387 Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
388 Some((_, 'u')) => {
389 backslash_u(&mut chars)?;
390 }
391 Some((newline, ch @ ('\n' | '\r'))) => {
392 input = input.advance(newline + 1);
393 trailing_backslash(&mut input, ch as u8)?;
394 chars = input.char_indices();
395 }
396 _ => break,
397 },
398 _ch => {}
399 }
400 }
401 Err(Reject)
402}
403
404fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
405 let (input, delimiter) = delimiter_of_raw_string(input)?;
406 let mut bytes = input.bytes().enumerate();
407 while let Some((i, byte)) = bytes.next() {
408 match byte {
409 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
410 let rest = input.advance(i + 1 + delimiter.len());
411 return Ok(literal_suffix(rest));
412 }
413 b'\r' => match bytes.next() {
414 Some((_, b'\n')) => {}
415 _ => break,
416 },
417 _ => {}
418 }
419 }
420 Err(Reject)
421}
422
423fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
424 if let Ok(input) = input.parse("b\"") {
425 cooked_byte_string(input)
426 } else if let Ok(input) = input.parse("br") {
427 raw_byte_string(input)
428 } else {
429 Err(Reject)
430 }
431}
432
433fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
434 let mut bytes = input.bytes().enumerate();
435 while let Some((offset, b)) = bytes.next() {
436 match b {
437 b'"' => {
438 let input = input.advance(offset + 1);
439 return Ok(literal_suffix(input));
440 }
441 b'\r' => match bytes.next() {
442 Some((_, b'\n')) => {}
443 _ => break,
444 },
445 b'\\' => match bytes.next() {
446 Some((_, b'x')) => {
447 backslash_x_byte(&mut bytes)?;
448 }
449 Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
450 Some((newline, b @ (b'\n' | b'\r'))) => {
451 input = input.advance(newline + 1);
452 trailing_backslash(&mut input, b)?;
453 bytes = input.bytes().enumerate();
454 }
455 _ => break,
456 },
457 b if b.is_ascii() => {}
458 _ => break,
459 }
460 }
461 Err(Reject)
462}
463
464fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
465 for (i, byte) in input.bytes().enumerate() {
466 match byte {
467 b'"' => {
468 if i > 255 {
469 return Err(Reject);
471 }
472 return Ok((input.advance(i + 1), &input.rest[..i]));
473 }
474 b'#' => {}
475 _ => break,
476 }
477 }
478 Err(Reject)
479}
480
481fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
482 let (input, delimiter) = delimiter_of_raw_string(input)?;
483 let mut bytes = input.bytes().enumerate();
484 while let Some((i, byte)) = bytes.next() {
485 match byte {
486 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
487 let rest = input.advance(i + 1 + delimiter.len());
488 return Ok(literal_suffix(rest));
489 }
490 b'\r' => match bytes.next() {
491 Some((_, b'\n')) => {}
492 _ => break,
493 },
494 other => {
495 if !other.is_ascii() {
496 break;
497 }
498 }
499 }
500 }
501 Err(Reject)
502}
503
504fn c_string(input: Cursor) -> Result<Cursor, Reject> {
505 if let Ok(input) = input.parse("c\"") {
506 cooked_c_string(input)
507 } else if let Ok(input) = input.parse("cr") {
508 raw_c_string(input)
509 } else {
510 Err(Reject)
511 }
512}
513
514fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
515 let (input, delimiter) = delimiter_of_raw_string(input)?;
516 let mut bytes = input.bytes().enumerate();
517 while let Some((i, byte)) = bytes.next() {
518 match byte {
519 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
520 let rest = input.advance(i + 1 + delimiter.len());
521 return Ok(literal_suffix(rest));
522 }
523 b'\r' => match bytes.next() {
524 Some((_, b'\n')) => {}
525 _ => break,
526 },
527 b'\0' => break,
528 _ => {}
529 }
530 }
531 Err(Reject)
532}
533
534fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
535 let mut chars = input.char_indices();
536
537 while let Some((i, ch)) = chars.next() {
538 match ch {
539 '"' => {
540 let input = input.advance(i + 1);
541 return Ok(literal_suffix(input));
542 }
543 '\r' => match chars.next() {
544 Some((_, '\n')) => {}
545 _ => break,
546 },
547 '\\' => match chars.next() {
548 Some((_, 'x')) => {
549 backslash_x_nonzero(&mut chars)?;
550 }
551 Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
552 Some((_, 'u')) => {
553 if backslash_u(&mut chars)? == '\0' {
554 break;
555 }
556 }
557 Some((newline, ch @ ('\n' | '\r'))) => {
558 input = input.advance(newline + 1);
559 trailing_backslash(&mut input, ch as u8)?;
560 chars = input.char_indices();
561 }
562 _ => break,
563 },
564 '\0' => break,
565 _ch => {}
566 }
567 }
568 Err(Reject)
569}
570
571fn byte(input: Cursor) -> Result<Cursor, Reject> {
572 let input = input.parse("b'")?;
573 let mut bytes = input.bytes().enumerate();
574 let ok = match bytes.next().map(|(_, b)| b) {
575 Some(b'\\') => match bytes.next().map(|(_, b)| b) {
576 Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
577 Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
578 _ => false,
579 },
580 b => b.is_some(),
581 };
582 if !ok {
583 return Err(Reject);
584 }
585 let (offset, _) = bytes.next().ok_or(Reject)?;
586 if !input.chars().as_str().is_char_boundary(offset) {
587 return Err(Reject);
588 }
589 let input = input.advance(offset).parse("'")?;
590 Ok(literal_suffix(input))
591}
592
593fn character(input: Cursor) -> Result<Cursor, Reject> {
594 let input = input.parse("'")?;
595 let mut chars = input.char_indices();
596 let ok = match chars.next().map(|(_, ch)| ch) {
597 Some('\\') => match chars.next().map(|(_, ch)| ch) {
598 Some('x') => backslash_x_char(&mut chars).is_ok(),
599 Some('u') => backslash_u(&mut chars).is_ok(),
600 Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
601 _ => false,
602 },
603 ch => ch.is_some(),
604 };
605 if !ok {
606 return Err(Reject);
607 }
608 let (idx, _) = chars.next().ok_or(Reject)?;
609 let input = input.advance(idx).parse("'")?;
610 Ok(literal_suffix(input))
611}
612
// Pulls the next `(index, item)` pair from `$chars` and evaluates to the item
// if it matches `$pat`; otherwise returns `Err(Reject)` from the enclosing
// function.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) if matches!(ch, $pat) => ch,
            _ => return Err(Reject),
        }
    };
}
624
625fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
626where
627 I: Iterator<Item = (usize, char)>,
628{
629 next_ch!(chars @ '0'..='7');
630 next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
631 Ok(())
632}
633
634fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
635where
636 I: Iterator<Item = (usize, u8)>,
637{
638 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
639 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
640 Ok(())
641}
642
643fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
644where
645 I: Iterator<Item = (usize, char)>,
646{
647 let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
648 let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
649 if first == '0' && second == '0' {
650 Err(Reject)
651 } else {
652 Ok(())
653 }
654}
655
656fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
657where
658 I: Iterator<Item = (usize, char)>,
659{
660 next_ch!(chars @ '{');
661 let mut value = 0;
662 let mut len = 0;
663 for (_, ch) in chars {
664 let digit = match ch {
665 '0'..='9' => ch as u8 - b'0',
666 'a'..='f' => 10 + ch as u8 - b'a',
667 'A'..='F' => 10 + ch as u8 - b'A',
668 '_' if len > 0 => continue,
669 '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
670 _ => break,
671 };
672 if len == 6 {
673 break;
674 }
675 value *= 0x10;
676 value += u32::from(digit);
677 len += 1;
678 }
679 Err(Reject)
680}
681
682fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
683 let mut whitespace = input.bytes().enumerate();
684 loop {
685 if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
686 return Err(Reject);
687 }
688 match whitespace.next() {
689 Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
690 last = b;
691 }
692 Some((offset, _)) => {
693 *input = input.advance(offset);
694 return Ok(());
695 }
696 None => return Err(Reject),
697 }
698 }
699}
700
701fn float(input: Cursor) -> Result<Cursor, Reject> {
702 let mut rest = float_digits(input)?;
703 if let Some(ch) = rest.chars().next() {
704 if is_ident_start(ch) {
705 rest = ident_not_raw(rest)?.0;
706 }
707 }
708 word_break(rest)
709}
710
711fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
712 let mut chars = input.chars().peekable();
713 match chars.next() {
714 Some(ch) if '0' <= ch && ch <= '9' => {}
715 _ => return Err(Reject),
716 }
717
718 let mut len = 1;
719 let mut has_dot = false;
720 let mut has_exp = false;
721 while let Some(&ch) = chars.peek() {
722 match ch {
723 '0'..='9' | '_' => {
724 chars.next();
725 len += 1;
726 }
727 '.' => {
728 if has_dot {
729 break;
730 }
731 chars.next();
732 if chars
733 .peek()
734 .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
735 {
736 return Err(Reject);
737 }
738 len += 1;
739 has_dot = true;
740 }
741 'e' | 'E' => {
742 chars.next();
743 len += 1;
744 has_exp = true;
745 break;
746 }
747 _ => break,
748 }
749 }
750
751 if !(has_dot || has_exp) {
752 return Err(Reject);
753 }
754
755 if has_exp {
756 let token_before_exp = if has_dot {
757 Ok(input.advance(len - 1))
758 } else {
759 Err(Reject)
760 };
761 let mut has_sign = false;
762 let mut has_exp_value = false;
763 while let Some(&ch) = chars.peek() {
764 match ch {
765 '+' | '-' => {
766 if has_exp_value {
767 break;
768 }
769 if has_sign {
770 return token_before_exp;
771 }
772 chars.next();
773 len += 1;
774 has_sign = true;
775 }
776 '0'..='9' => {
777 chars.next();
778 len += 1;
779 has_exp_value = true;
780 }
781 '_' => {
782 chars.next();
783 len += 1;
784 }
785 _ => break,
786 }
787 }
788 if !has_exp_value {
789 return token_before_exp;
790 }
791 }
792
793 Ok(input.advance(len))
794}
795
796fn int(input: Cursor) -> Result<Cursor, Reject> {
797 let mut rest = digits(input)?;
798 if let Some(ch) = rest.chars().next() {
799 if is_ident_start(ch) {
800 rest = ident_not_raw(rest)?.0;
801 }
802 }
803 word_break(rest)
804}
805
806fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
807 let base = if input.starts_with("0x") {
808 input = input.advance(2);
809 16
810 } else if input.starts_with("0o") {
811 input = input.advance(2);
812 8
813 } else if input.starts_with("0b") {
814 input = input.advance(2);
815 2
816 } else {
817 10
818 };
819
820 let mut len = 0;
821 let mut empty = true;
822 for b in input.bytes() {
823 match b {
824 b'0'..=b'9' => {
825 let digit = (b - b'0') as u64;
826 if digit >= base {
827 return Err(Reject);
828 }
829 }
830 b'a'..=b'f' => {
831 let digit = 10 + (b - b'a') as u64;
832 if digit >= base {
833 break;
834 }
835 }
836 b'A'..=b'F' => {
837 let digit = 10 + (b - b'A') as u64;
838 if digit >= base {
839 break;
840 }
841 }
842 b'_' => {
843 if empty && base == 10 {
844 return Err(Reject);
845 }
846 len += 1;
847 continue;
848 }
849 _ => break,
850 };
851 len += 1;
852 empty = false;
853 }
854 if empty {
855 Err(Reject)
856 } else {
857 Ok(input.advance(len))
858 }
859}
860
861fn punct(input: Cursor) -> PResult<Punct> {
862 let (rest, ch) = punct_char(input)?;
863 if ch == '\'' {
864 if ident_any(rest)?.0.starts_with_char('\'') {
865 Err(Reject)
866 } else {
867 Ok((rest, Punct::new('\'', Spacing::Joint)))
868 }
869 } else {
870 let kind = match punct_char(rest) {
871 Ok(_) => Spacing::Joint,
872 Err(Reject) => Spacing::Alone,
873 };
874 Ok((rest, Punct::new(ch, kind)))
875 }
876}
877
878fn punct_char(input: Cursor) -> PResult<char> {
879 if input.starts_with("//") || input.starts_with("/*") {
880 return Err(Reject);
882 }
883
884 let mut chars = input.chars();
885 let first = match chars.next() {
886 Some(ch) => ch,
887 None => {
888 return Err(Reject);
889 }
890 };
891 let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
892 if recognized.contains(first) {
893 Ok((input.advance(first.len_utf8()), first))
894 } else {
895 Err(Reject)
896 }
897}
898
899fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
900 #[cfg(span_locations)]
901 let lo = input.off;
902 let (rest, (comment, inner)) = doc_comment_contents(input)?;
903 let span = crate::Span::_new_fallback(Span {
904 #[cfg(span_locations)]
905 lo,
906 #[cfg(span_locations)]
907 hi: rest.off,
908 });
909
910 let mut scan_for_bare_cr = comment;
911 while let Some(cr) = scan_for_bare_cr.find('\r') {
912 let rest = &scan_for_bare_cr[cr + 1..];
913 if !rest.starts_with('\n') {
914 return Err(Reject);
915 }
916 scan_for_bare_cr = rest;
917 }
918
919 let mut pound = Punct::new('#', Spacing::Alone);
920 pound.set_span(span);
921 trees.push_token_from_parser(TokenTree::Punct(pound));
922
923 if inner {
924 let mut bang = Punct::new('!', Spacing::Alone);
925 bang.set_span(span);
926 trees.push_token_from_parser(TokenTree::Punct(bang));
927 }
928
929 let doc_ident = crate::Ident::new("doc", span);
930 let mut equal = Punct::new('=', Spacing::Alone);
931 equal.set_span(span);
932 let mut literal = crate::Literal::string(comment);
933 literal.set_span(span);
934 let mut bracketed = TokenStreamBuilder::with_capacity(3);
935 bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
936 bracketed.push_token_from_parser(TokenTree::Punct(equal));
937 bracketed.push_token_from_parser(TokenTree::Literal(literal));
938 let group = Group::new(Delimiter::Bracket, bracketed.build());
939 let mut group = crate::Group::_new_fallback(group);
940 group.set_span(span);
941 trees.push_token_from_parser(TokenTree::Group(group));
942
943 Ok((rest, ()))
944}
945
946fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
947 if input.starts_with("//!") {
948 let input = input.advance(3);
949 let (input, s) = take_until_newline_or_eof(input);
950 Ok((input, (s, true)))
951 } else if input.starts_with("/*!") {
952 let (input, s) = block_comment(input)?;
953 Ok((input, (&s[3..s.len() - 2], true)))
954 } else if input.starts_with("///") {
955 let input = input.advance(3);
956 if input.starts_with_char('/') {
957 return Err(Reject);
958 }
959 let (input, s) = take_until_newline_or_eof(input);
960 Ok((input, (s, false)))
961 } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
962 let (input, s) = block_comment(input)?;
963 Ok((input, (&s[3..s.len() - 2], false)))
964 } else {
965 Err(Reject)
966 }
967}
968
969fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
970 let chars = input.char_indices();
971
972 for (i, ch) in chars {
973 if ch == '\n' {
974 return (input.advance(i), &input.rest[..i]);
975 } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
976 return (input.advance(i + 1), &input.rest[..i]);
977 }
978 }
979
980 (input.advance(input.len()), input.rest)
981}