proc_macro2_fallback/
parse.rs

1use crate::fallback::{
2    is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream,
3    TokenStreamBuilder,
4};
5use crate::{Delimiter, Punct, Spacing, TokenTree};
6use core::char;
7use core::str::{Bytes, CharIndices, Chars};
8
/// A cheap, copyable view onto the part of the source text that has not been
/// lexed yet.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) struct Cursor<'a> {
    /// The remaining, unconsumed input.
    pub rest: &'a str,
    /// Position of `rest`; `advance` bumps it by the number of `char`s it
    /// steps over. Only present when span locations are enabled.
    #[cfg(span_locations)]
    pub off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor that has moved `bytes` bytes further into the input.
    ///
    /// `bytes` must be in range and on a `char` boundary, otherwise
    /// `str::split_at` panics.
    pub fn advance(&self, bytes: usize) -> Cursor<'a> {
        let (_consumed, remaining) = self.rest.split_at(bytes);
        Cursor {
            rest: remaining,
            #[cfg(span_locations)]
            off: self.off + _consumed.chars().count() as u32,
        }
    }

    /// Whether the remaining input begins with the string `s`.
    pub fn starts_with(&self, s: &str) -> bool {
        str::starts_with(self.rest, s)
    }

    /// Whether the remaining input begins with the character `ch`.
    pub fn starts_with_char(&self, ch: char) -> bool {
        str::starts_with(self.rest, ch)
    }

    /// Whether the first remaining character satisfies the predicate `f`.
    pub fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
    where
        Pattern: FnMut(char) -> bool,
    {
        str::starts_with(self.rest, f)
    }

    /// Whether all input has been consumed.
    pub fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Number of bytes left in the input.
    fn len(&self) -> usize {
        str::len(self.rest)
    }

    /// The remaining input as raw bytes.
    fn as_bytes(&self) -> &'a [u8] {
        str::as_bytes(self.rest)
    }

    /// Iterator over the remaining bytes.
    fn bytes(&self) -> Bytes<'a> {
        str::bytes(self.rest)
    }

    /// Iterator over the remaining characters.
    fn chars(&self) -> Chars<'a> {
        str::chars(self.rest)
    }

    /// Iterator over the remaining characters paired with their byte offsets.
    fn char_indices(&self) -> CharIndices<'a> {
        str::char_indices(self.rest)
    }

    /// Consumes the literal `tag` from the front of the input, or rejects if
    /// the input does not start with it.
    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
        if !self.starts_with(tag) {
            return Err(Reject);
        }
        Ok(self.advance(tag.len()))
    }
}

/// Marker error: the attempted sub-parser did not match at this position.
pub(crate) struct Reject;

/// Result of a sub-parser: the cursor after the match plus the parsed value.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
76
77fn skip_whitespace(input: Cursor) -> Cursor {
78    let mut s = input;
79
80    while !s.is_empty() {
81        let byte = s.as_bytes()[0];
82        if byte == b'/' {
83            if s.starts_with("//")
84                && (!s.starts_with("///") || s.starts_with("////"))
85                && !s.starts_with("//!")
86            {
87                let (cursor, _) = take_until_newline_or_eof(s);
88                s = cursor;
89                continue;
90            } else if s.starts_with("/**/") {
91                s = s.advance(4);
92                continue;
93            } else if s.starts_with("/*")
94                && (!s.starts_with("/**") || s.starts_with("/***"))
95                && !s.starts_with("/*!")
96            {
97                match block_comment(s) {
98                    Ok((rest, _)) => {
99                        s = rest;
100                        continue;
101                    }
102                    Err(Reject) => return s,
103                }
104            }
105        }
106        match byte {
107            b' ' | 0x09..=0x0d => {
108                s = s.advance(1);
109                continue;
110            }
111            b if b.is_ascii() => {}
112            _ => {
113                let ch = s.chars().next().unwrap();
114                if is_whitespace(ch) {
115                    s = s.advance(ch.len_utf8());
116                    continue;
117                }
118            }
119        }
120        return s;
121    }
122    s
123}
124
125fn block_comment(input: Cursor) -> PResult<&str> {
126    if !input.starts_with("/*") {
127        return Err(Reject);
128    }
129
130    let mut depth = 0usize;
131    let bytes = input.as_bytes();
132    let mut i = 0usize;
133    let upper = bytes.len() - 1;
134
135    while i < upper {
136        if bytes[i] == b'/' && bytes[i + 1] == b'*' {
137            depth += 1;
138            i += 1; // eat '*'
139        } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
140            depth -= 1;
141            if depth == 0 {
142                return Ok((input.advance(i + 2), &input.rest[..i + 2]));
143            }
144            i += 1; // eat '/'
145        }
146        i += 1;
147    }
148
149    Err(Reject)
150}
151
/// Whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    // Rust additionally treats the left-to-right and right-to-left marks as
    // whitespace, on top of the Unicode whitespace set.
    match ch {
        '\u{200e}' | '\u{200f}' => true,
        other => other.is_whitespace(),
    }
}
156
157fn word_break(input: Cursor) -> Result<Cursor, Reject> {
158    match input.chars().next() {
159        Some(ch) if is_ident_continue(ch) => Err(Reject),
160        Some(_) | None => Ok(input),
161    }
162}
163
/// Lexes the whole of `input` into a `TokenStream`.
///
/// Delimited groups are handled with an explicit stack rather than recursion:
/// every opening delimiter pushes the builder collected so far, and the
/// matching closing delimiter pops it and appends the finished group.
///
/// Returns a `LexError` for an unexpected or mismatched closing delimiter, for
/// input that ends while a group is still open, or when no leaf token matches.
pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
    // Tokens of the group currently being collected (or of the top level).
    let mut trees = TokenStreamBuilder::new();
    // One frame per open delimiter: the delimiter and the enclosing builder,
    // wrapped together with the opening offset when span locations are enabled.
    let mut stack = Vec::new();

    loop {
        input = skip_whitespace(input);

        // Doc comments push their own `#[doc = "..."]` tokens into `trees`.
        if let Ok((rest, ())) = doc_comment(input, &mut trees) {
            input = rest;
            continue;
        }

        // Start offset of the token (or group) about to be lexed.
        #[cfg(span_locations)]
        let lo = input.off;

        let first = match input.bytes().next() {
            Some(first) => first,
            // End of input: fine at top level, an error inside an open group.
            None => match stack.last() {
                None => return Ok(trees.build()),
                // The error span is the offset recorded for the innermost open frame.
                #[cfg(span_locations)]
                Some((lo, _frame)) => {
                    return Err(LexError {
                        span: Span { lo: *lo, hi: *lo },
                    })
                }
                #[cfg(not(span_locations))]
                Some(_frame) => return Err(LexError { span: Span {} }),
            },
        };

        if let Some(open_delimiter) = match first {
            b'(' => Some(Delimiter::Parenthesis),
            b'[' => Some(Delimiter::Bracket),
            b'{' => Some(Delimiter::Brace),
            _ => None,
        } {
            // Opening delimiter: stash the outer builder and start a fresh one.
            input = input.advance(1);
            let frame = (open_delimiter, trees);
            #[cfg(span_locations)]
            let frame = (lo, frame);
            stack.push(frame);
            trees = TokenStreamBuilder::new();
        } else if let Some(close_delimiter) = match first {
            b')' => Some(Delimiter::Parenthesis),
            b']' => Some(Delimiter::Bracket),
            b'}' => Some(Delimiter::Brace),
            _ => None,
        } {
            // Closing delimiter with nothing open is an error at this position.
            let frame = match stack.pop() {
                Some(frame) => frame,
                None => return Err(lex_error(input)),
            };
            // Shadows `lo` with the offset stored when the group was opened.
            #[cfg(span_locations)]
            let (lo, frame) = frame;
            let (open_delimiter, outer) = frame;
            if open_delimiter != close_delimiter {
                return Err(lex_error(input));
            }
            input = input.advance(1);
            let mut g = Group::new(open_delimiter, trees.build());
            // The group's span runs from the opening offset to just past the closer.
            g.set_span(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: input.off,
            });
            // Resume collecting into the enclosing builder.
            trees = outer;
            trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
        } else {
            // Anything else must be a literal, punct or ident.
            let (rest, mut tt) = match leaf_token(input) {
                Ok((rest, tt)) => (rest, tt),
                Err(Reject) => return Err(lex_error(input)),
            };
            tt.set_span(crate::Span::_new_fallback(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: rest.off,
            }));
            trees.push_token_from_parser(tt);
            input = rest;
        }
    }
}
248
249fn lex_error(cursor: Cursor) -> LexError {
250    #[cfg(not(span_locations))]
251    let _ = cursor;
252    LexError {
253        span: Span {
254            #[cfg(span_locations)]
255            lo: cursor.off,
256            #[cfg(span_locations)]
257            hi: cursor.off,
258        },
259    }
260}
261
262fn leaf_token(input: Cursor) -> PResult<TokenTree> {
263    if let Ok((input, l)) = literal(input) {
264        // must be parsed before ident
265        Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
266    } else if let Ok((input, p)) = punct(input) {
267        Ok((input, TokenTree::Punct(p)))
268    } else if let Ok((input, i)) = ident(input) {
269        Ok((input, TokenTree::Ident(i)))
270    } else {
271        Err(Reject)
272    }
273}
274
275fn ident(input: Cursor) -> PResult<crate::Ident> {
276    if [
277        "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
278    ]
279    .iter()
280    .any(|prefix| input.starts_with(prefix))
281    {
282        Err(Reject)
283    } else {
284        ident_any(input)
285    }
286}
287
288fn ident_any(input: Cursor) -> PResult<crate::Ident> {
289    let raw = input.starts_with("r#");
290    let rest = input.advance((raw as usize) << 1);
291
292    let (rest, sym) = ident_not_raw(rest)?;
293
294    if !raw {
295        let ident = crate::Ident::new(sym, crate::Span::call_site());
296        return Ok((rest, ident));
297    }
298
299    match sym {
300        "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
301        _ => {}
302    }
303
304    let ident = crate::Ident::_new_raw(sym, crate::Span::call_site());
305    Ok((rest, ident))
306}
307
308fn ident_not_raw(input: Cursor) -> PResult<&str> {
309    let mut chars = input.char_indices();
310
311    match chars.next() {
312        Some((_, ch)) if is_ident_start(ch) => {}
313        _ => return Err(Reject),
314    }
315
316    let mut end = input.len();
317    for (i, ch) in chars {
318        if !is_ident_continue(ch) {
319            end = i;
320            break;
321        }
322    }
323
324    Ok((input.advance(end), &input.rest[..end]))
325}
326
327pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
328    let rest = literal_nocapture(input)?;
329    let end = input.len() - rest.len();
330    Ok((rest, Literal::_new(input.rest[..end].to_string())))
331}
332
333fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
334    if let Ok(ok) = string(input) {
335        Ok(ok)
336    } else if let Ok(ok) = byte_string(input) {
337        Ok(ok)
338    } else if let Ok(ok) = c_string(input) {
339        Ok(ok)
340    } else if let Ok(ok) = byte(input) {
341        Ok(ok)
342    } else if let Ok(ok) = character(input) {
343        Ok(ok)
344    } else if let Ok(ok) = float(input) {
345        Ok(ok)
346    } else if let Ok(ok) = int(input) {
347        Ok(ok)
348    } else {
349        Err(Reject)
350    }
351}
352
353fn literal_suffix(input: Cursor) -> Cursor {
354    match ident_not_raw(input) {
355        Ok((input, _)) => input,
356        Err(Reject) => input,
357    }
358}
359
360fn string(input: Cursor) -> Result<Cursor, Reject> {
361    if let Ok(input) = input.parse("\"") {
362        cooked_string(input)
363    } else if let Ok(input) = input.parse("r") {
364        raw_string(input)
365    } else {
366        Err(Reject)
367    }
368}
369
370fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
371    let mut chars = input.char_indices();
372
373    while let Some((i, ch)) = chars.next() {
374        match ch {
375            '"' => {
376                let input = input.advance(i + 1);
377                return Ok(literal_suffix(input));
378            }
379            '\r' => match chars.next() {
380                Some((_, '\n')) => {}
381                _ => break,
382            },
383            '\\' => match chars.next() {
384                Some((_, 'x')) => {
385                    backslash_x_char(&mut chars)?;
386                }
387                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
388                Some((_, 'u')) => {
389                    backslash_u(&mut chars)?;
390                }
391                Some((newline, ch @ ('\n' | '\r'))) => {
392                    input = input.advance(newline + 1);
393                    trailing_backslash(&mut input, ch as u8)?;
394                    chars = input.char_indices();
395                }
396                _ => break,
397            },
398            _ch => {}
399        }
400    }
401    Err(Reject)
402}
403
404fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
405    let (input, delimiter) = delimiter_of_raw_string(input)?;
406    let mut bytes = input.bytes().enumerate();
407    while let Some((i, byte)) = bytes.next() {
408        match byte {
409            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
410                let rest = input.advance(i + 1 + delimiter.len());
411                return Ok(literal_suffix(rest));
412            }
413            b'\r' => match bytes.next() {
414                Some((_, b'\n')) => {}
415                _ => break,
416            },
417            _ => {}
418        }
419    }
420    Err(Reject)
421}
422
423fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
424    if let Ok(input) = input.parse("b\"") {
425        cooked_byte_string(input)
426    } else if let Ok(input) = input.parse("br") {
427        raw_byte_string(input)
428    } else {
429        Err(Reject)
430    }
431}
432
433fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
434    let mut bytes = input.bytes().enumerate();
435    while let Some((offset, b)) = bytes.next() {
436        match b {
437            b'"' => {
438                let input = input.advance(offset + 1);
439                return Ok(literal_suffix(input));
440            }
441            b'\r' => match bytes.next() {
442                Some((_, b'\n')) => {}
443                _ => break,
444            },
445            b'\\' => match bytes.next() {
446                Some((_, b'x')) => {
447                    backslash_x_byte(&mut bytes)?;
448                }
449                Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
450                Some((newline, b @ (b'\n' | b'\r'))) => {
451                    input = input.advance(newline + 1);
452                    trailing_backslash(&mut input, b)?;
453                    bytes = input.bytes().enumerate();
454                }
455                _ => break,
456            },
457            b if b.is_ascii() => {}
458            _ => break,
459        }
460    }
461    Err(Reject)
462}
463
464fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
465    for (i, byte) in input.bytes().enumerate() {
466        match byte {
467            b'"' => {
468                if i > 255 {
469                    // https://github.com/rust-lang/rust/pull/95251
470                    return Err(Reject);
471                }
472                return Ok((input.advance(i + 1), &input.rest[..i]));
473            }
474            b'#' => {}
475            _ => break,
476        }
477    }
478    Err(Reject)
479}
480
481fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
482    let (input, delimiter) = delimiter_of_raw_string(input)?;
483    let mut bytes = input.bytes().enumerate();
484    while let Some((i, byte)) = bytes.next() {
485        match byte {
486            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
487                let rest = input.advance(i + 1 + delimiter.len());
488                return Ok(literal_suffix(rest));
489            }
490            b'\r' => match bytes.next() {
491                Some((_, b'\n')) => {}
492                _ => break,
493            },
494            other => {
495                if !other.is_ascii() {
496                    break;
497                }
498            }
499        }
500    }
501    Err(Reject)
502}
503
504fn c_string(input: Cursor) -> Result<Cursor, Reject> {
505    if let Ok(input) = input.parse("c\"") {
506        cooked_c_string(input)
507    } else if let Ok(input) = input.parse("cr") {
508        raw_c_string(input)
509    } else {
510        Err(Reject)
511    }
512}
513
514fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
515    let (input, delimiter) = delimiter_of_raw_string(input)?;
516    let mut bytes = input.bytes().enumerate();
517    while let Some((i, byte)) = bytes.next() {
518        match byte {
519            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
520                let rest = input.advance(i + 1 + delimiter.len());
521                return Ok(literal_suffix(rest));
522            }
523            b'\r' => match bytes.next() {
524                Some((_, b'\n')) => {}
525                _ => break,
526            },
527            b'\0' => break,
528            _ => {}
529        }
530    }
531    Err(Reject)
532}
533
534fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
535    let mut chars = input.char_indices();
536
537    while let Some((i, ch)) = chars.next() {
538        match ch {
539            '"' => {
540                let input = input.advance(i + 1);
541                return Ok(literal_suffix(input));
542            }
543            '\r' => match chars.next() {
544                Some((_, '\n')) => {}
545                _ => break,
546            },
547            '\\' => match chars.next() {
548                Some((_, 'x')) => {
549                    backslash_x_nonzero(&mut chars)?;
550                }
551                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
552                Some((_, 'u')) => {
553                    if backslash_u(&mut chars)? == '\0' {
554                        break;
555                    }
556                }
557                Some((newline, ch @ ('\n' | '\r'))) => {
558                    input = input.advance(newline + 1);
559                    trailing_backslash(&mut input, ch as u8)?;
560                    chars = input.char_indices();
561                }
562                _ => break,
563            },
564            '\0' => break,
565            _ch => {}
566        }
567    }
568    Err(Reject)
569}
570
571fn byte(input: Cursor) -> Result<Cursor, Reject> {
572    let input = input.parse("b'")?;
573    let mut bytes = input.bytes().enumerate();
574    let ok = match bytes.next().map(|(_, b)| b) {
575        Some(b'\\') => match bytes.next().map(|(_, b)| b) {
576            Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
577            Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
578            _ => false,
579        },
580        b => b.is_some(),
581    };
582    if !ok {
583        return Err(Reject);
584    }
585    let (offset, _) = bytes.next().ok_or(Reject)?;
586    if !input.chars().as_str().is_char_boundary(offset) {
587        return Err(Reject);
588    }
589    let input = input.advance(offset).parse("'")?;
590    Ok(literal_suffix(input))
591}
592
593fn character(input: Cursor) -> Result<Cursor, Reject> {
594    let input = input.parse("'")?;
595    let mut chars = input.char_indices();
596    let ok = match chars.next().map(|(_, ch)| ch) {
597        Some('\\') => match chars.next().map(|(_, ch)| ch) {
598            Some('x') => backslash_x_char(&mut chars).is_ok(),
599            Some('u') => backslash_u(&mut chars).is_ok(),
600            Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
601            _ => false,
602        },
603        ch => ch.is_some(),
604    };
605    if !ok {
606        return Err(Reject);
607    }
608    let (idx, _) = chars.next().ok_or(Reject)?;
609    let input = input.advance(idx).parse("'")?;
610    Ok(literal_suffix(input))
611}
612
// Pulls the next `(index, item)` pair from the iterator `$chars` and evaluates
// to the item if it matches `$pat`. Otherwise (including at end of input) it
// returns `Err(Reject)` from the enclosing function.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            None => return Err(Reject),
            Some((_, ch)) => {
                if let $pat = ch {
                    ch
                } else {
                    return Err(Reject);
                }
            }
        }
    };
}
624
625fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
626where
627    I: Iterator<Item = (usize, char)>,
628{
629    next_ch!(chars @ '0'..='7');
630    next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
631    Ok(())
632}
633
634fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
635where
636    I: Iterator<Item = (usize, u8)>,
637{
638    next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
639    next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
640    Ok(())
641}
642
643fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
644where
645    I: Iterator<Item = (usize, char)>,
646{
647    let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
648    let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
649    if first == '0' && second == '0' {
650        Err(Reject)
651    } else {
652        Ok(())
653    }
654}
655
656fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
657where
658    I: Iterator<Item = (usize, char)>,
659{
660    next_ch!(chars @ '{');
661    let mut value = 0;
662    let mut len = 0;
663    for (_, ch) in chars {
664        let digit = match ch {
665            '0'..='9' => ch as u8 - b'0',
666            'a'..='f' => 10 + ch as u8 - b'a',
667            'A'..='F' => 10 + ch as u8 - b'A',
668            '_' if len > 0 => continue,
669            '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
670            _ => break,
671        };
672        if len == 6 {
673            break;
674        }
675        value *= 0x10;
676        value += u32::from(digit);
677        len += 1;
678    }
679    Err(Reject)
680}
681
682fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
683    let mut whitespace = input.bytes().enumerate();
684    loop {
685        if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
686            return Err(Reject);
687        }
688        match whitespace.next() {
689            Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
690                last = b;
691            }
692            Some((offset, _)) => {
693                *input = input.advance(offset);
694                return Ok(());
695            }
696            None => return Err(Reject),
697        }
698    }
699}
700
701fn float(input: Cursor) -> Result<Cursor, Reject> {
702    let mut rest = float_digits(input)?;
703    if let Some(ch) = rest.chars().next() {
704        if is_ident_start(ch) {
705            rest = ident_not_raw(rest)?.0;
706        }
707    }
708    word_break(rest)
709}
710
/// Recognizes the numeric part of a float literal (no suffix) and returns the
/// cursor just past it.
///
/// The input must start with a decimal digit and contain a `.` and/or an
/// exponent; a bare run of digits is rejected. If an exponent marker is not
/// followed by a valid exponent, the match falls back to the text before the
/// marker when that text contained a `.`, and is rejected otherwise.
fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.chars().peekable();
    match chars.next() {
        Some(ch) if '0' <= ch && ch <= '9' => {}
        _ => return Err(Reject),
    }

    // Number of characters accepted so far; every accepted character is
    // ASCII, so this doubles as a byte length for `advance`.
    let mut len = 1;
    let mut has_dot = false;
    let mut has_exp = false;
    while let Some(&ch) = chars.peek() {
        match ch {
            '0'..='9' | '_' => {
                chars.next();
                len += 1;
            }
            '.' => {
                // A second dot ends the literal.
                if has_dot {
                    break;
                }
                chars.next();
                // A dot followed by another dot or by an identifier start
                // (e.g. `1..2`, `1.foo`) means this is not a float.
                if chars
                    .peek()
                    .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
                {
                    return Err(Reject);
                }
                len += 1;
                has_dot = true;
            }
            'e' | 'E' => {
                // Exponent marker; the exponent itself is parsed below.
                chars.next();
                len += 1;
                has_exp = true;
                break;
            }
            _ => break,
        }
    }

    if !(has_dot || has_exp) {
        return Err(Reject);
    }

    if has_exp {
        // Result to use if the exponent turns out to be malformed: with a
        // dot, stop just before the marker (`len - 1`); without one, reject.
        let token_before_exp = if has_dot {
            Ok(input.advance(len - 1))
        } else {
            Err(Reject)
        };
        let mut has_sign = false;
        let mut has_exp_value = false;
        while let Some(&ch) = chars.peek() {
            match ch {
                '+' | '-' => {
                    // A sign after exponent digits belongs to whatever follows.
                    if has_exp_value {
                        break;
                    }
                    // A second sign before any digit: the exponent is invalid.
                    if has_sign {
                        return token_before_exp;
                    }
                    chars.next();
                    len += 1;
                    has_sign = true;
                }
                '0'..='9' => {
                    chars.next();
                    len += 1;
                    has_exp_value = true;
                }
                '_' => {
                    chars.next();
                    len += 1;
                }
                _ => break,
            }
        }
        // Marker (and maybe sign/underscores) but no digits: invalid exponent.
        if !has_exp_value {
            return token_before_exp;
        }
    }

    Ok(input.advance(len))
}
795
796fn int(input: Cursor) -> Result<Cursor, Reject> {
797    let mut rest = digits(input)?;
798    if let Some(ch) = rest.chars().next() {
799        if is_ident_start(ch) {
800            rest = ident_not_raw(rest)?.0;
801        }
802    }
803    word_break(rest)
804}
805
806fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
807    let base = if input.starts_with("0x") {
808        input = input.advance(2);
809        16
810    } else if input.starts_with("0o") {
811        input = input.advance(2);
812        8
813    } else if input.starts_with("0b") {
814        input = input.advance(2);
815        2
816    } else {
817        10
818    };
819
820    let mut len = 0;
821    let mut empty = true;
822    for b in input.bytes() {
823        match b {
824            b'0'..=b'9' => {
825                let digit = (b - b'0') as u64;
826                if digit >= base {
827                    return Err(Reject);
828                }
829            }
830            b'a'..=b'f' => {
831                let digit = 10 + (b - b'a') as u64;
832                if digit >= base {
833                    break;
834                }
835            }
836            b'A'..=b'F' => {
837                let digit = 10 + (b - b'A') as u64;
838                if digit >= base {
839                    break;
840                }
841            }
842            b'_' => {
843                if empty && base == 10 {
844                    return Err(Reject);
845                }
846                len += 1;
847                continue;
848            }
849            _ => break,
850        };
851        len += 1;
852        empty = false;
853    }
854    if empty {
855        Err(Reject)
856    } else {
857        Ok(input.advance(len))
858    }
859}
860
861fn punct(input: Cursor) -> PResult<Punct> {
862    let (rest, ch) = punct_char(input)?;
863    if ch == '\'' {
864        if ident_any(rest)?.0.starts_with_char('\'') {
865            Err(Reject)
866        } else {
867            Ok((rest, Punct::new('\'', Spacing::Joint)))
868        }
869    } else {
870        let kind = match punct_char(rest) {
871            Ok(_) => Spacing::Joint,
872            Err(Reject) => Spacing::Alone,
873        };
874        Ok((rest, Punct::new(ch, kind)))
875    }
876}
877
878fn punct_char(input: Cursor) -> PResult<char> {
879    if input.starts_with("//") || input.starts_with("/*") {
880        // Do not accept `/` of a comment as a punct.
881        return Err(Reject);
882    }
883
884    let mut chars = input.chars();
885    let first = match chars.next() {
886        Some(ch) => ch,
887        None => {
888            return Err(Reject);
889        }
890    };
891    let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
892    if recognized.contains(first) {
893        Ok((input.advance(first.len_utf8()), first))
894    } else {
895        Err(Reject)
896    }
897}
898
899fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
900    #[cfg(span_locations)]
901    let lo = input.off;
902    let (rest, (comment, inner)) = doc_comment_contents(input)?;
903    let span = crate::Span::_new_fallback(Span {
904        #[cfg(span_locations)]
905        lo,
906        #[cfg(span_locations)]
907        hi: rest.off,
908    });
909
910    let mut scan_for_bare_cr = comment;
911    while let Some(cr) = scan_for_bare_cr.find('\r') {
912        let rest = &scan_for_bare_cr[cr + 1..];
913        if !rest.starts_with('\n') {
914            return Err(Reject);
915        }
916        scan_for_bare_cr = rest;
917    }
918
919    let mut pound = Punct::new('#', Spacing::Alone);
920    pound.set_span(span);
921    trees.push_token_from_parser(TokenTree::Punct(pound));
922
923    if inner {
924        let mut bang = Punct::new('!', Spacing::Alone);
925        bang.set_span(span);
926        trees.push_token_from_parser(TokenTree::Punct(bang));
927    }
928
929    let doc_ident = crate::Ident::new("doc", span);
930    let mut equal = Punct::new('=', Spacing::Alone);
931    equal.set_span(span);
932    let mut literal = crate::Literal::string(comment);
933    literal.set_span(span);
934    let mut bracketed = TokenStreamBuilder::with_capacity(3);
935    bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
936    bracketed.push_token_from_parser(TokenTree::Punct(equal));
937    bracketed.push_token_from_parser(TokenTree::Literal(literal));
938    let group = Group::new(Delimiter::Bracket, bracketed.build());
939    let mut group = crate::Group::_new_fallback(group);
940    group.set_span(span);
941    trees.push_token_from_parser(TokenTree::Group(group));
942
943    Ok((rest, ()))
944}
945
946fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
947    if input.starts_with("//!") {
948        let input = input.advance(3);
949        let (input, s) = take_until_newline_or_eof(input);
950        Ok((input, (s, true)))
951    } else if input.starts_with("/*!") {
952        let (input, s) = block_comment(input)?;
953        Ok((input, (&s[3..s.len() - 2], true)))
954    } else if input.starts_with("///") {
955        let input = input.advance(3);
956        if input.starts_with_char('/') {
957            return Err(Reject);
958        }
959        let (input, s) = take_until_newline_or_eof(input);
960        Ok((input, (s, false)))
961    } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
962        let (input, s) = block_comment(input)?;
963        Ok((input, (&s[3..s.len() - 2], false)))
964    } else {
965        Err(Reject)
966    }
967}
968
969fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
970    let chars = input.char_indices();
971
972    for (i, ch) in chars {
973        if ch == '\n' {
974            return (input.advance(i), &input.rest[..i]);
975        } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
976            return (input.advance(i + 1), &input.rest[..i]);
977        }
978    }
979
980    (input.advance(input.len()), input.rest)
981}