proc_macro2/
parse.rs

use crate::fallback::{
    self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream,
    TokenStreamBuilder,
};
use crate::{Delimiter, Punct, Spacing, TokenTree};
use core::char;
use core::str::{Bytes, CharIndices, Chars};
8
/// A position within the source text being lexed.
///
/// `rest` is the not-yet-consumed tail of the input. When compiled with
/// `cfg(span_locations)`, `off` is an offset that `advance` increases by the
/// number of `char`s it steps over.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) struct Cursor<'a> {
    pub(crate) rest: &'a str,
    #[cfg(span_locations)]
    pub(crate) off: u32,
}
15
16impl<'a> Cursor<'a> {
17    pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> {
18        let (_front, rest) = self.rest.split_at(bytes);
19        Cursor {
20            rest,
21            #[cfg(span_locations)]
22            off: self.off + _front.chars().count() as u32,
23        }
24    }
25
26    pub(crate) fn starts_with(&self, s: &str) -> bool {
27        self.rest.starts_with(s)
28    }
29
30    pub(crate) fn starts_with_char(&self, ch: char) -> bool {
31        self.rest.starts_with(ch)
32    }
33
34    pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
35    where
36        Pattern: FnMut(char) -> bool,
37    {
38        self.rest.starts_with(f)
39    }
40
41    pub(crate) fn is_empty(&self) -> bool {
42        self.rest.is_empty()
43    }
44
45    fn len(&self) -> usize {
46        self.rest.len()
47    }
48
49    fn as_bytes(&self) -> &'a [u8] {
50        self.rest.as_bytes()
51    }
52
53    fn bytes(&self) -> Bytes<'a> {
54        self.rest.bytes()
55    }
56
57    fn chars(&self) -> Chars<'a> {
58        self.rest.chars()
59    }
60
61    fn char_indices(&self) -> CharIndices<'a> {
62        self.rest.char_indices()
63    }
64
65    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
66        if self.starts_with(tag) {
67            Ok(self.advance(tag.len()))
68        } else {
69            Err(Reject)
70        }
71    }
72}
73
74pub(crate) struct Reject;
75type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
76
77fn skip_whitespace(input: Cursor) -> Cursor {
78    let mut s = input;
79
80    while !s.is_empty() {
81        let byte = s.as_bytes()[0];
82        if byte == b'/' {
83            if s.starts_with("//")
84                && (!s.starts_with("///") || s.starts_with("////"))
85                && !s.starts_with("//!")
86            {
87                let (cursor, _) = take_until_newline_or_eof(s);
88                s = cursor;
89                continue;
90            } else if s.starts_with("/**/") {
91                s = s.advance(4);
92                continue;
93            } else if s.starts_with("/*")
94                && (!s.starts_with("/**") || s.starts_with("/***"))
95                && !s.starts_with("/*!")
96            {
97                match block_comment(s) {
98                    Ok((rest, _)) => {
99                        s = rest;
100                        continue;
101                    }
102                    Err(Reject) => return s,
103                }
104            }
105        }
106        match byte {
107            b' ' | 0x09..=0x0d => {
108                s = s.advance(1);
109                continue;
110            }
111            b if b.is_ascii() => {}
112            _ => {
113                let ch = s.chars().next().unwrap();
114                if is_whitespace(ch) {
115                    s = s.advance(ch.len_utf8());
116                    continue;
117                }
118            }
119        }
120        return s;
121    }
122    s
123}
124
125fn block_comment(input: Cursor) -> PResult<&str> {
126    if !input.starts_with("/*") {
127        return Err(Reject);
128    }
129
130    let mut depth = 0usize;
131    let bytes = input.as_bytes();
132    let mut i = 0usize;
133    let upper = bytes.len() - 1;
134
135    while i < upper {
136        if bytes[i] == b'/' && bytes[i + 1] == b'*' {
137            depth += 1;
138            i += 1; // eat '*'
139        } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
140            depth -= 1;
141            if depth == 0 {
142                return Ok((input.advance(i + 2), &input.rest[..i + 2]));
143            }
144            i += 1; // eat '/'
145        }
146        i += 1;
147    }
148
149    Err(Reject)
150}
151
/// Whether `ch` counts as whitespace for the lexer: anything
/// `char::is_whitespace` accepts, plus U+200E and U+200F.
fn is_whitespace(ch: char) -> bool {
    // Rust treats left-to-right mark and right-to-left mark as whitespace
    ch.is_whitespace() || matches!(ch, '\u{200e}' | '\u{200f}')
}
156
157fn word_break(input: Cursor) -> Result<Cursor, Reject> {
158    match input.chars().next() {
159        Some(ch) if is_ident_continue(ch) => Err(Reject),
160        Some(_) | None => Ok(input),
161    }
162}
163
// Rustc's representation of a macro expansion error in expression position or
// type position.
const ERROR: &str = "(/*ERROR*/)";
167
168pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
169    let mut tokens = TokenStreamBuilder::new();
170    let mut stack = Vec::new();
171
172    loop {
173        input = skip_whitespace(input);
174
175        if let Ok((rest, ())) = doc_comment(input, &mut tokens) {
176            input = rest;
177            continue;
178        }
179
180        #[cfg(span_locations)]
181        let lo = input.off;
182
183        let Some(first) = input.bytes().next() else {
184            return match stack.last() {
185                None => Ok(tokens.build()),
186                #[cfg(span_locations)]
187                Some((lo, _frame)) => Err(LexError {
188                    span: Span { lo: *lo, hi: *lo },
189                }),
190                #[cfg(not(span_locations))]
191                Some(_frame) => Err(LexError { span: Span {} }),
192            };
193        };
194
195        if let Some(open_delimiter) = match first {
196            b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
197            b'[' => Some(Delimiter::Bracket),
198            b'{' => Some(Delimiter::Brace),
199            _ => None,
200        } {
201            input = input.advance(1);
202            let frame = (open_delimiter, tokens);
203            #[cfg(span_locations)]
204            let frame = (lo, frame);
205            stack.push(frame);
206            tokens = TokenStreamBuilder::new();
207        } else if let Some(close_delimiter) = match first {
208            b')' => Some(Delimiter::Parenthesis),
209            b']' => Some(Delimiter::Bracket),
210            b'}' => Some(Delimiter::Brace),
211            _ => None,
212        } {
213            let Some(frame) = stack.pop() else {
214                return Err(lex_error(input));
215            };
216            #[cfg(span_locations)]
217            let (lo, frame) = frame;
218            let (open_delimiter, outer) = frame;
219            if open_delimiter != close_delimiter {
220                return Err(lex_error(input));
221            }
222            input = input.advance(1);
223            let mut g = Group::new(open_delimiter, tokens.build());
224            g.set_span(Span {
225                #[cfg(span_locations)]
226                lo,
227                #[cfg(span_locations)]
228                hi: input.off,
229            });
230            tokens = outer;
231            tokens.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
232        } else {
233            let (rest, mut tt) = match leaf_token(input) {
234                Ok((rest, tt)) => (rest, tt),
235                Err(Reject) => return Err(lex_error(input)),
236            };
237            tt.set_span(crate::Span::_new_fallback(Span {
238                #[cfg(span_locations)]
239                lo,
240                #[cfg(span_locations)]
241                hi: rest.off,
242            }));
243            tokens.push_token_from_parser(tt);
244            input = rest;
245        }
246    }
247}
248
249fn lex_error(cursor: Cursor) -> LexError {
250    #[cfg(not(span_locations))]
251    let _ = cursor;
252    LexError {
253        span: Span {
254            #[cfg(span_locations)]
255            lo: cursor.off,
256            #[cfg(span_locations)]
257            hi: cursor.off,
258        },
259    }
260}
261
262fn leaf_token(input: Cursor) -> PResult<TokenTree> {
263    if let Ok((input, l)) = literal(input) {
264        // must be parsed before ident
265        Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
266    } else if let Ok((input, p)) = punct(input) {
267        Ok((input, TokenTree::Punct(p)))
268    } else if let Ok((input, i)) = ident(input) {
269        Ok((input, TokenTree::Ident(i)))
270    } else if input.starts_with(ERROR) {
271        let rest = input.advance(ERROR.len());
272        let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
273        Ok((rest, TokenTree::Literal(repr)))
274    } else {
275        Err(Reject)
276    }
277}
278
279fn ident(input: Cursor) -> PResult<crate::Ident> {
280    if [
281        "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
282    ]
283    .iter()
284    .any(|prefix| input.starts_with(prefix))
285    {
286        Err(Reject)
287    } else {
288        ident_any(input)
289    }
290}
291
292fn ident_any(input: Cursor) -> PResult<crate::Ident> {
293    let raw = input.starts_with("r#");
294    let rest = input.advance((raw as usize) << 1);
295
296    let (rest, sym) = ident_not_raw(rest)?;
297
298    if !raw {
299        let ident =
300            crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site()));
301        return Ok((rest, ident));
302    }
303
304    match sym {
305        "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
306        _ => {}
307    }
308
309    let ident =
310        crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site()));
311    Ok((rest, ident))
312}
313
314fn ident_not_raw(input: Cursor) -> PResult<&str> {
315    let mut chars = input.char_indices();
316
317    match chars.next() {
318        Some((_, ch)) if is_ident_start(ch) => {}
319        _ => return Err(Reject),
320    }
321
322    let mut end = input.len();
323    for (i, ch) in chars {
324        if !is_ident_continue(ch) {
325            end = i;
326            break;
327        }
328    }
329
330    Ok((input.advance(end), &input.rest[..end]))
331}
332
333pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
334    let rest = literal_nocapture(input)?;
335    let end = input.len() - rest.len();
336    Ok((rest, Literal::_new(input.rest[..end].to_string())))
337}
338
339fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
340    if let Ok(ok) = string(input) {
341        Ok(ok)
342    } else if let Ok(ok) = byte_string(input) {
343        Ok(ok)
344    } else if let Ok(ok) = c_string(input) {
345        Ok(ok)
346    } else if let Ok(ok) = byte(input) {
347        Ok(ok)
348    } else if let Ok(ok) = character(input) {
349        Ok(ok)
350    } else if let Ok(ok) = float(input) {
351        Ok(ok)
352    } else if let Ok(ok) = int(input) {
353        Ok(ok)
354    } else {
355        Err(Reject)
356    }
357}
358
359fn literal_suffix(input: Cursor) -> Cursor {
360    match ident_not_raw(input) {
361        Ok((input, _)) => input,
362        Err(Reject) => input,
363    }
364}
365
366fn string(input: Cursor) -> Result<Cursor, Reject> {
367    if let Ok(input) = input.parse("\"") {
368        cooked_string(input)
369    } else if let Ok(input) = input.parse("r") {
370        raw_string(input)
371    } else {
372        Err(Reject)
373    }
374}
375
376fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
377    let mut chars = input.char_indices();
378
379    while let Some((i, ch)) = chars.next() {
380        match ch {
381            '"' => {
382                let input = input.advance(i + 1);
383                return Ok(literal_suffix(input));
384            }
385            '\r' => match chars.next() {
386                Some((_, '\n')) => {}
387                _ => break,
388            },
389            '\\' => match chars.next() {
390                Some((_, 'x')) => {
391                    backslash_x_char(&mut chars)?;
392                }
393                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
394                Some((_, 'u')) => {
395                    backslash_u(&mut chars)?;
396                }
397                Some((newline, ch @ ('\n' | '\r'))) => {
398                    input = input.advance(newline + 1);
399                    trailing_backslash(&mut input, ch as u8)?;
400                    chars = input.char_indices();
401                }
402                _ => break,
403            },
404            _ch => {}
405        }
406    }
407    Err(Reject)
408}
409
410fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
411    let (input, delimiter) = delimiter_of_raw_string(input)?;
412    let mut bytes = input.bytes().enumerate();
413    while let Some((i, byte)) = bytes.next() {
414        match byte {
415            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
416                let rest = input.advance(i + 1 + delimiter.len());
417                return Ok(literal_suffix(rest));
418            }
419            b'\r' => match bytes.next() {
420                Some((_, b'\n')) => {}
421                _ => break,
422            },
423            _ => {}
424        }
425    }
426    Err(Reject)
427}
428
429fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
430    if let Ok(input) = input.parse("b\"") {
431        cooked_byte_string(input)
432    } else if let Ok(input) = input.parse("br") {
433        raw_byte_string(input)
434    } else {
435        Err(Reject)
436    }
437}
438
439fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
440    let mut bytes = input.bytes().enumerate();
441    while let Some((offset, b)) = bytes.next() {
442        match b {
443            b'"' => {
444                let input = input.advance(offset + 1);
445                return Ok(literal_suffix(input));
446            }
447            b'\r' => match bytes.next() {
448                Some((_, b'\n')) => {}
449                _ => break,
450            },
451            b'\\' => match bytes.next() {
452                Some((_, b'x')) => {
453                    backslash_x_byte(&mut bytes)?;
454                }
455                Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
456                Some((newline, b @ (b'\n' | b'\r'))) => {
457                    input = input.advance(newline + 1);
458                    trailing_backslash(&mut input, b)?;
459                    bytes = input.bytes().enumerate();
460                }
461                _ => break,
462            },
463            b if b.is_ascii() => {}
464            _ => break,
465        }
466    }
467    Err(Reject)
468}
469
470fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
471    for (i, byte) in input.bytes().enumerate() {
472        match byte {
473            b'"' => {
474                if i > 255 {
475                    // https://github.com/rust-lang/rust/pull/95251
476                    return Err(Reject);
477                }
478                return Ok((input.advance(i + 1), &input.rest[..i]));
479            }
480            b'#' => {}
481            _ => break,
482        }
483    }
484    Err(Reject)
485}
486
487fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
488    let (input, delimiter) = delimiter_of_raw_string(input)?;
489    let mut bytes = input.bytes().enumerate();
490    while let Some((i, byte)) = bytes.next() {
491        match byte {
492            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
493                let rest = input.advance(i + 1 + delimiter.len());
494                return Ok(literal_suffix(rest));
495            }
496            b'\r' => match bytes.next() {
497                Some((_, b'\n')) => {}
498                _ => break,
499            },
500            other => {
501                if !other.is_ascii() {
502                    break;
503                }
504            }
505        }
506    }
507    Err(Reject)
508}
509
510fn c_string(input: Cursor) -> Result<Cursor, Reject> {
511    if let Ok(input) = input.parse("c\"") {
512        cooked_c_string(input)
513    } else if let Ok(input) = input.parse("cr") {
514        raw_c_string(input)
515    } else {
516        Err(Reject)
517    }
518}
519
520fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
521    let (input, delimiter) = delimiter_of_raw_string(input)?;
522    let mut bytes = input.bytes().enumerate();
523    while let Some((i, byte)) = bytes.next() {
524        match byte {
525            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
526                let rest = input.advance(i + 1 + delimiter.len());
527                return Ok(literal_suffix(rest));
528            }
529            b'\r' => match bytes.next() {
530                Some((_, b'\n')) => {}
531                _ => break,
532            },
533            b'\0' => break,
534            _ => {}
535        }
536    }
537    Err(Reject)
538}
539
540fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
541    let mut chars = input.char_indices();
542
543    while let Some((i, ch)) = chars.next() {
544        match ch {
545            '"' => {
546                let input = input.advance(i + 1);
547                return Ok(literal_suffix(input));
548            }
549            '\r' => match chars.next() {
550                Some((_, '\n')) => {}
551                _ => break,
552            },
553            '\\' => match chars.next() {
554                Some((_, 'x')) => {
555                    backslash_x_nonzero(&mut chars)?;
556                }
557                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
558                Some((_, 'u')) => {
559                    if backslash_u(&mut chars)? == '\0' {
560                        break;
561                    }
562                }
563                Some((newline, ch @ ('\n' | '\r'))) => {
564                    input = input.advance(newline + 1);
565                    trailing_backslash(&mut input, ch as u8)?;
566                    chars = input.char_indices();
567                }
568                _ => break,
569            },
570            '\0' => break,
571            _ch => {}
572        }
573    }
574    Err(Reject)
575}
576
577fn byte(input: Cursor) -> Result<Cursor, Reject> {
578    let input = input.parse("b'")?;
579    let mut bytes = input.bytes().enumerate();
580    let ok = match bytes.next().map(|(_, b)| b) {
581        Some(b'\\') => match bytes.next().map(|(_, b)| b) {
582            Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
583            Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
584            _ => false,
585        },
586        b => b.is_some(),
587    };
588    if !ok {
589        return Err(Reject);
590    }
591    let (offset, _) = bytes.next().ok_or(Reject)?;
592    if !input.chars().as_str().is_char_boundary(offset) {
593        return Err(Reject);
594    }
595    let input = input.advance(offset).parse("'")?;
596    Ok(literal_suffix(input))
597}
598
599fn character(input: Cursor) -> Result<Cursor, Reject> {
600    let input = input.parse("'")?;
601    let mut chars = input.char_indices();
602    let ok = match chars.next().map(|(_, ch)| ch) {
603        Some('\\') => match chars.next().map(|(_, ch)| ch) {
604            Some('x') => backslash_x_char(&mut chars).is_ok(),
605            Some('u') => backslash_u(&mut chars).is_ok(),
606            Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
607            _ => false,
608        },
609        ch => ch.is_some(),
610    };
611    if !ok {
612        return Err(Reject);
613    }
614    let (idx, _) = chars.next().ok_or(Reject)?;
615    let input = input.advance(idx).parse("'")?;
616    Ok(literal_suffix(input))
617}
618
/// Pulls the next `(index, item)` pair from `$chars` and evaluates to the
/// item if it matches `$pat`; otherwise returns `Err(Reject)` from the
/// enclosing function (also when the iterator is exhausted).
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) => match ch {
                $pat => ch,
                _ => return Err(Reject),
            },
            None => return Err(Reject),
        }
    };
}
630
631fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
632where
633    I: Iterator<Item = (usize, char)>,
634{
635    match chars.next() {
    Some((_, ch)) => match ch { '0'..='7' => ch, _ => return Err(Reject), },
    None => return Err(Reject),
};next_ch!(chars @ '0'..='7');
636    match chars.next() {
    Some((_, ch)) =>
        match ch {
            '0'..='9' | 'a'..='f' | 'A'..='F' => ch,
            _ => return Err(Reject),
        },
    None => return Err(Reject),
};next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
637    Ok(())
638}
639
640fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
641where
642    I: Iterator<Item = (usize, u8)>,
643{
644    match chars.next() {
    Some((_, ch)) =>
        match ch {
            b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' => ch,
            _ => return Err(Reject),
        },
    None => return Err(Reject),
};next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
645    match chars.next() {
    Some((_, ch)) =>
        match ch {
            b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' => ch,
            _ => return Err(Reject),
        },
    None => return Err(Reject),
};next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
646    Ok(())
647}
648
649fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
650where
651    I: Iterator<Item = (usize, char)>,
652{
653    let first = match chars.next() {
    Some((_, ch)) =>
        match ch {
            '0'..='9' | 'a'..='f' | 'A'..='F' => ch,
            _ => return Err(Reject),
        },
    None => return Err(Reject),
}next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
654    let second = match chars.next() {
    Some((_, ch)) =>
        match ch {
            '0'..='9' | 'a'..='f' | 'A'..='F' => ch,
            _ => return Err(Reject),
        },
    None => return Err(Reject),
}next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
655    if first == '0' && second == '0' {
656        Err(Reject)
657    } else {
658        Ok(())
659    }
660}
661
662fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
663where
664    I: Iterator<Item = (usize, char)>,
665{
666    match chars.next() {
    Some((_, ch)) => match ch { '{' => ch, _ => return Err(Reject), },
    None => return Err(Reject),
};next_ch!(chars @ '{');
667    let mut value = 0;
668    let mut len = 0;
669    for (_, ch) in chars {
670        let digit = match ch {
671            '0'..='9' => ch as u8 - b'0',
672            'a'..='f' => 10 + ch as u8 - b'a',
673            'A'..='F' => 10 + ch as u8 - b'A',
674            '_' if len > 0 => continue,
675            '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
676            _ => break,
677        };
678        if len == 6 {
679            break;
680        }
681        value *= 0x10;
682        value += u32::from(digit);
683        len += 1;
684    }
685    Err(Reject)
686}
687
688fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
689    let mut whitespace = input.bytes().enumerate();
690    loop {
691        if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
692            return Err(Reject);
693        }
694        match whitespace.next() {
695            Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
696                last = b;
697            }
698            Some((offset, _)) => {
699                *input = input.advance(offset);
700                return Ok(());
701            }
702            None => return Err(Reject),
703        }
704    }
705}
706
707fn float(input: Cursor) -> Result<Cursor, Reject> {
708    let mut rest = float_digits(input)?;
709    if let Some(ch) = rest.chars().next() {
710        if is_ident_start(ch) {
711            rest = ident_not_raw(rest)?.0;
712        }
713    }
714    word_break(rest)
715}
716
717fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
718    let mut chars = input.chars().peekable();
719    match chars.next() {
720        Some(ch) if '0' <= ch && ch <= '9' => {}
721        _ => return Err(Reject),
722    }
723
724    let mut len = 1;
725    let mut has_dot = false;
726    let mut has_exp = false;
727    while let Some(&ch) = chars.peek() {
728        match ch {
729            '0'..='9' | '_' => {
730                chars.next();
731                len += 1;
732            }
733            '.' => {
734                if has_dot {
735                    break;
736                }
737                chars.next();
738                if chars
739                    .peek()
740                    .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
741                {
742                    return Err(Reject);
743                }
744                len += 1;
745                has_dot = true;
746            }
747            'e' | 'E' => {
748                chars.next();
749                len += 1;
750                has_exp = true;
751                break;
752            }
753            _ => break,
754        }
755    }
756
757    if !(has_dot || has_exp) {
758        return Err(Reject);
759    }
760
761    if has_exp {
762        let token_before_exp = if has_dot {
763            Ok(input.advance(len - 1))
764        } else {
765            Err(Reject)
766        };
767        let mut has_sign = false;
768        let mut has_exp_value = false;
769        while let Some(&ch) = chars.peek() {
770            match ch {
771                '+' | '-' => {
772                    if has_exp_value {
773                        break;
774                    }
775                    if has_sign {
776                        return token_before_exp;
777                    }
778                    chars.next();
779                    len += 1;
780                    has_sign = true;
781                }
782                '0'..='9' => {
783                    chars.next();
784                    len += 1;
785                    has_exp_value = true;
786                }
787                '_' => {
788                    chars.next();
789                    len += 1;
790                }
791                _ => break,
792            }
793        }
794        if !has_exp_value {
795            return token_before_exp;
796        }
797    }
798
799    Ok(input.advance(len))
800}
801
802fn int(input: Cursor) -> Result<Cursor, Reject> {
803    let mut rest = digits(input)?;
804    if let Some(ch) = rest.chars().next() {
805        if is_ident_start(ch) {
806            rest = ident_not_raw(rest)?.0;
807        }
808    }
809    word_break(rest)
810}
811
812fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
813    let base = if input.starts_with("0x") {
814        input = input.advance(2);
815        16
816    } else if input.starts_with("0o") {
817        input = input.advance(2);
818        8
819    } else if input.starts_with("0b") {
820        input = input.advance(2);
821        2
822    } else {
823        10
824    };
825
826    let mut len = 0;
827    let mut empty = true;
828    for b in input.bytes() {
829        match b {
830            b'0'..=b'9' => {
831                let digit = (b - b'0') as u64;
832                if digit >= base {
833                    return Err(Reject);
834                }
835            }
836            b'a'..=b'f' => {
837                let digit = 10 + (b - b'a') as u64;
838                if digit >= base {
839                    break;
840                }
841            }
842            b'A'..=b'F' => {
843                let digit = 10 + (b - b'A') as u64;
844                if digit >= base {
845                    break;
846                }
847            }
848            b'_' => {
849                if empty && base == 10 {
850                    return Err(Reject);
851                }
852                len += 1;
853                continue;
854            }
855            _ => break,
856        }
857        len += 1;
858        empty = false;
859    }
860    if empty {
861        Err(Reject)
862    } else {
863        Ok(input.advance(len))
864    }
865}
866
867fn punct(input: Cursor) -> PResult<Punct> {
868    let (rest, ch) = punct_char(input)?;
869    if ch == '\'' {
870        let (after_lifetime, _ident) = ident_any(rest)?;
871        if after_lifetime.starts_with_char('\'')
872            || (after_lifetime.starts_with_char('#') && !rest.starts_with("r#"))
873        {
874            Err(Reject)
875        } else {
876            Ok((rest, Punct::new('\'', Spacing::Joint)))
877        }
878    } else {
879        let kind = match punct_char(rest) {
880            Ok(_) => Spacing::Joint,
881            Err(Reject) => Spacing::Alone,
882        };
883        Ok((rest, Punct::new(ch, kind)))
884    }
885}
886
887fn punct_char(input: Cursor) -> PResult<char> {
888    if input.starts_with("//") || input.starts_with("/*") {
889        // Do not accept `/` of a comment as a punct.
890        return Err(Reject);
891    }
892
893    let mut chars = input.chars();
894    let Some(first) = chars.next() else {
895        return Err(Reject);
896    };
897    let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
898    if recognized.contains(first) {
899        Ok((input.advance(first.len_utf8()), first))
900    } else {
901        Err(Reject)
902    }
903}
904
905fn doc_comment<'a>(input: Cursor<'a>, tokens: &mut TokenStreamBuilder) -> PResult<'a, ()> {
906    #[cfg(span_locations)]
907    let lo = input.off;
908    let (rest, (comment, inner)) = doc_comment_contents(input)?;
909    let fallback_span = Span {
910        #[cfg(span_locations)]
911        lo,
912        #[cfg(span_locations)]
913        hi: rest.off,
914    };
915    let span = crate::Span::_new_fallback(fallback_span);
916
917    let mut scan_for_bare_cr = comment;
918    while let Some(cr) = scan_for_bare_cr.find('\r') {
919        let rest = &scan_for_bare_cr[cr + 1..];
920        if !rest.starts_with('\n') {
921            return Err(Reject);
922        }
923        scan_for_bare_cr = rest;
924    }
925
926    let mut pound = Punct::new('#', Spacing::Alone);
927    pound.set_span(span);
928    tokens.push_token_from_parser(TokenTree::Punct(pound));
929
930    if inner {
931        let mut bang = Punct::new('!', Spacing::Alone);
932        bang.set_span(span);
933        tokens.push_token_from_parser(TokenTree::Punct(bang));
934    }
935
936    let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span));
937    let mut equal = Punct::new('=', Spacing::Alone);
938    equal.set_span(span);
939    let mut literal = crate::Literal::_new_fallback(Literal::string(comment));
940    literal.set_span(span);
941    let mut bracketed = TokenStreamBuilder::with_capacity(3);
942    bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
943    bracketed.push_token_from_parser(TokenTree::Punct(equal));
944    bracketed.push_token_from_parser(TokenTree::Literal(literal));
945    let group = Group::new(Delimiter::Bracket, bracketed.build());
946    let mut group = crate::Group::_new_fallback(group);
947    group.set_span(span);
948    tokens.push_token_from_parser(TokenTree::Group(group));
949
950    Ok((rest, ()))
951}
952
953fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
954    if input.starts_with("//!") {
955        let input = input.advance(3);
956        let (input, s) = take_until_newline_or_eof(input);
957        Ok((input, (s, true)))
958    } else if input.starts_with("/*!") {
959        let (input, s) = block_comment(input)?;
960        Ok((input, (&s[3..s.len() - 2], true)))
961    } else if input.starts_with("///") {
962        let input = input.advance(3);
963        if input.starts_with_char('/') {
964            return Err(Reject);
965        }
966        let (input, s) = take_until_newline_or_eof(input);
967        Ok((input, (s, false)))
968    } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
969        let (input, s) = block_comment(input)?;
970        Ok((input, (&s[3..s.len() - 2], false)))
971    } else {
972        Err(Reject)
973    }
974}
975
976fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
977    let chars = input.char_indices();
978
979    for (i, ch) in chars {
980        if ch == '\n' {
981            return (input.advance(i), &input.rest[..i]);
982        } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
983            return (input.advance(i + 1), &input.rest[..i]);
984        }
985    }
986
987    (input.advance(input.len()), input.rest)
988}