proc_macro2/
parse.rs

1use crate::fallback::{
2    self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream,
3    TokenStreamBuilder,
4};
5use crate::{Delimiter, Punct, Spacing, TokenTree};
6use alloc::borrow::ToOwned as _;
7use alloc::string::ToString as _;
8use alloc::vec::Vec;
9use core::char;
10use core::str::{Bytes, CharIndices, Chars};
11
/// A small, copyable view of the input that has not been consumed yet.
///
/// The parsers in this file take a `Cursor` by value and hand back a new one
/// positioned after whatever they consumed.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) struct Cursor<'a> {
    /// The unconsumed tail of the source text.
    pub(crate) rest: &'a str,
    /// Position of `rest`, counted in `char`s (not bytes); `advance` bumps it
    /// by the number of characters it skips.
    #[cfg(span_locations)]
    pub(crate) off: u32,
}
18
19impl<'a> Cursor<'a> {
20    pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> {
21        let (_front, rest) = self.rest.split_at(bytes);
22        Cursor {
23            rest,
24            #[cfg(span_locations)]
25            off: self.off + _front.chars().count() as u32,
26        }
27    }
28
29    pub(crate) fn starts_with(&self, s: &str) -> bool {
30        self.rest.starts_with(s)
31    }
32
33    pub(crate) fn starts_with_char(&self, ch: char) -> bool {
34        self.rest.starts_with(ch)
35    }
36
37    pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
38    where
39        Pattern: FnMut(char) -> bool,
40    {
41        self.rest.starts_with(f)
42    }
43
44    pub(crate) fn is_empty(&self) -> bool {
45        self.rest.is_empty()
46    }
47
48    fn len(&self) -> usize {
49        self.rest.len()
50    }
51
52    fn as_bytes(&self) -> &'a [u8] {
53        self.rest.as_bytes()
54    }
55
56    fn bytes(&self) -> Bytes<'a> {
57        self.rest.bytes()
58    }
59
60    fn chars(&self) -> Chars<'a> {
61        self.rest.chars()
62    }
63
64    fn char_indices(&self) -> CharIndices<'a> {
65        self.rest.char_indices()
66    }
67
68    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
69        if self.starts_with(tag) {
70            Ok(self.advance(tag.len()))
71        } else {
72            Err(Reject)
73        }
74    }
75}
76
/// Data-less error value: the parsers in this file return it whenever the
/// input at the cursor does not match what they expect.
77pub(crate) struct Reject;
/// Result of a parser that produces a value: on success, the cursor placed
/// after the consumed input together with the parsed output `O`.
78type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
79
80fn skip_whitespace(input: Cursor) -> Cursor {
81    let mut s = input;
82
83    while !s.is_empty() {
84        let byte = s.as_bytes()[0];
85        if byte == b'/' {
86            if s.starts_with("//")
87                && (!s.starts_with("///") || s.starts_with("////"))
88                && !s.starts_with("//!")
89            {
90                let (cursor, _) = take_until_newline_or_eof(s);
91                s = cursor;
92                continue;
93            } else if s.starts_with("/**/") {
94                s = s.advance(4);
95                continue;
96            } else if s.starts_with("/*")
97                && (!s.starts_with("/**") || s.starts_with("/***"))
98                && !s.starts_with("/*!")
99            {
100                match block_comment(s) {
101                    Ok((rest, _)) => {
102                        s = rest;
103                        continue;
104                    }
105                    Err(Reject) => return s,
106                }
107            }
108        }
109        match byte {
110            b' ' | 0x09..=0x0d => {
111                s = s.advance(1);
112                continue;
113            }
114            b if b.is_ascii() => {}
115            _ => {
116                let ch = s.chars().next().unwrap();
117                if is_whitespace(ch) {
118                    s = s.advance(ch.len_utf8());
119                    continue;
120                }
121            }
122        }
123        return s;
124    }
125    s
126}
127
128fn block_comment(input: Cursor) -> PResult<&str> {
129    if !input.starts_with("/*") {
130        return Err(Reject);
131    }
132
133    let mut depth = 0usize;
134    let bytes = input.as_bytes();
135    let mut i = 0usize;
136    let upper = bytes.len() - 1;
137
138    while i < upper {
139        if bytes[i] == b'/' && bytes[i + 1] == b'*' {
140            depth += 1;
141            i += 1; // eat '*'
142        } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
143            depth -= 1;
144            if depth == 0 {
145                return Ok((input.advance(i + 2), &input.rest[..i + 2]));
146            }
147            i += 1; // eat '/'
148        }
149        i += 1;
150    }
151
152    Err(Reject)
153}
154
/// Whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    // Rust treats left-to-right mark and right-to-left mark as whitespace
    matches!(ch, '\u{200e}' | '\u{200f}') || ch.is_whitespace()
}
159
160fn word_break(input: Cursor) -> Result<Cursor, Reject> {
161    match input.chars().next() {
162        Some(ch) if is_ident_continue(ch) => Err(Reject),
163        Some(_) | None => Ok(input),
164    }
165}
166
167// Rustc's representation of a macro expansion error in expression position or
168// type position.
// `token_stream` does not treat the `(` of this text as an opening delimiter,
// and `leaf_token` lexes the whole text as a single literal token.
169const ERROR: &str = "(/*ERROR*/)";
170
171pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
172    let mut tokens = TokenStreamBuilder::new();
173    let mut stack = Vec::new();
174
175    loop {
176        input = skip_whitespace(input);
177
178        if let Ok((rest, ())) = doc_comment(input, &mut tokens) {
179            input = rest;
180            continue;
181        }
182
183        #[cfg(span_locations)]
184        let lo = input.off;
185
186        let Some(first) = input.bytes().next() else {
187            return match stack.last() {
188                None => Ok(tokens.build()),
189                #[cfg(span_locations)]
190                Some((lo, _frame)) => Err(LexError {
191                    span: Span { lo: *lo, hi: *lo },
192                }),
193                #[cfg(not(span_locations))]
194                Some(_frame) => Err(LexError { span: Span {} }),
195            };
196        };
197
198        if let Some(open_delimiter) = match first {
199            b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
200            b'[' => Some(Delimiter::Bracket),
201            b'{' => Some(Delimiter::Brace),
202            _ => None,
203        } {
204            input = input.advance(1);
205            let frame = (open_delimiter, tokens);
206            #[cfg(span_locations)]
207            let frame = (lo, frame);
208            stack.push(frame);
209            tokens = TokenStreamBuilder::new();
210        } else if let Some(close_delimiter) = match first {
211            b')' => Some(Delimiter::Parenthesis),
212            b']' => Some(Delimiter::Bracket),
213            b'}' => Some(Delimiter::Brace),
214            _ => None,
215        } {
216            let Some(frame) = stack.pop() else {
217                return Err(lex_error(input));
218            };
219            #[cfg(span_locations)]
220            let (lo, frame) = frame;
221            let (open_delimiter, outer) = frame;
222            if open_delimiter != close_delimiter {
223                return Err(lex_error(input));
224            }
225            input = input.advance(1);
226            let mut g = Group::new(open_delimiter, tokens.build());
227            g.set_span(Span {
228                #[cfg(span_locations)]
229                lo,
230                #[cfg(span_locations)]
231                hi: input.off,
232            });
233            tokens = outer;
234            tokens.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
235        } else {
236            let (rest, mut tt) = match leaf_token(input) {
237                Ok((rest, tt)) => (rest, tt),
238                Err(Reject) => return Err(lex_error(input)),
239            };
240            tt.set_span(crate::Span::_new_fallback(Span {
241                #[cfg(span_locations)]
242                lo,
243                #[cfg(span_locations)]
244                hi: rest.off,
245            }));
246            tokens.push_token_from_parser(tt);
247            input = rest;
248        }
249    }
250}
251
252fn lex_error(cursor: Cursor) -> LexError {
253    #[cfg(not(span_locations))]
254    let _ = cursor;
255    LexError {
256        span: Span {
257            #[cfg(span_locations)]
258            lo: cursor.off,
259            #[cfg(span_locations)]
260            hi: cursor.off,
261        },
262    }
263}
264
265fn leaf_token(input: Cursor) -> PResult<TokenTree> {
266    if let Ok((input, l)) = literal(input) {
267        // must be parsed before ident
268        Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
269    } else if let Ok((input, p)) = punct(input) {
270        Ok((input, TokenTree::Punct(p)))
271    } else if let Ok((input, i)) = ident(input) {
272        Ok((input, TokenTree::Ident(i)))
273    } else if input.starts_with(ERROR) {
274        let rest = input.advance(ERROR.len());
275        let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
276        Ok((rest, TokenTree::Literal(repr)))
277    } else {
278        Err(Reject)
279    }
280}
281
282fn ident(input: Cursor) -> PResult<crate::Ident> {
283    if [
284        "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
285    ]
286    .iter()
287    .any(|prefix| input.starts_with(prefix))
288    {
289        Err(Reject)
290    } else {
291        ident_any(input)
292    }
293}
294
295fn ident_any(input: Cursor) -> PResult<crate::Ident> {
296    let raw = input.starts_with("r#");
297    let rest = input.advance((raw as usize) << 1);
298
299    let (rest, sym) = ident_not_raw(rest)?;
300
301    if !raw {
302        let ident =
303            crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site()));
304        return Ok((rest, ident));
305    }
306
307    match sym {
308        "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
309        _ => {}
310    }
311
312    let ident =
313        crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site()));
314    Ok((rest, ident))
315}
316
317fn ident_not_raw(input: Cursor) -> PResult<&str> {
318    let mut chars = input.char_indices();
319
320    match chars.next() {
321        Some((_, ch)) if is_ident_start(ch) => {}
322        _ => return Err(Reject),
323    }
324
325    let mut end = input.len();
326    for (i, ch) in chars {
327        if !is_ident_continue(ch) {
328            end = i;
329            break;
330        }
331    }
332
333    Ok((input.advance(end), &input.rest[..end]))
334}
335
336pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
337    let rest = literal_nocapture(input)?;
338    let end = input.len() - rest.len();
339    Ok((rest, Literal::_new(input.rest[..end].to_string())))
340}
341
342fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
343    if let Ok(ok) = string(input) {
344        Ok(ok)
345    } else if let Ok(ok) = byte_string(input) {
346        Ok(ok)
347    } else if let Ok(ok) = c_string(input) {
348        Ok(ok)
349    } else if let Ok(ok) = byte(input) {
350        Ok(ok)
351    } else if let Ok(ok) = character(input) {
352        Ok(ok)
353    } else if let Ok(ok) = float(input) {
354        Ok(ok)
355    } else if let Ok(ok) = int(input) {
356        Ok(ok)
357    } else {
358        Err(Reject)
359    }
360}
361
362fn literal_suffix(input: Cursor) -> Cursor {
363    match ident_not_raw(input) {
364        Ok((input, _)) => input,
365        Err(Reject) => input,
366    }
367}
368
369fn string(input: Cursor) -> Result<Cursor, Reject> {
370    if let Ok(input) = input.parse("\"") {
371        cooked_string(input)
372    } else if let Ok(input) = input.parse("r") {
373        raw_string(input)
374    } else {
375        Err(Reject)
376    }
377}
378
379fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
380    let mut chars = input.char_indices();
381
382    while let Some((i, ch)) = chars.next() {
383        match ch {
384            '"' => {
385                let input = input.advance(i + 1);
386                return Ok(literal_suffix(input));
387            }
388            '\r' => match chars.next() {
389                Some((_, '\n')) => {}
390                _ => break,
391            },
392            '\\' => match chars.next() {
393                Some((_, 'x')) => {
394                    backslash_x_char(&mut chars)?;
395                }
396                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
397                Some((_, 'u')) => {
398                    backslash_u(&mut chars)?;
399                }
400                Some((newline, ch @ ('\n' | '\r'))) => {
401                    input = input.advance(newline + 1);
402                    trailing_backslash(&mut input, ch as u8)?;
403                    chars = input.char_indices();
404                }
405                _ => break,
406            },
407            _ch => {}
408        }
409    }
410    Err(Reject)
411}
412
413fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
414    let (input, delimiter) = delimiter_of_raw_string(input)?;
415    let mut bytes = input.bytes().enumerate();
416    while let Some((i, byte)) = bytes.next() {
417        match byte {
418            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
419                let rest = input.advance(i + 1 + delimiter.len());
420                return Ok(literal_suffix(rest));
421            }
422            b'\r' => match bytes.next() {
423                Some((_, b'\n')) => {}
424                _ => break,
425            },
426            _ => {}
427        }
428    }
429    Err(Reject)
430}
431
432fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
433    if let Ok(input) = input.parse("b\"") {
434        cooked_byte_string(input)
435    } else if let Ok(input) = input.parse("br") {
436        raw_byte_string(input)
437    } else {
438        Err(Reject)
439    }
440}
441
442fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
443    let mut bytes = input.bytes().enumerate();
444    while let Some((offset, b)) = bytes.next() {
445        match b {
446            b'"' => {
447                let input = input.advance(offset + 1);
448                return Ok(literal_suffix(input));
449            }
450            b'\r' => match bytes.next() {
451                Some((_, b'\n')) => {}
452                _ => break,
453            },
454            b'\\' => match bytes.next() {
455                Some((_, b'x')) => {
456                    backslash_x_byte(&mut bytes)?;
457                }
458                Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
459                Some((newline, b @ (b'\n' | b'\r'))) => {
460                    input = input.advance(newline + 1);
461                    trailing_backslash(&mut input, b)?;
462                    bytes = input.bytes().enumerate();
463                }
464                _ => break,
465            },
466            b if b.is_ascii() => {}
467            _ => break,
468        }
469    }
470    Err(Reject)
471}
472
473fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
474    for (i, byte) in input.bytes().enumerate() {
475        match byte {
476            b'"' => {
477                if i > 255 {
478                    // https://github.com/rust-lang/rust/pull/95251
479                    return Err(Reject);
480                }
481                return Ok((input.advance(i + 1), &input.rest[..i]));
482            }
483            b'#' => {}
484            _ => break,
485        }
486    }
487    Err(Reject)
488}
489
490fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
491    let (input, delimiter) = delimiter_of_raw_string(input)?;
492    let mut bytes = input.bytes().enumerate();
493    while let Some((i, byte)) = bytes.next() {
494        match byte {
495            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
496                let rest = input.advance(i + 1 + delimiter.len());
497                return Ok(literal_suffix(rest));
498            }
499            b'\r' => match bytes.next() {
500                Some((_, b'\n')) => {}
501                _ => break,
502            },
503            other => {
504                if !other.is_ascii() {
505                    break;
506                }
507            }
508        }
509    }
510    Err(Reject)
511}
512
513fn c_string(input: Cursor) -> Result<Cursor, Reject> {
514    if let Ok(input) = input.parse("c\"") {
515        cooked_c_string(input)
516    } else if let Ok(input) = input.parse("cr") {
517        raw_c_string(input)
518    } else {
519        Err(Reject)
520    }
521}
522
523fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
524    let (input, delimiter) = delimiter_of_raw_string(input)?;
525    let mut bytes = input.bytes().enumerate();
526    while let Some((i, byte)) = bytes.next() {
527        match byte {
528            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
529                let rest = input.advance(i + 1 + delimiter.len());
530                return Ok(literal_suffix(rest));
531            }
532            b'\r' => match bytes.next() {
533                Some((_, b'\n')) => {}
534                _ => break,
535            },
536            b'\0' => break,
537            _ => {}
538        }
539    }
540    Err(Reject)
541}
542
543fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
544    let mut chars = input.char_indices();
545
546    while let Some((i, ch)) = chars.next() {
547        match ch {
548            '"' => {
549                let input = input.advance(i + 1);
550                return Ok(literal_suffix(input));
551            }
552            '\r' => match chars.next() {
553                Some((_, '\n')) => {}
554                _ => break,
555            },
556            '\\' => match chars.next() {
557                Some((_, 'x')) => {
558                    backslash_x_nonzero(&mut chars)?;
559                }
560                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
561                Some((_, 'u')) => {
562                    if backslash_u(&mut chars)? == '\0' {
563                        break;
564                    }
565                }
566                Some((newline, ch @ ('\n' | '\r'))) => {
567                    input = input.advance(newline + 1);
568                    trailing_backslash(&mut input, ch as u8)?;
569                    chars = input.char_indices();
570                }
571                _ => break,
572            },
573            '\0' => break,
574            _ch => {}
575        }
576    }
577    Err(Reject)
578}
579
580fn byte(input: Cursor) -> Result<Cursor, Reject> {
581    let input = input.parse("b'")?;
582    let mut bytes = input.bytes().enumerate();
583    let ok = match bytes.next().map(|(_, b)| b) {
584        Some(b'\\') => match bytes.next().map(|(_, b)| b) {
585            Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
586            Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
587            _ => false,
588        },
589        b => b.is_some(),
590    };
591    if !ok {
592        return Err(Reject);
593    }
594    let (offset, _) = bytes.next().ok_or(Reject)?;
595    if !input.chars().as_str().is_char_boundary(offset) {
596        return Err(Reject);
597    }
598    let input = input.advance(offset).parse("'")?;
599    Ok(literal_suffix(input))
600}
601
602fn character(input: Cursor) -> Result<Cursor, Reject> {
603    let input = input.parse("'")?;
604    let mut chars = input.char_indices();
605    let ok = match chars.next().map(|(_, ch)| ch) {
606        Some('\\') => match chars.next().map(|(_, ch)| ch) {
607            Some('x') => backslash_x_char(&mut chars).is_ok(),
608            Some('u') => backslash_u(&mut chars).is_ok(),
609            Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
610            _ => false,
611        },
612        ch => ch.is_some(),
613    };
614    if !ok {
615        return Err(Reject);
616    }
617    let (idx, _) = chars.next().ok_or(Reject)?;
618    let input = input.advance(idx).parse("'")?;
619    Ok(literal_suffix(input))
620}
621
// Pulls the next `(index, item)` pair from `$chars` and evaluates to the item
// if it matches `$pat`. Otherwise (no match, or iterator exhausted) returns
// `Err(Reject)` from the enclosing function.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) if matches!(ch, $pat) => ch,
            _ => return Err(Reject),
        }
    };
}
633
634fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
635where
636    I: Iterator<Item = (usize, char)>,
637{
638    match chars.next() {
    Some((_, ch)) => match ch { '0'..='7' => ch, _ => return Err(Reject), },
    None => return Err(Reject),
};next_ch!(chars @ '0'..='7');
639    match chars.next() {
    Some((_, ch)) =>
        match ch {
            '0'..='9' | 'a'..='f' | 'A'..='F' => ch,
            _ => return Err(Reject),
        },
    None => return Err(Reject),
};next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
640    Ok(())
641}
642
643fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
644where
645    I: Iterator<Item = (usize, u8)>,
646{
647    match chars.next() {
    Some((_, ch)) =>
        match ch {
            b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' => ch,
            _ => return Err(Reject),
        },
    None => return Err(Reject),
};next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
648    match chars.next() {
    Some((_, ch)) =>
        match ch {
            b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' => ch,
            _ => return Err(Reject),
        },
    None => return Err(Reject),
};next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
649    Ok(())
650}
651
652fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
653where
654    I: Iterator<Item = (usize, char)>,
655{
656    let first = match chars.next() {
    Some((_, ch)) =>
        match ch {
            '0'..='9' | 'a'..='f' | 'A'..='F' => ch,
            _ => return Err(Reject),
        },
    None => return Err(Reject),
}next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
657    let second = match chars.next() {
    Some((_, ch)) =>
        match ch {
            '0'..='9' | 'a'..='f' | 'A'..='F' => ch,
            _ => return Err(Reject),
        },
    None => return Err(Reject),
}next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
658    if first == '0' && second == '0' {
659        Err(Reject)
660    } else {
661        Ok(())
662    }
663}
664
665fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
666where
667    I: Iterator<Item = (usize, char)>,
668{
669    match chars.next() {
    Some((_, ch)) => match ch { '{' => ch, _ => return Err(Reject), },
    None => return Err(Reject),
};next_ch!(chars @ '{');
670    let mut value = 0;
671    let mut len = 0;
672    for (_, ch) in chars {
673        let digit = match ch {
674            '0'..='9' => ch as u8 - b'0',
675            'a'..='f' => 10 + ch as u8 - b'a',
676            'A'..='F' => 10 + ch as u8 - b'A',
677            '_' if len > 0 => continue,
678            '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
679            _ => break,
680        };
681        if len == 6 {
682            break;
683        }
684        value *= 0x10;
685        value += u32::from(digit);
686        len += 1;
687    }
688    Err(Reject)
689}
690
691fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
692    let mut whitespace = input.bytes().enumerate();
693    loop {
694        if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
695            return Err(Reject);
696        }
697        match whitespace.next() {
698            Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
699                last = b;
700            }
701            Some((offset, _)) => {
702                *input = input.advance(offset);
703                return Ok(());
704            }
705            None => return Err(Reject),
706        }
707    }
708}
709
710fn float(input: Cursor) -> Result<Cursor, Reject> {
711    let mut rest = float_digits(input)?;
712    if let Some(ch) = rest.chars().next() {
713        if is_ident_start(ch) {
714            rest = ident_not_raw(rest)?.0;
715        }
716    }
717    word_break(rest)
718}
719
720fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
721    let mut chars = input.chars().peekable();
722    match chars.next() {
723        Some(ch) if '0' <= ch && ch <= '9' => {}
724        _ => return Err(Reject),
725    }
726
727    let mut len = 1;
728    let mut has_dot = false;
729    let mut has_exp = false;
730    while let Some(&ch) = chars.peek() {
731        match ch {
732            '0'..='9' | '_' => {
733                chars.next();
734                len += 1;
735            }
736            '.' => {
737                if has_dot {
738                    break;
739                }
740                chars.next();
741                if chars
742                    .peek()
743                    .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
744                {
745                    return Err(Reject);
746                }
747                len += 1;
748                has_dot = true;
749            }
750            'e' | 'E' => {
751                chars.next();
752                len += 1;
753                has_exp = true;
754                break;
755            }
756            _ => break,
757        }
758    }
759
760    if !(has_dot || has_exp) {
761        return Err(Reject);
762    }
763
764    if has_exp {
765        let token_before_exp = if has_dot {
766            Ok(input.advance(len - 1))
767        } else {
768            Err(Reject)
769        };
770        let mut has_sign = false;
771        let mut has_exp_value = false;
772        while let Some(&ch) = chars.peek() {
773            match ch {
774                '+' | '-' => {
775                    if has_exp_value {
776                        break;
777                    }
778                    if has_sign {
779                        return token_before_exp;
780                    }
781                    chars.next();
782                    len += 1;
783                    has_sign = true;
784                }
785                '0'..='9' => {
786                    chars.next();
787                    len += 1;
788                    has_exp_value = true;
789                }
790                '_' => {
791                    chars.next();
792                    len += 1;
793                }
794                _ => break,
795            }
796        }
797        if !has_exp_value {
798            return token_before_exp;
799        }
800    }
801
802    Ok(input.advance(len))
803}
804
805fn int(input: Cursor) -> Result<Cursor, Reject> {
806    let mut rest = digits(input)?;
807    if let Some(ch) = rest.chars().next() {
808        if is_ident_start(ch) {
809            rest = ident_not_raw(rest)?.0;
810        }
811    }
812    word_break(rest)
813}
814
815fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
816    let base = if input.starts_with("0x") {
817        input = input.advance(2);
818        16
819    } else if input.starts_with("0o") {
820        input = input.advance(2);
821        8
822    } else if input.starts_with("0b") {
823        input = input.advance(2);
824        2
825    } else {
826        10
827    };
828
829    let mut len = 0;
830    let mut empty = true;
831    for b in input.bytes() {
832        match b {
833            b'0'..=b'9' => {
834                let digit = (b - b'0') as u64;
835                if digit >= base {
836                    return Err(Reject);
837                }
838            }
839            b'a'..=b'f' => {
840                let digit = 10 + (b - b'a') as u64;
841                if digit >= base {
842                    break;
843                }
844            }
845            b'A'..=b'F' => {
846                let digit = 10 + (b - b'A') as u64;
847                if digit >= base {
848                    break;
849                }
850            }
851            b'_' => {
852                if empty && base == 10 {
853                    return Err(Reject);
854                }
855                len += 1;
856                continue;
857            }
858            _ => break,
859        }
860        len += 1;
861        empty = false;
862    }
863    if empty {
864        Err(Reject)
865    } else {
866        Ok(input.advance(len))
867    }
868}
869
870fn punct(input: Cursor) -> PResult<Punct> {
871    let (rest, ch) = punct_char(input)?;
872    if ch == '\'' {
873        let (after_lifetime, _ident) = ident_any(rest)?;
874        if after_lifetime.starts_with_char('\'')
875            || (after_lifetime.starts_with_char('#') && !rest.starts_with("r#"))
876        {
877            Err(Reject)
878        } else {
879            Ok((rest, Punct::new('\'', Spacing::Joint)))
880        }
881    } else {
882        let kind = match punct_char(rest) {
883            Ok(_) => Spacing::Joint,
884            Err(Reject) => Spacing::Alone,
885        };
886        Ok((rest, Punct::new(ch, kind)))
887    }
888}
889
890fn punct_char(input: Cursor) -> PResult<char> {
891    if input.starts_with("//") || input.starts_with("/*") {
892        // Do not accept `/` of a comment as a punct.
893        return Err(Reject);
894    }
895
896    let mut chars = input.chars();
897    let Some(first) = chars.next() else {
898        return Err(Reject);
899    };
900    let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
901    if recognized.contains(first) {
902        Ok((input.advance(first.len_utf8()), first))
903    } else {
904        Err(Reject)
905    }
906}
907
908fn doc_comment<'a>(input: Cursor<'a>, tokens: &mut TokenStreamBuilder) -> PResult<'a, ()> {
909    #[cfg(span_locations)]
910    let lo = input.off;
911    let (rest, (comment, inner)) = doc_comment_contents(input)?;
912    let fallback_span = Span {
913        #[cfg(span_locations)]
914        lo,
915        #[cfg(span_locations)]
916        hi: rest.off,
917    };
918    let span = crate::Span::_new_fallback(fallback_span);
919
920    let mut scan_for_bare_cr = comment;
921    while let Some(cr) = scan_for_bare_cr.find('\r') {
922        let rest = &scan_for_bare_cr[cr + 1..];
923        if !rest.starts_with('\n') {
924            return Err(Reject);
925        }
926        scan_for_bare_cr = rest;
927    }
928
929    let mut pound = Punct::new('#', Spacing::Alone);
930    pound.set_span(span);
931    tokens.push_token_from_parser(TokenTree::Punct(pound));
932
933    if inner {
934        let mut bang = Punct::new('!', Spacing::Alone);
935        bang.set_span(span);
936        tokens.push_token_from_parser(TokenTree::Punct(bang));
937    }
938
939    let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span));
940    let mut equal = Punct::new('=', Spacing::Alone);
941    equal.set_span(span);
942    let mut literal = crate::Literal::_new_fallback(Literal::string(comment));
943    literal.set_span(span);
944    let mut bracketed = TokenStreamBuilder::with_capacity(3);
945    bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
946    bracketed.push_token_from_parser(TokenTree::Punct(equal));
947    bracketed.push_token_from_parser(TokenTree::Literal(literal));
948    let group = Group::new(Delimiter::Bracket, bracketed.build());
949    let mut group = crate::Group::_new_fallback(group);
950    group.set_span(span);
951    tokens.push_token_from_parser(TokenTree::Group(group));
952
953    Ok((rest, ()))
954}
955
956fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
957    if input.starts_with("//!") {
958        let input = input.advance(3);
959        let (input, s) = take_until_newline_or_eof(input);
960        Ok((input, (s, true)))
961    } else if input.starts_with("/*!") {
962        let (input, s) = block_comment(input)?;
963        Ok((input, (&s[3..s.len() - 2], true)))
964    } else if input.starts_with("///") {
965        let input = input.advance(3);
966        if input.starts_with_char('/') {
967            return Err(Reject);
968        }
969        let (input, s) = take_until_newline_or_eof(input);
970        Ok((input, (s, false)))
971    } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
972        let (input, s) = block_comment(input)?;
973        Ok((input, (&s[3..s.len() - 2], false)))
974    } else {
975        Err(Reject)
976    }
977}
978
979fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
980    let chars = input.char_indices();
981
982    for (i, ch) in chars {
983        if ch == '\n' {
984            return (input.advance(i), &input.rest[..i]);
985        } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
986            return (input.advance(i + 1), &input.rest[..i]);
987        }
988    }
989
990    (input.advance(input.len()), input.rest)
991}