Skip to main content

time/format_description/parse/
lexer_ast.rs

1//! Lexer for parsing format descriptions.
2
3use alloc::borrow::ToOwned as _;
4use alloc::boxed::Box;
5use alloc::string::String;
6use alloc::vec::Vec;
7
8use super::format_item::{
9    AstComponent, component_from_ast, ident_eq, parse_optional_format_modifier,
10};
11use super::{
12    Error, Location, Span, Spanned, SpannedValue, WithLocation, WithLocationValue as _, unused,
13};
14use crate::error::InvalidFormatDescription;
15use crate::format_description::__private::FormatDescriptionV3Inner;
16use crate::format_description::{BorrowedFormatItem, FormatDescriptionV3, OwnedFormatItem};
17use crate::hint;
18use crate::internal_macros::try_likely_ok;
19
/// The kind of input the lexer is positioned in, as reported by `Lexer::context`.
#[must_use]
enum Context {
    /// The lexer is inside a component.
    Component,
    /// The lexer is in literal text.
    Literal,
}

impl Context {
    /// Returns `true` only for [`Context::Component`].
    #[inline]
    const fn is_component(&self) -> bool {
        match self {
            Self::Component => true,
            Self::Literal => false,
        }
    }

    /// Returns `true` only for [`Context::Literal`].
    #[inline]
    const fn is_literal(&self) -> bool {
        match self {
            Self::Literal => true,
            Self::Component => false,
        }
    }
}
37
/// The outcome of trying to read one more modifier inside a component.
enum NextModifier<'a> {
    /// Whitespace followed by a token that was successfully split into a `key:value` modifier.
    Modifier(Modifier<'a>),
    /// Whitespace that was not followed by a component part. The whitespace has already been
    /// consumed and is carried here so the caller can still account for it.
    TrailingWhitespace(Spanned<&'a str>),
    /// No whitespace was found at the current position, so nothing was consumed.
    None,
}
43
/// Shorthand for the `ItemWithLiteralLifetime` associated type of the [`ParseTarget`]
/// implementation on `()` selected by `VERSION` and `OWNED`.
type ParseItemWithLiteralLifetime<'input, const VERSION: u8, const OWNED: bool> =
    <() as ParseTarget<'input, VERSION, OWNED>>::ItemWithLiteralLifetime;
/// Shorthand for the `Output` associated type of the [`ParseTarget`] implementation on `()`
/// selected by `VERSION` and `OWNED`.
type ParseOutput<'input, const VERSION: u8, const OWNED: bool> =
    <() as ParseTarget<'input, VERSION, OWNED>>::Output;
48
/// Describes how parsed pieces are turned into the item and output types for one combination of
/// format description `VERSION` and ownership (`OWNED`).
///
/// The implementations in this file are all on `()`; the type only serves as an anchor for the
/// const parameters.
pub(super) trait ParseTarget<'input, const VERSION: u8, const OWNED: bool> {
    /// The item type produced by [`literal`](Self::literal), [`optional`](Self::optional) and
    /// [`first`](Self::first), and named in the declared return type of
    /// [`component`](Self::component).
    type ItemWithLiteralLifetime;
    /// The item type with a `'static` lifetime. Not used in the trait's own signatures;
    /// implementations use it as the return type of `component`.
    type ItemWithStaticLifetime;
    /// The type an [`AstComponent`] is converted into before being handed to
    /// [`component`](Self::component).
    type Component: TryFrom<AstComponent, Error: Into<Error>>;
    /// The value returned by [`parse`](Self::parse).
    type Output;

    /// Build an item from literal text.
    fn literal(value: &'input str) -> Self::ItemWithLiteralLifetime;
    /// Build an item from an already-converted component.
    fn component(component: Self::Component) -> Result<Self::ItemWithLiteralLifetime, Error>;
    /// Build an item for an `optional` component.
    ///
    /// `value` holds the items of the nested format description, `format` is the value of the
    /// component's parsed `format` modifier, and `span` covers the component from its opening to
    /// its closing bracket.
    fn optional(
        value: Vec<Self::ItemWithLiteralLifetime>,
        format: bool,
        span: Span,
    ) -> Result<Self::ItemWithLiteralLifetime, Error>;
    /// Build an item for a `first` component.
    ///
    /// `value` holds one list of items per nested format description, and `span` covers the
    /// component from its opening to its closing bracket.
    fn first(
        value: Vec<Vec<Self::ItemWithLiteralLifetime>>,
        span: Span,
    ) -> Result<Self::ItemWithLiteralLifetime, Error>;
    /// Parse a complete format description.
    fn parse(s: &'input str) -> Result<Self::Output, Error>;
}
68
/// Parse `s` using the [`ParseTarget`] implementation selected by `VERSION` and `OWNED`.
///
/// This only forwards to [`ParseTarget::parse`]; the `where` clause restricts callers to the
/// combinations for which an implementation on `()` exists.
pub(super) fn parse_generic<'input, const VERSION: u8, const OWNED: bool>(
    s: &'input str,
) -> Result<ParseOutput<'input, VERSION, OWNED>, Error>
where
    (): ParseTarget<'input, VERSION, OWNED>,
{
    <() as ParseTarget<'input, VERSION, OWNED>>::parse(s)
}
77
/// Implements [`ParseTarget`] on `()` for every listed version, once with `OWNED = false`
/// (producing `BorrowedFormatItem`s) and once with `OWNED = true` (producing `OwnedFormatItem`s).
macro_rules! v1_v2_parse_target {
    ($($version:literal)+) => {$(
        impl<'input> ParseTarget<'input, $version, false> for () {
            type ItemWithLiteralLifetime = BorrowedFormatItem<'input>;
            type ItemWithStaticLifetime = BorrowedFormatItem<'static>;
            type Component = AstComponent;
            type Output = Vec<BorrowedFormatItem<'input>>;

            #[inline]
            fn literal(value: &'input str) -> Self::ItemWithLiteralLifetime {
                BorrowedFormatItem::StringLiteral(value)
            }

            #[inline]
            fn component(component: Self::Component) -> Result<Self::ItemWithStaticLifetime, Error>
            {
                Ok(BorrowedFormatItem::Component(try_likely_ok!(
                    component.try_into()
                )))
            }

            // This implementation rejects every `optional` item.
            #[inline]
            fn optional(
                _value: Vec<Self::ItemWithLiteralLifetime>,
                _format: bool,
                span: Span,
            ) -> Result<Self::ItemWithLiteralLifetime, Error> {
                hint::cold_path();
                let index = span.start.byte as usize;
                let inner = unused(span.error(
                    "optional items are not supported in runtime-parsed format descriptions",
                ));
                Err(Error {
                    _inner: inner,
                    public: InvalidFormatDescription::NotSupported {
                        what: "optional item",
                        context: "runtime-parsed format descriptions",
                        index,
                    },
                })
            }

            // This implementation rejects every `first` item.
            #[inline]
            fn first(_value: Vec<Vec<Self::ItemWithLiteralLifetime>>, span: Span)
                -> Result<Self::ItemWithLiteralLifetime, Error>
            {
                hint::cold_path();
                let index = span.start.byte as usize;
                let inner = unused(span.error(
                    "'first' items are not supported in runtime-parsed format descriptions",
                ));
                Err(Error {
                    _inner: inner,
                    public: InvalidFormatDescription::NotSupported {
                        what: "'first' item",
                        context: "runtime-parsed format descriptions",
                        index,
                    },
                })
            }

            #[inline]
            fn parse(s: &'input str) -> Result<ParseOutput<'input, $version, false>, Error> {
                let mut lexer = Lexer::<$version, false>::new(s);
                let mut items = Vec::with_capacity(16);
                // Keep lexing items until the whole input has been consumed.
                loop {
                    if lexer.input.is_empty() {
                        break Ok(items);
                    }
                    items.push(try_likely_ok!(lexer.parse_next_item()));
                }
            }
        }

        impl<'input> ParseTarget<'input, $version, true> for () {
            type ItemWithLiteralLifetime = OwnedFormatItem;
            type ItemWithStaticLifetime = OwnedFormatItem;
            type Component = AstComponent;
            type Output = OwnedFormatItem;

            #[inline]
            fn literal(value: &'input str) -> Self::ItemWithLiteralLifetime {
                let owned = value.to_owned();
                OwnedFormatItem::StringLiteral(owned.into_boxed_str())
            }

            #[inline]
            fn component(component: Self::Component) -> Result<Self::ItemWithStaticLifetime, Error>
            {
                Ok(OwnedFormatItem::Component(try_likely_ok!(
                    component.try_into()
                )))
            }

            #[inline]
            fn optional(
                value: Vec<Self::ItemWithLiteralLifetime>,
                format: bool,
                span: Span,
            ) -> Result<Self::ItemWithLiteralLifetime, Error> {
                if format {
                    let nested = Box::new(items_to_owned_format_item(value));
                    return Ok(OwnedFormatItem::Optional(nested));
                }

                // Only `format == true` is accepted here.
                hint::cold_path();
                let index = span.start.byte as usize;
                let inner = unused(span.error(
                    "v1 and v2 format descriptions do not support optional items that are \
                     not formatted",
                ));
                Err(Error {
                    _inner: inner,
                    public: InvalidFormatDescription::NotSupported {
                        what: "optional item with `format:false`",
                        context: "v1 and v2 format descriptions",
                        index,
                    },
                })
            }

            #[inline]
            fn first(value: Vec<Vec<Self::ItemWithLiteralLifetime>>, _span: Span)
                -> Result<Self::ItemWithLiteralLifetime, Error>
            {
                // Each nested format description collapses into a single owned item.
                let alternatives = value
                    .into_iter()
                    .map(|items| items_to_owned_format_item(items))
                    .collect();
                Ok(OwnedFormatItem::First(alternatives))
            }

            #[inline]
            fn parse(s: &'input str) -> Result<ParseOutput<'input, $version, true>, Error> {
                let mut lexer = Lexer::<$version, true>::new(s);
                let mut items = Vec::with_capacity(16);
                // Keep lexing items until the whole input has been consumed.
                loop {
                    if lexer.input.is_empty() {
                        break Ok(items_to_owned_format_item(items));
                    }
                    items.push(try_likely_ok!(lexer.parse_next_item()));
                }
            }
        }
    )+};
}
212
/// Implements [`ParseTarget`] on `()` for version 3.
///
/// - `$owned`: the value of the `OWNED` const parameter.
/// - `$output_lt`: the lifetime carried by the produced items and output.
/// - `$literal`: the constructor applied to literal text.
/// - `$items_to_v3`: the function that folds a list of items into a single item.
macro_rules! v3_parse_target {
    ($owned:tt, $output_lt:lifetime, $literal:expr, $items_to_v3:expr) => {
        impl<'input> ParseTarget<'input, 3, $owned> for () {
            type ItemWithLiteralLifetime = FormatDescriptionV3Inner<$output_lt>;
            type ItemWithStaticLifetime = FormatDescriptionV3Inner<'static>;
            type Component = FormatDescriptionV3Inner<'static>;
            type Output = FormatDescriptionV3<$output_lt>;

            #[inline]
            fn literal(value: &'input str) -> Self::ItemWithLiteralLifetime {
                $literal(value.into())
            }

            #[inline]
            fn component(
                component: Self::Component,
            ) -> Result<Self::ItemWithStaticLifetime, Error> {
                // The component already has the item type; nothing to convert.
                Ok(component)
            }

            #[inline]
            fn optional(
                value: Vec<Self::ItemWithLiteralLifetime>,
                format: bool,
                _span: Span,
            ) -> Result<Self::ItemWithLiteralLifetime, Error> {
                let item = Box::new($items_to_v3(value));
                Ok(FormatDescriptionV3Inner::OwnedOptional { format, item })
            }

            #[inline]
            fn first(
                value: Vec<Vec<Self::ItemWithLiteralLifetime>>,
                _span: Span,
            ) -> Result<Self::ItemWithLiteralLifetime, Error> {
                let alternatives = value.into_iter().map($items_to_v3).collect();
                Ok(FormatDescriptionV3Inner::OwnedFirst(alternatives))
            }

            #[inline]
            fn parse(s: &'input str) -> Result<Self::Output, Error> {
                // The lexer is always instantiated with `OWNED = false`; `$items_to_v3` performs
                // whatever conversion the target needs at the end.
                let mut lexer = Lexer::<3, false>::new(s);
                let mut items = Vec::with_capacity(16);

                loop {
                    let Some(&byte) = lexer.input.first() else {
                        break;
                    };
                    let location = Location {
                        byte: lexer.byte_pos,
                    };

                    // A bare right bracket is never valid at this level.
                    if byte == b']' {
                        hint::cold_path();
                        return Err(Error {
                            _inner: unused(location.error("right brackets must be escaped")),
                            public: InvalidFormatDescription::Expected {
                                what: "right bracket to be escaped",
                                index: location.byte as usize,
                            },
                        });
                    }

                    let token = match byte {
                        b'[' => lexer.consume_component(location),
                        b'\\' => lexer
                            .consume_backslash_escape_sequence(location)
                            .map(<() as ParseTarget<'input, 3, $owned>>::literal),
                        _ => Ok(<() as ParseTarget<'input, 3, $owned>>::literal(
                            lexer.consume_literal().into(),
                        )),
                    };

                    items.push(try_likely_ok!(token));
                }

                Ok($items_to_v3(items).into_opaque())
            }
        }
    };
}
292
// Versions 1 and 2: borrowed and owned implementations for each.
v1_v2_parse_target!(1 2);
// Version 3 with `OWNED = false`: items carry the `'input` lifetime.
v3_parse_target!(false, 'input, FormatDescriptionV3Inner::BorrowedLiteral, items_to_v3_borrowed);
// Version 3 with `OWNED = true`: items carry the `'static` lifetime.
v3_parse_target!(true, 'static, FormatDescriptionV3Inner::OwnedLiteral, items_to_v3_owned);
296
297fn items_to_owned_format_item(items: Vec<OwnedFormatItem>) -> OwnedFormatItem {
298    match <[_; 1]>::try_from(items) {
299        Ok([item]) => item,
300        Err(items) => OwnedFormatItem::Compound(items.into_boxed_slice()),
301    }
302}
303
304fn items_to_v3_borrowed<'input>(
305    items: Vec<FormatDescriptionV3Inner<'input>>,
306) -> FormatDescriptionV3Inner<'input> {
307    match <[_; 1]>::try_from(items) {
308        Ok([item]) => item,
309        Err(items) => FormatDescriptionV3Inner::OwnedCompound(items.into_boxed_slice()),
310    }
311}
312
313fn items_to_v3_owned(
314    items: Vec<FormatDescriptionV3Inner<'_>>,
315) -> FormatDescriptionV3Inner<'static> {
316    match <[_; 1]>::try_from(items) {
317        Ok([item]) => item.into_owned(),
318        Err(items) => FormatDescriptionV3Inner::OwnedCompound(
319            items
320                .into_iter()
321                .map(FormatDescriptionV3Inner::into_owned)
322                .collect(),
323        ),
324    }
325}
326
/// The state of the lexer as it walks through a format description.
///
/// `VERSION` selects the format description version; `OWNED` selects which [`ParseTarget`]
/// implementation is used to build items.
pub(super) struct Lexer<'input, const VERSION: u8, const OWNED: bool> {
    /// The bytes that have not been consumed yet.
    input: &'input [u8],
    /// The current nesting depth. Its parity decides whether the lexer is in a literal (even)
    /// or a component (odd) context.
    depth: u8,
    /// The number of bytes consumed so far, used to build locations for spans and errors.
    byte_pos: u32,
}
333
334impl<'input, const VERSION: u8, const OWNED: bool> Lexer<'input, VERSION, OWNED> {
335    /// Parse the string into a series of [`Token`]s.
336    ///
337    /// `VERSION` controls the version of the format description that is being parsed.
338    ///
339    /// - When `VERSION` is 1, `[[` is the only escape sequence, resulting in a literal `[`. For the
340    ///   start of a nested format description, a single `[` is used and is _never_ part of the
341    ///   escape sequence. For example, `[optional [[day]]]` will lex successfully, ultimately
342    ///   resulting in a component named `optional` with the nested component `day`.
343    /// - When `VERSION` is 2 or 3, all escape sequences begin with `\`. The only characters that
344    ///   may currently follow are `\`, `[`, and `]`, all of which result in the literal character.
345    ///   All other characters result in a lex error.
346    #[inline]
347    pub(super) const fn new(input: &'input str) -> Self {
348        Self {
349            input: input.as_bytes(),
350            depth: 0,
351            byte_pos: 0,
352        }
353    }
354
355    /// Advance the input by the given number of bytes.
356    #[inline]
357    fn advance(&mut self, bytes: u32) {
358        self.input = &self.input[bytes as usize..];
359        self.byte_pos += bytes;
360    }
361
362    /// Whether the lexer is currently parsing a component or a literal.
363    #[inline]
364    const fn context(&self) -> Context {
365        if self.depth.is_multiple_of(2) {
366            Context::Literal
367        } else {
368            Context::Component
369        }
370    }
371
372    /// Consume the next token if it is a component item that is whitespace.
373    #[inline]
374    fn consume_whitespace(&mut self) -> Option<Spanned<&'input str>> {
375        debug_assert!(self.context().is_component());
376
377        let bytes = self
378            .input
379            .iter()
380            .take_while(|byte| byte.is_ascii_whitespace())
381            .count() as u32;
382
383        if bytes == 0 {
384            return None;
385        }
386
387        let start_loc = Location {
388            byte: self.byte_pos,
389        };
390        let end_loc = Location {
391            byte: self.byte_pos + bytes,
392        };
393
394        // Safety: Runtime format descriptions always originate with a string passed as a parameter
395        // and we have only consumed full codepoints, ensuring that a valid string remains.
396        let value = unsafe { str::from_utf8_unchecked(&self.input[..bytes as usize]) };
397        self.advance(bytes);
398
399        Some(value.spanned(start_loc.to(end_loc)))
400    }
401
402    /// Consume the next token if it is a component item that is not whitespace.
403    #[inline]
404    fn consume_component_part(&mut self) -> Option<Spanned<&'input str>> {
405        debug_assert!(self.context().is_component());
406
407        let bytes = self
408            .input
409            .iter()
410            .take_while(|byte| !byte.is_ascii_whitespace() && !matches!(byte, b'\\' | b'[' | b']'))
411            .count() as u32;
412
413        if bytes == 0 {
414            hint::cold_path();
415            return None;
416        }
417
418        let start_loc = Location {
419            byte: self.byte_pos,
420        };
421        let end_loc = Location {
422            byte: self.byte_pos + bytes,
423        };
424
425        // Safety: Runtime format descriptions always originate with a string passed as a parameter
426        // and we have only consumed full codepoints, ensuring that a valid string remains.
427        let value = unsafe { str::from_utf8_unchecked(&self.input[..bytes as usize]) };
428        self.advance(bytes);
429
430        Some(value.spanned(start_loc.to(end_loc)))
431    }
432
433    /// Consume the next token if it is a closing bracket.
434    #[inline]
435    fn consume_closing_bracket(&mut self) -> Option<Location> {
436        if self.input.first() != Some(&b']') {
437            hint::cold_path();
438            return None;
439        }
440
441        self.depth -= 1;
442
443        let location = Location {
444            byte: self.byte_pos,
445        };
446        self.advance(1);
447        Some(location)
448    }
449
450    /// Consume the next token if it is a component name. The caller is expected to be inside a
451    /// component header.
452    #[inline]
453    fn consume_component_name(
454        &mut self,
455        opening_bracket: Location,
456    ) -> Result<Spanned<&'input str>, Error> {
457        let leading_whitespace = self.consume_whitespace().is_some();
458
459        let Some(name) = self.consume_component_part() else {
460            hint::cold_path();
461            let location = if leading_whitespace {
462                opening_bracket.offset(1)
463            } else {
464                opening_bracket
465            };
466            return Err(Error {
467                _inner: unused(location.error("expected component name")),
468                public: InvalidFormatDescription::MissingComponentName {
469                    index: location.byte as usize,
470                },
471            });
472        };
473
474        Ok(name)
475    }
476
477    #[inline]
478    fn consume_modifier(&mut self) -> Result<NextModifier<'input>, Error> {
479        let Some(whitespace) = self.consume_whitespace() else {
480            hint::cold_path();
481            return Ok(NextModifier::None);
482        };
483
484        let Some(token) = self.consume_component_part() else {
485            hint::cold_path();
486            return Ok(NextModifier::TrailingWhitespace(whitespace));
487        };
488
489        let modifier = try_likely_ok!(self.modifier_from_token(token));
490        Ok(NextModifier::Modifier(modifier))
491    }
492
493    /// Parse a component.
494    #[inline]
495    fn consume_component(
496        &mut self,
497        opening_bracket: Location,
498    ) -> Result<ParseItemWithLiteralLifetime<'input, VERSION, OWNED>, Error>
499    where
500        (): ParseTarget<'input, VERSION, OWNED>,
501    {
502        match self.depth.checked_add(1) {
503            Some(depth) => self.depth = depth,
504            None => {
505                hint::cold_path();
506                return Err(Error {
507                    _inner: unused(opening_bracket.error("too much nesting")),
508                    public: InvalidFormatDescription::NotSupported {
509                        what: "highly-nested format description",
510                        context: "",
511                        index: opening_bracket.byte as usize,
512                    },
513                });
514            }
515        };
516        // consume the opening bracket, which was checked prior to calling this method
517        self.advance(1);
518
519        let name = try_likely_ok!(self.consume_component_name(opening_bracket));
520        let modifiers = try_likely_ok!(Modifiers::parse::<VERSION, OWNED>(self));
521
522        let mut nested_format_descriptions = Vec::new();
523        while self.is_nested_description_start()
524            && let Ok(description) = self.consume_nested(modifiers.end_location())
525        {
526            nested_format_descriptions.push(description);
527        }
528
529        if modifiers.trailing_whitespace.is_some()
530            && let Some(first_nested) = nested_format_descriptions.first_mut()
531        {
532            first_nested.leading_whitespace = modifiers.trailing_whitespace;
533        }
534
535        if modifiers.trailing_whitespace.is_none() || !nested_format_descriptions.is_empty() {
536            self.consume_whitespace();
537        }
538
539        let Some(closing_bracket) = self.consume_closing_bracket() else {
540            hint::cold_path();
541            return Err(Error {
542                _inner: unused(opening_bracket.error("unclosed bracket")),
543                public: InvalidFormatDescription::UnclosedOpeningBracket {
544                    index: opening_bracket.byte as usize,
545                },
546            });
547        };
548
549        if let Some(first_nested_fd) = nested_format_descriptions.first()
550            && first_nested_fd.leading_whitespace.is_none()
551        {
552            hint::cold_path();
553            return Err(Error {
554                _inner: unused(
555                    opening_bracket
556                        .to(closing_bracket)
557                        .error("missing leading whitespace before nested format description"),
558                ),
559                public: InvalidFormatDescription::Expected {
560                    what: "whitespace before nested format description",
561                    index: first_nested_fd.opening_bracket.byte as usize,
562                },
563            });
564        }
565
566        if ident_eq::<VERSION>(*name, "optional") {
567            hint::cold_path();
568
569            let format = try_likely_ok!(parse_optional_format_modifier::<VERSION>(
570                &modifiers.modifiers,
571            ));
572
573            let nested_format_description = match <[_; 1]>::try_from(nested_format_descriptions) {
574                Ok([nested_format_description]) => nested_format_description,
575                Err(e) => {
576                    hint::cold_path();
577                    if let Some((second_fd, last_fd)) = e.first().zip(e.last()) {
578                        return Err(Error {
579                            _inner: unused(
580                                second_fd.opening_bracket.to(last_fd.closing_bracket).error(
581                                    "the `optional` component only allows a single nested format \
582                                     description",
583                                ),
584                            ),
585                            public: InvalidFormatDescription::NotSupported {
586                                what: "more than one nested format description",
587                                context: "`optional` components",
588                                index: second_fd.opening_bracket.byte as usize,
589                            },
590                        });
591                    } else {
592                        return Err(Error {
593                            _inner: unused(opening_bracket.to(closing_bracket).error(
594                                "missing nested format description for `optional` component",
595                            )),
596                            public: InvalidFormatDescription::Expected {
597                                what: "nested format description",
598                                index: closing_bracket.byte as usize,
599                            },
600                        });
601                    }
602                }
603            };
604
605            return <() as ParseTarget<'input, VERSION, OWNED>>::optional(
606                nested_format_description.items,
607                *format,
608                opening_bracket.to(closing_bracket),
609            );
610        }
611
612        if ident_eq::<VERSION>(*name, "first") {
613            hint::cold_path();
614            if !modifiers.modifiers.is_empty() {
615                hint::cold_path();
616                let modifier = &modifiers.modifiers[0];
617                return Err(Error {
618                    _inner: unused(modifier.key_span().error("invalid modifier key")),
619                    public: InvalidFormatDescription::InvalidModifier {
620                        value: (*modifier.key).to_owned(),
621                        index: modifier.key.location.byte as usize,
622                    },
623                });
624            }
625
626            if version!(3..) && nested_format_descriptions.is_empty() {
627                hint::cold_path();
628                return Err(Error {
629                    _inner: unused(opening_bracket.to(closing_bracket).error(
630                        "the `first` component requires at least one nested format description",
631                    )),
632                    public: InvalidFormatDescription::Expected {
633                        what: "at least one nested format description",
634                        index: closing_bracket.byte as usize,
635                    },
636                });
637            }
638
639            let items = nested_format_descriptions
640                .into_iter()
641                .map(|nested_format_description| nested_format_description.items)
642                .collect();
643
644            return <() as ParseTarget<'input, VERSION, OWNED>>::first(
645                items,
646                opening_bracket.to(closing_bracket),
647            );
648        }
649
650        if !nested_format_descriptions.is_empty() {
651            hint::cold_path();
652            return Err(Error {
653                _inner: unused(
654                    opening_bracket
655                        .to(closing_bracket)
656                        .error("this component does not support nested format descriptions"),
657                ),
658                public: InvalidFormatDescription::NotSupported {
659                    what: "nested format descriptions",
660                    context: "on this component",
661                    index: opening_bracket.byte as usize,
662                },
663            });
664        }
665
666        let component = try_likely_ok!(component_from_ast::<VERSION>(&name, &modifiers.modifiers));
667        <() as ParseTarget<'input, VERSION, OWNED>>::component(try_likely_ok!(component.try_into()))
668    }
669
670    /// Parse a nested format description. The location provided is the most recent one consumed.
671    #[inline]
672    fn consume_nested(
673        &mut self,
674        last_location: Location,
675    ) -> Result<
676        NestedFormatDescription<'input, ParseItemWithLiteralLifetime<'input, VERSION, OWNED>>,
677        Error,
678    >
679    where
680        (): ParseTarget<'input, VERSION, OWNED>,
681    {
682        let leading_whitespace = self.consume_whitespace();
683
684        let opening_bracket = {
685            match self.depth.checked_add(1) {
686                Some(depth) => self.depth = depth,
687                None => {
688                    hint::cold_path();
689                    return Err(Error {
690                        _inner: unused(last_location.error("too much nesting")),
691                        public: InvalidFormatDescription::NotSupported {
692                            what: "highly-nested format description",
693                            context: "",
694                            index: last_location.byte as usize,
695                        },
696                    });
697                }
698            }
699            let location = Location {
700                byte: self.byte_pos,
701            };
702            self.advance(1);
703            location
704        };
705
706        let mut items = Vec::new();
707        while !self.input.is_empty() {
708            // If we're in a literal context and the next byte is a closing bracket, stop so that we
709            // can consume it.
710            if self.context().is_literal() && self.input.first() == Some(&b']') {
711                break;
712            }
713
714            items.push(try_likely_ok!(self.parse_next_item()));
715        }
716
717        let Some(closing_bracket) = self.consume_closing_bracket() else {
718            hint::cold_path();
719            return Err(Error {
720                _inner: unused(opening_bracket.error("unclosed bracket")),
721                public: InvalidFormatDescription::UnclosedOpeningBracket {
722                    index: opening_bracket.byte as usize,
723                },
724            });
725        };
726
727        Ok(NestedFormatDescription {
728            leading_whitespace,
729            opening_bracket,
730            items,
731            closing_bracket,
732        })
733    }
734
735    #[inline]
736    fn modifier_from_token(&self, token: Spanned<&'input str>) -> Result<Modifier<'input>, Error> {
737        let Some(colon_index) = token.bytes().position(|b| b == b':') else {
738            hint::cold_path();
739            return Err(Error {
740                _inner: unused(token.span.error("modifier must be of the form `key:value`")),
741                public: InvalidFormatDescription::InvalidModifier {
742                    value: (*token).to_owned(),
743                    index: token.span.start.byte as usize,
744                },
745            });
746        };
747        let key = &token[..colon_index];
748        let value = &token[colon_index + 1..];
749
750        if key.is_empty() {
751            hint::cold_path();
752            return Err(Error {
753                _inner: unused(token.span.shrink_to_start().error("expected modifier key")),
754                public: InvalidFormatDescription::InvalidModifier {
755                    value: String::new(),
756                    index: token.span.start.byte as usize,
757                },
758            });
759        }
760        if value.is_empty() {
761            hint::cold_path();
762            return Err(Error {
763                _inner: unused(token.span.shrink_to_end().error("expected modifier value")),
764                public: InvalidFormatDescription::InvalidModifier {
765                    value: String::new(),
766                    index: token.span.start.byte as usize + colon_index,
767                },
768            });
769        }
770
771        Ok(Modifier {
772            key: key.with_location(token.span.start),
773            value,
774        })
775    }
776
777    /// Check whether the next tokens start a nested format description. Does not consume any
778    /// input.
779    ///
780    /// Note that this call is strictly an optimization, as checking the error path on
781    /// `parse_nested` is sufficient for knowing if a nested format description is present. This
782    /// method avoids the overhead of constructing an error only to throw it away.
783    #[inline]
784    fn is_nested_description_start(&self) -> bool {
785        debug_assert!(self.context().is_component());
786
787        let Some(index) = self
788            .input
789            .iter()
790            .position(|&byte| !byte.is_ascii_whitespace())
791        else {
792            return false;
793        };
794
795        self.input[index] == b'['
796            && (version!(2..)
797                || self.context().is_component()
798                || self.input.get(index + 1) != Some(&b'['))
799    }
800
801    #[inline]
802    fn consume_literal(&mut self) -> &'input str {
803        let bytes = self
804            .input
805            .iter()
806            .take_while(|&&byte| byte != b'[' && byte != b']' && (version!(1) || byte != b'\\'))
807            .count() as u32;
808
809        // Safety: A string was passed to this function, and only UTF-8 has been consumed,
810        // leaving behind a string known to begin at a character boundary.
811        let value = unsafe { str::from_utf8_unchecked(&self.input[..bytes as usize]) };
812        self.advance(bytes);
813
814        value
815    }
816
817    #[inline]
818    fn consume_backslash_escape_sequence(
819        &mut self,
820        location: Location,
821    ) -> Result<&'input str, Error> {
822        let backslash_loc = location;
823
824        Ok(match self.input.get(1) {
825            Some(b'\\' | b'[' | b']') => {
826                // The escaped character is emitted as-is.
827                // Safety: We know that this is either a left bracket, right bracket, or
828                // backslash.
829                let char = unsafe { str::from_utf8_unchecked(&self.input[1..2]) };
830                self.advance(2);
831                char
832            }
833            Some(_) => {
834                hint::cold_path();
835                let loc = Location {
836                    byte: self.byte_pos + 1,
837                };
838                return Err(Error {
839                    _inner: unused(loc.error("invalid escape sequence")),
840                    public: InvalidFormatDescription::Expected {
841                        what: "valid escape sequence",
842                        index: loc.byte as usize,
843                    },
844                });
845            }
846            None => {
847                hint::cold_path();
848                return Err(Error {
849                    _inner: unused(backslash_loc.error("unexpected end of input")),
850                    public: InvalidFormatDescription::Expected {
851                        what: "valid escape sequence",
852                        index: backslash_loc.byte as usize,
853                    },
854                });
855            }
856        })
857    }
858}
859
860impl<'input, const VERSION: u8, const OWNED: bool> Lexer<'input, VERSION, OWNED> {
861    #[inline(always)]
862    fn parse_next_item(
863        &mut self,
864    ) -> Result<ParseItemWithLiteralLifetime<'input, VERSION, OWNED>, Error>
865    where
866        (): ParseTarget<'input, VERSION, OWNED>,
867    {
868        let byte = self.input[0];
869        let location = Location {
870            byte: self.byte_pos,
871        };
872
873        Ok(match byte {
874            b'[' if version!(1) && self.input.get(1) == Some(&b'[') => {
875                self.advance(2);
876                <() as ParseTarget<'input, VERSION, OWNED>>::literal("[")
877            }
878            b'[' => return self.consume_component(location),
879            b']' if version!(3..) => {
880                hint::cold_path();
881                return Err(Error {
882                    _inner: unused(location.error("right brackets must be escaped")),
883                    public: InvalidFormatDescription::Expected {
884                        what: "right bracket to be escaped",
885                        index: location.byte as usize,
886                    },
887                });
888            }
889            b']' if version!(1..=2) => {
890                self.advance(1);
891                <() as ParseTarget<'input, VERSION, OWNED>>::literal("]")
892            }
893            b'\\' if version!(2..) => {
894                return self
895                    .consume_backslash_escape_sequence(location)
896                    .map(<() as ParseTarget<'input, VERSION, OWNED>>::literal);
897            }
898            _ => <() as ParseTarget<'input, VERSION, OWNED>>::literal(self.consume_literal()),
899        })
900    }
901}
902
/// A format description that is nested within another format description.
///
/// Besides the parsed items, this records where the surrounding brackets were found in the
/// format string. `Item` is the type of the parsed items.
pub(super) struct NestedFormatDescription<'a, Item> {
    /// Whitespace, if any, between the end of the previous item and the opening bracket.
    pub(super) leading_whitespace: Option<Spanned<&'a str>>,
    /// Location of the opening bracket in the format string.
    pub(super) opening_bracket: Location,
    /// The items contained within the nested format description.
    pub(super) items: Vec<Item>,
    /// Location of the closing bracket in the format string.
    pub(super) closing_bracket: Location,
}
914
/// A modifier for a component, made up of a key and a value.
///
/// The span helpers on this type treat the value as starting one byte after the end of the
/// key, leaving room for a single separator byte between the two.
pub(super) struct Modifier<'a> {
    /// The key of the modifier, together with the location it is attached to.
    pub(super) key: WithLocation<&'a str>,
    /// The value of the modifier.
    pub(super) value: &'a str,
}
922
923impl Modifier<'_> {
924    #[inline]
925    pub(super) fn key_value_span(&self) -> Span {
926        self.key
927            .location
928            .with_length(self.key.len() + self.value.len() + 1)
929    }
930
931    #[inline]
932    pub(super) fn key_span(&self) -> Span {
933        self.key.location.with_length(self.key.len())
934    }
935
936    #[inline]
937    pub(super) fn value_span(&self) -> Span {
938        self.key
939            .location
940            .offset(self.key.len() as u32 + 1)
941            .with_length(self.value.len())
942    }
943}
944
/// The modifiers collected by `Modifiers::parse`, plus any whitespace that followed them.
pub(super) struct Modifiers<'a> {
    /// The modifiers, in the order they were parsed.
    pub(super) modifiers: Vec<Modifier<'a>>,
    /// Whitespace found after the last modifier, if any.
    pub(super) trailing_whitespace: Option<Spanned<&'a str>>,
}
949
950impl<'a> Modifiers<'a> {
951    /// Parse modifiers until there are none left. Returns the modifiers along with any trailing
952    /// whitespace after the last modifier.
953    #[inline]
954    pub(super) fn parse<const VERSION: u8, const OWNED: bool>(
955        tokens: &mut Lexer<'a, VERSION, OWNED>,
956    ) -> Result<Self, Error> {
957        let mut modifiers = Vec::new();
958        loop {
959            match try_likely_ok!(tokens.consume_modifier()) {
960                NextModifier::Modifier(modifier) => modifiers.push(modifier),
961                NextModifier::TrailingWhitespace(whitespace) => {
962                    return Ok(Self {
963                        modifiers,
964                        trailing_whitespace: Some(whitespace),
965                    });
966                }
967                NextModifier::None => {
968                    return Ok(Self {
969                        modifiers,
970                        trailing_whitespace: None,
971                    });
972                }
973            }
974        }
975    }
976
977    #[inline]
978    pub(super) fn end_location(&self) -> Location {
979        match &*self.modifiers {
980            [] => Location::DUMMY,
981            [.., modifier] => modifier.value_span().end,
982        }
983    }
984}