time 0.3.17

Date and time library. Fully interoperable with the standard library. Mostly compatible with #![no_std].
Documentation
//! AST for parsing format descriptions.

use alloc::string::String;
use alloc::vec::Vec;
use core::iter;
use core::iter::Peekable;

use super::{lexer, Error, Location, Span};

/// One part of a complete format description.
///
/// Items are yielded in order by `parse`; each one corresponds to a literal run, an escaped
/// bracket, or a bracketed component. Every variant borrows from the format string via `'a`.
///
/// Fields whose names start with an underscore are populated by the parser but are not read
/// anywhere in this module (presumably kept for diagnostics; confirm against the consumers).
// The `Component` variant carries a `Vec` plus several spans and locations, so it is larger than
// the other variants; the size-difference lint is deliberately silenced here.
#[allow(variant_size_differences)]
pub(super) enum Item<'a> {
    /// A literal string, formatted and parsed as-is.
    Literal {
        /// The raw bytes of the literal.
        value: &'a [u8],
        /// Where the string originates from in the format string.
        _span: Span,
    },
    /// Two consecutive opening brackets. The first acts as the escape character for the second.
    EscapedBracket {
        /// The location of the first (escaping) bracket.
        _first: Location,
        /// The location of the second (escaped) bracket.
        _second: Location,
    },
    /// Part of a type, along with its modifiers.
    Component {
        /// Where the opening bracket was in the format string.
        _opening_bracket: Location,
        /// Whitespace between the opening bracket and name, if there was any.
        _leading_whitespace: Option<Whitespace<'a>>,
        /// The name of the component.
        name: Name<'a>,
        /// The modifiers for the component, in the order they appeared.
        modifiers: Vec<Modifier<'a>>,
        /// Whitespace between the last modifier (or the name) and the closing bracket, if any.
        _trailing_whitespace: Option<Whitespace<'a>>,
        /// Where the closing bracket was in the format string.
        _closing_bracket: Location,
    },
}

/// Whitespace within a component.
///
/// Built from a lexer `ComponentPart` token whose kind is `Whitespace`.
pub(super) struct Whitespace<'a> {
    /// The raw whitespace bytes, borrowed from the format string.
    pub(super) _value: &'a [u8],
    /// Where the whitespace was in the format string.
    pub(super) span: Span,
}

/// The name of a component.
///
/// This is the first non-whitespace part following the opening bracket of a component.
pub(super) struct Name<'a> {
    /// The raw bytes of the name, borrowed from the format string.
    pub(super) value: &'a [u8],
    /// Where the name was in the format string.
    pub(super) span: Span,
}

/// A modifier for a component.
///
/// Modifiers have the textual form `key:value` and are always preceded by whitespace, which is
/// why the leading whitespace is not optional here.
pub(super) struct Modifier<'a> {
    /// Whitespace preceding the modifier.
    pub(super) _leading_whitespace: Whitespace<'a>,
    /// The key of the modifier (the part before the colon).
    pub(super) key: Key<'a>,
    /// Where the colon of the modifier was in the format string.
    pub(super) _colon: Location,
    /// The value of the modifier (the part after the colon).
    pub(super) value: Value<'a>,
}

/// The key of a modifier.
///
/// As produced by `parse_component`, this is the non-empty run of bytes before the first colon
/// of the modifier.
pub(super) struct Key<'a> {
    /// The raw bytes of the key, borrowed from the format string.
    pub(super) value: &'a [u8],
    /// Where the key was in the format string.
    pub(super) span: Span,
}

/// The value of a modifier.
///
/// As produced by `parse_component`, this is the non-empty run of bytes after the first colon of
/// the modifier; any further colons are part of the value.
pub(super) struct Value<'a> {
    /// The raw bytes of the value, borrowed from the format string.
    pub(super) value: &'a [u8],
    /// Where the value was in the format string.
    pub(super) span: Span,
}

/// Lazily turn a stream of lexer tokens into AST items.
///
/// Each call to `next` on the returned iterator consumes as many tokens as are needed to build
/// one [`Item`]: a literal, an escaped bracket (two consecutive opening brackets), or a complete
/// component (delegated to `parse_component`). Iteration ends when the token stream is exhausted.
///
/// # Panics
///
/// Panics if a closing bracket or a component part is encountered at the top level; those tokens
/// are expected to always be consumed by `parse_component`.
pub(super) fn parse<'a>(
    tokens: impl Iterator<Item = lexer::Token<'a>>,
) -> impl Iterator<Item = Result<Item<'a>, Error>> {
    let mut stream = tokens.peekable();
    iter::from_fn(move || {
        let item = match stream.next()? {
            lexer::Token::Literal { value, span } => Ok(Item::Literal { value, _span: span }),
            lexer::Token::Bracket {
                kind: lexer::BracketKind::Opening,
                location: first,
            } => match stream.peek() {
                // A second opening bracket right away means the pair is an escaped bracket.
                Some(&lexer::Token::Bracket {
                    kind: lexer::BracketKind::Opening,
                    location: second,
                }) => {
                    stream.next(); // consume the second bracket
                    Ok(Item::EscapedBracket {
                        _first: first,
                        _second: second,
                    })
                }
                // Anything else starts a component.
                _ => parse_component(first, &mut stream),
            },
            lexer::Token::Bracket {
                kind: lexer::BracketKind::Closing,
                ..
            } => unreachable!(
                "internal error: closing bracket should have been consumed by `parse_component`",
            ),
            lexer::Token::ComponentPart { .. } => unreachable!(
                "internal error: component part should have been consumed by `parse_component`",
            ),
        };
        Some(item)
    })
}

/// Parse a component. This assumes that the opening bracket has already been consumed.
fn parse_component<'a>(
    opening_bracket: Location,
    tokens: &mut Peekable<impl Iterator<Item = lexer::Token<'a>>>,
) -> Result<Item<'a>, Error> {
    let leading_whitespace = if let Some(&lexer::Token::ComponentPart {
        kind: lexer::ComponentKind::Whitespace,
        value,
        span,
    }) = tokens.peek()
    {
        tokens.next(); // consume
        Some(Whitespace {
            _value: value,
            span,
        })
    } else {
        None
    };

    let name = if let Some(&lexer::Token::ComponentPart {
        kind: lexer::ComponentKind::NotWhitespace,
        value,
        span,
    }) = tokens.peek()
    {
        tokens.next(); // consume
        Name { value, span }
    } else {
        let span = leading_whitespace.map_or_else(
            || Span {
                start: opening_bracket,
                end: opening_bracket,
            },
            |whitespace| whitespace.span.shrink_to_end(),
        );
        return Err(Error {
            _inner: span.error("expected component name"),
            public: crate::error::InvalidFormatDescription::MissingComponentName {
                index: span.start_byte(),
            },
        });
    };

    let mut modifiers = Vec::new();
    let trailing_whitespace = loop {
        let whitespace = if let Some(&lexer::Token::ComponentPart {
            kind: lexer::ComponentKind::Whitespace,
            value,
            span,
        }) = tokens.peek()
        {
            tokens.next(); // consume
            Whitespace {
                _value: value,
                span,
            }
        } else {
            break None;
        };

        if let Some(&lexer::Token::ComponentPart {
            kind: lexer::ComponentKind::NotWhitespace,
            value,
            span,
        }) = tokens.peek()
        {
            tokens.next(); // consume

            let colon_index = match value.iter().position(|&b| b == b':') {
                Some(index) => index,
                None => {
                    return Err(Error {
                        _inner: span.error("modifier must be of the form `key:value`"),
                        public: crate::error::InvalidFormatDescription::InvalidModifier {
                            value: String::from_utf8_lossy(value).into_owned(),
                            index: span.start_byte(),
                        },
                    });
                }
            };
            let key = &value[..colon_index];
            let value = &value[colon_index + 1..];

            if key.is_empty() {
                return Err(Error {
                    _inner: span.shrink_to_start().error("expected modifier key"),
                    public: crate::error::InvalidFormatDescription::InvalidModifier {
                        value: String::new(),
                        index: span.start_byte(),
                    },
                });
            }
            if value.is_empty() {
                return Err(Error {
                    _inner: span.shrink_to_end().error("expected modifier value"),
                    public: crate::error::InvalidFormatDescription::InvalidModifier {
                        value: String::new(),
                        index: span.shrink_to_end().start_byte(),
                    },
                });
            }

            modifiers.push(Modifier {
                _leading_whitespace: whitespace,
                key: Key {
                    value: key,
                    span: span.subspan(..colon_index),
                },
                _colon: span.start.offset(colon_index),
                value: Value {
                    value,
                    span: span.subspan(colon_index + 1..),
                },
            });
        } else {
            break Some(whitespace);
        }
    };

    let closing_bracket = if let Some(&lexer::Token::Bracket {
        kind: lexer::BracketKind::Closing,
        location,
    }) = tokens.peek()
    {
        tokens.next(); // consume
        location
    } else {
        return Err(Error {
            _inner: opening_bracket.error("unclosed bracket"),
            public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket {
                index: opening_bracket.byte,
            },
        });
    };

    Ok(Item::Component {
        _opening_bracket: opening_bracket,
        _leading_whitespace: leading_whitespace,
        name,
        modifiers,
        _trailing_whitespace: trailing_whitespace,
        _closing_bracket: closing_bracket,
    })
}