// alkale 2.0.0
//
// A simple LL(1) lexer library for Rust.
// Documentation
//! Sub-module of [`common`][crate::common] for parsing identifiers.

use core::str;

use crate::{span::Spanned, SourceCodeScanner};

impl<'src> SourceCodeScanner<'src> {
    /// Attempts to consume an identifier whose shape is described by two predicates.
    ///
    /// `first_predicate` is tested against the next character only. If it accepts, that
    /// character is consumed and `rest_predicate` is then tested against each following
    /// character, consuming until it rejects one or the input runs out.
    ///
    /// On success, the consumed text is returned as a [`prim@str`] reference together with the
    /// [`Span`][crate::span::Span] of the consumed region. If there is no next character, or
    /// `first_predicate` rejects it, [`None`] is returned and no characters are consumed.
    ///
    /// # Examples
    /// ```rust
    /// # use alkale::SourceCodeScanner;
    /// # use alkale::span::Span;
    /// // Identifiers here look like `#` followed by ASCII letters.
    /// fn first_char(c: char) -> bool {
    ///     c == '#'
    /// }
    ///
    /// fn rest_char(c: char) -> bool {
    ///     c.is_ascii_alphabetic()
    /// }
    ///
    /// let scanner = SourceCodeScanner::new("#abc h #whatever");
    ///
    /// # unsafe {
    /// assert_eq!(
    ///     scanner.try_consume_identifier(first_char, rest_char),
    ///     Some(Span::new(0, 4).wrap("#abc"))
    /// );
    ///
    /// // Step over the space.
    /// scanner.skip();
    ///
    /// // `h` does not start with `#`, so nothing is consumed.
    /// assert_eq!(
    ///     scanner.try_consume_identifier(first_char, rest_char),
    ///     None
    /// );
    ///
    /// assert_eq!(scanner.next(), Some('h'));
    /// scanner.skip();
    ///
    /// assert_eq!(
    ///     scanner.try_consume_identifier(first_char, rest_char),
    ///     Some(Span::new(7, 9).wrap("#whatever"))
    /// );
    ///
    /// // End of input.
    /// assert_eq!(
    ///     scanner.try_consume_identifier(first_char, rest_char),
    ///     None
    /// );
    /// # }
    /// ```
    ///
    /// # See Also
    /// To match typical language identifiers (that is, `[a-zA-Z_][a-zA-Z0-9_]*`), use
    /// [`try_consume_standard_identifier`][Self::try_consume_standard_identifier].
    #[inline]
    pub fn try_consume_identifier<F: Fn(char) -> bool, G: Fn(char) -> bool>(
        &self,
        first_predicate: F,
        rest_predicate: G,
    ) -> Option<Spanned<&'src str>> {
        // Bail out before consuming anything unless a next character exists
        // and it is an acceptable identifier start.
        match self.peek() {
            Some(head) if first_predicate(head) => {}
            _ => return None,
        }

        Some(self.capture_str(|| {
            // The head character was validated above, so take it unconditionally.
            self.skip();

            // Keep taking characters for as long as the rest-predicate accepts them.
            while matches!(self.peek(), Some(next) if rest_predicate(next)) {
                self.skip();
            }
        }))
    }

    /// Attempts to consume an identifier matching the regex `[a-zA-Z_][a-zA-Z0-9_]*`.
    ///
    /// On a match, this method returns a [`prim@str`] reference of all consumed characters
    /// along with the [`Span`][crate::span::Span] of the consumed region. If the pattern
    /// does not match, [`None`] is returned and no characters are consumed.
    ///
    /// # Examples
    /// ```rust
    /// # use alkale::SourceCodeScanner;
    /// # use alkale::span::Span;
    /// let scanner = SourceCodeScanner::new("ident 3 whatever");
    ///
    /// # unsafe {
    /// assert_eq!(
    ///     scanner.try_consume_standard_identifier(),
    ///     Some(Span::new(0, 5).wrap("ident"))
    /// );
    ///
    /// // Step over the space.
    /// scanner.skip();
    ///
    /// // A digit cannot start a standard identifier, so nothing is consumed.
    /// assert_eq!(
    ///     scanner.try_consume_standard_identifier(),
    ///     None
    /// );
    ///
    /// assert_eq!(scanner.next(), Some('3'));
    /// scanner.skip();
    ///
    /// assert_eq!(
    ///     scanner.try_consume_standard_identifier(),
    ///     Some(Span::new(8, 8).wrap("whatever"))
    /// );
    ///
    /// // End of input.
    /// assert_eq!(
    ///     scanner.try_consume_standard_identifier(),
    ///     None
    /// );
    ///
    /// # }
    /// ```
    ///
    /// # See Also
    /// To match identifiers with custom character rules, use
    /// [`try_consume_identifier`][Self::try_consume_identifier].
    #[inline]
    pub fn try_consume_standard_identifier(&self) -> Option<Spanned<&'src str>> {
        // `[a-zA-Z_]`: characters allowed in the first position.
        fn is_head(c: char) -> bool {
            matches!(c, 'a'..='z' | 'A'..='Z' | '_')
        }

        // `[a-zA-Z0-9_]`: characters allowed in every later position.
        fn is_tail(c: char) -> bool {
            matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
        }

        self.try_consume_identifier(is_head, is_tail)
    }
}

#[cfg(test)]
mod tests {
    use crate::{
        span::{Span, Spannable},
        SourceCodeScanner,
    };

    /// Generates a `#[test]` function named `$name`. For every `source => expected` pair it
    /// builds a fresh scanner over `source` and checks that a single call to
    /// `try_consume_standard_identifier` yields `expected`.
    macro_rules! edge_case {
        ($name:ident, $($code:literal => $result:expr),* $(,)?) => {
            #[test]
            fn $name() {
                $({
                    let scanner = SourceCodeScanner::new($code);
                    let found = scanner.try_consume_standard_identifier();

                    assert_eq!(found, $result);
                })*
            }
        }
    }

    /// Walks `try_consume_standard_identifier` across a source containing identifiers,
    /// a lone digit and a `$` separator.
    #[test]
    fn try_parse_standard_identifier() {
        let scanner = SourceCodeScanner::new("awesome b23 0 ident$ifier");

        // SAFETY: Spans are valid.
        unsafe {
            let found = scanner.try_consume_standard_identifier();
            assert_eq!(found, Some("awesome".spanned(Span::new(0, 7))));

            // Step over the space.
            scanner.skip();

            let found = scanner.try_consume_standard_identifier();
            assert_eq!(found, Some("b23".spanned(Span::new(8, 3))));

            scanner.skip();

            // `0` cannot start a standard identifier, so it must be left in place.
            assert_eq!(scanner.try_consume_standard_identifier(), None);
            assert_eq!(scanner.next(), Some('0'));

            scanner.skip();

            // `$` is not an identifier character, so it splits `ident$ifier` in two.
            let found = scanner.try_consume_standard_identifier();
            assert_eq!(found, Some("ident".spanned(Span::new(14, 5))));

            assert_eq!(scanner.next(), Some('$'));

            let found = scanner.try_consume_standard_identifier();
            assert_eq!(found, Some("ifier".spanned(Span::new(20, 5))));

            // Nothing is left to consume.
            assert_eq!(scanner.try_consume_standard_identifier(), None);
            assert!(!scanner.has_next());
        }
    }

    /// Walks `try_consume_identifier` across the same source with custom predicates.
    #[test]
    fn try_parse_identifier() {
        // Any alphanumeric character may start an identifier here.
        fn initial(c: char) -> bool {
            c.is_alphanumeric()
        }

        // After the first character, `$` is accepted as well.
        fn internal(c: char) -> bool {
            c == '$' || c.is_alphanumeric()
        }

        let scanner = SourceCodeScanner::new("awesome b23 0 ident$ifier");

        // SAFETY: Spans are all valid.
        unsafe {
            let found = scanner.try_consume_identifier(initial, internal);
            assert_eq!(found, Some("awesome".spanned(Span::new(0, 7))));

            // Step over the space.
            scanner.skip();

            let found = scanner.try_consume_identifier(initial, internal);
            assert_eq!(found, Some("b23".spanned(Span::new(8, 3))));

            scanner.skip();

            // With these predicates a lone digit is an identifier in its own right.
            let found = scanner.try_consume_identifier(initial, internal);
            assert_eq!(found, Some("0".spanned(Span::new(12, 1))));

            scanner.skip();

            // `$` is an accepted interior character, so the whole word is one identifier.
            let found = scanner.try_consume_identifier(initial, internal);
            assert_eq!(found, Some("ident$ifier".spanned(Span::new(14, 11))));

            // Nothing is left to consume.
            assert_eq!(scanner.try_consume_identifier(initial, internal), None);
            assert!(!scanner.has_next());
        }
    }

    edge_case!(empty, "" => None);
    edge_case!(space, " " => None);
    // SAFETY: Span is valid.
    edge_case!(single_char, "x" => unsafe { Some("x".spanned(Span::new(0, 1))) });
    // SAFETY: Spans are valid.
    edge_case!(garbage_after,
        "ident@#%)_@)%" => unsafe { Some("ident".spanned(Span::new(0, 5))) },
        "x@" => unsafe { Some("x".spanned(Span::new(0, 1))) },
    );
    // Leading whitespace is not skipped, so the first character fails the match.
    edge_case!(untrimmed, "  ident  " => None);
}