//! keyword-parser 0.0.3
//!
//! Keyword parser for combine.
//!
//! Documentation
use combine:: {
    Parser, ParseError, ParseResult::{ self, * }, RangeStream,
    stream::uncons_range,
};
use core::marker::PhantomData;

/// Parser that holds a `(key, value)` table and matches the longest key found
/// at the start of the input.
///
/// `K` and `V` only appear inside the bound on `T`, so they are anchored with
/// `PhantomData` markers to keep them as used type parameters.
struct LongestMatcher<K, V, T>
where
    K: AsRef<str>,
    V: Copy,
    T: AsRef<[(K, V)]>,
{
    /// The table of `(key, value)` pairs, viewed as a slice through `AsRef`.
    tbl: T,
    /// Marker for the key type.
    _key: PhantomData<K>,
    /// Marker for the value type.
    _value: PhantomData<V>,
}

impl<K, V, T> LongestMatcher<K, V, T>
where
    K: AsRef<str>, V: Copy, T: AsRef<[(K, V)]>,
{
    fn new(tbl: T) -> Self {
        Self {
            tbl,
            _key: PhantomData,
            _value: PhantomData,
        }
    }
}

impl<'a, I, K, T, V> Parser<I> for LongestMatcher<K, V, T>
where
    I: RangeStream<Range = &'a str>,
    I::Token: Copy + Ord,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    K: AsRef<str>, V: Copy, T: AsRef<[(K, V)]>,
{
    type Output = V;
    type PartialState = ();

    /// Yields the value paired with the longest table key that is a prefix of
    /// the remaining input, and consumes exactly that key.
    ///
    /// The input is probed one character at a time: each successively longer
    /// prefix is binary-searched in the table, which therefore has to be sorted
    /// by key. Probing stops as soon as no key can start with the current
    /// prefix.
    ///
    /// Returns `PeekErr` with an empty error at the starting position, without
    /// consuming anything, when no key is a prefix of the input.
    fn parse_lazy(&mut self, input: &mut I) -> ParseResult<Self::Output, I::Error> {
        let position = input.position();
        let rest = input.range();
        let tbl = self.tbl.as_ref();

        // Longest key matched so far, as (table index, key length in bytes).
        // The length is recorded together with the index so that only the
        // matched key is consumed, not the longer prefix that was merely probed
        // while looking for a better match.
        let mut best: Option<(usize, usize)> = None;

        for (start, ch) in rest.char_indices() {
            // Always cut on a char boundary so slicing cannot panic.
            let end = start + ch.len_utf8();
            let prefix = &rest[..end];

            // In a sorted table every key that starts with `prefix` sits in one
            // contiguous run beginning at the search position, so inspecting a
            // single neighbour is enough to know whether a longer key may
            // still match.
            match tbl.binary_search_by_key(&prefix, |entry| entry.0.as_ref()) {
                Ok(idx) => {
                    best = Some((idx, end));
                    let extendable = tbl
                        .get(idx + 1)
                        .map_or(false, |next| next.0.as_ref().starts_with(prefix));
                    if !extendable {
                        break;
                    }
                }
                Err(idx) => {
                    let viable = tbl
                        .get(idx)
                        .map_or(false, |next| next.0.as_ref().starts_with(prefix));
                    if !viable {
                        break;
                    }
                }
            }
        }

        match best {
            Some((idx, len)) => uncons_range(input, len).map(|_| tbl[idx].1),
            None => PeekErr(I::Error::empty(position).into()),
        }
    }
}

/// Builds a parser that finds the longest keyword at the start of the input.
///
/// `tbl` is a slice-like table of `(key, value)` pairs. It is probed with a
/// binary search on the keys, so it must be sorted by key in `str` order.
///
/// The parser yields the value paired with the longest key that the input
/// starts with; the input may continue after the key. It fails when no key is
/// a prefix of the input.
///
/// # Examples
///
/// ```
/// # use const_array_attrs::sorted;
/// # use keyword_parser::bin_searcher::*;
/// # use combine::EasyParser;
/// #[sorted]
/// const DEF: [(&str, i8); 5] = [
///     ("aaaa", 0),
///     ("a", 1),
///     ("A", 2),
///     ("AVAV", 3),
///     ("こんにちは", 4),
/// ];
///
/// let result = longest_matcher(&DEF).easy_parse("こんにちは 世界");
///
/// assert_eq!(result, Ok((4, " 世界")));
/// ```
pub fn longest_matcher<'a, I, K, V, T>(tbl: T) -> impl 'a + Parser<I, Output = V>
where
    I: 'a + RangeStream<Range = &'a str>,
    I::Token: Copy + Ord,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    K: 'a + AsRef<str>, V: 'a + Copy, T: 'a + AsRef<[(K, V)]>,
{
    LongestMatcher::new(tbl)
}

/// Parser that holds a `(key, value)` table and matches the remaining input
/// against the keys as a whole.
///
/// `K` and `V` only appear inside the bound on `T`, so they are anchored with
/// `PhantomData` markers to keep them as used type parameters.
struct SimpleMatcher<K, V, T>
where
    K: AsRef<str>,
    V: Copy,
    T: AsRef<[(K, V)]>,
{
    /// The table of `(key, value)` pairs, viewed as a slice through `AsRef`.
    tbl: T,
    /// Marker for the key type.
    _key: PhantomData<K>,
    /// Marker for the value type.
    _value: PhantomData<V>,
}

impl<K, V, T> SimpleMatcher<K, V, T>
where
    K: AsRef<str>, V: Copy, T: AsRef<[(K, V)]>,
{
    fn new(tbl: T) -> Self {
        Self {
            tbl,
            _key: PhantomData,
            _value: PhantomData,
        }
    }
}

impl<'a, I, K, T, V> Parser<I> for SimpleMatcher<K, V, T>
where
    I: RangeStream<Range = &'a str>,
    I::Token: Copy + Ord,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    K: AsRef<str>, V: Copy, T: AsRef<[(K, V)]>,
{
    type Output = V;
    type PartialState = ();

    fn parse_lazy(&mut self, input: &mut I) -> ParseResult<Self::Output, I::Error> {
        let position = input.position();

        match uncons_range(input, input.range().len()) {
            CommitOk(s) | PeekOk(s) => match self.tbl.as_ref().binary_search_by_key(&s, |e| e.0.as_ref()) {
                Ok(idx) => CommitOk(self.tbl.as_ref()[idx].1),
                _ => PeekErr(I::Error::empty(position).into()),
            },
            CommitErr(e) => CommitErr(e),
            PeekErr(e) => PeekErr(e),
        }
    }
}

/// Builds a parser that looks up the whole remaining input as a keyword.
///
/// `tbl` is a slice-like table of `(key, value)` pairs. It is probed with a
/// binary search on the keys, so it must be sorted by key in `str` order.
///
/// The parser only matches when the remaining input is exactly equal to one of
/// the keys, and then yields the value paired with that key.
///
/// # Examples
///
/// ```
/// # use const_array_attrs::sorted;
/// # use keyword_parser::bin_searcher::*;
/// # use combine::EasyParser;
/// #[sorted]
/// const DEF: [(&str, i8); 5] = [
///     ("aaaa", 0),
///     ("a", 1),
///     ("A", 2),
///     ("AVAV", 3),
///     ("\u{2014}", 4),
/// ];
///
/// let result = simple_matcher(&DEF).easy_parse("\u{2014}");
///
/// assert_eq!(result, Ok((4, "")));
/// ```
pub fn simple_matcher<'a, I, K, V, T>(tbl: T) -> impl 'a + Parser<I, Output = V>
where
    I: 'a + RangeStream<Range = &'a str>,
    I::Token: Copy + Ord,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    K: 'a + AsRef<str>, V: 'a + Copy, T: 'a + AsRef<[(K, V)]>,
{
    SimpleMatcher::new(tbl)
}

#[cfg(test)]
mod tests {
    use super::*;
    use combine::EasyParser;
    use const_array_attrs::sorted;

    // The entries are not in key order as written. `#[sorted]` presumably
    // reorders them at compile time so the binary search precondition holds;
    // confirm against `const_array_attrs`.
    #[sorted]
    const DEF: [(&str, i8); 5] = [
        ("aaaa", 0),
        ("a", 1),
        ("A", 2),
        ("AVAV", 3),
        ("こんにちは", 4),
    ];

    /// A complete key is recognised and the text following it is left over.
    #[test]
    fn test_longest_match() {
        let text = "こんにちは 世界";
        let parsed = longest_matcher(&DEF).easy_parse(text);
        assert_eq!(parsed, Ok((4, " 世界")));
    }
}