sima-rs 0.1.0

A blazingly fast simple string matcher library.
Documentation
// NOTE(review): nothing visible in this file calls `Iterator::advance_by`. If the
// `tests` module does not use it either, this feature gate (which requires a
// nightly toolchain) can probably be dropped — confirm before removing.
#![feature(iter_advance_by)]

// NOTE(review): declared without `#[cfg(test)]` at this site; presumably the
// module gates itself internally — verify, otherwise it is compiled into
// non-test builds as well.
mod tests;

use memchr::memmem;
use std::str::Chars;

/// Reports whether `str` satisfies `pattern`, with no complexity limit.
///
/// In the pattern, `?` stands for exactly one character and `*` for any run
/// of characters; everything else must match literally. The pattern `"*"`
/// is accepted immediately without looking at `str`.
#[inline]
pub fn simple_match(str: &str, pattern: &str) -> bool {
    // A negative limit makes `pat_match` skip its complexity check entirely.
    pattern == "*" || matches!(pat_match(str, pattern, -1), MatchResult::Match)
}

/// Matches `pattern` against `str`, aborting once the pattern's wildcard
/// complexity reaches `limit`.
///
/// A `limit` of `0` disables the check, because `pat_match` only enforces
/// strictly positive limits.
///
/// # Errors
///
/// Returns `Err("limit reached")` when matching was abandoned because the
/// limit was hit before a verdict could be reached.
#[inline]
pub fn simple_match_limit(str: &str, pattern: &str, limit: u32) -> Result<bool, &'static str> {
    // `pat_match` takes a signed limit in which non-positive values mean
    // "unlimited". A plain `as i32` cast wraps anything above `i32::MAX` to a
    // negative number and would silently switch the limit off, so saturate.
    let capped = limit.min(i32::MAX as u32) as i32;

    match pat_match(str, pattern, capped) {
        MatchResult::Match => Ok(true),
        MatchResult::NoMatch => Ok(false),
        MatchResult::Stop => Err("limit reached"),
    }
}

/// Outcome of one matching pass over a pattern.
///
/// `Debug` is derived so values can be used with `assert_eq!` and `dbg!`;
/// the enum is a plain tag, so it is also `Copy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MatchResult {
    /// The input does not satisfy the pattern.
    NoMatch,
    /// The input satisfies the pattern.
    Match,
    /// Matching was abandoned because the complexity limit was reached.
    Stop,
}

/// Walks `pattern` and `str` side by side and reports whether they match.
///
/// Pattern syntax:
/// * `?` consumes exactly one character of `str`;
/// * `*` matches any run of characters (consecutive `*` are collapsed) and is
///   resolved by `suffix_match`; a trailing `*` matches whatever is left;
/// * any other character must equal the next character of `str`.
///
/// `limit` bounds the number of non-trailing `*` groups that may be
/// evaluated: as soon as that count reaches `limit`, `MatchResult::Stop` is
/// returned. A `limit` of zero or less disables the check.
///
/// There is no backtracking: the first failed comparison after a wildcard
/// has been resolved yields `MatchResult::NoMatch`.
#[inline]
fn pat_match(str: &str, pattern: &str, limit: i32) -> MatchResult {
    let mut complexity = 0;
    let mut pat_it = pattern.chars();
    let mut str_it = str.chars();

    // A `for` loop cannot be used: `suffix_match` also advances `pat_it`.
    while let Some(c) = pat_it.next() {
        match c {
            '?' => {
                if str_it.next().is_none() {
                    return MatchResult::NoMatch;
                }
            }
            '*' => {
                // Look one character ahead without consuming it.
                match pat_it.clone().next() {
                    // Collapse runs of '*'; the last one of the run does the work.
                    Some('*') => continue,
                    // Nothing follows the '*', so it swallows the rest of `str`.
                    None => return MatchResult::Match,
                    Some(_) => {}
                }

                // Count each wildcard exactly once. Previously the counter was
                // bumped a second time after this check, so an active limit
                // was effectively halved.
                if limit > 0 {
                    complexity += 1;
                    if complexity >= limit {
                        return MatchResult::Stop;
                    }
                }

                if suffix_match(&mut str_it, &mut pat_it) != MatchResult::Match {
                    return MatchResult::NoMatch;
                }
            }
            literal => {
                if str_it.next() != Some(literal) {
                    return MatchResult::NoMatch;
                }
            }
        }
    }

    // The pattern is exhausted; it is a match only if `str` is exhausted too.
    if str_it.next().is_none() {
        MatchResult::Match
    } else {
        MatchResult::NoMatch
    }
}

/// Resolves a `*` wildcard by moving `str` past the right-most occurrence of
/// the literal run that follows the wildcard in the pattern.
///
/// On entry `pat` is positioned just after the `*`. The literal run is the
/// pattern text up to the next `*` or `?` (or the end of the pattern). When
/// the literal is found, both iterators are left immediately after it and
/// `MatchResult::Match` is returned; otherwise neither iterator is moved and
/// `MatchResult::NoMatch` is returned.
///
/// NOTE(review): the search always takes the right-most occurrence and is
/// never retried, so patterns that need an earlier occurrence (for example
/// `*ab*ab` against `abab`) are reported as non-matching — confirm whether
/// that limitation is intended.
#[inline]
fn suffix_match(str: &mut Chars<'_>, pat: &mut Chars<'_>) -> MatchResult {
    let haystack = str.as_str();
    let remaining_pattern = pat.as_str();
    let literal_len = find_next_wildcard(remaining_pattern.as_bytes());
    let literal = &remaining_pattern.as_bytes()[..literal_len];

    match memmem::rfind(haystack.as_bytes(), literal) {
        Some(offset) => {
            // `literal_len` and `offset` are BYTE counts. Stepping the `Chars`
            // iterators that many times (one character per step) over-advances
            // on multi-byte UTF-8 input, e.g. `*a?b` failed against `ééa_b`.
            // Re-slice by byte offset instead. Both offsets sit on character
            // boundaries: the literal ends at an ASCII wildcard or the end of
            // the pattern, and a UTF-8 literal can only match at a boundary.
            *pat = remaining_pattern[literal_len..].chars();
            *str = haystack[offset + literal_len..].chars();
            MatchResult::Match
        }
        None => MatchResult::NoMatch,
    }
}

/// Returns the byte index of the first `*` or `?` in `pattern`, or
/// `pattern.len()` when the slice contains neither.
#[inline(always)]
fn find_next_wildcard(pattern: &[u8]) -> usize {
    match memchr::memchr2(b'*', b'?', pattern) {
        Some(index) => index,
        None => pattern.len(),
    }
}