pipa-js 0.1.3

A fast, minimal ES2023 JavaScript runtime built in Rust.
Documentation
use crate::util::memchr::memmem;

#[derive(Clone)]
pub struct StringSearcher {
    pattern: Vec<u8>,

    skip_table: [usize; 256],

    pattern_len: usize,
}

impl StringSearcher {
    pub fn new(pattern: &str) -> Option<Self> {
        let pattern = pattern.as_bytes();
        let pattern_len = pattern.len();

        if pattern_len == 0 {
            return None;
        }

        if pattern_len < 3 {
            return None;
        }

        let mut skip_table = [pattern_len; 256];

        for i in 0..pattern_len - 1 {
            skip_table[pattern[i] as usize] = pattern_len - 1 - i;
        }

        Some(Self {
            pattern: pattern.to_vec(),
            skip_table,
            pattern_len,
        })
    }

    #[inline(always)]
    pub fn find(&self, text: &str) -> Option<usize> {
        let text = text.as_bytes();
        let text_len = text.len();
        let pat = &self.pattern;
        let pat_len = self.pattern_len;

        if pat_len > text_len {
            return None;
        }

        let mut pos = 0;
        let max_pos = text_len - pat_len;

        while pos <= max_pos {
            let mut i = pat_len - 1;

            while text[pos + i] == pat[i] {
                if i == 0 {
                    return Some(pos);
                }
                i -= 1;
            }

            let bad_char = text[pos + pat_len - 1];
            pos += self.skip_table[bad_char as usize];
        }

        None
    }

    pub fn find_all(&self, text: &str) -> Vec<(usize, usize)> {
        let mut matches = Vec::new();
        let mut pos = 0;

        while let Some(m) = self.find(&text[pos..]) {
            let abs_pos = pos + m;
            matches.push((abs_pos, abs_pos + self.pattern_len));
            pos = abs_pos + self.pattern_len;

            if pos >= text.len() {
                break;
            }
        }

        matches
    }
}

pub fn fast_find(haystack: &str, needle: &str) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }

    if needle.len() > haystack.len() {
        return None;
    }

    if needle.len() < 4 {
        return memmem::find(haystack.as_bytes(), needle.as_bytes());
    }

    if let Some(searcher) = StringSearcher::new(needle) {
        searcher.find(haystack)
    } else {
        memmem::find(haystack.as_bytes(), needle.as_bytes())
    }
}

pub fn fast_find_with(searcher: &StringSearcher, haystack: &str) -> Option<usize> {
    searcher.find(haystack)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_bmh_basic() {
        let searcher = StringSearcher::new("world").unwrap();
        assert_eq!(searcher.find("hello world"), Some(6));
    }

    #[test]
    fn test_bmh_not_found() {
        let searcher = StringSearcher::new("xyz").unwrap();
        assert_eq!(searcher.find("hello world"), None);
    }

    #[test]
    fn test_bmh_multiple() {
        let searcher = StringSearcher::new("abc").unwrap();
        assert_eq!(searcher.find("abcabcabc"), Some(0));
    }

    #[test]
    fn test_bmh_long_pattern() {
        let text = "The quick brown fox jumps over the lazy dog. ";
        let pattern = "jumps over";
        let searcher = StringSearcher::new(pattern).unwrap();
        assert_eq!(searcher.find(text), Some(20));
    }

    #[test]
    fn test_fast_find_short() {
        assert_eq!(fast_find("hello", "ll"), Some(2));
        assert_eq!(fast_find("hello", "abc"), None);
    }

    #[test]
    fn test_fast_find_long() {
        let text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ";
        assert_eq!(fast_find(text, "consectetur"), Some(28));
    }

    #[test]
    fn test_find_all() {
        let searcher = StringSearcher::new("abc").unwrap();
        let matches = searcher.find_all("abcxabcxabc");
        assert_eq!(matches, vec![(0, 3), (4, 7), (8, 11)]);
    }
}