changxi 0.3.0

TUI EPUB Reader
use std::cmp::{max, min};

pub struct MaxShiftHorspool {
    skip_table: [usize; 256],
    suffix: Vec<usize>,
    pattern: Vec<u8>,
}

impl MaxShiftHorspool {
    pub fn new(pattern: &str) -> Self {
        let pattern_bytes = pattern.as_bytes().to_vec();
        let length = pattern_bytes.len();
        let mut skip_table = [length; 256];

        for i in 0..length - 1 {
            skip_table[pattern_bytes[i] as usize] = length - 1 - i;
        }

        let suffix = Self::build_suffix_array(&pattern_bytes);

        Self {
            skip_table,
            suffix,
            pattern: pattern_bytes,
        }
    }

    fn build_suffix_array(pattern: &[u8]) -> Vec<usize> {
        let length = pattern.len();
        let mut suffix = vec![0; length];
        let mut z = vec![0; length + 1];

        if length == 0 {
            return suffix;
        }

        let reversed: Vec<u8> = pattern.iter().rev().copied().collect();
        z[0] = length;
        let mut l = 0;
        let mut r = 0;

        for i in 1..length {
            if i <= r {
                z[i] = min(r - i + 1, z[i - l]);
            }
            while i + z[i] < length && reversed[z[i]] == reversed[i + z[i]] {
                z[i] += 1;
            }
            if i + z[i] - 1 > r {
                l = i;
                r = i + z[i] - 1;
            }
        }

        (0..length).for_each(|i| {
            suffix[i] = length;
        });

        (0..length).for_each(|i| {
            let j = i + z[i] - 1;
            if j < length {
                suffix[j] = length - z[i];
            }
        });

        suffix
    }

    pub fn find_all(&self, text: &str) -> Vec<usize> {
        let text_bytes = text.as_bytes();
        let text_length = text_bytes.len();
        let pattern_length = self.pattern.len();

        if pattern_length == 0 || pattern_length > text_length {
            return Vec::new();
        }

        let mut position = Vec::new();
        let mut pos = 0;

        while pos <= text_length - pattern_length {
            let mut i = pattern_length - 1;
            while i < pattern_length && self.pattern[i] == text_bytes[pos + i] {
                if i == 0 {
                    position.push(pos);
                    break;
                }
                i -= 1;
            }

            if i < pattern_length && i != usize::MAX {
                let bad_apple = self.skip_table[text_bytes[pos + pattern_length - 1] as usize];
                let good_girl = if i < pattern_length - 1 {
                    self.suffix[i + 1]
                } else {
                    1
                };

                pos += max(bad_apple, good_girl);
            } else {
                pos += 1;
            }
        }

        position
    }
}