algorithms_fourth 0.1.10

用rust实现算法4书中的算法,作为rust的学习实践
Documentation
//! Boyer-Moore字符串匹配算法(启发式地处理不匹配的字符)
//!
//! # 使用
//! ```
//!         use algorithms_fourth::search::boyer_moore::BoyerMoore;
//!         let kmp = BoyerMoore::new("abc".to_string());
//!         assert_eq!(kmp.search("ababac"), 6);
//!         assert_eq!(kmp.search("ababcc"), 2);
//!         assert_eq!(kmp.search("ababccababcc"), 2);
//!         assert_eq!(kmp.search("abcbcc"), 0);
//!         assert_eq!(kmp.search("babcbcc"), 1);
//!         assert_eq!(kmp.search("aabcbcc"), 1);
//!         assert_eq!(kmp.search("ab"), 2);
//!
//! ```
//!
//! # 原理
//! <pre>
//! 1. 希望检测的字符少而跳过的字符多,从左向右扫描显然不行,所以这里从右向左扫描
//! 2. 知道能不能跳过,必须根据目标字符在某个区域存不存在来决定
//! 3. 结合1、2,通过right来记录每次字符c在模式中的最右索引,来决定跳跃数。i为txt的当前的偏移量,
//! txt与pat的字符对应情况应该是:txt[i+j] = pat[j]。j从大到小
//!     a. 如果所有j对应的情况都匹配,说明模式匹配成功,结果是i
//!     b. 否则,要根据已有信息分析跳跃步数(skip)
//!         b1. 如果txt[i+j]对应的字符只存在j的左侧,假设位置是p,那么移动j-p,模式才有匹配成功的可能
//!         b2. 如果right[j] <= j,移动pat.len() - j,模式才有匹配成功的可能(书中没有,仅为自己推理,所以代码不做实现)
//!   
//! </pre>
//!
const R: usize = 256;
pub struct BoyerMoore {
    right: [usize; R],
    pat: String,
}
fn char_at(s: &str, d: usize) -> usize {
    s.as_bytes()[d] as usize
}
impl BoyerMoore {
    pub fn new(pat: String) -> Self {
        let m = pat.len();
        let mut right = [0; R];
        for j in 0..m {
            right[char_at(&pat, j)] = j + 1;
        }
        BoyerMoore { pat, right }
    }
    pub fn search(&self, txt: &str) -> usize {
        let n = txt.len();
        let m = self.pat.len();
        let mut skip: usize;
        let mut i = 0;
        if n < m {
            return n;
        }
        let end = n - m;
        while i <= end {
            skip = 0;
            for j in (0..m).rev() {
                let p = char_at(txt, i + j);
                if char_at(&self.pat, j) != p {
                    if self.is_left_of_pos(p, j) {
                        skip = j + 1 - self.right[p];
                        assert!(skip >= 1, "is_left_of_pos 计算错误:{},{}", skip, j);
                    } else {
                        skip = 1;
                    }
                    break;
                }
            }
            if skip == 0 {
                return i;
            }
            i += skip;
        }
        n
    }
    /// 判断字符c是否只在pos的左侧出现
    pub fn is_left_of_pos(&self, c: usize, pos: usize) -> bool {
        // 因为usize类型关系,为了防止越界,两边都+1
        self.right[c] < pos + 1
    }
}
#[cfg(test)]
mod test {
    use super::BoyerMoore;

    #[test]
    fn test() {
        let kmp = BoyerMoore::new("abc".to_string());
        assert_eq!(kmp.search("ababac"), 6);
        assert_eq!(kmp.search("ababcc"), 2);
        assert_eq!(kmp.search("ababccababcc"), 2);
        assert_eq!(kmp.search("abcbcc"), 0);
        assert_eq!(kmp.search("babcbcc"), 1);
        assert_eq!(kmp.search("aabcbcc"), 1);
        assert_eq!(kmp.search("ab"), 2);
    }
}