Skip to main content

oxihuman_core/
rolling_hash.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! Rolling hash (Rabin-Karp style) for substring search.
5
6#![allow(dead_code)]
7
8const DEFAULT_BASE: u64 = 257;
9const DEFAULT_MODULUS: u64 = 1_000_000_007;
10
11/// Rolling hash state for a sliding window over bytes.
12#[allow(dead_code)]
13pub struct RollingHash {
14    pub window: Vec<u8>,
15    pub hash: u64,
16    pub base: u64,
17    pub modulus: u64,
18    pub window_size: usize,
19}
20
21/// Create a new rolling hash with the given window size, using default base and modulus.
22#[allow(dead_code)]
23pub fn new_rolling_hash(window_size: usize) -> RollingHash {
24    RollingHash {
25        window: Vec::with_capacity(window_size),
26        hash: 0,
27        base: DEFAULT_BASE,
28        modulus: DEFAULT_MODULUS,
29        window_size,
30    }
31}
32
33/// Push a byte into the rolling hash window.
34/// If the window is already full, the oldest byte is evicted.
35#[allow(dead_code)]
36pub fn rolling_hash_push(rh: &mut RollingHash, byte: u8) {
37    let ws = rh.window_size;
38    if ws == 0 {
39        return;
40    }
41    if rh.window.len() == ws {
42        // Recompute hash from scratch (simple correctness-first approach)
43        rh.window.remove(0);
44    }
45    rh.window.push(byte);
46    // Recompute hash over current window
47    let mut h: u64 = 0;
48    let base = rh.base;
49    let modulus = rh.modulus;
50    for &b in &rh.window {
51        h = (h.wrapping_mul(base).wrapping_add(b as u64)) % modulus;
52    }
53    rh.hash = h;
54}
55
56/// Return the current hash value.
57#[allow(dead_code)]
58pub fn rolling_hash_value(rh: &RollingHash) -> u64 {
59    rh.hash
60}
61
62/// Compute a simple polynomial hash of the given byte slice.
63#[allow(dead_code)]
64pub fn simple_hash(data: &[u8]) -> u64 {
65    let mut h: u64 = 0;
66    for &b in data {
67        h = (h.wrapping_mul(DEFAULT_BASE).wrapping_add(b as u64)) % DEFAULT_MODULUS;
68    }
69    h
70}
71
72/// Find the first occurrence of `needle` in `haystack` using rolling hash.
73/// Returns the starting index or None if not found.
74#[allow(dead_code)]
75pub fn find_pattern(haystack: &[u8], needle: &[u8]) -> Option<usize> {
76    if needle.is_empty() {
77        return Some(0);
78    }
79    if needle.len() > haystack.len() {
80        return None;
81    }
82    let target_hash = simple_hash(needle);
83    let mut rh = new_rolling_hash(needle.len());
84    // Fill window
85    for &b in &haystack[..needle.len()] {
86        rolling_hash_push(&mut rh, b);
87    }
88    if rolling_hash_value(&rh) == target_hash && &haystack[..needle.len()] == needle {
89        return Some(0);
90    }
91    for i in needle.len()..haystack.len() {
92        rolling_hash_push(&mut rh, haystack[i]);
93        let start = i + 1 - needle.len();
94        if rolling_hash_value(&rh) == target_hash
95            && &haystack[start..start + needle.len()] == needle
96        {
97            return Some(start);
98        }
99    }
100    None
101}
102
103#[cfg(test)]
104mod tests {
105    use super::*;
106
107    #[test]
108    fn new_rolling_hash_empty() {
109        let rh = new_rolling_hash(4);
110        assert_eq!(rolling_hash_value(&rh), 0);
111        assert!(rh.window.is_empty());
112    }
113
114    #[test]
115    fn simple_hash_same_input_same_output() {
116        let h1 = simple_hash(b"hello");
117        let h2 = simple_hash(b"hello");
118        assert_eq!(h1, h2);
119    }
120
121    #[test]
122    fn simple_hash_different_inputs() {
123        let h1 = simple_hash(b"hello");
124        let h2 = simple_hash(b"world");
125        assert_ne!(h1, h2);
126    }
127
128    #[test]
129    fn find_pattern_found_at_start() {
130        assert_eq!(find_pattern(b"abcdef", b"abc"), Some(0));
131    }
132
133    #[test]
134    fn find_pattern_found_at_end() {
135        assert_eq!(find_pattern(b"abcdef", b"def"), Some(3));
136    }
137
138    #[test]
139    fn find_pattern_found_in_middle() {
140        assert_eq!(find_pattern(b"abcdef", b"bcd"), Some(1));
141    }
142
143    #[test]
144    fn find_pattern_not_found() {
145        assert!(find_pattern(b"abcdef", b"xyz").is_none());
146    }
147
148    #[test]
149    fn find_pattern_empty_needle() {
150        assert_eq!(find_pattern(b"abc", b""), Some(0));
151    }
152
153    #[test]
154    fn find_pattern_needle_longer_than_haystack() {
155        assert!(find_pattern(b"ab", b"abc").is_none());
156    }
157
158    #[test]
159    fn rolling_hash_consistent_with_simple_hash() {
160        let data = b"hello";
161        let mut rh = new_rolling_hash(data.len());
162        for &b in data.iter() {
163            rolling_hash_push(&mut rh, b);
164        }
165        assert_eq!(rolling_hash_value(&rh), simple_hash(data));
166    }
167}