oxihuman_core/
rolling_hash.rs1#![allow(dead_code)]
7
8const DEFAULT_BASE: u64 = 257;
9const DEFAULT_MODULUS: u64 = 1_000_000_007;
10
11#[allow(dead_code)]
13pub struct RollingHash {
14 pub window: Vec<u8>,
15 pub hash: u64,
16 pub base: u64,
17 pub modulus: u64,
18 pub window_size: usize,
19}
20
21#[allow(dead_code)]
23pub fn new_rolling_hash(window_size: usize) -> RollingHash {
24 RollingHash {
25 window: Vec::with_capacity(window_size),
26 hash: 0,
27 base: DEFAULT_BASE,
28 modulus: DEFAULT_MODULUS,
29 window_size,
30 }
31}
32
33#[allow(dead_code)]
36pub fn rolling_hash_push(rh: &mut RollingHash, byte: u8) {
37 let ws = rh.window_size;
38 if ws == 0 {
39 return;
40 }
41 if rh.window.len() == ws {
42 rh.window.remove(0);
44 }
45 rh.window.push(byte);
46 let mut h: u64 = 0;
48 let base = rh.base;
49 let modulus = rh.modulus;
50 for &b in &rh.window {
51 h = (h.wrapping_mul(base).wrapping_add(b as u64)) % modulus;
52 }
53 rh.hash = h;
54}
55
56#[allow(dead_code)]
58pub fn rolling_hash_value(rh: &RollingHash) -> u64 {
59 rh.hash
60}
61
62#[allow(dead_code)]
64pub fn simple_hash(data: &[u8]) -> u64 {
65 let mut h: u64 = 0;
66 for &b in data {
67 h = (h.wrapping_mul(DEFAULT_BASE).wrapping_add(b as u64)) % DEFAULT_MODULUS;
68 }
69 h
70}
71
72#[allow(dead_code)]
75pub fn find_pattern(haystack: &[u8], needle: &[u8]) -> Option<usize> {
76 if needle.is_empty() {
77 return Some(0);
78 }
79 if needle.len() > haystack.len() {
80 return None;
81 }
82 let target_hash = simple_hash(needle);
83 let mut rh = new_rolling_hash(needle.len());
84 for &b in &haystack[..needle.len()] {
86 rolling_hash_push(&mut rh, b);
87 }
88 if rolling_hash_value(&rh) == target_hash && &haystack[..needle.len()] == needle {
89 return Some(0);
90 }
91 for i in needle.len()..haystack.len() {
92 rolling_hash_push(&mut rh, haystack[i]);
93 let start = i + 1 - needle.len();
94 if rolling_hash_value(&rh) == target_hash
95 && &haystack[start..start + needle.len()] == needle
96 {
97 return Some(start);
98 }
99 }
100 None
101}
102
103#[cfg(test)]
104mod tests {
105 use super::*;
106
107 #[test]
108 fn new_rolling_hash_empty() {
109 let rh = new_rolling_hash(4);
110 assert_eq!(rolling_hash_value(&rh), 0);
111 assert!(rh.window.is_empty());
112 }
113
114 #[test]
115 fn simple_hash_same_input_same_output() {
116 let h1 = simple_hash(b"hello");
117 let h2 = simple_hash(b"hello");
118 assert_eq!(h1, h2);
119 }
120
121 #[test]
122 fn simple_hash_different_inputs() {
123 let h1 = simple_hash(b"hello");
124 let h2 = simple_hash(b"world");
125 assert_ne!(h1, h2);
126 }
127
128 #[test]
129 fn find_pattern_found_at_start() {
130 assert_eq!(find_pattern(b"abcdef", b"abc"), Some(0));
131 }
132
133 #[test]
134 fn find_pattern_found_at_end() {
135 assert_eq!(find_pattern(b"abcdef", b"def"), Some(3));
136 }
137
138 #[test]
139 fn find_pattern_found_in_middle() {
140 assert_eq!(find_pattern(b"abcdef", b"bcd"), Some(1));
141 }
142
143 #[test]
144 fn find_pattern_not_found() {
145 assert!(find_pattern(b"abcdef", b"xyz").is_none());
146 }
147
148 #[test]
149 fn find_pattern_empty_needle() {
150 assert_eq!(find_pattern(b"abc", b""), Some(0));
151 }
152
153 #[test]
154 fn find_pattern_needle_longer_than_haystack() {
155 assert!(find_pattern(b"ab", b"abc").is_none());
156 }
157
158 #[test]
159 fn rolling_hash_consistent_with_simple_hash() {
160 let data = b"hello";
161 let mut rh = new_rolling_hash(data.len());
162 for &b in data.iter() {
163 rolling_hash_push(&mut rh, b);
164 }
165 assert_eq!(rolling_hash_value(&rh), simple_hash(data));
166 }
167}