makepad_rabin_karp/
lib.rs

1/// Searches the given needle in the given haystack, appending the starting indices of all matches
2/// to the given indices vector.
3///
4/// This function uses the Rabin-Karp algorithm, see:
5/// https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
6///
7/// for more information.
8pub struct RabinKarpResult{
9    pub new_line_byte: usize,
10    pub line: usize,
11    pub column_byte: usize,
12    pub byte: usize
13}
14
15pub fn search(haystack: &[u8], needle: &[u8], indices: &mut Vec<RabinKarpResult>) {
16    if needle.len()>haystack.len(){
17        return
18    }
19    const BASE: u32 = 257;
20    const MODULO: u32 = 16_711_921;
21
22    // Compute the base to the n-th power, where n is the length of the needle.
23    let mut base_pow = 1;
24    for _ in 0..needle.len() - 1 {
25        base_pow = (base_pow * BASE) % MODULO;
26    }
27
28    // Compute the hash of both the initial window of the haystack and the needle.
29    let mut haystack_hash = 0;
30    let mut needle_hash = 0;
31    for index in 0..needle.len() {
32        haystack_hash = (haystack_hash * BASE + haystack[index] as u32) % MODULO;
33        needle_hash = (needle_hash * BASE + needle[index] as u32) % MODULO;
34    }
35    
36    let mut line = 0;
37    let mut column_byte:usize = 0;
38    let mut new_line_byte = 0;
39    
40    for index in 0..haystack.len() - needle.len() + 1 {
41        if haystack[index] == b'\n'{
42            new_line_byte = index + 1;
43            line += 1;
44            column_byte = 0;
45        }
46        else{
47            column_byte += 1;
48        }
49        // If the hash of the current window of the haystack matches the hash of the needle, we have
50        // a potential match. Make sure that we have an actual match, and if so append the start
51        // index of the match to the indices vector.
52        if haystack_hash == needle_hash && &haystack[index..][..needle.len()] == needle {
53            indices.push(RabinKarpResult{
54                new_line_byte,
55                line,
56                column_byte: column_byte.saturating_sub(1),
57                byte: index
58            });
59        }
60        // Update the hash of the the current window of the haystack, by removing the first
61        // byte from and adding the next byte to the hash.
62        if index < haystack.len() - needle.len() {
63            haystack_hash =
64                (haystack_hash + MODULO - (haystack[index] as u32 * base_pow) % MODULO) % MODULO;
65            haystack_hash = (haystack_hash * BASE + haystack[index + needle.len()] as u32) % MODULO;
66        }
67    }
68}