1use colored::*;
2use std::cmp;
3use std::collections::HashMap;
4use std::mem;
5
6#[cfg(test)]
7extern crate quickcheck;
8#[cfg(test)]
9#[macro_use(quickcheck)]
10extern crate quickcheck_macros;
11
12pub mod baseline;
13
14#[cfg(test)]
15mod test;
16
17#[derive(Copy, Clone, Debug, Eq, PartialEq)]
19pub struct Match {
20 pub distance: usize,
22 pub end: usize,
26}
27
28pub type BitapResult = Result<Vec<Match>, &'static str>;
29pub type FindResult = Result<Vec<usize>, &'static str>;
30
31static ERR_INVALID_PATTERN: &'static str = "invalid pattern";
32
33fn pattern_length_is_valid(pattern_length: usize) -> bool {
34 pattern_length > 0 && pattern_length < mem::size_of::<usize>() * 8
35}
36
37pub fn reference(
39 pattern: &str,
40 text: &str,
41 max_distance: usize,
42 allow_transpositions: bool,
43 debug: bool,
44) -> BitapResult {
45 let pattern_len = pattern.chars().count();
46 if !pattern_length_is_valid(pattern_len) {
47 return Err(ERR_INVALID_PATTERN);
48 }
49
50 let max_distance = cmp::min(max_distance, pattern_len);
52
53 let mut masks: HashMap<char, usize> = HashMap::new();
67 for (i, c) in pattern.chars().enumerate() {
68 masks
69 .entry(c)
70 .and_modify(|mask| *mask &= !(1usize << i))
71 .or_insert(!(1usize << i));
72 }
73
74 let mut r = (0..=max_distance).map(|i| !1usize << i).collect::<Vec<_>>();
85 let mut trans = vec![!1usize; max_distance];
86
87 let results = text
88 .chars()
89 .enumerate()
90 .filter_map(move |(i, c)| {
91 let letter_mask = match masks.get(&c) {
92 Some(mask) => *mask,
93 None => !0usize,
94 };
95 let mut prev_parent = r[0];
96 r[0] |= letter_mask;
97 r[0] <<= 1;
98
99 for j in 1..=max_distance {
100 let prev = r[j];
101 let current = (prev | letter_mask) << 1;
102 let replace = prev_parent << 1;
103 let delete = r[j - 1] << 1;
104 let insert = prev_parent;
105 r[j] = current & insert & delete & replace;
106
107 if allow_transpositions {
108 let transpose = (trans[j - 1] | (letter_mask << 1)) << 1;
109 r[j] &= transpose;
110
111 trans[j - 1] = (prev_parent << 1) | letter_mask;
116 }
117
118 prev_parent = prev;
119 }
120
121 if debug {
122 debug_bitap(pattern, text, i, &r);
123 }
124
125 for (k, rv) in r.iter().enumerate() {
126 if 0 == (rv & (1usize << pattern_len)) {
127 return Some(Match {
128 distance: k,
129 end: i,
130 });
131 }
132 }
133 None
134 })
135 .collect::<Vec<_>>();
136 Ok(results)
137}
138
139fn debug_bitap(pattern: &str, text: &str, index: usize, state: &[usize]) {
143 let pattern_len = pattern.chars().count();
144 let text_colored = text
145 .chars()
146 .enumerate()
147 .map(|(i, c)| {
148 if i == index {
149 format!("{}", c.to_string().green())
150 } else {
151 c.to_string()
152 }
153 })
154 .collect::<Vec<String>>()
155 .concat();
156 println!("{: <4}| {}", index, text_colored);
157 println!("-------------------");
158 println!(" | {}", pattern);
159 println!("-------------------");
160 for (i, r) in state.iter().enumerate() {
161 let bitmask: String = format!("{:b}", r)
162 .chars()
163 .rev()
164 .skip(1)
165 .take(pattern_len)
166 .collect();
167 let is_match = bitmask.ends_with('0');
168 if is_match {
169 println!("{} | {}", i, bitmask.green());
170 } else {
171 println!("{} | {}", i, bitmask);
172 }
173 }
174 println!("-------------------");
175}
176
177pub fn find(pattern: &str, text: &str) -> FindResult {
178 reference(pattern, text, 0, false, false).map(|v| {
179 let offset = pattern.chars().count() - 1;
180 v.iter().map(|m| m.end - offset).collect::<Vec<_>>()
181 })
182}
183
184pub fn lev(pattern: &str, text: &str, max_distance: usize) -> BitapResult {
185 reference(pattern, text, max_distance, false, false)
186}
187
188pub fn osa(pattern: &str, text: &str, max_distance: usize) -> BitapResult {
189 reference(pattern, text, max_distance, true, false)
190}