bitap_reference/
baseline.rs1use super::*;
2
3use std::cmp;
4use strsim;
5
6pub enum DistanceFn {
7 Levenshtein,
8 DamerauLevenshtein,
9 OptimalStringAlignment,
10}
11
12pub fn baseline(
18 pattern: &str,
19 text: &str,
20 max_distance: usize,
21 distance_fn: DistanceFn,
22) -> super::BitapResult {
23 let pattern_len = pattern.chars().count();
32 if !pattern_length_is_valid(pattern_len) {
33 return Err(ERR_INVALID_PATTERN);
34 }
35
36 let max_distance = cmp::min(max_distance, pattern_len);
38
39 let text_chars = text.chars().collect::<Vec<_>>();
40
41 let mut results = Vec::new();
42
43 for i in 0..text_chars.len() {
44 let max_diff = max_distance + pattern_len;
48 let start = if i > max_diff { i - max_diff } else { 0 };
49
50 let mut best_distance: usize = max_distance + 1;
51 for j in start..=i {
52 let sub_text: String = text_chars[j..=i].iter().collect();
53 let distance = match distance_fn {
54 DistanceFn::Levenshtein => strsim::levenshtein(pattern, &sub_text),
55 DistanceFn::DamerauLevenshtein => strsim::damerau_levenshtein(pattern, &sub_text),
56 DistanceFn::OptimalStringAlignment => strsim::osa_distance(pattern, &sub_text),
57 };
58 if distance < best_distance {
59 best_distance = distance;
60 }
61 if best_distance == 0 {
62 break;
63 }
64 }
65 if best_distance <= max_distance {
66 results.push(Match {
67 distance: best_distance,
68 end: i,
69 })
70 }
71 }
72 Ok(results)
73}
74
75pub fn find(pattern: &str, text: &str) -> FindResult {
76 baseline(pattern, text, 0, DistanceFn::Levenshtein).map(|v| {
77 let offset = pattern.chars().count() - 1;
78 v.iter().map(|m| m.end - offset).collect::<Vec<_>>()
79 })
80}
81
82pub fn lev(pattern: &str, text: &str, k: usize) -> BitapResult {
83 baseline(pattern, text, k, DistanceFn::Levenshtein)
84}
85
86pub fn damerau(pattern: &str, text: &str, k: usize) -> BitapResult {
87 baseline(pattern, text, k, DistanceFn::DamerauLevenshtein)
88}
89
90pub fn osa(pattern: &str, text: &str, k: usize) -> BitapResult {
91 baseline(pattern, text, k, DistanceFn::OptimalStringAlignment)
92}