powierza_coefficient/
lib.rs

1#![doc = include_str!("../README.md")]
2
3/// Computes Powierża coefficient of a pattern and a piece of text. For a
4/// detailed explanation, see [lib's docs](crate).
5pub fn powierża_coefficient(pattern: &str, text: &str) -> Option<u32> {
6    let text_len = text.chars().count();
7    let pattern_len = pattern.chars().count();
8
9    // The coefficient is undefined if either of the three is true:
10    // 1. The pattern is longer than the text.
11    // 2. The pattern length is 0.
12    // 3. The text length is 0.
13    //
14    // The third check is redundant but makes code more understandable.
15    if pattern_len > text_len || pattern_len == 0 || text_len == 0 {
16        return None;
17    }
18
19    // The matrix is padded with 0s on the top.
20    let mut previous_row = vec![Some(0); text_len];
21    let mut n_chars_to_skip = 0;
22
23    for (y, pattern_char) in pattern.chars().enumerate() {
24        let n_chars_to_omit = pattern_len - y - 1;
25
26        let (current_row, first_match_ix) = match_pattern_char(
27            pattern_char,
28            text,
29            &previous_row,
30            n_chars_to_skip,
31            n_chars_to_omit,
32        )?;
33
34        previous_row.clear();
35        previous_row.extend_from_slice(&current_row);
36        n_chars_to_skip = first_match_ix + 1;
37    }
38
39    let first_match_ix_in_last_row = n_chars_to_skip - 1;
40
41    let coefficient = previous_row
42        .into_iter()
43        .skip(first_match_ix_in_last_row)
44        .filter_map(|cell| cell)
45        .min();
46    
47    coefficient
48}
49
50type Row = Vec<Option<u32>>;
51
52fn match_pattern_char(
53    pattern_char: char,
54    text: &str,
55    previous_row: &Row,
56    n_chars_to_skip: usize,
57    n_chars_to_omit: usize,
58) -> Option<(Row, usize)> {
59    let text_len = text.chars().count();
60
61    let text_chars = text
62        .chars()
63        .enumerate()
64        .take(text_len - n_chars_to_omit)
65        .skip(n_chars_to_skip);
66
67    let mut current_row = vec![None; text_len];
68    let mut first_match_ix = None;
69    let mut left_score_and_is_gap = None;
70
71    for (x, text_char) in text_chars {
72        let is_match = pattern_char == text_char;
73        let diagonal_score = get_diagonal_score(&previous_row, x);
74
75        let current_score_and_is_gap = calc_score(is_match, diagonal_score, left_score_and_is_gap);
76
77        left_score_and_is_gap = current_score_and_is_gap;
78
79        match current_score_and_is_gap {
80            Some((score, is_gap)) => {
81                if !is_gap {
82                    first_match_ix = first_match_ix.or(Some(x));
83                }
84
85                current_row[x] = Some(score);
86            }
87            None => {
88                current_row[x] = None;
89            }
90        }
91    }
92
93    match first_match_ix {
94        None => return None,
95        Some(first_match_ix) => return Some((current_row, first_match_ix)),
96    }
97}
98
99#[inline]
100fn get_diagonal_score(previous_row: &Row, x: usize) -> Option<u32> {
101    if x > 0 {
102        previous_row[x - 1]
103    } else {
104        Some(0)
105    }
106}
107
108fn calc_score(
109    is_match: bool,
110    diagonal_score: Option<u32>,
111    left_score_and_is_gap: Option<(u32, bool)>,
112) -> Option<(u32, bool)> {
113    if is_match {
114        calc_match_score(diagonal_score, left_score_and_is_gap)
115    } else {
116        let score = calc_mismatch_score(left_score_and_is_gap)?;
117
118        Some((score, true))
119    }
120}
121
122fn calc_match_score(
123    diagonal_score: Option<u32>,
124    left_score_and_is_gap: Option<(u32, bool)>,
125) -> Option<(u32, bool)> {
126    let continuation_score = calc_continuation_score(diagonal_score);
127    let gap_score = calc_gap_score(left_score_and_is_gap);
128
129    match (continuation_score, gap_score) {
130        (None, None) => None,
131        (Some(continuation_score), None) => Some((continuation_score, false)),
132        (None, Some(_gap_score)) => unreachable!(),
133        (Some(continuation_score), Some(gap_score)) => {
134            if continuation_score <= gap_score {
135                Some((continuation_score, false))
136            } else {
137                Some((gap_score, true))
138            }
139        }
140    }
141}
142
143#[inline]
144fn calc_mismatch_score(left_score_and_is_gap: Option<(u32, bool)>) -> Option<u32> {
145    calc_gap_score(left_score_and_is_gap)
146}
147
148#[inline]
149fn calc_continuation_score(diagonal_score: Option<u32>) -> Option<u32> {
150    diagonal_score
151}
152
153#[inline]
154fn calc_gap_score(left_score_and_is_gap: Option<(u32, bool)>) -> Option<u32> {
155    let (left_score, is_left_gap) = left_score_and_is_gap?;
156
157    if is_left_gap {
158        Some(left_score)
159    } else {
160        Some(left_score + 1)
161    }
162}
163
164#[cfg(test)]
165mod test {
166    use super::powierża_coefficient as powierża;
167
168    #[test]
169    fn test_coefficient_should_be_undefined_if_pattern_is_empty() {
170        assert!(powierża("", "text is not empty").is_none());
171    }
172
173    #[test]
174    fn test_coefficient_should_be_undefined_if_text_is_empty() {
175        assert!(powierża("pattern is not empty", "").is_none());
176    }
177
178    #[test]
179    fn test_coefficient_should_be_undefined_if_text_is_longer_than_tet() {
180        assert!(powierża(
181            "pattern is not empty______________________________________",
182            " text is not empty but its shorter than pattern"
183        )
184        .is_none());
185    }
186
187    #[test]
188    fn test_coefficient_should_be_undefined_if_pattern_is_not_present_in_text() {
189        assert!(powierża("abc", "xyz").is_none());
190    }
191
192    #[test]
193    fn test_coefficient_value_with_progressing_number_of_gaps() {
194        let pattern = "xddd";
195        assert_eq!(powierża(pattern, "xddd").unwrap(), 0);
196        assert_eq!(powierża(pattern, "xdd_d").unwrap(), 1);
197        assert_eq!(powierża(pattern, "xd_d_d").unwrap(), 2);
198        assert_eq!(powierża(pattern, "x_d_d_d").unwrap(), 3);
199    }
200
201    #[test]
202    fn test_coefficient_value_should_not_change_when_part_with_higher_value_is_added() {
203        let pattern = "xddd";
204
205        // The prefix `x_d_d_d` should not influence the coefficient
206        // because matching it would result in higher (worse) score.
207
208        assert_eq!(powierża(pattern, "x_d_d_d___xddd").unwrap(), 0);
209        assert_eq!(powierża(pattern, "x_d_d_d___xdd_d").unwrap(), 1);
210        assert_eq!(powierża(pattern, "x_d_d_d___xd_d_d").unwrap(), 2);
211        assert_eq!(powierża(pattern, "x_d_d_d___x_d_d_d").unwrap(), 3);
212    }
213
214    #[test]
215    fn test_coefficient_value_should_not_change_when_gap_length_changes() {
216        let pattern = "xddd";
217
218        assert_eq!(powierża(pattern, "x_d_d_d").unwrap(), 3);
219        assert_eq!(powierża(pattern, "x_d_d__d").unwrap(), 3);
220        assert_eq!(powierża(pattern, "x_d__d__d").unwrap(), 3);
221        assert_eq!(powierża(pattern, "x__d__d__d").unwrap(), 3);
222        assert_eq!(powierża(pattern, "x__d__d__d").unwrap(), 3);
223        assert_eq!(powierża(pattern, "x__d__d__d").unwrap(), 3);
224    }
225
226    #[test]
227    fn test_coefficient_value_should_not_change_when_gap_is_moved() {
228        let pattern = "xddd";
229
230        assert_eq!(powierża(pattern, "xdd_d").unwrap(), 1);
231        assert_eq!(powierża(pattern, "xd_dd").unwrap(), 1);
232        assert_eq!(powierża(pattern, "x_ddd").unwrap(), 1);
233    }
234
235    #[test]
236    fn test_coefficient_value_should_not_change_when_suffix_is_added() {
237        let pattern = "xddd";
238
239        assert_eq!(powierża(pattern, "xddd_").unwrap(), 0);
240        assert_eq!(powierża(pattern, "xdd_d_").unwrap(), 1);
241        assert_eq!(powierża(pattern, "xd_d_d_").unwrap(), 2);
242        assert_eq!(powierża(pattern, "x_d_d_d_").unwrap(), 3);
243    }
244
245    #[test]
246    fn test_coefficient_value_should_not_change_when_prefix_is_added() {
247        let pattern = "xddd";
248
249        assert_eq!(powierża(pattern, "_xddd").unwrap(), 0);
250        assert_eq!(powierża(pattern, "_xdd_d").unwrap(), 1);
251        assert_eq!(powierża(pattern, "_xd_d_d").unwrap(), 2);
252        assert_eq!(powierża(pattern, "_x_d_d_d").unwrap(), 3);
253    }
254
255    #[test]
256    fn test_coefficient_value_with_real_folder_names() {
257        let pattern = "de";
258
259        assert_eq!(powierża(pattern, ".docker").unwrap(), 1);
260        assert_eq!(powierża(pattern, "documents").unwrap(), 1);
261        assert_eq!(powierża(pattern, "vb-shared-files").unwrap(), 1);
262    }
263}