harper_core/patterns/
within_edit_distance.rs

1use std::cell::RefCell;
2
3use super::SingleTokenPattern;
4use crate::{CharString, CharStringExt, Token};
5
6use crate::edit_distance::edit_distance_min_alloc;
7
8/// Matches single words within a certain edit distance of a given word.
9pub struct WithinEditDistance {
10    word: CharString,
11    max_edit_dist: u8,
12}
13
14impl WithinEditDistance {
15    pub fn new(word: CharString, max_edit_dist: u8) -> Self {
16        Self {
17            word,
18            max_edit_dist,
19        }
20    }
21
22    pub fn from_str(word: &str, edit_dist: u8) -> Self {
23        let chars = word.chars().collect();
24
25        Self::new(chars, edit_dist)
26    }
27}
28
thread_local! {
    // Per-thread scratch buffers passed to `edit_distance_min_alloc`.
    // Keeping them here lets `matches_token` reuse the same two vectors
    // instead of allocating a fresh pair on every call.
    static BUFFERS: RefCell<(Vec<u8>, Vec<u8>)> = const {
        RefCell::new((Vec::new(), Vec::new()))
    };
}
33
34impl SingleTokenPattern for WithinEditDistance {
35    fn matches_token(&self, token: &Token, source: &[char]) -> bool {
36        if !token.kind.is_word() {
37            return false;
38        }
39
40        let content = token.span.get_content(source);
41
42        BUFFERS.with_borrow_mut(|(buffer_a, buffer_b)| {
43            let distance = edit_distance_min_alloc(
44                &content.to_lower(),
45                &self.word.to_lower(),
46                buffer_a,
47                buffer_b,
48            );
49            distance <= self.max_edit_dist
50        })
51    }
52}