harper_core/expr/
similar_to_phrase.rs

1use crate::patterns::{WithinEditDistance, Word};
2use crate::{Document, Span, Token, TokenKind};
3
4use super::{Expr, SequenceExpr};
5
6pub struct SimilarToPhrase {
7    phrase: SequenceExpr,
8    fuzzy_phrase: SequenceExpr,
9}
10
11impl SimilarToPhrase {
12    /// Create an error-tolerant SequenceExpr that looks for phrases similar to (but not the same as) that contained
13    /// in the provided text.
14    ///
15    /// This is an expensive operation, so try to only do it at startup and in tests.
16    ///
17    /// It will panic if your document is too complex, so only run this with curated phrases.
18    pub fn from_phrase(text: &str, max_edit_dist: u8) -> Self {
19        let document = Document::new_plain_english_curated(text);
20
21        Self::from_doc(&document, max_edit_dist)
22    }
23
24    /// Create an error-tolerant SequenceExpr that looks for phrases similar to (but not the same as) that contained
25    /// in the provided document.
26    ///
27    /// This is an expensive operation, so try to only do it at startup and in tests.
28    ///
29    /// It will panic if your document contains certain token types, so only run this with curated phrases.
30    pub fn from_doc(document: &Document, max_edit_dist: u8) -> Self {
31        let mut phrase = SequenceExpr::default();
32        let mut fuzzy_phrase = SequenceExpr::default();
33
34        for token in document.fat_tokens() {
35            match token.kind {
36                TokenKind::Word(_lexeme_metadata) => {
37                    phrase = phrase.then(Word::from_chars(token.content.as_slice()));
38                    fuzzy_phrase = fuzzy_phrase
39                        .then(WithinEditDistance::new(token.content.into(), max_edit_dist));
40                }
41                TokenKind::Space(_) => {
42                    fuzzy_phrase = fuzzy_phrase.then_whitespace();
43                    phrase = phrase.then_whitespace();
44                }
45                TokenKind::ParagraphBreak => {
46                    fuzzy_phrase = fuzzy_phrase.then_whitespace();
47                    phrase = phrase.then_whitespace();
48                }
49                _ => panic!("Fell out of expected document formats."),
50            }
51        }
52
53        Self {
54            phrase,
55            fuzzy_phrase,
56        }
57    }
58}
59
60impl Expr for SimilarToPhrase {
61    fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
62        if self.phrase.run(cursor, tokens, source).is_some() {
63            return None;
64        }
65        self.fuzzy_phrase.run(cursor, tokens, source)
66    }
67}