harper_core/patterns/
fixed_phrase.rs1use crate::{Document, Token, TokenKind};
2
3use super::{Pattern, SequencePattern, Word};
4
5pub struct FixedPhrase {
19 inner: SequencePattern,
20}
21
22impl FixedPhrase {
23 pub fn from_phrase(text: &str) -> Self {
26 let document = Document::new_plain_english_curated(text);
27 Self::from_document(&document)
28 }
29
30 pub fn from_document(doc: &Document) -> Self {
33 let mut phrase = SequencePattern::default();
34
35 for token in doc.fat_tokens() {
36 match token.kind {
37 TokenKind::Word(_word_metadata) => {
38 phrase = phrase.then(Word::from_chars(token.content.as_slice()));
39 }
40 TokenKind::Space(_) => {
41 phrase = phrase.then_whitespace();
42 }
43 TokenKind::Punctuation(p) => {
44 phrase = phrase.then(move |t: &Token, _source: &[char]| {
45 t.kind.as_punctuation().cloned() == Some(p)
46 })
47 }
48 TokenKind::ParagraphBreak => {
49 phrase = phrase.then_whitespace();
50 }
51 TokenKind::Number(n) => {
52 phrase = phrase
53 .then(move |tok: &Token, _source: &[char]| tok.kind == TokenKind::Number(n))
54 }
55 _ => panic!("Fell out of expected document formats."),
56 }
57 }
58
59 Self { inner: phrase }
60 }
61}
62
63impl Pattern for FixedPhrase {
64 fn matches(&self, tokens: &[Token], source: &[char]) -> Option<usize> {
67 self.inner.matches(tokens, source)
68 }
69}
70
#[cfg(test)]
mod tests {
    use crate::{
        Document,
        patterns::{FixedPhrase, Pattern},
    };

    /// A phrase built from lowercase text must match all three tokens no
    /// matter which casing the tokens and the source characters come from.
    #[test]
    fn test_not_case_sensitive() {
        /// Asserts that `phrase` consumes all three tokens when the tokens
        /// come from one document and the source characters from another.
        #[track_caller]
        fn assert_whole_phrase(phrase: &FixedPhrase, tokens_from: &Document, source_from: &Document) {
            assert_eq!(
                phrase.matches(tokens_from.get_tokens(), source_from.get_source()),
                Some(3)
            );
        }

        let doc_lower = Document::new_plain_english_curated("hello world");
        let doc_upper = Document::new_plain_english_curated("HELLO WORLD");
        let doc_title = Document::new_plain_english_curated("Hello World");
        let phrase = FixedPhrase::from_document(&doc_lower);

        // Every ordered pairing of two differently-cased documents.
        assert_whole_phrase(&phrase, &doc_lower, &doc_title);
        assert_whole_phrase(&phrase, &doc_lower, &doc_upper);
        assert_whole_phrase(&phrase, &doc_title, &doc_lower);
        assert_whole_phrase(&phrase, &doc_title, &doc_upper);
        assert_whole_phrase(&phrase, &doc_upper, &doc_lower);
        assert_whole_phrase(&phrase, &doc_upper, &doc_title);
    }
}