// harper_core/expr/fixed_phrase.rs

1use crate::parsers::PlainEnglish;
2use crate::patterns::Word;
3use crate::{Document, Span, Token, TokenKind};
4
5use super::{Expr, SequenceExpr};
6
/// Matches a fixed sequence of tokens as they appear in the input.
/// Case-insensitive for words but maintains exact matching for other token types.
///
/// # Example
///
/// ```rust
/// use harper_core::expr::{FixedPhrase, Expr};
/// use harper_core::Document;
///
/// let doc = Document::new_plain_english_curated("Hello, world!");
/// let phrase = FixedPhrase::from_phrase("Hello, world!");
/// assert!(phrase.run(0, doc.get_tokens(), doc.get_source()).is_some());
/// ```
pub struct FixedPhrase {
    // Pre-built token-by-token matcher; constructed once in `from_document`
    // and delegated to by the `Expr` impl.
    inner: SequenceExpr,
}
23
24impl FixedPhrase {
25    /// Creates a [`FixedPhrase`] from a plaintext string.
26    /// Uses plain English tokenization rules.
27    pub fn from_phrase(text: &str) -> Self {
28        let document = Document::new_basic_tokenize(text, &PlainEnglish);
29        Self::from_document(&document)
30    }
31
32    /// Creates a [`FixedPhrase`] from a pre-tokenized document.
33    /// Allows custom tokenization by creating a `Document` first.
34    pub fn from_document(doc: &Document) -> Self {
35        let mut phrase = SequenceExpr::default();
36
37        for token in doc.fat_tokens() {
38            match token.kind {
39                TokenKind::Word(_lexeme_metadata) => {
40                    phrase = phrase.then(Word::from_chars(token.content.as_slice()));
41                }
42                TokenKind::Space(_) => {
43                    phrase = phrase.then_whitespace();
44                }
45                TokenKind::Punctuation(p) => {
46                    phrase = phrase
47                        .then_kind_where(move |kind| kind.as_punctuation().cloned() == Some(p));
48                }
49                TokenKind::ParagraphBreak => {
50                    phrase = phrase.then_whitespace();
51                }
52                TokenKind::Number(_) => phrase = phrase.then_kind_where(|kind| kind.is_number()),
53                _ => panic!("Fell out of expected document formats."),
54            }
55        }
56
57        Self { inner: phrase }
58    }
59}
60
impl Expr for FixedPhrase {
    // Delegates matching entirely to the pre-built inner `SequenceExpr`.
    fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
        self.inner.run(cursor, tokens, source)
    }
}
66
#[cfg(test)]
mod tests {
    use super::FixedPhrase;
    use crate::expr::Expr;
    use crate::{Document, Span};

    /// A phrase built from one casing should match tokens/source drawn
    /// from every other casing of the same text.
    #[test]
    fn test_not_case_sensitive() {
        let variants = [
            Document::new_plain_english_curated("hello world"),
            Document::new_plain_english_curated("HELLO WORLD"),
            Document::new_plain_english_curated("Hello World"),
        ];
        let phrase = FixedPhrase::from_document(&variants[0]);

        // Check every cross-casing pairing of tokens and source.
        for (i, tok_doc) in variants.iter().enumerate() {
            for (j, src_doc) in variants.iter().enumerate() {
                if i == j {
                    continue;
                }
                assert_eq!(
                    phrase.run(0, tok_doc.get_tokens(), src_doc.get_source()),
                    Some(Span::new(0, 3))
                );
            }
        }
    }
}