harper_core/expr/fixed_phrase.rs

use crate::patterns::Word;
use crate::{Document, Span, Token, TokenKind};

use super::{Expr, SequenceExpr};

/// Matches a fixed sequence of tokens as they appear in the input.
/// Word matching is case-insensitive; all other token kinds must match exactly.
///
/// # Example
///
/// ```rust
/// use harper_core::expr::{FixedPhrase, Expr};
/// use harper_core::Document;
///
/// let doc = Document::new_plain_english_curated("Hello, world!");
/// let phrase = FixedPhrase::from_phrase("Hello, world!");
/// assert!(phrase.run(0, doc.get_tokens(), doc.get_source()).is_some());
/// ```
pub struct FixedPhrase {
    inner: SequenceExpr,
}

impl FixedPhrase {
    /// Creates a [`FixedPhrase`] from a plaintext string.
    /// Uses plain English tokenization rules.
    pub fn from_phrase(text: &str) -> Self {
        let document = Document::new_plain_english_curated(text);
        Self::from_document(&document)
    }

    /// Creates a [`FixedPhrase`] from a pre-tokenized document.
    /// Allows custom tokenization by creating a `Document` first.
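    ///
    /// # Example
    ///
    /// A small sketch of the intended flow: tokenize the phrase into a
    /// `Document` yourself, then build the expression from it.
    ///
    /// ```rust
    /// use harper_core::expr::{FixedPhrase, Expr};
    /// use harper_core::Document;
    ///
    /// let phrase_doc = Document::new_plain_english_curated("turn it off");
    /// let phrase = FixedPhrase::from_document(&phrase_doc);
    ///
    /// // Word matching is case-insensitive, so "Turn" still matches "turn".
    /// let doc = Document::new_plain_english_curated("Turn it off and on again.");
    /// assert!(phrase.run(0, doc.get_tokens(), doc.get_source()).is_some());
    /// ```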
    pub fn from_document(doc: &Document) -> Self {
        let mut phrase = SequenceExpr::default();

        for token in doc.fat_tokens() {
            match token.kind {
                TokenKind::Word(_word_metadata) => {
                    // Words are matched case-insensitively via the `Word` pattern.
                    phrase = phrase.then(Word::from_chars(token.content.as_slice()));
                }
                TokenKind::Space(_) => {
                    phrase = phrase.then_whitespace();
                }
                TokenKind::Punctuation(p) => {
                    // Punctuation in the phrase must appear verbatim in the input.
                    phrase = phrase.then(move |t: &Token, _source: &[char]| {
                        t.kind.as_punctuation().cloned() == Some(p)
                    })
                }
                TokenKind::ParagraphBreak => {
                    // A paragraph break in the phrase is matched like ordinary whitespace.
                    phrase = phrase.then_whitespace();
                }
                TokenKind::Number(n) => {
                    phrase = phrase
                        .then(move |tok: &Token, _source: &[char]| tok.kind == TokenKind::Number(n))
                }
                _ => panic!("Fell out of expected document formats."),
            }
        }

        Self { inner: phrase }
    }
}

impl Expr for FixedPhrase {
    fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
        self.inner.run(cursor, tokens, source)
    }
}

#[cfg(test)]
mod tests {
    use super::FixedPhrase;
    use crate::expr::Expr;
    use crate::{Document, Span};

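    // A minimal sketch of the exact-match side of the contract described in the
    // type-level docs: word casing may differ, but punctuation from the phrase
    // must appear verbatim. Only `is_some`/`is_none` are asserted, not the span.
    #[test]
    fn punctuation_must_match_exactly() {
        let phrase = FixedPhrase::from_phrase("hello, world");

        let matching = Document::new_plain_english_curated("Hello, World!");
        assert!(phrase
            .run(0, matching.get_tokens(), matching.get_source())
            .is_some());

        // A semicolon is a different `Punctuation` variant, so the match fails.
        let diverging = Document::new_plain_english_curated("Hello; World!");
        assert!(phrase
            .run(0, diverging.get_tokens(), diverging.get_source())
            .is_none());
    }
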
    #[test]
    fn test_not_case_sensitive() {
        // All three documents tokenize identically, so the tokens of one can be
        // paired with the source characters of another to vary only the casing
        // the matcher actually sees.
        let doc_lower = Document::new_plain_english_curated("hello world");
        let doc_upper = Document::new_plain_english_curated("HELLO WORLD");
        let doc_title = Document::new_plain_english_curated("Hello World");
        let phrase = FixedPhrase::from_document(&doc_lower);
        assert_eq!(
            phrase.run(0, doc_lower.get_tokens(), doc_title.get_source()),
            Some(Span::new(0, 3))
        );
        assert_eq!(
            phrase.run(0, doc_lower.get_tokens(), doc_upper.get_source()),
            Some(Span::new(0, 3))
        );
        assert_eq!(
            phrase.run(0, doc_title.get_tokens(), doc_lower.get_source()),
            Some(Span::new(0, 3))
        );
        assert_eq!(
            phrase.run(0, doc_title.get_tokens(), doc_upper.get_source()),
            Some(Span::new(0, 3))
        );
        assert_eq!(
            phrase.run(0, doc_upper.get_tokens(), doc_lower.get_source()),
            Some(Span::new(0, 3))
        );
        assert_eq!(
            phrase.run(0, doc_upper.get_tokens(), doc_title.get_source()),
            Some(Span::new(0, 3))
        );
    }
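
    // A sketch of composing `FixedPhrase` with a larger sequence. This assumes
    // `SequenceExpr::then` accepts any `Expr` implementation, which is not
    // demonstrated elsewhere in this file; the sentence is illustrative only.
    #[test]
    fn composes_with_sequence_expr() {
        let expr = crate::expr::SequenceExpr::default()
            .then(FixedPhrase::from_phrase("as well"))
            .then_whitespace()
            .then(FixedPhrase::from_phrase("as"));

        let doc = Document::new_plain_english_curated("as well as before.");
        assert!(expr.run(0, doc.get_tokens(), doc.get_source()).is_some());
    }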
}