harper_core/expr/fixed_phrase.rs

use crate::patterns::Word;
use crate::{Document, Span, Token, TokenKind};

use super::{Expr, SequenceExpr};

/// Matches a fixed sequence of tokens as they appear in the input.
/// Word matching is case-insensitive; punctuation must match exactly, while
/// whitespace and number tokens only need to match in kind.
///
/// # Example
///
/// ```rust
/// use harper_core::expr::{FixedPhrase, Expr};
/// use harper_core::Document;
///
/// let doc = Document::new_plain_english_curated("Hello, world!");
/// let phrase = FixedPhrase::from_phrase("Hello, world!");
/// assert!(phrase.run(0, doc.get_tokens(), doc.get_source()).is_some());
/// ```
pub struct FixedPhrase {
    inner: SequenceExpr,
}

impl FixedPhrase {
    /// Creates a [`FixedPhrase`] from a plaintext string.
    /// Uses plain English tokenization rules.
    pub fn from_phrase(text: &str) -> Self {
        let document = Document::new_plain_english_curated(text);
        Self::from_document(&document)
    }

    /// Creates a [`FixedPhrase`] from a pre-tokenized document.
    /// Allows custom tokenization by creating a `Document` first.
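    ///
    /// # Example
    ///
    /// A minimal sketch reusing the plain-English constructor shown above; any other
    /// `Document` constructor can be substituted to customize tokenization.
    ///
    /// ```rust
    /// use harper_core::expr::{FixedPhrase, Expr};
    /// use harper_core::Document;
    ///
    /// let doc = Document::new_plain_english_curated("for the most part");
    /// let phrase = FixedPhrase::from_document(&doc);
    /// assert!(phrase.run(0, doc.get_tokens(), doc.get_source()).is_some());
    /// ```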
    pub fn from_document(doc: &Document) -> Self {
        let mut phrase = SequenceExpr::default();

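        // Translate each token of the phrase's document into a corresponding
        // step of the sequence expression.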
        for token in doc.fat_tokens() {
            match token.kind {
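                // Word content is matched case-insensitively.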
                TokenKind::Word(_lexeme_metadata) => {
                    phrase = phrase.then(Word::from_chars(token.content.as_slice()));
                }
                TokenKind::Space(_) => {
                    phrase = phrase.then_whitespace();
                }
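                // Punctuation must be the exact same punctuation mark.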
                TokenKind::Punctuation(p) => {
                    phrase = phrase
                        .then_kind_where(move |kind| kind.as_punctuation().cloned() == Some(p));
                }
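                // A paragraph break in the phrase is satisfied by any whitespace.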
                TokenKind::ParagraphBreak => {
                    phrase = phrase.then_whitespace();
                }
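                // Any number token is accepted, regardless of its value.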
                TokenKind::Number(_) => phrase = phrase.then_kind_where(|kind| kind.is_number()),
                _ => panic!("Fell out of expected document formats."),
            }
        }

        Self { inner: phrase }
    }
}

impl Expr for FixedPhrase {
    fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
        self.inner.run(cursor, tokens, source)
    }
}

#[cfg(test)]
mod tests {
    use super::FixedPhrase;
    use crate::expr::Expr;
    use crate::{Document, Span};

    #[test]
    fn test_not_case_sensitive() {
        let doc_lower = Document::new_plain_english_curated("hello world");
        let doc_upper = Document::new_plain_english_curated("HELLO WORLD");
        let doc_title = Document::new_plain_english_curated("Hello World");
        let phrase = FixedPhrase::from_document(&doc_lower);
        assert_eq!(
            phrase.run(0, doc_lower.get_tokens(), doc_title.get_source()),
            Some(Span::new(0, 3))
        );
        assert_eq!(
            phrase.run(0, doc_lower.get_tokens(), doc_upper.get_source()),
            Some(Span::new(0, 3))
        );
        assert_eq!(
            phrase.run(0, doc_title.get_tokens(), doc_lower.get_source()),
            Some(Span::new(0, 3))
        );
        assert_eq!(
            phrase.run(0, doc_title.get_tokens(), doc_upper.get_source()),
            Some(Span::new(0, 3))
        );
        assert_eq!(
            phrase.run(0, doc_upper.get_tokens(), doc_lower.get_source()),
            Some(Span::new(0, 3))
        );
        assert_eq!(
            phrase.run(0, doc_upper.get_tokens(), doc_title.get_source()),
            Some(Span::new(0, 3))
        );
    }
}