harper_core/patterns/
nominal_phrase.rs

1use crate::Token;
2
3use super::Pattern;
4
/// A [`Pattern`] that recognizes a simple nominal phrase: an optional run of
/// modifiers (adjectives, determiners, or progressive verb forms) followed by
/// a nominal token.
///
/// The type carries no state, so it is cheap to copy and can be constructed
/// directly (`NominalPhrase`) or via [`Default`].
#[derive(Debug, Default, Clone, Copy)]
pub struct NominalPhrase;
7
8impl Pattern for NominalPhrase {
9    fn matches(&self, tokens: &[Token], _source: &[char]) -> Option<usize> {
10        let mut cursor = 0;
11
12        loop {
13            let tok = tokens.get(cursor)?;
14
15            if (tok.kind.is_adjective()
16                || tok.kind.is_determiner()
17                || tok.kind.is_verb_progressive_form())
18                && let Some(next) = tokens.get(cursor + 1)
19                && next.kind.is_whitespace()
20            {
21                cursor += 2;
22                continue;
23            }
24
25            if tok.kind.is_nominal() {
26                return Some(cursor + 1);
27            }
28
29            return None;
30        }
31    }
32}
33
#[cfg(test)]
mod tests {
    use super::super::DocPattern;
    use super::NominalPhrase;
    use crate::{Document, Span, Token, patterns::Pattern};

    /// Test helper: renders each token span as the source text it covers.
    trait SpanVecExt {
        fn to_strings(&self, doc: &Document) -> Vec<String>;
    }

    impl SpanVecExt for Vec<Span<Token>> {
        fn to_strings(&self, doc: &Document) -> Vec<String> {
            self.iter()
                .map(|sp| {
                    // Concatenate the text of every token inside the span.
                    doc.get_tokens()[sp.start..sp.end]
                        .iter()
                        .map(|tok| doc.get_span_content_str(&tok.span))
                        .collect::<String>()
                })
                .collect()
        }
    }

    #[test]
    fn simple_apple() {
        let doc = Document::new_markdown_default_curated("A red apple");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(matches.to_strings(&doc), vec!["A red apple"]);
    }

    #[test]
    fn complex_apple() {
        let doc = Document::new_markdown_default_curated("A red apple with a long stem");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(matches.to_strings(&doc), vec!["A red apple", "a long stem"]);
    }

    #[test]
    fn list_fruit() {
        let doc = Document::new_markdown_default_curated("An apple, a banana and a pear");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(
            matches.to_strings(&doc),
            vec!["An apple", "a banana", "a pear"]
        );
    }

    #[test]
    fn simplest_banana() {
        let doc = Document::new_markdown_default_curated("a banana");
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some()
        );
    }

    #[test]
    fn food() {
        let doc = Document::new_markdown_default_curated(
            "My favorite foods are pizza, sushi, tacos and burgers.",
        );
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        // On failure, `assert_eq!` already prints both sides, so no extra
        // debug output is needed here.
        assert_eq!(
            matches.to_strings(&doc),
            vec!["My favorite foods", "pizza", "sushi", "tacos", "burgers"]
        );
    }

    #[test]
    fn simplest_way() {
        let doc = Document::new_markdown_default_curated("a way");
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some()
        );
    }

    #[test]
    fn present_participle_way() {
        let doc = Document::new_markdown_default_curated("a winning way");
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some()
        );
    }

    #[test]
    fn perfect_participle_way() {
        let doc = Document::new_markdown_default_curated("a failed way");
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some()
        );
    }
}
144        );
145    }
146}