harper_core/patterns/
nominal_phrase.rs

1use crate::Token;
2
3use super::Pattern;
4
/// A [`Pattern`] that matches a nominal phrase at the start of a token slice.
///
/// A phrase consists of zero or more modifiers (adjectives, determiners, or
/// progressive-form verbs), each followed by a whitespace token, terminated by
/// a single nominal token.
///
/// The type carries no state, so it is cheap to copy and can be created with
/// either `NominalPhrase` or `NominalPhrase::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct NominalPhrase;
7
8impl Pattern for NominalPhrase {
9    fn matches(&self, tokens: &[Token], _source: &[char]) -> Option<usize> {
10        let mut cursor = 0;
11
12        loop {
13            let tok = tokens.get(cursor)?;
14
15            if tok.kind.is_adjective()
16                || tok.kind.is_determiner()
17                || tok.kind.is_verb_progressive_form()
18            {
19                if let Some(next) = tokens.get(cursor + 1) {
20                    if next.kind.is_whitespace() {
21                        cursor += 2;
22                        continue;
23                    }
24                }
25            }
26
27            if tok.kind.is_nominal() {
28                return Some(cursor + 1);
29            }
30
31            return None;
32        }
33    }
34}
35
#[cfg(test)]
mod tests {
    use super::super::DocPattern;
    use super::NominalPhrase;
    use crate::{Document, Span, patterns::Pattern};

    /// Test-only helper for rendering token-index spans as the text they cover.
    trait SpanVecExt {
        /// Renders each span as the concatenated text of the tokens it covers.
        fn to_strings(&self, doc: &Document) -> Vec<String>;
    }

    impl SpanVecExt for Vec<Span> {
        fn to_strings(&self, doc: &Document) -> Vec<String> {
            self.iter()
                .map(|sp| {
                    // Spans here index into the token list, not the source text.
                    doc.get_tokens()[sp.start..sp.end]
                        .iter()
                        .map(|tok| doc.get_span_content_str(&tok.span))
                        .collect::<String>()
                })
                .collect()
        }
    }

    /// Returns the text of every nominal phrase found in `text`, in the order
    /// `find_all_matches_in_doc` reports them.
    fn phrases_in(text: &str) -> Vec<String> {
        let doc = Document::new_markdown_default_curated(text);
        NominalPhrase.find_all_matches_in_doc(&doc).to_strings(&doc)
    }

    /// Reports whether a nominal phrase matches starting at the first token of `text`.
    fn matches_at_start(text: &str) -> bool {
        let doc = Document::new_markdown_default_curated(text);
        NominalPhrase
            .matches(doc.get_tokens(), doc.get_source())
            .is_some()
    }

    #[test]
    fn simple_apple() {
        assert_eq!(phrases_in("A red apple"), vec!["A red apple"]);
    }

    #[test]
    fn complex_apple() {
        assert_eq!(
            phrases_in("A red apple with a long stem"),
            vec!["A red apple", "a long stem"]
        );
    }

    #[test]
    fn list_fruit() {
        assert_eq!(
            phrases_in("An apple, a banana and a pear"),
            vec!["An apple", "a banana", "a pear"]
        );
    }

    #[test]
    fn simplest_banana() {
        assert!(matches_at_start("a banana"));
    }

    #[test]
    fn food() {
        assert_eq!(
            phrases_in("My favorite foods are pizza, sushi, tacos and burgers."),
            vec!["My favorite foods", "pizza", "sushi", "tacos", "burgers"]
        );
    }

    #[test]
    fn simplest_way() {
        assert!(matches_at_start("a way"));
    }

    #[test]
    fn progressive_way() {
        assert!(matches_at_start("a winning way"));
    }

    #[test]
    fn perfect_way() {
        assert!(matches_at_start("a failed way"));
    }
}