harper_core/patterns/
nominal_phrase.rs

1use crate::Token;
2
3use super::Pattern;
4
/// A pattern that uses primitive syntax-tree heuristics to locate nominal phrases.
///
/// The accepted shape is a (possibly empty) run of adjectives, determiners, or
/// progressive verb forms, each immediately followed by a whitespace token, ending
/// in a single nominal token.
///
/// Given that it does not take context into account, it is not recommended for new code.
/// Please prefer [`DictWordMetadata::np_member`](crate::DictWordMetadata::np_member).
#[derive(Default)]
pub struct NominalPhrase;
10
11impl Pattern for NominalPhrase {
12    fn matches(&self, tokens: &[Token], _source: &[char]) -> Option<usize> {
13        let mut cursor = 0;
14
15        loop {
16            let tok = tokens.get(cursor)?;
17
18            if (tok.kind.is_adjective()
19                || tok.kind.is_determiner()
20                || tok.kind.is_verb_progressive_form())
21                && let Some(next) = tokens.get(cursor + 1)
22                && next.kind.is_whitespace()
23            {
24                cursor += 2;
25                continue;
26            }
27
28            if tok.kind.is_nominal() {
29                return Some(cursor + 1);
30            }
31
32            return None;
33        }
34    }
35}
36
#[cfg(test)]
mod tests {
    use super::super::DocPattern;
    use super::NominalPhrase;
    use crate::{Document, Span, Token, patterns::Pattern};

    /// Renders matched token spans back into the text they cover.
    trait SpanVecExt {
        /// Concatenates the content of every token inside each span, one string per span.
        fn to_strings(&self, doc: &Document) -> Vec<String>;
    }

    impl SpanVecExt for Vec<Span<Token>> {
        fn to_strings(&self, doc: &Document) -> Vec<String> {
            self.iter()
                .map(|sp| {
                    doc.get_tokens()[sp.start..sp.end]
                        .iter()
                        .map(|tok| doc.get_span_content_str(&tok.span))
                        .collect::<String>()
                })
                .collect()
        }
    }

    /// Returns the text of every nominal phrase found in `text`, in document order.
    fn phrases_in(text: &str) -> Vec<String> {
        let doc = Document::new_markdown_default_curated(text);
        NominalPhrase.find_all_matches_in_doc(&doc).to_strings(&doc)
    }

    /// Asserts that `NominalPhrase` matches starting at the first token of `text`.
    #[track_caller]
    fn assert_matches_at_start(text: &str) {
        let doc = Document::new_markdown_default_curated(text);
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some(),
            "expected a nominal phrase at the start of {text:?}"
        );
    }

    #[test]
    fn simple_apple() {
        assert_eq!(phrases_in("A red apple"), vec!["A red apple"]);
    }

    #[test]
    fn complex_apple() {
        assert_eq!(
            phrases_in("A red apple with a long stem"),
            vec!["A red apple", "a long stem"]
        );
    }

    #[test]
    fn list_fruit() {
        assert_eq!(
            phrases_in("An apple, a banana and a pear"),
            vec!["An apple", "a banana", "a pear"]
        );
    }

    #[test]
    fn simplest_banana() {
        assert_matches_at_start("a banana");
    }

    #[test]
    fn food() {
        assert_eq!(
            phrases_in("My favorite foods are pizza, sushi, tacos and burgers."),
            vec!["My favorite foods", "pizza", "sushi", "tacos", "burgers"]
        );
    }

    #[test]
    fn simplest_way() {
        assert_matches_at_start("a way");
    }

    #[test]
    fn present_participle_way() {
        assert_matches_at_start("a winning way");
    }

    #[test]
    fn perfect_participle_way() {
        assert_matches_at_start("a failed way");
    }
}