harper_core/patterns/
nominal_phrase.rs

1use crate::Token;
2
3use super::Pattern;
4
/// A [`Pattern`] that matches a nominal phrase at the start of a token slice.
///
/// The phrase consists of zero or more adjectives or determiners, each
/// immediately followed by a whitespace token, and ends with a nominal token
/// (for example, the tokens of "A red apple").
#[derive(Debug, Default, Clone, Copy)]
pub struct NominalPhrase;
7
8impl Pattern for NominalPhrase {
9    fn matches(&self, tokens: &[Token], _source: &[char]) -> Option<usize> {
10        let mut cursor = 0;
11
12        loop {
13            let tok = tokens.get(cursor)?;
14
15            if tok.kind.is_adjective() || tok.kind.is_determiner() {
16                let next = tokens.get(cursor + 1)?;
17
18                if !next.kind.is_whitespace() {
19                    return None;
20                }
21
22                cursor += 2;
23                continue;
24            }
25
26            if tok.kind.is_nominal() {
27                return Some(cursor + 1);
28            }
29
30            return None;
31        }
32    }
33}
34
#[cfg(test)]
mod tests {
    use super::super::DocPattern;
    use super::NominalPhrase;
    use crate::{Document, Span, patterns::Pattern};

    /// Test helper for rendering match spans as the text they cover.
    trait SpanVecExt {
        /// Renders each span as the concatenated text of the tokens it covers.
        fn to_strings(&self, doc: &Document) -> Vec<String>;
    }

    impl SpanVecExt for Vec<Span> {
        fn to_strings(&self, doc: &Document) -> Vec<String> {
            // Each span is used as a range of token indices into the document,
            // so the text is rebuilt token by token.
            self.iter()
                .map(|sp| {
                    doc.get_tokens()[sp.start..sp.end]
                        .iter()
                        .map(|tok| doc.get_span_content_str(&tok.span))
                        .collect::<String>()
                })
                .collect()
        }
    }

    #[test]
    fn simple_apple() {
        let doc = Document::new_markdown_default_curated("A red apple");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(matches.to_strings(&doc), vec!["A red apple"]);
    }

    #[test]
    fn complex_apple() {
        let doc = Document::new_markdown_default_curated("A red apple with a long stem");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(matches.to_strings(&doc), vec!["A red apple", "a long stem"]);
    }

    #[test]
    fn list_fruit() {
        let doc = Document::new_markdown_default_curated("An apple, a banana and a pear");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(
            matches.to_strings(&doc),
            vec!["An apple", "a banana", "a pear"]
        );
    }

    #[test]
    fn simplest_banana() {
        let doc = Document::new_markdown_default_curated("a banana");
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some()
        );
    }

    #[test]
    fn food() {
        let doc = Document::new_markdown_default_curated(
            "My favorite foods are pizza, sushi, tacos and burgers.",
        );
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(
            matches.to_strings(&doc),
            vec!["My favorite foods", "pizza", "sushi", "tacos", "burgers"]
        );
    }
}