// harper_core/patterns/nominal_phrase.rs
use crate::Token;
2
3use super::Pattern;
4
/// A stateless pattern that recognizes a nominal phrase.
///
/// As implemented by its `matches` method, a nominal phrase is zero or more
/// modifiers (adjectives, determiners, or progressive verb forms), each
/// followed by a whitespace token, ending in a single nominal token.
#[derive(Debug, Clone, Copy, Default)]
pub struct NominalPhrase;
7
8impl Pattern for NominalPhrase {
9 fn matches(&self, tokens: &[Token], _source: &[char]) -> Option<usize> {
10 let mut cursor = 0;
11
12 loop {
13 let tok = tokens.get(cursor)?;
14
15 if (tok.kind.is_adjective()
16 || tok.kind.is_determiner()
17 || tok.kind.is_verb_progressive_form())
18 && let Some(next) = tokens.get(cursor + 1)
19 && next.kind.is_whitespace()
20 {
21 cursor += 2;
22 continue;
23 }
24
25 if tok.kind.is_nominal() {
26 return Some(cursor + 1);
27 }
28
29 return None;
30 }
31 }
32}
33
#[cfg(test)]
mod tests {
    use super::super::DocPattern;
    use super::NominalPhrase;
    use crate::{Document, Span, Token, patterns::Pattern};

    /// Test helper: renders token spans as the source text they cover.
    trait SpanVecExt {
        /// Returns one string per span, built by concatenating the text of
        /// every token of `doc` inside that span.
        fn to_strings(&self, doc: &Document) -> Vec<String>;
    }

    impl SpanVecExt for Vec<Span<Token>> {
        fn to_strings(&self, doc: &Document) -> Vec<String> {
            self.iter()
                .map(|sp| {
                    doc.get_tokens()[sp.start..sp.end]
                        .iter()
                        .map(|tok| doc.get_span_content_str(&tok.span))
                        .collect::<String>()
                })
                .collect()
        }
    }

    #[test]
    fn simple_apple() {
        let doc = Document::new_markdown_default_curated("A red apple");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(matches.to_strings(&doc), vec!["A red apple"]);
    }

    #[test]
    fn complex_apple() {
        let doc = Document::new_markdown_default_curated("A red apple with a long stem");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(matches.to_strings(&doc), vec!["A red apple", "a long stem"]);
    }

    #[test]
    fn list_fruit() {
        let doc = Document::new_markdown_default_curated("An apple, a banana and a pear");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(
            matches.to_strings(&doc),
            vec!["An apple", "a banana", "a pear"]
        );
    }

    #[test]
    fn simplest_banana() {
        let doc = Document::new_markdown_default_curated("a banana");
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some()
        );
    }

    #[test]
    fn food() {
        let doc = Document::new_markdown_default_curated(
            "My favorite foods are pizza, sushi, tacos and burgers.",
        );
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(
            matches.to_strings(&doc),
            vec!["My favorite foods", "pizza", "sushi", "tacos", "burgers"]
        );
    }

    #[test]
    fn simplest_way() {
        let doc = Document::new_markdown_default_curated("a way");
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some()
        );
    }

    #[test]
    fn present_participle_way() {
        let doc = Document::new_markdown_default_curated("a winning way");
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some()
        );
    }

    #[test]
    fn perfect_participle_way() {
        let doc = Document::new_markdown_default_curated("a failed way");
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some()
        );
    }
}