harper_core/patterns/
nominal_phrase.rs1use crate::Token;
2
3use super::Pattern;
4
/// A [`Pattern`] that matches a simple nominal phrase at the start of a token slice.
///
/// The phrase consists of zero or more modifier tokens (adjectives, determiners, or
/// progressive verb forms), each immediately followed by a whitespace token, and ends
/// with a single nominal token.
#[derive(Default)]
pub struct NominalPhrase;
10
11impl Pattern for NominalPhrase {
12 fn matches(&self, tokens: &[Token], _source: &[char]) -> Option<usize> {
13 let mut cursor = 0;
14
15 loop {
16 let tok = tokens.get(cursor)?;
17
18 if (tok.kind.is_adjective()
19 || tok.kind.is_determiner()
20 || tok.kind.is_verb_progressive_form())
21 && let Some(next) = tokens.get(cursor + 1)
22 && next.kind.is_whitespace()
23 {
24 cursor += 2;
25 continue;
26 }
27
28 if tok.kind.is_nominal() {
29 return Some(cursor + 1);
30 }
31
32 return None;
33 }
34 }
35}
36
#[cfg(test)]
mod tests {
    use super::super::DocPattern;
    use super::NominalPhrase;
    use crate::{Document, Span, Token, patterns::Pattern};

    /// Test-only helper for turning token spans back into the text they cover.
    trait SpanVecExt {
        /// Renders each span as the concatenated text of the tokens it covers in `doc`.
        fn to_strings(&self, doc: &Document) -> Vec<String>;
    }

    impl SpanVecExt for Vec<Span<Token>> {
        fn to_strings(&self, doc: &Document) -> Vec<String> {
            self.iter()
                .map(|sp| {
                    doc.get_tokens()[sp.start..sp.end]
                        .iter()
                        .map(|tok| doc.get_span_content_str(&tok.span))
                        .collect::<String>()
                })
                .collect()
        }
    }

    /// Parses `text` and returns the text of every [`NominalPhrase`] match in it.
    fn matched_phrases(text: &str) -> Vec<String> {
        let doc = Document::new_markdown_default_curated(text);
        NominalPhrase.find_all_matches_in_doc(&doc).to_strings(&doc)
    }

    /// Asserts that [`NominalPhrase`] matches starting at the first token of `text`.
    fn assert_matches_from_start(text: &str) {
        let doc = Document::new_markdown_default_curated(text);
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some(),
            "expected a nominal phrase at the start of {text:?}"
        );
    }

    #[test]
    fn simple_apple() {
        assert_eq!(matched_phrases("A red apple"), vec!["A red apple"]);
    }

    #[test]
    fn complex_apple() {
        assert_eq!(
            matched_phrases("A red apple with a long stem"),
            vec!["A red apple", "a long stem"]
        );
    }

    #[test]
    fn list_fruit() {
        assert_eq!(
            matched_phrases("An apple, a banana and a pear"),
            vec!["An apple", "a banana", "a pear"]
        );
    }

    #[test]
    fn simplest_banana() {
        assert_matches_from_start("a banana");
    }

    #[test]
    fn food() {
        assert_eq!(
            matched_phrases("My favorite foods are pizza, sushi, tacos and burgers."),
            vec!["My favorite foods", "pizza", "sushi", "tacos", "burgers"]
        );
    }

    #[test]
    fn simplest_way() {
        assert_matches_from_start("a way");
    }

    #[test]
    fn present_participle_way() {
        assert_matches_from_start("a winning way");
    }

    #[test]
    fn perfect_participle_way() {
        assert_matches_from_start("a failed way");
    }
}