harper_core/patterns/
nominal_phrase.rs1use crate::Token;
2
3use super::Pattern;
4
/// A [`Pattern`] that matches a nominal phrase: any number of adjectives, determiners and
/// progressive verb forms (each followed by whitespace), ending in a nominal token.
///
/// The type carries no state, so it is cheap to copy and can be used directly as a value
/// (`NominalPhrase.matches(..)`).
#[derive(Debug, Clone, Copy, Default)]
pub struct NominalPhrase;
7
8impl Pattern for NominalPhrase {
9 fn matches(&self, tokens: &[Token], _source: &[char]) -> Option<usize> {
10 let mut cursor = 0;
11
12 loop {
13 let tok = tokens.get(cursor)?;
14
15 if tok.kind.is_adjective()
16 || tok.kind.is_determiner()
17 || tok.kind.is_verb_progressive_form()
18 {
19 if let Some(next) = tokens.get(cursor + 1) {
20 if next.kind.is_whitespace() {
21 cursor += 2;
22 continue;
23 }
24 }
25 }
26
27 if tok.kind.is_nominal() {
28 return Some(cursor + 1);
29 }
30
31 return None;
32 }
33 }
34}
35
#[cfg(test)]
mod tests {
    use super::super::DocPattern;
    use super::NominalPhrase;
    use crate::{Document, Span, patterns::Pattern};

    /// Test-only helper for rendering matched spans as readable text.
    trait SpanVecExt {
        /// Returns the text covered by each span, in order.
        fn to_strings(&self, doc: &Document) -> Vec<String>;
    }

    impl SpanVecExt for Vec<Span> {
        fn to_strings(&self, doc: &Document) -> Vec<String> {
            self.iter()
                .map(|sp| {
                    // The span is used to slice the document's token list; the text of each
                    // covered token is then concatenated.
                    doc.get_tokens()[sp.start..sp.end]
                        .iter()
                        .map(|tok| doc.get_span_content_str(&tok.span))
                        .collect::<String>()
                })
                .collect()
        }
    }

    /// Asserts that `NominalPhrase` matches starting at the first token of `text`.
    fn assert_matches_at_start(text: &str) {
        let doc = Document::new_markdown_default_curated(text);
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some(),
            "expected a nominal phrase at the start of {text:?}"
        );
    }

    #[test]
    fn simple_apple() {
        let doc = Document::new_markdown_default_curated("A red apple");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(matches.to_strings(&doc), vec!["A red apple"]);
    }

    #[test]
    fn complex_apple() {
        let doc = Document::new_markdown_default_curated("A red apple with a long stem");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(matches.to_strings(&doc), vec!["A red apple", "a long stem"]);
    }

    #[test]
    fn list_fruit() {
        let doc = Document::new_markdown_default_curated("An apple, a banana and a pear");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(
            matches.to_strings(&doc),
            vec!["An apple", "a banana", "a pear"]
        );
    }

    #[test]
    fn simplest_banana() {
        assert_matches_at_start("a banana");
    }

    #[test]
    fn food() {
        let doc = Document::new_markdown_default_curated(
            "My favorite foods are pizza, sushi, tacos and burgers.",
        );
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(
            matches.to_strings(&doc),
            vec!["My favorite foods", "pizza", "sushi", "tacos", "burgers"]
        );
    }

    #[test]
    fn simplest_way() {
        assert_matches_at_start("a way");
    }

    #[test]
    fn progressive_way() {
        assert_matches_at_start("a winning way");
    }

    #[test]
    fn perfect_way() {
        assert_matches_at_start("a failed way");
    }
}
146}