harper_core/patterns/
nominal_phrase.rs1use crate::Token;
2
3use super::Pattern;
4
/// A `Pattern` that recognizes a simple nominal phrase at the start of a token slice.
///
/// The phrase consists of zero or more modifier tokens (adjectives, determiners,
/// or progressive verb forms), each followed by a whitespace token, and ends with
/// a single nominal token. For example, `A red apple` or `a winning way`.
///
/// The type carries no state, so it is cheap to copy and can be built with
/// `NominalPhrase` or `NominalPhrase::default()`.
#[derive(Debug, Default, Clone, Copy)]
pub struct NominalPhrase;
7
8impl Pattern for NominalPhrase {
9 fn matches(&self, tokens: &[Token], _source: &[char]) -> Option<usize> {
10 let mut cursor = 0;
11
12 loop {
13 let tok = tokens.get(cursor)?;
14
15 if tok.kind.is_adjective()
16 || tok.kind.is_determiner()
17 || tok.kind.is_verb_progressive_form()
18 {
19 if let Some(next) = tokens.get(cursor + 1) {
20 if next.kind.is_whitespace() {
21 cursor += 2;
22 continue;
23 }
24 }
25 }
26
27 if tok.kind.is_nominal() {
28 return Some(cursor + 1);
29 }
30
31 return None;
32 }
33 }
34}
35
#[cfg(test)]
mod tests {
    use super::super::DocPattern;
    use super::NominalPhrase;
    use crate::{Document, Span, Token, patterns::Pattern};

    /// Test helper: renders token spans back into the text they cover so that
    /// assertions can be written against plain strings.
    trait SpanVecExt {
        fn to_strings(&self, doc: &Document) -> Vec<String>;
    }

    impl SpanVecExt for Vec<Span<Token>> {
        fn to_strings(&self, doc: &Document) -> Vec<String> {
            self.iter()
                .map(|sp| {
                    doc.get_tokens()[sp.start..sp.end]
                        .iter()
                        .map(|tok| doc.get_span_content_str(&tok.span))
                        .collect::<String>()
                })
                .collect()
        }
    }

    /// Parses `text` and returns the text of every nominal phrase found in it.
    fn phrases_in(text: &str) -> Vec<String> {
        let doc = Document::new_markdown_default_curated(text);
        NominalPhrase.find_all_matches_in_doc(&doc).to_strings(&doc)
    }

    /// Parses `text` and reports whether a nominal phrase matches at its first token.
    fn matches_at_start(text: &str) -> bool {
        let doc = Document::new_markdown_default_curated(text);
        NominalPhrase
            .matches(doc.get_tokens(), doc.get_source())
            .is_some()
    }

    #[test]
    fn simple_apple() {
        assert_eq!(phrases_in("A red apple"), vec!["A red apple"]);
    }

    #[test]
    fn complex_apple() {
        assert_eq!(
            phrases_in("A red apple with a long stem"),
            vec!["A red apple", "a long stem"]
        );
    }

    #[test]
    fn list_fruit() {
        assert_eq!(
            phrases_in("An apple, a banana and a pear"),
            vec!["An apple", "a banana", "a pear"]
        );
    }

    #[test]
    fn simplest_banana() {
        assert!(matches_at_start("a banana"));
    }

    #[test]
    fn food() {
        assert_eq!(
            phrases_in("My favorite foods are pizza, sushi, tacos and burgers."),
            vec!["My favorite foods", "pizza", "sushi", "tacos", "burgers"]
        );
    }

    #[test]
    fn simplest_way() {
        assert!(matches_at_start("a way"));
    }

    #[test]
    fn progressive_way() {
        assert!(matches_at_start("a winning way"));
    }

    #[test]
    fn perfect_way() {
        assert!(matches_at_start("a failed way"));
    }
}