harper_core/patterns/
nominal_phrase.rs1use crate::Token;
2
3use super::Pattern;
4
/// A [`Pattern`] that recognizes a nominal phrase: a (possibly empty) run of
/// determiners and adjectives, each followed by whitespace, ending in a
/// nominal token — for example "A red apple".
///
/// The type carries no state, so it is cheap to copy and can be created with
/// [`Default::default`] or by naming the unit struct directly.
#[derive(Debug, Default, Clone, Copy)]
pub struct NominalPhrase;
7
8impl Pattern for NominalPhrase {
9 fn matches(&self, tokens: &[Token], _source: &[char]) -> Option<usize> {
10 let mut cursor = 0;
11
12 loop {
13 let tok = tokens.get(cursor)?;
14
15 if tok.kind.is_adjective() || tok.kind.is_determiner() {
16 let next = tokens.get(cursor + 1)?;
17
18 if !next.kind.is_whitespace() {
19 return None;
20 }
21
22 cursor += 2;
23 continue;
24 }
25
26 if tok.kind.is_nominal() {
27 return Some(cursor + 1);
28 }
29
30 return None;
31 }
32 }
33}
34
#[cfg(test)]
mod tests {
    use super::super::DocPattern;
    use super::NominalPhrase;
    use crate::{Document, Span, patterns::Pattern};

    /// Test helper that renders matched spans as the text they cover.
    trait SpanVecExt {
        /// Returns one string per span, built by concatenating the source text
        /// of every token in the span's `start..end` token range of `doc`.
        fn to_strings(&self, doc: &Document) -> Vec<String>;
    }

    impl SpanVecExt for Vec<Span> {
        fn to_strings(&self, doc: &Document) -> Vec<String> {
            self.iter()
                .map(|sp| {
                    // The spans index into the document's token list, not
                    // into its characters.
                    doc.get_tokens()[sp.start..sp.end]
                        .iter()
                        .map(|tok| doc.get_span_content_str(&tok.span))
                        .collect::<String>()
                })
                .collect()
        }
    }

    #[test]
    fn simple_apple() {
        let doc = Document::new_markdown_default_curated("A red apple");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(matches.to_strings(&doc), vec!["A red apple"]);
    }

    #[test]
    fn complex_apple() {
        let doc = Document::new_markdown_default_curated("A red apple with a long stem");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(matches.to_strings(&doc), vec!["A red apple", "a long stem"]);
    }

    #[test]
    fn list_fruit() {
        let doc = Document::new_markdown_default_curated("An apple, a banana and a pear");
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        assert_eq!(
            matches.to_strings(&doc),
            vec!["An apple", "a banana", "a pear"]
        );
    }

    #[test]
    fn simplest_banana() {
        let doc = Document::new_markdown_default_curated("a banana");
        assert!(
            NominalPhrase
                .matches(doc.get_tokens(), doc.get_source())
                .is_some()
        );
    }

    #[test]
    fn food() {
        let doc = Document::new_markdown_default_curated(
            "My favorite foods are pizza, sushi, tacos and burgers.",
        );
        let matches = NominalPhrase.find_all_matches_in_doc(&doc);

        // On failure `assert_eq!` already prints both sides, so no extra
        // debug output is needed here.
        assert_eq!(
            matches.to_strings(&doc),
            vec!["My favorite foods", "pizza", "sushi", "tacos", "burgers"]
        );
    }
}
115}