// harper_core/expr/pronoun_be.rs

1use crate::{Span, Token, patterns::WordSet};
2
3use super::{Expr, SequenceExpr};
4
5pub struct PronounBe {
6    expr: SequenceExpr,
7}
8
9impl Default for PronounBe {
10    fn default() -> Self {
11        Self {
12            expr: SequenceExpr::default().then_any_of(vec![
13                Box::new(
14                    SequenceExpr::default()
15                        .then_subject_pronoun()
16                        .t_ws()
17                        .t_set(&["am", "are", "is", "was", "were"]),
18                ),
19                Box::new(WordSet::new(&[
20                    "i'm", "we're", "you're", "he's", "she's", "it's", "they're",
21                ])),
22            ]),
23        }
24    }
25}
26
27impl Expr for PronounBe {
28    fn run(&self, cursor: usize, toks: &[Token], src: &[char]) -> Option<Span<Token>> {
29        if toks.is_empty() {
30            return None;
31        }
32
33        self.expr.run(cursor, toks, src)
34    }
35}
36
#[cfg(test)]
mod tests {
    use crate::{
        Document,
        expr::{ExprExt, PronounBe},
    };

    /// Asserts that `PronounBe` yields exactly `expected_count` matches in `text`.
    fn assert_count(text: &str, expected_count: usize) {
        let doc = Document::new_plain_english_curated(text);
        let found = PronounBe::default().iter_matches_in_doc(&doc).count();
        assert_eq!(found, expected_count);
    }

    // --- first person ---

    #[test]
    fn ok_i_am() {
        let text = "I am an auditor of software in M&A.";
        assert_count(text, 1);
    }

    #[test]
    fn ok_i_m() {
        let text = "I'm using Beads in every new project and adding it to every old project I visit with an agent.";
        assert_count(text, 1);
    }

    #[test]
    fn ok_we_are() {
        let text = "Error: we are enable to complete your request at this time.";
        assert_count(text, 1);
    }

    #[test]
    fn ok_we_re() {
        let text = "We're currently experiencing high demand, which may cause temporary errors.";
        assert_count(text, 1);
    }

    // --- second person ---

    #[test]
    fn ok_you_are() {
        let text = "You are Dolphin, an uncensored and unbiased AI assistant.";
        assert_count(text, 1);
    }

    #[test]
    fn ok_you_re() {
        let text = "You're trying to use a SD1 LoRA model with a SDXL Stable Diffusion model.";
        assert_count(text, 1);
    }

    // --- third person ---

    #[test]
    fn ok_he_is() {
        let text = "He is just a modulary guy for checking service health.";
        assert_count(text, 1);
    }

    #[test]
    fn ok_he_s_and_it_is() {
        // One contracted match ("He's") and one spelled-out match ("it is").
        let text = "He's Dead, Jim is a link checking program, specifically it is a command-line tool for finding and reporting dead links";
        assert_count(text, 2);
    }

    #[test]
    fn ok_she_is() {
        // The all-caps "SHE" is still expected to count as a match.
        let text = "SHE is designed by following the SHE functional specification.";
        assert_count(text, 1);
    }

    #[test]
    fn ok_she_s() {
        let text = "She's Coding is an open-source website project currently under development in cooperation with the documentary film CODE: Debugging the Gender Gap.";
        assert_count(text, 1);
    }

    #[test]
    fn ok_it_is() {
        let text = "It is same as ms-dos command where.exe.";
        assert_count(text, 1);
    }

    #[test]
    fn ok_it_s_and_you_are() {
        // One contracted match ("It's") and one spelled-out match ("you are").
        let text = "It's not working · Microcontrollers have only a limited amount of RAM: Verify that you are not running out of available RAM!";
        assert_count(text, 2);
    }

    #[test]
    fn ok_they_are() {
        let text = "Multidist binaries ignore the way they are called and check only the running binary name, making them hard to invoke from a single one.";
        assert_count(text, 1);
    }

    #[test]
    fn ok_they_re() {
        let text = "Cannot load dataset, they're greyed out.";
        assert_count(text, 1);
    }

    #[test]
    fn various_apostrophes() {
        // Straight and typographic apostrophes are both expected to match.
        let accepted = "it's/it’s";
        assert_count(accepted, 2);

        // A semicolon or an acute accent in place of the apostrophe is not.
        let rejected = "it;s/it´s";
        assert_count(rejected, 0);
    }
}