harper_core/expr/
reflexive_pronoun.rs

1use crate::{
2    Span, Token,
3    expr::{Expr, FirstMatchOf},
4    patterns::WordSet,
5};
6
/// Non-standard reflexive pronouns that are commonly used anyway.
///
/// These are considered ungrammatical, or are at least not in `dictionary.dict`. They are only
/// matched when the matcher is built with `ReflexivePronoun::with_common_errors()`.
///
/// The tests in this file panic if one of these words turns up in the dictionary, which is the
/// cue to update this list and the `Expr` that uses it.
const BAD_REFLEXIVE_PRONOUNS: &[&str] = &[
    "hisself",
    "oneselves",
    "theirself",
    "theirselves",
    "themself",
];
16
/// Matches reflexive pronouns with configurable strictness.
///
/// By default, only tokens whose kind reports being a reflexive pronoun are matched. Use
/// [`ReflexivePronoun::with_common_errors`] to also match frequently encountered non-standard
/// forms like "hisself" or "theirself".
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ReflexivePronoun {
    /// When `true`, the words in `BAD_REFLEXIVE_PRONOUNS` are matched in addition to the
    /// standard forms.
    include_common_errors: bool,
}

impl Default for ReflexivePronoun {
    /// Returns the same matcher as [`ReflexivePronoun::standard`].
    fn default() -> Self {
        Self::standard()
    }
}

impl ReflexivePronoun {
    /// Creates a matcher for standard English reflexive pronouns.
    ///
    /// A token matches when its kind reports being a reflexive pronoun, so the accepted set is
    /// whatever the dictionary marks as such: e.g. "myself", "yourself", "himself", "herself",
    /// "itself", "ourselves", "yourselves", and "themselves". This module's tests also expect
    /// "oneself", "ourself", and "thyself" to qualify.
    pub fn standard() -> Self {
        Self {
            include_common_errors: false,
        }
    }

    /// Creates a matcher that includes non-standard but commonly used reflexive pronouns.
    ///
    /// In addition to everything [`ReflexivePronoun::standard`] matches, this accepts the
    /// forms listed in `BAD_REFLEXIVE_PRONOUNS`, such as "hisself" and "theirself".
    pub fn with_common_errors() -> Self {
        Self {
            include_common_errors: true,
        }
    }
}
52
53impl Expr for ReflexivePronoun {
54    fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span<Token>> {
55        let good_pronouns = |token: &Token, _: &[char]| token.kind.is_reflexive_pronoun();
56        let mut expr = FirstMatchOf::new(vec![Box::new(good_pronouns)]);
57        if self.include_common_errors {
58            expr.add(WordSet::new(BAD_REFLEXIVE_PRONOUNS));
59        }
60        expr.run(cursor, tokens, source)
61    }
62}
63
#[cfg(test)]
mod tests {
    use crate::{
        Document, TokenKind,
        expr::{ExprExt, ReflexivePronoun, reflexive_pronoun::BAD_REFLEXIVE_PRONOUNS},
    };

    // These are considered grammatically correct, or are at least in `dictionary.dict`.
    // The tests below fail if that changes, which is the cue to update the `Expr`.
    const GOOD_REFLEXIVE_PRONOUNS: &[&str] = &[
        "herself",
        "himself",
        "itself",
        "myself",
        "oneself",
        "ourself",
        "ourselves",
        "themselves",
        "thyself",
        "yourself",
        "yourselves",
    ];

    /// Checks that the dictionary's view of `word` agrees with the good/bad lists.
    ///
    /// - A word from `GOOD_REFLEXIVE_PRONOUNS` must carry metadata that marks it as a pronoun
    ///   and as a reflexive pronoun.
    /// - A word from `BAD_REFLEXIVE_PRONOUNS`, or from neither list, must carry no metadata.
    ///
    /// Panics on any disagreement, if the word is in both lists, or if the first token is not a
    /// word at all.
    fn assert_dictionary_status(word: &str) {
        let doc = Document::new_plain_english_curated(word);
        let token = doc.tokens().next().expect("No tokens in document");

        let is_good_pron = GOOD_REFLEXIVE_PRONOUNS.contains(&word);
        let is_bad_pron = BAD_REFLEXIVE_PRONOUNS.contains(&word);

        // A word in both lists is a mistake in the lists themselves, whatever the token looks like.
        assert!(
            !(is_good_pron && is_bad_pron),
            "'{word}' is in both good and bad lists"
        );

        match (is_good_pron, &token.kind) {
            (true, TokenKind::Word(Some(md))) => {
                assert!(md.is_pronoun());
                assert!(md.is_reflexive_pronoun());
            }
            (true, TokenKind::Word(None)) => {
                panic!("Widely accepted pronoun '{word}' has gone missing from the dictionary!")
            }
            (false, TokenKind::Word(Some(_))) if is_bad_pron => panic!(
                "Unaccepted pronoun '{word}' that's used in bad English is now in the dictionary!"
            ),
            (false, TokenKind::Word(Some(_))) => panic!(
                "non-pronoun '{word}' is made up just for testing but is now in the dictionary!"
            ),
            (false, TokenKind::Word(None)) => {}
            _ => panic!("'{word}' doesn't match any expected case"),
        }
    }

    /// Counts how many matches `matcher` finds in `text`.
    fn count_matches(matcher: &ReflexivePronoun, text: &str) -> usize {
        let doc = Document::new_plain_english_curated(text);
        // Bind the iterator to a local so it is dropped before `doc`, which it borrows.
        let matches = matcher.iter_matches_in_doc(&doc);
        matches.count()
    }

    #[test]
    fn test_good_reflexive_pronouns() {
        for word in GOOD_REFLEXIVE_PRONOUNS {
            assert_dictionary_status(word);
        }
    }

    #[test]
    fn test_bad_reflexive_pronouns() {
        for word in BAD_REFLEXIVE_PRONOUNS {
            assert_dictionary_status(word);
        }
    }

    // It's expected that nobody uses these words even in bad English.
    #[test]
    fn test_non_pronouns() {
        for word in ["myselves", "weselves", "usself", "usselves"] {
            assert_dictionary_status(word);
        }
    }

    #[test]
    fn ensure_standard_ctor_includes_yourself() {
        let rp = ReflexivePronoun::standard();
        assert_eq!(
            count_matches(&rp, "If you want something done, do it yourself."),
            1
        );
    }

    #[test]
    fn ensure_default_ctor_includes_myself() {
        let rp = ReflexivePronoun::default();
        assert_eq!(
            count_matches(&rp, "I wanted a reflexive pronoun module, so I wrote one myself."),
            1
        );
    }

    #[test]
    fn ensure_with_common_errors_includes_hisself() {
        let rp = ReflexivePronoun::with_common_errors();
        assert_eq!(count_matches(&rp, "He teached hisself English."), 1);
    }

    #[test]
    fn ensure_standard_ctor_excludes_hisself() {
        let rp = ReflexivePronoun::standard();
        assert_eq!(count_matches(&rp, "Was he pleased with hisself?"), 0);
    }

    #[test]
    fn ensure_default_ctor_excludes_theirself() {
        let rp = ReflexivePronoun::default();
        assert_eq!(count_matches(&rp, "They look at theirself in the mirror."), 0);
    }
}