harper_core/linting/
avoid_curses.rs

1use crate::Token;
2use crate::expr::{Expr, SequenceExpr};
3use crate::linting::{LintKind, Suggestion};
4
5use super::{ExprLinter, Lint};
6
7pub struct AvoidCurses {
8    expr: Box<dyn Expr>,
9}
10
11impl Default for AvoidCurses {
12    fn default() -> Self {
13        Self {
14            expr: Box::new(SequenceExpr::default().then_swear()),
15        }
16    }
17}
18
19impl ExprLinter for AvoidCurses {
20    fn expr(&self) -> &dyn Expr {
21        self.expr.as_ref()
22    }
23
24    fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
25        if toks.len() != 1 {
26            return None;
27        }
28
29        let tok = &toks[0];
30        let span = tok.span;
31        let bad_word_chars = span.get_content(src);
32        let bad_word_str = span.get_content_string(src);
33        let bad_word_norm = bad_word_str.to_lowercase();
34
35        // Define offensive morphemes which are common parts of multiple words
36        // Each entry maps a morpheme to an optional censored version.
37        const MORPHEMES: &[(&str, Option<&str>)] = &[
38            ("arse", None),
39            ("ass", Some("a**")),
40            ("cock", Some("c**k")),
41            ("cunt", Some("c**t")),
42            ("dick", Some("d**k")),
43            ("fuck", Some("f**k")),
44            ("piss", Some("p**s")),
45            ("shit", Some("sh*t")),
46            ("wank", Some("w**k")),
47        ];
48
49        // Define offensive words and their possible replacements
50        const WORDS: &[(&str, &[&str])] = &[
51            ("apeshit", &["crazy", "mad", "insane", "wild"]),
52            (
53                "arse",
54                &["bum", "buttocks", "backside", "bottom", "rump", "posterior"],
55            ),
56            (
57                "arses",
58                &[
59                    "bums",
60                    "buttocks",
61                    "backsides",
62                    "bottoms",
63                    "rumps",
64                    "posteriors",
65                ],
66            ),
67            ("arsed", &["bothered"]),
68            ("arsehole", &["bumhole"]),
69            (
70                "ass",
71                &[
72                    "butt",
73                    "buttocks",
74                    "backside",
75                    "bottom",
76                    "rump",
77                    "posterior",
78                    "tuchus",
79                    "tush",
80                ],
81            ),
82            (
83                "asses",
84                &[
85                    "butts",
86                    "buttocks",
87                    "backsides",
88                    "bottoms",
89                    "rumps",
90                    "posteriors",
91                    "tuchuses",
92                    "tushes",
93                ],
94            ),
95            ("asshole", &["butthole"]),
96            // batshit
97            // birdshit
98            ("bullshit", &["bullcrap", "bulldust", "lie", "lies"]),
99            ("bullshitted", &["bullcrapped", "lied"]),
100            ("bullshitting", &["bullcrapping", "lying"]),
101            ("bullshitter", &["liar"]),
102            // bullshittery
103            ("chickenshit", &["gutless", "cowardly"]),
104            ("cock", &["pee-pee", "willy", "penis", "phallus", "member"]),
105            (
106                "cocks",
107                &["pee-pees", "willies", "penises", "phalluses", "members"],
108            ),
109            // cocksucker
110            ("cunt", &["vagina"]),
111            ("cunts", &["vaginas"]),
112            ("dick", &["pee-pee", "penis"]),
113            ("dicks", &["pee-pees", "penises"]),
114            ("dickhead", &["jerk", "idiot"]),
115            ("dichheads", &["jerks", "idiots"]),
116            // dipshit
117            ("dumbass", &["idiot", "fool"]),
118            ("dumbasses", &["idiots", "fools"]),
119            ("fart", &["gas", "wind", "break wind"]),
120            ("farts", &["gas", "wind", "breaks wind"]),
121            ("farted", &["broke wind", "broken wind"]),
122            ("farting", &["breaking wind"]),
123            ("fuck", &["fudge", "screw", "damn", "hoot"]),
124            ("fucks", &["screws"]),
125            ("fucked", &["screwed"]),
126            ("fucking", &["screwing"]),
127            ("fucker", &["jerk"]),
128            ("fuckers", &["jerks"]),
129            // fuckhead
130            ("horseshit", &["nonsense"]),
131            // mindfuck
132            // motherfucker
133            // nigga
134            // nigger
135            ("piss", &["pee", "urine", "urinate"]),
136            ("pisses", &["pees", "urinates"]),
137            ("pissed", &["peed", "urinated"]),
138            ("pissing", &["peeing", "urinating"]),
139            ("pisser", &["toilet", "bathroom", "restroom", "washroom"]),
140            // pissy
141            (
142                "shit",
143                &["crap", "poo", "poop", "feces", "dung", "damn", "hoot"],
144            ),
145            ("shits", &["craps", "poos", "poops"]),
146            ("shitted", &["crapped", "pooed", "pooped"]),
147            ("shitting", &["crapping", "pooing", "pooping"]),
148            // shitcoin
149            // shitfaced
150            // shitfest
151            // shithead
152            ("shitless", &["witless"]),
153            (
154                "shitload",
155                &["crapload", "shedload", "shirtload", "load", "tons", "pile"],
156            ),
157            (
158                "shitloads",
159                &[
160                    "craploads",
161                    "shedloads",
162                    "shirtloads",
163                    "loads",
164                    "tons",
165                    "piles",
166                ],
167            ),
168            // shitpost
169            ("shitty", &["shirty", "crappy", "inferior"]),
170            ("shittier", &["crappier", "shirtier"]),
171            ("shittiest", &["crappiest", "shirtiest"]),
172            ("tit", &["boob", "breast"]),
173            ("tits", &["boobs", "breasts"]),
174            ("titty", &["boob", "breast"]),
175            ("titties", &["boobs", "breasts"]),
176            ("turd", &["poo", "poop", "feces", "dung"]),
177            ("turds", &["poos", "poops", "feces", "dung"]),
178            ("twat", &["vagina"]),
179            // wank
180            ("wanker", &["jerk"]),
181            // wanky
182            ("whore", &["prostitute"]),
183        ];
184
185        // Replace common morphemes with both specific censored versions and all-asterisk versions
186        let morpheme_replacements: Vec<String> = MORPHEMES
187            .iter()
188            .filter(|(m, _)| bad_word_norm.contains(m))
189            .flat_map(|(m, censored)| {
190                let mut replacements = Vec::new();
191
192                // Add all-asterisk version for the censored morpheme only
193                let asterisked = "*".repeat(m.len());
194                let asterisked_word = bad_word_norm.replace(m, &asterisked);
195                replacements.push(asterisked_word);
196
197                // Add specific censored version if it exists
198                if let Some(c) = censored {
199                    let censored_word = bad_word_norm.replace(m, c);
200                    replacements.push(censored_word);
201                }
202
203                replacements
204            })
205            .collect();
206
207        // Find all replacement suggestions for the bad word
208        let word_replacements: Vec<&str> = WORDS
209            .iter()
210            .filter(|(bad, _)| *bad == bad_word_norm)
211            .flat_map(|(_, suggestions)| suggestions.iter().copied())
212            .collect();
213
214        if morpheme_replacements.is_empty() && word_replacements.is_empty() {
215            return None;
216        }
217
218        let m_suggestions: Vec<Suggestion> = morpheme_replacements
219            .into_iter()
220            .map(|replacement| {
221                Suggestion::replace_with_match_case(replacement.chars().collect(), bad_word_chars)
222            })
223            .collect();
224
225        let w_suggestions: Vec<Suggestion> = word_replacements
226            .into_iter()
227            .map(|replacement| {
228                Suggestion::replace_with_match_case(replacement.chars().collect(), bad_word_chars)
229            })
230            .collect();
231
232        let suggestions = m_suggestions.into_iter().chain(w_suggestions).collect();
233
234        Some(Lint {
235            span,
236            lint_kind: LintKind::WordChoice,
237            suggestions,
238            message: "Try to avoid offensive language.".to_string(),
239            ..Default::default()
240        })
241    }
242
243    fn description(&self) -> &'static str {
244        "Flags offensive language and offers various ways to censor or replace with euphemisms."
245    }
246}
247
#[cfg(test)]
mod tests {
    use super::AvoidCurses;
    use crate::linting::tests::{assert_lint_count, assert_top3_suggestion_result};

    /// Fresh linter instance in its default configuration.
    fn linter() -> AvoidCurses {
        AvoidCurses::default()
    }

    // Detection

    #[test]
    fn detects_shit() {
        let text = "He ate shit when he fell off the bike.";
        assert_lint_count(text, linter(), 1);
    }

    // Replacement words, checked with differently cased input

    #[test]
    fn fix_shit() {
        assert_top3_suggestion_result("shit", linter(), "crap");
    }

    #[test]
    fn fix_shit_titlecase() {
        assert_top3_suggestion_result("Shit", linter(), "Crap");
    }

    #[test]
    fn fix_shit_allcaps() {
        assert_top3_suggestion_result("SHIT", linter(), "CRAP");
    }

    // Censored variants

    #[test]
    fn fix_f_word_to_all_asterisks() {
        let input = "fuck those fucking fuckers";
        let expected = "**** those ****ing ****ers";
        assert_top3_suggestion_result(input, linter(), expected);
    }

    #[test]
    fn fix_shit_with_single_asterisk() {
        assert_top3_suggestion_result("shit", linter(), "sh*t");
    }

    #[test]
    fn fix_shite_all_caps_with_single_asterisk() {
        assert_top3_suggestion_result("SHIT", linter(), "SH*T");
    }
}