use crate::Token;
use crate::expr::{Expr, SequenceExpr};
use crate::linting::{LintKind, Suggestion};
use super::{ExprLinter, Lint};
use crate::linting::expr_linter::Chunk;
/// Linter that flags offensive words and offers censored spellings or
/// euphemisms as replacements.
pub struct AvoidCurses {
/// Expression returned from `ExprLinter::expr`; built in `Default::default`.
expr: SequenceExpr,
}
impl Default for AvoidCurses {
    /// Creates the linter with a default `SequenceExpr` extended by
    /// `then_swear()`.
    fn default() -> Self {
        let expr = SequenceExpr::default().then_swear();
        Self { expr }
    }
}
impl ExprLinter for AvoidCurses {
    type Unit = Chunk;

    /// Returns the expression stored in this linter.
    fn expr(&self) -> &dyn Expr {
        &self.expr
    }

    /// Turns a matched token into a lint carrying replacement suggestions.
    ///
    /// Returns `None` when `toks` does not hold exactly one token, or when
    /// neither lookup table yields a replacement for the lowercased word.
    ///
    /// Otherwise the lint covers the token's span, has kind
    /// `LintKind::WordChoice`, and lists the censored spellings first,
    /// followed by the euphemisms for an exact word match. Every suggestion
    /// is built with `Suggestion::replace_with_match_case` against the
    /// original characters of the word.
    fn match_to_lint(&self, toks: &[Token], src: &[char]) -> Option<Lint> {
        if toks.len() != 1 {
            return None;
        }

        let tok = &toks[0];
        let span = tok.span;
        let bad_word_chars = span.get_content(src);
        // Both tables below are lowercase, so compare against a lowercased copy.
        let bad_word_norm = span.get_content_string(src).to_lowercase();

        // Offensive substrings, each with an optional partially censored
        // spelling. Any word containing one gets an all-asterisk variant and,
        // when a spelling is given, a partially censored variant as well.
        const MORPHEMES: &[(&str, Option<&str>)] = &[
            ("arse", None),
            ("ass", Some("a**")),
            ("cock", Some("c**k")),
            ("cunt", Some("c**t")),
            ("dick", Some("d**k")),
            ("fuck", Some("f**k")),
            ("piss", Some("p**s")),
            ("shit", Some("sh*t")),
            ("wank", Some("w**k")),
        ];

        // Whole words (compared for equality, not containment) mapped to
        // their euphemisms.
        const WORDS: &[(&str, &[&str])] = &[
            ("apeshit", &["crazy", "mad", "insane", "wild"]),
            (
                "arse",
                &["bum", "buttocks", "backside", "bottom", "rump", "posterior"],
            ),
            (
                "arses",
                &[
                    "bums",
                    "buttocks",
                    "backsides",
                    "bottoms",
                    "rumps",
                    "posteriors",
                ],
            ),
            ("arsed", &["bothered"]),
            ("arsehole", &["bumhole"]),
            (
                "ass",
                &[
                    "butt",
                    "buttocks",
                    "backside",
                    "bottom",
                    "rump",
                    "posterior",
                    "tuchus",
                    "tush",
                ],
            ),
            (
                "asses",
                &[
                    "butts",
                    "buttocks",
                    "backsides",
                    "bottoms",
                    "rumps",
                    "posteriors",
                    "tuchuses",
                    "tushes",
                ],
            ),
            ("asshole", &["butthole"]),
            ("bullshit", &["bullcrap", "bulldust", "lie", "lies"]),
            ("bullshitted", &["bullcrapped", "lied"]),
            ("bullshitting", &["bullcrapping", "lying"]),
            ("bullshitter", &["liar"]),
            ("chickenshit", &["gutless", "cowardly"]),
            ("cock", &["pee-pee", "willy", "penis", "phallus", "member"]),
            (
                "cocks",
                &["pee-pees", "willies", "penises", "phalluses", "members"],
            ),
            ("cunt", &["vagina"]),
            ("cunts", &["vaginas"]),
            ("dick", &["pee-pee", "penis"]),
            ("dicks", &["pee-pees", "penises"]),
            ("dickhead", &["jerk", "idiot"]),
            // Was misspelled "dichheads", so the plural never matched.
            ("dickheads", &["jerks", "idiots"]),
            ("dumbass", &["idiot", "fool"]),
            ("dumbasses", &["idiots", "fools"]),
            ("fart", &["gas", "wind", "break wind"]),
            ("farts", &["gas", "wind", "breaks wind"]),
            ("farted", &["broke wind", "broken wind"]),
            ("farting", &["breaking wind"]),
            ("fuck", &["fudge", "screw", "damn", "hoot"]),
            ("fucks", &["screws"]),
            ("fucked", &["screwed"]),
            ("fucking", &["screwing"]),
            ("fucker", &["jerk"]),
            ("fuckers", &["jerks"]),
            ("horseshit", &["nonsense"]),
            ("piss", &["pee", "urine", "urinate"]),
            ("pisses", &["pees", "urinates"]),
            ("pissed", &["peed", "urinated"]),
            ("pissing", &["peeing", "urinating"]),
            ("pisser", &["toilet", "bathroom", "restroom", "washroom"]),
            (
                "shit",
                &["crap", "poo", "poop", "feces", "dung", "damn", "hoot"],
            ),
            ("shits", &["craps", "poos", "poops"]),
            ("shitted", &["crapped", "pooed", "pooped"]),
            ("shitting", &["crapping", "pooing", "pooping"]),
            ("shitless", &["witless"]),
            (
                "shitload",
                &["crapload", "shedload", "shirtload", "load", "tons", "pile"],
            ),
            (
                "shitloads",
                &[
                    "craploads",
                    "shedloads",
                    "shirtloads",
                    "loads",
                    "tons",
                    "piles",
                ],
            ),
            ("shitty", &["shirty", "crappy", "inferior"]),
            ("shittier", &["crappier", "shirtier"]),
            ("shittiest", &["crappiest", "shirtiest"]),
            ("tit", &["boob", "breast"]),
            ("tits", &["boobs", "breasts"]),
            ("titty", &["boob", "breast"]),
            ("titties", &["boobs", "breasts"]),
            ("turd", &["poo", "poop", "feces", "dung"]),
            ("turds", &["poos", "poops", "feces", "dung"]),
            ("twat", &["vagina"]),
            ("wanker", &["jerk"]),
            ("whore", &["prostitute"]),
        ];

        // For each morpheme found in the word: first the all-asterisk form,
        // then the partially censored form when the table provides one.
        let morpheme_replacements: Vec<String> = MORPHEMES
            .iter()
            .filter(|(morpheme, _)| bad_word_norm.contains(morpheme))
            .flat_map(|(morpheme, censored)| {
                let asterisks = "*".repeat(morpheme.len());
                let fully_masked = bad_word_norm.replace(morpheme, &asterisks);
                let partly_masked = censored.map(|c| bad_word_norm.replace(morpheme, c));
                std::iter::once(fully_masked).chain(partly_masked)
            })
            .collect();

        // Euphemisms only apply when the whole word equals a table entry.
        let word_replacements: Vec<&str> = WORDS
            .iter()
            .filter(|(bad, _)| *bad == bad_word_norm)
            .flat_map(|(_, euphemisms)| euphemisms.iter().copied())
            .collect();

        if morpheme_replacements.is_empty() && word_replacements.is_empty() {
            return None;
        }

        // Shared builder so both kinds of replacement are case-matched the
        // same way against the original word.
        let to_suggestion = |replacement: &str| {
            Suggestion::replace_with_match_case(replacement.chars().collect(), bad_word_chars)
        };

        // Censored spellings come first, euphemisms after.
        let suggestions = morpheme_replacements
            .iter()
            .map(|replacement| to_suggestion(replacement.as_str()))
            .chain(
                word_replacements
                    .iter()
                    .map(|replacement| to_suggestion(replacement)),
            )
            .collect();

        Some(Lint {
            span,
            lint_kind: LintKind::WordChoice,
            suggestions,
            message: "Try to avoid offensive language.".to_string(),
            ..Default::default()
        })
    }

    /// Returns a short, static description of what this linter does.
    fn description(&self) -> &'static str {
        "Flags offensive language and offers various ways to censor or replace with euphemisms."
    }
}
#[cfg(test)]
mod tests {
    //! Tests for `AvoidCurses`: detection, euphemism replacement with case
    //! matching, and the asterisk-censored spellings.
    //!
    //! NOTE(review): several tests expect different results for the same
    //! input (e.g. "shit" -> "crap" and "shit" -> "sh*t"), while the linter
    //! lists censored spellings before euphemisms. Whether these all pass
    //! depends on how `assert_suggestion_result` selects a suggestion, which
    //! is not visible here -- TODO confirm.

    use super::AvoidCurses;
    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};

    /// A single curse inside a sentence yields exactly one lint.
    #[test]
    fn detects_shit() {
        assert_lint_count(
            "He ate shit when he fell off the bike.",
            AvoidCurses::default(),
            1,
        );
    }

    /// A lowercase word is replaced by a lowercase euphemism.
    #[test]
    fn fix_shit() {
        assert_suggestion_result(
            "shit",
            AvoidCurses::default(),
            "crap",
        )
    }

    /// A title-case word is replaced by a title-case euphemism.
    #[test]
    fn fix_shit_titlecase() {
        assert_suggestion_result(
            "Shit",
            AvoidCurses::default(),
            "Crap",
        )
    }

    /// An all-caps word is replaced by an all-caps euphemism.
    #[test]
    fn fix_shit_allcaps() {
        assert_suggestion_result(
            "SHIT",
            AvoidCurses::default(),
            "CRAP",
        )
    }

    /// The offensive morpheme is fully masked; suffixes stay intact.
    #[test]
    fn fix_f_word_to_all_asterisks() {
        assert_suggestion_result(
            "fuck those fucking fuckers",
            AvoidCurses::default(),
            "**** those ****ing ****ers",
        )
    }

    /// The partially censored spelling is offered for a lowercase word.
    #[test]
    fn fix_shit_with_single_asterisk() {
        assert_suggestion_result(
            "shit",
            AvoidCurses::default(),
            "sh*t",
        )
    }

    /// The partially censored spelling follows the all-caps input.
    #[test]
    fn fix_shite_all_caps_with_single_asterisk() {
        assert_suggestion_result(
            "SHIT",
            AvoidCurses::default(),
            "SH*T",
        )
    }
}