#![warn(missing_docs)]
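
//! A simple profanity filter.
//!
//! A minimal usage sketch; it assumes this file is the root of a crate named
//! `censor` (an assumption, not established by the code itself):
//!
//! ```
//! use censor::*;
//!
//! let censor = Censor::Standard;
//! assert!(censor.check("fuck you"));
//! assert_eq!(censor.censor("fuck you"), "**** you");
//! ```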

use std::{
    collections::{BTreeSet, HashMap, HashSet},
    ops::{Add, AddAssign, Sub, SubAssign},
};

use once_cell::sync::Lazy;

/// Maps uppercase ASCII letters and common lookalike characters (leetspeak
/// digits, symbols, accented letters, Greek letters) to the lowercase ASCII
/// letter they resemble.
static CHAR_ALIASES: Lazy<HashMap<char, char>> = Lazy::new(|| {
    let mut map = HashMap::new();
    const CASE_DIFF: u8 = b'a' - b'A';
    // Map every uppercase ASCII letter to its lowercase counterpart.
    for c in b'A'..=b'Z' {
        map.insert(c as char, (c + CASE_DIFF) as char);
    }
    macro_rules! alias {
        ($reduced:literal => $($alias:literal),*) => {
            $(map.insert($alias, $reduced);)*
        };
    }
    alias!('a' => '4', '@', 'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'à', 'á', 'â', 'ã', 'ä', 'å', 'α', 'Α');
    alias!('b' => 'ß', 'Β', '฿');
    alias!('c' => '¢', 'ç', 'Ç', '©');
    alias!('d' => 'Ð', '₫');
    alias!('e' => '3', '£', '€', 'È', 'É', 'Ê', 'Ë', 'è', 'é', 'ê', 'ë', 'ε', 'Ε', 'Ξ', 'Σ');
    alias!('g' => '6');
    alias!('h' => 'Η');
    alias!('i' => '1', '|', '!', 'Ì', 'Í', 'Î', 'Ï', 'ì', 'í', 'î', 'ï', 'Ι');
    alias!('k' => 'κ', 'Κ');
    alias!('m' => 'Μ');
    alias!('n' => 'ñ', 'Ñ', 'η', 'Ν', 'Π');
    alias!('o' => '0', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', 'ò', 'ó', 'ô', 'õ', 'ö', 'Ø', 'ø', 'θ', 'ο', 'σ', 'Θ', 'Ο', 'Φ');
    alias!('p' => 'ρ', 'Ρ', '₱', '℗', 'Þ', 'þ');
    alias!('r' => '®');
    alias!('s' => '5', '$');
    alias!('t' => 'τ', 'Τ');
    alias!('u' => 'Ù', 'Ú', 'Û', 'Ü', 'ù', 'ú', 'û', 'ü', 'μ', 'υ');
    alias!('v' => 'ν');
    alias!('w' => 'ω', '₩');
    alias!('x' => '×', 'χ', 'Χ');
    alias!('y' => '¥', 'Ý', 'ý', 'ÿ', 'γ', 'Υ');
    alias!('z' => '2', 'Ζ');
    map
});

macro_rules! word_set {
    ($doc:literal, $name:ident, $($word:literal),*) => {
        #[doc = $doc]
        #[doc = ""]
        #[doc = "#### List"]
        $(
            #[doc = $word]
            #[doc = ""]
        )*
        pub static $name: Lazy<HashSet<String>> = Lazy::new(|| {
            [$($word),*].iter().map(|&word| String::from(word)).collect()
        });
    };
}

word_set!(
    "Words that are profanities by most people's definition",
    STANDARD_WORDS,
    "ass",
    "asshole",
    "bitch",
    "cock",
    "cunt",
    "fag",
    "fagot",
    "faggot",
    "fuck",
    "nigger",
    "piss",
    "pussy",
    "shit",
    "twat",
    "whore"
);
word_set!(
    "Words that are profanities only to the zealous",
    ZEALOUS_WORDS,
    "crap",
    "damn",
    "goddamn",
    "hell",
    "suck"
);
word_set!(
    "Words related to sex",
    SEX_WORDS,
    "ass",
    "asshole",
    "blowjob",
    "boob",
    "boobie",
    "boobies",
    "boobjob",
    "breast",
    "clitoris",
    "cock",
    "condom",
    "cunnilingus",
    "cunt",
    "dick",
    "doggystyle",
    "ejaculate",
    "felate",
    "felatio",
    "fetish",
    "foreskin",
    "handjob",
    "labia",
    "masturbate",
    "masturbation",
    "masterbate",
    "masterbation",
    "penis",
    "pussy",
    "rimjob",
    "semen",
    "sex",
    "tits",
    "tittie",
    "titties",
    "titty",
    "twat",
    "vagina",
    "vulva"
);

/// A collection of words to censor
#[derive(Debug, Clone, Eq)]
pub enum Censor {
    /// Words most people consider profanities; see [`STANDARD_WORDS`]
    Standard,
    /// Words related to sex; see [`SEX_WORDS`]
    Sex,
    /// Words only the zealous consider profanities; see [`ZEALOUS_WORDS`]
    Zealous,
    /// A custom set of words
    Custom(HashSet<String>),
}

pub use Censor::*;

impl Default for Censor {
    fn default() -> Self {
        Standard
    }
}

impl Censor {
    /// Create an empty `Censor` with no words
    pub fn empty() -> Self {
        Custom(HashSet::new())
    }
    /// Create a `Censor::Custom` from an iterator of words
    pub fn custom<I, W>(words: I) -> Self
    where
        I: IntoIterator<Item = W>,
        W: Into<String>,
    {
        Custom(words.into_iter().map(Into::into).collect())
    }
    /// Check whether a string contains any censored words
    pub fn check(&self, text: &str) -> bool {
        !self.bad_chars(text, 0, 0).is_empty()
    }
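    /// Count the number of censored words in a string
    ///
    /// Contiguous runs of censored characters count as one word. A sketch of
    /// the expected behavior (assuming a crate named `censor`; inferred from
    /// the run-length logic below, not a documented guarantee):
    ///
    /// ```
    /// use censor::*;
    ///
    /// let censor = Censor::Standard;
    /// assert_eq!(censor.count("dog"), 0);
    /// assert_eq!(censor.count("bitch ass guy"), 2);
    /// ```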
    pub fn count(&self, text: &str) -> usize {
        let bad_chars = self.bad_chars(text, 0, 0);
        let mut count = 0;
        let mut in_censored = false;
        for i in 0..text.chars().count() {
            if bad_chars.contains(&i) {
                if !in_censored {
                    in_censored = true;
                    count += 1;
                }
            } else {
                in_censored = false;
            }
        }
        count
    }
    /// Replace censored words in the string with asterisks (`*`)
    pub fn censor(&self, text: &str) -> String {
        self.replace(text, "*")
    }
    /// Replace censored words in the string with characters cycled from the
    /// given grawlix string
    ///
    /// # Panics
    ///
    /// Panics if `grawlix` is empty
    #[track_caller]
    pub fn replace(&self, text: &str, grawlix: &str) -> String {
        self.replace_with_offsets(text, grawlix, 0, 0)
    }
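    /// Like [`Censor::replace`], but leaves `start_offset` characters
    /// uncensored at the start and `end_offset` characters at the end of
    /// each censored word
    ///
    /// # Panics
    ///
    /// Panics if `grawlix` is empty
    ///
    /// A sketch of the offset behavior (assuming a crate named `censor`;
    /// inferred from the code, not a documented guarantee):
    ///
    /// ```
    /// use censor::*;
    ///
    /// let censor = Censor::Standard;
    /// assert_eq!(censor.replace_with_offsets("shit", "*", 1, 1), "s**t");
    /// ```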
    #[track_caller]
    pub fn replace_with_offsets(
        &self,
        text: &str,
        grawlix: &str,
        start_offset: usize,
        end_offset: usize,
    ) -> String {
        if grawlix.is_empty() {
            panic!("grawlix is empty");
        }
        let graw_chars: Vec<char> = grawlix.chars().collect();
        let mut graw_offset: usize = 0;

        let bad_chars = self.bad_chars(text, start_offset, end_offset);
        text.chars()
            .enumerate()
            .map(|(i, c)| {
                if bad_chars.contains(&i) {
                    // Cycle through the grawlix characters.
                    let graw = graw_chars[graw_offset];
                    graw_offset = (graw_offset + 1) % graw_chars.len();
                    graw
                } else {
                    c
                }
            })
            .collect()
    }
    /// Get the set of character (not byte) indices in the string that are
    /// part of a censored word
    pub fn bad_chars(&self, text: &str, start_offset: usize, end_offset: usize) -> HashSet<usize> {
        let lowercase = text.to_lowercase();
        let sizes: BTreeSet<usize> = self.list().map(|s| s.len()).collect();
        // Check just the alphabetic characters
        let (alphanum_only, alphanum_map) = remove_non_alpha(&lowercase);
        let bad_alphanum_chars = self._bad_chars(
            &alphanum_only,
            &alphanum_map,
            &sizes,
            start_offset,
            end_offset,
        );
        // Check aliased characters with whitespace removed
        let (alias_ws, alias_ws_map) = remove_whitespace(&alias(&lowercase));
        let bad_alias_ws_chars =
            self._bad_chars(&alias_ws, &alias_ws_map, &sizes, start_offset, end_offset);
        // Check aliased characters with non-alphabetic characters removed
        let (alias_alphanum, alias_alphanum_map) = remove_non_alpha(&alias(&lowercase));
        let bad_alias_alphanum_chars = self._bad_chars(
            &alias_alphanum,
            &alias_alphanum_map,
            &sizes,
            start_offset,
            end_offset,
        );
        // Combine the results of all three passes
        bad_alphanum_chars
            .into_iter()
            .chain(bad_alias_ws_chars)
            .chain(bad_alias_alphanum_chars)
            .collect()
    }
    fn _bad_chars(
        &self,
        text: &str,
        map: &HashMap<usize, usize>,
        sizes: &BTreeSet<usize>,
        start_offset: usize,
        end_offset: usize,
    ) -> HashSet<usize> {
        // Collapse runs of repeated characters so elongated words still match.
        let (deduped, dd_map) = dedup_string(text);
        let mut set = HashSet::new();
        for &size in sizes.iter().rev() {
            for word in self.list().filter(|s| s.len() == size) {
                let word_chars = word.chars().count();
                for (i, _) in text.match_indices(word.as_str()) {
                    // `match_indices` yields byte offsets, but `map` is keyed
                    // by char index, so convert before looking up.
                    let i = text[..i].chars().count();
                    for j in start_offset..word_chars.saturating_sub(end_offset) {
                        let k = i + j;
                        if let Some(k) = map.get(&k) {
                            set.insert(*k);
                        }
                    }
                }
                for (i, _) in deduped.match_indices(word.as_str()) {
                    let i = deduped[..i].chars().count();
                    for j in start_offset..word_chars.saturating_sub(end_offset) {
                        let k = i + j;
                        if let Some(ls) = dd_map.get(&k) {
                            for l in ls {
                                if let Some(k) = map.get(l) {
                                    set.insert(*k);
                                }
                            }
                        }
                    }
                }
            }
        }
        set
    }
    /// Get a reference to the word set used by this `Censor`
    pub fn set(&self) -> &HashSet<String> {
        match self {
            Standard => &STANDARD_WORDS,
            Zealous => &ZEALOUS_WORDS,
            Sex => &SEX_WORDS,
            Custom(words) => words,
        }
    }
    /// Get an iterator over the censored words
    pub fn list(&self) -> std::collections::hash_set::Iter<String> {
        self.set().iter()
    }
    /// Find a word in the `Censor`'s set, applying character aliases
    pub fn find(&self, word: &str) -> Option<&str> {
        let word = alias(word);
        self.set().get(&word).map(|w| w.as_str())
    }
    /// Check whether the `Censor`'s set contains a word, applying character
    /// aliases
    pub fn contains(&self, word: &str) -> bool {
        self.find(word).is_some()
    }
}

impl AddAssign for Censor {
    fn add_assign(&mut self, other: Self) {
        *self = Censor::Custom(self.set().union(other.set()).cloned().collect());
    }
}

impl PartialEq for Censor {
    fn eq(&self, other: &Self) -> bool {
        self.set() == other.set()
    }
}

impl<S> AddAssign<S> for Censor
where
    S: Into<String>,
{
    fn add_assign(&mut self, other: S) {
        *self = Censor::Custom(self.list().cloned().chain(Some(other.into())).collect());
    }
}

impl SubAssign for Censor {
    fn sub_assign(&mut self, other: Self) {
        *self = Censor::Custom(self.set().difference(other.set()).cloned().collect());
    }
}

impl<S> SubAssign<S> for Censor
where
    S: Into<String>,
{
    fn sub_assign(&mut self, other: S) {
        let other = other.into();
        *self = Censor::Custom(self.list().filter(|&s| s != &other).cloned().collect());
    }
}

impl Add for Censor {
    type Output = Censor;
    fn add(mut self, other: Self) -> Self::Output {
        self += other;
        self
    }
}

impl<S> Add<S> for Censor
where
    S: Into<String>,
{
    type Output = Censor;
    fn add(mut self, other: S) -> Self::Output {
        self += other;
        self
    }
}

impl Sub for Censor {
    type Output = Censor;
    fn sub(mut self, other: Self) -> Self::Output {
        self -= other;
        self
    }
}

impl<S> Sub<S> for Censor
where
    S: Into<String>,
{
    type Output = Censor;
    fn sub(mut self, other: S) -> Self::Output {
        self -= other;
        self
    }
}

/// Replace each character with its canonical alias, if it has one
fn alias(text: &str) -> String {
    text.chars()
        .map(|c| CHAR_ALIASES.get(&c).copied().unwrap_or(c))
        .collect()
}

/// Remove whitespace from a string, returning the stripped string and a map
/// from stripped char indices back to original char indices
fn remove_whitespace(text: &str) -> (String, HashMap<usize, usize>) {
    let mut output = String::new();
    let mut map = HashMap::new();
    for (i, (j, c)) in text
        .chars()
        .enumerate()
        .filter(|(_, c)| !c.is_whitespace())
        .enumerate()
    {
        output.push(c);
        map.insert(i, j);
    }
    (output, map)
}

/// Remove non-alphabetic characters from a string, returning the stripped
/// string and a map from stripped char indices back to original char indices
fn remove_non_alpha(text: &str) -> (String, HashMap<usize, usize>) {
    let mut output = String::new();
    let mut map = HashMap::new();
    for (i, (j, c)) in text
        .chars()
        .enumerate()
        .filter(|(_, c)| c.is_alphabetic())
        .enumerate()
    {
        output.push(c);
        map.insert(i, j);
    }
    (output, map)
}

/// Collapse runs of repeated characters, returning the deduplicated string
/// and a map from each deduplicated char index to the original char indices
/// that collapsed into it
fn dedup_string(s: &str) -> (String, HashMap<usize, Vec<usize>>) {
    let mut last = None;
    let mut res = String::new();
    let mut map = HashMap::new();
    let mut j = 0;
    for (i, c) in s.chars().enumerate() {
        if last.map(|l| l != c).unwrap_or(true) {
            // New character: record it at the next deduplicated index.
            res.push(c);
            map.entry(j).or_insert_with(Vec::new).push(i);
            j += 1;
        } else {
            // Repeated character: attribute it to the character it collapsed
            // into (the previous deduplicated index, not the next one, so
            // trailing repeats like "shittt" stay attached to the match).
            map.entry(j - 1).or_insert_with(Vec::new).push(i);
        }
        last = Some(c);
    }
    (res, map)
}
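
// A minimal test sketch exercising the public API above. The expected strings
// here are inferred from the matching logic in this file, not from documented
// guarantees.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detects_and_censors() {
        let censor = Censor::Standard;
        assert!(censor.check("this is a shitty sentence"));
        assert!(!censor.check("this is a wholesome sentence"));
        // Every flagged character is replaced with the grawlix character.
        assert_eq!(censor.censor("shit"), "****");
        // Aliased characters ('1' -> 'i') are still caught.
        assert!(censor.check("sh1t"));
    }

    #[test]
    fn custom_censors_compose() {
        // `+` adds a word and `-` removes one; both produce `Censor::Custom`.
        let censor = Censor::Standard + "plop" - "ass";
        assert!(censor.contains("plop"));
        assert!(!censor.contains("ass"));
    }
}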