Struct rustrict::Type

source ·

pub struct Type(_);

Expand description

Type represents a type or severity of inappropriateness. Types can be combined with the bitwise AND and OR operators, and are not mutually exclusive.

For example, the following means profane or at least moderately mean: Type::PROFANE | (Type::MEAN & Type::MODERATE_OR_HIGHER)

Implementations§

source §

impl Type

source

pub const PROFANE: Self = _

Bad words.

source

pub const OFFENSIVE: Self = _

Offensive words.

source

pub const SEXUAL: Self = _

Sexual words.

source

pub const MEAN: Self = _

Mean words.

source

pub const EVASIVE: Self = _

Words intended to evade detection.

source

pub const SPAM: Self = _

Spam/gibberish/SHOUTING.

source

pub const SAFE: Self = _

One of a very small number of safe phrases. Recommended to enforce this on users who repeatedly evade the filter.

source

pub const MILD: Self = _

Not that bad.

source

pub const MODERATE: Self = _

Bad.

source

pub const SEVERE: Self = _

Cover your eyes!

source

pub const MILD_OR_HIGHER: Self = _

Any level; Type::MILD, Type::MODERATE, or Type::SEVERE.

source

pub const MODERATE_OR_HIGHER: Self = _

Any level in excess of Type::MILD.

source

pub const INAPPROPRIATE: Self = _

The default Type, meaning profane, offensive, sexual, or severely mean.

source

pub const ANY: Self = _

Any type of detection (except SAFE). This will be expanded to cover all future types.

source

pub const NONE: Self = _

No type of detection.

source

pub fn is(self, threshold: Self) -> bool

Returns true if and only if self, the analysis result, meets the given threshold.

Examples found in repository ?

src/trie.rs (line 57)

    /// Inserts `word` into the trie, creating any missing nodes along its path, and marks the
    /// final node as a complete word.
    ///
    /// Leading spaces are stripped from `word` rather than stored as trie edges; their presence
    /// (or the presence of any interior space) is recorded in the final node's `contains_space`.
    /// With `overwrite` set, the final node's `typ` and `contains_space` are replaced; otherwise
    /// the new values are OR-ed into whatever the node already holds.
    pub fn add(&mut self, mut word: &str, typ: Type, overwrite: bool) {
        // Chomp the leading space: the intent is to only match separate strings, not only
        // strings that literally begin with a space character.
        let leading_space = word.starts_with(' ');
        if leading_space {
            word = word.trim_start_matches(' ');
        }
        let contains_space = leading_space || word.contains(' ');

        // Walk (and extend where needed) the path spelled out by the word's characters.
        let mut node = &mut self.root;
        for (index, ch) in word.chars().enumerate() {
            node = node.children.entry(ch).or_insert_with(|| Node {
                children: Map::default(),
                word: false,
                contains_space: false,
                typ: Type::NONE,
                depth: (index + 1) as u8,
                last: Some(ch),
                #[cfg(feature = "trace")]
                trace: word.chars().take(index + 1).collect(),
            });
        }

        node.word = true;
        if overwrite {
            node.typ = typ;
            node.contains_space = contains_space;
        } else {
            node.typ |= typ;
            node.contains_space |= contains_space;
        }

        debug_assert!(
            !(node.typ.is(Type::ANY) && node.typ.is(Type::SAFE)),
            "if word is Type::SAFE, it cannot be anything else"
        );
    }

More examples

Hide additional examples

src/mtch.rs (line 69)

    /// Decides whether this match is strong enough to count and, if so, applies it.
    ///
    /// Returns `false` (before touching `typ` or `spy`) when any of the rejection heuristics
    /// below fires. Otherwise returns `true` after:
    ///
    /// - OR-ing the matched node's type into `*typ`, plus `Type::EVASIVE & Type::MILD` when the
    ///   match used two or more replacements;
    /// - censoring the matched range in `spy` with `censor_replacement`, but only if the node's
    ///   type meets `censor_threshold`. The first character of the range is left untouched
    ///   unless the node's type meets `censor_first_character_threshold` or the node's depth
    ///   is 1.
    pub(crate) fn commit<I: Iterator<Item = char>>(
        &self,
        typ: &mut Type,
        spy: &mut BufferProxyIterator<I>,
        censor_threshold: Type,
        censor_first_character_threshold: Type,
        censor_replacement: char,
    ) -> bool {
        #[cfg(feature = "trace")]
        print!(
            "Committing {} with begin_separate={}, spaces={}, skipped={}, end_separate={}, depth={}, replacements={}, contains_space={}: ",
            self.node.trace,
            self.begin_separate,
            self.spaces,
            self.skipped,
            self.end_separate,
            self.node.depth,
            self.replacements,
            self.node.contains_space
        );

        // Rejects matches whose count of spaces/skips/replacements is large relative to the
        // node depth (the final `+ 4 > depth` comparison), unless the match is cleanly
        // delimited as described by the first parenthesized group.
        let too_many_replacements = !(self.begin_separate
            && (self.end_separate
                || (self.spaces == 0
                    && self.node.depth > 2
                    && self.node.typ.is(Type::MODERATE_OR_HIGHER))))
            && self.node.depth > 1
            // In theory, prevents blahsex, but allows blahsexblah.
            && (!(self.end_separate || self.begin_separate) || self.node.depth < 3 || self.spaces.max(self.skipped).max(self.replacements) > 0 || self.node.typ.isnt(Type::MODERATE_OR_HIGHER))
            && self.spaces.max(self.skipped).max(self.replacements) as usize + 4 > self.node.depth as usize;

        // More than one low-confidence replacement, and more of them than the matched span
        // (`end - start`, minus one) can account for.
        let low_confidence_replacements = self.low_confidence_replacements > 1
            && self.low_confidence_replacements as usize
                > (self.end - self.start).saturating_sub(1)
            && self.node.depth > 1;

        // A short word (depth <= 3) with at least as many replacements as its depth, unless
        // the word is severe.
        let low_confidence_short = self.replacements >= self.node.depth
            && self.node.depth <= 3
            && !self.node.typ.is(Type::SEVERE);

        // Make it so "squirrels word" doesn't contain "s word"
        let low_confidence_special = self.node.contains_space && !self.begin_separate;

        if too_many_replacements
            || low_confidence_replacements
            || low_confidence_short
            || low_confidence_special
        {
            // Match isn't strong enough.
            #[cfg(feature = "trace")]
            println!(
                "(rejected: {} {} {} {})",
                too_many_replacements,
                low_confidence_replacements,
                low_confidence_short,
                low_confidence_special
            );
            return false;
        }

        #[cfg(feature = "trace")]
        println!("(committed)");

        // Apply detection.
        *typ |= self.node.typ
            | if self.replacements >= 2 {
                Type::EVASIVE & Type::MILD
            } else {
                Type::NONE
            };

        // Decide whether to censor.
        if self.node.typ.is(censor_threshold) {
            // Decide whether to censor the first character.
            let offset =
                if self.node.typ.is(censor_first_character_threshold) || self.node.depth == 1 {
                    0
                } else {
                    1
                };
            spy.censor(self.start + offset..=self.end, censor_replacement);
        }

        true
    }

src/context.rs (line 249)

    /// Censors and analyzes `message` on behalf of this user, updates the per-user state
    /// (suspicion, counters, safe-only restriction) and decides whether the message may be sent.
    ///
    /// Returns `Ok` with the censored (and possibly trimmed) message, or `Err` with the first
    /// applicable [`BlockReason`]. The checks run in this order: globally configured safe mode,
    /// manual mute, empty message, rate limit, repetition, severe inappropriateness, and
    /// finally the targeted safe-only restriction.
    ///
    /// Suspicion, the message counters and the safe-only deadline are updated even when the
    /// message ends up blocked; the rate-limit bookkeeping, forgiveness and message history
    /// are only updated on the `Ok` path.
    pub fn process_with_options(
        &mut self,
        message: String,
        options: &ContextProcessingOptions,
    ) -> Result<String, BlockReason> {
        let now = Instant::now();
        let elapsed = self.last_message.map(|l| now - l).unwrap_or(Duration::ZERO);

        // Reports multiply suspicion; both are floored at 1 so neither zeroes out the other.
        let suspicion = self.suspicion.max(1).saturating_mul(self.reports.max(1));

        // How convinced are we that the user is a bad actor.
        let is_kinda_sus = suspicion >= 2;
        let is_impostor = suspicion >= 15;

        // Don't give bad actors the benefit of the doubt when it comes to meanness.
        let meanness_threshold = if is_impostor {
            Type::MILD_OR_HIGHER
        } else if is_kinda_sus {
            Type::MODERATE_OR_HIGHER
        } else {
            Type::SEVERE
        };

        let censor_threshold =
            Type::PROFANE | Type::OFFENSIVE | Type::SEXUAL | (Type::MEAN & meanness_threshold);

        // Don't give bad actors the benefit of letting their first character through.
        let censor_first_character_threshold = if is_kinda_sus {
            censor_threshold
        } else {
            // Mainly for protection against the n-word being discernible.
            Type::OFFENSIVE & Type::SEVERE
        };

        let (mut censored, analysis) = Censor::from_str(&message)
            .with_censor_threshold(censor_threshold)
            .with_censor_first_character_threshold(censor_first_character_threshold)
            .censor_and_analyze();

        let mut censored_str = if should_skip_censor(&message) {
            message.as_str()
        } else {
            censored.as_str()
        };

        if let Some(character_limit) = options.character_limit {
            #[cfg(feature = "width")]
            {
                censored_str = crate::trim_to_width(censored_str, character_limit.get());
            }
            // Cut at a char boundary so that at most `character_limit` chars remain.
            if let Some((limit, _)) = censored_str.char_indices().nth(character_limit.get()) {
                censored_str = &censored_str[..limit];
            }
        }

        if options.trim_whitespace {
            censored_str = trim_whitespace(censored_str);
        }

        // NOTE(review): when `should_skip_censor` selected `message` above, `censored` is only
        // replaced if the (trimmed) `message` is shorter in bytes than the censored output;
        // otherwise the censored output is what gets returned. Confirm this is intended.
        if censored_str.len() < censored.len() {
            // Something was trimmed, so we must re-allocate.
            censored = String::from(censored_str);
        }

        self.total = self.total.saturating_add(1);
        if analysis.is(Type::INAPPROPRIATE) {
            self.total_inappropriate = self.total_inappropriate.saturating_add(1);
        }

        // Collecting suspicion.
        // Maps the severity of `analysis` restricted to `typ` onto 0..=3.
        let type_to_sus = |typ: Type| -> u8 {
            let combined = analysis & typ;
            if combined.is(Type::SEVERE) {
                3
            } else if combined.is(Type::MODERATE) {
                2
            } else if combined.is(Type::MILD) {
                1
            } else {
                0
            }
        };

        // Repetition detection.
        let mut recent_similar = 0;

        if let Some(opts) = options.repetition_limit.as_ref() {
            // Forget history entries older than the configured memory.
            self.history.retain(|&(_, t)| now - t < opts.memory);

            for (recent_message, _) in &self.history {
                if strsim::normalized_levenshtein(recent_message, &message)
                    >= opts.similarity_threshold as f64
                {
                    recent_similar += 1;
                }
            }
        }

        let mut new_suspicion = type_to_sus(Type::PROFANE | Type::OFFENSIVE | Type::SEXUAL)
            + type_to_sus(Type::EVASIVE)
            + type_to_sus(Type::SPAM);

        if recent_similar >= 2 {
            // Don't penalize as much for repeated messages, since an innocent user may repeat their
            // message multiple times if it was erroneously detected.
            new_suspicion /= 2;
        }

        // Extend the safe-only restriction (by 10 minutes if the user has been reported, else
        // 5), capped at `max_safe_timeout` from now. A zero `max_safe_timeout` disables this.
        if ((is_kinda_sus && new_suspicion >= 4) || (is_impostor && new_suspicion >= 2))
            && !options.max_safe_timeout.is_zero()
        {
            if let Some(only_safe_until) =
                self.only_safe_until
                    .unwrap_or(now)
                    .checked_add(if self.reports > 0 {
                        Duration::from_secs(10 * 60)
                    } else {
                        Duration::from_secs(5 * 60)
                    })
            {
                self.only_safe_until = Some(only_safe_until.min(now + options.max_safe_timeout));
            }
        }

        self.suspicion = self.suspicion.saturating_add(new_suspicion);

        let remaining_rate_limit = Self::remaining_duration(&mut self.rate_limited_until, now);

        // The first matching branch below determines the block reason, if any.
        if let Some(remaining) = options
            .safe_mode_until
            .filter(|_| analysis.isnt(Type::SAFE))
            .and_then(|until| until.checked_duration_since(now))
        {
            Err(BlockReason::Unsafe {
                remaining,
                targeted: false,
            })
        } else if let Some(dur) =
            Self::remaining_duration(&mut self.muted_until, now).filter(|_| options.block_if_muted)
        {
            Err(BlockReason::Muted(dur))
        } else if options.block_if_empty && censored.is_empty() {
            Err(BlockReason::Empty)
        } else if let Some(dur) = options
            .rate_limit
            .as_ref()
            .and_then(|opt| remaining_rate_limit.filter(|_| self.burst_used >= opt.burst))
        {
            Err(BlockReason::Spam(dur))
        } else if options
            .repetition_limit
            .as_ref()
            .map(|opts| recent_similar >= opts.limit)
            .unwrap_or(false)
        {
            Err(BlockReason::Repetitious(recent_similar as usize))
        } else if options.block_if_severely_inappropriate
            && analysis.is(Type::INAPPROPRIATE & Type::SEVERE)
        {
            Err(BlockReason::Inappropriate(analysis))
        } else if let Some(remaining) = Self::remaining_duration(&mut self.only_safe_until, now)
            .filter(|_| !(analysis.is(Type::SAFE) || options.max_safe_timeout.is_zero()))
        {
            Err(BlockReason::Unsafe {
                remaining,
                targeted: true,
            })
        } else {
            // The message is allowed through; update the bookkeeping that only applies to
            // messages that were actually sent.
            self.last_message = Some(now);
            if let Some(rate_limit_options) = options.rate_limit.as_ref() {
                // How many messages does this count for against the rate limit.
                let rate_limit_messages =
                    if let Some(char_limit) = rate_limit_options.character_limit {
                        let char_count = message.chars().count();

                        #[cfg(feature = "width")]
                        let char_count = char_count.max(crate::width_str(&message));

                        (char_count / char_limit.get() as usize).clamp(1, 3) as u8
                    } else {
                        1
                    };

                // NOTE(review): the division below panics if `limit` is a zero duration;
                // confirm that the options type rules that out.
                self.burst_used = if remaining_rate_limit.is_some() {
                    self.burst_used.saturating_add(rate_limit_messages)
                } else {
                    self.burst_used.saturating_sub(
                        (elapsed.as_nanos() / rate_limit_options.limit.as_nanos())
                            .min(u8::MAX as u128) as u8,
                    )
                };
                if let Some(rate_limited_until) =
                    self.rate_limited_until.unwrap_or(now).checked_add(
                        rate_limit_options.limit * (rate_limit_messages + new_suspicion) as u32,
                    )
                {
                    self.rate_limited_until = Some(rate_limited_until);
                }
            }
            // Forgiveness (minus one suspicion per safe message, and also per minute between messages).
            self.suspicion = self.suspicion.saturating_sub(
                (elapsed.as_secs() / 60).clamp(analysis.is(Type::SAFE) as u64, u8::MAX as u64)
                    as u8,
            );

            if let Some(repetition_blocking_options) = options.repetition_limit.as_ref() {
                // Bound the history at twice the repetition limit.
                if self.history.len() >= repetition_blocking_options.limit as usize * 2 {
                    self.history.pop_front();
                }

                self.history.push_back((message, now));
            }

            Ok(censored)
        }
    }

    /// Returns the time remaining on this user's mute. Yields [`Duration::ZERO`] when no mute
    /// has been set or the mute has already expired.
    pub fn muted_for(&self) -> Duration {
        match self.muted_until {
            Some(deadline) => deadline.saturating_duration_since(Instant::now()),
            None => Duration::ZERO,
        }
    }

    /// Returns the instant at which the user's mute ends, or `None` if no mute was ever set.
    /// The returned instant may already lie in the past.
    pub fn muted_until(&self) -> Option<Instant> {
        self.muted_until
    }

    /// Returns the time remaining during which the user may only send [`Type::SAFE`] messages.
    /// Yields [`Duration::ZERO`] when no restriction is set or it has already expired.
    pub fn restricted_for(&self) -> Duration {
        let now = Instant::now();
        self.only_safe_until
            .map_or(Duration::ZERO, |deadline| deadline.saturating_duration_since(now))
    }

    /// Returns the instant at which the user's safe-only restriction ends, or `None` if there
    /// is no restriction. The returned instant may already lie in the past.
    pub fn restricted_until(&self) -> Option<Instant> {
        self.only_safe_until
    }

    /// Manually mutes this user for `duration`, counted from now. Any earlier manual mute is
    /// replaced, so passing `Duration::ZERO` effectively un-mutes.
    ///
    /// # Panics
    ///
    /// Panics if `Instant::now() + duration` is not representable as an `Instant`.
    pub fn mute_for(&mut self, duration: Duration) {
        let deadline = Instant::now() + duration;
        self.mute_until(deadline);
    }

    /// Manually mutes this user until `instant`. Any earlier manual mute is replaced, so
    /// passing an instant that is already in the past effectively un-mutes.
    pub fn mute_until(&mut self, instant: Instant) {
        self.muted_until = Some(instant);
    }

    /// Manually restricts this user to known safe phrases for `duration`, counted from now.
    /// Any earlier restriction deadline is replaced, so passing `Duration::ZERO` effectively
    /// un-restricts.
    ///
    /// # Panics
    ///
    /// Panics if `Instant::now() + duration` is not representable as an `Instant`.
    pub fn restrict_for(&mut self, duration: Duration) {
        let deadline = Instant::now() + duration;
        self.restrict_until(deadline);
    }

    /// Manually restricts this user to known safe phrases until `instant`. Any earlier
    /// restriction deadline is replaced, so passing an instant that is already in the past
    /// effectively un-restricts.
    pub fn restrict_until(&mut self, instant: Instant) {
        self.only_safe_until = Some(instant);
    }

    /// Records that another user "reported" this user's message(s). Reports let suspicion of
    /// bad behavior be confirmed faster. The counter saturates instead of overflowing.
    pub fn report(&mut self) {
        let bumped = self.reports.saturating_add(1);
        self.reports = bumped;
    }

    /// Returns how many reports have been recorded through `Self::report()`. The full range of
    /// `usize` is not guaranteed to be counted (currently at most `u8::MAX` reports are).
    pub fn reports(&self) -> usize {
        usize::from(self.reports)
    }

    /// Resets suspicion and the report count to zero and lifts the safe-only restriction.
    /// The manual mute and the rate limit are left as they are.
    pub fn exonerate(&mut self) {
        self.only_safe_until = None;
        self.reports = 0;
        self.suspicion = 0;
    }

    /// Returns how many messages have been processed in total. The full range of `usize` is
    /// not guaranteed to be counted (currently at most `u16::MAX` messages are).
    pub fn total(&self) -> usize {
        usize::from(self.total)
    }

    /// Returns how many of the processed messages were `Type::INAPPROPRIATE`. The full range
    /// of `usize` is not guaranteed to be counted (currently at most `u16::MAX` messages are).
    pub fn total_inappropriate(&self) -> usize {
        usize::from(self.total_inappropriate)
    }
}

impl Default for Context {
    fn default() -> Self {
        Self::new()
    }
}

/// The reason a message was blocked instead of being returned to the caller.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must include a wildcard arm when
/// matching on it.
#[derive(Copy, Clone, Debug, PartialEq)]
#[non_exhaustive]
pub enum BlockReason {
    /// The particular message was *severely* inappropriate; the payload is the `Type` the
    /// analysis produced for it.
    Inappropriate(Type),
    /// Recent messages were generally inappropriate, and this message isn't on the safe list.
    /// Alternatively, if `targeted` is false, safe mode was configured globally.
    /// Try again after `remaining`.
    Unsafe {
        /// How long until messages that are not on the safe list are accepted again.
        remaining: Duration,
        /// Whether unsafe mode was targeted at this user (as opposed to configured globally).
        targeted: bool,
    },
    /// This message was too similar to `usize` recent messages.
    Repetitious(usize),
    /// Too many messages per unit time, try again after `Duration`.
    Spam(Duration),
    /// Manually muted for `Duration`.
    Muted(Duration),
    /// Message was, at least after censoring, completely empty.
    Empty,
}

impl BlockReason {
    /// Returns a fixed, context-free warning that is reasonable to show to the user. Callers
    /// are free to present a `BlockReason` in any other way they prefer.
    pub fn generic_str(self) -> &'static str {
        match self {
            Self::Empty => "Your message was empty",
            Self::Muted(_) => "You have been temporarily muted",
            Self::Spam(_) => "You have been temporarily muted due to excessive frequency",
            Self::Repetitious(_) => "Your message was too similar to recent messages",
            Self::Unsafe { .. } => "You have been temporarily restricted due to profanity/spam",
            Self::Inappropriate(_) => "Your message was held for severe profanity",
        }
    }

    #[deprecated = "use contextual_string"]
    pub fn contextual_str(self) -> String {
        self.contextual_string()
    }

    /// Returns a warning that is reasonable to show to the user and, unlike `generic_str`,
    /// includes context such as the kind of inappropriateness or how long a mute lasts.
    /// Callers are free to present a `BlockReason` in any other way they prefer.
    pub fn contextual_string(self) -> String {
        match self {
            Self::Inappropriate(typ) => {
                // Checked in priority order; the first category that applies wins.
                let reason = if typ.is(Type::OFFENSIVE) {
                    "Your message was held for being highly offensive"
                } else if typ.is(Type::SEXUAL) {
                    "Your message was held for being overly sexual"
                } else if typ.is(Type::MEAN) {
                    "Your message was held for being overly mean"
                } else {
                    "Your message was held for severe profanity"
                };
                reason.to_owned()
            }
            Self::Unsafe {
                remaining,
                targeted,
            } => {
                let remaining = FormattedDuration(remaining);
                if targeted {
                    format!(
                        "You have been restricted for {} due to profanity/spam",
                        remaining
                    )
                } else {
                    format!("Safe mode is active for {}", remaining)
                }
            }
            Self::Repetitious(count) => {
                format!("Your message was too similar to {} recent messages", count)
            }
            Self::Spam(dur) => {
                let dur = FormattedDuration(dur);
                format!("You have been muted for {} due to excessive frequency", dur)
            }
            Self::Muted(dur) => {
                let dur = FormattedDuration(dur);
                format!("You have been muted for {}", dur)
            }
            // Everything else has no extra context to add.
            _ => self.generic_str().to_owned(),
        }
    }

src/censor.rs (line 601)

    fn next(&mut self) -> Option<Self::Item> {
        while let Some(raw_c) = self.buffer.next().or_else(|| {
            if self.space_appended {
                None
            } else {
                self.space_appended = true;
                Some(' ')
            }
        }) {
            if !self.space_appended && raw_c != '!' && raw_c != '.' && raw_c != '?' {
                // The input is not over yet, so any previous notion of safety is irrelevant.
                self.safe = false;
            }

            let pos = self.buffer.index();

            self.uppercase = self.uppercase.saturating_add(raw_c.is_uppercase() as u8);
            /*
            // Very old whitelist (allows a ton of abuse):
            let skippable = match c {
                ' ' | '~' | '-' | '−' | '_' | '.' | '!' | '?' | ',' | '*' | '"' | '\'' | '\n' | '\r'
                | '\t' => true,
                _ => false,
            };

            // More recent whitelist (still allows abuse like f^u^c^k):
            let skippable = raw_c.is_punctuation()
                || raw_c.is_separator()
                || is_whitespace(raw_c)
                || matches!(raw_c, '(' | ')');
            // Use a blacklist instead:
             */
            let skippable = !raw_c.is_alphanumeric() || is_whitespace(raw_c);
            let replacement = REPLACEMENTS.get(&raw_c);

            #[cfg(feature = "trace")]
            println!(
                "Read '{}', skippable={}, replacing with={:?}",
                raw_c, skippable, replacement
            );

            if (!self.separate || self.last == Some(self.censor_replacement))
                && raw_c == self.censor_replacement
            {
                // Censor replacement found but not beginning of word.
                self.self_censoring = self.self_censoring.saturating_add(1);
            }

            if let Some(last) = self.last {
                if raw_c == last {
                    self.repetitions = self.repetitions.saturating_add(1);
                }

                // Characters on the home-row of a QWERTY keyboard.
                fn is_gibberish(c: char) -> bool {
                    matches!(c, 'a' | 's' | 'd' | 'f' | 'j' | 'k' | 'l' | ';')
                }

                // Single gibberish characters don't count. Must have been preceded by another gibberish character.
                if is_gibberish(raw_c) && is_gibberish(last) {
                    self.gibberish = self.gibberish.saturating_add(1);
                }
            }

            if let Some(pos) = pos {
                // Must special-case all skippable, non-replaced characters that may start
                // a profanity, so that these profanities are detected.
                //
                // Not adding a match is mainly an optimization.
                if !(skippable && replacement.is_none() && !matches!(raw_c, ' ' | '_' | '🖕')) {
                    let begin_camel_case_word = raw_c.is_ascii_uppercase()
                        && self.last.map(|c| !c.is_ascii_uppercase()).unwrap_or(false);

                    // Seed a new match for every character read.
                    self.matches.insert(Match {
                        node: &TRIE.root,
                        start: pos, // will immediately be incremented if match is kept.
                        end: usize::MAX, // sentinel.
                        last: 0 as char, // sentinel.
                        begin_separate: self.separate || begin_camel_case_word,
                        end_separate: false, // unknown at this time.
                        spaces: 0,
                        skipped: 0,
                        replacements: 0,
                        low_confidence_replacements: 0,
                    });
                }
            }

            self.separate = skippable;

            if self.separate {
                for pending in self.pending_commit.iter_mut() {
                    if pending.end == self.last_pos {
                        pending.end_separate = true;
                    }
                }
            }

            let mut drain_start: Option<usize> = None;
            let mut safety_end = usize::MAX;
            let mut replacement_counted = false;
            let raw_c_lower = raw_c.to_lowercase().next().unwrap();

            mem::swap(&mut self.matches, &mut self.matches_tmp);
            for c in replacement
                .unwrap_or(&&*raw_c.encode_utf8(&mut [0; 4]))
                .chars()
            {
                // This replacement (uppercase to lower case) raises absolutely zero suspicion.
                let benign_replacement = c == raw_c_lower;

                // This counts as a replacement, mainly for spam detection purposes.
                let countable_replacement = !(replacement_counted
                    || benign_replacement
                    || raw_c.is_ascii_alphabetic()
                    || (raw_c.is_ascii_digit()
                        && self.last.map(|l| l.is_ascii_digit()).unwrap_or(false)));

                if countable_replacement {
                    self.replacements = self.replacements.saturating_add(1);
                    replacement_counted = true;
                }

                #[cfg(feature = "trace")]
                println!(
                    " - Replacement '{}', benign={}, countable={}",
                    c, benign_replacement, countable_replacement
                );

                // These separators don't invalidate a false-positive match.
                //
                // -
                // half-right =/= frig
                //
                // '
                // invalidating false positives in cases like didn't (it where ( is a space.
                // also, so "i'm fine" matches "im fine" for safety purposes.
                let ignore_sep = matches!(c, '-' | '\'' | '\n' | '\r');

                for m in self.matches_tmp.iter() {
                    let m = m.clone();

                    safety_end = safety_end.min(m.start);

                    #[cfg(feature = "trace")]
                    println!(
                        "  - Consider match \"{}\" with spaces={}",
                        m.node.trace, m.spaces
                    );

                    if (skippable || c == m.last) && m.start != pos.unwrap_or(0) {
                        // Undo remove.
                        #[cfg(feature = "trace")]
                        println!("undo remove \"{}\" where last={}, node last={:?} and initial spaces={}", m.node.trace, m.last, m.node.last, m.spaces);

                        // Here, '.' is primarily for allowing ellipsis ("...") as a form of
                        // space.
                        // ( and ) are for ignoring appositive phrases.
                        // Checking node.last is to collapse multiple spaces into one, to avoid
                        let new_space = matches!(c, ' ' | '.' | ',' | ':' | ';' | '…' | '(' | ')')
                            // && skippable
                            && m.node.last != Some(' ');
                        // && !ignore_sep;

                        let new_skip = skippable && !ignore_sep;
                        let new_replacement = !benign_replacement && !self.separate;
                        let new_low_confidence_replacement =
                            !benign_replacement && raw_c.is_ascii_digit();

                        let undo_m = Match {
                            spaces: m.spaces.saturating_add(new_space as u8),
                            skipped: m.skipped.saturating_add(new_skip as u8),
                            replacements: m.replacements.saturating_add(new_replacement as u8),
                            low_confidence_replacements: m
                                .low_confidence_replacements
                                .saturating_add(new_low_confidence_replacement as u8),
                            ..m
                        };
                        if let Some(existing) = self.matches.get(&undo_m) {
                            let replacement = existing.combine(&undo_m);
                            self.matches.replace(replacement);
                        } else {
                            self.matches.insert(undo_m);
                        }
                    }

                    if let Some(next) = m.node.children.get(&c) {
                        let next_m = Match {
                            node: next,
                            spaces: m
                                .spaces
                                .saturating_add((c != raw_c && self.separate && c != '\'') as u8),
                            replacements: m
                                .replacements
                                .saturating_add((!benign_replacement && !self.separate) as u8),
                            low_confidence_replacements: m
                                .low_confidence_replacements
                                .saturating_add(
                                    (!benign_replacement && raw_c.is_ascii_digit()) as u8,
                                ),
                            last: c,
                            ..m
                        };

                        #[cfg(feature = "trace")]
                        println!(
                            "     - Next is \"{}\", with spaces={}, replacements = {}",
                            next.trace, next_m.spaces, next_m.replacements
                        );

                        if next.word {
                            if next_m.node.typ.is(Type::SAFE)
                                && next_m.start == 0
                                && next_m.spaces == 0
                                && next_m.skipped == 0
                                && !self.ignore_false_positives
                            {
                                // Everything in the input until now is safe.
                                #[cfg(feature = "trace")]
                                println!("found safe word: {}", next_m.node.trace);
                                self.safe = true;
                            }

                            #[cfg(feature = "trace")]
                            if !next_m.node.typ.is(Type::ANY) {
                                if self.ignore_false_positives {
                                    print!("ignoring");
                                } else {
                                    print!("found");
                                }
                                println!(
                                    " false positive \"{}\", spaces={}, skipped={}, replacements={}",
                                    next_m.node.trace, next_m.spaces, next_m.skipped, next_m.replacements
                                );
                            }

                            if next_m.node.typ.is(Type::ANY) {
                                self.pending_commit.push(Match {
                                    end: pos.unwrap(),
                                    ..next_m
                                });
                            } else if next_m.spaces == 0
                                && next_m.skipped == 0
                                && next_m.replacements == 0
                                && !self.ignore_false_positives
                            {
                                // Is false positive, so invalidate internal matches.
                                #[cfg(feature = "trace")]
                                println!("Found false positive {}", next_m.node.trace);
                                drain_start = Some(
                                    drain_start
                                        .map(|start| start.min(next_m.start))
                                        .unwrap_or(next_m.start),
                                );
                            }
                        }

                        if let Some(existing) = self.matches.get(&next_m) {
                            let replacement = existing.combine(&next_m);
                            self.matches.replace(replacement);
                        } else {
                            self.matches.insert(next_m);
                        }
                    }
                }
            }
            self.matches_tmp.clear();
            self.last = Some(raw_c);
            if let Some(pos) = pos {
                self.last_pos = pos;
            }

            let typ = &mut self.typ;
            let spy = &mut self.buffer;
            let censor_threshold = self.censor_threshold;
            let censor_first_character_threshold = self.censor_first_character_threshold;
            let censor_replacement = self.censor_replacement;
            #[cfg(any(feature = "find_false_positives", feature = "trace"))]
            let first_match_ptr = &mut self.match_ptrs;
            #[cfg(any(feature = "find_false_positives", feature = "trace"))]
            let total_matches = &mut self.total_matches;
            #[cfg(any(feature = "find_false_positives", feature = "trace"))]
            let total_match_characters = &mut self.total_match_characters;

            self.pending_commit.retain(|pending| {
                #[cfg(feature = "trace")]
                println!("Consider whether to cancel pending commit {} with start={} against drain_start={:?}", pending.node.trace, pending.start, drain_start);

                // Cancel due to false positive.
                if let Some(start) = drain_start {
                    if pending.start >= start {
                        #[cfg(feature = "trace")]
                        println!("Cancelled {}", pending.node.trace);
                        return false;
                    }
                }

                // Can pre-commit due to lack of false positive matches.
                if pending.end < safety_end {
                    if pending.commit(
                        typ,
                        spy,
                        censor_threshold,
                        censor_first_character_threshold,
                        censor_replacement,
                    ) {
                        #[cfg(any(feature = "find_false_positives", feature = "trace"))]
                        {
                            *first_match_ptr ^= pending.node as *const _ as usize;
                            *total_matches += 1;
                            *total_match_characters += pending.end - pending.start;
                        }
                    }
                    return false;
                }

                // At this point, don't know whether this match will be committed or cancelled, so
                // return.
                true
            });

            // Yield one character if possible.
            if let Some(spy_next_index) = self.buffer.spy_next_index() {
                // This covers all in-flight matches.
                let mut safe_until = spy_next_index < safety_end;

                // This covers all pending commit matches.
                for pending in &self.pending_commit {
                    if pending.start <= spy_next_index {
                        safe_until = false;
                        break;
                    }
                }
                if safe_until {
                    return self.buffer.spy_next();
                }
            }
        }

        let residual = mem::take(&mut self.pending_commit);
        #[cfg(feature = "trace")]
        if !residual.is_empty() {
            println!("{} residuals", residual.len());
        }
        for pending in residual {
            if pending.commit(
                &mut self.typ,
                &mut self.buffer,
                self.censor_threshold,
                self.censor_first_character_threshold,
                self.censor_replacement,
            ) {
                #[cfg(any(feature = "find_false_positives", feature = "trace"))]
                {
                    self.match_ptrs ^= pending.node as *const _ as usize;
                    self.total_matches += 1;
                    self.total_match_characters += pending.end - pending.start;
                }
            }
        }

        if let Some(c) = self.buffer.spy_next() {
            return Some(c);
        }

        self.done = true;

        None
    }
}

/// Extension trait that puts the filter's entry points directly on string-like values, so a
/// caller can write `text.censor()` or `text.is(threshold)`.
pub trait CensorStr: Sized {
    /// Produces a freshly allocated `String` holding the censored form of the text.
    fn censor(self) -> String;

    /// Reports whether the text meets the given `threshold`.
    fn is(self, threshold: Type) -> bool;

    /// Shorthand for [`CensorStr::is`] with [`Type::INAPPROPRIATE`] as the threshold.
    fn is_inappropriate(self) -> bool {
        Self::is(self, Type::INAPPROPRIATE)
    }

    /// Negation of [`CensorStr::is`]: `true` when the text **does not** meet `threshold`.
    fn isnt(self, threshold: Type) -> bool {
        let meets_threshold = self.is(threshold);
        !meets_threshold
    }
}

impl CensorStr for &str {
    /// Returns an owned copy of the text, run through a default [`Censor`] unless
    /// `should_skip_censor` reports that this input should be left untouched.
    fn censor(self) -> String {
        if !should_skip_censor(self) {
            return Censor::new(self.chars()).censor();
        }

        // Skipped inputs are handed back verbatim.
        self.to_owned()
    }

    /// Analyzes the text with a default [`Censor`] and checks the result against `threshold`.
    fn is(self, threshold: Type) -> bool {
        let analysis = Censor::from_str(self).analyze();
        analysis.is(threshold)
    }

source

pub fn isnt(self, threshold: Self) -> bool

Logical opposite of Self::is.

Examples found in repository ?

src/mtch.rs (line 72)

    /// Decides whether this match is strong enough to count and, if it is, applies it.
    ///
    /// A committed match ORs its node's type into `typ` (plus mild evasiveness when two or more
    /// replacements were needed) and, when the node's type meets `censor_threshold`, censors the
    /// matched range in `spy` with `censor_replacement`. The first matched character is censored
    /// too only if the node's type meets `censor_first_character_threshold` or the node has a
    /// depth of 1.
    ///
    /// Returns `true` if the match was committed and `false` if it was rejected as too weak, in
    /// which case neither `typ` nor `spy` is touched.
    pub(crate) fn commit<I: Iterator<Item = char>>(
        &self,
        typ: &mut Type,
        spy: &mut BufferProxyIterator<I>,
        censor_threshold: Type,
        censor_first_character_threshold: Type,
        censor_replacement: char,
    ) -> bool {
        #[cfg(feature = "trace")]
        print!(
            "Committing {} with begin_separate={}, spaces={}, skipped={}, end_separate={}, depth={}, replacements={}, contains_space={}: ",
            self.node.trace,
            self.begin_separate,
            self.spaces,
            self.skipped,
            self.end_separate,
            self.node.depth,
            self.replacements,
            self.node.contains_space
        );

        let node = &self.node;
        let depth = node.depth;
        let moderate_or_higher = node.typ.is(Type::MODERATE_OR_HIGHER);
        // Largest of the three counters that measure how far the input strayed from the word.
        let worst_noise = self.spaces.max(self.skipped).max(self.replacements);

        // Separated at the start, and either separated at the end too or a clean, longer,
        // at-least-moderate word.
        let well_delimited = self.begin_separate
            && (self.end_separate || (self.spaces == 0 && depth > 2 && moderate_or_higher));

        // In theory, prevents blahsex, but allows blahsexblah.
        let embedded_exempt = (self.end_separate || self.begin_separate)
            && depth >= 3
            && worst_noise == 0
            && moderate_or_higher;

        let too_many_replacements = !well_delimited
            && depth > 1
            && !embedded_exempt
            && worst_noise as usize + 4 > depth as usize;

        // The span length is only computed once the counter exceeds one, as in the checks below.
        let low_confidence_replacements = self.low_confidence_replacements > 1
            && self.low_confidence_replacements as usize
                > (self.end - self.start).saturating_sub(1)
            && depth > 1;

        // Short words made up entirely of replacements only count when severe.
        let low_confidence_short =
            depth <= 3 && self.replacements >= depth && !node.typ.is(Type::SEVERE);

        // Make it so "squirrels word" doesn't contain "s word"
        let low_confidence_special = !self.begin_separate && node.contains_space;

        let rejected = too_many_replacements
            || low_confidence_replacements
            || low_confidence_short
            || low_confidence_special;

        if rejected {
            // Match isn't strong enough.
            #[cfg(feature = "trace")]
            println!(
                "(rejected: {} {} {} {})",
                too_many_replacements,
                low_confidence_replacements,
                low_confidence_short,
                low_confidence_special
            );
            return false;
        }

        #[cfg(feature = "trace")]
        println!("(committed)");

        // Apply detection; needing several replacements is itself mildly evasive.
        let evasion = if self.replacements >= 2 {
            Type::EVASIVE & Type::MILD
        } else {
            Type::NONE
        };
        *typ |= node.typ | evasion;

        // Decide whether to censor.
        if node.typ.is(censor_threshold) {
            // Decide whether to censor the first character.
            let censor_first = node.typ.is(censor_first_character_threshold) || depth == 1;
            let first = if censor_first {
                self.start
            } else {
                self.start + 1
            };
            spy.censor(first..=self.end, censor_replacement);
        }

        true
    }

More examples

Hide additional examples

src/context.rs (line 314)

    pub fn process_with_options(
        &mut self,
        message: String,
        options: &ContextProcessingOptions,
    ) -> Result<String, BlockReason> {
        let now = Instant::now();
        let elapsed = self.last_message.map(|l| now - l).unwrap_or(Duration::ZERO);

        let suspicion = self.suspicion.max(1).saturating_mul(self.reports.max(1));

        // How convinced are we that the user is a bad actor.
        let is_kinda_sus = suspicion >= 2;
        let is_impostor = suspicion >= 15;

        // Don't give bad actors the benefit of the doubt when it comes to meanness.
        let meanness_threshold = if is_impostor {
            Type::MILD_OR_HIGHER
        } else if is_kinda_sus {
            Type::MODERATE_OR_HIGHER
        } else {
            Type::SEVERE
        };

        let censor_threshold =
            Type::PROFANE | Type::OFFENSIVE | Type::SEXUAL | (Type::MEAN & meanness_threshold);

        // Don't give bad actors the benefit of letting their first character through.
        let censor_first_character_threshold = if is_kinda_sus {
            censor_threshold
        } else {
            // Mainly for protection against the n-word being discernible.
            Type::OFFENSIVE & Type::SEVERE
        };

        let (mut censored, analysis) = Censor::from_str(&message)
            .with_censor_threshold(censor_threshold)
            .with_censor_first_character_threshold(censor_first_character_threshold)
            .censor_and_analyze();

        let mut censored_str = if should_skip_censor(&message) {
            message.as_str()
        } else {
            censored.as_str()
        };

        if let Some(character_limit) = options.character_limit {
            #[cfg(feature = "width")]
            {
                censored_str = crate::trim_to_width(censored_str, character_limit.get());
            }
            if let Some((limit, _)) = censored_str.char_indices().nth(character_limit.get()) {
                censored_str = &censored_str[..limit];
            }
        }

        if options.trim_whitespace {
            censored_str = trim_whitespace(censored_str);
        }

        if censored_str.len() < censored.len() {
            // Something was trimmed, must must re-allocate.
            censored = String::from(censored_str);
        }

        self.total = self.total.saturating_add(1);
        if analysis.is(Type::INAPPROPRIATE) {
            self.total_inappropriate = self.total_inappropriate.saturating_add(1);
        }

        // Collecting suspicion.
        let type_to_sus = |typ: Type| -> u8 {
            let combined = analysis & typ;
            if combined.is(Type::SEVERE) {
                3
            } else if combined.is(Type::MODERATE) {
                2
            } else if combined.is(Type::MILD) {
                1
            } else {
                0
            }
        };

        // Repetition detection.
        let mut recent_similar = 0;

        if let Some(opts) = options.repetition_limit.as_ref() {
            self.history.retain(|&(_, t)| now - t < opts.memory);

            for (recent_message, _) in &self.history {
                if strsim::normalized_levenshtein(recent_message, &message)
                    >= opts.similarity_threshold as f64
                {
                    recent_similar += 1;
                }
            }
        }

        let mut new_suspicion = type_to_sus(Type::PROFANE | Type::OFFENSIVE | Type::SEXUAL)
            + type_to_sus(Type::EVASIVE)
            + type_to_sus(Type::SPAM);

        if recent_similar >= 2 {
            // Don't penalize as much for repeated messages, since an innocent user may repeat their
            // message multiple times if it was erroneously detected.
            new_suspicion /= 2;
        }

        if ((is_kinda_sus && new_suspicion >= 4) || (is_impostor && new_suspicion >= 2))
            && !options.max_safe_timeout.is_zero()
        {
            if let Some(only_safe_until) =
                self.only_safe_until
                    .unwrap_or(now)
                    .checked_add(if self.reports > 0 {
                        Duration::from_secs(10 * 60)
                    } else {
                        Duration::from_secs(5 * 60)
                    })
            {
                self.only_safe_until = Some(only_safe_until.min(now + options.max_safe_timeout));
            }
        }

        self.suspicion = self.suspicion.saturating_add(new_suspicion);

        let remaining_rate_limit = Self::remaining_duration(&mut self.rate_limited_until, now);

        if let Some(remaining) = options
            .safe_mode_until
            .filter(|_| analysis.isnt(Type::SAFE))
            .and_then(|until| until.checked_duration_since(now))
        {
            Err(BlockReason::Unsafe {
                remaining,
                targeted: false,
            })
        } else if let Some(dur) =
            Self::remaining_duration(&mut self.muted_until, now).filter(|_| options.block_if_muted)
        {
            Err(BlockReason::Muted(dur))
        } else if options.block_if_empty && censored.is_empty() {
            Err(BlockReason::Empty)
        } else if let Some(dur) = options
            .rate_limit
            .as_ref()
            .and_then(|opt| remaining_rate_limit.filter(|_| self.burst_used >= opt.burst))
        {
            Err(BlockReason::Spam(dur))
        } else if options
            .repetition_limit
            .as_ref()
            .map(|opts| recent_similar >= opts.limit)
            .unwrap_or(false)
        {
            Err(BlockReason::Repetitious(recent_similar as usize))
        } else if options.block_if_severely_inappropriate
            && analysis.is(Type::INAPPROPRIATE & Type::SEVERE)
        {
            Err(BlockReason::Inappropriate(analysis))
        } else if let Some(remaining) = Self::remaining_duration(&mut self.only_safe_until, now)
            .filter(|_| !(analysis.is(Type::SAFE) || options.max_safe_timeout.is_zero()))
        {
            Err(BlockReason::Unsafe {
                remaining,
                targeted: true,
            })
        } else {
            self.last_message = Some(now);
            if let Some(rate_limit_options) = options.rate_limit.as_ref() {
                // How many messages does this count for against the rate limit.
                let rate_limit_messages =
                    if let Some(char_limit) = rate_limit_options.character_limit {
                        let char_count = message.chars().count();

                        #[cfg(feature = "width")]
                        let char_count = char_count.max(crate::width_str(&message));

                        (char_count / char_limit.get() as usize).clamp(1, 3) as u8
                    } else {
                        1
                    };

                self.burst_used = if remaining_rate_limit.is_some() {
                    self.burst_used.saturating_add(rate_limit_messages)
                } else {
                    self.burst_used.saturating_sub(
                        (elapsed.as_nanos() / rate_limit_options.limit.as_nanos())
                            .min(u8::MAX as u128) as u8,
                    )
                };
                if let Some(rate_limited_until) =
                    self.rate_limited_until.unwrap_or(now).checked_add(
                        rate_limit_options.limit * (rate_limit_messages + new_suspicion) as u32,
                    )
                {
                    self.rate_limited_until = Some(rate_limited_until);
                }
            }
            // Forgiveness (minus one suspicion per safe message, and also per minute between messages).
            self.suspicion = self.suspicion.saturating_sub(
                (elapsed.as_secs() / 60).clamp(analysis.is(Type::SAFE) as u64, u8::MAX as u64)
                    as u8,
            );

            if let Some(repetition_blocking_options) = options.repetition_limit.as_ref() {
                if self.history.len() >= repetition_blocking_options.limit as usize * 2 {
                    self.history.pop_front();
                }

                self.history.push_back((message, now));
            }

            Ok(censored)
        }
    }