use super::types::MutedWord;
/// Language codes for which `check_muted_words` matches muted words by plain
/// substring containment instead of splitting the text into words.
// NOTE(review): presumably chosen because these languages do not reliably
// separate words with whitespace — confirm against the product spec.
const SUBSTRING_LANGUAGES: &[&str] = &["ja", "zh", "ko", "th", "vi"];
#[must_use]
pub fn check_muted_words(
muted_words: &[MutedWord],
text: &str,
tags: &[String],
languages: &[String],
is_following_author: bool,
) -> Vec<MutedWordMatch> {
let now = chrono::Utc::now().to_rfc3339();
let text_lower = text.to_lowercase();
let tags_lower: Vec<String> = tags.iter().map(|t| t.to_lowercase()).collect();
let use_substring = languages
.iter()
.any(|lang| SUBSTRING_LANGUAGES.contains(&lang.as_str()));
let mut matches = Vec::new();
for word in muted_words {
if let Some(ref expires) = word.expires_at {
if expires.as_str() < now.as_str() {
continue;
}
}
if word.actor_target.as_deref() == Some("exclude-following") && is_following_author {
continue;
}
let muted_lower = word.value.to_lowercase();
if tags_lower.contains(&muted_lower) {
matches.push(MutedWordMatch {
word: word.clone(),
predicate: muted_lower.clone(),
});
continue;
}
if !word.targets.contains(&"content".to_string()) {
continue;
}
if let Some(predicate) = match_text(&text_lower, &muted_lower, use_substring) {
matches.push(MutedWordMatch {
word: word.clone(),
predicate,
});
}
}
matches
}
/// Matches one lowercased muted value against the lowercased post text.
///
/// Returns the matched predicate, or `None` when nothing matches. An empty
/// `text` or `muted` never matches. Single-character values, and every value
/// when `use_substring` is set, are matched by substring containment only.
/// Otherwise the value matches when it equals the whole text, when it contains
/// whitespace or punctuation and occurs verbatim in the text, or when
/// `match_by_words` finds it among the whitespace-separated words.
fn match_text(text: &str, muted: &str, use_substring: bool) -> Option<String> {
    if text.is_empty() || muted.is_empty() {
        return None;
    }

    let single_char = {
        let mut chars = muted.chars();
        chars.next().is_some() && chars.next().is_none()
    };
    if use_substring || single_char {
        return text.contains(muted).then(|| muted.to_owned());
    }

    // A value longer than the text cannot match anything.
    if muted.len() > text.len() {
        return None;
    }

    let whole_or_phrase =
        muted == text || (has_space_or_punctuation(muted) && text.contains(muted));
    if whole_or_phrase {
        Some(muted.to_owned())
    } else {
        match_by_words(text, muted)
    }
}
/// Looks for `muted` among the whitespace-separated words of `text`.
///
/// Both arguments are expected to be lowercased by the caller. A word matches
/// when, in this order:
///
/// 1. it equals `muted` as written;
/// 2. it equals `muted` after leading and trailing punctuation is trimmed;
/// 3. it still contains punctuation, contains no `/`, and either
///    * equals `muted` once every run of punctuation is replaced by a single
///      space (`idk_what_this` and `idk__what__this` both match
///      `idk what this`),
///    * equals `muted` once the punctuation is removed entirely, or
///    * has a punctuation-separated part equal to `muted`.
///
/// Returns the piece of text that matched, or `None`.
fn match_by_words(text: &str, muted: &str) -> Option<String> {
    for word in text.split_whitespace() {
        if word == muted {
            return Some(word.to_string());
        }

        let trimmed = trim_punctuation(word);
        if trimmed == muted {
            return Some(trimmed.to_string());
        }

        if !trimmed.chars().any(is_punctuation) {
            continue;
        }
        // Tokens joined by a slash (`and/or`) are deliberately not split.
        if trimmed.contains('/') {
            continue;
        }

        // `trimmed` has no punctuation at either end, so the non-empty parts
        // are exactly the text between runs of punctuation.
        let parts: Vec<&str> = trimmed
            .split(is_punctuation)
            .filter(|part| !part.is_empty())
            .collect();

        // Joining the parts collapses each punctuation run into ONE space;
        // substituting character by character would leave double spaces that
        // can never equal a normally-typed phrase.
        if parts.join(" ") == muted || parts.concat() == muted {
            return Some(trimmed.to_string());
        }
        if parts.iter().any(|part| *part == muted) {
            return Some(muted.to_string());
        }
    }
    None
}
/// Reports whether `s` contains at least one whitespace or punctuation
/// character, i.e. whether it is a phrase rather than a single plain word.
fn has_space_or_punctuation(s: &str) -> bool {
    s.contains(|ch: char| ch.is_whitespace() || is_punctuation(ch))
}
/// Reports whether `c` counts as punctuation for muted-word matching.
///
/// The decision is delegated to `general_category_group`, which already
/// treats every ASCII punctuation and symbol character (`!`, `$`, `+`, `~`,
/// …) as punctuation, so no separate ASCII list is needed here.
fn is_punctuation(c: char) -> bool {
    matches!(
        c.general_category_group(),
        UnicodeGeneralCategoryGroup::Punctuation
    )
}
fn trim_punctuation(word: &str) -> &str {
let start = word
.char_indices()
.find(|(_, c)| !is_punctuation(*c))
.map_or(word.len(), |(i, _)| i);
let end = word
.char_indices()
.rev()
.find(|(_, c)| !is_punctuation(*c))
.map_or(0, |(i, c)| i + c.len_utf8());
if start >= end { "" } else { &word[start..end] }
}
/// Coarse, dependency-free classification of a character by general category.
trait UnicodeCategory {
    /// Returns the category group the value belongs to.
    fn general_category_group(&self) -> UnicodeGeneralCategoryGroup;
}

/// The category groups distinguished by [`UnicodeCategory`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum UnicodeGeneralCategoryGroup {
    /// ASCII punctuation, or a non-letter, non-digit character inside one of
    /// the hand-picked punctuation ranges.
    Punctuation,
    /// Everything else.
    Other,
}

impl UnicodeCategory for char {
    /// Classifies the character using `char::is_ascii_punctuation` plus a
    /// small table of code-point ranges. The table is an approximation, not
    /// the full Unicode `P*` category.
    fn general_category_group(&self) -> UnicodeGeneralCategoryGroup {
        if self.is_ascii_punctuation() {
            return UnicodeGeneralCategoryGroup::Punctuation;
        }
        // The ranges below are deliberately coarse and also span a few
        // letters and number forms (`ª µ º ² ³ ¹ ¼ ½ ¾`). Those are word
        // characters and must not be trimmed or split on.
        if self.is_alphanumeric() {
            return UnicodeGeneralCategoryGroup::Other;
        }
        match *self {
            // Latin-1 Supplement signs (¡ « » ¿ …)
            '\u{00A1}'..='\u{00BF}'
            // General Punctuation: dashes, quotes, bullets, ellipsis, …
            | '\u{2010}'..='\u{2027}'
            | '\u{2030}'..='\u{205E}'
            // Supplemental Punctuation
            | '\u{2E00}'..='\u{2E52}'
            // CJK comma, full stop and ditto mark
            | '\u{3001}'..='\u{3003}'
            // Small form variants
            | '\u{FE50}'..='\u{FE6B}'
            // Fullwidth forms of ASCII punctuation
            | '\u{FF01}'..='\u{FF0F}'
            | '\u{FF1A}'..='\u{FF20}' => UnicodeGeneralCategoryGroup::Punctuation,
            _ => UnicodeGeneralCategoryGroup::Other,
        }
    }
}
/// One muted word that matched a post, as reported by `check_muted_words`.
#[derive(Debug, Clone)]
pub struct MutedWordMatch {
/// The muted-word entry that matched (a clone of the caller's entry).
pub word: MutedWord,
/// The lowercased text that triggered the match: the muted value itself for
/// tag, substring and phrase hits, or the matching word or word part from
/// the post text for word-based hits.
pub predicate: String,
}
// Unit tests for muted-word matching and the punctuation helpers.
#[cfg(test)]
mod tests {
use super::*;
// Builds a muted word that targets post content, with no actor filter and
// no expiry.
fn muted(value: &str) -> MutedWord {
MutedWord {
value: value.into(),
targets: vec!["content".into()],
actor_target: None,
expires_at: None,
}
}
// Builds a muted word that targets tags only, with no actor filter and no
// expiry.
fn muted_tag(value: &str) -> MutedWord {
MutedWord {
value: value.into(),
targets: vec!["tag".into()],
actor_target: None,
expires_at: None,
}
}
// Runs the matcher on text alone: no tags, no languages, author not followed.
fn check(words: &[MutedWord], text: &str) -> Vec<MutedWordMatch> {
check_muted_words(words, text, &[], &[], false)
}
// Runs the matcher with the given tags; no languages, author not followed.
fn check_with_tags(words: &[MutedWord], text: &str, tags: &[&str]) -> Vec<MutedWordMatch> {
let tags: Vec<String> = tags.iter().map(|s| s.to_string()).collect();
check_muted_words(words, text, &tags, &[], false)
}
// Runs the matcher with the given language codes; no tags, author not followed.
fn check_with_langs(words: &[MutedWord], text: &str, langs: &[&str]) -> Vec<MutedWordMatch> {
let langs: Vec<String> = langs.iter().map(|s| s.to_string()).collect();
check_muted_words(words, text, &[], &langs, false)
}
// A muted word matches when it appears as a whole word in the text.
#[test]
fn exact_word_match() {
let words = [muted("test")];
assert_eq!(check(&words, "this is a test post").len(), 1);
assert_eq!(check(&words, "no match here").len(), 0);
}
// Case differences between the muted value and the text are ignored.
#[test]
fn case_insensitive() {
let words = [muted("Test")];
assert_eq!(check(&words, "this is a test").len(), 1);
assert_eq!(check(&words, "this is a TEST").len(), 1);
}
// The text consisting of exactly the muted word matches.
#[test]
fn exact_full_text_match() {
let words = [muted("test")];
assert_eq!(check(&words, "test").len(), 1);
}
// A tag-targeted word matches on the tag list, not on the text.
#[test]
fn tag_matching() {
let words = [muted_tag("politics")];
assert_eq!(
check_with_tags(&words, "nice weather", &["politics"]).len(),
1
);
assert_eq!(
check_with_tags(&words, "nice weather", &["sports"]).len(),
0
);
}
// Tag comparison ignores case on both sides.
#[test]
fn tag_matching_case_insensitive() {
let words = [muted_tag("Politics")];
assert_eq!(check_with_tags(&words, "", &["politics"]).len(), 1);
assert_eq!(check_with_tags(&words, "", &["POLITICS"]).len(), 1);
}
// Trailing punctuation on a word does not prevent a match.
#[test]
fn punctuation_trailing() {
let words = [muted("yay")];
assert_eq!(check(&words, "yay!").len(), 1);
assert_eq!(check(&words, "yay!!!").len(), 1);
}
// Leading punctuation on a word does not prevent a match.
#[test]
fn punctuation_leading() {
let words = [muted("test")];
assert_eq!(check(&words, "...test").len(), 1);
}
// A possessive form matches through the part before the apostrophe.
#[test]
fn apostrophe_handling() {
let words = [muted("bluesky")];
assert_eq!(check(&words, "Bluesky's cool").len(), 1);
}
// Each side of a hyphenated word is matched on its own.
#[test]
fn hyphen_splitting() {
let words = [muted("bad")];
assert_eq!(check(&words, "super-bad movie").len(), 1);
}
// Underscores inside a word are treated like spaces for phrase matching.
#[test]
fn underscore_as_space() {
let words = [muted("idk what this")];
assert_eq!(check(&words, "idk_what_this is").len(), 1);
}
// Words containing a slash are not split into parts.
#[test]
fn slash_in_word_skipped() {
let words = [muted("and")];
assert_eq!(check(&words, "and/or").len(), 0);
}
// A phrase with a space matches verbatim, but not with the space removed.
#[test]
fn phrase_with_spaces() {
let words = [muted("bad word")];
assert_eq!(check(&words, "this is a bad word in context").len(), 1);
assert_eq!(check(&words, "badword").len(), 0);
}
// A single-character muted value matches case-insensitively.
#[test]
fn single_character_match() {
let words = [muted("X")];
assert_eq!(check(&words, "check x marks").len(), 1);
}
// With language "ja", the muted value matches inside unspaced text.
#[test]
fn cjk_substring_matching() {
let words = [muted("テスト")];
assert_eq!(
check_with_langs(&words, "これはテストです", &["ja"]).len(),
1
);
}
// A muted word whose expiry is in the past is ignored.
#[test]
fn expired_word_skipped() {
let words = [MutedWord {
value: "test".into(),
targets: vec!["content".into()],
actor_target: None,
expires_at: Some("2020-01-01T00:00:00Z".into()),
}];
assert_eq!(check(&words, "this is a test").len(), 0);
}
// "exclude-following" suppresses the match only when the author is followed.
#[test]
fn exclude_following_when_following() {
let words = [MutedWord {
value: "test".into(),
targets: vec!["content".into()],
actor_target: Some("exclude-following".into()),
expires_at: None,
}];
let result = check_muted_words(&words, "this is a test", &[], &[], true);
assert_eq!(result.len(), 0);
let result = check_muted_words(&words, "this is a test", &[], &[], false);
assert_eq!(result.len(), 1);
}
// Empty text never matches.
#[test]
fn empty_text_no_match() {
let words = [muted("test")];
assert_eq!(check(&words, "").len(), 0);
}
// A muted phrase longer than the text cannot match.
#[test]
fn muted_word_longer_than_text() {
let words = [muted("very long muted word phrase")];
assert_eq!(check(&words, "short").len(), 0);
}
// Each matching muted word produces its own entry in the result.
#[test]
fn multiple_matches() {
let words = [muted("foo"), muted("bar")];
assert_eq!(check(&words, "foo and bar").len(), 2);
}
// `trim_punctuation` strips both ends and yields "" for punctuation-only input.
#[test]
fn trim_punctuation_basic() {
assert_eq!(trim_punctuation("hello"), "hello");
assert_eq!(trim_punctuation("!hello!"), "hello");
assert_eq!(trim_punctuation("...test..."), "test");
assert_eq!(trim_punctuation("!!!"), "");
}
}