use crate::helpers::regex::{CaseSensitivity, RegexType};
/// Stateless unit struct that groups the text-cleaning routines as associated
/// functions; the public entry points are `TextCleaner::clean` and
/// `TextCleaner::clean_username`.
pub struct TextCleaner;
impl TextCleaner {
/// Cleans `text` using the strategy selected by `cleaning_type`.
///
/// Each [`RegexType`] variant is routed to its dedicated private cleaner and
/// the cleaned text is returned as a new `String`. For `RegexType::Custom`,
/// a missing case-sensitivity setting falls back to
/// `CaseSensitivity::CaseSensitive`.
pub fn clean(text: &str, cleaning_type: RegexType) -> String {
    match cleaning_type {
        RegexType::Alphabetic(mode) => Self::clean_alphabetic(text, mode),
        RegexType::AlphaNumeric(mode) => Self::clean_alphanumeric(text, mode),
        RegexType::AlphaNumericLoose(mode) => Self::clean_alphanumeric_loose(text, mode),
        RegexType::AlphaNumericSpace(mode) => Self::clean_alphanumeric_space(text, mode),
        RegexType::AlphaNumericDash(mode) => Self::clean_alphanumeric_dash(text, mode),
        RegexType::AlphaNumericDot(mode) => Self::clean_alphanumeric_dot(text, mode),
        RegexType::AlphaNumericDashDot(mode) => Self::clean_alphanumeric_dash_dot(text, mode),
        RegexType::AlphaNumericUnderscore(mode) => {
            Self::clean_alphanumeric_underscore(text, mode)
        }
        RegexType::AlphaNumericDotUnderscore(mode) => {
            Self::clean_alphanumeric_dot_underscore(text, mode)
        }
        RegexType::Digits => Self::clean_digits(text),
        RegexType::Email => Self::clean_email(text),
        RegexType::Custom(extra_chars, mode, limit) => {
            // An unspecified mode defaults to the case-sensitive setting.
            let mode = mode.unwrap_or(CaseSensitivity::CaseSensitive);
            Self::clean_custom(text, extra_chars, mode, limit)
        }
    }
}
/// Cleans `text` as a username.
///
/// Delegates to `Self::clean` with the `AlphaNumericDot` rule in
/// `CaseSensitive` mode.
pub fn clean_username(text: &str) -> String {
    let username_rule = RegexType::AlphaNumericDot(CaseSensitivity::CaseSensitive);
    Self::clean(text, username_rule)
}
/// Keeps only alphabetic characters of `text`, applies the case
/// transformation for `case_sensitivity`, and caps the result at 38
/// characters.
fn clean_alphabetic(text: &str, case_sensitivity: CaseSensitivity) -> String {
    let mut letters = String::new();
    for ch in text.chars() {
        if ch.is_alphabetic() {
            letters.push(ch);
        }
    }
    let cased = Self::apply_case_transformation(letters, case_sensitivity);
    Self::truncate_to_length(cased, 38)
}
/// Keeps only alphanumeric characters of `text`, applies the case
/// transformation, drops everything before the first letter, and caps the
/// result at 38 characters.
fn clean_alphanumeric(text: &str, case_sensitivity: CaseSensitivity) -> String {
    let alnum_only: String = text.chars().filter(|ch| ch.is_alphanumeric()).collect();
    let cased = Self::apply_case_transformation(alnum_only, case_sensitivity);
    // Leading digits are discarded so the output begins with a letter.
    let anchored = Self::ensure_starts_with_letter(cased);
    Self::truncate_to_length(anchored, 38)
}
/// Like the strict alphanumeric cleaner but without the "must start with a
/// letter" step: keeps alphanumeric characters, applies the case
/// transformation, and caps the result at 38 characters.
fn clean_alphanumeric_loose(text: &str, case_sensitivity: CaseSensitivity) -> String {
    let mut alnum_only = String::new();
    for ch in text.chars() {
        if ch.is_alphanumeric() {
            alnum_only.push(ch);
        }
    }
    let cased = Self::apply_case_transformation(alnum_only, case_sensitivity);
    Self::truncate_to_length(cased, 38)
}
/// Keeps alphanumeric and whitespace characters of `text` and normalizes
/// the result.
///
/// Steps, in order: filter, apply the case transformation for
/// `case_sensitivity`, drop everything before the first letter, collapse
/// each whitespace run into a single space, cap the length at 38
/// characters, and finally strip trailing whitespace.
///
/// Truncation deliberately runs *before* the trailing trim: if the
/// 38-character cut lands directly after a space, trimming last guarantees
/// the returned string never ends in whitespace.
fn clean_alphanumeric_space(text: &str, case_sensitivity: CaseSensitivity) -> String {
    let mut result: String = text
        .chars()
        .filter(|c| c.is_alphanumeric() || c.is_whitespace())
        .collect();
    result = Self::apply_case_transformation(result, case_sensitivity);
    result = Self::ensure_starts_with_letter(result);
    result = Self::normalize_whitespace(result);
    // Cut first, trim second: the cut itself may expose a trailing space.
    result = Self::truncate_to_length(result, 38);
    Self::remove_trailing_whitespace(result)
}
/// Keeps alphanumeric characters and `-` from `text` and normalizes the
/// result.
///
/// Steps, in order: filter, apply the case transformation for
/// `case_sensitivity`, drop everything before the first letter, collapse
/// runs of `-` into one, cap the length at 38 characters, and finally strip
/// trailing `-`.
///
/// Truncation deliberately runs *before* the trailing trim so that a cut
/// landing right after a dash cannot leave the output ending in `-`.
fn clean_alphanumeric_dash(text: &str, case_sensitivity: CaseSensitivity) -> String {
    let mut result: String = text
        .chars()
        .filter(|c| c.is_alphanumeric() || *c == '-')
        .collect();
    result = Self::apply_case_transformation(result, case_sensitivity);
    result = Self::ensure_starts_with_letter(result);
    result = Self::remove_consecutive_chars(result, '-');
    // Cut first, trim second: the cut itself may expose a trailing dash.
    result = Self::truncate_to_length(result, 38);
    Self::remove_trailing_char(result, '-')
}
/// Keeps alphanumeric characters and `.` from `text` and normalizes the
/// result.
///
/// Steps, in order: filter, apply the case transformation for
/// `case_sensitivity`, drop everything before the first letter, collapse
/// runs of `.` into one, cap the length at 38 characters, and finally strip
/// trailing `.`.
///
/// Truncation deliberately runs *before* the trailing trim so that a cut
/// landing right after a dot cannot leave the output ending in `.`.
fn clean_alphanumeric_dot(text: &str, case_sensitivity: CaseSensitivity) -> String {
    let mut result: String = text
        .chars()
        .filter(|c| c.is_alphanumeric() || *c == '.')
        .collect();
    result = Self::apply_case_transformation(result, case_sensitivity);
    result = Self::ensure_starts_with_letter(result);
    result = Self::remove_consecutive_chars(result, '.');
    // Cut first, trim second: the cut itself may expose a trailing dot.
    result = Self::truncate_to_length(result, 38);
    Self::remove_trailing_char(result, '.')
}
/// Keeps alphanumeric characters plus `-`, `.` and `_` from `text` and
/// normalizes the result.
///
/// Steps, in order: filter, apply the case transformation for
/// `case_sensitivity`, drop everything before the first letter, collapse
/// runs of `.` and runs of `-`, cap the length at 38 characters, and finally
/// strip any trailing mix of `.` and `-`.
///
/// Two ordering details matter:
/// * trailing `.` and `-` are stripped together as a set, so inputs such as
///   `"abc.-"` do not keep a dangling `.`;
/// * truncation runs before that strip, so a cut landing right after a
///   separator cannot leave the output ending in one.
fn clean_alphanumeric_dash_dot(text: &str, case_sensitivity: CaseSensitivity) -> String {
    // NOTE(review): '_' is admitted by this filter although the variant name
    // only mentions dash and dot, and it is neither collapsed nor trimmed
    // below. Behaviour kept as-is; confirm against the pattern that
    // `RegexType::AlphaNumericDashDot` validates.
    let mut result: String = text
        .chars()
        .filter(|c| c.is_alphanumeric() || matches!(*c, '-' | '.' | '_'))
        .collect();
    result = Self::apply_case_transformation(result, case_sensitivity);
    result = Self::ensure_starts_with_letter(result);
    result = Self::remove_consecutive_chars(result, '.');
    result = Self::remove_consecutive_chars(result, '-');
    // Cut first, trim second: the cut itself may expose a trailing separator.
    result = Self::truncate_to_length(result, 38);
    result
        .trim_end_matches(|c: char| matches!(c, '.' | '-'))
        .to_string()
}
/// Keeps alphanumeric characters and `_` from `text` and normalizes the
/// result.
///
/// Steps, in order: filter, apply the case transformation for
/// `case_sensitivity`, drop everything before the first letter, collapse
/// runs of `_` into one, cap the length at 38 characters, and finally strip
/// trailing `_`.
///
/// Truncation deliberately runs *before* the trailing trim so that a cut
/// landing right after an underscore cannot leave the output ending in `_`.
fn clean_alphanumeric_underscore(text: &str, case_sensitivity: CaseSensitivity) -> String {
    let mut result: String = text
        .chars()
        .filter(|c| c.is_alphanumeric() || *c == '_')
        .collect();
    result = Self::apply_case_transformation(result, case_sensitivity);
    result = Self::ensure_starts_with_letter(result);
    result = Self::remove_consecutive_chars(result, '_');
    // Cut first, trim second: the cut itself may expose a trailing underscore.
    result = Self::truncate_to_length(result, 38);
    Self::remove_trailing_char(result, '_')
}
/// Keeps alphanumeric characters plus `.` and `_` from `text` and normalizes
/// the result.
///
/// Steps, in order: filter, apply the case transformation for
/// `case_sensitivity`, drop everything before the first letter, collapse
/// runs of `.` and runs of `_`, cap the length at 38 characters, and finally
/// strip any trailing mix of `.` and `_`.
///
/// Two ordering details matter:
/// * trailing `.` and `_` are stripped together as a set, so inputs such as
///   `"abc._"` do not keep a dangling `.`;
/// * truncation runs before that strip, so a cut landing right after a
///   separator cannot leave the output ending in one.
fn clean_alphanumeric_dot_underscore(text: &str, case_sensitivity: CaseSensitivity) -> String {
    let mut result: String = text
        .chars()
        .filter(|c| c.is_alphanumeric() || matches!(*c, '.' | '_'))
        .collect();
    result = Self::apply_case_transformation(result, case_sensitivity);
    result = Self::ensure_starts_with_letter(result);
    result = Self::remove_consecutive_chars(result, '.');
    result = Self::remove_consecutive_chars(result, '_');
    // Cut first, trim second: the cut itself may expose a trailing separator.
    result = Self::truncate_to_length(result, 38);
    result
        .trim_end_matches(|c: char| matches!(c, '.' | '_'))
        .to_string()
}
/// Returns the ASCII digits (`0`-`9`) of `text`, in their original order.
/// Every other character, including non-ASCII digits, is dropped.
fn clean_digits(text: &str) -> String {
    let mut digits = String::new();
    for ch in text.chars() {
        if ch.is_ascii_digit() {
            digits.push(ch);
        }
    }
    digits
}
/// Keeps alphanumeric characters plus `.`, `@`, `-` and `_` from `text`,
/// then lowercases the whole result.
///
/// No deduplication or structural validation is performed; repeated dots,
/// for instance, are preserved.
fn clean_email(text: &str) -> String {
    let mut kept = String::new();
    for ch in text.chars() {
        if ch.is_alphanumeric() || matches!(ch, '.' | '@' | '-' | '_') {
            kept.push(ch);
        }
    }
    // Lowercase the assembled string (not char-by-char) to keep
    // `str::to_lowercase`'s context-sensitive rules.
    kept.to_lowercase()
}
/// Keeps alphanumeric characters plus any character listed in
/// `allowed_chars`, applies the case transformation for `case_sensitivity`,
/// and caps the result at `max_length` characters.
fn clean_custom(
    text: &str,
    allowed_chars: &str,
    case_sensitivity: CaseSensitivity,
    max_length: usize,
) -> String {
    // Build the lookup set once so each membership test is a hash lookup.
    let extras: std::collections::HashSet<char> = allowed_chars.chars().collect();
    let mut kept = String::new();
    for ch in text.chars() {
        if ch.is_alphanumeric() || extras.contains(&ch) {
            kept.push(ch);
        }
    }
    let cased = Self::apply_case_transformation(kept, case_sensitivity);
    Self::truncate_to_length(cased, max_length)
}
/// Returns `text` transformed according to `case_sensitivity`.
///
/// Both `CaseSensitive` and `CaseInsensitive` currently yield a lowercased
/// copy of `text`.
///
/// NOTE(review): `CaseSensitive` lowercasing looks surprising given its
/// name, but the unit tests in this file expect it — confirm the intent
/// before changing it.
fn apply_case_transformation(text: String, case_sensitivity: CaseSensitivity) -> String {
    match case_sensitivity {
        CaseSensitivity::CaseSensitive | CaseSensitivity::CaseInsensitive => {
            text.to_lowercase()
        }
    }
}
/// Drops every leading character of `text` that is not alphabetic, so the
/// result is either empty or begins with a letter.
fn ensure_starts_with_letter(text: String) -> String {
    text.trim_start_matches(|ch: char| !ch.is_alphabetic())
        .to_string()
}
/// Collapses every run of consecutive `target_char` occurrences in `text`
/// into a single occurrence; all other characters are copied unchanged.
fn remove_consecutive_chars(text: String, target_char: char) -> String {
    let mut collapsed = String::new();
    for current in text.chars() {
        // A target char is redundant when the output already ends with one.
        let repeats_target = current == target_char && collapsed.ends_with(target_char);
        if !repeats_target {
            collapsed.push(current);
        }
    }
    collapsed
}
/// Strips every trailing occurrence of `target_char` from `text`.
fn remove_trailing_char(text: String, target_char: char) -> String {
    // Measure the kept prefix, then shorten the owned string in place.
    let kept_len = text.trim_end_matches(target_char).len();
    let mut trimmed = text;
    trimmed.truncate(kept_len);
    trimmed
}
/// Replaces each run of whitespace characters in `text` (spaces, tabs,
/// newlines, ...) with a single ASCII space. Leading and trailing runs are
/// collapsed as well, not removed.
fn normalize_whitespace(text: String) -> String {
    let mut normalized = String::new();
    for current in text.chars() {
        if !current.is_whitespace() {
            normalized.push(current);
        } else if !normalized.ends_with(' ') {
            // First whitespace of a run: emit exactly one space for the run.
            normalized.push(' ');
        }
    }
    normalized
}
/// Strips all trailing whitespace from `text`; leading and interior
/// whitespace is left untouched.
fn remove_trailing_whitespace(text: String) -> String {
    // Measure the kept prefix, then shorten the owned string in place.
    let kept_len = text.trim_end().len();
    let mut trimmed = text;
    trimmed.truncate(kept_len);
    trimmed
}
/// Returns at most the first `max_length` characters of `text`.
///
/// The limit counts `char`s rather than bytes, so multi-byte characters are
/// never split.
fn truncate_to_length(text: String, max_length: usize) -> String {
    // The byte offset of the first character past the limit, if any, marks
    // where the kept prefix ends.
    match text.char_indices().nth(max_length) {
        Some((cut_at, _)) => text[..cut_at].to_string(),
        None => text,
    }
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `Alphabetic` keeps letters only; both case modes yield lowercase.
    #[test]
    fn test_clean_alphabetic() {
        assert_eq!(
            TextCleaner::clean(
                "User123Name!!!",
                RegexType::Alphabetic(CaseSensitivity::CaseSensitive)
            ),
            "username"
        );
        assert_eq!(
            TextCleaner::clean(
                "UserNAME",
                RegexType::Alphabetic(CaseSensitivity::CaseInsensitive)
            ),
            "username"
        );
    }

    /// `AlphaNumeric` drops symbols and any leading digits.
    #[test]
    fn test_clean_alphanumeric() {
        let rule = || RegexType::AlphaNumeric(CaseSensitivity::CaseSensitive);
        let cases = [
            ("123User@@Name456", "username456"),
            ("123username", "username"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(TextCleaner::clean(input, rule()), expected);
        }
    }

    /// `AlphaNumericLoose` keeps leading digits.
    #[test]
    fn test_clean_alphanumeric_loose() {
        assert_eq!(
            TextCleaner::clean(
                "123User@@Name456",
                RegexType::AlphaNumericLoose(CaseSensitivity::CaseSensitive)
            ),
            "123username456"
        );
    }

    /// `Email` filters and lowercases but does not collapse repeated dots.
    #[test]
    fn test_clean_email() {
        assert_eq!(
            TextCleaner::clean(" User..Email@Example.com!! ", RegexType::Email),
            "user..email@example.com"
        );
    }

    /// `AlphaNumericSpace` normalizes whitespace and trims both ends.
    #[test]
    fn test_clean_alphanumeric_space() {
        let rule = || RegexType::AlphaNumericSpace(CaseSensitivity::CaseSensitive);
        let cases = [
            ("User Name 123!!!", "user name 123"),
            ("User\t\nName\r123", "user name 123"),
            ("username ", "username"),
            (" 123username", "username"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(TextCleaner::clean(input, rule()), expected);
        }
    }

    #[test]
    fn test_clean_alphanumeric_space_case_insensitive() {
        assert_eq!(
            TextCleaner::clean(
                "User NAME 123",
                RegexType::AlphaNumericSpace(CaseSensitivity::CaseInsensitive)
            ),
            "user name 123"
        );
    }

    /// `AlphaNumericDash` collapses repeated dashes and strips a trailing one.
    #[test]
    fn test_clean_alphanumeric_dash() {
        let rule = || RegexType::AlphaNumericDash(CaseSensitivity::CaseSensitive);
        let cases = [("user--name@@123", "user-name123"), ("username-", "username")];
        for &(input, expected) in &cases {
            assert_eq!(TextCleaner::clean(input, rule()), expected);
        }
    }

    /// `AlphaNumericDot` collapses repeated dots and strips a trailing one.
    #[test]
    fn test_clean_alphanumeric_dot() {
        let rule = || RegexType::AlphaNumericDot(CaseSensitivity::CaseSensitive);
        let cases = [("user..name@@123", "user.name123"), ("username.", "username")];
        for &(input, expected) in &cases {
            assert_eq!(TextCleaner::clean(input, rule()), expected);
        }
    }

    /// `AlphaNumericUnderscore` collapses repeated underscores and strips a
    /// trailing one.
    #[test]
    fn test_clean_alphanumeric_underscore() {
        let rule = || RegexType::AlphaNumericUnderscore(CaseSensitivity::CaseSensitive);
        let cases = [("user__name@@123", "user_name123"), ("username_", "username")];
        for &(input, expected) in &cases {
            assert_eq!(TextCleaner::clean(input, rule()), expected);
        }
    }

    /// `clean_username` behaves like the `AlphaNumericDot` rule.
    #[test]
    fn test_clean_username() {
        let cases = [
            ("User..First@@123", "user.first123"),
            ("!!!User123..Name456...", "user123.name456"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(TextCleaner::clean_username(input), expected);
        }
    }

    #[test]
    fn test_clean_digits() {
        assert_eq!(
            TextCleaner::clean("User123Name!!!", RegexType::Digits),
            "123"
        );
    }

    /// `Custom` keeps the extra allowed characters and honours `max_length`.
    #[test]
    fn test_clean_custom() {
        assert_eq!(
            TextCleaner::clean(
                "user@domain.com",
                RegexType::Custom("@.", Some(CaseSensitivity::CaseSensitive), 20)
            ),
            "user@domain.com"
        );

        let fifty_a = "a".repeat(50);
        let shortened = TextCleaner::clean(
            &fifty_a,
            RegexType::Custom("", Some(CaseSensitivity::CaseSensitive), 10),
        );
        assert_eq!(shortened.len(), 10);
    }

    /// The built-in rules cap their output at 38 characters.
    #[test]
    fn test_length_truncation() {
        let fifty_a = "a".repeat(50);
        let shortened = TextCleaner::clean(
            &fifty_a,
            RegexType::Alphabetic(CaseSensitivity::CaseSensitive),
        );
        assert_eq!(shortened.len(), 38);
    }

    #[test]
    fn test_ensure_starts_with_letter() {
        assert_eq!(
            TextCleaner::clean(
                "123abc",
                RegexType::AlphaNumeric(CaseSensitivity::CaseSensitive)
            ),
            "abc"
        );
        assert_eq!(
            TextCleaner::clean(
                "___abc123",
                RegexType::AlphaNumericUnderscore(CaseSensitivity::CaseSensitive)
            ),
            "abc123"
        );
    }

    #[test]
    fn test_consecutive_character_removal() {
        assert_eq!(
            TextCleaner::clean(
                "user...name",
                RegexType::AlphaNumericDot(CaseSensitivity::CaseSensitive)
            ),
            "user.name"
        );
        assert_eq!(
            TextCleaner::clean(
                "user___name",
                RegexType::AlphaNumericUnderscore(CaseSensitivity::CaseSensitive)
            ),
            "user_name"
        );
    }

    #[test]
    fn test_normalize_whitespace() {
        let rule = || RegexType::AlphaNumericSpace(CaseSensitivity::CaseSensitive);
        let cases = [("user name", "user name"), ("user\t\t\nname", "user name")];
        for &(input, expected) in &cases {
            assert_eq!(TextCleaner::clean(input, rule()), expected);
        }
    }
}