use std::sync::OnceLock;
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use crate::data::SLANG_PAIRS;
/// Target writing style accepted by [`convert`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Style {
/// Dispatches to [`to_formal`]: slang forms are replaced by their formal counterparts.
Formal,
/// Dispatches to [`to_chat`]: formal forms are replaced by their slang counterparts.
Chat,
/// Dispatches to [`to_genz`]: the chat conversion followed by the loanword table.
GenZ,
}
/// Converts `text` to the formal register.
///
/// Input containing any ASCII letter is first run through
/// `crate::finglish::to_persian`; afterwards every whole-word occurrence of a
/// slang form from the slang table is replaced by its formal counterpart.
#[must_use]
pub fn to_formal(text: &str) -> String {
    apply_replacements(&ensure_persian(text), slang_to_formal())
}
/// Converts `text` to the colloquial (chat) register.
///
/// Input containing any ASCII letter is first run through
/// `crate::finglish::to_persian`; afterwards every whole-word occurrence of a
/// formal form from the slang table is replaced by its slang counterpart.
#[must_use]
pub fn to_chat(text: &str) -> String {
    apply_replacements(&ensure_persian(text), formal_to_chat())
}
/// Converts `text` to the "Gen Z" style.
///
/// This is a two-pass rewrite: the text is first converted with [`to_chat`],
/// and the loanword table is then applied to that result.
#[must_use]
pub fn to_genz(text: &str) -> String {
    apply_replacements(&to_chat(text), genz_replacements())
}
#[must_use]
pub fn convert(text: &str, style: Style) -> String {
match style {
Style::Formal => to_formal(text),
Style::Chat => to_chat(text),
Style::GenZ => to_genz(text),
}
}
/// Returns `text` unchanged (as an owned copy) when it contains no ASCII
/// letter; otherwise returns the result of `crate::finglish::to_persian`.
fn ensure_persian(text: &str) -> String {
    // ASCII letters are single bytes in UTF-8 and never appear inside a
    // multi-byte sequence, so scanning bytes is equivalent to scanning chars.
    let has_ascii_letter = text.bytes().any(|b| b.is_ascii_alphabetic());
    if !has_ascii_letter {
        return String::from(text);
    }
    crate::finglish::to_persian(text)
}
/// Lazily built, process-wide replacer that maps each first element of
/// `SLANG_PAIRS` to its second element.
fn slang_to_formal() -> &'static AhoCorasickReplacer {
    static REPLACER: OnceLock<AhoCorasickReplacer> = OnceLock::new();

    fn build() -> AhoCorasickReplacer {
        AhoCorasickReplacer::new_pairs(SLANG_PAIRS)
    }

    REPLACER.get_or_init(build)
}
/// Lazily built, process-wide replacer for the reverse direction of
/// `SLANG_PAIRS`: each pair is flipped so the second element becomes the
/// search pattern and the first element its replacement.
fn formal_to_chat() -> &'static AhoCorasickReplacer {
    static REPLACER: OnceLock<AhoCorasickReplacer> = OnceLock::new();

    fn build() -> AhoCorasickReplacer {
        let flipped = SLANG_PAIRS
            .iter()
            .map(|&(informal, formal)| (formal, informal))
            .collect::<Vec<_>>();
        AhoCorasickReplacer::new(flipped)
    }

    REPLACER.get_or_init(build)
}
/// Lazily built, process-wide replacer over `GENZ_PAIRS`.
fn genz_replacements() -> &'static AhoCorasickReplacer {
    static REPLACER: OnceLock<AhoCorasickReplacer> = OnceLock::new();

    fn build() -> AhoCorasickReplacer {
        AhoCorasickReplacer::new_pairs(GENZ_PAIRS)
    }

    REPLACER.get_or_init(build)
}
/// `(pattern, replacement)` pairs applied by `to_genz` after the chat
/// conversion.
///
/// Ordering invariant: the automaton is built with `MatchKind::LeftmostFirst`,
/// so when several patterns match at the same position the one listed first
/// wins. If a short pattern precedes a longer pattern that starts with it, the
/// short one is always selected, fails the word-boundary check (the next
/// character is a letter), and the longer entry is never tried. Always list a
/// longer pattern before any pattern that is a prefix of it.
const GENZ_PAIRS: &[(&str, &str)] = &[
    ("ممنون", "مرسی"),
    ("متشکرم", "مرسی"),
    ("سپاسگزارم", "تنکس"),
    ("بله", "آره"),
    ("خیر", "نه"),
    ("سلام", "های"),
    ("خداحافظ", "بای"),
    ("بسیار خوب", "اوسم"),
    ("بسیار", "خیلی"),
    // Must precede "عالی" (see the ordering invariant above).
    ("عالیه", "اوسمه"),
    ("عالی", "اوسم"),
    ("جالب", "کول"),
    ("جذاب", "کول"),
    ("خندهدار", "فانی"),
    ("خنده دار", "فانی"),
    ("دوست داشتنی", "کیوت"),
    ("مهمانی", "پارتی"),
    ("جشن", "پارتی"),
    ("دوستان", "فرندز"),
    ("دوستدختر", "گرلفرند"),
    ("دوستپسر", "بویفرند"),
    ("کار", "جاب"),
    ("شغل", "جاب"),
    ("پیام", "مسیج"),
    ("فیلم", "مووی"),
    ("موسیقی", "میوزیک"),
    ("غذا", "فود"),
    ("نوشیدنی", "درینک"),
    ("واقعاً", "ریلی"),
    ("واقعا", "ریلی"),
    ("کاملاً", "تتلی"),
    ("دقیقاً", "اگزکتلی"),
];
/// An Aho-Corasick automaton together with the replacement text for each of
/// its patterns.
struct AhoCorasickReplacer {
/// Automaton built over the search patterns.
ac: AhoCorasick,
/// Replacement strings, looked up by the id of the matched pattern; the
/// constructors fill this in the same order as the patterns.
replacements: Vec<&'static str>,
}
impl AhoCorasickReplacer {
    /// Builds a replacer from a static slice of `(pattern, replacement)` pairs.
    fn new_pairs(pairs: &'static [(&'static str, &'static str)]) -> Self {
        Self::from_pair_iter(pairs.iter().copied())
    }

    /// Builds a replacer from an owned list of `(pattern, replacement)` pairs.
    fn new(pairs: Vec<(&'static str, &'static str)>) -> Self {
        Self::from_pair_iter(pairs)
    }

    /// Shared constructor body: splits the pairs into parallel pattern and
    /// replacement lists (order preserved) and compiles the automaton.
    fn from_pair_iter(
        pairs: impl IntoIterator<Item = (&'static str, &'static str)>,
    ) -> Self {
        let (patterns, replacements): (Vec<&'static str>, Vec<&'static str>) =
            pairs.into_iter().unzip();
        Self {
            ac: build_ac(&patterns),
            replacements,
        }
    }
}
/// Compiles `patterns` into an automaton using leftmost-first match semantics.
///
/// # Panics
///
/// Panics if the automaton cannot be built from `patterns`.
fn build_ac(patterns: &[&str]) -> AhoCorasick {
    let mut builder = AhoCorasickBuilder::new();
    builder.match_kind(MatchKind::LeftmostFirst);
    builder
        .build(patterns)
        .expect("static patterns are valid")
}
/// Returns a copy of `text` in which every match of `r`'s automaton that is
/// delimited on both sides by a word boundary (see `is_word_boundary`) is
/// replaced by the replacement registered for the matched pattern.
///
/// Matches that touch a letter or digit on either side are left untouched.
// NOTE(review): a rejected match is skipped as a whole; `find_iter` resumes
// after it, so a candidate overlapping the rejected span is never tried.
// Confirm this is the intended behaviour.
fn apply_replacements(text: &str, r: &AhoCorasickReplacer) -> String {
    let mut rewritten = String::with_capacity(text.len());
    // Byte offset up to which `text` has already been emitted.
    let mut copied_up_to = 0;

    for hit in r.ac.find_iter(text) {
        let (start, end) = (hit.start(), hit.end());
        let prev = text[..start].chars().next_back();
        let next = text[end..].chars().next();

        if is_word_boundary(prev) && is_word_boundary(next) {
            rewritten.push_str(&text[copied_up_to..start]);
            rewritten.push_str(r.replacements[hit.pattern().as_usize()]);
            copied_up_to = end;
        }
    }

    rewritten.push_str(&text[copied_up_to..]);
    rewritten
}
/// Reports whether the character next to a match counts as a word boundary.
///
/// `None` (start or end of the text) is a boundary; a character is a boundary
/// unless it is alphabetic or numeric.
#[inline]
fn is_word_boundary(adjacent: Option<char>) -> bool {
    match adjacent {
        None => true,
        Some(c) => !(c.is_alphabetic() || c.is_numeric()),
    }
}
// Unit tests for the public conversion functions.
#[cfg(test)]
mod tests {
use super::*;
// Formal verb forms are rewritten to the expected colloquial spellings.
#[test]
fn formal_to_chat_basic() {
assert_eq!(to_chat("میخواهم"), "میخوام");
assert_eq!(to_chat("نمیدانم"), "نمیدونم");
}
// The reverse direction restores the formal spellings.
#[test]
fn chat_to_formal_basic() {
assert_eq!(to_formal("میخوام"), "میخواهم");
assert_eq!(to_formal("نمیدونم"), "نمیدانم");
}
// Latin-script input is transliterated before the chat conversion runs.
#[test]
fn finglish_input_to_chat() {
let out = to_chat("salam khoobi");
assert!(out.contains("سلام"));
assert!(out.contains("خوب"));
}
// NOTE(review): the first assertion passes if either greeting OR thanks was
// swapped; it does not require both.
#[test]
fn genz_swaps_loanwords() {
let out = to_genz("سلام، ممنون از دعوت به مهمانی!");
assert!(out.contains("های") || out.contains("مرسی"));
assert!(out.contains("پارتی"));
}
// `convert` produces the same results as calling the `to_*` functions directly.
#[test]
fn convert_dispatches() {
assert_eq!(convert("میخواهم", Style::Chat), "میخوام");
assert_eq!(convert("میخوام", Style::Formal), "میخواهم");
}
// NOTE(review): smoke test only — the result is discarded and nothing is
// asserted, so this only checks that the call does not panic.
#[test]
fn whole_word_boundary_respected() {
let _ = to_chat("ام میخواهم بروم");
}
// Every converter returns an empty string for empty input.
#[test]
fn empty_string() {
assert_eq!(to_chat(""), "");
assert_eq!(to_formal(""), "");
assert_eq!(to_genz(""), "");
}
}