pub(crate) mod constants;
#[cfg(test)]
mod test;
use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashMap;
use std::sync::RwLock;
/// A single regex-driven rewrite rule used when no irregular-word entry applies.
#[derive(Debug, Clone)]
struct WordRule {
/// Pattern tested against the word being inflected (see `sanitize_word`).
rule: Regex,
/// Replacement template; `$N` placeholders are filled from the pattern's capture groups.
placement: String,
}
/// Builds a `Vec<WordRule>` for one inflection direction.
///
/// `$rules` is iterated as `(pattern, placement)` pairs; `$uncountable` is
/// iterated as bare patterns, each of which gets the `$0` placement (the
/// whole match is re-emitted, so the word is left as it was). The entries
/// from `$rules` come first, followed by the entries from `$uncountable`.
///
/// Panics with "Invalid regular expression" if any pattern fails to compile.
macro_rules! load_regex_vec {
    ($rules: expr, $uncountable: expr) => {{
        let rewriting = $rules.iter().map(|(k, v)| WordRule {
            rule: Regex::new(k).expect("Invalid regular expression"),
            placement: v.to_string(),
        });
        let unchanged = $uncountable.iter().map(|s| WordRule {
            rule: Regex::new(s).expect("Invalid regular expression"),
            placement: "$0".to_string(),
        });
        // Chaining preserves the original ordering: rewriting rules, then uncountables.
        rewriting.chain(unchanged).collect::<Vec<WordRule>>()
    }};
}
/// Maps every item of `$rules.iter()` through `$map` and collects the results
/// into whatever collection type the call site expects.
macro_rules! load_irregular_map {
($rules: expr, $map: expr) => {
$rules.iter().map($map).collect()
};
}
/// Irregular words keyed by singular form, with the plural form as the value
/// (this is the orientation `add_irregular_rule` inserts with).
///
/// Seeded on first access from `constants::IRREGULAR_RULES`, keeping each pair
/// in its stored order — presumably `(singular, plural)`; confirm in `constants`.
static IRREGULAR_SINGLES: Lazy<RwLock<HashMap<String, String>>> = Lazy::new(|| {
RwLock::new(load_irregular_map!(constants::IRREGULAR_RULES, |(k, v)| (
k.to_string(),
v.to_string()
)))
});
/// Irregular words keyed by plural form, with the singular form as the value
/// (this is the orientation `add_irregular_rule` inserts with).
///
/// Seeded on first access from `constants::IRREGULAR_RULES` with each pair
/// swapped, making it the inverse of `IRREGULAR_SINGLES`.
static IRREGULAR_PLURALS: Lazy<RwLock<HashMap<String, String>>> = Lazy::new(|| {
RwLock::new(load_irregular_map!(constants::IRREGULAR_RULES, |(k, v)| (
v.to_string(),
k.to_string()
)))
});
/// Regex rules used by `to_plural`.
///
/// Seeded on first access from `constants::PLURAL_RULES`, followed by
/// `constants::UNCOUNTABLE_REGEX_RULES` (each with the `$0` placement).
/// `add_plural_rule` appends to this list at runtime.
static PLURAL_RULES: Lazy<RwLock<Vec<WordRule>>> = Lazy::new(|| {
RwLock::new(load_regex_vec!(
constants::PLURAL_RULES,
constants::UNCOUNTABLE_REGEX_RULES
))
});
/// Regex rules used by `to_singular`.
///
/// Seeded on first access from `constants::SINGULAR_RULES`, followed by
/// `constants::UNCOUNTABLE_REGEX_RULES` (each with the `$0` placement).
/// `add_singular_rule` appends to this list at runtime.
static SINGULAR_RULES: Lazy<RwLock<Vec<WordRule>>> = Lazy::new(|| {
RwLock::new(load_regex_vec!(
constants::SINGULAR_RULES,
constants::UNCOUNTABLE_REGEX_RULES
))
});
/// Literal words that are never inflected.
///
/// Seeded on first access from `constants::UNCOUNTABLE_RULES`. Lookups in
/// `sanitize_word` compare against the lowercased input, and
/// `add_uncountable_rule` lowercases what it appends; the seed entries are
/// presumably lowercase already — confirm in `constants`.
static UNCOUNTABLE_RULES: Lazy<RwLock<Vec<String>>> = Lazy::new(|| {
RwLock::new(
constants::UNCOUNTABLE_RULES
.iter()
.map(|s| s.to_string())
.collect(),
)
});
/// Registers an irregular singular/plural pair (e.g. `"person"` / `"people"`).
///
/// Both forms are lowercased before being stored: lookups in `replace_word`
/// use the lowercased input word as the key, so an entry containing uppercase
/// letters could otherwise never be found. This mirrors the lowercasing that
/// `add_uncountable_rule` applies to literal words.
///
/// An existing entry for the same singular or plural form is overwritten.
///
/// # Panics
///
/// Panics if either irregular-word lock has been poisoned.
pub fn add_irregular_rule(singular: String, plural: String) {
    let singular = singular.to_lowercase();
    let plural = plural.to_lowercase();
    // Each map is updated in its own scope so the two write locks are never
    // held at the same time.
    {
        let mut singles = IRREGULAR_SINGLES.write().unwrap();
        singles.insert(singular.clone(), plural.clone());
    }
    {
        let mut plurals = IRREGULAR_PLURALS.write().unwrap();
        plurals.insert(plural, singular);
    }
}
/// Appends a pluralisation rule: words matching `rule` are rewritten using
/// the `placement` template (`$N` refers to capture group `N`).
///
/// Rules are tried from the most recently added backwards, so a rule added
/// here is consulted before every rule that was already registered.
///
/// # Panics
///
/// Panics if the plural-rule lock has been poisoned.
pub fn add_plural_rule(rule: Regex, placement: String) {
    let new_rule = WordRule { rule, placement };
    PLURAL_RULES.write().unwrap().push(new_rule);
}
/// Appends a singularisation rule: words matching `rule` are rewritten using
/// the `placement` template (`$N` refers to capture group `N`).
///
/// Rules are tried from the most recently added backwards, so a rule added
/// here is consulted before every rule that was already registered.
///
/// # Panics
///
/// Panics if the singular-rule lock has been poisoned.
pub fn add_singular_rule(rule: Regex, placement: String) {
    let new_rule = WordRule { rule, placement };
    SINGULAR_RULES.write().unwrap().push(new_rule);
}
/// A rule marking words as uncountable, accepted by `add_uncountable_rule`.
pub enum UncountableRule {
/// A pattern; registered as both a plural and a singular rule with the `$0`
/// placement, so matching words are left as they are.
Regex(Regex),
/// A literal word; stored lowercased in the uncountable word list.
String(String),
}
/// Registers a word, or a pattern of words, that must never be inflected.
///
/// * `UncountableRule::String` — the word is lowercased and appended to the
///   uncountable word list.
/// * `UncountableRule::Regex` — the pattern is added as both a plural and a
///   singular rule whose placement is `$0` (re-emit the whole match).
///
/// # Panics
///
/// Panics if the lock guarding the affected rule list has been poisoned.
pub fn add_uncountable_rule(rule: UncountableRule) {
    match rule {
        UncountableRule::String(word) => {
            UNCOUNTABLE_RULES.write().unwrap().push(word.to_lowercase());
        }
        UncountableRule::Regex(pattern) => {
            let keep_match = String::from("$0");
            add_plural_rule(pattern.clone(), keep_match.clone());
            add_singular_rule(pattern, keep_match);
        }
    }
}
/// Inflects `word` to agree with `count`.
///
/// The singular form is produced when `count` is exactly `1`; every other
/// value (including `0` and negative numbers) produces the plural form.
/// When `include_count` is `true` the result is prefixed with the count and
/// a single space, e.g. `"3 words"`.
pub fn pluralize(word: &str, count: isize, include_count: bool) -> String {
    let inflected = match count {
        1 => to_singular(word),
        _ => to_plural(word),
    };
    if !include_count {
        return inflected;
    }
    format!("{} {}", count, inflected)
}
/// Returns the singular form of `word`.
///
/// Irregular plurals are replaced via `IRREGULAR_PLURALS`, words already
/// listed as irregular singulars are kept, and everything else goes through
/// the singular regex rules.
///
/// # Panics
///
/// Panics if any of the rule locks has been poisoned.
fn to_singular(word: &str) -> String {
    // Take the read locks in the same order as `to_plural` (singles, then
    // plurals). With opposite orders, two readers each holding one map and
    // waiting for the other can deadlock on a writer-preferring `RwLock` when
    // writers are queued on both maps.
    let irregular_singles = IRREGULAR_SINGLES.read().unwrap();
    let irregular_plurals = IRREGULAR_PLURALS.read().unwrap();
    let singular_rules = SINGULAR_RULES.read().unwrap();
    let uncountable = UNCOUNTABLE_RULES.read().unwrap();
    replace_word(
        &irregular_plurals,
        &irregular_singles,
        &singular_rules,
        &uncountable,
        word,
    )
}
/// Returns the plural form of `word`.
///
/// Irregular singulars are replaced via `IRREGULAR_SINGLES`, words already
/// listed as irregular plurals are kept, and everything else goes through
/// the plural regex rules.
///
/// # Panics
///
/// Panics if any of the rule locks has been poisoned.
fn to_plural(word: &str) -> String {
    // Guards are taken in a fixed order: singles, plurals, rules, uncountables.
    let singles = IRREGULAR_SINGLES.read().unwrap();
    let plurals = IRREGULAR_PLURALS.read().unwrap();
    let rules = PLURAL_RULES.read().unwrap();
    let uncountable_words = UNCOUNTABLE_RULES.read().unwrap();
    replace_word(&singles, &plurals, &rules, &uncountable_words, word)
}
/// Inflects `word` in one direction using the supplied tables.
///
/// The lookup key is `word` lowercased. In order of precedence:
///
/// 1. the key is in `keep_map` — the word is already in the target form and
///    is returned (passed through `restore_case`);
/// 2. the key is in `replace_map` — its mapped value is returned with the
///    casing of `word` re-applied by `restore_case`;
/// 3. otherwise the word is handed to `sanitize_word` with the regex `rules`
///    and the `uncountable` word list.
fn replace_word(
    replace_map: &HashMap<String, String>,
    keep_map: &HashMap<String, String>,
    rules: &[WordRule],
    uncountable: &[String],
    word: &str,
) -> String {
    let lowered = word.to_lowercase();
    if keep_map.contains_key(&lowered) {
        restore_case(word, &lowered)
    } else if let Some(irregular) = replace_map.get(&lowered) {
        restore_case(word, irregular)
    } else {
        sanitize_word(&lowered, word, rules, uncountable)
    }
}
fn sanitize_word(token: &str, word: &str, rules: &[WordRule], uncountable: &[String]) -> String {
if token.is_empty() || uncountable.contains(&token.to_owned()) {
return word.to_string();
}
for word_rule in rules.iter().rev() {
if word_rule.rule.is_match(word) {
let replaced = word_rule.rule.replace(word, |caps: ®ex::Captures| {
let mut out = restore_case(word, &word_rule.placement);
for (i, m) in caps.iter().filter_map(|m| m).enumerate() {
out = out.replace(&format!("${}", i), &restore_case(word, m.as_str()));
}
out
});
return remove_dollar_escapes(&replaced);
}
}
word.to_string()
}
/// Removes every `$` from `s` together with the single character that
/// follows it (whatever that character is, including another `$`).
///
/// A trailing `$` with nothing after it is simply dropped.
fn remove_dollar_escapes(s: &str) -> String {
    let mut cleaned = String::new();
    let mut remaining = s.chars();
    while let Some(current) = remaining.next() {
        if current == '$' {
            // Discard the character the `$` was attached to, if there is one.
            remaining.next();
        } else {
            cleaned.push(current);
        }
    }
    cleaned
}
/// Re-applies the casing style of `word` to `token`.
///
/// * `word == token` — `token` is returned as-is.
/// * `word` is entirely lowercase — `token` lowercased.
/// * `word` is entirely uppercase — `token` uppercased.
/// * `word` is mixed case and starts with an uppercase character — `token`
///   is returned unchanged.
/// * anything else — `token` lowercased.
///
/// NOTE(review): the mixed-case branch does not capitalise `token`.
/// `sanitize_word` passes suffix templates and captured fragments through
/// this function using the whole word's casing, so capitalising here would
/// corrupt those fragments; confirm before changing it.
fn restore_case(word: &str, token: &str) -> String {
    if word == token {
        return token.to_string();
    }
    if word == word.to_lowercase() {
        return token.to_lowercase();
    }
    if word == word.to_uppercase() {
        return token.to_uppercase();
    }
    // The token is copied whole rather than split into "first character +
    // rest": slicing at byte offset 1 panics when the token begins with a
    // multi-byte character, and the two pieces were only being concatenated
    // back together unchanged.
    let starts_upper = word.chars().next().map_or(false, char::is_uppercase);
    if starts_upper {
        token.to_string()
    } else {
        token.to_lowercase()
    }
}