tb_normalization 1.0.0

A library for normalization utf8 string, loc dau vietnamese and some language
Documentation
extern crate unicode_normalization;
extern crate regex;

use regex::Regex;
use unicode_normalization::UnicodeNormalization;

fn string_to_static_str(s: String) -> &'static str {
    Box::leak(s.into_boxed_str())
}

fn tb_normalization(s: String) -> String {
    let c = string_to_static_str(s).clone().nfd().collect::<String>();
    let mut re = Regex::new(r"[\u0300-\u036f]").unwrap();
    let d = re.replace_all(string_to_static_str(c), "").replace("đ", "d").replace("Đ", "D").to_string();
    re = Regex::new(r"\s+").unwrap();
    let e = re.replace_all(string_to_static_str(d), " ");
    e.trim().to_string()
}

fn remove_special_characters(s: String) -> String {
    let mut re = Regex::new(r"[^0-9a-zA-Z]").unwrap();
    let d = re.replace_all(string_to_static_str(s), " ").to_string();
    re = Regex::new(r"\s+").unwrap();
    let e = re.replace_all(string_to_static_str(d), " ");
    e.trim().to_string()
}

pub trait TbNormalization {
    fn tb_normalization(&self) -> String; 
    fn remove_special_characters(&self) -> String;
}

impl TbNormalization for String {
    fn tb_normalization(&self) -> String {
        tb_normalization(self.clone())
    }
    fn remove_special_characters(&self) -> String {
        remove_special_characters(self.clone())
    }
}

impl<'a> TbNormalization for &'a str {
    fn tb_normalization(&self) -> String {
        tb_normalization(self.clone().to_string())
    }
    fn remove_special_characters(&self) -> String {
        remove_special_characters(self.clone().to_string())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_for_static_str() {
        let s = "số 22 ngách 63/30/16 lê đức thọ , mỹ đình 2  Được chưa nhỉ  --";
        assert_eq!(s.tb_normalization(), "so 22 ngach 63/30/16 le duc tho , my dinh 2 Duoc chua nhi --".to_string());
    }
    #[test]
    fn test_for_string() {
        let s = "số 22 ngách 63/30/16 lê đức thọ , mỹ đình 2  Được chưa nhỉ  --";
        assert_eq!(s.to_string().tb_normalization(), "so 22 ngach 63/30/16 le duc tho , my dinh 2 Duoc chua nhi --".to_string());
    }
    #[test]
    fn test_with_remove_special() {
        let s = "số 22 ngách 63/30/16 lê đức thọ , mỹ đình 2  Được chưa nhỉ  --";
        assert_eq!(s.tb_normalization().remove_special_characters(), "so 22 ngach 63 30 16 le duc tho my dinh 2 Duoc chua nhi".to_string());
    }
}