use regex::Regex;
use serde_json;
use std::collections::BTreeMap;
use std::sync::mpsc;
use std::sync::mpsc::{Receiver, Sender};
use std::thread;
use crate::Profile;
// Compiles the given regular-expression literal the first time the call site
// is reached, caches the result in a static private to that call site, and
// evaluates to a `&'static regex::Regex`. An optional trailing comma is
// accepted. Panics on first use if the literal is not a valid pattern.
macro_rules! regex {
    ($pattern:literal $(,)?) => {{
        static COMPILED: once_cell::sync::OnceCell<regex::Regex> =
            once_cell::sync::OnceCell::new();
        COMPILED.get_or_init(|| regex::Regex::new($pattern).unwrap())
    }};
}
/// Ordered map from a character-category name (for example `"VowelUpper"`)
/// to the single-character symbol that represents that category in a pattern.
// NOTE(review): the alias is used by `PatternDefinition` in this file, so the
// `dead_code` allowance looks unnecessary — confirm before removing.
#[allow(dead_code)]
type PatternMap = BTreeMap<String, char>;
/// Describes one character of an analyzed entity: the character itself, its
/// immediate neighbours, its pattern symbol and its position.
///
/// Instances are produced by `PatternDefinition::factualize` and can be
/// round-tripped through JSON with `Fact::serialize` / `Fact::from_serialized`.
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct Fact {
/// The character this fact describes.
pub key: char,
/// The character immediately before `key`, or `None` when there is none.
pub prior_key: Option<char>,
/// The character immediately after `key`, or `None` when there is none.
pub next_key: Option<char>,
/// The pattern symbol assigned to `key` (see `PatternDefinition::symbolize_char`).
pub pattern_placeholder: char,
/// `1` when `key` is at index 0 of the entity, otherwise `0`.
pub starts_with: u32,
/// `1` when `key` is considered the last position of the entity, otherwise `0`.
pub ends_with: u32,
/// Zero-based character index of `key` within the entity.
pub index_offset: u32,
}
impl Fact {
    /// Creates a fact for character `k` with pattern symbol `pp`.
    ///
    /// `sw` and `ew` are stored as the `starts_with` / `ends_with` flags and
    /// `idx_off` as the index offset. Both neighbour keys start out as `None`;
    /// use [`Fact::set_prior_key`] and [`Fact::set_next_key`] to fill them in.
    #[inline]
    pub fn new(k: char, pp: char, sw: u32, ew: u32, idx_off: u32) -> Fact {
        Self {
            key: k,
            prior_key: None,
            next_key: None,
            pattern_placeholder: pp,
            starts_with: sw,
            ends_with: ew,
            index_offset: idx_off,
        }
    }

    /// Rebuilds a fact from the JSON text produced by [`Fact::serialize`].
    ///
    /// # Panics
    ///
    /// Panics when `serialized` is not valid JSON for a `Fact`.
    #[inline]
    pub fn from_serialized(serialized: &str) -> Fact {
        let parsed: Fact = serde_json::from_str(serialized).unwrap();
        parsed
    }

    /// Renders this fact as a JSON string.
    ///
    /// # Panics
    ///
    /// Panics when `serde_json` reports a serialization error.
    #[inline]
    pub fn serialize(&mut self) -> String {
        let fact: &Fact = self;
        serde_json::to_string(fact).unwrap()
    }

    /// Records `nk` as the character that follows this fact's key.
    #[inline]
    pub fn set_next_key(&mut self, nk: char) {
        let following = Some(nk);
        self.next_key = following;
    }

    /// Records `pk` as the character that precedes this fact's key.
    #[inline]
    pub fn set_prior_key(&mut self, pk: char) {
        let preceding = Some(pk);
        self.prior_key = preceding;
    }
}
/// The set of compiled regular expressions used to classify a single
/// character. Each field borrows a lazily compiled, process-wide `Regex`
/// (see the `regex!` macro), so constructing a `Pattern` never recompiles them.
pub struct Pattern {
// Tested by `PatternDefinition::symbolize_char` for the "ConsonantUpper" category.
regex_consonant_upper: &'static Regex,
// Tested for the "ConsonantLower" category.
regex_consonant_lower: &'static Regex,
// Tested for the "VowelUpper" category.
regex_vowel_upper: &'static Regex,
// Tested for the "VowelLower" category.
regex_vowel_lower: &'static Regex,
// Tested for the "Numeric" category.
regex_numeric: &'static Regex,
// Tested for the "Punctuation" category.
regex_punctuation: &'static Regex,
// Tested for the "WhiteSpace" category.
regex_space: &'static Regex,
}
impl Default for Pattern {
    /// Builds the standard classifier set.
    ///
    /// Every expression is a single ASCII-only (`(?-u)`) character class and
    /// is compiled at most once per process by the `regex!` macro.
    fn default() -> Self {
        Pattern {
            regex_consonant_upper: regex!(r"(?-u)[B-DF-HJ-NP-TV-Z]"),
            regex_consonant_lower: regex!(r"(?-u)[b-df-hj-np-tv-z]"),
            // Inside a character class `|` is a literal pipe, not alternation.
            // The former `[A|E|I|O|U]` / `[a|e|i|o|u]` therefore classified
            // the `|` character itself as a vowel.
            regex_vowel_upper: regex!(r"(?-u)[AEIOU]"),
            regex_vowel_lower: regex!(r"(?-u)[aeiou]"),
            regex_numeric: regex!(r"(?-u)[0-9]"),
            // NOTE(review): this is a raw string, so every `\\` is an escaped
            // literal backslash. As a consequence `\\-_` parses as the range
            // `\`..=`_` (backslash, `]`, `^`, `_`) and a plain `-` is NOT
            // matched. This looks unintended, but it is left byte-identical
            // because changing it would reclassify common characters —
            // confirm the intended set before touching it.
            regex_punctuation: regex!(r"(?-u)[.,\\/#!$%\\^&\\*;:{}=\\-_`~()\\?]"),
            regex_space: regex!(r"(?-u)[\s]"),
        }
    }
}
/// Classifies characters into pattern symbols and turns entities (strings)
/// into a pattern string plus one `Fact` per character.
pub struct PatternDefinition {
// Category name (e.g. "VowelUpper") -> symbol used in generated patterns.
pattern_map: PatternMap,
// Compiled regular expressions that decide which category a character is in.
pattern: Pattern,
}
impl PatternDefinition {
pub fn new() -> PatternDefinition {
let symbols: [char; 9] = ['@', 'C', 'c', 'V', 'v', '#', '~', 'S', 'p'];
let mut pttrn_def = PatternMap::new();
pttrn_def.insert("Unknown".to_string(), symbols[0]);
pttrn_def.insert("ConsonantUpper".to_string(), symbols[1]);
pttrn_def.insert("ConsonantLower".to_string(), symbols[2]);
pttrn_def.insert("VowelUpper".to_string(), symbols[3]);
pttrn_def.insert("VowelLower".to_string(), symbols[4]);
pttrn_def.insert("Numeric".to_string(), symbols[5]);
pttrn_def.insert("RegExSpcChar".to_string(), symbols[6]);
pttrn_def.insert("WhiteSpace".to_string(), symbols[7]);
pttrn_def.insert("Punctuation".to_string(), symbols[8]);
PatternDefinition {
pattern_map: pttrn_def,
pattern: Pattern::default(),
}
}
#[inline]
pub fn analyze(&mut self, entity: &str) -> (String, Vec<Fact>) {
let mut pttrn = String::new();
let mut facts = Vec::new();
for (i, _c) in entity.chars().enumerate() {
let idx: u32 = i as u32;
let fact = self.factualize(entity, idx);
pttrn.push_str(&*fact.pattern_placeholder.to_string());
facts.push(fact);
}
(pttrn, facts)
}
#[inline]
pub fn factualize(&mut self, entity: &str, idx: u32) -> Fact {
let c = entity.chars().nth(idx as usize).unwrap();
let pp = self.symbolize_char(c);
let pk = if idx > 0 {
entity.chars().nth(idx as usize - 1)
} else {
None
};
let nk = if idx < entity.len() as u32 - 1 {
entity.chars().nth(idx as usize + 1)
} else {
None
};
let sw = if idx == 0 { 1 } else { 0 };
let ew = if idx == entity.len() as u32 - 1 { 1 } else { 0 };
let mut fact = Fact::new(c, pp, sw, ew, idx);
if nk.is_some() {
let _ = &fact.set_next_key(nk.unwrap());
}
if pk.is_some() {
let _ = &fact.set_prior_key(pk.unwrap());
}
fact
}
#[inline]
pub fn get(&self, key: &str) -> char {
*self.pattern_map.get(key).unwrap()
}
#[inline]
pub fn symbolize_char(&self, c: char) -> char {
let mut symbol = self.pattern_map.get("Unknown");
let mut found = false;
if !found && self.pattern.regex_consonant_upper.is_match(&c.to_string()) {
symbol = self.pattern_map.get("ConsonantUpper");
found = true;
}
if !found && self.pattern.regex_consonant_lower.is_match(&c.to_string()) {
symbol = self.pattern_map.get("ConsonantLower");
found = true;
}
if !found && self.pattern.regex_vowel_upper.is_match(&c.to_string()) {
symbol = self.pattern_map.get("VowelUpper");
found = true;
}
if !found && self.pattern.regex_vowel_lower.is_match(&c.to_string()) {
symbol = self.pattern_map.get("VowelLower");
found = true;
}
if !found && self.pattern.regex_numeric.is_match(&c.to_string()) {
symbol = self.pattern_map.get("Numeric");
found = true;
}
if !found && self.pattern.regex_space.is_match(&c.to_string()) {
symbol = self.pattern_map.get("WhiteSpace");
found = true;
}
if !found && self.pattern.regex_punctuation.is_match(&c.to_string()) {
symbol = self.pattern_map.get("Punctuation");
found = true;
}
if !found {
symbol = self.pattern_map.get("Unknown");
}
*symbol.unwrap()
}
}
pub trait Engine {
fn analyze_entities(entities: Vec<String>) -> Vec<(String, Vec<Fact>)> {
let (tx, rx): (Sender<(String, Vec<Fact>)>, Receiver<(String, Vec<Fact>)>) =
mpsc::channel();
let mut children = Vec::new();
for entity in entities.clone() {
let thread_tx = tx.clone();
let child = thread::spawn(move || {
thread_tx
.send(PatternDefinition::new().analyze(&entity))
.unwrap();
debug!("PatternDefinition::analyze thread finished for {}", entity);
});
children.push(child);
}
let mut results = Vec::new();
for entity in entities {
results.push(match rx.recv() {
Ok(result) => result,
Err(_) => {
error!("Error: Could not analyze the entity: {}", entity);
panic!("Error: Could not analyze the data!")
}
});
}
for child in children {
child.join().expect("Error: Could not analyze the data!");
}
results
}
fn profile_entities(mut profile: Profile, entities: Vec<String>) -> Result<Profile, String> {
let results = Self::analyze_entities(entities);
for result in results {
match profile.apply_facts(result.0, result.1) {
Ok(_) => {}
Err(e) => {
return Err(format!(
"Error: Couldn't apply the Pattern and Facts to the Profile. Error Message: {}",
e.to_string()
))
}
}
}
Ok(profile)
}
fn profile_entities_with_container(container: EngineContainer) -> Result<Profile, String> {
Self::profile_entities(container.profile, container.entities)
}
}
/// Bundles a `Profile` with the entities to profile so both can be handed to
/// `Engine::profile_entities_with_container` as a single argument.
pub struct EngineContainer {
/// The profile the analysis results are applied to.
pub profile: Profile,
/// The raw entities (strings) to analyze.
pub entities: Vec<String>,
}
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal implementor so the provided `Engine` methods can be called.
    struct Xtest {}
    impl Engine for Xtest {}

    // JSON form of `Fact::new('r', 'c', 0, 0, 2)`.
    const SERIALIZED_FACT: &str = "{\"key\":\"r\",\"prior_key\":null,\"next_key\":null,\"pattern_placeholder\":\"c\",\"starts_with\":0,\"ends_with\":0,\"index_offset\":2}";

    // Words shared by the multi-threaded tests.
    fn sample_words() -> Vec<String> {
        vec![
            "word-one".to_string(),
            "word-two".to_string(),
            "word-three".to_string(),
            "word-four".to_string(),
            "word-five".to_string(),
        ]
    }

    #[test]
    fn test_fact_new() {
        // Check every field instead of merely constructing the value.
        let fact = Fact::new('r', 'c', 0, 0, 2);
        assert_eq!(fact.key, 'r');
        assert_eq!(fact.pattern_placeholder, 'c');
        assert_eq!(fact.prior_key, None);
        assert_eq!(fact.next_key, None);
        assert_eq!(fact.starts_with, 0);
        assert_eq!(fact.ends_with, 0);
        assert_eq!(fact.index_offset, 2);
    }

    #[test]
    fn test_fact_new_from_serialized() {
        let fact = Fact::from_serialized(SERIALIZED_FACT);
        assert_eq!(fact.pattern_placeholder, 'c');
    }

    #[test]
    fn test_fact_serialize() {
        let mut fact = Fact::new('r', 'c', 0, 0, 2);
        let serialized = fact.serialize();
        assert_eq!(serialized, SERIALIZED_FACT);
    }

    #[test]
    fn test_fact_set_next_key() {
        let mut fact = Fact::new('r', 'c', 0, 0, 2);
        fact.set_next_key('d');
        assert_eq!(fact.next_key, Some('d'));
        // The other neighbour must be left untouched.
        assert_eq!(fact.prior_key, None);
    }

    #[test]
    fn test_fact_set_prior_key() {
        let mut fact = Fact::new('r', 'c', 0, 0, 2);
        fact.set_prior_key('o');
        assert_eq!(fact.prior_key, Some('o'));
        // The other neighbour must be left untouched.
        assert_eq!(fact.next_key, None);
    }

    #[test]
    fn test_pattern_definition_new() {
        let pttrn_def = PatternDefinition::new();
        assert_eq!(pttrn_def.get("VowelUpper"), 'V');
    }

    #[test]
    fn test_pattern_definition_symbolize_char() {
        let pttrn_def = PatternDefinition::new();
        assert_eq!(pttrn_def.symbolize_char('A'), 'V');
    }

    #[test]
    fn test_pattern_definition_factualize() {
        let mut pttrn_def = PatternDefinition::new();
        let mut fact1 = pttrn_def.factualize("Word", 1);
        let mut fact2 = Fact::new('o', 'v', 0, 0, 1);
        fact2.set_prior_key('W');
        fact2.set_next_key('r');
        assert_eq!(fact1.serialize(), fact2.serialize());
    }

    #[test]
    fn test_pattern_definition_analyze() {
        let mut pttrn_def = PatternDefinition::new();
        let word = pttrn_def.analyze("HELlo0?^@");
        assert_eq!(word.0, "CVCcv#pp@");
        assert_eq!(word.1.len(), 9);
    }

    #[test]
    fn test_pattern_definition_analyze_multithread() {
        let results = Xtest::analyze_entities(sample_words());
        println!("{:?}", results);
        assert_eq!(results.len(), 5);
    }

    #[test]
    fn test_profile_entities() {
        let profile = Profile::new();
        let result = Xtest::profile_entities(profile, sample_words());
        assert!(result.is_ok());
    }
}