test_data_generation/engine/
mod.rs

1//!
2//!
3//! # Fact
4//! The Fact object is a representation of a character based on its context within a data entity.
5//! Facts are created during the analyze process and then later used to generate data from the algorithm.
6//!
7//! ## Example
8//!
9//! ```rust
10//! extern crate test_data_generation;
11//!
12//! use test_data_generation::engine::Fact;
13//!
14//! fn main() {
15//!     //fact created for the character 'r' in the string "word"
16//!    	let mut fact =  Fact::new('r','c',0,0,2);
17//!
18//!     // set the char that appears after the 'r'
19//!     fact.set_next_key('d');
20//!
21//!     // set the char that appears before the 'r'
22//!     fact.set_prior_key('o');
23//! }
24//! ```
25//!
26//! # PatternDefinition
27//! The PatternDefinition provides functionality to retrieve symbols that are used in defining a pattern.
28//!
//! Here is the list of symbols that identify a type of character:<br>
//! @ = unknown [Unknown]<br>
//! C = upper case consonant [ConsonantUpper]<br>
//! c = lower case consonant [ConsonantLower]<br>
//! V = upper case vowel [VowelUpper]<br>
//! v = lower case vowel [VowelLower]<br>
//! \# = numeric digit [Numeric]<br>
//! ~ = special regex character [RegExSpcChar]<br>
//! S = white space [WhiteSpace]<br>
//! p = punctuation [Punctuation]<br>
39//!
40//! ## Example
41//!
42//! ```rust
43//! extern crate test_data_generation;
44//!
45//! use test_data_generation::engine::PatternDefinition;
46//!
47//! fn main() {
48//! 	let pttrn_def = PatternDefinition::new();
49//!     println!("Upper case vowel symbol: {:?}", pttrn_def.get(&"VowelUpper".to_string()));
50//! }
51//! ```
52
53use regex::Regex;
54use serde_json;
55use std::collections::BTreeMap;
56use std::sync::mpsc;
57use std::sync::mpsc::{Receiver, Sender};
58use std::thread;
59
60use crate::Profile;
61//use async_trait::async_trait;
62
/// Compiles the given regular-expression literal the first time the expansion
/// is evaluated and hands back a reference to that cached `regex::Regex` on
/// every later evaluation.
///
/// Each expansion declares its own `static` cell, so every call site caches
/// its own pattern. The first evaluation panics if the literal is not a valid
/// regular expression.
macro_rules! regex {
    ($pattern:literal $(,)?) => {{
        static COMPILED: once_cell::sync::OnceCell<regex::Regex> =
            once_cell::sync::OnceCell::new();
        COMPILED.get_or_init(|| regex::Regex::new($pattern).unwrap())
    }};
}
69
/// Lookup table from a character-class name (e.g. `"VowelUpper"`) to the
/// symbol that stands for that class in a pattern (e.g. `'V'`).
#[allow(dead_code)]
type PatternMap = BTreeMap<String, char>;
72
#[derive(Clone, Serialize, Deserialize, Debug)]
/// Represents a Fact for a character in a sample data entity that has been analyzed.
///
/// The field names and their declaration order determine the JSON produced by
/// `Fact::serialize` (the unit tests compare that string verbatim), so keep
/// both stable.
pub struct Fact {
    /// the char that the fact defines (e.g. 'a', '1', '%', etc.)
    pub key: char,
    /// the char that appears before (-1) the key in the entity;
    /// `None` until set (see `Fact::set_prior_key`)
    pub prior_key: Option<char>,
    /// the char that appears after (+1) the key in the entity;
    /// `None` until set (see `Fact::set_next_key`)
    pub next_key: Option<char>,
    /// the PatternPlaceholder symbol that represents the type of key
    pub pattern_placeholder: char,
    /// indicates if the key is the first char in the entity (0=no, 1=yes)
    pub starts_with: u32,
    /// indicates if the key is the last char in the entity (0=no, 1=yes)
    pub ends_with: u32,
    /// indicates the number of positions from the index zero (where the char is located in the entity from the first position)
    pub index_offset: u32,
}
91
92impl Fact {
93    /// Constructs a new Fact
94    ///
95    /// # Arguments
96    ///
97    /// * `k: char` - The char that the Fact represents (also known as the `key`).</br>
98    /// * `pp: char` - The char that represents the patter placeholder for the key.</br>
99    /// * `sw: u32` - Indicates is the key is the first char in the entity. (0=no, 1=yes)</br>
100    /// * `ew: u32` - Indicates is the key is the last char in the entity. (0=no, 1=yes)</br>
101    /// * `idx_off: u32` - The index that represents the postion of the key from the beginning of the entity (zero based).</br>
102    ///
103    /// # Example
104    ///
105    /// ```rust
106    /// extern crate test_data_generation;
107    ///
108    /// use test_data_generation::engine::Fact;
109    ///
110    /// fn main() {
111    /// 	//fact created for the character 'r' in the string "word"
112    ///    	let mut fact =  Fact::new('r','c',0,0,2);
113    /// }
114    /// ```
115    #[inline]
116    pub fn new(k: char, pp: char, sw: u32, ew: u32, idx_off: u32) -> Fact {
117        Fact {
118            key: k,
119            prior_key: None,
120            next_key: None,
121            pattern_placeholder: pp,
122            starts_with: sw,
123            ends_with: ew,
124            index_offset: idx_off,
125        }
126    }
127
128    /// Constructs a new Fact from a serialized (JSON) string of the Fact object. This is used when restoring from "archive"
129    ///
130    /// # Arguments
131    ///
132    /// * `serialized: &str` - The JSON string that represents the archived Fact object.</br>
133    ///
134    /// # Example
135    ///
136    /// ```rust
137    /// extern crate test_data_generation;
138    ///
139    /// use test_data_generation::engine::Fact;
140    ///
141    /// fn main() {
142    ///		let serialized = "{\"key\":\"r\",\"prior_key\":null,\"next_key\":null,\"pattern_placeholder\":\"c\",\"starts_with\":0,\"ends_with\":0,\"index_offset\":2}";
143    ///		let mut fact = Fact::from_serialized(&serialized);
144    ///     fact.set_prior_key('a');
145    ///		fact.set_next_key('e');
146    ///
147    ///		assert_eq!(fact.pattern_placeholder, 'c');
148    /// }
149    /// ```
150    #[inline]
151    pub fn from_serialized(serialized: &str) -> Fact {
152        serde_json::from_str(&serialized).unwrap()
153    }
154
155    /// This function converts the Fact to a serialize JSON string.
156    ///
157    /// # Example
158    ///
159    /// ```rust
160    /// extern crate test_data_generation;
161    ///
162    /// use test_data_generation::engine::Fact;
163    ///
164    /// fn main() {
165    /// 	//fact created for the character 'r' in the string "word"
166    ///    	let mut fact =  Fact::new('r','c',0,0,2);
167    ///
168    ///     println!("{}", fact.serialize());
169    ///     // {"key":"r","prior_key":null,"next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2}
170    /// }
171    ///
172    #[inline]
173    pub fn serialize(&mut self) -> String {
174        serde_json::to_string(&self).unwrap()
175    }
176
177    /// This function sets the next key attribute to the specified char.
178    ///
179    /// # Arguments
180    ///
181    /// * `nk: char` - The character that represents the next character in the entity
182    ///
183    /// # Example
184    ///
185    /// ```rust
186    /// extern crate test_data_generation;
187    ///
188    /// use test_data_generation::engine::Fact;
189    ///
190    /// fn main() {
191    /// 	//fact created for the character 'r' in the string "word"
192    ///    	let mut fact =  Fact::new('r','c',0,0,2);
193    ///     fact.set_next_key('d');
194    /// }
195    ///
196    #[inline]
197    pub fn set_next_key(&mut self, nk: char) {
198        self.next_key = Some(nk);
199    }
200
201    /// This function sets the prior key attribute to the specified char.
202    ///
203    /// # Arguments
204    ///
205    /// * `pk: char` - The character that represents the prior character in the entity
206    ///
207    /// # Example
208    ///
209    /// ```rust
210    /// extern crate test_data_generation;
211    ///
212    /// use test_data_generation::engine::Fact;
213    ///
214    /// fn main() {
215    /// 	//fact created for the character 'r' in the string "word"
216    ///    	let mut fact =  Fact::new('r','c',0,0,2);
217    ///     fact.set_prior_key('o');
218    /// }
219    ///
220    #[inline]
221    pub fn set_prior_key(&mut self, pk: char) {
222        self.prior_key = Some(pk);
223    }
224}
225
/// Represents a symbolic pattern of an entity (String)
///
/// Holds one compiled regex rule per character class.
/// `PatternDefinition::symbolize_char` tests a single character against these
/// rules to decide which pattern symbol represents it. The rules are
/// `'static` references, so constructing a `Pattern` does not own or copy any
/// compiled regex.
pub struct Pattern {
    /// The regex rule used to find upper case consonants
    regex_consonant_upper: &'static Regex,
    /// The regex rule used to find lower case consonants
    regex_consonant_lower: &'static Regex,
    /// The regex rule used to find upper case vowels
    regex_vowel_upper: &'static Regex,
    /// The regex rule used to find lower case vowels
    regex_vowel_lower: &'static Regex,
    /// The regex rule used to find numeric digits
    regex_numeric: &'static Regex,
    /// The regex rule used to find punctuation
    regex_punctuation: &'static Regex,
    /// The regex rule used to find white spaces
    regex_space: &'static Regex,
}
243
244impl Default for Pattern {
245    fn default() -> Self {
246        Pattern {
247            regex_consonant_upper: regex!(r"(?-u)[B-DF-HJ-NP-TV-Z]"),
248            regex_consonant_lower: regex!(r"(?-u)[b-df-hj-np-tv-z]"),
249            regex_vowel_upper: regex!(r"(?-u)[A|E|I|O|U]"),
250            regex_vowel_lower: regex!(r"(?-u)[a|e|i|o|u]"),
251            regex_numeric: regex!(r"(?-u)[0-9]"),
252            regex_punctuation: regex!(r"(?-u)[.,\\/#!$%\\^&\\*;:{}=\\-_`~()\\?]"),
253            regex_space: regex!(r"(?-u)[\s]"),
254        }
255    }
256}
257
/// Represents the object managing all the symbols used in pattern definitions
pub struct PatternDefinition {
    /// maps a character-class name (e.g. "VowelUpper") to its pattern symbol
    pattern_map: PatternMap,
    /// the regex rules used to decide which character class a char belongs to
    pattern: Pattern,
}
263
264impl PatternDefinition {
265    /// Constructs a new PatternDefinition
266    ///
267    /// # Example
268    ///
269    /// ```rust
270    /// extern crate test_data_generation;
271    ///
272    /// use test_data_generation::engine::PatternDefinition;
273    ///
274    /// fn main() {
275    /// 	let pttrn_def = PatternDefinition::new();
276    /// }
277    /// ```
278    pub fn new() -> PatternDefinition {
279        let symbols: [char; 9] = ['@', 'C', 'c', 'V', 'v', '#', '~', 'S', 'p'];
280        let mut pttrn_def = PatternMap::new();
281
282        pttrn_def.insert("Unknown".to_string(), symbols[0]);
283        pttrn_def.insert("ConsonantUpper".to_string(), symbols[1]);
284        pttrn_def.insert("ConsonantLower".to_string(), symbols[2]);
285        pttrn_def.insert("VowelUpper".to_string(), symbols[3]);
286        pttrn_def.insert("VowelLower".to_string(), symbols[4]);
287        pttrn_def.insert("Numeric".to_string(), symbols[5]);
288        pttrn_def.insert("RegExSpcChar".to_string(), symbols[6]);
289        pttrn_def.insert("WhiteSpace".to_string(), symbols[7]);
290        pttrn_def.insert("Punctuation".to_string(), symbols[8]);
291
292        PatternDefinition {
293            pattern_map: pttrn_def,
294            pattern: Pattern::default(),
295        }
296    }
297
298    /// This function converts an entity (&str) into a tuplet (String, Vec<Fact>)</br>
299    ///
300    /// # Arguments
301    ///
302    /// * `entity: String` - The textual str of the value to analyze.</br>
303    ///
304    /// # Example
305    ///
306    /// ```rust
307    /// extern crate test_data_generation;
308    ///
309    /// use test_data_generation::engine::PatternDefinition;
310    ///
311    /// fn main() {
312    ///		let mut pttrn_def = PatternDefinition::new();
313    ///     //async {
314    ///         let rslt = pttrn_def.analyze("Hello World");
315    ///         assert_eq!(rslt.0, "CvccvSCvccc");
316    ///     //}
317    /// }
318    /// ```
319    #[inline]
320    pub fn analyze(&mut self, entity: &str) -> (String, Vec<Fact>) {
321        // record the length of the passed value
322        //self.size = entity.len() as u32;
323
324        // String to hold the pattern
325        let mut pttrn = String::new();
326
327        // Vec to hold all the Facts to be returned
328        let mut facts = Vec::new();
329
330        // record the pattern of the passed value
331        for (i, _c) in entity.chars().enumerate() {
332            //let fact = self.factualize(&entity, i as u32);
333            let idx: u32 = i as u32;
334            let fact = self.factualize(entity, idx);
335            pttrn.push_str(&*fact.pattern_placeholder.to_string());
336            facts.push(fact);
337        }
338
339        (pttrn, facts)
340    }
341
342    /// This function converts a char in an entity (&str) based on the index specified into a Fact</br>
343    ///
344    /// # Arguments
345    ///
346    /// * `entity: String` - The textual str of the value to analyze.</br>
347    /// * `idx: u32` - The index that specifies the position of the char in the entity to convert to a Fact.</br>
348    ///
349    /// # Example
350    ///
351    /// ```rust
352    /// extern crate test_data_generation;
353    ///
354    /// use test_data_generation::engine::PatternDefinition;
355    ///
356    /// fn main() {
357    ///		let mut pttrn_def = PatternDefinition::new();
358    ///		let fact = pttrn_def.factualize("Word",0);
359    ///     // will return a Fact that represents the char `W`
360    /// }
361    /// ```
362    #[inline]
363    pub fn factualize(&mut self, entity: &str, idx: u32) -> Fact {
364        let c = entity.chars().nth(idx as usize).unwrap();
365        let pp = self.symbolize_char(c);
366        let pk = if idx > 0 {
367            entity.chars().nth(idx as usize - 1)
368        } else {
369            None
370        };
371        let nk = if idx < entity.len() as u32 - 1 {
372            entity.chars().nth(idx as usize + 1)
373        } else {
374            None
375        };
376        let sw = if idx == 0 { 1 } else { 0 };
377        let ew = if idx == entity.len() as u32 - 1 { 1 } else { 0 };
378
379        let mut fact = Fact::new(c, pp, sw, ew, idx);
380
381        // only if there is a next key
382        if nk.is_some() {
383            let _ = &fact.set_next_key(nk.unwrap());
384        }
385
386        // only if there is a prior key
387        if pk.is_some() {
388            let _ = &fact.set_prior_key(pk.unwrap());
389        }
390
391        fact
392    }
393
394    /// This function returns a pattern symbol that represents the type of character
395    ///
396    /// # Example
397    ///
398    /// ```rust
399    /// extern crate test_data_generation;
400    ///
401    /// use test_data_generation::engine::PatternDefinition;
402    ///
403    /// fn main() {
404    /// 	let pttrn_def = PatternDefinition::new();
405    ///     println!("Upper case vowel symbol: {:?}", pttrn_def.get(&"VowelUpper".to_string()));
406    /// }
407    /// ```
408    #[inline]
409    pub fn get(&self, key: &str) -> char {
410        *self.pattern_map.get(key).unwrap()
411    }
412
413    /// This function converts a char into a pattern symbol
414    ///
415    /// # Example
416    ///
417    /// ```rust
418    /// extern crate test_data_generation;
419    ///
420    /// use test_data_generation::engine::PatternDefinition;
421    ///
422    /// fn main() {
423    /// 	let pttrn_def = PatternDefinition::new();
424    /// 	println!("The pattern symbol for 'A' is {:?}", pttrn_def.symbolize_char('A'));
425    ///     // The pattern symbol for 'A' is V
426    /// }
427    /// ```
428    #[inline]
429    pub fn symbolize_char(&self, c: char) -> char {
430        // if you have to escape regex special characters: &*regex::escape(&*$c.to_string())
431        let mut symbol = self.pattern_map.get("Unknown");
432        let mut found = false;
433
434        if !found && self.pattern.regex_consonant_upper.is_match(&c.to_string()) {
435            symbol = self.pattern_map.get("ConsonantUpper");
436            found = true;
437        }
438
439        if !found && self.pattern.regex_consonant_lower.is_match(&c.to_string()) {
440            symbol = self.pattern_map.get("ConsonantLower");
441            found = true;
442        }
443
444        if !found && self.pattern.regex_vowel_upper.is_match(&c.to_string()) {
445            symbol = self.pattern_map.get("VowelUpper");
446            found = true;
447        }
448
449        if !found && self.pattern.regex_vowel_lower.is_match(&c.to_string()) {
450            symbol = self.pattern_map.get("VowelLower");
451            found = true;
452        }
453
454        if !found && self.pattern.regex_numeric.is_match(&c.to_string()) {
455            symbol = self.pattern_map.get("Numeric");
456            found = true;
457        }
458
459        if !found && self.pattern.regex_space.is_match(&c.to_string()) {
460            symbol = self.pattern_map.get("WhiteSpace");
461            found = true;
462        }
463
464        if !found && self.pattern.regex_punctuation.is_match(&c.to_string()) {
465            symbol = self.pattern_map.get("Punctuation");
466            found = true;
467        }
468
469        // if not matched, then use "Unknown" placeholder symbol
470        if !found {
471            symbol = self.pattern_map.get("Unknown");
472        }
473
474        *symbol.unwrap()
475    }
476}
477
478pub trait Engine {
479    fn analyze_entities(entities: Vec<String>) -> Vec<(String, Vec<Fact>)> {
480        let (tx, rx): (Sender<(String, Vec<Fact>)>, Receiver<(String, Vec<Fact>)>) =
481            mpsc::channel();
482        let mut children = Vec::new();
483
484        for entity in entities.clone() {
485            let thread_tx = tx.clone();
486            let child = thread::spawn(move || {
487                thread_tx
488                    .send(PatternDefinition::new().analyze(&entity))
489                    .unwrap();
490                debug!("PatternDefinition::analyze thread finished for {}", entity);
491            });
492
493            children.push(child);
494        }
495
496        let mut results = Vec::new();
497        for entity in entities {
498            results.push(match rx.recv() {
499                Ok(result) => result,
500                Err(_) => {
501                    error!("Error: Could not analyze the entity: {}", entity);
502                    panic!("Error: Could not analyze the data!")
503                }
504            });
505        }
506
507        for child in children {
508            child.join().expect("Error: Could not analyze the data!");
509        }
510
511        results
512    }
513
514    fn profile_entities(mut profile: Profile, entities: Vec<String>) -> Result<Profile, String> {
515        let results = Self::analyze_entities(entities);
516
517        for result in results {
518            match profile.apply_facts(result.0, result.1) {
519                Ok(_) => {}
520                Err(e) => {
521                    return Err(format!(
522                    "Error: Couldn't apply the Pattern and Facts to the Profile. Error Message: {}",
523                    e.to_string()
524                ))
525                }
526            }
527        }
528
529        Ok(profile)
530    }
531
532    fn profile_entities_with_container(container: EngineContainer) -> Result<Profile, String> {
533        Self::profile_entities(container.profile, container.entities)
534    }
535}
536
/// Bundles a `Profile` with the entities that should be applied to it, so
/// both can be passed to `Engine::profile_entities_with_container` as one value.
pub struct EngineContainer {
    /// the profile that receives the analysis results
    pub profile: Profile,
    /// the raw values to analyze
    pub entities: Vec<String>,
}
541
542// Unit Tests
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal implementor used to reach the default functions of `Engine`.
    struct Xtest {}
    impl Engine for Xtest {}

    /// The JSON form of the Fact for the character 'r' in the string "word".
    const SERIALIZED_FACT: &str = r#"{"key":"r","prior_key":null,"next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2}"#;

    #[test]
    fn test_fact_new() {
        //fact created for the character 'r' in the string "word"
        let fact = Fact::new('r', 'c', 0, 0, 2);

        assert_eq!(fact.key, 'r');
        assert_eq!(fact.pattern_placeholder, 'c');
        assert_eq!(fact.prior_key, None);
        assert_eq!(fact.next_key, None);
        assert_eq!(fact.starts_with, 0);
        assert_eq!(fact.ends_with, 0);
        assert_eq!(fact.index_offset, 2);
    }

    #[test]
    fn test_fact_new_from_serialized() {
        let fact = Fact::from_serialized(SERIALIZED_FACT);

        assert_eq!(fact.key, 'r');
        assert_eq!(fact.pattern_placeholder, 'c');
        assert_eq!(fact.index_offset, 2);
    }

    #[test]
    fn test_fact_serialize() {
        //fact created for the character 'r' in the string "word"
        let mut fact = Fact::new('r', 'c', 0, 0, 2);
        let serialized = fact.serialize();

        assert_eq!(serialized, SERIALIZED_FACT);
    }

    #[test]
    fn test_fact_set_next_key() {
        //fact created for the character 'r' in the string "word"
        let mut fact = Fact::new('r', 'c', 0, 0, 2);
        fact.set_next_key('d');

        assert_eq!(fact.next_key, Some('d'));
    }

    #[test]
    fn test_fact_set_prior_key() {
        //fact created for the character 'r' in the string "word"
        let mut fact = Fact::new('r', 'c', 0, 0, 2);
        fact.set_prior_key('o');

        assert_eq!(fact.prior_key, Some('o'));
    }

    #[test]
    fn test_pattern_definition_new() {
        let pttrn_def = PatternDefinition::new();
        assert_eq!(pttrn_def.get("VowelUpper"), 'V');
    }

    #[test]
    fn test_pattern_definition_symbolize_char() {
        let pttrn_def = PatternDefinition::new();

        assert_eq!(pttrn_def.symbolize_char('A'), 'V');
    }

    #[test]
    fn test_pattern_definition_factualize() {
        let mut pttrn_def = PatternDefinition::new();
        let mut fact1 = pttrn_def.factualize("Word", 1);
        let mut fact2 = Fact::new('o', 'v', 0, 0, 1);
        fact2.set_prior_key('W');
        fact2.set_next_key('r');

        assert_eq!(fact1.serialize(), fact2.serialize());
    }

    #[test]
    fn test_pattern_definition_analyze() {
        let mut pttrn_def = PatternDefinition::new();
        let word = pttrn_def.analyze("HELlo0?^@");

        assert_eq!(word.0, "CVCcv#pp@");
        assert_eq!(word.1.len(), 9);
        // only the first and the last Fact are flagged as such
        assert_eq!(word.1[0].starts_with, 1);
        assert_eq!(word.1[0].ends_with, 0);
        assert_eq!(word.1[8].starts_with, 0);
        assert_eq!(word.1[8].ends_with, 1);
    }

    #[test]
    fn test_pattern_definition_analyze_multithread() {
        let words = vec![
            "word-one".to_string(),
            "word-two".to_string(),
            "word-three".to_string(),
            "word-four".to_string(),
            "word-five".to_string(),
        ];

        let results = Xtest::analyze_entities(words);

        println!("{:?}", results);
        assert_eq!(results.len(), 5);
    }

    #[test]
    fn test_profile_entities() {
        let profile = Profile::new();
        let words = vec![
            "word-one".to_string(),
            "word-two".to_string(),
            "word-three".to_string(),
            "word-four".to_string(),
            "word-five".to_string(),
        ];
        let result = Xtest::profile_entities(profile, words);
        assert!(result.is_ok());
    }
}