[−][src]Struct test_data_generation::Profile
Represents a Profile for sample data that has been analyzed and can be used to generate realistic data
Fields
id: Option<String>An identifier (not necessarily unique) that is used to differentiate profiles from one another
patterns: BTreeMap<String, u32>A list of symbolic patterns with a distinct count of occurrences
pattern_total: u32The total number of patterns in the profile
pattern_keys: Vec<String>A list of symbolic patterns in the profile (used for temporary storage due to lifetime issues)
pattern_vals: Vec<u32>A list of distinct counts for patterns in the profile (used for temporary storage due to lifetime issues)
pattern_percentages: Vec<(String, f64)>A list of symbolic patterns with their percent chance of occurrence
pattern_ranks: Vec<(String, f64)>A list of symbolic patterns with a running total of percent chance of occurrence, in increasing order
sizes: BTreeMap<u32, u32>A list of pattern lengths with a distinct count of occurrence
size_total: u32The total number of pattern sizes (lengths) in the profile
size_ranks: Vec<(u32, f64)>A list of pattern sizes (lengths) with a running total of their percent chance of occurrence, in increasing order
processors: u8The number of processors used to distribute the work load (multi-thread) while finding Facts to generate data
facts: Vec<Vec<Fact>>A list of processors (which are lists of Facts) that store all the Facts in the profile
Implementations
impl Profile[src]
pub fn new() -> Profile[src]
Constructs a new Profile
#Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let placeholder = Profile::new(); }
pub fn new_with_id(id: String) -> Profile[src]
Constructs a new Profile using an identifier
#Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let placeholder = Profile::new_with_id("12345".to_string()); }
pub fn new_with_processors(p: u8) -> Profile[src]
Constructs a new Profile with a specified number of processors to analyze the data. Each processor shares the load of generating the data based on the Facts it has been assigned to manage.
Arguments
p: u8 - A number that sets the number of processors to start up to manage the Facts. Increasing the number of processors will speed up the generator by distributing the workload. The recommended number of processors is 1 per 10K data points (e.g.: profiling 20K names should be handled by 2 processors). NOTE: The default number of processors is 4.
#Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let processors: u8 = 10; let placeholder = Profile::new_with_processors(processors); }
pub fn from_file(path: &'static str) -> Profile[src]
Constructs a new Profile from an exported JSON file. This is used when restoring from "archive"
Arguments
path: &'static str - The full path of the export file, excluding the file extension (e.g.: "./test/data/custom-names").
#Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let mut profile = Profile::from_file("./tests/samples/sample-00-profile"); profile.pre_generate(); println!("The generated name is {:?}", profile.generate()); }
pub fn from_serialized(serialized: &str) -> Profile[src]
Constructs a new Profile from a serialized (JSON) string of the Profile object. This is used when restoring from "archive"
#Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let serialized = "{\"patterns\":{\"VC\":1},\"pattern_total\":1,\"pattern_keys\":[\"VC\"],\"pattern_vals\":[1],\"pattern_percentages\":[],\"pattern_ranks\":[],\"sizes\":{\"2\":1},\"size_total\":1,\"size_ranks\":[],\"processors\":4,\"facts\":[[{\"key\":\"O\",\"prior_key\":null,\"next_key\":\"K\",\"pattern_placeholder\":\"V\",\"starts_with\":1,\"ends_with\":0,\"index_offset\":0}],[{\"key\":\"K\",\"prior_key\":\"O\",\"next_key\":null,\"pattern_placeholder\":\"C\",\"starts_with\":0,\"ends_with\":1,\"index_offset\":1}],[],[]]}"; let mut profile = Profile::from_serialized(&serialized); profile.pre_generate(); println!("The generated name is {:?}", profile.generate()); }
pub fn analyze(&mut self, entity: &str)[src]
This function converts a data point (&str) to a pattern and adds it to the profile
Arguments
entity: &str - The textual str of the value to analyze.
Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let mut profile = Profile::new(); profile.analyze("One"); profile.analyze("Two"); profile.analyze("Three"); profile.analyze("Four"); assert_eq!(profile.patterns.len(), 4); }
pub fn apply_facts(
&mut self,
pattern: String,
facts: Vec<Fact>
) -> Result<i32, String>[src]
&mut self,
pattern: String,
facts: Vec<Fact>
) -> Result<i32, String>
This function applies the pattern and list of Facts to the profile
Arguments
pattern: String - The string that represents the pattern of the entity that was analyzed. facts: Vec<Fact> - A Vector containing the Facts based on the analysis (one for each char in the entity).
Example
extern crate test_data_generation; use test_data_generation::engine::{Fact, PatternDefinition}; use test_data_generation::Profile; fn main() { let mut profile = Profile::new(); let results = PatternDefinition::new().analyze("Word"); assert_eq!(profile.apply_facts(results.0, results.1).unwrap(), 1); }
pub fn cum_patternmap(&mut self)[src]
This function calculates the patterns to use by the chance they will occur (as cumulative percentage) in decreasing order
Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let mut profile = Profile::new(); profile.analyze("Smith, John"); profile.analyze("O'Brian, Henny"); profile.analyze("Dale, Danny"); profile.analyze("Rickets, Ronnae"); profile.analyze("Richard, Richie"); profile.analyze("Roberts, Blake"); profile.analyze("Conways, Sephen"); profile.pre_generate(); let test = [("CvccvccpSCvccvv".to_string(), 28.57142857142857 as f64), ("CcvccpSCvcc".to_string(), 42.857142857142854 as f64), ("CvccvccpSCvccvc".to_string(), 57.14285714285714 as f64), ("CvcvcccpSCcvcv".to_string(), 71.42857142857142 as f64), ("CvcvpSCvccc".to_string(), 85.7142857142857 as f64), ("V@CcvvcpSCvccc".to_string(), 99.99999999999997 as f64)]; assert_eq!(profile.pattern_ranks, test); }
pub fn cum_sizemap(&mut self)[src]
This function calculates the sizes to use by the chance they will occur (as cumulative percentage) in decreasing order
Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let mut profile = Profile::new(); profile.analyze("One"); profile.analyze("Two"); profile.analyze("Three"); profile.analyze("Four"); profile.analyze("Five"); profile.analyze("Six"); profile.cum_sizemap(); print!("The size ranks are {:?}", profile.size_ranks); // The size ranks are [(3, 50), (4, 83.33333333333333), (5, 100)] }
pub fn generate(&mut self) -> String[src]
This function generates realistic test data based on the sample data that was analyzed.
Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let mut profile = Profile::new(); profile.analyze("One"); profile.analyze("Two"); profile.analyze("Three"); profile.analyze("Four"); profile.analyze("Five"); profile.pre_generate(); print!("The test data {:?} was generated.", profile.generate()); }
pub fn generate_from_pattern(&self, pattern: String) -> String[src]
This function generates realistic test data based on the sample data that was analyzed.
Arguments
pattern: String- The pattern to reference when generating the test data.
Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let mut profile = Profile::new(); profile.analyze("01/13/2017"); profile.analyze("11/24/2017"); profile.analyze("08/05/2017"); profile.pre_generate(); let generated = profile.generate_from_pattern("##p##p####".to_string()); assert_eq!(generated.len(), 10); }
pub fn learn_from_entity(
&mut self,
control_list: Vec<String>
) -> Result<bool, String>[src]
&mut self,
control_list: Vec<String>
) -> Result<bool, String>
This function learns by measuring how realistic the test data it generates is compared to the sample data that was provided.
Arguments
control_list: Vec<String>- The list of strings to compare against. This would be the real data from the data sample.
Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let mut profil = Profile::new(); let sample_data = vec!("Smith, John".to_string(),"Doe, John".to_string(),"Dale, Danny".to_string(),"Rickets, Ronney".to_string()); for sample in sample_data.iter().clone() { profil.analyze(&sample); } // in order to learn the profile must be prepared with pre_generate() // so it can generate data to learn from profil.pre_generate(); let learning = profil.learn_from_entity(sample_data).unwrap(); assert_eq!(learning, true); }
pub fn levenshtein_distance(
&mut self,
control: &String,
experiment: &String
) -> usize[src]
&mut self,
control: &String,
experiment: &String
) -> usize
This function calculates the Levenshtein distance between 2 strings. See: https://crates.io/crates/levenshtein
Arguments
control: &String - The string to compare against. This would be the real data from the data sample. experiment: &String - The string to compare. This would be the generated data for which you want to find the distance.
#Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let mut profile = Profile::new(); assert_eq!(profile.levenshtein_distance(&"kitten".to_string(), &"sitting".to_string()), 3 as usize); }
pub fn realistic_test(&mut self, control: &String, experiment: &String) -> f64[src]
This function calculates the percent difference between 2 strings.
Arguments
control: &String - The string to compare against. This would be the real data from the data sample. experiment: &String - The string to compare. This would be the generated data for which you want to find the percent difference.
#Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let mut profile = Profile::new(); assert_eq!(profile.realistic_test(&"kitten".to_string(), &"sitting".to_string()), 76.92307692307692 as f64); }
pub fn pre_generate(&mut self)[src]
This function prepares the size and pattern accumulated percentages, ordered by increasing percentage
Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let mut profile = Profile::new(); profile.analyze("One"); profile.analyze("Two"); profile.analyze("Three"); profile.analyze("Four"); profile.analyze("Five"); profile.analyze("Six"); profile.pre_generate(); print!("The size ranks are {:?}", profile.size_ranks); // The size ranks are [(3, 50), (4, 83.33333333333333), (5, 100)] }
pub fn reset_analyze(&mut self)[src]
This function resets the patterns that the Profile has analyzed. Call this method whenever you wish to "clear" the Profile
Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { let mut profile = Profile::new(); profile.analyze("One"); profile.analyze("Two"); profile.analyze("Three"); let x = profile.patterns.len(); profile.reset_analyze(); profile.analyze("Four"); profile.analyze("Five"); profile.analyze("Six"); profile.analyze("Seven"); profile.analyze("Eight"); profile.analyze("Nine"); profile.analyze("Ten"); let y = profile.patterns.len(); assert_eq!(x, 3); assert_eq!(y, 5); }
pub fn save(&mut self, path: &'static str) -> Result<bool, Error>[src]
This function saves (exports) the Profile to a JSON file. This is useful when you wish to reuse the algorithm to generate more test data later.
Arguments
path: &'static str - The full path of the export file, excluding the file extension (e.g.: "./test/data/custom-names").
Errors: If this function encounters any form of I/O or other error, an error variant will be returned. Otherwise, the function returns Ok(true).
#Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { // analyze the dataset let mut profile = Profile::new(); profile.analyze("Smith, John"); profile.analyze("O'Brian, Henny"); profile.analyze("Dale, Danny"); profile.analyze("Rickets, Ronney"); profile.pre_generate(); assert_eq!(profile.save("./tests/samples/sample-00-profile").unwrap(), true); }
pub fn serialize(&mut self) -> String[src]
This function converts the Profile to a serialized JSON string.
#Example
extern crate test_data_generation; use test_data_generation::Profile; fn main() { // analyze the dataset let mut data_profile = Profile::new(); // analyze the dataset data_profile.analyze("OK"); println!("{}", data_profile.serialize()); // {"patterns":{"VC":1},"pattern_total":1,"pattern_keys":["VC"],"pattern_vals":[1],"pattern_percentages":[],"pattern_ranks":[],"sizes":{"2":1},"size_total":1,"size_ranks":[],"processors":4,"facts":[[{"key":"O","prior_key":null,"next_key":"K","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0}],[{"key":"K","prior_key":"O","next_key":null,"pattern_placeholder":"C","starts_with":0,"ends_with":1,"index_offset":1}],[],[]]} }
Trait Implementations
impl Clone for Profile[src]
impl Debug for Profile[src]
impl<'de> Deserialize<'de> for Profile[src]
pub fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>, [src]
__D: Deserializer<'de>,
impl Serialize for Profile[src]
Auto Trait Implementations
impl RefUnwindSafe for Profile
impl Send for Profile
impl Sync for Profile
impl Unpin for Profile
impl UnwindSafe for Profile
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized, [src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized, [src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized, [src]
T: ?Sized,
pub fn borrow_mut(&mut self) -> &mut T[src]
impl<T> CloneAny for T where
T: Clone + Any,
T: Clone + Any,
pub fn clone_any(&self) -> Box<dyn CloneAny + 'static, Global>
pub fn clone_any_send(&self) -> Box<dyn CloneAny + 'static + Send, Global> where
T: Send,
T: Send,
pub fn clone_any_sync(&self) -> Box<dyn CloneAny + 'static + Sync, Global> where
T: Sync,
T: Sync,
pub fn clone_any_send_sync(
&self
) -> Box<dyn CloneAny + 'static + Send + Sync, Global> where
T: Send + Sync,
&self
) -> Box<dyn CloneAny + 'static + Send + Sync, Global> where
T: Send + Sync,
impl<T> DebugAny for T where
T: Any + Debug,
T: Any + Debug,
impl<T> DeserializeOwned for T where
T: for<'de> Deserialize<'de>, [src]
T: for<'de> Deserialize<'de>,
impl<T> From<T> for T[src]
impl<T, U> Into<U> for T where
U: From<T>, [src]
U: From<T>,
impl<T> ToOwned for T where
T: Clone, [src]
T: Clone,
type Owned = T
The resulting type after obtaining ownership.
pub fn to_owned(&self) -> T[src]
pub fn clone_into(&self, target: &mut T)[src]
impl<T, U> TryFrom<U> for T where
U: Into<T>, [src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>, [src]
U: TryFrom<T>,
type Error = <U as TryFrom<T>>::Error
The type returned in the event of a conversion error.
pub fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>[src]
impl<T> UnsafeAny for T where
T: Any,
T: Any,
impl<V, T> VZip<V> for T where
V: MultiLane<T>,
V: MultiLane<T>,