use crate ::generators ::DataSize;
use std ::collections ::HashMap;
/// Configurable generator of synthetic text data.
///
/// Values are normally set through the chained builder methods on the
/// inherent impl and consumed by `generate_string` and the other `generate_*`
/// methods.
#[ derive(Debug, Clone) ]
pub struct DataGenerator
{
/// Template expanded once per item: `{}` is replaced by the item index and
/// `{key}` by the matching entry of `parameters`.
pub pattern: Option< String >,
/// Target size in items. The `size` builder stores `DataSize ::Custom(n)`;
/// `generate_string` budgets 10 bytes per item.
pub size: Option< DataSize >,
/// Target size in bytes, used by `generate_string` when `size` does not apply.
pub size_bytes: Option< usize >,
/// Number of pattern expansions to concatenate; only consulted by
/// `generate_string` when `pattern` is also set.
pub repetitions: Option< usize >,
/// Selects which filler text, cell format, or pattern suffix is produced.
pub complexity: DataComplexity,
/// Optional seed. `generate_strings` offsets it per generated string.
/// NOTE(review): nothing visible in this file reads it while producing
/// output - confirm whether other code depends on it.
pub seed: Option< u64 >,
/// Named placeholder values: key `k` replaces every `{k}` in `pattern`.
pub parameters: HashMap< String, String >,
}
/// How elaborate the generated data should be.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq`, so a level can be used
/// as a `HashMap`/`HashSet` key (for example to group results per level).
#[ derive(Debug, Clone, Copy, PartialEq, Eq, Hash) ]
pub enum DataComplexity
{
  /// Plain ASCII output; pattern expansion adds no suffix.
  Simple,
  /// Moderately structured ASCII output; the default of `DataGenerator`.
  Medium,
  /// Output that includes special characters such as `@#$%`.
  Complex,
  /// Output that additionally includes non-ASCII text (`🦀`) and escape
  /// characters.
  Full,
}
impl Default for DataGenerator
{
fn default() -> Self
{
Self
{
pattern: None,
size: None,
size_bytes: None,
repetitions: None,
complexity: DataComplexity ::Medium,
seed: None,
parameters: HashMap ::new(),
}
}
}
impl DataGenerator
{
  /// Bytes budgeted for each item when a size is expressed in items.
  const BYTES_PER_ITEM: usize = 10;

  /// Safety valve for size-driven pattern generation: the fill loop stops once
  /// more than this many expansions have been appended, so a pattern that
  /// expands to nothing cannot spin forever.
  const MAX_PATTERN_EXPANSIONS: usize = 1_000_000;

  /// Creates a generator with the default configuration.
  pub fn new() -> Self
  {
    Self ::default()
  }

  /// Sets the template expanded for every item. `{}` is replaced by the item
  /// index and `{key}` by the value registered with `parameter`.
  pub fn pattern(mut self, pattern: &str) -> Self
  {
    self.pattern = Some(pattern.to_string());
    self
  }

  /// Sets the target size in items (stored as `DataSize ::Custom(size)`).
  pub fn size(mut self, size: usize) -> Self
  {
    self.size = Some(DataSize ::Custom(size));
    self
  }

  /// Sets the target size in bytes.
  pub fn size_bytes(mut self, bytes: usize) -> Self
  {
    self.size_bytes = Some(bytes);
    self
  }

  /// Sets how many times the pattern is expanded and concatenated.
  pub fn repetitions(mut self, repetitions: usize) -> Self
  {
    self.repetitions = Some(repetitions);
    self
  }

  /// Sets the complexity level of the generated data.
  pub fn complexity(mut self, complexity: DataComplexity) -> Self
  {
    self.complexity = complexity;
    self
  }

  /// Stores a seed. No method in this impl reads the seed while producing
  /// output, so generated data does not currently vary with it.
  pub fn seed(mut self, seed: u64) -> Self
  {
    self.seed = Some(seed);
    self
  }

  /// Registers a named placeholder: every `{key}` in the pattern is replaced
  /// by `value`. A later call with the same key overwrites the earlier value.
  pub fn parameter(mut self, key: &str, value: &str) -> Self
  {
    self.parameters.insert(key.to_string(), value.to_string());
    self
  }

  /// Generates one string from the current configuration.
  ///
  /// The first applicable rule wins:
  /// 1. pattern + repetitions: the pattern expanded `repetitions` times;
  /// 2. pattern + size: the pattern repeated up to `size * 10` bytes;
  /// 3. pattern + size_bytes: the pattern repeated up to `size_bytes` bytes;
  /// 4. size only: complexity filler of `size * 10` bytes;
  /// 5. size_bytes only: complexity filler of `size_bytes` bytes;
  /// 6. anything else (including repetitions without a pattern): filler for
  ///    100 items.
  ///
  /// Size-limited results are never cut inside a multi-byte character, so
  /// they may be up to three bytes shorter than requested.
  pub fn generate_string( &self ) -> String
  {
    match (&self.pattern, &self.size, &self.size_bytes, &self.repetitions)
    {
      (Some(pattern), _, _, Some(reps)) => self.generate_pattern_string(pattern, *reps),
      (Some(pattern), Some(size), _, _) => self.generate_sized_pattern_string(pattern, size.size()),
      (Some(pattern), _, Some(bytes), _) => self.generate_sized_pattern_string_bytes(pattern, *bytes),
      (None, Some(size), _, _) => self.generate_sized_string_items(size.size()),
      (None, _, Some(bytes), _) => self.generate_sized_string_bytes(*bytes),
      _ => self.generate_default_string(),
    }
  }

  /// Generates `count` strings. When a seed is configured, the i-th string is
  /// produced by a clone whose seed is `seed + i`, wrapping on overflow.
  pub fn generate_strings(&self, count: usize) -> Vec< String >
  {
    (0..count).map(|i|
    {
      let mut generator = self.clone();
      if let Some(base_seed) = self.seed
      {
        // wrapping_add: a base seed close to u64::MAX must not abort the run
        // with an arithmetic-overflow panic.
        generator.seed = Some(base_seed.wrapping_add(i as u64));
      }
      generator.generate_string()
    }).collect()
  }

  /// Generates `rows` lines of `columns` comma-separated cells, each line
  /// terminated by `\n`. The cell text depends on the complexity level and
  /// embeds the column and row index. Cells are not quoted or escaped.
  pub fn generate_csv_data(&self, rows: usize, columns: usize) -> String
  {
    let mut csv = String ::new();
    for row in 0..rows
    {
      let cells: Vec< String > = (0..columns).map(|col| match self.complexity
      {
        DataComplexity ::Simple => format!("field{}_{}", col, row),
        DataComplexity ::Medium => format!("data_{}_{}_value", col, row),
        DataComplexity ::Complex => format!("complex_field_{}_{}_with_special_chars@#$%", col, row),
        DataComplexity ::Full => format!("full_complexity_field_{}_{}_with_unicode_🦀_and_escapes\\\"quotes\\\"", col, row),
      }).collect();
      csv.push_str(&cells.join(","));
      csv.push('\n');
    }
    csv
  }

  /// Generates `count` command strings of the form `namespace.command ...`.
  ///
  /// Namespace, command and argument name are all picked by `i % 5`, so they
  /// cycle together; higher complexity levels append more arguments.
  pub fn generate_unilang_commands(&self, count: usize) -> Vec< String >
  {
    let namespaces = ["math", "string", "file", "network", "system"];
    let commands = ["process", "parse", "transform", "validate", "execute"];
    let args = ["input", "output", "config", "flags", "options"];
    (0..count).map(|i|
    {
      let ns = namespaces[i % namespaces.len()];
      let cmd = commands[i % commands.len()];
      let arg = args[i % args.len()];
      match self.complexity
      {
        DataComplexity ::Simple => format!("{}.{}", ns, cmd),
        DataComplexity ::Medium => format!("{}.{} {} ::value", ns, cmd, arg),
        DataComplexity ::Complex => format!("{}.{} {} ::value,flag ::true,count :: {}", ns, cmd, arg, i),
        DataComplexity ::Full => format!("{}.{} {} ::complex_value_with_specials@#$,flag ::true,count :: {},nested :: {{key :: {},array :: [1,2,3]}}", ns, cmd, arg, i, i),
      }
    }).collect()
  }

  /// Generates `fragment_count` strings of varying length for allocation
  /// tests. Fragment `i` targets `base_size + (i * 17) % 100` bytes.
  ///
  /// Except for `Simple`, a fragment can be shorter than its target: the
  /// repeated unit may not reach the target, and a cut is moved back to the
  /// previous character boundary (the `Full` unit contains `🦀`).
  pub fn generate_allocation_test_data(&self, base_size: usize, fragment_count: usize) -> Vec< String >
  {
    (0..fragment_count).map(|i|
    {
      let size = base_size + (i * 17) % 100;
      match self.complexity
      {
        DataComplexity ::Simple => "a".repeat(size),
        DataComplexity ::Medium =>
          Self ::repeat_and_trim(&format!("data_{}_", i), size / 10 + 1, size),
        DataComplexity ::Complex =>
          Self ::repeat_and_trim(&format!("complex_data_{}_{}", i, "x".repeat(i % 50)), size / 30 + 1, size),
        DataComplexity ::Full =>
          Self ::repeat_and_trim(&format!("full_complexity_{}_{}_unicode_🦀_{}", i, "pattern".repeat(i % 10), "end"), size / 50 + 1, size),
      }
    }).collect()
  }

  /// Concatenates the expansions of `pattern` for indices `0..repetitions`.
  fn generate_pattern_string(&self, pattern: &str, repetitions: usize) -> String
  {
    let mut result = String ::new();
    for i in 0..repetitions
    {
      result.push_str(&self.expand_pattern(pattern, i));
    }
    result
  }

  /// Item-based variant of `generate_sized_pattern_string_bytes`; the byte
  /// budget saturates instead of overflowing for very large item counts.
  fn generate_sized_pattern_string(&self, pattern: &str, target_items: usize) -> String
  {
    let target_bytes = target_items.saturating_mul(Self ::BYTES_PER_ITEM);
    self.generate_sized_pattern_string_bytes(pattern, target_bytes)
  }

  /// Appends successive expansions of `pattern` until `target_bytes` is
  /// reached (or the expansion cap is hit), then trims the excess.
  ///
  /// The result is at most `target_bytes` long. It can be shorter when the
  /// cap stops the loop early or when the cut would split a multi-byte
  /// character.
  fn generate_sized_pattern_string_bytes(&self, pattern: &str, target_bytes: usize) -> String
  {
    let mut result = String ::new();
    let mut counter = 0;
    while result.len() < target_bytes
    {
      result.push_str(&self.expand_pattern(pattern, counter));
      counter += 1;
      if counter > Self ::MAX_PATTERN_EXPANSIONS
      {
        break;
      }
    }
    // `String ::truncate` panics off a char boundary, and both the pattern and
    // the `Full` suffix may contain multi-byte characters.
    Self ::truncate_on_char_boundary(&mut result, target_bytes);
    result
  }

  /// Item-based variant of `generate_sized_string_bytes`.
  fn generate_sized_string_items(&self, items: usize) -> String
  {
    let target_bytes = items.saturating_mul(Self ::BYTES_PER_ITEM);
    self.generate_sized_string_bytes(target_bytes)
  }

  /// Repeats the filler unit of the current complexity level and trims it to
  /// `target_bytes`.
  ///
  /// `Simple` and `Medium` units are ASCII, so the result is exactly
  /// `target_bytes` long. `Complex` and `Full` units contain multi-byte
  /// characters, so the result may be up to three bytes shorter.
  fn generate_sized_string_bytes(&self, target_bytes: usize) -> String
  {
    // NOTE(review): the `¶m` sequence in the two longer units looks like a
    // mis-decoded `&param`. It is runtime output, so it is kept as-is here -
    // confirm the intended text.
    let unit = match self.complexity
    {
      DataComplexity ::Simple => "abcd,",
      DataComplexity ::Medium => "field: value,",
      DataComplexity ::Complex => "complex_field: complex_value;flag!option#tag@host¶m%data|pipe+plus-minus=equals_under~tilde^caret*star,",
      DataComplexity ::Full => "full_complexity_field: complex_value_with_unicode_🦀_special_chars@#$%^&*()_+-=[]{}|\\ : ;\"'< >?,./;flag!option#tag@host¶m%data|pipe+plus-minus=equals_under~tilde^caret*star/slash\\backslash,",
    };
    // One unit more than the integer quotient always covers the target.
    let mut result = unit.repeat(target_bytes / unit.len() + 1);
    Self ::truncate_on_char_boundary(&mut result, target_bytes);
    result
  }

  /// Fallback used when neither a usable pattern nor a size is configured:
  /// filler for 100 items.
  fn generate_default_string( &self ) -> String
  {
    self.generate_sized_string_items(100)
  }

  /// Expands `pattern` for one item.
  ///
  /// Every `{}` becomes `index`, then every `{key}` becomes its parameter
  /// value, then a complexity-dependent suffix is appended on selected
  /// indices (every 10th for `Medium`, 5th for `Complex`, 3rd for `Full`,
  /// index 0 included).
  ///
  /// Parameters are applied in `HashMap` iteration order, which is
  /// unspecified; this only matters if a value itself contains another
  /// `{key}` placeholder.
  fn expand_pattern(&self, pattern: &str, index: usize) -> String
  {
    let mut result = pattern.replace("{}", &index.to_string());
    for (key, value) in &self.parameters
    {
      result = result.replace(&format!("{{{}}}", key), value);
    }
    let suffix = match self.complexity
    {
      DataComplexity ::Simple => None,
      DataComplexity ::Medium => index.is_multiple_of(10).then_some("_variant"),
      DataComplexity ::Complex => index.is_multiple_of(5).then_some("_complex@#$"),
      DataComplexity ::Full => index.is_multiple_of(3).then_some("_full_unicode_🦀_special"),
    };
    if let Some(suffix) = suffix
    {
      result.push_str(suffix);
    }
    result
  }

  /// Repeats `unit` `times` times and trims the result to at most
  /// `max_bytes` without splitting a character.
  fn repeat_and_trim(unit: &str, times: usize, max_bytes: usize) -> String
  {
    let mut text = unit.repeat(times);
    Self ::truncate_on_char_boundary(&mut text, max_bytes);
    text
  }

  /// Shortens `text` to at most `max_bytes` bytes, backing off to the nearest
  /// earlier UTF-8 character boundary so the cut can never panic.
  fn truncate_on_char_boundary(text: &mut String, max_bytes: usize)
  {
    if text.len() <= max_bytes
    {
      return;
    }
    let mut cut = max_bytes;
    // Index 0 is always a boundary, so this loop terminates.
    while !text.is_char_boundary(cut)
    {
      cut -= 1;
    }
    text.truncate(cut);
  }
}
impl DataGenerator
{
  /// Preset for CSV-style data: no pattern, `DataComplexity ::Medium`.
  pub fn csv() -> Self
  {
    Self
    {
      complexity: DataComplexity ::Medium,
      ..Self ::new()
    }
  }

  /// Preset for log-style lines: one `INFO` entry per expansion, with the
  /// item index substituted for both `{}` placeholders.
  pub fn log_data() -> Self
  {
    Self
    {
      pattern: Some("[{}] INFO: Processing request {} with status OK".to_string()),
      complexity: DataComplexity ::Medium,
      ..Self ::new()
    }
  }

  /// Preset for command-line style data: no pattern,
  /// `DataComplexity ::Complex`.
  pub fn command_line() -> Self
  {
    Self
    {
      complexity: DataComplexity ::Complex,
      ..Self ::new()
    }
  }

  /// Preset for `key=value` configuration lines, one per expansion.
  pub fn config_file() -> Self
  {
    Self
    {
      pattern: Some("setting_{}=value_{}\n".to_string()),
      complexity: DataComplexity ::Medium,
      ..Self ::new()
    }
  }

  /// Preset for JSON-like object fragments at `DataComplexity ::Complex`.
  ///
  /// NOTE(review): the pattern is stored verbatim and is not a `format!`
  /// string, so the doubled braces appear to reach the output as literal
  /// `{{` and `}}` - confirm whether single braces were intended.
  pub fn json_like() -> Self
  {
    Self
    {
      pattern: Some("{{\"key_{}\" : \"value_{}\", \"number\" : {}}},".to_string()),
      complexity: DataComplexity ::Complex,
      ..Self ::new()
    }
  }
}