use crate ::data_generation :: { DataComplexity, DataGenerator };
use std ::collections ::HashMap;
#[ derive(Debug, Clone) ]
pub struct ParserCommandGenerator
{
pub complexity: CommandComplexity,
pub max_depth: usize,
pub max_arguments: usize,
pub separators: Vec< String >,
pub argument_patterns: Vec< ArgumentPattern >,
}
#[ derive(Debug, Clone, Copy, PartialEq, Eq, Hash) ]
pub enum CommandComplexity
{
Simple,
Standard,
Complex,
Comprehensive,
}
#[ derive(Debug, Clone, PartialEq) ]
pub enum ArgumentPattern
{
Positional,
Named,
Quoted,
Array,
Nested,
Mixed,
}
impl Default for ParserCommandGenerator
{
fn default() -> Self
{
Self
{
complexity: CommandComplexity ::Standard,
max_depth: 3,
max_arguments: 5,
separators: vec![";;".to_string(), ";".to_string()],
argument_patterns: vec![
ArgumentPattern ::Positional,
ArgumentPattern ::Named,
ArgumentPattern ::Quoted,
],
}
}
}
impl ParserCommandGenerator
{
#[ must_use ]
pub fn new() -> Self
{
Self ::default()
}
#[ must_use ]
pub fn complexity(mut self, complexity: CommandComplexity) -> Self
{
self.complexity = complexity;
self
}
#[ must_use ]
pub fn max_depth(mut self, depth: usize) -> Self
{
self.max_depth = depth;
self
}
#[ must_use ]
pub fn max_arguments(mut self, args: usize) -> Self
{
self.max_arguments = args;
self
}
#[ must_use ]
pub fn with_pattern(mut self, pattern: ArgumentPattern) -> Self
{
if !self.argument_patterns.contains(&pattern)
{
self.argument_patterns.push(pattern);
}
self
}
#[ must_use ]
pub fn generate_command(&self, index: usize) -> String
{
let command_path = self.generate_command_path(index);
let arguments = self.generate_arguments(index);
if arguments.is_empty()
{
command_path
}
else
{
format!("{command_path} {}", arguments.join(" "))
}
}
#[ must_use ]
pub fn generate_commands(&self, count: usize) -> Vec< String >
{
(0..count).map(|i| self.generate_command(i)).collect()
}
#[ must_use ]
pub fn generate_batch_commands(&self, count: usize) -> String
{
let commands = self.generate_commands(count);
let separator = &self.separators[0]; commands.join(&format!(" {separator} "))
}
#[ must_use ]
pub fn generate_error_cases(&self, count: usize) -> Vec< String >
{
let error_patterns = [
"invalid..double.dot",
"trailing.dot.",
".leading.dot",
"empty ::value :: ",
"unclosed\"quote",
"bad :::triple.colon",
"spaces in command",
"special@#$chars",
"unicode🦀command",
"", ];
(0..count)
.map(|i| {
let base_pattern = error_patterns[i % error_patterns.len()];
match self.complexity
{
CommandComplexity ::Simple => base_pattern.to_string(),
CommandComplexity ::Standard => format!("{base_pattern} arg ::value"),
CommandComplexity ::Complex => format!("{base_pattern} arg1 ::value1 arg2 :: \"complex value\""),
CommandComplexity ::Comprehensive => format!("{base_pattern} arg1 ::value1 arg2 :: [item1,item2] nested :: {{key ::value}}"),
}
})
.collect()
}
#[ must_use ]
pub fn generate_workload(&self, total_count: usize) -> ParserWorkload
{
let distribution = Self ::get_complexity_distribution();
let mut commands = Vec ::with_capacity(total_count);
let mut complexity_counts = HashMap ::new();
for i in 0..total_count
{
let complexity_level = Self ::select_complexity_by_distribution(i, &distribution);
let generator = self.clone().complexity(complexity_level);
let command = generator.generate_command(i);
commands.push(command);
*complexity_counts.entry(complexity_level).or_insert(0) += 1;
}
#[ allow(clippy ::cast_possible_truncation, clippy ::cast_sign_loss) ]
let error_count = (total_count as f32 * 0.05) as usize; let mut error_cases = self.generate_error_cases(error_count);
commands.append(&mut error_cases);
ParserWorkload
{
commands,
complexity_distribution: complexity_counts,
total_characters: 0, average_command_length: 0.0,
error_case_count: error_count,
}
}
fn generate_command_path(&self, index: usize) -> String
{
let namespaces = ["system", "user", "data", "config", "service", "log", "backup", "monitor"];
let actions = ["create", "update", "delete", "list", "show", "execute", "process", "analyze"];
let entities = ["record", "file", "service", "task", "report", "session", "cache", "index"];
let depth = match self.complexity
{
CommandComplexity ::Simple => 1,
CommandComplexity ::Standard => 2,
CommandComplexity ::Complex => self.max_depth.min(3),
CommandComplexity ::Comprehensive => self.max_depth,
};
let mut path_parts = Vec ::with_capacity(depth);
path_parts.push(namespaces[index % namespaces.len()]);
if depth > 1
{
path_parts.push(entities[(index / namespaces.len()) % entities.len()]);
}
if depth > 2
{
path_parts.push(actions[(index / (namespaces.len() * entities.len())) % actions.len()]);
}
if depth > 3
{
let specifics = ["detailed", "quick", "batch", "async"];
path_parts.push(specifics[index % specifics.len()]);
}
path_parts.join(".")
}
fn generate_arguments(&self, index: usize) -> Vec< String >
{
let arg_count = match self.complexity
{
CommandComplexity ::Simple => (index % 2).max(0),
CommandComplexity ::Standard => (index % 3) + 1,
CommandComplexity ::Complex => (index % self.max_arguments) + 2,
CommandComplexity ::Comprehensive => (index % self.max_arguments) + 3,
};
let mut arguments = Vec ::new();
for i in 0..arg_count
{
let pattern = &self.argument_patterns[i % self.argument_patterns.len()];
let arg = Self ::generate_argument_by_pattern(pattern, index, i);
arguments.push(arg);
}
arguments
}
fn generate_argument_by_pattern(pattern: &ArgumentPattern, cmd_index: usize, arg_index: usize) -> String
{
match pattern
{
ArgumentPattern ::Positional => format!("pos_arg_{arg_index}"),
ArgumentPattern ::Named =>
{
let value = cmd_index % 100;
format!("param{arg_index} ::value{value}")
},
ArgumentPattern ::Quoted => format!("description :: \"Command {cmd_index} argument {arg_index}\""),
ArgumentPattern ::Array =>
{
let item1 = arg_index;
let item2 = arg_index + 1;
let item3 = arg_index + 2;
format!("items :: [\"item{item1}\",\"item{item2}\",\"item{item3}\"]")
},
ArgumentPattern ::Nested =>
{
let timeout = (cmd_index % 10) + 1;
let retries = (arg_index % 3) + 1;
format!("config :: {{timeout :: {timeout},retries :: {retries}}}")
},
ArgumentPattern ::Mixed =>
{
match arg_index % 3
{
0 =>
{
let value = cmd_index % 100;
format!("param{arg_index} ::value{value}")
},
1 => format!("description :: \"Command {cmd_index} argument {arg_index}\""),
_ =>
{
let item1 = arg_index;
let item2 = arg_index + 1;
let item3 = arg_index + 2;
format!("items :: [\"item{item1}\",\"item{item2}\",\"item{item3}\"]")
},
}
}
}
}
fn get_complexity_distribution() -> Vec< (CommandComplexity, f32) >
{
vec![
(CommandComplexity ::Simple, 0.3), (CommandComplexity ::Standard, 0.5), (CommandComplexity ::Complex, 0.15), (CommandComplexity ::Comprehensive, 0.05), ]
}
fn select_complexity_by_distribution(index: usize, distribution: &[ (CommandComplexity, f32)]) -> CommandComplexity
{
let mut cumulative = 0.0;
let normalized_index = (index as f32) / 100.0 % 1.0;
for (complexity, weight) in distribution
{
cumulative += weight;
if normalized_index <= cumulative
{
return *complexity;
}
}
CommandComplexity ::Standard
}
}
#[ derive(Debug, Clone) ]
pub struct ParserWorkload
{
pub commands: Vec< String >,
pub complexity_distribution: HashMap< CommandComplexity, usize >,
pub total_characters: usize,
pub average_command_length: f64,
pub error_case_count: usize,
}
impl ParserWorkload
{
pub fn calculate_statistics( &mut self )
{
self.total_characters = self.commands.iter().map(std ::string ::String ::len).sum();
self.average_command_length = self.total_characters as f64 / self.commands.len() as f64;
}
#[ must_use ]
pub fn summary( &self ) -> String
{
let mut summary = String ::new();
summary.push_str("Parser Workload Summary: \n");
summary.push_str(&format!("- Total commands: {}\n", self.commands.len()));
summary.push_str(&format!("- Total characters: {}\n", self.total_characters));
summary.push_str(&format!("- Average length: {:.1} chars/command\n", self.average_command_length));
summary.push_str(&format!("- Error cases: {} ({:.1}%)\n",
self.error_case_count,
self.error_case_count as f64 / self.commands.len() as f64 * 100.0));
summary.push_str("- Complexity distribution: \n");
for (complexity, count) in &self.complexity_distribution
{
let percentage = *count as f64 / (self.commands.len() - self.error_case_count) as f64 * 100.0;
summary.push_str(&format!(" * {complexity:?} : {count} ({percentage:.1}%)\n"));
}
summary
}
#[ must_use ]
pub fn sample_commands(&self, count: usize) -> Vec< &String >
{
self.commands.iter().take(count).collect()
}
}
impl DataGenerator
{
#[ must_use ]
pub fn generate_enhanced_unilang_commands(&self, count: usize) -> Vec< String >
{
let generator = ParserCommandGenerator ::new()
.complexity( match self.complexity
{
DataComplexity ::Simple => CommandComplexity ::Simple,
DataComplexity ::Medium => CommandComplexity ::Standard,
DataComplexity ::Complex => CommandComplexity ::Complex,
DataComplexity ::Full => CommandComplexity ::Comprehensive,
})
.with_pattern(ArgumentPattern ::Named)
.with_pattern(ArgumentPattern ::Quoted)
.with_pattern(ArgumentPattern ::Array);
generator.generate_commands(count)
}
#[ must_use ]
pub fn generate_parser_scenarios(&self, scenario_type: &str, count: usize) -> Vec< String >
{
let generator = ParserCommandGenerator ::new()
.complexity(CommandComplexity ::Standard);
match scenario_type
{
"batch_processing" => vec![generator.generate_batch_commands(count)],
"error_handling" => generator.generate_error_cases(count),
"performance_stress" =>
{
let mut workload = generator.generate_workload(count);
workload.calculate_statistics();
workload.commands
},
_ => generator.generate_commands(count),
}
}
}