benchkit/
generators.rs

1//! Data generators for benchmarking
2//!
3//! This module provides common data generation patterns based on learnings
4//! from unilang and `strs_tools` benchmarking. It focuses on realistic test
5//! data with configurable parameters.
6
/// Common data size patterns for benchmarking.
///
/// The named variants stand for fixed item counts (see [`DataSize::size`]);
/// [`DataSize::Custom`] carries an arbitrary caller-chosen count.
///
/// The type is comparable and hashable so that sizes can be checked with
/// `assert_eq!` or used as map/set keys when grouping benchmark results.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DataSize {
  /// Small dataset (10 items)
  Small,
  /// Medium dataset (100 items)
  Medium,
  /// Large dataset (1000 items)
  Large,
  /// Huge dataset (10000 items)
  Huge,
  /// Custom size; the wrapped value is the item count
  Custom(usize),
}

impl DataSize {
  /// Get the actual size value, i.e. the number of items this variant stands for.
  #[must_use]
  pub fn size(&self) -> usize {
    match *self {
      Self::Small => 10,
      Self::Medium => 100,
      Self::Large => 1000,
      Self::Huge => 10000,
      Self::Custom(size) => size,
    }
  }

  /// Get standard size variants for iteration.
  ///
  /// Returns the four named sizes in ascending order; `Custom` is not included.
  #[must_use]
  pub fn standard_sizes() -> Vec<DataSize> {
    vec![DataSize::Small, DataSize::Medium, DataSize::Large, DataSize::Huge]
  }
}
41
42/// Generate list data with configurable size and delimiter
43#[must_use]
44pub fn generate_list_data(size: DataSize) -> String {
45  generate_list_data_with_delimiter(size, ",")
46}
47
48/// Generate list data with custom delimiter
49#[must_use]
50pub fn generate_list_data_with_delimiter(size: DataSize, delimiter: &str) -> String {
51  (1..=size.size())
52    .map(|i| format!("item{i}"))
53    .collect::<Vec<_>>()
54    .join(delimiter)
55}
56
57/// Generate numeric list data
58#[must_use]
59pub fn generate_numeric_list(size: DataSize) -> String {
60  (1..=size.size())
61    .map(|i| i.to_string())
62    .collect::<Vec<_>>()
63    .join(",")
64}
65
66/// Generate map/dictionary data with key-value pairs
67#[must_use]
68pub fn generate_map_data(size: DataSize) -> String {
69  generate_map_data_with_delimiters(size, ",", "=")
70}
71
72/// Generate map data with custom delimiters
73#[must_use]
74pub fn generate_map_data_with_delimiters(size: DataSize, entry_delimiter: &str, kv_delimiter: &str) -> String {
75  (1..=size.size())
76    .map(|i| format!("key{i}{kv_delimiter}value{i}"))
77    .collect::<Vec<_>>()
78    .join(entry_delimiter)
79}
80
81/// Generate enum choices data
82#[must_use]
83pub fn generate_enum_data(size: DataSize) -> String {
84  (1..=size.size())
85    .map(|i| format!("choice{i}"))
86    .collect::<Vec<_>>()
87    .join(",")
88}
89
/// Generate a string of exactly `length` bytes, all of them the letter `a`.
///
/// A `length` of zero yields an empty string.
#[must_use]
pub fn generate_string_data(length: usize) -> String {
  const FILL: &str = "a";
  FILL.repeat(length)
}
95
/// Generate `count` strings of `x` characters with evenly increasing lengths.
///
/// String `i` (zero-based) has length `min_len + i * step`, where
/// `step = (max_len - min_len) / (count - 1)` using integer division. The
/// first string is therefore `min_len` long and no string exceeds `max_len`;
/// when the span is not evenly divisible the last string is shorter than
/// `max_len`.
///
/// Edge cases:
/// - `count == 0` returns an empty vector.
/// - `count == 1` returns a single string of length `min_len`.
/// - `max_len < min_len` is treated as a zero-width span, so every string has
///   length `min_len` (instead of underflowing the subtraction).
#[must_use]
pub fn generate_variable_strings(count: usize, min_len: usize, max_len: usize) -> Vec<String> {
  // Saturate so reversed bounds cannot underflow (a panic in debug builds,
  // a wrapped, enormous step in release builds).
  let span = max_len.saturating_sub(min_len);
  let step = if count > 1 { span / (count - 1) } else { 0 };

  // `i * step <= span`, so `min_len + i * step <= max_len` and cannot overflow.
  (0..count)
    .map(|i| "x".repeat(min_len + i * step))
    .collect()
}
109
/// Generate a nested, JSON-like object structure as text.
///
/// Every object has `width` members named `key0`, `key1`, ...; objects are
/// nested `depth` levels deep and each innermost value is the quoted string
/// `"value{depth}"`. With `depth == 0` the result is just that quoted leaf;
/// with `width == 0` (and `depth > 0`) it is `{}`.
///
/// The output contains `width^depth` leaves, so it grows very quickly.
#[must_use]
pub fn generate_nested_data(depth: usize, width: usize) -> String {
  // All sibling subtrees at a given level are identical, so the structure can
  // be assembled bottom-up: start at the leaf and wrap it once per level.
  let mut node = format!("\"value{depth}\"");

  for _ in 0..depth {
    let members: Vec<String> = (0..width)
      .map(|index| format!("\"key{index}\": {node}"))
      .collect();
    node = format!("{{{}}}", members.join(", "));
  }

  node
}
131
132/// Generate file path data
133#[must_use]
134pub fn generate_file_paths(size: DataSize) -> Vec<String> {
135  (1..=size.size())
136    .map(|i| format!("/path/to/file{i}.txt"))
137    .collect()
138}
139
140/// Generate URL data
141#[must_use]
142pub fn generate_urls(size: DataSize) -> Vec<String> {
143  (1..=size.size())
144    .map(|i| format!("https://example{i}.com/path"))
145    .collect()
146}
147
/// Seeded pseudo-random data generator using a simple linear congruential
/// generator (LCG).
///
/// Output depends only on the seed and the sequence of calls, so two
/// generators created with the same seed produce the same data.
#[derive(Debug)]
pub struct SeededGenerator {
  /// Current LCG state; starts as the caller's seed and is replaced on every step.
  seed: u64,
}

impl SeededGenerator {
  /// Create new seeded generator
  #[must_use]
  pub fn new(seed: u64) -> Self {
    Self { seed }
  }

  /// Advance the LCG state and return the new state as the next random number.
  fn next(&mut self) -> u64 {
    // Simple Linear Congruential Generator; wrapping arithmetic keeps the
    // state within u64 without overflow panics.
    self.seed = self.seed.wrapping_mul(1_103_515_245).wrapping_add(12345);
    self.seed
  }

  /// Generate random string of given length.
  ///
  /// Characters are drawn from `a-z`, `A-Z` and `0-9`; one generator step is
  /// consumed per character.
  pub fn random_string(&mut self, length: usize) -> String {
    const CHARS: &[u8] = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

    // Reduce in u64 *before* narrowing so the chosen index does not depend on
    // the platform's pointer width.
    let alphabet_len = CHARS.len() as u64;

    (0..length)
      .map(|_| {
        // The remainder is below `CHARS.len()`, so narrowing cannot truncate.
        #[allow(clippy::cast_possible_truncation)]
        let idx = (self.next() % alphabet_len) as usize;
        CHARS[idx] as char
      })
      .collect()
  }

  /// Generate random integer in the half-open range `min..max`.
  ///
  /// The result is always `>= min` and `< max`. If the range is empty
  /// (`max <= min`) the function returns `min` without advancing the
  /// generator, instead of dividing by zero or overflowing.
  // Both casts below are value-preserving: the offset is below 2^32 and
  // `min + offset` is below `max`, which fits in `i32`.
  #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
  pub fn random_int(&mut self, min: i32, max: i32) -> i32 {
    // Widen to i64 so `max - min` cannot overflow for extreme bounds such as
    // `i32::MIN..i32::MAX`.
    let span = i64::from(max) - i64::from(min);
    if span <= 0 {
      return min;
    }

    let offset = self.next() % span.unsigned_abs();
    (i64::from(min) + offset as i64) as i32
  }

  /// Generate random vector of `size` integers, each drawn with
  /// [`SeededGenerator::random_int`] from `min..max`.
  pub fn random_vec(&mut self, size: usize, min: i32, max: i32) -> Vec<i32> {
    (0..size)
      .map(|_| self.random_int(min, max))
      .collect()
  }
}
197
198/// Convenience function to generate random vector with default seed
199#[must_use]
200pub fn generate_random_vec(size: usize) -> Vec<i32> {
201  let mut gen = SeededGenerator::new(42);
202  gen.random_vec(size, 1, 1000)
203}
204
205/// Generate test data for common parsing scenarios (based on unilang experience)
206#[derive(Debug)]
207pub struct ParsingTestData;
208
209impl ParsingTestData {
210  /// Generate command-line argument style data
211  #[must_use]
212  pub fn command_args(size: DataSize) -> String {
213    (1..=size.size())
214      .map(|i| format!("--arg{i} value{i}"))
215      .collect::<Vec<_>>()
216      .join(" ")
217  }
218
219  /// Generate configuration file style data
220  #[must_use]
221  pub fn config_pairs(size: DataSize) -> String {
222    (1..=size.size())
223      .map(|i| format!("setting{i}=value{i}"))
224      .collect::<Vec<_>>()
225      .join("\n")
226  }
227
228  /// Generate CSV-like data
229  #[must_use]
230  pub fn csv_data(rows: usize, cols: usize) -> String {
231    let header = (1..=cols)
232      .map(|i| format!("column{i}"))
233      .collect::<Vec<_>>()
234      .join(",");
235    
236    let mut lines = vec![header];
237    
238    for row in 1..=rows {
239      let line = (1..=cols)
240        .map(|col| format!("row{row}col{col}"))
241        .collect::<Vec<_>>()
242        .join(",");
243      lines.push(line);
244    }
245    
246    lines.join("\n")
247  }
248
249  /// Generate JSON-like object data
250  #[must_use]
251  pub fn json_objects(size: DataSize) -> String {
252    let objects: Vec<String> = (1..=size.size())
253      .map(|i| format!(r#"{{"id": {}, "name": "object{}", "value": {}}}"#, i, i, i * 10))
254      .collect();
255    
256    format!("[{}]", objects.join(", "))
257  }
258}
259