1
2impl Default for CommandGenerator {
3 fn default() -> Self {
4 Self::new()
5 }
6}
7
/// Produces variations of shell commands by swapping subcommands and flags.
///
/// The substitution tables are fixed at construction time; see
/// [`CommandMutator::new`].
pub struct CommandMutator {
    /// Flag token -> replacement tokens. An empty replacement means
    /// "drop the flag entirely".
    flag_subs: HashMap<&'static str, Vec<&'static str>>,
    /// Subcommand (the second token of a command) -> alternative subcommands.
    cmd_subs: HashMap<&'static str, Vec<&'static str>>,
}

impl CommandMutator {
    /// Creates a mutator with the built-in flag and subcommand tables.
    #[must_use]
    pub fn new() -> Self {
        let flag_subs = HashMap::from([
            ("-m", vec!["-am", "--message", "-m"]),
            ("--release", vec!["--debug", ""]),
            ("--lib", vec!["--bin", "--doc", "--test", ""]),
            ("-v", vec!["-vv", "-vvv", "--verbose", ""]),
            ("-a", vec!["--all", ""]),
            ("-f", vec!["--force", ""]),
            ("-n", vec!["--dry-run", ""]),
            ("-i", vec!["--interactive", ""]),
            ("-r", vec!["-R", "--recursive", ""]),
        ]);

        let cmd_subs = HashMap::from([
            ("commit", vec!["add", "status", "diff", "log"]),
            ("push", vec!["pull", "fetch"]),
            ("test", vec!["build", "check", "run", "bench"]),
            ("install", vec!["uninstall", "update", "add", "remove"]),
            ("start", vec!["stop", "restart", "status"]),
            ("up", vec!["down", "restart", "logs"]),
            ("create", vec!["delete", "update", "get", "describe"]),
        ]);

        Self {
            flag_subs,
            cmd_subs,
        }
    }

    /// Returns the sorted, de-duplicated mutations of `command`.
    ///
    /// The command is tokenised on whitespace and every mutation is built from
    /// the single-space-joined form of those tokens. The same canonical form is
    /// used to filter out "mutations" identical to the input and to decide
    /// which common variations apply, so leading, trailing or repeated
    /// whitespace in `command` does not affect the result.
    ///
    /// Returns an empty vector when `command` contains no tokens.
    pub fn mutate(&self, command: &str) -> Vec<String> {
        let parts: Vec<&str> = command.split_whitespace().collect();
        if parts.is_empty() {
            return Vec::new();
        }
        // Canonical form: every generated mutation is a single-space join, so
        // comparisons and prefix checks must use the same representation.
        let normalized = parts.join(" ");

        let mut mutations = Vec::new();
        self.mutate_subcommand(&parts, &mut mutations);
        self.mutate_flags(&parts, &normalized, &mut mutations);
        Self::append_common_variations(&normalized, &mut mutations);

        // Equal strings are indistinguishable, so an unstable sort is safe here.
        mutations.sort_unstable();
        mutations.dedup();
        mutations
    }

    /// Pushes one mutation per alternative for the subcommand (second token),
    /// if that token has an entry in the subcommand table.
    fn mutate_subcommand(&self, parts: &[&str], mutations: &mut Vec<String>) {
        let Some(subs) = parts.get(1).and_then(|name| self.cmd_subs.get(*name)) else {
            return;
        };
        mutations.extend(subs.iter().map(|&sub| {
            let mut new_parts = parts.to_vec();
            new_parts[1] = sub;
            new_parts.join(" ")
        }));
    }

    /// Applies the flag table to every token of `parts` that has an entry.
    ///
    /// `command` is the canonical (single-space) form of `parts`.
    fn mutate_flags(&self, parts: &[&str], command: &str, mutations: &mut Vec<String>) {
        for (i, part) in parts.iter().enumerate() {
            if let Some(subs) = self.flag_subs.get(*part) {
                Self::push_flag_substitutions(parts, i, subs, command, mutations);
            }
        }
    }

    /// Pushes the command variants obtained by replacing (or, for an empty
    /// substitution, removing) the token at index `i`.
    ///
    /// Variants that are empty or equal to `command` are skipped.
    fn push_flag_substitutions(
        parts: &[&str],
        i: usize,
        subs: &[&'static str],
        command: &str,
        mutations: &mut Vec<String>,
    ) {
        for &sub in subs {
            let mut new_parts: Vec<&str> = parts.to_vec();
            if sub.is_empty() {
                new_parts.remove(i);
            } else {
                new_parts[i] = sub;
            }
            let new_cmd = new_parts.join(" ");
            if !new_cmd.is_empty() && new_cmd != command {
                mutations.push(new_cmd);
            }
        }
    }

    /// Appends tool-specific trailing flags for `git` and `cargo` commands.
    ///
    /// Nothing is appended when `command` already contains `--` anywhere.
    fn append_common_variations(command: &str, mutations: &mut Vec<String>) {
        if command.contains("--") {
            return;
        }
        if command.starts_with("git ") {
            mutations.push(format!("{command} --verbose"));
        }
        if command.starts_with("cargo ") {
            mutations.push(format!("{command} --release"));
            mutations.push(format!("{command} --all-features"));
        }
    }

    /// Returns every input command followed by its mutations, keeping only the
    /// first occurrence of each distinct string (first-seen order).
    pub fn mutate_batch(&self, commands: &[String]) -> Vec<String> {
        let mut all_mutations = Vec::new();
        let mut seen = HashSet::new();

        for cmd in commands {
            // The original command comes first, then its mutations.
            let candidates = std::iter::once(cmd.clone()).chain(self.mutate(cmd));
            for candidate in candidates {
                if seen.insert(candidate.clone()) {
                    all_mutations.push(candidate);
                }
            }
        }

        all_mutations
    }
}

impl Default for CommandMutator {
    /// Equivalent to [`CommandMutator::new`].
    fn default() -> Self {
        Self::new()
    }
}
148
/// Selects commands that contribute n-grams not present in a known set.
pub struct CoverageGuidedGenerator {
    /// N-grams considered already covered.
    known_ngrams: HashSet<String>,
    /// Maximum number of tokens per n-gram window (0 is treated as 1).
    n: usize,
}

impl CoverageGuidedGenerator {
    /// Creates a generator over `known_ngrams` with a window of at most `n` tokens.
    ///
    /// An `n` of 0 is treated as 1.
    pub fn new(known_ngrams: HashSet<String>, n: usize) -> Self {
        Self { known_ngrams, n }
    }

    /// Picks up to `count` commands from `base_commands`, preferring those with
    /// the most n-grams missing from the known set.
    ///
    /// Commands are scanned in order and scanning stops once `2 * count`
    /// commands with at least one unknown n-gram have been collected. Commands
    /// that add nothing new are never returned. Ties keep input order.
    pub fn generate(&self, base_commands: &[String], count: usize) -> Vec<String> {
        if count == 0 {
            return Vec::new();
        }
        // Saturate so a huge `count` cannot overflow the scan limit.
        let scan_limit = count.saturating_mul(2);

        // Borrow during scoring; only the selected commands are cloned.
        let mut scored: Vec<(&str, usize)> = Vec::new();
        for cmd in base_commands {
            let new_count = self
                .extract_ngrams(cmd)
                .iter()
                .filter(|ng| !self.known_ngrams.contains(ng.as_str()))
                .count();

            if new_count > 0 {
                scored.push((cmd.as_str(), new_count));
            }
            if scored.len() >= scan_limit {
                break;
            }
        }

        // Stable sort, highest novelty first.
        scored.sort_by(|a, b| b.1.cmp(&a.1));

        scored
            .into_iter()
            .take(count)
            .map(|(cmd, _)| cmd.to_owned())
            .collect()
    }

    /// Splits `command` on whitespace and returns its n-grams.
    ///
    /// The result holds the first token, followed by one entry per token
    /// position: that token joined with up to `n - 1` preceding tokens. The
    /// first token therefore appears twice; callers that need distinct
    /// n-grams must de-duplicate.
    fn extract_ngrams(&self, command: &str) -> Vec<String> {
        let tokens: Vec<&str> = command.split_whitespace().collect();
        let Some(first) = tokens.first() else {
            return Vec::new();
        };

        // `n - 1` would underflow for n == 0; saturate so 0 behaves like 1.
        let lookback = self.n.saturating_sub(1);

        let mut ngrams = Vec::with_capacity(tokens.len() + 1);
        ngrams.push((*first).to_string());
        ngrams.extend((0..tokens.len()).map(|end| {
            let start = end.saturating_sub(lookback);
            tokens[start..=end].join(" ")
        }));
        ngrams
    }

    /// Summarises how many distinct n-grams `commands` contain and how many of
    /// those are absent from the known set.
    pub fn coverage_report(&self, commands: &[String]) -> CoverageReport {
        let mut total_ngrams = HashSet::new();
        let mut new_ngrams = HashSet::new();

        for cmd in commands {
            for ng in self.extract_ngrams(cmd) {
                if !self.known_ngrams.contains(&ng) {
                    new_ngrams.insert(ng.clone());
                }
                total_ngrams.insert(ng);
            }
        }

        let coverage_gain = if total_ngrams.is_empty() {
            0.0
        } else {
            // Lossy usize -> f32 conversion is acceptable for a ratio.
            new_ngrams.len() as f32 / total_ngrams.len() as f32
        };

        CoverageReport {
            known_ngrams: self.known_ngrams.len(),
            total_ngrams: total_ngrams.len(),
            new_ngrams: new_ngrams.len(),
            coverage_gain,
        }
    }
}

/// N-gram coverage statistics for a set of commands.
#[derive(Debug, Clone)]
pub struct CoverageReport {
    /// Size of the known n-gram set the report was computed against.
    pub known_ngrams: usize,
    /// Number of distinct n-grams found in the analysed commands.
    pub total_ngrams: usize,
    /// Number of those distinct n-grams that are not in the known set.
    pub new_ngrams: usize,
    /// `new_ngrams / total_ngrams`, or `0.0` when there are no n-grams.
    pub coverage_gain: f32,
}
259
260pub struct SyntheticPipeline {
262 generator: CommandGenerator,
263 mutator: CommandMutator,
264}
265
266impl SyntheticPipeline {
267 #[must_use]
269 pub fn new() -> Self {
270 Self {
271 generator: CommandGenerator::new(),
272 mutator: CommandMutator::new(),
273 }
274 }
275
276 pub fn generate(
282 &self,
283 real_history: &[String],
284 known_ngrams: HashSet<String>,
285 count: usize,
286 ) -> SyntheticResult {
287 let template_commands = self.generator.generate(count * 2);
289
290 let mutated_commands = self.mutator.mutate_batch(real_history);
292
293 let mut all_candidates: Vec<String> = template_commands;
295 all_candidates.extend(mutated_commands);
296
297 let coverage_gen = CoverageGuidedGenerator::new(known_ngrams.clone(), 3);
299 let selected = coverage_gen.generate(&all_candidates, count);
300
301 let report = coverage_gen.coverage_report(&selected);
303
304 SyntheticResult {
305 commands: selected,
306 report,
307 }
308 }
309}
310
311impl Default for SyntheticPipeline {
312 fn default() -> Self {
313 Self::new()
314 }
315}
316
317#[derive(Debug)]
319pub struct SyntheticResult {
320 pub commands: Vec<String>,
322 pub report: CoverageReport,
324}
325
#[cfg(test)]
mod tests {
    use super::*;

    // Locals are named `generator` rather than `gen`, which is a reserved
    // keyword from the Rust 2024 edition onwards.

    /// The template generator yields a large set including git and cargo commands.
    #[test]
    fn test_command_generator_creates_commands() {
        let generator = CommandGenerator::new();
        let commands = generator.generate(1000);
        assert!(commands.len() >= 500);
        assert!(commands.iter().any(|c| c.starts_with("git")));
        assert!(commands.iter().any(|c| c.starts_with("cargo")));
    }

    /// Generated commands contain no duplicates.
    #[test]
    fn test_command_generator_no_duplicates() {
        let generator = CommandGenerator::new();
        let commands = generator.generate(1000);
        let unique: HashSet<_> = commands.iter().collect();
        assert_eq!(commands.len(), unique.len());
    }

    /// Mutating a git commit produces subcommand variations.
    #[test]
    fn test_mutator_creates_variations() {
        let mutator = CommandMutator::new();
        let mutations = mutator.mutate("git commit -m test");
        assert!(!mutations.is_empty());
        assert!(mutations
            .iter()
            .any(|m| m.contains("add") || m.contains("status")));
    }

    /// A known flag is either replaced or removed.
    #[test]
    fn test_mutator_flag_substitution() {
        let mutator = CommandMutator::new();
        let mutations = mutator.mutate("cargo build --release");
        assert!(mutations
            .iter()
            .any(|m| !m.contains("--release") || m.contains("--debug")));
    }

    /// Candidates whose n-grams are all known are skipped in favour of novel ones.
    #[test]
    fn test_coverage_guided_prioritizes_new_ngrams() {
        let known: HashSet<String> = vec!["git".to_string(), "git commit".to_string()]
            .into_iter()
            .collect();
        let generator = CoverageGuidedGenerator::new(known, 3);

        let candidates = vec!["git commit".to_string(), "cargo test".to_string()];

        let selected = generator.generate(&candidates, 1);
        assert_eq!(selected.len(), 1);
        assert!(selected[0].contains("cargo"));
    }

    /// The full pipeline returns commands that add new n-grams.
    #[test]
    fn test_pipeline_generates_diverse_data() {
        let pipeline = SyntheticPipeline::new();
        let history = vec!["git status".to_string(), "cargo test".to_string()];
        let known = HashSet::new();

        let result = pipeline.generate(&history, known, 50);
        assert!(!result.commands.is_empty());
        assert!(result.report.new_ngrams > 0);
    }

    /// The report counts distinct n-grams and those missing from the known set.
    #[test]
    fn test_coverage_report_accuracy() {
        let known: HashSet<String> = vec!["git".to_string()].into_iter().collect();
        let generator = CoverageGuidedGenerator::new(known, 2);

        let commands = vec!["git status".to_string(), "cargo test".to_string()];
        let report = generator.coverage_report(&commands);

        assert_eq!(report.known_ngrams, 1);
        assert!(report.new_ngrams > 0);
        assert!(report.coverage_gain > 0.0);
        // Distinct n-grams: "git", "git status", "cargo", "cargo test".
        assert_eq!(report.total_ngrams, 4);
        // Everything except the known "git" is new.
        assert_eq!(report.new_ngrams, 3);
    }
}