1
2impl Default for CommandGenerator {
3 fn default() -> Self {
4 Self::new()
5 }
6}
7
/// Rule-based mutator that derives new command lines from existing ones by
/// swapping subcommands and flags according to fixed substitution tables.
pub struct CommandMutator {
    /// Flag token -> replacement tokens; an empty replacement drops the flag.
    flag_subs: HashMap<&'static str, Vec<&'static str>>,
    /// Subcommand (second token) -> alternative subcommands.
    cmd_subs: HashMap<&'static str, Vec<&'static str>>,
}

impl CommandMutator {
    /// Creates a mutator preloaded with the built-in flag and subcommand tables.
    #[must_use]
    pub fn new() -> Self {
        let flag_subs = HashMap::from([
            ("-m", vec!["-am", "--message", "-m"]),
            ("--release", vec!["--debug", ""]),
            ("--lib", vec!["--bin", "--doc", "--test", ""]),
            ("-v", vec!["-vv", "-vvv", "--verbose", ""]),
            ("-a", vec!["--all", ""]),
            ("-f", vec!["--force", ""]),
            ("-n", vec!["--dry-run", ""]),
            ("-i", vec!["--interactive", ""]),
            ("-r", vec!["-R", "--recursive", ""]),
        ]);

        let cmd_subs = HashMap::from([
            ("commit", vec!["add", "status", "diff", "log"]),
            ("push", vec!["pull", "fetch"]),
            ("test", vec!["build", "check", "run", "bench"]),
            ("install", vec!["uninstall", "update", "add", "remove"]),
            ("start", vec!["stop", "restart", "status"]),
            ("up", vec!["down", "restart", "logs"]),
            ("create", vec!["delete", "update", "get", "describe"]),
        ]);

        Self {
            flag_subs,
            cmd_subs,
        }
    }

    /// Returns the sorted, de-duplicated variants of `command`.
    ///
    /// Variants come from three sources:
    /// 1. replacing the second token with each alternative from the subcommand table,
    /// 2. replacing (or, for an empty replacement, dropping) every token found in the
    ///    flag table, skipping results that are empty or equal to `command`,
    /// 3. appending common long flags to `git `/`cargo ` commands that contain no `--`.
    ///
    /// Tokens are split on whitespace and re-joined with single spaces. A command
    /// with no tokens yields an empty vector.
    pub fn mutate(&self, command: &str) -> Vec<String> {
        let tokens: Vec<&str> = command.split_whitespace().collect();
        if tokens.is_empty() {
            return Vec::new();
        }

        let mut variants: Vec<String> = Vec::new();

        // Subcommand swaps: only the token right after the program name is considered.
        if let Some(alternatives) = tokens.get(1).and_then(|sub| self.cmd_subs.get(*sub)) {
            variants.extend(alternatives.iter().map(|alt| {
                let mut swapped = tokens.clone();
                swapped[1] = *alt;
                swapped.join(" ")
            }));
        }

        // Flag swaps: every position is checked against the flag table.
        for (idx, token) in tokens.iter().enumerate() {
            let replacements = match self.flag_subs.get(*token) {
                Some(list) => list,
                None => continue,
            };

            for replacement in replacements {
                let candidate = tokens
                    .iter()
                    .enumerate()
                    .filter_map(|(pos, tok)| {
                        if pos != idx {
                            Some(*tok)
                        } else if replacement.is_empty() {
                            // An empty replacement removes the flag entirely.
                            None
                        } else {
                            Some(*replacement)
                        }
                    })
                    .collect::<Vec<&str>>()
                    .join(" ");

                if candidate.is_empty() || candidate == command {
                    continue;
                }
                variants.push(candidate);
            }
        }

        // Flag additions apply only when the raw command carries no `--` at all.
        let has_double_dash = command.contains("--");
        if !has_double_dash && command.starts_with("git ") {
            variants.push(format!("{} --verbose", command));
        }
        if !has_double_dash && command.starts_with("cargo ") {
            variants.push(format!("{} --release", command));
            variants.push(format!("{} --all-features", command));
        }

        variants.sort_unstable();
        variants.dedup();
        variants
    }

    /// Returns every input command followed by its mutations, in first-seen order,
    /// with exact duplicates removed across the whole batch.
    pub fn mutate_batch(&self, commands: &[String]) -> Vec<String> {
        let mut emitted: HashSet<String> = HashSet::new();
        let mut ordered: Vec<String> = Vec::new();

        for original in commands {
            // The original command always precedes its own mutations.
            let candidates = std::iter::once(original.clone()).chain(self.mutate(original));
            for candidate in candidates {
                if !emitted.contains(&candidate) {
                    emitted.insert(candidate.clone());
                    ordered.push(candidate);
                }
            }
        }

        ordered
    }
}

impl Default for CommandMutator {
    /// Equivalent to [`CommandMutator::new`].
    fn default() -> Self {
        CommandMutator::new()
    }
}
126
/// Picks candidate commands that contribute n-grams missing from a known set.
pub struct CoverageGuidedGenerator {
    /// N-grams regarded as already covered.
    known_ngrams: HashSet<String>,
    /// Maximum number of tokens in one context window.
    n: usize,
}

impl CoverageGuidedGenerator {
    /// Creates a generator over `known_ngrams` with windows of at most `n` tokens.
    ///
    /// An `n` of 0 is treated like 1 (single-token windows).
    pub fn new(known_ngrams: HashSet<String>, n: usize) -> Self {
        Self { known_ngrams, n }
    }

    /// Returns up to `count` commands from `base_commands`, ordered by how many of
    /// their extracted n-grams are absent from the known set (highest first).
    ///
    /// Candidates are scanned in input order; those without any unknown n-gram are
    /// skipped, and scanning stops once `2 * count` candidates have been kept.
    /// The sort is stable, so equally scored candidates keep their input order.
    /// Scores count every extracted n-gram, including the duplicated first token
    /// produced by `extract_ngrams`.
    pub fn generate(&self, base_commands: &[String], count: usize) -> Vec<String> {
        if count == 0 {
            return Vec::new();
        }

        // Saturate so that a huge `count` cannot overflow the scan limit.
        let scan_limit = count.saturating_mul(2);
        let mut scored: Vec<(&String, usize)> = Vec::new();

        for cmd in base_commands {
            let new_count = self
                .extract_ngrams(cmd)
                .iter()
                .filter(|ng| !self.known_ngrams.contains(*ng))
                .count();

            if new_count > 0 {
                scored.push((cmd, new_count));
            }

            if scored.len() >= scan_limit {
                break;
            }
        }

        scored.sort_by(|a, b| b.1.cmp(&a.1));

        // Only the selected commands are cloned.
        scored
            .into_iter()
            .take(count)
            .map(|(cmd, _)| cmd.clone())
            .collect()
    }

    /// Extracts the n-grams of `command`.
    ///
    /// The result holds the first token on its own, followed by one window per
    /// token position made of that token and up to `n - 1` preceding tokens,
    /// joined with single spaces. The first token therefore appears twice.
    fn extract_ngrams(&self, command: &str) -> Vec<String> {
        let tokens: Vec<&str> = command.split_whitespace().collect();
        // `saturating_sub` keeps `n == 0` from underflowing.
        let lookback = self.n.saturating_sub(1);
        let mut ngrams = Vec::with_capacity(tokens.len() + 1);

        if let Some(first) = tokens.first() {
            ngrams.push((*first).to_string());
        }

        ngrams.extend((0..tokens.len()).map(|end| {
            let start = end.saturating_sub(lookback);
            tokens[start..=end].join(" ")
        }));

        ngrams
    }

    /// Summarizes how many distinct n-grams `commands` contain and how many of
    /// those are absent from the known set.
    pub fn coverage_report(&self, commands: &[String]) -> CoverageReport {
        let total: HashSet<String> = commands
            .iter()
            .flat_map(|cmd| self.extract_ngrams(cmd))
            .collect();
        let new_count = total
            .iter()
            .filter(|ng| !self.known_ngrams.contains(*ng))
            .count();

        CoverageReport {
            known_ngrams: self.known_ngrams.len(),
            total_ngrams: total.len(),
            new_ngrams: new_count,
            coverage_gain: if total.is_empty() {
                0.0
            } else {
                new_count as f32 / total.len() as f32
            },
        }
    }
}

/// N-gram coverage statistics produced by [`CoverageGuidedGenerator::coverage_report`].
#[derive(Debug, Clone)]
pub struct CoverageReport {
    /// Size of the known n-gram set.
    pub known_ngrams: usize,
    /// Number of distinct n-grams extracted from the reported commands.
    pub total_ngrams: usize,
    /// Number of distinct extracted n-grams that are not in the known set.
    pub new_ngrams: usize,
    /// `new_ngrams / total_ngrams`, or `0.0` when nothing was extracted.
    pub coverage_gain: f32,
}
237
238pub struct SyntheticPipeline {
240 generator: CommandGenerator,
241 mutator: CommandMutator,
242}
243
244impl SyntheticPipeline {
245 #[must_use]
247 pub fn new() -> Self {
248 Self {
249 generator: CommandGenerator::new(),
250 mutator: CommandMutator::new(),
251 }
252 }
253
254 pub fn generate(
260 &self,
261 real_history: &[String],
262 known_ngrams: HashSet<String>,
263 count: usize,
264 ) -> SyntheticResult {
265 let template_commands = self.generator.generate(count * 2);
267
268 let mutated_commands = self.mutator.mutate_batch(real_history);
270
271 let mut all_candidates: Vec<String> = template_commands;
273 all_candidates.extend(mutated_commands);
274
275 let coverage_gen = CoverageGuidedGenerator::new(known_ngrams.clone(), 3);
277 let selected = coverage_gen.generate(&all_candidates, count);
278
279 let report = coverage_gen.coverage_report(&selected);
281
282 SyntheticResult {
283 commands: selected,
284 report,
285 }
286 }
287}
288
289impl Default for SyntheticPipeline {
290 fn default() -> Self {
291 Self::new()
292 }
293}
294
295#[derive(Debug)]
297pub struct SyntheticResult {
298 pub commands: Vec<String>,
300 pub report: CoverageReport,
302}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned command lists the APIs take.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    /// The template generator yields a large batch covering git and cargo.
    #[test]
    fn test_command_generator_creates_commands() {
        let commands = CommandGenerator::new().generate(1000);
        assert!(commands.len() >= 500);
        assert!(commands.iter().any(|c| c.starts_with("git")));
        assert!(commands.iter().any(|c| c.starts_with("cargo")));
    }

    /// Generated commands contain no repeated entries.
    #[test]
    fn test_command_generator_no_duplicates() {
        let commands = CommandGenerator::new().generate(1000);
        let distinct = commands.iter().collect::<HashSet<_>>();
        assert_eq!(commands.len(), distinct.len());
    }

    /// Mutating a commit command yields subcommand variants.
    #[test]
    fn test_mutator_creates_variations() {
        let variants = CommandMutator::new().mutate("git commit -m test");
        assert!(!variants.is_empty());
        let has_swap = variants
            .iter()
            .any(|v| v.contains("add") || v.contains("status"));
        assert!(has_swap);
    }

    /// `--release` is either dropped or replaced by `--debug`.
    #[test]
    fn test_mutator_flag_substitution() {
        let variants = CommandMutator::new().mutate("cargo build --release");
        let flag_changed = variants
            .iter()
            .any(|v| !v.contains("--release") || v.contains("--debug"));
        assert!(flag_changed);
    }

    /// A candidate with unseen n-grams wins over a fully known one.
    #[test]
    fn test_coverage_guided_prioritizes_new_ngrams() {
        let known: HashSet<String> = owned(&["git", "git commit"]).into_iter().collect();
        let selector = CoverageGuidedGenerator::new(known, 3);

        let candidates = owned(&["git commit", "cargo test"]);

        let selected = selector.generate(&candidates, 1);
        assert_eq!(selected.len(), 1);
        assert!(selected[0].contains("cargo"));
    }

    /// The full pipeline returns commands and reports new n-grams.
    #[test]
    fn test_pipeline_generates_diverse_data() {
        let history = owned(&["git status", "cargo test"]);

        let result = SyntheticPipeline::new().generate(&history, HashSet::new(), 50);
        assert!(!result.commands.is_empty());
        assert!(result.report.new_ngrams > 0);
    }

    /// The report reflects the known-set size and a positive gain.
    #[test]
    fn test_coverage_report_accuracy() {
        let known: HashSet<String> = owned(&["git"]).into_iter().collect();
        let selector = CoverageGuidedGenerator::new(known, 2);

        let report = selector.coverage_report(&owned(&["git status", "cargo test"]));

        assert_eq!(report.known_ngrams, 1);
        assert!(report.new_ngrams > 0);
        assert!(report.coverage_gain > 0.0);
    }
}